* listing all relevant settings is preferred.
   */
  public function viewSettings();

  /**
   * Get importer Id
   */
  public function getImporterId();

  /**
   * @param array of MicaDatasetQueryTerm $terms
   */
  public function query(array $terms);

  /**
   * @param array of MicaDatasetQueryTerm $terms
   * @param array of strings $fields
   */
  public function queryBy(array $terms, array $fields);

  /**
   * Returns a facet for a specific term.
   *
   * Categorical variables will return an associative array:
   * array('YES' => 3000, 'NO' => 50000)
   *
   * Continuous variables will return statistics (min, max, mean, variance,std_deviation,count,total,sum_of_squares)
   * array(
   *   'min' => 10.45,
   *   'max' => 23.78,
   *   ...
   * )
   */
  public function facetTerm(QueryTermInterface $term);

}

/**
 * Abstract class with generic implementation of most connection methods.
 */
abstract class MicaDatasetAbstractConnection implements MicaDatasetConnectionInterface {

  /**
   * @var MicaDatasetConnector
   */
  protected $connector;

  /**
   * Direct reference to the connector's $options property.
   *
   * @var array
   */
  protected $options = array();

  /**
   * Constructor for a connection class, setting the connector configuration
   * used with this connection.
   *
   * The default implementation sets $this->connector and $this->options.
   *
   * @param MicaDatasetConnector $connector
   *   The connector object for this connection.
   */
  public function __construct(MicaDatasetConnector $connector) {
    $this->connector = $connector;
    // Bound by reference so that changes made here are visible on the
    // connector object and vice versa.
    $this->options = &$connector->options;
  }

  /**
   * Form callback. Might be called on an uninitialized object - in this case,
   * the form is for configuring a newly created connector.
   *
   * Returns an empty form by default.
   *
   * @param mixed $study_node
   *   Unused by the default implementation. Presumably the study node the
   *   connector belongs to - TODO confirm against callers.
   *
   * @return array
   *   A form array for setting connection-specific options.
   */
  public function configurationForm($study_node) {
    return array();
  }

  /**
   * Validation callback for the form returned by configurationForm().
   *
   * Does nothing by default.
   *
   * @param array $form
   *   The form returned by configurationForm().
   * @param array $values
   *   The part of the $form_state['values'] array corresponding to this form.
   * @param array $form_state
   *   The complete form state.
   */
  public function configurationFormValidate(array $form, array &$values, array &$form_state) {
    return;
  }

  /**
   * Submit callback for the form returned by configurationForm().
   *
   * The default implementation just ensures that additional elements in
   * $options, not present in the form, don't get lost at the update.
   *
   * @param array $form
   *   The form returned by configurationForm().
   * @param array $values
   *   The part of the $form_state['values'] array corresponding to this form.
   * @param array $form_state
   *   The complete form state.
   */
  public function configurationFormSubmit(array $form, array &$values, array &$form_state) {
    if (!empty($this->options)) {
      // Array union: submitted values win, stored options only fill the gaps.
      $values += $this->options;
    }
    $this->options = $values;
  }

  /**
   * Determines whether this connection class implementation supports a given
   * feature. Features are optional extensions to Search API functionality and
   * usually defined and used by third-party modules.
   *
   * @param string $feature
   *   The name of the optional feature.
   *
   * @return boolean
   *   TRUE if this connection knows and supports the specified feature. FALSE
   *   otherwise.
   */
  public function supportsFeature($feature) {
    return FALSE;
  }

  /**
   * View this connector's settings. Output can be HTML or a render array, a
   * listing all relevant settings is preferred.
   *
   * The default implementation does a crude output as a definition list, with
   * option names taken from the configuration form.
   *
   * @return string
   *   A <dl> list with one <dt>/<dd> pair per titled form element that has a
   *   stored option, or an empty string when there is nothing to show.
   */
  public function viewSettings() {
    $output = '';
    // NOTE(review): configurationForm() is declared with a single $study_node
    // parameter, yet it is called here with ($form, $form_state). The call is
    // kept as-is because the correct argument cannot be determined from this
    // class; verify against the concrete implementations.
    $form = $form_state = array();
    $option_form = $this->configurationForm($form, $form_state);

    // Only options that are set and whose form element carries a title are
    // listed.
    $option_names = array();
    foreach ($option_form as $key => $element) {
      if (isset($element['#title']) && isset($this->options[$key])) {
        $option_names[$key] = $element['#title'];
      }
    }

    foreach ($option_names as $key => $name) {
      $value = $this->options[$key];
      $output .= '<dt>' . check_plain($name) . '</dt>' . "\n";
      $output .= '<dd>' . nl2br(check_plain(print_r($value, TRUE))) . '</dd>' . "\n";
    }

    return $output ? "<dl>\n$output</dl>" : '';
  }

}

/**
 * An abstract implementation of a dataset connection that builds and sends an
 * Elastic Search query from the query terms.
 *
 * Queries are assembled as JSON text using the facets API. Every value that
 * ends up inside the JSON (facet names, field names, category values, range
 * bounds) is encoded before being inserted, so quotes or backslashes in the
 * data cannot break the query.
 */
abstract class MicaDatasetElasticSearchConnection extends MicaDatasetAbstractConnection {

  /**
   * Sends the Elastic Search query to study server.
   *
   * @param string $query
   *   The JSON query text.
   *
   * @return array
   *   The Elastic Search query response. The methods of this class read it as
   *   a decoded array with 'hits' and 'facets' keys.
   */
  protected abstract function sendQuery($query);

  /**
   * Given a query term, get with which field it is indexed in ES.
   *
   * @param QueryTermInterface $term
   *
   * @return string
   *   The field name.
   */
  protected function getFieldName(QueryTermInterface $term) {
    return $this->getFieldNameMapping($term->getFieldName());
  }

  /**
   * Given a expected field name, allow mapping to a study specific field name.
   *
   * Default implementation is identity.
   *
   * @param string $fieldname
   *
   * @return string
   *   The field name.
   */
  protected function getFieldNameMapping($fieldname) {
    return $fieldname;
  }

  /**
   * Counts the population matching each term and matching all terms together.
   *
   * @param array $terms
   *   Objects exposing termImpl(), which returns a QueryTermInterface.
   *
   * @return array
   *   'total' => array('_all' => <hit total>), '_matched' => counts for the
   *   conjunction of all terms, plus one entry per term keyed by term name.
   *   Each counts entry has the shape produced by count().
   */
  public function query(array $terms) {
    $facets = array();
    $filters = array();
    foreach ($terms as $term) {
      $impl = $term->termImpl();
      $facets[] = $this->asFacet($impl);
      $filters[] = $this->asFilter($impl);
    }

    // One extra facet for the population matching all the terms at once.
    $facets[] = '"_matched" : { "filter" : { ' . $this->combineFilters($filters) . ' } }';

    $data = $this->sendQuery($this->buildFacetQuery($facets));

    $result = array('total' => array('_all' => $this->totalHits($data)));
    $result['_matched'] = $this->countFacet($data, '_matched');
    foreach ($terms as $term) {
      $name = $term->termImpl()->getName();
      $result[$name] = $this->countFacet($data, $name);
    }
    return $result;
  }

  /**
   * Same as query(), with every count broken down by the values of a field.
   *
   * @param array $terms
   *   Objects exposing termImpl(), which returns a QueryTermInterface.
   * @param array $fields
   *   Field names to break down by. Only the first entry is used.
   *
   * @return array
   *   'total', '_all' (whole population), '_matched' (all terms together) and
   *   one entry per term keyed by term name.
   */
  public function queryBy(array $terms, array $fields) {
    $by_field = $fields[0];
    $by_field_json = $this->encodeJsonString($this->getFieldNameMapping($by_field));
    $breakdown = '"terms" : { "field":' . $by_field_json . ',"all_terms":true }';

    // One facet for the population matching each term.
    $facets = array();
    $filters = array();
    foreach ($terms as $term) {
      $impl = $term->termImpl();
      $facets[] = $this->asFacetBy($impl, $by_field);
      $filters[] = $this->asFilter($impl);
    }

    // One facet for the population matching all terms.
    $facets[] = '"_matched" : { ' . $breakdown . ' , "facet_filter": { ' . $this->combineFilters($filters) . ' } }';

    // One facet for all the population.
    $facets[] = '"_all" : { ' . $breakdown . ' }';

    $data = $this->sendQuery($this->buildFacetQuery($facets));

    $result = array('total' => array('_all' => $this->totalHits($data)));
    $result['_all'] = $this->countFacet($data, '_all');
    $result['_matched'] = $this->countFacet($data, '_matched');
    foreach ($terms as $term) {
      $name = $term->termImpl()->getName();
      $result[$name] = $this->countFacet($data, $name);
    }
    return $result;
  }

  /**
   * Returns a facet for a specific term.
   *
   * A 'CategoryTerm' is sent as a terms facet and returned as counts, a
   * 'RangeTerm' is sent as a statistical facet and returned as received, and
   * any other type is sent as an "exists" filter facet.
   *
   * @param QueryTermInterface $term
   *
   * @return array|null
   *   Counts (see count()) for 'CategoryTerm' and 'TodoTerm'; the raw facet
   *   from the response otherwise, or NULL when the response has no such
   *   facet.
   */
  public function facetTerm(QueryTermInterface $term) {
    $name = $term->getName();
    $type = $term->getType();
    $field = $this->encodeJsonString($this->getFieldName($term));

    switch ($type) {
      case 'CategoryTerm':
        // "all_terms" is the option used by every other terms facet in this
        // class; the former "all_fields" spelling is not used anywhere else.
        $body = '"terms" : { "field" : ' . $field . ', "size": 1000, "all_terms" : true }';
        break;

      case 'RangeTerm':
        $body = '"statistical" : { "field" : ' . $field . ' }';
        break;

      default:
        $body = '"filter": { "exists" : { "field" : ' . $field . ' } }';
        break;
    }

    $facet = $this->encodeJsonString($name) . ' : { ' . $body . ' }';
    $data = $this->sendQuery($this->buildFacetQuery(array($facet)));
    $facetResult = isset($data['facets'][$name]) ? $data['facets'][$name] : NULL;

    // TODO: handle statistical result
    if ($type == 'CategoryTerm' || $type == 'TodoTerm') {
      return $this->count($facetResult);
    }
    return $facetResult;
  }

  /**
   * Builds the global filter facet counting the population matching a term.
   *
   * @param QueryTermInterface $term
   *
   * @return string
   *   A JSON fragment of the form "name" : { ... }.
   */
  protected function asFacet(QueryTermInterface $term) {
    return $this->encodeJsonString($term->getName())
      . ' : { "filter" : { ' . $this->asFilter($term) . ' }, "global":true }';
  }

  /**
   * Builds the terms facet counting, per value of $field, the population
   * matching a term.
   *
   * @param QueryTermInterface $term
   * @param string $field
   *   The expected field name; it goes through getFieldNameMapping().
   *
   * @return string
   *   A JSON fragment of the form "name" : { ... }.
   */
  protected function asFacetBy(QueryTermInterface $term, $field) {
    $field_json = $this->encodeJsonString($this->getFieldNameMapping($field));
    return $this->encodeJsonString($term->getName())
      . ' : { "terms" : { "field":' . $field_json . ',"all_terms":true }'
      . ' , "facet_filter": { ' . $this->asFilter($term) . ' } }';
  }

  /**
   * Translates a term into the body of an Elastic Search filter.
   *
   * Falls back to an "exists" filter on the term's field when the term
   * carries no categories, ranges or match values. Only the first range of a
   * 'RangeTerm' is used.
   *
   * @param QueryTermInterface $term
   *
   * @return string
   *   A JSON fragment without the surrounding braces.
   */
  private function asFilter(QueryTermInterface $term) {
    $field = $this->encodeJsonString($this->getFieldName($term));

    // The default filter.
    $filter = '"exists" : { "field" : ' . $field . ' }';

    switch ($term->getType()) {
      case 'CategoryTerm':
        $categories = $term->categories();
        if (count($categories) > 0) {
          $filter = '"terms" : { ' . $field . ':[' . $this->encodeJsonStrings($categories) . '] }';
        }
        break;

      case 'RangeTerm':
        $ranges = $term->ranges();
        if (count($ranges) > 0) {
          $range = $ranges[0];
          if (isset($range['value'])) {
            $filter = '"term" : { ' . $field . ': ' . $this->encodeJsonNumber($range['value']) . ' }';
          }
          else {
            // Lower bound inclusive, upper bound exclusive.
            $bounds = array();
            if (isset($range['from'])) {
              $bounds[] = '"from" : ' . $this->encodeJsonNumber($range['from']);
              $bounds[] = '"include_lower" : true';
            }
            if (isset($range['to'])) {
              $bounds[] = '"to" : ' . $this->encodeJsonNumber($range['to']);
              $bounds[] = '"include_upper" : false';
            }
            $filter = '"numeric_range" : { ' . $field . ': { ' . implode(',', $bounds) . ' } }';
          }
        }
        break;

      case 'TodoTerm':
        $matches = $term->match();
        if (count($matches) > 0) {
          $filter = '"terms" : { ' . $field . ':[' . $this->encodeJsonStrings($matches) . '] }';
        }
        break;

      default:
        break;
    }

    if ($term->isInverted()) {
      $filter = '"not" : { "filter" : { ' . $filter . ' } }';
    }
    return $filter;
  }

  /**
   * Flattens a facet from the response into an array of counts.
   *
   * @param array|null $facet
   *   A facet as found under 'facets' in the response.
   *
   * @return array
   *   Always has '_all'. For a 'terms' facet also '_missing', '_other' and one
   *   entry per term. Unknown or missing facets yield array('_all' => 0).
   */
  private function count($facet) {
    $rval = array('_all' => 0);
    $type = isset($facet['_type']) ? $facet['_type'] : NULL;

    switch ($type) {
      case 'filter':
        $rval['_all'] = $facet['count'];
        break;

      case 'terms':
        $rval['_all'] = $facet['total'];
        $rval['_missing'] = $facet['missing'];
        $rval['_other'] = $facet['other'];
        foreach ($facet['terms'] as $term) {
          $rval[$term['term']] = $term['count'];
        }
        break;

      case 'histogram':
        $sum = 0;
        foreach ($facet['entries'] as $entry) {
          $sum += $entry['count'];
        }
        $rval['_all'] = $sum;
        break;
    }
    return $rval;
  }

  /**
   * Wraps facet definitions into a complete facet-only query (no hits).
   *
   * @param array $facets
   *   JSON fragments of the form "name" : { ... }.
   *
   * @return string
   *   The JSON query text.
   */
  private function buildFacetQuery(array $facets) {
    return '{"query": {"match_all":{}},"size":0,"facets":{' . implode(',', $facets) . '}}';
  }

  /**
   * Combines filter bodies into one filter body that requires all of them.
   *
   * @param array $filters
   *   Filter bodies as returned by asFilter().
   *
   * @return string
   *   An "and" filter body, or a "match_all" filter body when $filters is
   *   empty, so that an empty list of terms still gives a valid query.
   */
  private function combineFilters(array $filters) {
    if (count($filters) == 0) {
      return '"match_all" : {}';
    }
    return '"and":[ {' . implode('},{', $filters) . '} ]';
  }

  /**
   * Reads the total number of hits from a response.
   *
   * @param mixed $data
   *   The value returned by sendQuery().
   *
   * @return int
   *   The total, or 0 when the response does not carry one.
   */
  private function totalHits($data) {
    return isset($data['hits']['total']) ? $data['hits']['total'] : 0;
  }

  /**
   * Extracts the counts of a named facet from a response.
   *
   * @param mixed $data
   *   The value returned by sendQuery().
   * @param string $name
   *   The facet name.
   *
   * @return array
   *   See count().
   */
  private function countFacet($data, $name) {
    return $this->count(isset($data['facets'][$name]) ? $data['facets'][$name] : NULL);
  }

  /**
   * Encodes a value as a JSON string literal, quotes included.
   *
   * @param mixed $value
   *   Cast to string before encoding, so numeric categories stay strings.
   *
   * @return string
   */
  private function encodeJsonString($value) {
    return json_encode((string) $value);
  }

  /**
   * Encodes a list of values as comma-separated JSON string literals.
   *
   * @param array $values
   *
   * @return string
   */
  private function encodeJsonStrings(array $values) {
    $encoded = array();
    foreach ($values as $value) {
      $encoded[] = $this->encodeJsonString($value);
    }
    return implode(',', $encoded);
  }

  /**
   * Encodes a range bound or value for insertion into the JSON query.
   *
   * Text that already is a valid JSON number is passed through untouched, so
   * well-formed input produces the same query as before. Anything else is
   * emitted as a quoted JSON string instead of raw text.
   *
   * @param mixed $value
   *
   * @return string
   */
  private function encodeJsonNumber($value) {
    if (is_int($value) || (is_float($value) && is_finite($value))) {
      return json_encode($value);
    }
    $text = trim((string) $value);
    if (preg_match('/^-?(?:0|[1-9][0-9]*)(?:\.[0-9]+)?(?:[eE][+-]?[0-9]+)?$/', $text)) {
      return $text;
    }
    return json_encode($text);
  }

}