chamilo/chamilo-lms

View on GitHub
public/main/inc/lib/search/xapian/XapianQuery.php

Summary

Maintainability
A
0 mins
Test Coverage
<?php

/* For licensing terms, see /license.txt */

require_once 'xapian.php';
//TODO: think another way without including specific fields here
require_once api_get_path(LIBRARY_PATH).'specific_fields_manager.lib.php';
define('XAPIAN_DB', api_get_path(SYS_UPLOAD_PATH).'plugins/xapian/searchdb/');

/**
 * Queries the database.
 * The xapian_query function queries the database using both a query string
 * and application-defined terms. Based on drupal-xapian.
 *
 * @param string         $query_string The search string. This string will
 *                                     be parsed and stemmed automatically.
 * @param XapianDatabase $db           Xapian database to connect
 * @param int            $start        An integer defining the first
 *                                     document to return
 * @param int            $length       the number of results to return
 * @param array          $extra        an array containing arrays of
 *                                     extra terms to search for
 * @param int            $count_type   Number of items to retrieve
 *
 * @return array an array of nids corresponding to the results
 */
function xapian_query($query_string, $db = null, $start = 0, $length = 10, $extra = [], $count_type = 0)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        // Build subqueries from $extra array. Now only used by tags search filter on search widget
        $subqueries = [];
        foreach ($extra as $subquery) {
            if (!empty($subquery)) {
                $subqueries[] = new XapianQuery($subquery);
            }
        }

        $query = null;
        $enquire = new XapianEnquire($db);
        if (!empty($query_string)) {
            $query_parser = new XapianQueryParser();
            //TODO: choose stemmer
            $stemmer = new XapianStem("english");
            $query_parser->set_stemmer($stemmer);
            $query_parser->set_database($db);
            $query_parser->set_stemming_strategy(XapianQueryParser::STEM_SOME);
            $query_parser->add_boolean_prefix('courseid', XAPIAN_PREFIX_COURSEID);
            $query_parser->add_boolean_prefix('toolid', XAPIAN_PREFIX_TOOLID);
            $query = $query_parser->parse_query($query_string);
            $final_array = array_merge($subqueries, [$query]);
            $query = new XapianQuery(XapianQuery::OP_AND, $final_array);
        } else {
            $query = new XapianQuery(XapianQuery::OP_OR, $subqueries);
        }

        $enquire->set_query($query);

        $matches = $enquire->get_mset((int) $start, (int) $length);

        $specific_fields = get_specific_field_list();

        $results = [];
        $i = $matches->begin();

        // Display the results.
        //echo $matches->get_matches_estimated().'results found';

        $count = 0;

        while (!$i->equals($matches->end())) {
            $count++;
            $document = $i->get_document();

            if (is_object($document)) {
                // process one item terms
                $courseid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_COURSEID);
                $results[$count]['courseid'] = substr($courseid_terms[0]['name'], 1);
                $toolid_terms = xapian_get_doc_terms($document, XAPIAN_PREFIX_TOOLID);
                $results[$count]['toolid'] = substr($toolid_terms[0]['name'], 1);

                // process each specific field prefix
                foreach ($specific_fields as $specific_field) {
                    $results[$count]['sf-'.$specific_field['code']] = xapian_get_doc_terms($document, $specific_field['code']);
                }

                // rest of data
                $results[$count]['xapian_data'] = unserialize($document->get_data());
                $results[$count]['score'] = ($i->get_percent());
            }
            $i->next();
        }

        switch ($count_type) {
            case 1: // Lower bound
                $count = $matches->get_matches_lower_bound();
                break;

            case 2: // Upper bound
                $count = $matches->get_matches_upper_bound();
                break;

            case 0: // Best estimate
            default:
                $count = $matches->get_matches_estimated();
                break;
        }

        return [$count, $results];
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());

        return null;
    }
}

/**
 * build a boolean query.
 */
function xapian_get_boolean_query($term)
{
    return new XapianQuery($term);
}

/**
 * Retrieve a list db terms.
 *
 * @param int            $count  Number of terms to retrieve
 * @param char           $prefix The prefix of the term to retrieve
 * @param XapianDatabase $db     Xapian database to connect
 *
 * @return array
 */
function xapian_get_all_terms($count = 0, $prefix, $db = null)
{
    try {
        if (!is_object($db)) {
            $db = new XapianDatabase(XAPIAN_DB);
        }

        if (!empty($prefix)) {
            $termi = $db->allterms_begin($prefix);
        } else {
            $termi = $db->allterms_begin();
        }

        $terms = [];
        $i = 0;
        for (; !$termi->equals($db->allterms_end()) && (++$i <= $count || 0 == $count); $termi->next()) {
            $terms[] = [
                'frequency' => $termi->get_termfreq(),
                'name' => $termi->get_term(),
            ];
        }

        return $terms;
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());

        return null;
    }
}

/**
 * Retrieve all terms of a document.
 *
 * @param   XapianDocument  document searched
 *
 * @return array
 */
function xapian_get_doc_terms($doc = null, $prefix)
{
    try {
        if (!is_a($doc, 'XapianDocument')) {
            return;
        }

        //TODO: make the filter by prefix on xapian if possible
        //ojwb marvil07: use Document::termlist_begin() and then skip_to(prefix) on the TermIterator
        //ojwb you'll need to check the end condition by hand though
        $terms = [];
        for ($termi = $doc->termlist_begin(); !$termi->equals($doc->termlist_end()); $termi->next()) {
            $term = [
                'frequency' => $termi->get_termfreq(),
                'name' => $termi->get_term(),
            ];
            if ($term['name'][0] === $prefix) {
                $terms[] = $term;
            }
        }

        return $terms;
    } catch (Exception $e) {
        display_xapian_error($e->getMessage());

        return null;
    }
}

/**
 * Join xapian queries.
 *
 * @param XapianQuery|array $query1
 * @param XapianQuery|array $query2
 * @param string            $op
 *
 * @return XapianQuery query joined
 */
function xapian_join_queries($query1, $query2 = null, $op = 'or')
{
    // let decide how to join, avoiding include xapian.php outside
    switch ($op) {
        case 'or':
            $op = XapianQuery::OP_OR;
            break;
        case 'and':
            $op = XapianQuery::OP_AND;
            break;
        default:
            $op = XapianQuery::OP_OR;
            break;
    }

    // review parameters to decide how to join
    if (!is_array($query1)) {
        $query1 = [$query1];
    }
    if (is_null($query2)) {
        // join an array of queries with $op
        return new XapianQuery($op, $query1);
    }
    if (!is_array($query2)) {
        $query2 = [$query2];
    }

    return new XapianQuery($op, array_merge($query1, $query2));
}

/**
 * @author Isaac flores paz <florespaz@bidsoftperu.com>
 *
 * @param string The xapian error message
 *
 * @return string The chamilo error message
 */
function display_xapian_error($xapian_error_message)
{
    $message = explode(':', $xapian_error_message);
    $type_error_message = $message[0];
    if ('DatabaseOpeningError' == $type_error_message) {
        $message_error = get_lang('Failed to open the search database');
    } elseif ('DatabaseVersionError' == $type_error_message) {
        $message_error = get_lang('The search database uses an unsupported format');
    } elseif ('DatabaseModifiedError' == $type_error_message) {
        $message_error = get_lang('The search database has been modified/broken');
    } elseif ('DatabaseLockError' == $type_error_message) {
        $message_error = get_lang('Failed to lock the search database');
    } elseif ('DatabaseCreateError' == $type_error_message) {
        $message_error = get_lang('Failed to create the search database');
    } elseif ('DatabaseCorruptError' == $type_error_message) {
        $message_error = get_lang('The search database has suffered corruption');
    } elseif ('NetworkTimeoutError' == $type_error_message) {
        $message_error = get_lang('Connection timed out while communicating with the remote search database');
    } else {
        $message_error = get_lang('Error in search engine');
    }
    $display_message = get_lang('Error').' : '.$message_error;
    echo Display::return_message($display_message, 'error');
}