TeaThemeOptions/TeaThemeOptions

View on GitHub
src/Plugins/Search/SearchElastica.php

Summary

Maintainability
F
5 days
Test Coverage
<?php

namespace crewstyle\OlympusZeus\Plugins\Search;

use Elastica\Client;
use Elastica\Document;
use Elastica\Exception\NotFoundException;
use Elastica\Filter\Bool;
use Elastica\Filter\Term as FilterTerm;
use Elastica\Query;
use Elastica\Query\QueryString;
use Elastica\Suggest;
use Elastica\Suggest\Term as SuggestTerm;
use Elastica\Type\Mapping;
use crewstyle\OlympusZeus\OlympusZeus;
use crewstyle\OlympusZeus\Core\Posttype\PosttypeEngine;
use crewstyle\OlympusZeus\Plugins\Search\Search;

/**
 * Works with Search.
 *
 * @package Olympus Zeus
 * @subpackage Plugins\Search\SearchElastica
 * @author Achraf Chouk <achrafchouk@gmail.com>
 * @since 4.0.0
 *
 */

abstract class SearchElastica
{
    /**
     * Elastica Client, stored by setClient() and read by getClient().
     *
     * @var object
     */
    protected $client = null;

    /**
     * Search configuration as read by getConfig() and written by setConfig().
     *
     * Declared explicitly so that reading it before any setConfig() call
     * yields an empty array instead of an undefined dynamic property.
     *
     * @var array
     */
    protected $config = array();

    /**
     * Legacy declaration, kept untouched for backward compatibility.
     * The accessors of this class work on $config, not on this property.
     *
     * @var array
     */
    protected $configs = array();

    /**
     * Elastica Index, lazily built by getIndex() and stored by setIndex().
     *
     * @var object
     */
    protected $index = null;

    /**
     * Get the stored Elastica Client object.
     *
     * Plain accessor: it never creates a client by itself, so the value is
     * null until setClient() has been called.
     *
     * @return object|null $client Elastica Client previously stored, or null
     *
     * @since 3.0.0
     */
    protected function getClient()
    {
        return $this->client;
    }

    /**
     * Store the Elastica Client object.
     *
     * The value is stored as given, without any validation.
     *
     * @param object $client Elastica Client to keep on this instance
     *
     * @since 3.0.0
     */
    protected function setClient($client)
    {
        $this->client = $client;
    }

    /**
     * Get the search configuration.
     *
     * Returns whatever setConfig() last stored. Other methods of this class
     * read keys such as 'server_host', 'server_port', 'index_name' and
     * 'posttypes' from the returned value.
     *
     * @return array $config Array of all search configuration datas
     *
     * @since 3.3.0
     */
    protected function getConfig()
    {
        return $this->config;
    }

    /**
     * Set the search configuration.
     *
     * The given value replaces the previous configuration entirely; it is
     * stored as-is, without merging or validation.
     *
     * @param array $config Array of all new config datas
     *
     * @since 3.0.0
     */
    protected function setConfig($config)
    {
        $this->config = $config;
    }

    /**
     * Get the Elastica Index, building it on first use.
     *
     * When no index is stored yet, a dedicated Elastica Client is created
     * from the 'server_host' and 'server_port' config entries and the index
     * named by 'index_name' is fetched from it, then stored via setIndex().
     * The client created here is not the one kept by setClient().
     *
     * @return object $index Object of the Elastica index
     *
     * @since 3.0.0
     */
    protected function getIndex()
    {
        //Already built: hand back the stored index
        if (null !== $this->index) {
            return $this->index;
        }

        //Read connection settings
        $settings = $this->getConfig();

        //Open a client on the configured server
        $elastica = new Client(array(
            'host' => $settings['server_host'],
            'port' => $settings['server_port'],
        ));

        //Keep the index for the next calls
        $this->setIndex($elastica->getIndex($settings['index_name']));

        return $this->index;
    }

    /**
     * Store the Elastica Index.
     *
     * Storing null makes the next getIndex() call build a fresh index.
     *
     * @param object $index Elastica Index to keep on this instance
     *
     * @since 3.0.0
     */
    protected function setIndex($index)
    {
        $this->index = $index;
    }

    /**
     * Build the document array describing a post.
     *
     * Only the fields present on the post are copied: 'id', 'tags' (comma
     * separated 'post_tag' names), 'parent', 'title', 'content', 'excerpt'
     * and 'author'. Content and excerpt are unescaped then stripped of tags.
     * The current blog ID is always included as 'blog_id'.
     *
     * The 'olz_plugin_search_adddocumentpost' action is fired with the
     * document; since an action's return value is ignored, callbacks cannot
     * alter the array returned here.
     *
     * @param object $post Wordpress post object
     * @return array $document Plain array of the post fields to index
     *
     * @since 4.0.0
     */
    protected function addDocumentPost($post)
    {
        global $blog_id;

        //Create document
        $doc = array(
            'blog_id' => $blog_id
        );

        //Check field 'ID'
        if (isset($post->ID)) {
            $doc['id'] = $post->ID;

            //Get tags
            $tags = get_the_term_list($post->ID, 'post_tag', '', ',', '');

            //The term list is only usable when it is a string: false and
            //WP_Error results must never reach strip_tags()
            if (is_string($tags)) {
                $tags = strip_tags($tags);

                //Check tags
                if (!empty($tags)) {
                    $doc['tags'] = $tags;
                }
            }
        }

        //Check field 'post_parent'
        if (isset($post->post_parent)) {
            $doc['parent'] = $post->post_parent;
        }

        //Check field 'post_title'
        if (isset($post->post_title)) {
            $doc['title'] = $post->post_title;
        }

        //Check field 'post_content'
        if (isset($post->post_content)) {
            $doc['content'] = strip_tags(stripcslashes($post->post_content));
        }

        //Check field 'post_excerpt'
        if (isset($post->post_excerpt)) {
            $doc['excerpt'] = strip_tags(stripcslashes($post->post_excerpt));
        }

        //Check field 'post_author'
        if (isset($post->post_author)) {
            $doc['author'] = $post->post_author;
        }

        //Check field 'post_date'
        /**
         * @todo ES 2.0 incompatibility for now: the 'date' field is
         * deliberately left out of the document until this is solved.
         */
        /*if (isset($post->post_date)) {
            $doc['date'] = date('c', strtotime($post->post_date));
        }*/

        /**
         * Update post document.
         *
         * @param array $doc
         *
         * @since 4.0.0
         */
        do_action('olz_plugin_search_adddocumentpost', $doc);

        //Return document
        return $doc;
    }

    /**
     * Build the document array describing a taxonomy term.
     *
     * Copies 'term_id' to 'id' and 'name' to 'title' when present, and
     * stores the unescaped, tag-stripped 'description' as 'content'.
     * The current blog ID is always included as 'blog_id'.
     *
     * The 'olz_plugin_search_adddocumentterm' action is fired with the
     * document before it is returned.
     *
     * @param object $term Wordpress term object
     * @return array $document Plain array of the term fields to index
     *
     * @since 4.0.0
     */
    protected function addDocumentTerm($term)
    {
        global $blog_id;

        //Every document carries the current blog identifier
        $document = array('blog_id' => $blog_id);

        //Term properties copied untouched, keyed by their document field
        $copied = array(
            'term_id' => 'id',
            'name' => 'title',
        );

        foreach ($copied as $property => $field) {
            if (isset($term->$property)) {
                $document[$field] = $term->$property;
            }
        }

        //Description is cleaned before being stored
        if (isset($term->description)) {
            $document['content'] = strip_tags(stripcslashes($term->description));
        }

        /**
         * Update term document.
         *
         * @param array $document
         *
         * @since 4.0.0
         */
        do_action('olz_plugin_search_adddocumentterm', $document);

        return $document;
    }

    /**
     * Create the Elastica index with its analysis settings, then send one
     * mapping per post type and per taxonomy.
     *
     * The index is (re)created exactly once. Creating it with the "recreate"
     * option for every type, as was done before, dropped the index on each
     * iteration and with it every mapping sent for the previous types, so
     * that only the last type ended up mapped.
     *
     * Post type mappings only receive the properties; taxonomy mappings
     * additionally receive the 'index_analyzer', 'search_analyzer' and
     * '_boost' params.
     *
     * @param object $index Elastica Index
     * @param array $posttypes Array containing all post types
     * @param array $terms Array containing all terms
     * @return object|null $index Elastica Index, or null when the index or
     *                            both lists are empty
     *
     * @since 4.0.0
     */
    protected function analysis($index, $posttypes, $terms)
    {
        //Check integrity
        if (empty($index)) {
            return null;
        }

        //Check integrity
        if (empty($posttypes) && empty($terms)) {
            return null;
        }

        //Define properties
        $props = array(
            'id' => array(
                'type' => 'integer',
                'include_in_all' => false,
            ),
            'tags' => array(
                'type' => 'string',
                'index' => 'analyzed',
            ),
            'parent' => array(
                'type' => 'integer',
                'index' => 'analyzed',
            ),
            'title' => array(
                'type' => 'string',
                'index' => 'analyzed',
            ),
            'content' => array(
                'type' => 'string',
                'index' => 'analyzed',
            ),
            'excerpt' => array(
                'type' => 'string',
                'index' => 'analyzed',
            ),
            'author' => array(
                'type' => 'integer',
                'index' => 'analyzed',
            ),
            'date' => array(
                'type' => 'date',
                'format' => 'date_time_no_millis',
            ),
            'tags_suggest' => array(
                'type' => 'completion',
                'analyzer' => 'simple',
                'search_analyzer' => 'simple',
                'payloads' => false,
            ),
            '_boost' => array(
                'type' => 'float',
                'include_in_all' => false,
            ),
        );

        /**
         * Update props analysis.
         *
         * @param array $props
         *
         * @since 4.0.0
         */
        do_action('olz_plugin_search_analysis', $props);

        //Drop empty names, exactly what the former per-item empty() test did
        $posttypes = empty($posttypes) ? array() : array_filter((array) $posttypes);
        $terms = empty($terms) ? array() : array_filter((array) $terms);

        //Nothing left to map: leave the index as it is
        $names = array_merge(array_values($posttypes), array_values($terms));

        if (empty($names)) {
            return $index;
        }

        //Create the index once; "true" deletes a previously existing index
        $index->create($this->analysisSettings($names), true);

        //Send mappings for post types
        foreach ($posttypes as $name) {
            $this->analysisMapping($index, $name, $props, false);
        }

        //Send mappings for taxonomies
        foreach ($terms as $name) {
            $this->analysisMapping($index, $name, $props, true);
        }

        //Return index
        return $index;
    }

    /**
     * Build the index creation settings declaring one token filter per type
     * name and chaining all of them in both custom analyzers.
     *
     * @param array $names Non-empty post type and taxonomy names
     * @return array $settings Arguments for the Elastica index creation
     *
     * @since 4.0.0
     */
    private function analysisSettings($names)
    {
        $filters = array();

        //One filter definition per type, named after it
        foreach ($names as $name) {
            $filters['filter_'.$name] = array(
                'type' => 'standard',
                'language' => OLZ_LOCAL,
                'ignoreCase' => true,
            );
        }

        $chain = array_keys($filters);

        return array(
            'number_of_shards' => 4,
            'number_of_replicas' => 1,
            'analysis' => array(
                'analyzer' => array(
                    'indexAnalyzer' => array(
                        'type' => 'custom',
                        'tokenizer' => 'standard',
                        'filter' => array_merge(array('lowercase', 'asciifolding'), $chain),
                    ),
                    'searchAnalyzer' => array(
                        'type' => 'custom',
                        'tokenizer' => 'standard',
                        'filter' => array_merge(array('standard', 'lowercase', 'asciifolding'), $chain),
                    )
                ),
                'filter' => $filters,
            ),
        );
    }

    /**
     * Send the mapping of a single type to the index.
     *
     * @param object $index Elastica Index
     * @param string $name Type name (post type or taxonomy)
     * @param array $props Mapping properties
     * @param boolean $analyzers Whether to also set the analyzer and boost params
     *
     * @since 4.0.0
     */
    private function analysisMapping($index, $name, $props, $analyzers)
    {
        //Define a new Elastica Mapper bound to the type
        $mapping = new Mapping();
        $mapping->setType($index->getType($name));

        if ($analyzers) {
            $mapping->setParam('index_analyzer', 'indexAnalyzer');
            $mapping->setParam('search_analyzer', 'searchAnalyzer');

            //Define boost field
            $mapping->setParam('_boost', array(
                'name' => '_boost',
                'null_value' => 1.0
            ));
        }

        //Set mapping
        $mapping->setProperties($props);

        //Send mapping to type
        $mapping->send();
    }

    /**
     * Check the Elasticsearch connection for the configured index.
     *
     * Issues a GET request on the index "_recovery" endpoint and translates
     * the answer into a simple status:
     *  - 200 when the server answered 200,
     *  - 404 when the server answered 404, or when the JSON body carries the
     *    "IndexMissingException" error for the configured index,
     *  - 0 in every other case (server unreachable, unexpected answer...).
     *
     * Nothing is returned (null) outside the admin panel or when the search
     * feature is not enabled.
     *
     * @return int|null $status 200, 404 or 0; null when the check is skipped
     *
     * @since 4.0.0
     */
    public function connection()
    {
        //Check if we are in admin panel
        if (!OLZ_ISADMIN) {
            return;
        }

        //Get index
        $index = Search::getIndex();

        //Get enable
        $enable = OlympusZeus::getConfigs($index, false);

        //Check if this action was properly called
        if (!$enable) {
            return;
        }

        //Defaults
        $ctn = Search::getConfigs();

        //Build url
        $url = 'http://'.$ctn['server_host'].':'.$ctn['server_port'].'/'.$ctn['index_name'];
        $url .= '/_recovery?ignore_unavailable=true';

        //Make curl: the HTTP verb is "GET" ("XGET" only exists as the
        //command line spelling "-XGET" and is not a valid method)
        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
        $head = curl_exec($ch);
        $status = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        curl_close($ch);

        //Check errors
        if (200 === $status || 404 === $status) {
            //Everything is good, everything is CocaCola!
            return $status;
        }

        //Get JSON head: curl_exec() returns false on failure
        $json = is_string($head) ? json_decode($head, true) : null;

        //Body is decoded as an associative array, so read it as an array
        if (is_array($json)) {
            //Okay, only the brave, by Diesel
            $error = 'IndexMissingException[['.$ctn['index_name'].'] missing]';
            return isset($json['error']) && $error === $json['error'] ? 404 : 0;
        }

        //Hum... Nothing is good over here.
        return 0;
    }

    /**
     * Index all published posts of the configured post types, then all terms
     * of the configured taxonomies.
     *
     * Nothing is indexed (0 is returned) outside the admin panel, when
     * $idxctn is false, when no post type is configured, or when
     * connection() does not answer 200. In the latter case the status is
     * saved under the "<index>-status" config key.
     *
     * NOTE(review): taxonomies are read from the 'index_terms' config key
     * here while makeSearch() reads 'terms' - confirm which one is intended.
     *
     * @param boolean $idxctn Define it we have to index contents or just create index
     * @return int $count Get number of items indexed
     *
     * @since 4.0.0
     */
    public function indexContents($idxctn = true)
    {
        //Check page
        if (!OLZ_ISADMIN) {
            return 0;
        }

        //Do we have to index contents
        if (!$idxctn) {
            return 0;
        }

        //Get search datas
        $ctn = $this->getConfig();

        //Get posttypes and terms
        $idp = isset($ctn['posttypes']) && !empty($ctn['posttypes'])
            ? array_filter($ctn['posttypes'], 'strlen')
            : array();
        $idt = isset($ctn['index_terms']) && !empty($ctn['index_terms'])
            ? array_filter($ctn['index_terms'], 'strlen')
            : array();

        //Check if we can index some post types
        if (empty($idp)) {
            return 0;
        }

        //Get index
        $index = $this->getIndex();

        //Check index
        if (empty($index)) {
            return 0;
        }

        //Get status
        $status = $this->connection();

        //Check status
        if (200 !== $status) {
            OlympusZeus::setConfigs(Search::getIndex().'-status', $status);
            return 0;
        }

        //Get all wanted posts
        $count = 0;
        $posts = get_posts(array(
            'posts_per_page' => -1,
            'numberposts' => -1,
            'post_type' => $idp,
            'post_status' => 'publish',
            'orderby' => 'post_date',
            'order' => 'DESC',
        ));

        //Iterate on all posts to create documents
        if (!empty($posts) && is_array($posts)) {
            foreach ($posts as $post) {
                //Check post type
                if (!in_array($post->post_type, $idp)) {
                    continue;
                }

                //Update document
                $this->postUpdate($post, $index);

                //Update counter
                $count++;
            }
        }

        //Check taxonomies
        if (!empty($idt)) {
            //Get all wanted taxonomies
            $terms = get_terms($idt, array(
                'orderby' => 'slug',
                'hide_empty' => false,
            ));

            //get_terms() may answer with a WP_Error object (unknown
            //taxonomy for instance): only a real list can be iterated
            if (!empty($terms) && is_array($terms)) {
                foreach ($terms as $term) {
                    //Check taxonomy
                    if (!in_array($term->taxonomy, $idt)) {
                        continue;
                    }

                    //Update document
                    $this->termUpdate($term, $index);

                    //Update counter
                    $count++;
                }
            }
        }

        //Refresh index
        $index->refresh();

        //Set and return count
        return $count;
    }

    /**
     * Get the stored Elastica Client, creating and storing it when missing.
     *
     * A new client targets the 'server_host' / 'server_port' config entries
     * and uses 'write_timeout' or 'read_timeout' depending on $write. The
     * timeout only matters for the call that actually creates the client:
     * an already stored client is returned untouched.
     *
     * @param boolean $write Define if we are writing transactions or reading them
     * @return object $client Elastica Client
     *
     * @since 3.0.0
     */
    protected function makeClient($write = false)
    {
        //Reuse the client when one is already stored
        $existing = $this->getClient();

        if (!empty($existing)) {
            return $existing;
        }

        //Build connection options from the search configuration
        $settings = $this->getConfig();
        $options = array(
            'host' => $settings['server_host'],
            'port' => $settings['server_port'],
        );

        if ($write) {
            $options['timeout'] = $settings['write_timeout'];
        } else {
            $options['timeout'] = $settings['read_timeout'];
        }

        //Instantiate, store and return the new client
        $created = new Client($options);
        $this->setClient($created);

        return $created;
    }

    /**
     * Get the Elastica Index, asking the given client for it as a fallback.
     *
     * getIndex() is tried first; the index named by the 'index_name' config
     * entry is only fetched from $client (and stored) when getIndex() gave
     * back an empty value.
     *
     * @param object $client Elastica Client
     * @return object|null $index Elastica Index, or null without a client
     *
     * @since 3.0.0
     */
    protected function makeIndex($client)
    {
        //No client, no index
        if (empty($client)) {
            return null;
        }

        //Prefer the index this instance already knows about
        $current = $this->getIndex();

        if (!empty($current)) {
            return $current;
        }

        //Fall back on the given client
        $settings = $this->getConfig();
        $fetched = $client->getIndex($settings['index_name']);

        //Keep it for later calls
        $this->setIndex($fetched);

        return $fetched;
    }

    /**
     * Prepare the search index: run analysis() for the configured post
     * types and terms, then store the resulting index.
     *
     * Does nothing outside the admin panel. The value returned by
     * analysis() is stored as-is, including null.
     *
     * NOTE(review): taxonomies are read from the 'terms' config key here
     * while indexContents() reads 'index_terms' - confirm which is intended.
     *
     * @since 4.0.0
     */
    public function makeSearch()
    {
        //Admin panel only
        if (!OLZ_ISADMIN) {
            return;
        }

        //Get what is already available
        $client = $this->getClient();
        $index = $this->getIndex();

        //Build the missing pieces, client first since the index needs it
        $client = empty($client) ? $this->makeClient(true) : $client;
        $index = empty($index) ? $this->makeIndex($client) : $index;

        //Read the types to map from the search configuration
        $settings = $this->getConfig();
        $posttypes = array();
        $terms = array();

        if (isset($settings['posttypes'])) {
            $posttypes = $settings['posttypes'];
        }

        if (isset($settings['terms'])) {
            $terms = $settings['terms'];
        }

        //Create analysers and mappers, then keep the resulting index
        $this->setIndex($this->analysis($index, $posttypes, $terms));
    }

    /**
     * Delete a post from the Elastica index and decrement the stored
     * counter of indexed documents.
     *
     * Posts whose type is not listed in the 'posttypes' config entry are
     * ignored. When the document does not exist in the index, nothing is
     * deleted and the counter is left untouched.
     *
     * @param object $post Post to delete
     *
     * @since 4.0.0
     */
    public function postDelete($post)
    {
        //Get configs
        $ctn = $this->getConfig();

        //A missing or malformed list means no post type is handled
        $posttypes = isset($ctn['posttypes']) && is_array($ctn['posttypes'])
            ? $ctn['posttypes']
            : array();

        //Check post integrity
        if (null == $post || !in_array($post->post_type, $posttypes)) {
            return;
        }

        //Get index
        $index = $this->getIndex();

        //Check index
        if (null === $index) {
            return;
        }

        //Get type
        $type = $index->getType($post->post_type);

        //Try to delete post
        try {
            //Delete post by its ID
            $type->deleteById($post->ID);

            //Update counter
            $key = Search::getIndex().'-count';
            $count = OlympusZeus::getConfigs($key);

            //NOTE(review): the counter is saved below as a plain number but
            //was read as $count[0]; both shapes are accepted here - confirm
            //what OlympusZeus::getConfigs() really returns
            if (empty($count)) {
                $count = 0;
            } else {
                $current = is_array($count)
                    ? (isset($count[0]) ? $count[0] : 0)
                    : $count;

                //A number of indexed documents can never be negative
                $count = max(0, (int) $current - 1);
            }

            //Save in DB
            OlympusZeus::setConfigs($key, $count);
        } catch (NotFoundException $ex) {
            //Document is not in the index: nothing to delete, nothing to count
        }
    }

    /**
     * Update or Add a post into the Elastica index.
     *
     * Posts whose type is not listed in the 'posttypes' config entry are
     * ignored. The document is built by addDocumentPost() and stored under
     * the post ID, in the type named after the post type.
     *
     * @param object $post Post to update or add
     * @param object $index Elastica Index, defaults to getIndex()
     *
     * @since 3.0.0
     */
    public function postUpdate($post, $index = null)
    {
        //Get configs
        $ctn = $this->getConfig();

        //A missing or malformed list means no post type is handled
        $posttypes = isset($ctn['posttypes']) && is_array($ctn['posttypes'])
            ? $ctn['posttypes']
            : array();

        //Check post integrity
        if (null == $post || !in_array($post->post_type, $posttypes)) {
            return;
        }

        //Get index
        $index = null !== $index ? $index : $this->getIndex();

        //Check index
        if (null === $index) {
            return;
        }

        //Try to update or add post
        try {
            $doc = $this->addDocumentPost($post);
            $type = $index->getType($post->post_type);
            $type->addDocument(new Document($post->ID, $doc));
        } catch (NotFoundException $ex) {
            //Best effort: a "not found" answer simply leaves the post unindexed
        }
    }

    /**
     * Update or Add a taxonomy term into the Elastica index.
     *
     * The document is built by addDocumentTerm() and stored under the term
     * ID, in the type named after the term taxonomy. A NotFoundException
     * raised meanwhile is silently ignored.
     *
     * @param object $term Term to update or add
     * @param object $index Elastica Index, defaults to getIndex()
     *
     * @since 3.0.0
     */
    public function termUpdate($term, $index = null)
    {
        //Fall back on the instance index when none is given
        if (null == $index) {
            $index = $this->getIndex();
        }

        //Still no index: give up
        if (null === $index) {
            return;
        }

        try {
            //Store the term document under its term ID
            $document = new Document($term->term_id, $this->addDocumentTerm($term));
            $index->getType($term->taxonomy)->addDocument($document);
        } catch (NotFoundException $ex){}
    }

    /**
     * Search the documents whose "<type>.parent" field equals the given
     * parent ID, sorted on "<type>.date".
     *
     * Results are grouped by Elastica type; each entry holds the document
     * 'id', 'score' and 'source'. On a search page (is_search()) nothing is
     * returned at all.
     *
     * @param string $type Post type
     * @param int $parent Parent ID to get all children
     * @param string $order Order way
     * @return array|null $search Array with 'parent', 'total' and 'results'
     *                            keys; null on search pages
     *
     * @since 4.0.0
     */
    public function searchChildren($type, $parent, $order = 'desc')
    {
        //Not available on search pages
        if (is_search()) {
            return;
        }

        //Default answer, completed below when something is found
        $return = array(
            'parent' => $parent,
            'total' => 0,
            'results' => array()
        );

        //A parent is required
        if (empty($parent)) {
            return $return;
        }

        //An index is required
        $index = $this->getIndex();

        if (empty($index)) {
            return $return;
        }

        //Filter on the parent field
        $parentTerm = new FilterTerm();
        $parentTerm->setTerm($type . '.parent', $parent);

        $must = new Bool();
        $must->addMust($parentTerm);

        //Build the query: filter + sort
        $query = new Query();
        $query->setFilter($must);
        $query->setSort(array(
            $type . '.date' => array('order' => $order)
        ));

        //Search!
        $resultset = $index->search($query);
        $hits = $resultset->getResults();

        //Nothing found
        if (empty($hits)) {
            return $return;
        }

        //Group hits by type
        $grouped = array();

        foreach ($hits as $hit) {
            $grouped[$hit->getType()][] = array(
                'id' => $hit->getId(),
                'score' => $hit->getScore(),
                'source' => $hit->getSource(),
            );
        }

        //Complete the answer
        $return['total'] = $resultset->getTotalHits();
        $return['results'] = $grouped;

        return $return;
    }

    /**
     * Run the full-text search asked through the request parameters.
     *
     * Request parameters read: 's' (search string), 'type', 'paged'
     * (1-based page number) and 'perpage' (page size, defaults to the
     * 'posts_per_page' option). Results are grouped by Elastica type; each
     * entry holds the document 'id', 'score' and 'source'.
     *
     * The returned 'paged' value is the zero-based page index; the query
     * itself starts at the matching document offset (page index * page
     * size), since Elasticsearch "from" counts documents, not pages.
     *
     * @return array $elasticsearches Array with 'query', 'total', 'types'
     *                                and 'results' keys
     *
     * @since 4.0.0
     */
    public function searchContents()
    {
        //Return array
        $return = array(
            'query' => array(
                'search' => '',
                'type' => '',
                'paged' => 0,
                'perpage' => 0
            ),
            'total' => 0,
            'types' => array(),
            'results' => array()
        );

        //Check page
        if (!is_search()) {
            return $return;
        }

        //Get query vars
        $request = isset($_REQUEST) ? $_REQUEST : array();
        $results = array();
        $types = array();

        //Check request
        if (empty($request)) {
            return $return;
        }

        //Get Elasticsearch datas
        $index = $this->getIndex();

        //Check index
        if (empty($index)) {
            return $return;
        }

        //Get search datas: only a plain string can be searched for
        $search = isset($request['s']) && is_string($request['s'])
            ? str_replace('\"', '"', $request['s'])
            : '';

        //Return everything
        if (empty($search)) {
            return $return;
        }

        //Get search datas
        $type = isset($request['type']) ? $request['type'] : '';

        //Zero-based page index, never negative
        $paged = isset($request['paged']) && !empty($request['paged'])
            ? max(0, (int) $request['paged'] - 1)
            : 0;

        //Page size: request values are untrusted, keep a positive integer
        $perpage = isset($request['perpage']) ? (int) $request['perpage'] : 0;

        if ($perpage < 1) {
            $perpage = (int) OlympusZeus::getOption('posts_per_page', 10);
        }

        //Build query string
        $es_querystring = new QueryString();

        //'And' or 'Or' default: 'Or'
        $es_querystring->setDefaultOperator('OR');
        $es_querystring->setQuery($search);

        //Create the actual search object with some data.
        $es_query = new Query();
        $es_query->setQuery($es_querystring);

        //Define options
        $es_query->setFrom($paged * $perpage);  //Start: document offset
        $es_query->setLimit($perpage);          //How many

        //Search!
        $es_resultset = $index->search($es_query);

        //Retrieve data
        $es_results = $es_resultset->getResults();

        //Check results
        if (empty($es_results)) {
            $return['query']['search'] = str_replace(' ', '+', $search);
            return $return;
        }

        //Iterate to retrieve all IDs
        foreach ($es_results as $res) {
            $typ = $res->getType();

            //Save type
            $types[$typ] = $typ;

            //Save datas
            $results[$typ][] = array(
                'id' => $res->getId(),
                'score' => $res->getScore(),
                'source' => $res->getSource(),
            );
        }

        //Get total
        $total = $es_resultset->getTotalHits();

        //Return everything
        $return = array(
            'query' => array(
                'search' => str_replace(' ', '+', $search),
                'type' => $type,
                'paged' => $paged,
                'perpage' => $perpage
            ),
            'total' => $total,
            'types' => $types,
            'results' => $results
        );

        return $return;
    }

    /**
     * Ask term suggestions for each of the given tags.
     *
     * One term suggestion named "tags_suggest_<key>" is requested per tag,
     * against the '_all' field, with the 'simple' analyzer and at most 5
     * options. Options found are returned keyed by their text, each with
     * its 'score' and 'freq'. Outside a search page (is_search()) nothing
     * is returned at all.
     *
     * @param string $type Post type
     * @param int $post Post ID to get all suggestions
     * @param array $tags Array contains all post tags
     * @return array|null $search Array with 'post', 'tags', 'total' and
     *                            'results' keys; null outside search pages
     *
     * @since 3.0.0
     */
    public function searchSuggest($type, $post, $tags)
    {
        //Search pages only
        if (!is_search()) {
            return;
        }

        //Default answer, completed below when something is found
        $return = array(
            'post' => $post,
            'tags' => $tags,
            'total' => 0,
            'results' => array()
        );

        //A post is required
        if (empty($post)) {
            return $return;
        }

        //An index is required
        $index = $this->getIndex();

        if (empty($index)) {
            return $return;
        }

        //One term suggestion per tag
        $suggest = new Suggest();

        foreach ($tags as $key => $text) {
            $term = new SuggestTerm('tags_suggest_' . $key, '_all');
            $term->setText($text);
            $term->setSize(5);
            $term->setAnalyzer('simple');

            $suggest->addSuggestion($term);
        }

        //Search!
        $resultset = $index->search($suggest);
        $suggestions = $resultset->getSuggests();

        //Nothing suggested
        if (empty($suggestions)) {
            return $return;
        }

        //Collect every option of the first entry of each suggestion
        $found = array();

        foreach ($suggestions as $suggestion) {
            if (empty($suggestion[0]['options'])) {
                continue;
            }

            foreach ($suggestion[0]['options'] as $option) {
                $found[$option['text']] = array(
                    'score' => $option['score'],
                    'freq' => $option['freq'],
                );
            }
        }

        //Complete the answer
        $return['total'] = $resultset->getTotalHits();
        $return['results'] = $found;

        return $return;
    }
}