wikimedia/mediawiki-core

View on GitHub
includes/api/ApiOpenSearch.php

Summary

Maintainability
D
1 day
Test Coverage
<?php
/**
 * Copyright © 2006 Yuri Astrakhan "<Firstname><Lastname>@gmail.com"
 * Copyright © 2008 Brooke Vibber <bvibber@wikimedia.org>
 * Copyright © 2014 Wikimedia Foundation and contributors
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

namespace MediaWiki\Api;

use InvalidArgumentException;
use MediaWiki\Cache\LinkBatchFactory;
use MediaWiki\MainConfigNames;
use MediaWiki\MediaWikiServices;
use MediaWiki\Title\Title;
use MediaWiki\Utils\UrlUtils;
use SearchEngine;
use SearchEngineConfig;
use SearchEngineFactory;
use Wikimedia\ParamValidator\ParamValidator;

/**
 * API module for action=opensearch: runs a completion search and emits the
 * suggestions as either an OpenSearch JSON array or an XML document.
 *
 * @ingroup API
 */
class ApiOpenSearch extends ApiBase {
    // Shared search helpers; the constructor assigns the search services this trait needs.
    use \MediaWiki\Api\SearchApi;

    /** @var string|null Output format without any 'fm' suffix; null until getFormat() has run */
    private $format = null;
    /** @var string|null 'fm' if the requested format ended in 'fm', '' otherwise; null until getFormat() has run */
    private $fm = null;

    // Used by search() to batch the redirect lookup for the suggested titles
    private LinkBatchFactory $linkBatchFactory;
    // Used by search() to expand each suggestion's URL
    private UrlUtils $urlUtils;

    /**
     * Wire the module up with the services it depends on.
     *
     * @param ApiMain $mainModule Main module handed on to the parent constructor
     * @param string $moduleName Module name handed on to the parent constructor
     * @param LinkBatchFactory $linkBatchFactory Batches the redirect lookup in search()
     * @param SearchEngineConfig $searchEngineConfig Needed by the SearchApi trait
     * @param SearchEngineFactory $searchEngineFactory Needed by the SearchApi trait
     * @param UrlUtils $urlUtils Expands the URL of every suggestion
     */
    public function __construct(
        ApiMain $mainModule,
        $moduleName,
        LinkBatchFactory $linkBatchFactory,
        SearchEngineConfig $searchEngineConfig,
        SearchEngineFactory $searchEngineFactory,
        UrlUtils $urlUtils
    ) {
        parent::__construct( $mainModule, $moduleName );

        // Dependencies this class uses directly
        $this->urlUtils = $urlUtils;
        $this->linkBatchFactory = $linkBatchFactory;

        // Services needed in SearchApi trait
        $this->searchEngineFactory = $searchEngineFactory;
        $this->searchEngineConfig = $searchEngineConfig;
    }

    /**
     * Get the output format, with any trailing 'fm' stripped.
     *
     * The 'format' parameter is inspected once; the result is remembered in
     * $this->format, and $this->fm records whether the 'fm' suffix was present.
     *
     * @return string
     */
    protected function getFormat() {
        if ( $this->format !== null ) {
            // Already resolved on an earlier call
            return $this->format;
        }

        $requested = $this->getParameter( 'format' );
        $hasFmSuffix = str_ends_with( $requested, 'fm' );

        $this->fm = $hasFmSuffix ? 'fm' : '';
        $this->format = $hasFmSuffix ? substr( $requested, 0, -2 ) : $requested;

        return $this->format;
    }

    /**
     * Create the printer that matches the requested output format.
     *
     * JSON output uses the dedicated ApiOpenSearchFormatJson printer; XML output
     * uses the main module's 'xml'/'xmlfm' printer with a custom root element.
     * Any other format is reported through ApiBase::dieDebug().
     */
    public function getCustomPrinter() {
        $format = $this->getFormat();

        if ( $format === 'json' ) {
            $warningsAsError = $this->getParameter( 'warningsaserror' );
            return new ApiOpenSearchFormatJson( $this->getMain(), $this->fm, $warningsAsError );
        }

        if ( $format === 'xml' ) {
            $printer = $this->getMain()->createPrinterByName( 'xml' . $this->fm );
            '@phan-var ApiFormatXml $printer';
            /** @var ApiFormatXml $printer */
            $printer->setRootElement( 'SearchSuggestion' );
            return $printer;
        }

        ApiBase::dieDebug( __METHOD__, "Unsupported format '{$this->getFormat()}'" );
    }

    /**
     * Run the search, let hooks enrich the suggestions, trim extracts and
     * write everything to the API result.
     */
    public function execute() {
        $params = $this->extractRequestParams();
        $search = $params['search'];

        // Open search results may be stored for a very long time
        $this->getMain()->setCacheMaxAge(
            $this->getConfig()->get( MainConfigNames::SearchSuggestCacheExpiry ) );
        $this->getMain()->setCacheMode( 'public' );
        $results = $this->search( $search, $params );

        // Allow hooks to populate extracts and images
        $this->getHookRunner()->onApiOpenSearchSuggest( $results );

        // Trim extracts, if necessary. Entries are updated through their key
        // rather than through a by-reference loop variable, so no reference
        // into $results is left dangling once the loop has finished.
        $length = $this->getConfig()->get( MainConfigNames::OpenSearchDescriptionLength );
        foreach ( $results as $id => $r ) {
            if ( is_string( $r['extract'] ) && !$r['extract trimmed'] ) {
                $results[$id]['extract'] = self::trimExtract( $r['extract'], $length );
            }
        }

        // Populate result object
        $this->populateResult( $search, $results );
    }

    /**
     * Perform the search
     *
     * Runs a completion search, optionally replaces redirects by their targets
     * and keys every suggestion by a unique integer id.
     *
     * @param string $search the search query
     * @param array $params api request params
     * @return array search results. Keys are integers.
     * @phan-return array<array{title:Title,redirect_from:?Title,extract:false,extract_trimmed:false,image:false,url:string}>
     *  Note that phan annotations don't support keys containing a space.
     */
    private function search( $search, array $params ) {
        $searchEngine = $this->buildSearchEngine( $params );
        $titles = $searchEngine->extractTitles( $searchEngine->completionSearchWithVariants( $search ) );

        if ( !$titles ) {
            return [];
        }

        if ( $params['redirects'] === null ) {
            // Backwards compatibility, don't resolve for JSON.
            $resolveRedir = $this->getFormat() !== 'json';
        } else {
            $resolveRedir = $params['redirects'] === 'resolve';
        }

        // Each pair is [ title to report, redirect it was reached through (or null) ]
        $pairs = $resolveRedir
            ? $this->resolveRedirectTargets( $titles, __METHOD__ )
            : array_map( static fn ( $title ) => [ $title, null ], $titles );

        // Special pages need unique integer ids in the return list, so we just
        // assign them negative numbers because those won't clash with the
        // always positive articleIds that non-special pages get.
        $nextSpecialPageId = -1;

        $results = [];
        foreach ( $pairs as [ $title, $from ] ) {
            $resultId = $title->getArticleID();
            if ( $resultId === 0 ) {
                $resultId = $nextSpecialPageId;
                $nextSpecialPageId--;
            }
            $results[$resultId] = [
                'title' => $title,
                'redirect from' => $from,
                'extract' => false,
                'extract trimmed' => false,
                'image' => false,
                'url' => (string)$this->urlUtils->expand( $title->getFullURL(), PROTO_CURRENT ),
            ];
        }

        return $results;
    }

    /**
     * Replace every redirect among the given titles by its (non-interwiki) target.
     *
     * A target is reported once only, at the position of the first title that
     * leads to it; later titles that resolve to an already seen page are dropped.
     *
     * @param Title[] $titles Titles returned by the search engine
     * @param string $fname Caller name attached to the database query
     * @return array[] List of [ Title $title, ?Title $redirectFrom ] pairs, in input order
     */
    private function resolveRedirectTargets( array $titles, string $fname ): array {
        // Query for redirects
        $redirects = [];
        $lb = $this->linkBatchFactory->newLinkBatch( $titles );
        if ( !$lb->isEmpty() ) {
            $db = $this->getDB();
            $res = $db->newSelectQueryBuilder()
                ->select( [ 'page_namespace', 'page_title', 'rd_namespace', 'rd_title' ] )
                ->from( 'page' )
                ->join( 'redirect', null, [ 'rd_from = page_id' ] )
                ->where( [
                    'rd_interwiki' => '',
                    $lb->constructSet( 'page', $db )
                ] )
                ->caller( $fname )
                ->fetchResultSet();
            foreach ( $res as $row ) {
                $redirects[$row->page_namespace][$row->page_title] =
                    [ $row->rd_namespace, $row->rd_title ];
            }
        }

        // Bypass any redirects
        $pairs = [];
        $seen = [];
        foreach ( $titles as $title ) {
            $ns = $title->getNamespace();
            $dbkey = $title->getDBkey();
            $from = null;
            if ( isset( $redirects[$ns][$dbkey] ) ) {
                [ $ns, $dbkey ] = $redirects[$ns][$dbkey];
                $from = $title;
                $title = Title::makeTitle( $ns, $dbkey );
            }
            if ( !isset( $seen[$ns][$dbkey] ) ) {
                $seen[$ns][$dbkey] = true;
                $pairs[] = [ $title, $from ];
            }
        }

        return $pairs;
    }

    /**
     * Copy the suggestions into the API result in the layout of the requested format.
     *
     * JSON gets the four-element OpenSearch suggestions array (query, terms,
     * descriptions, URLs); XML gets a SearchSuggestion 2.0 structure with one
     * Item per suggestion. Any other format is reported through ApiBase::dieDebug().
     *
     * @param string $search
     * @param array[] &$results
     */
    protected function populateResult( $search, &$results ) {
        $apiResult = $this->getResult();
        $format = $this->getFormat();

        if ( $format === 'json' ) {
            // http://www.opensearch.org/Specifications/OpenSearch/Extensions/Suggestions/1.1
            $apiResult->addArrayType( null, 'array' );
            $apiResult->addValue( null, 0, (string)$search );

            $titles = [];
            $extracts = [];
            $links = [];
            foreach ( $results as $suggestion ) {
                $titles[] = $suggestion['title']->getPrefixedText();
                // A missing extract (false) is emitted as an empty string
                $extracts[] = (string)$suggestion['extract'];
                $links[] = $suggestion['url'];
            }

            $apiResult->addValue( null, 1, $titles );
            $apiResult->addValue( null, 2, $extracts );
            $apiResult->addValue( null, 3, $links );
            return;
        }

        if ( $format === 'xml' ) {
            // https://msdn.microsoft.com/en-us/library/cc891508(v=vs.85).aspx
            // Only these image attributes are copied into the output
            $allowedImageKeys = array_fill_keys( [ 'source', 'alt', 'width', 'height', 'align' ], true );

            $xmlItems = [];
            foreach ( $results as $suggestion ) {
                $extract = $suggestion['extract'];
                $image = $suggestion['image'];

                $entry = [
                    'Text' => $suggestion['title']->getPrefixedText(),
                    'Url' => $suggestion['url'],
                ];
                if ( is_string( $extract ) && $extract !== '' ) {
                    $entry['Description'] = $extract;
                }
                if ( is_array( $image ) && isset( $image['source'] ) ) {
                    $entry['Image'] = array_intersect_key( $image, $allowedImageKeys );
                }
                ApiResult::setSubelementsList( $entry, array_keys( $entry ) );
                $xmlItems[] = $entry;
            }
            ApiResult::setIndexedTagName( $xmlItems, 'Item' );

            $apiResult->addValue( null, 'version', '2.0' );
            $apiResult->addValue( null, 'xmlns', 'http://opensearch.org/searchsuggest2' );
            $apiResult->addValue( null, 'Query', (string)$search );
            $apiResult->addSubelementsList( null, 'Query' );
            $apiResult->addValue( null, 'Section', $xmlItems );
            return;
        }

        ApiBase::dieDebug( __METHOD__, "Unsupported format '{$this->getFormat()}'" );
    }

    /**
     * Describe the parameters accepted by this module: the common search
     * parameters plus the opensearch-specific ones, with the default limit
     * taken from the OpenSearchDefaultLimit setting.
     *
     * @return array
     */
    public function getAllowedParams() {
        $moduleParams = [
            'suggest' => [
                ParamValidator::PARAM_DEFAULT => false,
                // Deprecated since 1.35
                ParamValidator::PARAM_DEPRECATED => true,
            ],
            'redirects' => [
                ParamValidator::PARAM_TYPE => [ 'return', 'resolve' ],
                ApiBase::PARAM_HELP_MSG_PER_VALUE => [],
                ApiBase::PARAM_HELP_MSG_APPEND => [ 'apihelp-opensearch-param-redirects-append' ],
            ],
            'format' => [
                ParamValidator::PARAM_DEFAULT => 'json',
                ParamValidator::PARAM_TYPE => [ 'json', 'jsonfm', 'xml', 'xmlfm' ],
            ],
            'warningsaserror' => false,
        ];

        // Array union: for a key present on both sides the common definition is kept
        $allowedParams = $this->buildCommonApiParams( false ) + $moduleParams;

        // Use open search specific default limit
        $defaultLimit = $this->getConfig()->get( MainConfigNames::OpenSearchDefaultLimit );
        $allowedParams['limit'][ParamValidator::PARAM_DEFAULT] = $defaultLimit;

        return $allowedParams;
    }

    /**
     * Describe the 'profile' parameter: it selects a completion search profile
     * and reuses the prefixsearch help message.
     *
     * @return array[]
     */
    public function getSearchProfileParams() {
        $profileSettings = [
            'profile-type' => SearchEngine::COMPLETION_PROFILE_TYPE,
            'help-message' => 'apihelp-query+prefixsearch-param-profile'
        ];

        return [ 'profile' => $profileSettings ];
    }

    /**
     * Example queries for this module.
     *
     * @return string[] Example query string => key of the message describing it
     */
    protected function getExamplesMessages() {
        $examples = [];
        $examples['action=opensearch&search=Te'] = 'apihelp-opensearch-example-te';

        return $examples;
    }

    /**
     * Location of the on-wiki documentation for this module.
     *
     * @return string
     */
    public function getHelpUrls() {
        $helpPage = 'https://www.mediawiki.org/wiki/Special:MyLanguage/API:Opensearch';

        return $helpPage;
    }

    /**
     * Trim an extract to a sensible length.
     *
     * The text is cut at the first sentence end found at or after $length
     * characters. If no sentence end is found that way, the first line of the
     * text is returned instead.
     *
     * Adapted from Extension:OpenSearchXml, which adapted it from
     * Extension:ActiveAbstract.
     *
     * @param string $text
     * @param int $length Target length; actual result will continue to the end of a sentence.
     * @return string
     */
    public static function trimExtract( $text, $length ) {
        // The pattern embeds the target length, so it is cached per length
        // instead of reusing whatever length the first caller happened to pass.
        static $regexByLength = [];

        // Keep the quantifier well-formed even for odd input
        $length = max( 0, (int)$length );

        if ( !isset( $regexByLength[$length] ) ) {
            // Non-ASCII terminators are written as code point escapes so that they
            // survive re-encoding or normalisation of this source file.
            $endchars = [
                '([^\d])\.\s', '\!\s', '\?\s', // regular ASCII
                '\x{3002}', // full-width ideographic full-stop
                '\x{FF0E}', '\x{FF01}', '\x{FF1F}', // double-width roman forms
                '\x{FF61}', // half-width ideographic full stop
            ];
            $endgroup = implode( '|', $endchars );
            $end = "(?:$endgroup)";
            // At least $length characters, as few as possible, then one or more sentence ends
            $sentence = ".{{$length},}?$end+";
            $regexByLength[$length] = "/^($sentence)/u";
        }

        $matches = [];
        if ( preg_match( $regexByLength[$length], $text, $matches ) ) {
            return trim( $matches[1] );
        }

        // Just return the first line
        return trim( explode( "\n", $text )[0] );
    }

    /**
     * Fetch the template for a type.
     *
     * The template is the canonical server plus the api entry point, an
     * action=opensearch query containing the {searchTerms} placeholder, and the
     * default search namespaces joined by '|' ('0' if that list comes out empty).
     *
     * @param string $type MIME type
     * @return string
     * @throws InvalidArgumentException If $type is not one of the two suggestion MIME types
     */
    public static function getOpenSearchTemplate( $type ) {
        $services = MediaWikiServices::getInstance();
        $canonicalServer = $services->getMainConfig()->get( MainConfigNames::CanonicalServer );
        $defaultNamespaces = $services->getSearchEngineConfig()->defaultNamespaces();
        $ns = implode( '|', $defaultNamespaces ) ?: '0';

        // The two supported types differ only in the query string
        switch ( $type ) {
            case 'application/x-suggestions+json':
                $query = '?action=opensearch&search={searchTerms}&namespace=';
                break;

            case 'application/x-suggestions+xml':
                $query = '?action=opensearch&format=xml&search={searchTerms}&namespace=';
                break;

            default:
                throw new InvalidArgumentException( __METHOD__ . ": Unknown type '$type'" );
        }

        return $canonicalServer . wfScript( 'api' ) . $query . $ns;
    }
}

// Registers the un-namespaced name 'ApiOpenSearch' as an alias of the namespaced class.
/** @deprecated class alias since 1.43 */
class_alias( ApiOpenSearch::class, 'ApiOpenSearch' );