wikimedia/mediawiki-extensions-Wikibase

View on GitHub
lib/includes/Sites/SiteMatrixParser.php

Summary

Maintainability
A
0 mins
Test Coverage
<?php

namespace Wikibase\Lib\Sites;

use InvalidArgumentException;
use MediaWiki\Site\MediaWikiSite;
use MediaWiki\Site\Site;

/**
 * Translates api sitematrix results json into an array of Site objects
 *
 * @license GPL-2.0-or-later
 * @author Katie Filbert < aude.wiki@gmail.com >
 */
class SiteMatrixParser {

    /**
     * @var string
     */
    private $scriptPath;

    /**
     * @var string
     */
    private $articlePath;

    /**
     * @var bool
     */
    private $expandGroup;

    /**
     * @var bool|string
     */
    private $protocol;

    /**
     * @param string $scriptPath (e.g. '/w/$1')
     * @param string $articlePath (e.g. '/wiki/$1')
     * @param string|bool $protocol (true: default, false: strip, string: protocol to force)
     * @param bool $expandGroup expands site matrix group codes from wiki to wikipedia
     */
    public function __construct( $scriptPath, $articlePath, $protocol, $expandGroup = true ) {
        $this->scriptPath = $scriptPath;
        $this->articlePath = $articlePath;
        $this->protocol = $protocol;
        $this->expandGroup = $expandGroup;
    }

    /**
     * @param string $json
     *
     * @throws InvalidArgumentException
     * @return Site[]
     */
    public function sitesFromJson( $json ) {
        $specials = [];

        $data = json_decode( $json, true );

        if ( !is_array( $data ) || !array_key_exists( 'sitematrix', $data ) ) {
            throw new InvalidArgumentException( 'Cannot decode site matrix data.' );
        }

        if ( array_key_exists( 'specials', $data['sitematrix'] ) ) {
            $specials = $data['sitematrix']['specials'];
            unset( $data['sitematrix']['specials'] );
        }

        if ( array_key_exists( 'count', $data['sitematrix'] ) ) {
            unset( $data['sitematrix']['count'] );
        }

        $groups = $data['sitematrix'];

        $sites = [];

        foreach ( $groups as $groupData ) {
            $sites = array_merge(
                $sites,
                $this->getSitesFromLangGroup( $groupData )
            );
        }

        $sites = array_merge(
            $sites,
            $this->getSpecialSites( $specials )
        );

        return $sites;
    }

    /**
     * @param array[] $specialSites
     *
     * @return Site[]
     */
    private function getSpecialSites( array $specialSites ) {
        $sites = [];

        foreach ( $specialSites as $specialSite ) {
            $site = $this->getSiteFromSiteData( $specialSite );
            $siteId = $site->getGlobalId();

            // todo: get this from $wgConf
            $site->setLanguageCode( 'en' );

            $site->addInterwikiId( $specialSite['code'] );

            $sites[$siteId] = $site;
        }

        return $sites;
    }

    /**
     * Gets an array of Site objects for all sites of the same language
     * subdomain grouping used in the site matrix.
     *
     * @param array $langGroup
     *
     * @return Site[]
     */
    private function getSitesFromLangGroup( array $langGroup ) {
        $sites = [];

        foreach ( $langGroup['site'] as $siteData ) {
            if ( !array_key_exists( 'code', $langGroup ) ) {
                continue;
            }

            $site = $this->getSiteFromSiteData( $siteData );
            $site->setLanguageCode( $langGroup['code'] );
            $siteId = $site->getGlobalId();
            $sites[$siteId] = $site;
        }

        return $sites;
    }

    /**
     * @param array $siteData
     *
     * @return Site
     */
    private function getSiteFromSiteData( array $siteData ) {
        $site = new MediaWikiSite();
        $site->setGlobalId( $siteData['dbname'] );

        // @note: expandGroup is specific to wikimedia site matrix sources
        $siteGroup = ( $this->expandGroup && $siteData['code'] === 'wiki' )
            ? 'wikipedia' : $siteData['code'];

        $site->setGroup( $siteGroup );

        $url = $siteData['url'];

        if ( $this->protocol === false ) {
            $url = preg_replace( '@^https?:@', '', $url );
        } elseif ( is_string( $this->protocol ) ) {
            $url = preg_replace( '@^https?:@', $this->protocol . ':', $url );
        }

        $site->setFilePath( $url . $this->scriptPath );
        $site->setPagePath( $url . $this->articlePath );

        return $site;
    }

}