classes/RDFIO_SMWPageWriter.php

Summary

Maintainability
B
4 hrs
Test Coverage
<?php

/**
 * Writes a set of RDFIOWikiPage objects to MediaWiki pages, updating existing
 * explicit facts ([[Property::value]]) and template calls in the page wiki
 * text where possible, and appending new facts and category tags otherwise.
 */
class RDFIOSMWPageWriter {

    public function __construct() {
    }

    /**
     * Main function, that takes an array of RDFIOWikiPage objects, and writes to
     * MediaWiki.
     * @param array $wikiPages Array of RDFIOWikiPage objects, keyed by wiki page title
     * @throws MWException If a page title is empty or cannot be turned into a valid title
     */
    public function import( $wikiPages ) {

        // ----------------------------------------------------------------------
        //  1. Loop over wiki pages
        // ----------------------------------------------------------------------
        foreach ( $wikiPages as $wikiTitle => $wikiPage ) {
            /* @var $wikiPage RDFIOWikiPage */

            // PHP turns numeric-string array keys into integers, so compare as
            // string: a loose comparison would reject a page titled "0" on PHP < 8.
            $wikiTitle = (string)$wikiTitle;
            if ( $wikiTitle === '' ) {
                throw new MWException( 'Could not import page: Title is empty!' );
            }

            // ----------------------------------------------------------------------
            //  2. Get the old wiki text for current page
            // ----------------------------------------------------------------------
            $oldWikiText = $this->getTextForPage( $wikiTitle );
            $newWikiText = $oldWikiText; // using new variable to separate extraction from editing

            // ----------------------------------------------------------------------
            //  3. Find all existing fact statements in page (to be updated)
            // ----------------------------------------------------------------------
            $oldFacts = $this->extractFacts( $oldWikiText );

            // ----------------------------------------------------------------------
            //  4. Find all existing template statements in page (to be updated)
            // ----------------------------------------------------------------------
            $oldTplCalls = $this->extractTemplateCalls( $oldWikiText );

            // ----------------------------------------------------------------------
            //  5. Find all templates that might be used (in current page, and via all its categories)
            // ----------------------------------------------------------------------
            $newCategories = $wikiPage->getCategories();
            $tplsForNewCats = $this->getTemplatesForCategories( $newCategories );

            // Collect old and new template names in one array. De-duplicated so
            // that each template page is fetched and parsed only once.
            $allTemplateNames = array_unique( array_merge( array_keys( $oldTplCalls ), $tplsForNewCats ) );

            // ----------------------------------------------------------------------
            //  6. Build an index: Property -> Template(s) -> Parameter name(s)
            // ----------------------------------------------------------------------
            // Fetch each template's wiki text and extract the property/parameter
            // pairs it defines.
            $allTemplateFacts = [];
            foreach ( $allTemplateNames as $tplName ) {
                $tplPageText = $this->getTextForPage( (string)$tplName, NS_TEMPLATE );
                $allTemplateFacts[$tplName] = $this->extractPropertyParameterIndex( $tplPageText );
            }

            $newFacts = $wikiPage->getFacts();
            $propTplIndex = $this->buildPropertyTemplateParamIndex( $newFacts, $allTemplateFacts );

            // ----------------------------------------------------------------------
            //  7. Loop over each fact and:
            // ----------------------------------------------------------------------
            foreach ( $newFacts as $fact ) {
                $wikiTextUpdated = $newWikiText;
                // ----------------------------------------------------------------------
                //  8. Update all existing fact statements on page
                // ----------------------------------------------------------------------
                $wikiTextUpdated = $this->updateExplicitFactsInText( $fact, $oldFacts, $wikiTextUpdated );

                // ----------------------------------------------------------------------
                //  9. Update in all existing template calls, based on index
                // ----------------------------------------------------------------------
                // Template calls are re-extracted from the current text, since
                // earlier facts may already have changed them.
                $currentTplCalls = $this->extractTemplateCalls( $wikiTextUpdated );
                $wikiTextUpdated = $this->updateTemplateCalls( $fact, $propTplIndex, $currentTplCalls, $wikiTextUpdated );

                // ----------------------------------------------------------------------
                // 10. If the fact is not updated yet, write via any relevant templates as new template calls
                // ----------------------------------------------------------------------
                // TODO: Not implemented yet.
                // NOTE(review): until this is implemented, a fact whose property is
                // covered by a category template that is not called on the page is
                // written neither via step 9 nor via step 11 — confirm this is intended.

                // ----------------------------------------------------------------------
                // 11. If neither of 8-10 was done, add as new fact statements
                // ----------------------------------------------------------------------
                $prop = $fact['p'];
                if ( !array_key_exists( $prop, $oldFacts ) && empty( $propTplIndex[$prop] ) ) {
                    $wikiTextUpdated = $this->addNewExplicitFact( $fact, $wikiTextUpdated );
                }

                // ----------------------------------------------------------------------
                // 12. Update any URI-type objects with an Equivalent URI fact.
                // ----------------------------------------------------------------------
                // TODO: Not implemented yet.

                // Update main wiki text variable with changes for fact
                $newWikiText = $wikiTextUpdated;
            }

            // ----------------------------------------------------------------------
            // 13. Add category tags (if template with category has not been added yet).
            // ----------------------------------------------------------------------
            foreach ( $newCategories as $cat ) {
                $newWikiText = $this->addNewCategory( $cat, $newWikiText );
            }

            // ----------------------------------------------------------------------
            // 14. Write updated page
            // ----------------------------------------------------------------------
            $this->writeToPage( $wikiTitle, $newWikiText, 'Page updated by RDFIO' );
        }
    }

    /**
     * Build an index of the form: property => [ template name => parameter name ],
     * with one (possibly empty) entry for each property among the new facts.
     * @param array $newFacts Facts as arrays with (at least) the key 'p' (property)
     * @param array $allTemplateFacts Per template name: array of property => parameter name
     * @return array
     */
    private function buildPropertyTemplateParamIndex( $newFacts, $allTemplateFacts ) {
        $propTplIndex = [];
        foreach ( $newFacts as $fact ) {
            $prop = $fact['p'];
            $propTplIndex[$prop] = [];
            foreach ( $allTemplateFacts as $tplName => $tplFacts ) {
                if ( array_key_exists( $prop, $tplFacts ) ) {
                    $propTplIndex[$prop][$tplName] = $tplFacts[$prop];
                }
            }
        }
        return $propTplIndex;
    }

    /**
     * Replace the value of an already existing explicit fact ([[Property::value]])
     * in the wiki text with the value of the given fact.
     * @param array $fact Array with the keys 'p' (property) and 'o' (new value)
     * @param array $oldFacts Existing facts, as returned by extractFacts()
     * @param string $wikiText
     * @return string $wikiText
     */
    private function updateExplicitFactsInText( $fact, $oldFacts, $wikiText ) {
        $prop = $fact['p'];
        $newVal = $fact['o'];

        $propWikified = $this->getWikifiedTitle( $prop );

        if ( array_key_exists( $propWikified, $oldFacts ) ) {
            $oldVal = $oldFacts[$propWikified]['value'];
            $oldFactText = $oldFacts[$propWikified]['wikitext'];

            // Swap the value inside the fact statement only. Replacing the bare
            // value across the whole page would also rewrite unrelated text that
            // happens to contain the same string.
            $sepPos = strpos( $oldFactText, '::' );
            if ( $sepPos !== false ) {
                $newFactText = substr_replace( $oldFactText, $newVal, $sepPos + 2, strlen( $oldVal ) );
                $wikiText = str_replace( $oldFactText, $newFactText, $wikiText );
            }
        }
        return $wikiText;
    }


    /**
     * Write the value of the given fact into those template calls on the page
     * whose template maps the fact's property to a parameter. An existing
     * parameter gets its value replaced; a missing one is appended to the call.
     * Templates in the index that are not called on the page are skipped.
     * @param array $fact Array with the keys 'p' (property) and 'o' (new value)
     * @param array $propTplIndex Index as built by buildPropertyTemplateParamIndex()
     * @param array $oldTemplateCalls Template calls, as returned by extractTemplateCalls()
     * @param string $wikiText
     * @return string $wikiText
     */
    private function updateTemplateCalls( $fact, $propTplIndex, $oldTemplateCalls, $wikiText ) {
        $prop = $fact['p'];
        $newVal = $fact['o'];

        if ( !array_key_exists( $prop, $propTplIndex ) ) {
            return $wikiText;
        }

        foreach ( $propTplIndex[$prop] as $tplName => $paramName ) {
            // The index also contains templates that are only known via the
            // page's categories; there is no call to update for those.
            if ( !isset( $oldTemplateCalls[$tplName]['calltext'] ) ) {
                continue;
            }
            $oldTplCallText = $oldTemplateCalls[$tplName]['calltext'];

            // Quote the parameter name, as it comes from template wiki text and
            // may contain regex meta characters. The value may be empty.
            $paramPattern = '/\|' . preg_quote( $paramName, '/' ) . '\=([^\=\|\}\n]*)/';
            if ( preg_match( $paramPattern, $oldTplCallText, $matches, PREG_OFFSET_CAPTURE ) === 1 ) {
                // Replace exactly the matched value, by offset, so that the same
                // string occurring in the template name or in other parameters
                // is left alone.
                list( $oldVal, $valOffset ) = $matches[1];
                $newTplCallText = substr_replace( $oldTplCallText, $newVal, $valOffset, strlen( $oldVal ) );
            } else {
                $newTplCallText = str_replace( '}}', '|' . $paramName . '=' . $newVal . "\n}}", $oldTplCallText );
            }
            $wikiText = str_replace( $oldTplCallText, $newTplCallText, $wikiText );
        }

        return $wikiText;
    }

    /**
     * Append the fact as a new explicit fact statement, unless the wiki text
     * already contains an explicit fact for the same property.
     * @param array $fact Array with the keys 'p' (property) and 'o' (value)
     * @param string $wikiText
     * @return string $wikiText
     */
    private function addNewExplicitFact( $fact, $wikiText ) {
        $prop = $fact['p'];
        $val = $fact['o'];

        $propWikified = $this->getWikifiedTitle( $prop );

        $oldFacts = $this->extractFacts( $wikiText );
        if ( !array_key_exists( $propWikified, $oldFacts ) ) {
            $wikiText .= "\n" . '[[' . $propWikified . '::' . $val . ']]';
        }

        return $wikiText;
    }

    /**
     * Append a category tag, unless the wiki text already contains one for the
     * same category.
     * @param string $category
     * @param string $wikiText
     * @return string $wikiText
     */
    private function addNewCategory( $category, $wikiText ) {
        $oldCategories = $this->extractCategories( $wikiText );
        $catTitleWikified = $this->getWikifiedTitle( $category, NS_CATEGORY );
        if ( !array_key_exists( $catTitleWikified, $oldCategories ) ) {
            $wikiText .= "\n" . '[[Category:' . $catTitleWikified . "]]";
        }
        return $wikiText;
    }

    /**
     * Extract, from the wiki text of a template page, which template parameter
     * feeds which property, based on statements of the form
     * [[Property::{{{parameter|}}}]].
     * @param string $tplPageText
     * @return array $propParamIndex Array of property name => parameter name
     */
    private function extractPropertyParameterIndex( $tplPageText ) {
        $propParamIndex = [];
        // The property name must not contain brackets or pipes, so that a match
        // can not start in a preceding, unrelated [[link]].
        preg_match_all( '/\[\[([^\:\[\]\|]+)::\{\{\{([^\|\}]+)\|?\}\}\}\]\]/', $tplPageText, $tplParamMatches );
        $propNames = $tplParamMatches[1];
        $paramNames = $tplParamMatches[2];
        foreach ( $propNames as $idx => $propName ) {
            $propParamIndex[$propName] = $paramNames[$idx];
        }
        return $propParamIndex;
    }

    /**
     * Extract an array of facts from wiki text
     * @param string $wikiContent
     * @return array $facts Per property name: array with the keys 'property',
     *  'value' and 'wikitext'. A later fact for the same property overwrites an
     *  earlier one.
     */
    private function extractFacts( $wikiContent ) {
        $facts = [];
        // The property name must not contain brackets or pipes, so that a match
        // can not start in a preceding, unrelated [[link]].
        preg_match_all( '/\[\[([^\:\[\]\|]+)::([^\|\]]+)(\|([^\]]*))?\]\]/', $wikiContent, $matches );
        $wikiText = $matches[0];
        $propName = $matches[1];
        $propVal = $matches[2];
        foreach ( $propName as $idx => $pName ) {
            $facts[$pName] = [ 'property' => $pName, 'value' => $propVal[$idx], 'wikitext' => $wikiText[$idx] ];
        }
        return $facts;
    }

    /**
     * Extract an array of categories from wiki text
     * @param string $wikiContent
     * @return array $mwCategories Per category name: array with the key 'wikitext'
     */
    private function extractCategories( $wikiContent ) {
        $mwCategories = [];
        // The category name stops at the first pipe or closing bracket, and the
        // optional "|sort key" part stops at the closing brackets, so that each
        // category tag is matched separately.
        preg_match_all( '/\[\[Category:([^\|\]]*)(?:\|[^\]]*)?\]\]/', $wikiContent, $matches );
        $wikiText = $matches[0];
        $catName = $matches[1];
        foreach ( $catName as $idx => $cName ) {
            $mwCategories[$cName] = [ 'wikitext' => $wikiText[$idx] ];
        }
        return $mwCategories;
    }

    /**
     * Extract an array of templates from wiki text
     * @param string $wikiContent
     * @return array $templates Per template name: array with the keys 'calltext'
     *  (the full text of the call) and 'paramvals' (list of arrays with the keys
     *  'name' and 'val')
     */
    private function extractTemplateCalls( $wikiContent ) {
        $templates = [];
        preg_match_all( '/\{\{\s?([^#][A-Za-z0-9\ ]+)\s?(\|([^\}]*))?\s?\}\}/U', $wikiContent, $matches );
        $wikiText = $matches[0];
        $tplName = $matches[1];
        $tplParamsText = $matches[2];
        foreach ( $tplName as $idx => $tName ) {
            $paramVals = [];
            preg_match_all( '/\|([^\|\n\=]+)\=([^\|\=\n]+)/', $tplParamsText[$idx], $paramMatches );
            $names = $paramMatches[1];
            $vals = $paramMatches[2];
            foreach ( $names as $paramIdx => $name ) {
                $paramVals[] = [ 'name' => $name, 'val' => $vals[$paramIdx] ];
            }

            $templates[$tName] = [
                'calltext' => $wikiText[$idx],
                'paramvals' => $paramVals,
            ];
        }
        return $templates;
    }

    /**
     * Create a title object, failing with an exception rather than a fatal
     * error further down when the text does not make up a valid title.
     * @param string $title
     * @param int $wikiNamespace
     * @return Title
     * @throws MWException If no title object could be created
     */
    private function makeTitle( $title, $wikiNamespace = NS_MAIN ) {
        $titleObj = Title::newFromText( $title, $wikiNamespace );
        if ( !$titleObj ) {
            throw new MWException( 'Could not create a valid wiki title from: "' . $title . '"' );
        }
        return $titleObj;
    }

    /**
     * Get the wikified title
     * @param string $title
     * @param int $wikiNamespace
     * @return string $wikifiedTitle
     * @throws MWException If no title object could be created
     */
    private function getWikifiedTitle( $title, $wikiNamespace = NS_MAIN ) {
        return $this->makeTitle( $title, $wikiNamespace )->getText();
    }

    /**
     * Retrieves wiki text from wiki database
     * @param string $title
     * @param int $wikiNamespace
     * @return string $wikiText The page's text, or an empty string if the page has no content
     * @throws MWException If no title object could be created
     */
    private function getTextForPage( $title, $wikiNamespace = NS_MAIN ) {
        $wikiText = '';
        $titleObj = $this->makeTitle( $title, $wikiNamespace );

        $pageObj = WikiPage::factory( $titleObj );
        $content = $pageObj->getContent();
        if ( $content !== null ) {
            $wikiText = $content->getNativeData();
        }
        return $wikiText;
    }

    /**
     * Takes the parsed and updated content as
     * a string and writes to the wiki.
     * @param string $wikiTitle
     * @param string $content
     * @param string $summary
     * @throws MWException If no title object could be created
     */
    private function writeToPage( $wikiTitle, $content, $summary ) {
        $mwTitleObj = $this->makeTitle( $wikiTitle );
        $mwArticleObj = WikiPage::factory( $mwTitleObj );
        $mwArticleObj->doEditContent( ContentHandler::makeContent( $content, $mwTitleObj ), $summary );
    }

    /**
     * Get templates referred to via "Has template", by a set of categories
     * @param array $categories Category names
     * @return array $templateNames
     */
    private function getTemplatesForCategories( $categories ) {
        $templateNames = [];
        foreach ( $categories as $cat ) {
            $catPageText = $this->getTextForPage( $cat, NS_CATEGORY );
            $tplNamesForCat = $this->extractTplNameFromHasTemplateFact( $catPageText );
            $templateNames = array_merge( $templateNames, $tplNamesForCat );
        }
        return $templateNames;
    }

    /**
     * Extract template names from facts of the form [[Has template::Template:...]]
     * @param string $wikiText
     * @return array Template names, without the "Template:" prefix
     */
    private function extractTplNameFromHasTemplateFact( $wikiText ) {
        preg_match_all( '/\[\[Has template::Template:([^\|\]]+)(\|[^\|\]]*)?\]\]/', $wikiText, $matches );
        return $matches[1];
    }
}