specials/SpecialSPARQLEndpoint.php

Summary

Maintainability
D
2 days
Test Coverage
<?php

class SPARQLEndpoint extends RDFIOSpecialPage {
    protected $sparqlendpoint;
    protected $storewrapper;

    public function __construct() {
        parent::__construct( 'SPARQLEndpoint', 'rdfio-sparql' );
        $this->sparqlendpoint = new ARC2_StoreEndpoint( $this->getSPARQLEndpointConfig(), $this );
        if ( !$this->sparqlendpoint->isSetUp() ) {
            $this->sparqlendpoint->setUp();
        }
        $this->storewrapper = new RDFIOARC2StoreWrapper();
    }

    /**
     * Execute the SPARQL Endpoint Special page
     * @param string $par (unused)
     */
    public function execute( $par ) {
        unset( $par ); // Needed to suppress warning about unused variable which we include just for consistency.

        // Require rdfio-sparql permission for the current user
        if ( !$this->userCanExecute( $this->getUser() ) ) {
            throw new PermissionsError( 'rdfio-sparql', array( 'rdfio-specialpage-access-permission-missing' ) );
        }

        global $rdfiogQueryByEquivURIs, $rdfiogOutputEquivUris;
        $wUser = $this->getUser();
        $wRequest = $this->getRequest();

        $this->setHeaders();
        $options = $this->buildOptionsObj( $this->getRequest(), $rdfiogQueryByEquivURIs, $rdfiogOutputEquivUris );

        if ( $options->query == '' ) {
            $this->printHTMLForm( $options );
            return;
        }
        if ( $options->queryByEquivUris ) {
            $this->urisToEquivURIsInQuery( $options );
        }

        switch ( $options->queryType ) {
            case 'select':
            case 'construct':
                $this->executeReadOnlyQuery( $options );
                return;
            case 'insert':
                if ( !$this->allowInsert( $wUser, $wRequest ) ) {
                    $this->errorMsg(  );
                    $this->printHTMLForm( $options );
                    return;
                }
                $this->importTriplesInQuery( $options );
                $this->printHTMLForm( $options );
                return;
            case 'delete':
                if ( !$this->allowDelete( $wUser ) ) {
                    $this->errorMsg( wfMessage( 'rdfio-delete-not-allowed' )->parse() );
                    $this->printHTMLForm( $options );
                }
                $this->deleteTriplesInQuery( $options );
                $this->printHTMLForm( $options );
                return;
        }
        $this->errorMsg( wfMessage( 'rdfio-error-invalid-query-type-or-combination' )->parse() );
        $this->printHTMLForm( $options );
    }

    /**
     * Execute method for SPARQL queries that only queries and returns results, but
     * does not modify, add or delete triples.
     */
    private function executeReadOnlyQuery( $options ) {
        $wikiOut = $this->getOutput();

        $outputSer = $this->passSparqlToARC2AndGetSerializedOutput();

        if ( $outputSer == '' ) {
            $this->errorMsg( wfMessage( 'rdfio-error-no-sparql-results' )->parse() );
            return;
        }

        $outputArr = unserialize( $outputSer );
        if ( $options->outputEquivUris ) {
            $outputArr = $this->toEquivURIsInSparqlResults( $outputArr );
        }

        if ( $options->queryType == 'select' ) {
            if ( in_array( $options->outputType, array( 'rdfxml' ) ) ) {
                $this->errorMsg( wfMessage( 'rdfio-error-invalid-output-for-select' )->parse() );
                $this->printHTMLForm( $options );
                return;
            }

            if ( $options->outputType == 'htmltab' ) {
                $resultHtml = $this->sparqlResultToHTML( $outputArr );
                $this->printHTMLForm( $options );
                $wikiOut->addHTML( $resultHtml );
                return;
            }

            // Default option: Return SPARQL result document
            $this->prepareCreatingDownloadableFile( $options );
            // Using echo instead of $wgOut->addHTML() here, since output format is not HTML
            echo $this->sparqlendpoint->getSPARQLXMLSelectResultDoc( $outputArr );
            return;
        }

        if ( $options->queryType == 'construct' ) {
            if ( $options->outputType !== 'turtle' ) {
                // Falling back on using RDF/XML as default
                $options->outputType = 'rdfxml';
            }

            // Here the results should be RDF/XML triples,
            // not just plain XML SPARQL result set
            $tripleindex = $outputArr['result'];

            $arc2 = new ARC2_Class( array(), $this );
            $triples = $arc2->toTriples( $tripleindex );

            if ( $options->outputEquivUris ) {
                $triples = $this->storewrapper->toEquivUrisInTriples( $triples );
            }

            $this->prepareCreatingDownloadableFile( $options );
            // Using echo instead of $wgOut->addHTML() here, since output format is not HTML
            echo $this->triplesToRDFXML( $triples );
            return;
        }
    }

    private function passSparqlToARC2AndGetSerializedOutput() {
        // Make sure ARC2 returns a PHP serialization, so that we
        // can do stuff with it programmatically
        $_POST['output'] = 'php_ser';

        $this->sparqlendpoint->handleRequest();
        if ( $this->sparqlendpoint->getErrors() ) {
            $this->errorMsgArr( $this->sparqlendpoint->getErrors );
            return null;
        }

        return $this->sparqlendpoint->getResult();
    }

    /**
     * Figure out options for the query, based on arguments in the request,and global settings variables
     * all taken as parameters.
     * @param request string
     * @param $queryByEquivURIs bool
     * @param $outputEquivURIs bool
     * @return $seOptions RDFIOSPARQLEndpointOptions
     */
    private function buildOptionsObj( $request, $queryByEquivURIs, $outputEquivURIs ) {
        $seOptions = new RDFIOSPARQLEndpointOptions();

        $seOptions->query = $request->getText( 'query' );
        $seOptions->queryByEquivUris = isset( $queryByEquivURIs ) ? $queryByEquivURIs : $request->getBool( 'equivuri_q' );
        $seOptions->outputEquivUris = isset( $outputEquivURIs ) ? $outputEquivURIs : $request->getBool( 'equivuri_o' );
        $seOptions->outputType = $request->getText( 'output' );
        if ( $seOptions->outputType === '' ) {
            $seOptions->outputType = 'sparqlresult'; // Default according to https://www.w3.org/TR/sparql11-protocol
        }

        if ( $seOptions->query != '' ) {
            $result = $this->extractQueryInfosAndType( $seOptions->query );
            if ( $result == null ) {
                return null;
            }
            $seOptions->queryInfos = $result[0];
            $seOptions->queryType = $result[1];
        }

        return $seOptions;
    }

    /**
     * Extract query information via ARC2's SPARQL (plus) parser
     * @param $query string
     * @return array
     */
    private function extractQueryInfosAndType( $query ) {
        // Convert Sparql Update syntax to ARC2's SPARQL+ syntax:
        $querySparqlPlus = str_replace( 'INSERT DATA', 'INSERT INTO <>', $query );

        $parser = new ARC2_SPARQLPlusParser( array(), $this );
        $parser->parse( $querySparqlPlus, '' );
        if ( $parser->getErrors() ) {
            $this->errorMsgArr( $parser->getErrors() );
            return null;
        }

        $queryInfos = $parser->getQueryInfos();
        if ( array_key_exists( 'query', $queryInfos ) ) {
            $queryType = $queryInfos['query']['type'];
        }
        return array( $queryInfos, $queryType );
    }

    /**
     * Modify the SPARQL pattern to allow querying using the original URI
     */
    private function urisToEquivURIsInQuery( $options ) {
        $queryInfo = $options->queryInfos;
        $patterns = $queryInfo['query']['pattern']['patterns'][0]['patterns'];

        $patterns = $this->extendQueryPatternsWithEquivUriLinks( $patterns );
        $queryInfo['query']['pattern']['patterns'][0]['patterns'] = $patterns;

        $sparqlserializer = new ARC2_SPARQLSerializerPlugin( array(), $this );
        $query = $sparqlserializer->toString( $queryInfo );

        // Modify the $_POST variable directly, so that ARC2 can pick up the modified query
        $_POST['query'] = $query;
    }

    /**
     * Extend the patterns in the SPARQL query so that every time an URI is found,
     * that place in the pattern is replaced by a temporary SPARQL variable
     * which is then linked with an Equivalent URI property to its equivalent URI.
     * @param $patterns
     * @return array
     */
    private function extendQueryPatternsWithEquivUriLinks( $patterns ) {
        $patternIdx = 0;
        foreach ( $patterns as $pattern ) {
            $equivUriUris = array(
                's' => $this->storewrapper->getEquivURIURI(),
                'p' => $this->storewrapper->getEquivPropertyURIURI(),
                'o' => $this->storewrapper->getEquivURIURI()
            );
            foreach ( array( 's', 'p', 'o' ) as $varType ) {
                if ( $pattern[$varType . '_type'] === 'uri' ) {
                    $tempVar = 'rdfio_var_' . $patternIdx . '_' . $varType;
                    $uri = $pattern[$varType];

                    // Add new Equivalent URI triple, linking to the
                    $patterns[] = array(
                        'type' => 'triple',
                        's' => $tempVar,
                        'p' => $equivUriUris[$varType],
                        'o' => $uri,
                        's_type' => 'var',
                        'p_type' => 'uri',
                        'o_type' => 'uri',
                        'o_datatype' => '',
                        'o_lang' => ''
                    );

                    // Replace the existing URI with a variable, so the Equiv URI link works
                    $pattern[$varType] = $tempVar;
                    $pattern[$varType . '_type'] = 'var';
                }
            }
            // Put back the pattern in patterns array, since foreach does not edit in place
            $patterns[$patternIdx] = $pattern;
            $patternIdx++;
        }
        return $patterns;
    }

    /**
     * Print out the HTML Form
     */
    private function printHTMLForm( $options ) {
        $wOut = $this->getOutput();
        $wOut->addScript( $this->getHTMLFormScript() );
        $wOut->addHTML( $this->getHTMLForm( $options->query ) );
    }

    /**
     * Extract the main content from ARC:s SPARQL result HTML
     * and do some enhancing (wikify tables)
     * @param string $output
     * @return string $html
     */
    private function sparqlResultToHTML( $resultStructure ) {
        $html = '';
        $html = '<h3>' . wfMessage( 'rdfio-result-from-sparql-query' )->parse() . ':</h3><div style="font-size: 11px;">' . $html . '</div>';
        $html .= '<table class="wikitable sortable">';

        $result = $resultStructure['result'];
        $vars = $result['variables'];

        $html .= '<tr>';
        foreach ( $vars as $var ) {
            $html .= '<th width="34%">' . $var . '</th>';
        }
        $html .= '</tr>';

        $rows = $result['rows'];
        foreach ( $rows as $row ) {
            $html .= "<tr>";
            foreach ( $vars as $var ) {
                $val = $row[$var];
                //$valueType = $row[$variable . ' type'];
                $html .= '<td style="font-size:9px!important;white-space:nowrap!important;">' . $val . '</td>';
            }
            $html .= '</tr>';
        }

        $html .= '</table>';
        return $html;
    }

    /**
     * After a query is parsed, import the parsed data to the wiki
     */
    private function importTriplesInQuery( $options ) {
        $rdfImporter = new RDFIORDFImporter();
        $triples = $options->queryInfos['query']['construct_triples'];
        try {
            $rdfImporter->importTriples( $triples );
            $this->successMsg( wfMessage( 'rdfio-import-success' )->parse() );
        } catch ( MWException $e ) {
            $this->errorMsg( wfMessage( 'rdfio-import-error' )->parse() . '!<br>' . $e->getMessage() );
        }
    }

    /**
     * After a query is parsed, delete the parsed data from the wiki
     */
    private function deleteTriplesInQuery( $options ) {
        $triples = $options->queryInfos['query']['construct_triples'];
        $rdfImporter = new RDFIOSMWBatchWriter( $triples, 'triples_array' );
        $rdfImporter->executeDelete();
    }

    /**
     * Replace URI:s with an accompanying "Equivalent URI" one. If
     * there are more than one Equivalent URI for a given URI, the others than
     * the first one will be ignored.
     * @param array $sparqlResult
     * @return array $sparqlResult
     */
    private function toEquivURIsInSparqlResults( $sparqlResult ) {
        $rows = $sparqlResult['result']['rows'];
        $vars = $sparqlResult['result']['variables'];
        foreach ( $rows as $rowid => $row ) {
            foreach ( $vars as $var ) {
                $typeKey = "$var type";
                $type = $row[$typeKey];
                $uri = $row[$var];
                if ( $type === 'uri' ) {
                    try {
                        $equivURIs = $this->storewrapper->getEquivURIsForURI( $uri );
                    } catch ( RDFIOARC2StoreWrapperException $e ) {
                        $this->errorMsg( $e );
                        return;
                    }
                    if ( !empty( $equivURIs ) ) {
                        $equivURI = $equivURIs[0];
                        // Replace URI with the 'Equivalent URI'
                        $rows[$rowid][$var] = $equivURI;
                    }
                }
            }
        }
        // Put back the modified rows into the results structure
        $sparqlResult['result']['rows'] = $rows;
        return $sparqlResult;
    }


    /**
     * Convert an ARC triples array into RDF/XML
     * @param array $triples
     * @return string $rdfxml
     */
    private function triplesToRDFXML( $triples ) {
        $ser = new ARC2_RDFXMLSerializer();
        // Serialize into RDF/XML, since it will contain
        // all URIs in un-abbreviated form, so that they
        // can easily be replaced by search-and-replace
        $rdfxml = $ser->getSerializedTriples( $triples );
        if ( $ser->getErrors() ) {
            $this->errorMsgArr( $ser->getErrors() );
            return null;
        }
        return $rdfxml;
    }

    /**
     * Get a configuration array for initializing the ARCs
     * SPARQL endpoint
     */
    private function getSPARQLEndpointConfig() {
        global $wgDBserver, $wgDBname, $wgDBuser, $wgDBpassword, $wgDBprefix;
        $epconfig = array(
            'db_host' => $wgDBserver,
            'db_name' => $wgDBname,
            'db_user' => $wgDBuser,
            'db_pwd' => $wgDBpassword,
            'store_name' => $wgDBprefix . 'arc2store', // Determines table prefix
        );
        $epconfig['endpoint_features'] =
            array(
                'select',
                'construct',
                'ask',
                'describe',
                // 'load',
                // 'insert',                   // This is not needed, since it is done via SMWWriter instead
                // 'delete',                   // This is not needed, since it is done via SMWWriter instead
                // 'dump'                      // dump is a special command for streaming SPOG export
            );
        $epconfig['endpoint_timeout'] = 60;   // not implemented in ARC2 preview
        // 'endpoint_read_key' => '',         // optional
        // 'endpoint_write_key' => 'somekey', // optional
        // 'endpoint_max_limit' => 250,       // optional
        return $epconfig;
    }

    /**
     * Get the HTML for the main SPARQL querying form. If $query is set, use it to prefill the main textarea
     * @param string $query
     * @return string $htmlForm
     */
    private function getHTMLForm( $query = '' ) {
        $wRequest = $this->getRequest();
        $wUser = $this->getUser();
        $uriResolverURI = SpecialPage::getTitleFor( 'URIResolver' )->getFullURL() . '/';
        $defaultQuery = "@PREFIX w : <$uriResolverURI> .\n\nSELECT *\nWHERE { ?s ?p ?o }\nLIMIT 25";

        if ( $query == '' ) {
            $query = $defaultQuery;
        }

        $chkEquivUriQ = $wRequest->getBool( 'equivuri_q', false ) == 1 ? ' checked="true" ' : '';
        $chkEquivUriO = $wRequest->getBool( 'equivuri_o', false ) == 1 ? ' checked="true" ' : '';
        $chkFilterVocab = $wRequest->getBool( 'filtervocab', false ) == 1 ? ' checked="true" ' : '';
        $selOutputHTML = $wRequest->getText( 'output', '' ) == 'htmltab' ? ' selected="selected" ' : '';
        $selOutputRDFXML = $wRequest->getText( 'output', '' ) == 'rdfxml' ? ' selected="selected" ' : '';
        //$selOutputTurtle = $wRequest->getText( 'output', '' ) == 'turtle' ? ' selected="selected" ' : '';

        // Make the HTML format selected by default
        if ( $selOutputRDFXML == '' ) {
            $selOutputHTML = ' selected="selected" ';
        }

        $htmlForm = '<form method="post" action=""
            name="createEditQuery">
    <div style="font-size: 10px">
    <table border="0"><tbody>
        <tr><td colspan="3">' . wfMessage( 'rdfio-enter-sparql-query' )->parse() . ':</td><tr>
        <tr><td colspan="3"><textarea cols="80" rows="9" name="query">' . $query . '</textarea></td></tr>
        <tr>
            <td style="vertical-align: top; border-right: 1px solid #ccc;">
                <table border="0" style="background: transparent; font-size: 11px;">
                    <tr>
                        <td style="text-align: right">' . wfMessage( 'rdfio-query-by-equivalent-uris' )->parse() . ':</td>
                        <td><input type="checkbox" name="equivuri_q" value="1" ' . $chkEquivUriQ . '/></td>
                    </tr>
                </table>
            </td>
            <td width="170" style="vertical-align: top; border-right: 1px solid #ccc;">
                <table border="0" style="font-size: 11px; background: transparent;">
                    <tr>
                        <td style="text-align: right">' . wfMessage( 'rdfio-output-equivalent-uris' )->parse() . ':</td>
                        <td><input type="checkbox" name="equivuri_o" id="outputequivuri" value="1" ' . $chkEquivUriO /* . ' onChange="toggleDisplay(\'byontology\');" */ . '/></td>
                    </tr>
                </table>
            </td>
            <td width="260" style="vertical-align: top;">
                <table border="0" style="font-size: 11px; background: transparent;" >
                <tr><td style="text-align: right" width="180">' . wfMessage( 'rdfio-output-format' )->parse() . ':</td>
                <td style="vertical-align: top">
                <select id="output" name="output" onChange="toggleDisplay(\'byontology\');" >
                  <!-- <option value="" >default</option> -->
                  <!-- <option value="json" >JSON</option> -->
                  <!-- <option value="plain" >Plain</option> -->
                  <!-- <option value="php_ser" >Serialized PHP</option> -->
                  <option value="htmltab" ' . $selOutputHTML . '>HTML</option>
                  <option value="sparqlresult" >SPARQL Resultset (XML)</option>
                  <option value="rdfxml" ' . $selOutputRDFXML . '>RDF/XML</option>
                  <!-- option value="turtle" >Turtle</option -->
                  <!-- <option value="infos" >Query Structure</option> -->
                  <!-- <option value="tsv" >TSV</option> -->
                </select>
                </td></tr>
                <tr>
                <td colspan="2">
                <span style="font-family: arial, helvetica, sans-serif; font-size: 10px; color: #777;">(' . wfMessage( 'rdfio-rdfxml-requires-using' )->parse() . ' <a href="http://www.w3.org/TR/rdf-sparql-query/#construct">CONSTRUCT</a>)</span>
                </td>
                </table>
            </td>
        </tr>
        <tr>
            <td colspan="3">
                <div id="byontology" style="display: none; background: #ffd; border: 1px solid #ee7;">
                    <table border="0" style="font-size: 11px; background: transparent;" >
                        <tr><td style="text-align: right;">Filter by vocabulary:</td>
                            <td><input type="checkbox" name="filtervocab" value="1" ' . $chkFilterVocab . '/></td>
                            <td style="text-align: right">Vocabulary URL:</td>
                            <td><input type="text" name="filtervocaburl" size="48" /></td>
                        </tr>
                        <tr>
                            <td>&#160;</td>
                            <td>&#160;</td>
                            <td>&#160;</td>
                            <td><span style="font-family: arial, helvetica, sans-serif; font-size: 10px; color: #777">Example: http://xmlns.com/foaf/spec/index.rdf</span></td>
                        </tr>
                    </table>
                </div>
            </td>
        </tr>
    </table>
    </div>
    <input type="submit" value="' . wfMessage( 'rdfio-submit' )->parse() . '">
    <input type="hidden" name="token" value="' . $wUser->getEditToken() . '">
</form>';
        return $htmlForm;
    }

    /**
     * Get the javascript used for some functionality in the main SPARQL
     * querying HTML form
     * @return string $htmlFormScript
     */
    private function getHTMLFormScript() {
        $htmlFormScript = "<script type=\"text/javascript\">
        function toggleDisplay(id1) {
            var bostyle = document.getElementById(id1).style.display;
            var fmtsel = document.getElementById('output');
            var fmt = fmtsel.options[fmtsel.selectedIndex].value;
            var outsel = document.getElementById('outputequivuri');
            if ( outsel.checked && fmt.match('rdfxml') ) {
                document.getElementById(id1).style.display = 'block';
            } else {
                document.getElementById(id1).style.display = 'none';
            }
        }
         </script>";
        return $htmlFormScript;
    }
}

class RDFIOSPARQLEndpointOptions {
    public $query;
    public $queryType;
    public $queryByEquivUris = false;
    public $outputEquivUris = false;
    public $outputType;
    public $queryInfos = array();

    function __construct() {}
}