Port categories dump header fix
author Stanislav Malyshev <smalyshev@gmail.com>
Sat, 18 Nov 2017 00:17:09 +0000 (16:17 -0800)
committer Stanislav Malyshev <smalyshev@gmail.com>
Wed, 29 Nov 2017 02:04:43 +0000 (18:04 -0800)
This fix is ported from https://gerrit.wikimedia.org/r/#/c/372905
since it does not belong to SPARQL updates really.

It changes the main node for the category dump to
<http://acme.test/wiki/Special:CategoryDump>
thus allowing support for multiple wikis living on the same domain.

Change-Id: Ie1ac5ddf6b3b73f3966274f90abc1db50061b494

includes/CategoriesRdf.php
maintenance/dumpCategoriesAsRdf.php
tests/phpunit/data/categoriesrdf/categoriesRdf-out.nt
tests/phpunit/maintenance/categoriesRdfTest.php

index e19dc2a..463f6e8 100644 (file)
@@ -37,7 +37,13 @@ class CategoriesRdf {
        /**
         * Current version of the dump format.
         */
-       const FORMAT_VERSION = "1.0";
+       const FORMAT_VERSION = "1.1";
+       /**
+        * Special page for Dump identification.
+        * Used as head URI for each wiki's category dump, e.g.:
+        * https://en.wikipedia.org/wiki/Special:CategoryDump
+        */
+       const SPECIAL_DUMP = 'Special:CategoryDump';
        /**
         * @var RdfWriter
         */
@@ -84,12 +90,30 @@ class CategoriesRdf {
                $this->rdfWriter->say( 'rdfs', 'label' )->value( $titletext );
        }
 
+       /**
+        * Make URL from title label
+        * @param string $titleLabel Short label (without namespace) of the category
+        * @return string URL for the category
+        */
+       public function labelToUrl( $titleLabel ) {
+               return $this->titleToUrl( Title::makeTitle( NS_CATEGORY, $titleLabel ) );
+       }
+
        /**
         * Convert Title to link to target page.
         * @param Title $title
-        * @return string
+        * @return string URL for the category
         */
        private function titleToUrl( Title $title ) {
                return $title->getFullURL( '', false, PROTO_CANONICAL );
        }
+
+       /**
+        * Get URI of the dump for this particular wiki.
+        * @return false|string
+        */
+       public function getDumpURI() {
+               return $this->titleToUrl( Title::makeTitle( NS_MAIN, self::SPECIAL_DUMP ) );
+       }
+
 }
index 282a04b..c1835d0 100644 (file)
@@ -96,7 +96,7 @@ class DumpCategoriesAsRdf extends Maintenance {
                if ( substr( $licenseUrl, 0, 2 ) == '//' ) {
                        $licenseUrl = 'https:' . $licenseUrl;
                }
-               $this->rdfWriter->about( wfExpandUrl( '/categoriesDump', PROTO_CANONICAL ) )
+               $this->rdfWriter->about( $this->categoriesRdf->getDumpURI() )
                        ->a( 'schema', 'Dataset' )
                        ->a( 'owl', 'Ontology' )
                        ->say( 'cc', 'license' )->is( $licenseUrl )
index d2d7ea8..b8bd8e0 100644 (file)
@@ -1,10 +1,10 @@
-<http://acme.test/categoriesDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Dataset> .
-<http://acme.test/categoriesDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Ontology> .
-<http://acme.test/categoriesDump> <http://creativecommons.org/ns#license> <https://creativecommons.org/licenses/by-sa/3.0/> .
-<http://acme.test/categoriesDump> <http://schema.org/softwareVersion> "1.0" .
-<http://acme.test/categoriesDump> <http://schema.org/dateModified> "{DATE}"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
-<http://acme.test/categoriesDump> <http://schema.org/isPartOf> <http://acme.test/> .
-<http://acme.test/categoriesDump> <http://www.w3.org/2002/07/owl#imports> <https://www.mediawiki.org/ontology/ontology.owl> .
+<http://acme.test/wiki/Special:CategoryDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://schema.org/Dataset> .
+<http://acme.test/wiki/Special:CategoryDump> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.w3.org/2002/07/owl#Ontology> .
+<http://acme.test/wiki/Special:CategoryDump> <http://creativecommons.org/ns#license> <https://creativecommons.org/licenses/by-sa/3.0/> .
+<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/softwareVersion> "1.1" .
+<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/dateModified> "{DATE}"^^<http://www.w3.org/2001/XMLSchema#dateTime> .
+<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/isPartOf> <http://acme.test/> .
+<http://acme.test/wiki/Special:CategoryDump> <http://www.w3.org/2002/07/owl#imports> <https://www.mediawiki.org/ontology/ontology.owl> .
 <http://acme.test/wiki/Category:Category_One> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.mediawiki.org/ontology#Category> .
 <http://acme.test/wiki/Category:Category_One> <http://www.w3.org/2000/01/rdf-schema#label> "Category One" .
 <http://acme.test/wiki/Category:2_Category_Two> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <https://www.mediawiki.org/ontology#Category> .
index ec2746e..9026bd9 100644 (file)
@@ -60,8 +60,8 @@ class CategoriesRdfTest extends MediaWikiLangTestCase {
                $dumpScript->execute();
                $actualOut = file_get_contents( $outFileName );
                $actualOut = preg_replace(
-                       '|<http://acme.test/categoriesDump> <http://schema.org/dateModified> "[^"]+?"|',
-                       '<http://acme.test/categoriesDump> <http://schema.org/dateModified> "{DATE}"',
+                       '|<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/dateModified> "[^"]+?"|',
+                       '<http://acme.test/wiki/Special:CategoryDump> <http://schema.org/dateModified> "{DATE}"',
                        $actualOut
                );