From: daniel Date: Mon, 31 Mar 2014 11:00:28 +0000 (+0200) Subject: Introducing pp_sortkey. X-Git-Tag: 1.31.0-rc.0~16019^2 X-Git-Url: https://git.cyclocoop.org/%7B%24www_url%7Dadmin/compta/banques/%7B%7B%20url_for%28%27admin_users%27%29%20%7D%7D?a=commitdiff_plain;h=b1d6e78f7444211cc93793103c3673e1a2637a69;p=lhc%2Fweb%2Fwiklou.git Introducing pp_sortkey. This adds the pp_sortkey column to the page_props table. pp_sortkey allows for top-k queries for pages, e.g. the 100 pages with the most language links, etc. It is also possible to query for exact values. For now, pp_sortkey will contain pp_value's numeric value if the value was set to a float, int or boolean. Associated tasks: * create a maintenance script for populating pp_sortkey. Tricky, because when reading from the database, all values are strings. * create an API module for querying pages by property value. bug: 58032 Change-Id: I217c42656fb877ff35a36eb446a22bdaf119faac --- diff --git a/RELEASE-NOTES-1.23 b/RELEASE-NOTES-1.23 index ff7b5f9a67..58e49637c4 100644 --- a/RELEASE-NOTES-1.23 +++ b/RELEASE-NOTES-1.23 @@ -9,6 +9,9 @@ MediaWiki 1.23 is an alpha-quality branch and is not recommended for use in production. === Configuration changes in 1.23 === +* Introduced $wgPagePropsHaveSortkey as a backwards-compatibility switch, + for using the old schema of the page_props table, in case the respective + schema update was not applied. * When $wgJobRunRate is higher that zero, jobs are now executed via an asynchronous HTTP request to a MediaWiki entry point. This may require increasing the number of server worker threads. @@ -245,6 +248,9 @@ changes to languages because of Bugzilla reports. * Support was added for Northern Luri (lrc). === Other changes in 1.23 === +* Added pp_sortkey column to page_props table, so pages can be efficiently + queried and sorted by property value (bug 58032). + See $wgPagePropsHaveSortkey if you want to postpone the schema change. 
* The rc_type field in the recentchanges table has been superseded by a new rc_source field. The rc_source field is a string representation of the change type where rc_type was a numeric constant. This field is not yet diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index 4326ecaa6f..f0917f98c8 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -7085,6 +7085,13 @@ $wgSiteTypes = array( */ $wgCompiledFiles = array(); +/** + * Whether the page_props table has a pp_sortkey column. Set to false in case + * the respective database schema change was not applied. + * @since 1.23 + */ +$wgPagePropsHaveSortkey = true; + /** * For really cool vim folding this needs to be at the end: * vim: foldmarker=@{,@} foldmethod=marker diff --git a/includes/deferred/LinksUpdate.php b/includes/deferred/LinksUpdate.php index 79232e5d45..65c04d630e 100644 --- a/includes/deferred/LinksUpdate.php +++ b/includes/deferred/LinksUpdate.php @@ -502,18 +502,69 @@ class LinksUpdate extends SqlDataUpdate { */ function getPropertyInsertions( $existing = array() ) { $diffs = array_diff_assoc( $this->mProperties, $existing ); + $arr = array(); - foreach ( $diffs as $name => $value ) { - $arr[] = array( - 'pp_page' => $this->mId, - 'pp_propname' => $name, - 'pp_value' => $value, - ); + foreach ( array_keys( $diffs ) as $name ) { + $arr[] = $this->getPagePropRowData( $name ); } return $arr; } + /** + * Returns an associative array to be used for inserting a row into + * the page_props table. Besides the given property name, this will + * include the page id from $this->mId and any property value from + * $this->mProperties. + * + * The array returned will include the pp_sortkey field if this + * is present in the database (as indicated by $wgPagePropsHaveSortkey). + * The sortkey value is currently determined by getPropertySortKeyValue(). + * + * @note: this assumes that $this->mProperties[$prop] is defined. 
+ * + * @param string $prop The name of the property. + * + * @return array + */ + private function getPagePropRowData( $prop ) { + global $wgPagePropsHaveSortkey; + + $value = $this->mProperties[$prop]; + + $row = array( + 'pp_page' => $this->mId, + 'pp_propname' => $prop, + 'pp_value' => $value, + ); + + if ( $wgPagePropsHaveSortkey ) { + $row['pp_sortkey'] = $this->getPropertySortKeyValue( $value ); + } + + return $row; + } + + /** + * Determines the sort key for the given property value. + * This will return $value if it is a float or int, + * 1 or resp. 0 if it is a bool, and null otherwise. + * + * @note: In the future, we may allow the sortkey to be specified explicitly + * in ParserOutput::setProperty. + * + * @param mixed $value + * + * @return float|null + */ + private function getPropertySortKeyValue( $value ) { + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + return floatval( $value ); + } + + return null; + } + /** * Get an array of interwiki insertions for passing to the DB * Skips the titles specified by the 2-D array $existing diff --git a/includes/installer/MysqlUpdater.php b/includes/installer/MysqlUpdater.php index bcd50b3a34..be1c8a7189 100644 --- a/includes/installer/MysqlUpdater.php +++ b/includes/installer/MysqlUpdater.php @@ -250,6 +250,7 @@ class MysqlUpdater extends DatabaseUpdater { array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ), array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ), array( 'addField', 'user', 'user_password_expires', 'patch-user_password_expire.sql' ), + array( 'addField', 'page_props', 'pp_sortkey', 'patch-pp_sortkey.sql' ), ); } diff --git a/includes/installer/PostgresUpdater.php b/includes/installer/PostgresUpdater.php index c5c10b381f..4f74bfa8f7 100644 --- a/includes/installer/PostgresUpdater.php +++ b/includes/installer/PostgresUpdater.php @@ -405,6 +405,9 @@ class PostgresUpdater extends DatabaseUpdater { array( 
'addPgField', 'recentchanges', 'rc_source', "TEXT NOT NULL DEFAULT ''" ), array( 'addPgField', 'page', 'page_links_updated', "TIMESTAMPTZ NULL" ), array( 'addPgField', 'mwuser', 'user_password_expires', 'TIMESTAMPTZ NULL' ), + array( 'addPgField', 'page_props', 'pp_sortkey', 'float NULL' ), + array( 'addPgIndex', 'page_props', 'pp_propname_sortkey_page', + '( pp_propname, pp_sortkey, pp_page ) WHERE ( pp_sortkey IS NOT NULL )' ), ); } diff --git a/includes/installer/SqliteUpdater.php b/includes/installer/SqliteUpdater.php index ea19efaaee..15f8160de4 100644 --- a/includes/installer/SqliteUpdater.php +++ b/includes/installer/SqliteUpdater.php @@ -128,6 +128,7 @@ class SqliteUpdater extends DatabaseUpdater { array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ), array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ), array( 'addField', 'user', 'user_password_expires', 'patch-user_password_expire.sql' ), + array( 'addField', 'page_props', 'pp_sortkey', 'patch-pp_sortkey.sql' ), ); } diff --git a/includes/parser/ParserOutput.php b/includes/parser/ParserOutput.php index d1e3e580fc..cfee3cf8ca 100644 --- a/includes/parser/ParserOutput.php +++ b/includes/parser/ParserOutput.php @@ -398,6 +398,9 @@ class ParserOutput extends CacheTime { * retrieved given the page ID or via a DB join when given the page * title. * + * Since 1.23, page_props are also indexed by numeric value, to allow + * for efficient "top k" queries of pages wrt a given property. + * * setProperty() is thus used to propagate properties from the parsed * page to request contexts other than a page view of the currently parsed * article. 
diff --git a/maintenance/archives/patch-pp_sortkey.sql b/maintenance/archives/patch-pp_sortkey.sql new file mode 100644 index 0000000000..b13b6055b1 --- /dev/null +++ b/maintenance/archives/patch-pp_sortkey.sql @@ -0,0 +1,8 @@ +-- Add a 'sortkey' field to page_props so pages can be efficiently +-- queried by the numeric value of a property. + +ALTER TABLE /*_*/page_props + ADD pp_sortkey float DEFAULT NULL; + +CREATE UNIQUE INDEX /*i*/pp_propname_sortkey_page + ON /*_*/page_props ( pp_propname, pp_sortkey, pp_page ); diff --git a/maintenance/tables.sql b/maintenance/tables.sql index ca610fc36b..d39ee87fe0 100644 --- a/maintenance/tables.sql +++ b/maintenance/tables.sql @@ -1429,12 +1429,13 @@ CREATE INDEX /*i*/pt_timestamp ON /*_*/protected_titles (pt_timestamp); CREATE TABLE /*_*/page_props ( pp_page int NOT NULL, pp_propname varbinary(60) NOT NULL, - pp_value blob NOT NULL + pp_value blob NOT NULL, + pp_sortkey float DEFAULT NULL ) /*$wgDBTableOptions*/; CREATE UNIQUE INDEX /*i*/pp_page_propname ON /*_*/page_props (pp_page,pp_propname); CREATE UNIQUE INDEX /*i*/pp_propname_page ON /*_*/page_props (pp_propname,pp_page); - +CREATE UNIQUE INDEX /*i*/pp_propname_sortkey_page ON /*_*/page_props (pp_propname,pp_sortkey,pp_page); -- A table to log updates, one text key row per update. 
CREATE TABLE /*_*/updatelog ( diff --git a/tests/phpunit/includes/LinksUpdateTest.php b/tests/phpunit/includes/LinksUpdateTest.php index 33643aceff..ddb521b8e9 100644 --- a/tests/phpunit/includes/LinksUpdateTest.php +++ b/tests/phpunit/includes/LinksUpdateTest.php @@ -180,14 +180,49 @@ class LinksUpdateTest extends MediaWikiTestCase { * @covers ParserOutput::setProperty */ public function testUpdate_page_props() { + global $wgPagePropsHaveSortkey; + /** @var ParserOutput $po */ list( $t, $po ) = $this->makeTitleAndParserOutput( "Testing", 111 ); - $po->setProperty( "foo", "bar" ); + $fields = array( 'pp_propname', 'pp_value' ); + $expected = array(); - $this->assertLinksUpdate( $t, $po, 'page_props', 'pp_propname, pp_value', 'pp_page = 111', array( - array( 'foo', 'bar' ), - ) ); + $po->setProperty( "bool", true ); + $expected[] = array( "bool", true ); + + $po->setProperty( "float", 4.0 + 1.0/4.0 ); + $expected[] = array( "float", 4.0 + 1.0/4.0 ); + + $po->setProperty( "int", -7 ); + $expected[] = array( "int", -7 ); + + $po->setProperty( "string", "33 bar" ); + $expected[] = array( "string", "33 bar" ); + + // compute expected sortkey values + if ( $wgPagePropsHaveSortkey ) { + $fields[] = 'pp_sortkey'; + + foreach ( $expected as &$row ) { + $value = $row[1]; + + if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) { + $row[] = floatval( $value ); + } else { + $row[] = null; + } + } + unset( $row ); // break the reference left behind by the by-reference foreach + } + + $this->assertLinksUpdate( $t, $po, 'page_props', $fields, 'pp_page = 111', $expected ); + } + + public function testUpdate_page_props_without_sortkey() { + $this->setMwGlobals( 'wgPagePropsHaveSortkey', false ); + + $this->testUpdate_page_props(); } // @todo test recursive, too!