This adds the pp_sortkey column to the page_props table.
pp_sortkey allows for top-k queries for pages, e.g.
the 100 pages with the most language links, etc. It is also
possible to query for exact values.
For now, pp_sortkey will contain pp_value's numeric value if
the value was set to a float, int or boolean.
Associated tasks:
* create a maintenance script for populating pp_sortkey. Tricky,
because when reading from the database, all values are strings.
* create an API module for querying pages by property value.
bug: 58032
Change-Id: I217c42656fb877ff35a36eb446a22bdaf119faac
production.
=== Configuration changes in 1.23 ===
+* Introduced $wgPagePropsHaveSortkey as a backwards-compatibility switch,
+ for using the old schema of the page_props table, in case the respective
+ schema update was not applied.
* When $wgJobRunRate is higher than zero, jobs are now executed via an
asynchronous HTTP request to a MediaWiki entry point. This may require
increasing the number of server worker threads.
* Support was added for Northern Luri (lrc).
=== Other changes in 1.23 ===
+* Added pp_sortkey column to page_props table, so pages can be efficiently
+ queried and sorted by property value (bug 58032).
+ See $wgPagePropsHaveSortkey if you want to postpone the schema change.
* The rc_type field in the recentchanges table has been superseded by a new
rc_source field. The rc_source field is a string representation of the
change type where rc_type was a numeric constant. This field is not yet
*/
$wgCompiledFiles = array();
+/**
+ * Whether the page_props table has a pp_sortkey column. Set to false in case
+ * the respective database schema change was not applied.
+ * @since 1.23
+ */
+$wgPagePropsHaveSortkey = true;
+
/**
* For really cool vim folding this needs to be at the end:
* vim: foldmarker=@{,@} foldmethod=marker
*/
function getPropertyInsertions( $existing = array() ) {
$diffs = array_diff_assoc( $this->mProperties, $existing );
+
$arr = array();
- foreach ( $diffs as $name => $value ) {
- $arr[] = array(
- 'pp_page' => $this->mId,
- 'pp_propname' => $name,
- 'pp_value' => $value,
- );
+ foreach ( array_keys( $diffs ) as $name ) { // only names are needed: the row builder re-reads the value from $this->mProperties
+ $arr[] = $this->getPagePropRowData( $name ); // one page_props row per property that is new or differs from $existing
}
return $arr;
}
+	/**
+	 * Builds the field => value map for one row of the page_props table.
+	 *
+	 * The row carries the page id taken from $this->mId, the given property
+	 * name, and the value stored for that property in $this->mProperties.
+	 * When $wgPagePropsHaveSortkey is enabled, the row additionally carries
+	 * a pp_sortkey field whose value is computed by getPropertySortKeyValue().
+	 *
+	 * @note: $this->mProperties[$prop] is expected to be set; this is not checked.
+	 *
+	 * @param string $prop Name of the property to build the row for.
+	 *
+	 * @return array Row data keyed by column name.
+	 */
+	private function getPagePropRowData( $prop ) {
+		global $wgPagePropsHaveSortkey;
+
+		$propValue = $this->mProperties[$prop];
+
+		// Only mention pp_sortkey when the schema is known to have that column.
+		$sortKeyField = $wgPagePropsHaveSortkey
+			? array( 'pp_sortkey' => $this->getPropertySortKeyValue( $propValue ) )
+			: array();
+
+		// Array union keeps the base columns first and appends pp_sortkey last.
+		return array(
+			'pp_page' => $this->mId,
+			'pp_propname' => $prop,
+			'pp_value' => $propValue,
+		) + $sortKeyField;
+	}
+
+	/**
+	 * Computes the numeric sort key that accompanies a property value.
+	 *
+	 * Values of type int, float or bool are converted to float (so true
+	 * becomes 1.0 and false becomes 0.0). A value of any other type,
+	 * including a numeric-looking string, has no sort key and yields null.
+	 *
+	 * @note: A later version may allow the sortkey to be given explicitly
+	 * in ParserOutput::setProperty.
+	 *
+	 * @param mixed $value The property value.
+	 *
+	 * @return float|null
+	 */
+	private function getPropertySortKeyValue( $value ) {
+		$hasNumericType = is_int( $value ) || is_float( $value ) || is_bool( $value );
+
+		return $hasNumericType ? floatval( $value ) : null;
+	}
+
/**
* Get an array of interwiki insertions for passing to the DB
* Skips the titles specified by the 2-D array $existing
array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ),
array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ),
array( 'addField', 'user', 'user_password_expires', 'patch-user_password_expire.sql' ),
+ array( 'addField', 'page_props', 'pp_sortkey', 'patch-pp_sortkey.sql' ),
);
}
array( 'addPgField', 'recentchanges', 'rc_source', "TEXT NOT NULL DEFAULT ''" ),
array( 'addPgField', 'page', 'page_links_updated', "TIMESTAMPTZ NULL" ),
array( 'addPgField', 'mwuser', 'user_password_expires', 'TIMESTAMPTZ NULL' ),
+ array( 'addPgField', 'page_props', 'pp_sortkey', 'float NULL' ),
+ // Partial index: rows whose pp_sortkey is NULL are left out of it.
+ // The predicate must be spelled "IS NOT NULL"; "pp_sortkey NOT NULL" is not valid SQL.
+ array( 'addPgIndex', 'page_props', 'pp_propname_sortkey_page',
+ '( pp_propname, pp_sortkey, pp_page ) WHERE ( pp_sortkey IS NOT NULL )' ),
);
}
array( 'addIndex', 'logging', 'log_user_text_time', 'patch-logging_user_text_time_index.sql' ),
array( 'addField', 'page', 'page_links_updated', 'patch-page_links_updated.sql' ),
array( 'addField', 'user', 'user_password_expires', 'patch-user_password_expire.sql' ),
+ array( 'addField', 'page_props', 'pp_sortkey', 'patch-pp_sortkey.sql' ),
);
}
* retrieved given the page ID or via a DB join when given the page
* title.
*
+ * Since 1.23, page_props are also indexed by numeric value, to allow
+ * for efficient "top k" queries of pages wrt a given property.
+ *
* setProperty() is thus used to propagate properties from the parsed
* page to request contexts other than a page view of the currently parsed
* article.
--- /dev/null
+-- Add a nullable numeric 'pp_sortkey' field to page_props so pages can be
+-- efficiently queried and sorted by the numeric value of a property.
+
+ALTER TABLE /*_*/page_props
+ ADD pp_sortkey float DEFAULT NULL;
+
+-- Index covering property name, sort key and page, for lookups by sort key.
+CREATE UNIQUE INDEX /*i*/pp_propname_sortkey_page
+ ON /*_*/page_props ( pp_propname, pp_sortkey, pp_page );
CREATE TABLE /*_*/page_props (
pp_page int NOT NULL,
pp_propname varbinary(60) NOT NULL,
- pp_value blob NOT NULL
+ pp_value blob NOT NULL,
+ -- Numeric sort key for the property value; NULL when no sort key was set.
+ pp_sortkey float DEFAULT NULL
) /*$wgDBTableOptions*/;
CREATE UNIQUE INDEX /*i*/pp_page_propname ON /*_*/page_props (pp_page,pp_propname);
CREATE UNIQUE INDEX /*i*/pp_propname_page ON /*_*/page_props (pp_propname,pp_page);
-
+-- Index covering property name, sort key and page, for lookups by sort key.
+CREATE UNIQUE INDEX /*i*/pp_propname_sortkey_page ON /*_*/page_props (pp_propname,pp_sortkey,pp_page);
-- A table to log updates, one text key row per update.
CREATE TABLE /*_*/updatelog (
* @covers ParserOutput::setProperty
*/
public function testUpdate_page_props() {
+ global $wgPagePropsHaveSortkey;
+
/** @var ParserOutput $po */
list( $t, $po ) = $this->makeTitleAndParserOutput( "Testing", 111 );
- $po->setProperty( "foo", "bar" );
+ $fields = array( 'pp_propname', 'pp_value' );
+ $expected = array();
- $this->assertLinksUpdate( $t, $po, 'page_props', 'pp_propname, pp_value', 'pp_page = 111', array(
- array( 'foo', 'bar' ),
- ) );
+ // Set one property per value type: bool, float and int get a numeric
+ // sortkey, while a string (even one starting with digits) gets none.
+ $po->setProperty( "bool", true );
+ $expected[] = array( "bool", true );
+
+ $po->setProperty( "float", 4.0 + 1.0/4.0 );
+ $expected[] = array( "float", 4.0 + 1.0/4.0 );
+
+ $po->setProperty( "int", -7 );
+ $expected[] = array( "int", -7 );
+
+ $po->setProperty( "string", "33 bar" );
+ $expected[] = array( "string", "33 bar" );
+
+ // compute expected sortkey values
+ if ( $wgPagePropsHaveSortkey ) {
+ $fields[] = 'pp_sortkey';
+
+ foreach ( $expected as &$row ) {
+ $value = $row[1];
+
+ if ( is_int( $value ) || is_float( $value ) || is_bool( $value ) ) {
+ $row[] = floatval( $value );
+ } else {
+ $row[] = null;
+ }
+ }
+ // Break the reference left behind by the by-reference foreach, so that
+ // a later write to $row cannot silently alter the last expected row.
+ unset( $row );
+ }
+
+ $this->assertLinksUpdate( $t, $po, 'page_props', $fields, 'pp_page = 111', $expected );
+ }
+
+ /**
+ * Runs testUpdate_page_props() with $wgPagePropsHaveSortkey set to false,
+ * so that only pp_propname and pp_value are compared.
+ */
+ public function testUpdate_page_props_without_sortkey() {
+ $this->setMwGlobals( 'wgPagePropsHaveSortkey', false );
+
+ $this->testUpdate_page_props();
}
// @todo test recursive, too!