From ed1c89b88454744d0ae08e7048992f80920276d8 Mon Sep 17 00:00:00 2001 From: Brian Wolff Date: Thu, 4 Jul 2013 13:19:47 -0300 Subject: [PATCH] Add Special:RandomInCategory. The method used is quite biased, but I believe it's the best possible without a schema change and still being efficient. I think it is good enough for many of the use cases that need different articles to pop up, but not "real" randomness. The method used is to choose a random timestamp and look at cl_timestamp. This method will give good results if the timestamps are uniformly distributed (which probably is not usually true). I think it may give acceptable results in general, especially given most people are not interested in true randomness, but more in "give me a result I haven't seen before". (For example, to pick a random entry in a maintenance category to clean up). It also fudges the result a little bit using offset to stop really biased results from happening. This is mostly meant to stop a category with an extremely clumped distribution from returning the exact same article every time. It is not meant to generally increase randomness. Bug: 25931 Change-Id: I0c48e4a236b50fb627af94f0df47fef8372ea14d --- RELEASE-NOTES-1.22 | 1 + includes/AutoLoader.php | 1 + includes/SpecialPageFactory.php | 1 + includes/specials/SpecialRandomInCategory.php | 283 ++++++++++++++++++ languages/messages/MessagesEn.php | 10 + languages/messages/MessagesQqq.php | 6 + maintenance/language/messages.inc | 8 + 7 files changed, 310 insertions(+) create mode 100644 includes/specials/SpecialRandomInCategory.php diff --git a/RELEASE-NOTES-1.22 b/RELEASE-NOTES-1.22 index 2fc737c6b6..a06ca64edc 100644 --- a/RELEASE-NOTES-1.22 +++ b/RELEASE-NOTES-1.22 @@ -171,6 +171,7 @@ production. * $wgHTCPRouting rules can now be passed an array of hosts/ports to send purge too. Can be used whenever several multicast group could be interested by a specific purge. +* (bug 25931) Add Special:RandomInCategory. 
=== Bug fixes in 1.22 === * Disable Special:PasswordReset when $wgEnableEmail is false. Previously one diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index bb8c272985..dc9acf3211 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -970,6 +970,7 @@ $wgAutoloadLocalClasses = array( 'SpecialPrefixindex' => 'includes/specials/SpecialPrefixindex.php', 'SpecialProtectedpages' => 'includes/specials/SpecialProtectedpages.php', 'SpecialProtectedtitles' => 'includes/specials/SpecialProtectedtitles.php', + 'SpecialRandomInCategory' => 'includes/specials/SpecialRandomInCategory.php', 'SpecialRandomredirect' => 'includes/specials/SpecialRandomredirect.php', 'SpecialRecentChanges' => 'includes/specials/SpecialRecentchanges.php', 'SpecialRecentchangeslinked' => 'includes/specials/SpecialRecentchangeslinked.php', diff --git a/includes/SpecialPageFactory.php b/includes/SpecialPageFactory.php index 02bd9e82ce..9f5d4ada10 100644 --- a/includes/SpecialPageFactory.php +++ b/includes/SpecialPageFactory.php @@ -131,6 +131,7 @@ class SpecialPageFactory { // Redirecting special pages 'LinkSearch' => 'LinkSearchPage', 'Randompage' => 'Randompage', + 'RandomInCategory' => 'SpecialRandomInCategory', 'Randomredirect' => 'SpecialRandomredirect', // High use pages diff --git a/includes/specials/SpecialRandomInCategory.php b/includes/specials/SpecialRandomInCategory.php new file mode 100644 index 0000000000..2e4bf2009f --- /dev/null +++ b/includes/specials/SpecialRandomInCategory.php @@ -0,0 +1,283 @@ +category = $cat; + $this->maxTimestamp = null; + $this->minTimestamp = null; + } + + public function execute( $par ) { + $cat = false; + + $categoryStr = $this->getRequest()->getText( 'category', $par ); + + if ( $categoryStr ) { + $cat = Title::newFromText( $categoryStr, NS_CATEGORY ); + } + + if ( $cat ) { + $this->setCategory( $cat ); + } + + + if ( !$this->category && $categoryStr ) { + $this->setHeaders(); + $this->getOutput()->addWikiMsg( 
'randomincategory-invalidcategory', + wfEscapeWikiText( $categoryStr ) ); + + return; + } elseif ( !$this->category ) { + $this->setHeaders(); + $input = Html::input( 'category' ); + $submitText = $this->msg( 'randomincategory-selectcategory-submit' )->text(); + $submit = Html::input( '', $submitText, 'submit' ); + + $msg = $this->msg( 'randomincategory-selectcategory' ); + $form = Html::rawElement( 'form', array( 'action' => $this->getTitle()->getLocalUrl() ), + $msg->rawParams( $input, $submit )->parse() + ); + $this->getOutput()->addHtml( $form ); + + return; + } + + $title = $this->getRandomTitle(); + + if ( is_null( $title ) ) { + $this->setHeaders(); + $this->getOutput()->addWikiMsg( 'randomincategory-nopages', + $this->category->getText() ); + + return; + } + + $query = $this->getRequest()->getValues(); + unset( $query['title'] ); + unset( $query['category'] ); + $this->getOutput()->redirect( $title->getFullURL( $query ) ); + } + + /** + * Choose a random title. + * @return Title object (or null if nothing to choose from) + */ + public function getRandomTitle() { + // Convert to float, since we do math with the random number. + $rand = (float) wfRandom(); + $title = null; + + // Given that timestamps are rather unevenly distributed, we also + // use an offset between 0 and 30 to make any biases less noticeable. + $offset = mt_rand( 0, $this->maxOffset ); + + if ( mt_rand( 0, 1 ) ) { + $up = true; + } else { + $up = false; + } + + $row = $this->selectRandomPageFromDB( $rand, $offset, $up ); + + // Try again without the timestamp offset (wrap around the end) + if ( !$row ) { + $row = $this->selectRandomPageFromDB( false, $offset, $up ); + } + + // Maybe the category is really small and offset too high + if ( !$row ) { + $row = $this->selectRandomPageFromDB( $rand, 0, $up ); + } + + // Just get the first entry. 
+ if ( !$row ) { + $row = $this->selectRandomPageFromDB( false, 0, true ); + } + + if ( $row ) { + return Title::makeTitle( $row->page_namespace, $row->page_title ); + } + + return null; + } + + /** + * @param float $rand Random number between 0 and 1 + * @param int $offset Extra offset to fudge randomness + * @param bool $up True to get the result above the random number, false for below + * + * @note The $up parameter is supposed to counteract what would happen if there + * was a large gap in the distribution of cl_timestamp values. This way instead + * of things to the right of the gap being favoured, both sides of the gap + * are favoured. + * @return Array Query information. + */ + protected function getQueryInfo( $rand, $offset, $up ) { + $op = $up ? '>=' : '<='; + $dir = $up ? 'ASC' : 'DESC'; + if ( !$this->category instanceof Title ) { + throw new MWException( 'No category set' ); + } + $qi = array( + 'tables' => array( 'categorylinks', 'page' ), + 'fields' => array( 'page_title', 'page_namespace' ), + 'conds' => array_merge( array( + 'cl_to' => $this->category->getDBKey(), + ), $this->extra ), + 'options' => array( + 'ORDER BY' => 'cl_timestamp ' . $dir, + 'LIMIT' => 1, + 'OFFSET' => $offset + ), + 'join_conds' => array( + 'page' => array( 'INNER JOIN', 'cl_from = page_id' ) + ) + ); + + $dbr = wfGetDB( DB_SLAVE ); + $minClTime = $this->getTimestampOffset( $rand ); + if ( $minClTime ) { + $qi['conds'][] = 'cl_timestamp ' . $op . ' ' . 
+ $dbr->addQuotes( $dbr->timestamp( $minClTime ) ); + } + return $qi; + } + + /** + * @param float $rand Random number between 0 and 1 + * + * @return int|bool A random (unix) timestamp from the range of the category or false on failure + */ + protected function getTimestampOffset( $rand ) { + if ( $rand === false ) { + return false; + } + if ( !$this->minTimestamp || !$this->maxTimestamp ) { + try { + list( $this->minTimestamp, $this->maxTimestamp ) = $this->getMinAndMaxForCat( $this->category ); + } catch( MWException $e ) { + // Possibly no entries in category. + return false; + } + } + + $ts = ( $this->maxTimestamp - $this->minTimestamp ) * $rand + $this->minTimestamp; + return intval( $ts ); + } + + /** + * Get the lowest and highest timestamp for a category. + * + * @param Title $category + * @return Array The lowest and highest timestamp + * @throws MWException if category has no entries. + */ + protected function getMinAndMaxForCat( Title $category ) { + $dbr = wfGetDB( DB_SLAVE ); + $res = $dbr->selectRow( + 'categorylinks', + array( + 'low' => 'MIN( cl_timestamp )', + 'high' => 'MAX( cl_timestamp )' + ), + array( + 'cl_to' => $this->category->getDBKey(), + ), + __METHOD__, + array( + 'LIMIT' => 1 + ) + ); + if ( !$res ) { + throw new MWException( 'No entries in category' ); + } + return array( wfTimestamp( TS_UNIX, $res->low ), wfTimestamp( TS_UNIX, $res->high ) ); + } + + /** + * @param float $rand A random number that is converted to a random timestamp + * @param int $offset A small offset to make the result seem more "random" + * @param bool $up Get the result above the random value + * @param String $fname The name of the calling method + * @return Array Info for the title selected. 
+ */ + private function selectRandomPageFromDB( $rand, $offset, $up, $fname = __METHOD__ ) { + $dbr = wfGetDB( DB_SLAVE ); + + $query = $this->getQueryInfo( $rand, $offset, $up ); + $res = $dbr->select( + $query['tables'], + $query['fields'], + $query['conds'], + $fname, + $query['options'], + $query['join_conds'] + ); + + return $res->fetchObject(); + } + + protected function getGroupName() { + return 'redirects'; + } +} diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php index 826c364c4d..5b41873bf6 100644 --- a/languages/messages/MessagesEn.php +++ b/languages/messages/MessagesEn.php @@ -446,6 +446,7 @@ $specialPageAliases = array( 'Protectedpages' => array( 'ProtectedPages' ), 'Protectedtitles' => array( 'ProtectedTitles' ), 'Randompage' => array( 'Random', 'RandomPage' ), + 'RandomInCategory' => array( 'RandomInCategory' ), 'Randomredirect' => array( 'RandomRedirect' ), 'Recentchanges' => array( 'RecentChanges' ), 'Recentchangeslinked' => array( 'RecentChangesLinked', 'RelatedChanges' ), @@ -2624,6 +2625,15 @@ Remember to check for other links to the templates before deleting them.', 'randompage-nopages' => 'There are no pages in the following {{PLURAL:$2|namespace|namespaces}}: $1.', 'randompage-url' => 'Special:Random', # do not translate or duplicate this message to other languages +# Random page in category +'randomincategory' => 'Random page in category', +'randomincategory-invalidcategory' => '"$1" is not a valid category name.', +'randomincategory-nopages' => 'There are no pages in [[:Category:$1]].', +'randomincategory-selectcategory' => 'Get random page from category: $1 $2. 
+ +The selection process of this page may be biased towards certain articles and should not be used for statistical purposes.', +'randomincategory-selectcategory-submit' => 'Go', + # Random redirect 'randomredirect' => 'Random redirect', 'randomredirect-nopages' => 'There are no redirects in the namespace "$1".', diff --git a/languages/messages/MessagesQqq.php b/languages/messages/MessagesQqq.php index 082115fbd8..280100a0da 100644 --- a/languages/messages/MessagesQqq.php +++ b/languages/messages/MessagesQqq.php @@ -4114,6 +4114,12 @@ See also: 'randompage-nopages' => '* $1 - list of namespaces * $2 - number of namespaces', +'randomincategory' => '{{doc-special|RandomInCategory}}', +'randomincategory-invalidcategory' => 'Message shown if an invalid category is specified. (Note, if the category is simply empty, but could possibly exist, {{msg-mw|randomincategory-nopages}} is shown instead). $1 is the invalid category name given.', +'randomincategory-nopages' => 'Message shown from Special:RandomInCategory if the category is empty. $1 is the category name (without the namespace prefix)', +'randomincategory-selectcategory' => 'Shown on Special:RandomInCategory if no category is selected. Displays a form allowing the user to input a category name. $1 is the text field input box, $2 is the go button. 
The text content of the button comes from {{msg-mw|randomincategory-selectcategory-submit}}.', +'randomincategory-selectcategory-submit' => 'Text of button used in {{msg-mw|randomincategory-selectcategory}}', + # Random redirect 'randomredirect' => '{{doc-special|RandomRedirect}}', 'randomredirect-nopages' => '* $1 - namespace name', diff --git a/maintenance/language/messages.inc b/maintenance/language/messages.inc index 388904a8ad..9fabc43a1f 100644 --- a/maintenance/language/messages.inc +++ b/maintenance/language/messages.inc @@ -1683,6 +1683,13 @@ $wgMessageStructure = array( 'randompage-nopages', 'randompage-url', ), + 'randomincategory' => array( + 'randomincategory', + 'randomincategory-invalidcategory', + 'randomincategory-nopages', + 'randomincategory-selectcategory', + 'randomincategory-selectcategory-submit', + ), 'randomredirect' => array( 'randomredirect', 'randomredirect-nopages', @@ -4028,6 +4035,7 @@ future releases. Also note that since each list value is wrapped in a unique 'listredirects' => 'List redirects', 'unusedtemplates' => 'Unused templates', 'randompage' => 'Random page', + 'randomincategory' => 'Special:RandomInCategory', 'randomredirect' => 'Random redirect', 'statistics' => 'Statistics', 'disambiguations' => '', -- 2.20.1