From 06e3d0e37777a221559c52d28de479e0a21a8f72 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Tue, 19 Aug 2008 20:32:30 +0000 Subject: [PATCH] * $wgSpamRegex now matches the edit summary and page move descriptions in addition to body text. Could use some cleanup to the error display on page moves; it recycles the general edit notice for spam hits currently, which is pretty big on English Wikipedia, and doesn't say what match you hit. --- RELEASE-NOTES | 4 ++++ includes/DefaultSettings.php | 14 +++++++++++--- includes/EditPage.php | 32 +++++++++++++++++++++++++++----- includes/Title.php | 6 ++++++ 4 files changed, 48 insertions(+), 8 deletions(-) diff --git a/RELEASE-NOTES b/RELEASE-NOTES index 683880826e..f4bb65bf26 100644 --- a/RELEASE-NOTES +++ b/RELEASE-NOTES @@ -37,6 +37,7 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * $wgRestrictDisplayTitle controls if the use of the {{DISPLAYTITLE}} magic word is restricted to titles equivalent to the actual page title. This is true per default, but can be set to false to allow any title. +* $wgSpamRegex may now be an array of multiple regular expressions. === New features in 1.14 === @@ -134,6 +135,9 @@ it from source control: http://www.mediawiki.org/wiki/Download_from_SVN * Avoid recursive crazy expansions in section edit comments for pages which contain '/*' in the title * Fix excessive memory usage when parsing pages with lots of links +* $wgSpamRegex now matches the edit summary and page move descriptions in + addition to body text. + === API changes in 1.14 === diff --git a/includes/DefaultSettings.php b/includes/DefaultSettings.php index c78a772207..e3a48d401f 100644 --- a/includes/DefaultSettings.php +++ b/includes/DefaultSettings.php @@ -2095,9 +2095,17 @@ $wgExportMaxHistory = 0; $wgExportAllowListContributors = false ; -/** Text matching this regular expression will be recognised as spam - * See http://en.wikipedia.org/wiki/Regular_expression */ -$wgSpamRegex = false; +/** + * Edits matching these regular expressions in body text or edit summary + * will be recognised as spam and rejected automatically. + * + * There's no administrator override on-wiki, so be careful what you set. :) + * May be an array of regexes or a single string for backwards compatibility. + * + * See http://en.wikipedia.org/wiki/Regular_expression + */ +$wgSpamRegex = array(); + /** Similarly you can get a function to do the job. The function will be given * the following args: * - a Title object for the article the edit is made on diff --git a/includes/EditPage.php b/includes/EditPage.php index 1b86871937..614019ed0b 100644 --- a/includes/EditPage.php +++ b/includes/EditPage.php @@ -733,7 +733,7 @@ class EditPage { * @return one of the constants describing the result */ function internalAttemptSave( &$result, $bot = false ) { - global $wgSpamRegex, $wgFilterCallback, $wgUser, $wgOut, $wgParser; + global $wgFilterCallback, $wgUser, $wgOut, $wgParser; global $wgMaxArticleSize; $fname = 'EditPage::attemptSave'; @@ -762,12 +762,15 @@ class EditPage { $this->mMetaData = '' ; # Check for spam - $matches = array(); - if ( $wgSpamRegex && preg_match( $wgSpamRegex, $this->textbox1, $matches ) ) { - $result['spam'] = $matches[0]; + $match = self::matchSpamRegex( $this->summary ); + if( $match === false ) { + $match = self::matchSpamRegex( $this->textbox1 ); + } + if( $match !== false ) { + $result['spam'] = $match; $ip = wfGetIP(); $pdbk = $this->mTitle->getPrefixedDBkey(); - $match = str_replace( "\n", '', $matches[0] ); + $match = str_replace( "\n", '', $match ); wfDebugLog( 'SpamRegex', "$ip spam regex hit [[$pdbk]]: \"$match\"" ); wfProfileOut( "$fname-checks" ); wfProfileOut( $fname ); @@ -1022,6 +1025,25 @@ class EditPage { wfProfileOut( $fname ); return self::AS_END; } + + /** + * Check given input text against $wgSpamRegex, and return the text of the first match. + * @return mixed -- matching string or false + */ + public static function matchSpamRegex( $text ) { + global $wgSpamRegex; + if( $wgSpamRegex ) { + // For back compatibility, $wgSpamRegex may be a single string or an array of regexes. + $regexes = (array)$wgSpamRegex; + foreach( $regexes as $regex ) { + $matches = array(); + if ( preg_match( $regex, $text, $matches ) ) { + return $matches[0]; + } + } + } + return false; + } /** * Initialise form fields in the object diff --git a/includes/Title.php b/includes/Title.php index 0f1a18939b..67cd6e6051 100644 --- a/includes/Title.php +++ b/includes/Title.php @@ -2500,6 +2500,12 @@ class Title { $nt->getUserPermissionsErrors('edit', $wgUser)); } + $match = EditPage::matchSpamRegex( $reason ); + if( $match !== false ) { + // This is kind of lame, won't display nice + $errors[] = array('spamprotectiontext'); + } + global $wgUser; $err = null; if( !wfRunHooks( 'AbortMove', array( $this, $nt, $wgUser, &$err, $reason ) ) ) { -- 2.20.1