*
* @param Content $content Content to check
* @param string $filterEntry Domainparts, see makeRegex() for more details
+ * @param string $protocol 'http://' or 'https://'
* @return int 0 if no match or 1 if there's at least one match
*/
- static function matchEntry( Content $content, $filterEntry ) {
+ public static function matchEntry( Content $content, $filterEntry, $protocol = 'http://' ) {
if ( !( $content instanceof TextContent ) ) {
// TODO: handle other types of content too.
// Maybe create ContentHandler::matchFilter( LinkFilter ).
$text = $content->getNativeData();
- $regex = self::makeRegex( $filterEntry );
+ $regex = self::makeRegex( $filterEntry, $protocol );
return preg_match( $regex, $text );
}
*
* @param string $filterEntry URL, if it begins with "*.", it'll be
* replaced to match any subdomain
+ * @param string $protocol 'http://' or 'https://'
+ *
* @return string Regex pattern, for preg_match()
*/
- private static function makeRegex( $filterEntry ) {
- $regex = '!http://';
+ private static function makeRegex( $filterEntry, $protocol ) {
+ $regex = '!' . preg_quote( $protocol );
if ( substr( $filterEntry, 0, 2 ) == '*.' ) {
$regex .= '(?:[A-Za-z0-9.-]+\.|)';
$filterEntry = substr( $filterEntry, 2 );
$wgUser->addGroup( 'bot' );
$spec = $this->getArg();
- $like = LinkFilter::makeLikeArray( $spec );
- if ( !$like ) {
- $this->fatalError( "Not a valid hostname specification: $spec" );
+
+ $likes = [];
+ foreach ( [ 'http://', 'https://' ] as $prot ) {
+ $like = LinkFilter::makeLikeArray( $spec, $prot );
+ if ( !$like ) {
+ $this->fatalError( "Not a valid hostname specification: $spec" );
+ }
+ $likes[$prot] = $like;
}
if ( $this->hasOption( 'all' ) ) {
$this->output( "Finding spam on " . count( $wgLocalDatabases ) . " wikis\n" );
$found = false;
foreach ( $wgLocalDatabases as $wikiID ) {
+ /** @var Database $dbr */
$dbr = $this->getDB( DB_REPLICA, [], $wikiID );
- $count = $dbr->selectField( 'externallinks', 'COUNT(*)',
- [ 'el_index' . $dbr->buildLike( $like ) ], __METHOD__ );
- if ( $count ) {
- $found = true;
- $cmd = wfShellWikiCmd( "$IP/maintenance/cleanupSpam.php",
- [ '--wiki', $wikiID, $spec ] );
- passthru( "$cmd | sed 's/^/$wikiID: /'" );
+ foreach ( $likes as $like ) {
+ $count = $dbr->selectField(
+ 'externallinks',
+ 'COUNT(*)',
+ [ 'el_index' . $dbr->buildLike( $like ) ],
+ __METHOD__
+ );
+ if ( $count ) {
+ $found = true;
+ $cmd = wfShellWikiCmd(
+ "$IP/maintenance/cleanupSpam.php",
+ [ '--wiki', $wikiID, $spec ]
+ );
+ passthru( "$cmd | sed 's/^/$wikiID: /'" );
+ }
}
}
if ( $found ) {
} else {
// Clean up spam on this wiki
+ $count = 0;
+ /** @var Database $dbr */
$dbr = $this->getDB( DB_REPLICA );
- $res = $dbr->select( 'externallinks', [ 'DISTINCT el_from' ],
- [ 'el_index' . $dbr->buildLike( $like ) ], __METHOD__ );
- $count = $dbr->numRows( $res );
- $this->output( "Found $count articles containing $spec\n" );
- foreach ( $res as $row ) {
- $this->cleanupArticle( $row->el_from, $spec );
+ foreach ( $likes as $prot => $like ) {
+ $res = $dbr->select(
+ 'externallinks',
+ [ 'DISTINCT el_from' ],
+ [ 'el_index' . $dbr->buildLike( $like ) ],
+ __METHOD__
+ );
+ $count = $dbr->numRows( $res );
+ $this->output( "Found $count articles containing $spec\n" );
+ foreach ( $res as $row ) {
+ $this->cleanupArticle( $row->el_from, $spec, $prot );
+ }
}
if ( $count ) {
$this->output( "Done\n" );
}
}
- private function cleanupArticle( $id, $domain ) {
+ /**
+ * @param int $id
+ * @param string $domain
+ * @param string $protocol
+ * @throws MWException
+ */
+ private function cleanupArticle( $id, $domain, $protocol ) {
$title = Title::newFromID( $id );
if ( !$title ) {
$this->error( "Internal error: no page for ID $id" );
$currentRevId = $rev->getId();
while ( $rev && ( $rev->isDeleted( Revision::DELETED_TEXT )
- || LinkFilter::matchEntry( $rev->getContent( Revision::RAW ), $domain ) )
+ || LinkFilter::matchEntry( $rev->getContent( Revision::RAW ), $domain, $protocol ) )
) {
$rev = $rev->getPrevious();
}