From 827f24deed0e677018604c6b13707c72f33815f8 Mon Sep 17 00:00:00 2001 From: Daniel Kinzler Date: Tue, 20 Mar 2012 09:57:41 +0000 Subject: [PATCH] auto-summary, auto-delete-reason --- includes/Article.php | 5 +- includes/Content.php | 51 +++++++++-- includes/ContentHandler.php | 166 ++++++++++++++++++++++++++++++++++++ includes/WikiPage.php | 138 +++--------------------------- includes/api/ApiDelete.php | 2 +- 5 files changed, 225 insertions(+), 137 deletions(-) diff --git a/includes/Article.php b/includes/Article.php index 9a0fb806dd..d1bdc03bdb 100644 --- a/includes/Article.php +++ b/includes/Article.php @@ -1894,7 +1894,9 @@ class Article extends Page { * @return mixed */ public function generateReason( &$hasHistory ) { - return $this->mPage->getAutoDeleteReason( $hasHistory ); + $title = $this->mPage->getTitle(); + $handler = ContentHandler::getForTitle( $title ); + return $handler->getAutoDeleteReason( $title, $hasHistory ); } // ****** B/C functions for static methods ( __callStatic is PHP>=5.3 ) ****** // @@ -1932,6 +1934,7 @@ class Article extends Page { * @param $newtext * @param $flags * @return string + * @deprecated since 1.20, use ContentHandler::getAutosummary() instead */ public static function getAutosummary( $oldtext, $newtext, $flags ) { return WikiPage::getAutosummary( $oldtext, $newtext, $flags ); diff --git a/includes/Content.php b/includes/Content.php index 2744b014ad..fe2783a896 100644 --- a/includes/Content.php +++ b/includes/Content.php @@ -16,22 +16,30 @@ abstract class Content { return $this->mModelName; } - public abstract function getSearchText( ); + public abstract function getTextForSearchIndex( ); public abstract function getWikitextForTransclusion( ); + public abstract function getTextForSummary( $maxlength = 250 ); + /** * Returns native represenation of the data. Interpretation depends on the data model used, * as given by getDataModel(). * + * @return mixed the native representation of the content. 
Could be a string, a nested array + * structure, an object, a binary blob... anything, really. */ - public abstract function getNativeData( ); + public abstract function getNativeData( ); #FIXME: review all calls carefully, caller must be aware of content model! /** * returns the content's nominal size in bogo-bytes. */ public abstract function getSize( ); #XXX: do we really need/want this here? we could just use the byte syse of the serialized form... + public function isEmpty() { + return $this->getSize() == 0; + } + /** * Returns true if this content is countable as a "real" wiki page, provided * that it's also in a countable location (e.g. a current revision in the main namespace). @@ -43,12 +51,16 @@ abstract class Content { public abstract function getParserOutput( Title $title = null, $revId = null, ParserOptions $options = NULL ); - public function getRedirectChain() { + public function getRedirectChain() { #TODO: document! + return null; + } + + public function getRedirectTarget() { return null; } public function isRedirect() { - return false; + return $this->getRedirectTarget() != null; } /** @@ -64,7 +76,7 @@ abstract class Content { } /** - * Replaces a section of the content. + * Replaces a section of the content and returns a Content object with the section replaced. * * @param $section empty/null/false or a section number (0, 1, 2, T1, T2...), or "new" * @param $with Content: new content of the section @@ -85,6 +97,7 @@ abstract class Content { # TODO: EditPage::getPreloadedText( $preload ) // $wgParser->getPreloadText # TODO: tie into EditPage, make it use Content-objects throughout, make edit form aware of content model and format + # TODO: tie into WikiPage, make it use Content-objects throughout, especially in doEdit(), doDelete(), etc # TODO: make model-aware diff view! 
# TODO: handle ImagePage and CategoryPage @@ -106,6 +119,18 @@ abstract class TextContent extends Content { $this->mText = $text; } + public function getTextForSummary( $maxlength = 250 ) { + global $wgContLang; + + $text = $this->getNativeData(); + + $truncatedtext = $wgContLang->truncate( + preg_replace( "/[\n\r]/", ' ', $text ), + max( 0, $maxlength ) ); + + return $truncatedtext; + } + /** * returns the content's nominal size in bogo-bytes. */ @@ -149,7 +174,7 @@ abstract class TextContent extends Content { * * @return String the raw text */ - public function getSearchText( ) { #FIXME: use! + public function getTextForSearchIndex( ) { #FIXME: use! return $this->getNativeData(); } @@ -286,9 +311,9 @@ class WikitextContent extends TextContent { return Title::newFromRedirectArray( $text ); } - public function isRedirect() { + public function getRedirectTarget() { $text = $this->getNativeData(); - return Title::newFromRedirect( $text ) !== null; + return Title::newFromRedirect( $text ); } /** @@ -326,6 +351,16 @@ class WikitextContent extends TextContent { } } + public function getTextForSummary( $maxlength = 250 ) { + $truncatedtext = parent::getTextForSummary( $maxlength ); + + #clean up unfinished links + #XXX: make this optional? wasn't there in autosummary, but required for deletion summary. + $truncatedtext = preg_replace( '/\[\[([^\]]*)\]?$/', '$1', $truncatedtext ); + + return $truncatedtext; + } + } class MessageContent extends TextContent { diff --git a/includes/ContentHandler.php b/includes/ContentHandler.php index 8a1badcaa8..39f9fcde36 100644 --- a/includes/ContentHandler.php +++ b/includes/ContentHandler.php @@ -104,6 +104,12 @@ abstract class ContentHandler { return ContentHandler::getForModelName( $modelName ); } + /** + * @static + * @param $modelName String the name of the content model for which to get a handler. Use CONTENT_MODEL_XXX constants. 
+ * @return ContentHandler + * @throws MWException + */ public static function getForModelName( $modelName ) { global $wgContentHandlers; @@ -143,8 +149,20 @@ abstract class ContentHandler { return $this->mSupportedFormats[0]; } + /** + * @abstract + * @param Content $content + * @param null $format + * @return String + */ public abstract function serialize( Content $content, $format = null ); + /** + * @abstract + * @param $blob String + * @param null $format + * @return Content + */ public abstract function unserialize( $blob, $format = null ); public abstract function emptyContent(); @@ -216,6 +234,153 @@ abstract class ContentHandler { return false; } + /** + * Return an applicable autosummary if one exists for the given edit. + * + * @param $oldContent Content: the previous text of the page. + * @param $newContent Content: The submitted text of the page. + * @param $flags Int bitmask: a bitmask of flags submitted for the edit. + * + * @return string An appropriate autosummary, or an empty string. + */ + public function getAutosummary( Content $oldContent, Content $newContent, $flags ) { + global $wgContLang; + + # Decide what kind of autosummary is needed. + + # Redirect autosummaries + $ot = $oldContent->getRedirectTarget(); + $rt = $newContent->getRedirectTarget(); + + if ( is_object( $rt ) && ( !is_object( $ot ) || !$rt->equals( $ot ) || $ot->getFragment() != $rt->getFragment() ) ) { + + $truncatedtext = $newContent->getTextForSummary( + 250 + - strlen( wfMsgForContent( 'autoredircomment' ) ) + - strlen( $rt->getFullText() ) ); + + return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext ); + } + + # New page autosummaries + if ( $flags & EDIT_NEW && $newContent->getSize() > 0 ) { + # If they're making a new article, give its text, truncated, in the summary. 
+ + $truncatedtext = $newContent->getTextForSummary( + 200 - strlen( wfMsgForContent( 'autosumm-new' ) ) ); + + return wfMsgForContent( 'autosumm-new', $truncatedtext ); + } + + # Blanking autosummaries + if ( $oldContent->getSize() > 0 && $newContent->getSize() == 0 ) { + return wfMsgForContent( 'autosumm-blank' ); + } elseif ( $oldContent->getSize() > 10 * $newContent->getSize() && $newContent->getSize() < 500 ) { + # Removing more than 90% of the article + + $truncatedtext = $newContent->getTextForSummary( + 200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) ); + + return wfMsgForContent( 'autosumm-replace', $truncatedtext ); + } + + # If we reach this point, there's no applicable autosummary for our case, so our + # autosummary is empty. + return ''; + } + + /** + * Auto-generates a deletion reason + * + * @param $title Title: the page's title + * @param &$hasHistory Boolean: whether the page has a history + * @return mixed String containing deletion reason or empty string, or boolean false + * if no revision occurred + */ + public function getAutoDeleteReason( Title $title, &$hasHistory ) { + global $wgContLang; + + $dbw = wfGetDB( DB_MASTER ); + + // Get the last revision + $rev = Revision::newFromTitle( $title ); + + if ( is_null( $rev ) ) { + return false; + } + + // Get the article's contents + $content = $rev->getContent(); + $blank = false; + + // If the page is blank, use the text from the previous revision, + // which can only be blank if there's a move/import/protect dummy revision involved + if ( $content->getSize() == 0 ) { + $prev = $rev->getPrevious(); + + if ( $prev ) { + $content = $prev->getContent(); + $blank = true; + } + } + + // Find out if there was only one contributor + // Only scan the last 20 revisions + $res = $dbw->select( 'revision', 'rev_user_text', + array( 'rev_page' => $title->getArticleID(), $dbw->bitAnd( 'rev_deleted', Revision::DELETED_USER ) .
' = 0' ), + __METHOD__, + array( 'LIMIT' => 20 ) + ); + + if ( $res === false ) { + // This page has no revisions, which is very weird + return false; + } + + $hasHistory = ( $res->numRows() > 1 ); + $row = $dbw->fetchObject( $res ); + + if ( $row ) { // $row is false if the only contributor is hidden + $onlyAuthor = $row->rev_user_text; + // Try to find a second contributor + foreach ( $res as $row ) { + if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999 + $onlyAuthor = false; + break; + } + } + } else { + $onlyAuthor = false; + } + + // Generate the summary with a '$1' placeholder + if ( $blank ) { + // The current revision is blank and the one before is also + // blank. It's just not our lucky day + $reason = wfMsgForContent( 'exbeforeblank', '$1' ); + } else { + if ( $onlyAuthor ) { + $reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor ); + } else { + $reason = wfMsgForContent( 'excontent', '$1' ); + } + } + + if ( $reason == '-' ) { + // Allow these UI messages to be blanked out cleanly + return ''; + } + + // Max content length = max comment length - length of the comment (excl. $1) + $text = $content->getTextForSummary( 255 - ( strlen( $reason ) - 2 ) ); + + // Now replace the '$1' placeholder + $reason = str_replace( '$1', $text, $reason ); + + return $reason; + } + + #TODO: cover patch/undo just like merge3. #TODO: how to handle extra message for JS/CSS previews?? @@ -280,6 +445,7 @@ class WikitextContentHandler extends TextContentHandler { return new WikitextContent(""); } + } class JavaScriptContentHandler extends TextContentHandler { diff --git a/includes/WikiPage.php b/includes/WikiPage.php index 01476963b9..4bf9ff9a78 100644 --- a/includes/WikiPage.php +++ b/includes/WikiPage.php @@ -1271,7 +1271,7 @@ class WikiPage extends Page { # Provide autosummaries if one is not provided and autosummaries are enabled. 
if ( $wgUseAutomaticEditSummaries && $flags & EDIT_AUTOSUMMARY && $summary == '' ) { - $summary = self::getAutosummary( $oldtext, $text, $flags ); #FIXME: auto-summary from ContentHandler + $summary = self::getAutosummary( $oldtext, $text, $flags ); #FIXME: ContentHandler::getAutosummary() } $editInfo = $this->prepareTextForEdit( $text, null, $user ); @@ -2385,53 +2385,16 @@ class WikiPage extends Page { * @param $newtext String: The submitted text of the page. * @param $flags Int bitmask: a bitmask of flags submitted for the edit. * @return string An appropriate autosummary, or an empty string. + * @deprecated since 1.20, use ContentHandler::getAutosummary() instead */ public static function getAutosummary( $oldtext, $newtext, $flags ) { - global $wgContLang; - - # Decide what kind of autosummary is needed. - - # Redirect autosummaries - $ot = Title::newFromRedirect( $oldtext ); - $rt = Title::newFromRedirect( $newtext ); - - if ( is_object( $rt ) && ( !is_object( $ot ) || !$rt->equals( $ot ) || $ot->getFragment() != $rt->getFragment() ) ) { - $truncatedtext = $wgContLang->truncate( - str_replace( "\n", ' ', $newtext ), - max( 0, 250 - - strlen( wfMsgForContent( 'autoredircomment' ) ) - - strlen( $rt->getFullText() ) - ) ); - return wfMsgForContent( 'autoredircomment', $rt->getFullText(), $truncatedtext ); - } - - # New page autosummaries - if ( $flags & EDIT_NEW && strlen( $newtext ) ) { - # If they're making a new article, give its text, truncated, in the summary. 
- - $truncatedtext = $wgContLang->truncate( - str_replace( "\n", ' ', $newtext ), - max( 0, 200 - strlen( wfMsgForContent( 'autosumm-new' ) ) ) ); - - return wfMsgForContent( 'autosumm-new', $truncatedtext ); - } - - # Blanking autosummaries - if ( $oldtext != '' && $newtext == '' ) { - return wfMsgForContent( 'autosumm-blank' ); - } elseif ( strlen( $oldtext ) > 10 * strlen( $newtext ) && strlen( $newtext ) < 500 ) { - # Removing more than 90% of the article - - $truncatedtext = $wgContLang->truncate( - $newtext, - max( 0, 200 - strlen( wfMsgForContent( 'autosumm-replace' ) ) ) ); + # NOTE: stub for backwards-compatibility. assumes the given text is wikitext. will break horribly if it isn't. - return wfMsgForContent( 'autosumm-replace', $truncatedtext ); - } + $handler = ContentHandler::getForModelName( CONTENT_MODEL_WIKITEXT ); + $oldContent = $handler->unserialize( $oldtext ); + $newContent = $handler->unserialize( $newtext ); - # If we reach this point, there's no applicable autosummary for our case, so our - # autosummary is empty. 
- return ''; + return $handler->getAutosummary( $oldContent, $newContent, $flags ); } /** @@ -2440,92 +2403,13 @@ class WikiPage extends Page { * @param &$hasHistory Boolean: whether the page has a history * @return mixed String containing deletion reason or empty string, or boolean false * if no revision occurred + * @deprecated since 1.20, use ContentHandler::getAutoDeleteReason() instead */ public function getAutoDeleteReason( &$hasHistory ) { - global $wgContLang; - - $dbw = wfGetDB( DB_MASTER ); - // Get the last revision - $rev = Revision::newFromTitle( $this->getTitle() ); - - if ( is_null( $rev ) ) { - return false; - } - - // Get the article's contents - $contents = $rev->getText(); - $blank = false; - - // If the page is blank, use the text from the previous revision, - // which can only be blank if there's a move/import/protect dummy revision involved - if ( $contents == '' ) { - $prev = $rev->getPrevious(); - - if ( $prev ) { - $contents = $prev->getText(); - $blank = true; - } - } - - // Find out if there was only one contributor - // Only scan the last 20 revisions - $res = $dbw->select( 'revision', 'rev_user_text', - array( 'rev_page' => $this->getID(), $dbw->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0' ), - __METHOD__, - array( 'LIMIT' => 20 ) - ); - - if ( $res === false ) { - // This page has no revisions, which is very weird - return false; - } - - $hasHistory = ( $res->numRows() > 1 ); - $row = $dbw->fetchObject( $res ); - - if ( $row ) { // $row is false if the only contributor is hidden - $onlyAuthor = $row->rev_user_text; - // Try to find a second contributor - foreach ( $res as $row ) { - if ( $row->rev_user_text != $onlyAuthor ) { // Bug 22999 - $onlyAuthor = false; - break; - } - } - } else { - $onlyAuthor = false; - } - - // Generate the summary with a '$1' placeholder - if ( $blank ) { - // The current revision is blank and the one before is also - // blank. 
It's just not our lucky day - $reason = wfMsgForContent( 'exbeforeblank', '$1' ); - } else { - if ( $onlyAuthor ) { - $reason = wfMsgForContent( 'excontentauthor', '$1', $onlyAuthor ); - } else { - $reason = wfMsgForContent( 'excontent', '$1' ); - } - } - - if ( $reason == '-' ) { - // Allow these UI messages to be blanked out cleanly - return ''; - } - - // Replace newlines with spaces to prevent uglyness - $contents = preg_replace( "/[\n\r]/", ' ', $contents ); - // Calculate the maximum amount of chars to get - // Max content length = max comment length - length of the comment (excl. $1) - $maxLength = 255 - ( strlen( $reason ) - 2 ); - $contents = $wgContLang->truncate( $contents, $maxLength ); - // Remove possible unfinished links - $contents = preg_replace( '/\[\[([^\]]*)\]?$/', '$1', $contents ); - // Now replace the '$1' placeholder - $reason = str_replace( '$1', $contents, $reason ); + #NOTE: stub for backwards-compatibility. - return $reason; + $handler = ContentHandler::getForTitle( $this->getTitle() ); + return $handler->getAutoDeleteReason( $this->getTitle(), $hasHistory ); } /** diff --git a/includes/api/ApiDelete.php b/includes/api/ApiDelete.php index cfaf6cc1d7..a6a33102a4 100644 --- a/includes/api/ApiDelete.php +++ b/includes/api/ApiDelete.php @@ -123,7 +123,7 @@ class ApiDelete extends ApiBase { // Need to pass a throwaway variable because generateReason expects // a reference $hasHistory = false; - $reason = $page->getAutoDeleteReason( $hasHistory ); + $reason = $page->getAutoDeleteReason( $hasHistory ); #FIXME: use ContentHandler::getAutoDeleteReason() if ( $reason === false ) { return array( array( 'cannotdelete', $title->getPrefixedText() ) ); } -- 2.20.1