From f037d4deee68bca5b07bc0b0b400398d9c37c9d6 Mon Sep 17 00:00:00 2001 From: Daniel Kinzler Date: Mon, 19 Mar 2012 21:09:37 +0000 Subject: [PATCH] isCountable() --- includes/Content.php | 91 ++++++++++++++++++++++++++++++++++++++++--- includes/Revision.php | 3 +- includes/WikiPage.php | 62 ++++++++++++++--------------- 3 files changed, 118 insertions(+), 38 deletions(-) diff --git a/includes/Content.php b/includes/Content.php index 8280fc9178..2744b014ad 100644 --- a/includes/Content.php +++ b/includes/Content.php @@ -27,7 +27,19 @@ abstract class Content { */ public abstract function getNativeData( ); - public abstract function getSize( ); + /** + * returns the content's nominal size in bogo-bytes. + */ + public abstract function getSize( ); #XXX: do we really need/want this here? we could just use the byte size of the serialized form... + + /** + * Returns true if this content is countable as a "real" wiki page, provided + * that it's also in a countable location (e.g. a current revision in the main namespace). + * + * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here, + * to avoid redundant parsing to find out. + */ + public abstract function isCountable( $hasLinks = null ) ; public abstract function getParserOutput( Title $title = null, $revId = null, ParserOptions $options = NULL ); @@ -35,6 +47,10 @@ abstract class Content { return null; } + public function isRedirect() { + return false; + } + /** * Returns the section with the given id. * @@ -62,10 +78,7 @@ abstract class Content { #TODO: implement specialized ParserOutput for Wikidata model #TODO: provide "combined" ParserOutput for Multipart... somehow. - # TODO: Wikipage::isCountable(Content $a) - - # TODO: isCacheable( ) - # TODO: getSize( ) + # XXX: isCacheable( ) # can/should we do this here? 
# TODO: WikiPage::getUndoText( Revision $undo, Revision $undoafter = null ) # TODO: WikiPage::getAutosummary( $oldtext, $text, $flags ) @@ -93,6 +106,34 @@ abstract class TextContent extends Content { $this->mText = $text; } + /** + * returns the content's nominal size in bogo-bytes. + */ + public function getSize( ) { #FIXME: use! replace strlen in WikiPage. + $text = $this->getNativeData( ); + return strlen( $text ); + } + + /** + * Returns true if this content is not a redirect, and $wgArticleCountMethod is "any". + * + * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here, + * to avoid redundant parsing to find out. + */ + public function isCountable( $hasLinks = null ) { + global $wgArticleCountMethod; + + if ( $this->isRedirect( ) ) { + return false; + } + + if ( $wgArticleCountMethod === 'any' ) { + return true; + } + + return false; + } + /** * Returns the text represented by this Content object, as a string. * @@ -245,6 +286,46 @@ class WikitextContent extends TextContent { return Title::newFromRedirectArray( $text ); } + public function isRedirect() { + $text = $this->getNativeData(); + return Title::newFromRedirect( $text ) !== null; + } + + /** + * Returns true if this content is not a redirect, and this content's text is countable according to + * the criteria defined by $wgArticleCountMethod. + * + * @param $hasLinks Bool: if it is known whether this content contains links, provide this information here, + * to avoid redundant parsing to find out. 
+ */ + public function isCountable( $hasLinks = null ) { + global $wgArticleCountMethod; + + if ( $this->isRedirect( ) ) { + return false; + } + + $text = $this->getNativeData(); + + switch ( $wgArticleCountMethod ) { + case 'any': + return true; + case 'comma': + if ( $text === false ) { + $text = $this->getRawText(); + } + return strpos( $text, ',' ) !== false; + case 'link': + if ( $hasLinks === null ) { # not known, find out + $po = $this->getParserOutput(); + $links = $po->getLinks(); + $hasLinks = !empty( $links ); + } + + return $hasLinks; + } + } + } class MessageContent extends TextContent { diff --git a/includes/Revision.php b/includes/Revision.php index da1f881613..4c62d049ef 100644 --- a/includes/Revision.php +++ b/includes/Revision.php @@ -496,7 +496,8 @@ class Revision { $this->mCurrent = false; # If we still have no length, see it we have the text to figure it out if ( !$this->mSize ) { - $this->mSize = is_null( $this->mText ) ? null : strlen( $this->mText ); #FIXME: do strlen in Content object + #XXX: may be inconsistent with the notion of "size" used for the present content model + $this->mSize = is_null( $this->mText ) ? 
null : strlen( $this->mText ); } # Same for sha1 if ( $this->mSha1 === null ) { diff --git a/includes/WikiPage.php b/includes/WikiPage.php index df81657edb..01476963b9 100644 --- a/includes/WikiPage.php +++ b/includes/WikiPage.php @@ -300,15 +300,10 @@ class WikiPage extends Page { * @return bool */ public function isRedirect( $text = false ) { - if ( $text === false ) { - if ( !$this->mDataLoaded ) { - $this->loadPageData(); - } + if ( $text === false ) $content = $this->getContent(); + else $content = ContentHandler::makeContent( $text, $this->mTitle ); # TODO: allow model and format to be provided; or better, expect a Content object - return (bool)$this->mIsRedirect; - } else { - return Title::newFromRedirect( $text ) !== null; - } + return $content->isRedirect(); } /** @@ -391,7 +386,7 @@ class WikiPage extends Page { * Revision::FOR_PUBLIC to be displayed to all users * Revision::FOR_THIS_USER to be displayed to $wgUser * Revision::RAW get the text regardless of permissions - * @return String|null The content of the current revision + * @return Content|null The content of the current revision */ public function getContent( $audience = Revision::FOR_PUBLIC ) { $this->loadLastEdit(); @@ -549,39 +544,42 @@ class WikiPage extends Page { * if false, the current database state will be used * @return Boolean */ - public function isCountable( $editInfo = false ) { #FIXME: move this to Content object + public function isCountable( $editInfo = false ) { global $wgArticleCountMethod; if ( !$this->mTitle->isContentPage() ) { return false; } - $text = $editInfo ? $editInfo->pst : false; + if ( $editInfo ) { + $content = ContentHandler::makeContent( $editInfo->pst, $this->mTitle ); + # TODO: take model and format from edit info! 
+ } else { + $content = $this->getContent(); + } - if ( $this->isRedirect( $text ) ) { + if ( $content->isRedirect( ) ) { return false; } - switch ( $wgArticleCountMethod ) { - case 'any': - return true; - case 'comma': - if ( $text === false ) { - $text = $this->getRawText(); - } - return strpos( $text, ',' ) !== false; - case 'link': - if ( $editInfo ) { - // ParserOutput::getLinks() is a 2D array of page links, so - // to be really correct we would need to recurse in the array - // but the main array should only have items in it if there are - // links. - return (bool)count( $editInfo->output->getLinks() ); - } else { - return (bool)wfGetDB( DB_SLAVE )->selectField( 'pagelinks', 1, - array( 'pl_from' => $this->getId() ), __METHOD__ ); - } - } + $hasLinks = null; + + if ( $wgArticleCountMethod === 'link' ) { + # nasty special case to avoid re-parsing to detect links + + if ( $editInfo ) { + // ParserOutput::getLinks() is a 2D array of page links, so + // to be really correct we would need to recurse in the array + // but the main array should only have items in it if there are + // links. + $hasLinks = (bool)count( $editInfo->output->getLinks() ); + } else { + $hasLinks = (bool)wfGetDB( DB_SLAVE )->selectField( 'pagelinks', 1, + array( 'pl_from' => $this->getId() ), __METHOD__ ); + } + } + + return $content->isCountable( $hasLinks ); } /** -- 2.20.1