summary |
shortlog |
log |
commit | commitdiff |
tree
raw |
patch |
inline | side by side (from parent 1:
808c0ea)
Going to use this in CirrusSearch since the default text
handling is insane for Solr.
While we're at it, further move content handling to SearchEngine
so children can override behavior here.
Change-Id: I09d11b81c224d53609c57d75d54021e697b56629
$flags: a comma-delimited list of strings representing the options used. May
include: utf8 (this will always be set for new revisions); gzip; external.
$flags: a comma-delimited list of strings representing the options used. May
include: utf8 (this will always be set for new revisions); gzip; external.
-'SearchUpdate': Prior to search update completion.
+'SearchUpdate': Prior to search update completion. Return false to stop any
+further text/content processing.
-$namespace : Page namespace
-$title : Page title
$text : Current text being indexed
$text : Current text being indexed
+$content : Content object for text being indexed.
'SearchGetNearMatchBefore': Perform exact-title-matches in "go" searches before
the normal operations.
'SearchGetNearMatchBefore': Perform exact-title-matches in "go" searches before
the normal operations.
return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
}
}
return $wgCanonicalServer . wfScript( 'api' ) . '?action=opensearch&search={searchTerms}&namespace=' . $ns;
}
}
+
+ /**
+ * Get the raw text for updating the index from a content object.
+ * Nicer search backends could possibly do something cooler than
+ * just returning raw text.
+ *
+ * @todo This isn't ideal; we'd really like to have content-specific handling here
+ * @param Title $t Title we're indexing
+ * @param Content|null $c Content of the page to index, or null if there is none
+ * @return string Text to index; an empty string when $c is null
+ */
+ public function getTextFromContent( Title $t, Content $c = null ) {
+ return $c ? $c->getTextForSearchIndex() : '';
+ }
protected function initText() {
if ( !isset( $this->mText ) ) {
if ( $this->mRevision != null ) {
protected function initText() {
if ( !isset( $this->mText ) ) {
if ( $this->mRevision != null ) {
- //TODO: if we could plug in some code that knows about special content models *and* about
- // special features of the search engine, the search could benefit. See similar
- // comment in SearchUpdate's constructor
- $content = $this->mRevision->getContent();
- $this->mText = $content ? $content->getTextForSearchIndex() : '';
+ $this->mText = SearchEngine::create()
+ ->getTextFromContent( $this->mTitle, $this->mRevision->getContent() );
} else { // TODO: can we fetch raw wikitext for commons images?
$this->mText = '';
}
} else { // TODO: can we fetch raw wikitext for commons images?
$this->mText = '';
}
- * Namespace of page being updated
- * @var int
- */
- private $namespace;
-
- /**
- * Title we're updating (without namespace)
- * @var string
+ * Title we're updating
+ * @var Title
- * Raw text to put into the index
+ * Content of the page (not text)
+ * @var Content|false
/**
* Constructor
*
* @param int $id Page id to update
* @param Title|string $title Title of page to update
/**
* Constructor
*
* @param int $id Page id to update
* @param Title|string $title Title of page to update
- * @param Content|string|false $content Content of the page to update.
+ * @param Content|string|false $c Content of the page to update.
* If a Content object, text will be gotten from it. String is for back-compat.
* Passing false tells the backend to just update the title, not the content
*/
* If a Content object, text will be gotten from it. String is for back-compat.
* Passing false tells the backend to just update the title, not the content
*/
- public function __construct( $id, $title, $content = false ) {
+ public function __construct( $id, $title, $c = false ) {
if ( is_string( $title ) ) {
$nt = Title::newFromText( $title );
} else {
if ( is_string( $title ) ) {
$nt = Title::newFromText( $title );
} else {
if ( $nt ) {
$this->id = $id;
if ( $nt ) {
$this->id = $id;
- // @todo This isn't ideal, we'd really like to have content-specific
- // handling here. See similar content in SearchEngine::initText().
- if( is_string( $content ) ) {
- // b/c for ApprovedRevs
- $this->text = $content;
+ // is_string() check is back-compat for ApprovedRevs
+ if( is_string( $c ) ) {
+ $this->content = new TextContent( $c );
- $this->text = $content ? $content->getTextForSearchIndex() : false;
+ $this->content = $c ?: false;
-
- $this->namespace = $nt->getNamespace();
- $this->title = $nt->getText(); # Discard namespace
} else {
wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
}
} else {
wfDebug( "SearchUpdate object created with invalid title '$title'\n" );
}
wfProfileIn( __METHOD__ );
$search = SearchEngine::create();
wfProfileIn( __METHOD__ );
$search = SearchEngine::create();
- $normalTitle = $search->normalizeText( Title::indexTitle( $this->namespace, $this->title ) );
+ $normalTitle = $search->normalizeText(
+ Title::indexTitle( $this->title->getNamespace(), $this->title->getText() ) );
if ( WikiPage::newFromId( $this->id ) === null ) {
$search->delete( $this->id, $normalTitle );
wfProfileOut( __METHOD__ );
return;
if ( WikiPage::newFromId( $this->id ) === null ) {
$search->delete( $this->id, $normalTitle );
wfProfileOut( __METHOD__ );
return;
- } elseif ( $this->text === false ) {
+ } elseif ( $this->content === false ) {
$search->updateTitle( $this->id, $normalTitle );
wfProfileOut( __METHOD__ );
return;
}
$search->updateTitle( $this->id, $normalTitle );
wfProfileOut( __METHOD__ );
return;
}
- $text = self::updateText( $this->text );
-
- wfRunHooks( 'SearchUpdate', array( $this->id, $this->namespace, $this->title, &$text ) );
+ $text = $search->getTextFromContent( $this->title, $this->content );
+ if( wfRunHooks( 'SearchUpdate', array( $this->id, $this->title, &$text, $this->content ) ) ) {
+ $text = self::updateText( $text );
+ }
# Perform the actual update
$search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );
# Perform the actual update
$search->update( $this->id, $normalTitle, $search->normalizeText( $text ) );