'NukePage' => __DIR__ . '/maintenance/nukePage.php',
'NullFileJournal' => __DIR__ . '/includes/filebackend/filejournal/FileJournal.php',
'NullFileOp' => __DIR__ . '/includes/filebackend/FileOp.php',
+ 'NullIndexField' => __DIR__ . '/includes/search/NullIndexField.php',
'NullJob' => __DIR__ . '/includes/jobqueue/jobs/NullJob.php',
'NullLockManager' => __DIR__ . '/includes/filebackend/lockmanager/LockManager.php',
'NullRepo' => __DIR__ . '/includes/filerepo/NullRepo.php',
'SearchEngineFactory' => __DIR__ . '/includes/search/SearchEngineFactory.php',
'SearchExactMatchRescorer' => __DIR__ . '/includes/search/SearchExactMatchRescorer.php',
'SearchHighlighter' => __DIR__ . '/includes/search/SearchHighlighter.php',
+ 'SearchIndexField' => __DIR__ . '/includes/search/SearchIndexField.php',
+ 'SearchIndexFieldDefinition' => __DIR__ . '/includes/search/SearchIndexFieldDefinition.php',
'SearchMssql' => __DIR__ . '/includes/search/SearchMssql.php',
'SearchMySQL' => __DIR__ . '/includes/search/SearchMySQL.php',
'SearchNearMatchResultSet' => __DIR__ . '/includes/search/SearchNearMatchResultSet.php',
protected function getContentClass() {
throw new MWException( 'Subclass must override' );
}
+
+ /**
+ * This handler contributes no search index fields of its own.
+ *
+ * @param SearchEngine $engine Engine requesting the fields (not used here)
+ * @return array Always an empty list
+ */
+ public function getFieldsForSearchIndex( SearchEngine $engine ) {
+ return [];
+ }
}
return $ok;
}
+
+ /**
+ * Get fields definition for search index
+ *
+ * The base implementation declares no fields and returns an empty list.
+ *
+ * @param SearchEngine $engine
+ * @return SearchIndexField[] List of fields this content handler can provide.
+ * @since 1.28
+ */
+ public function getFieldsForSearchIndex( SearchEngine $engine ) {
+ /* Default fields (none of them is returned from here; presumably they are
+ * provided elsewhere - TODO confirm):
+ * namespace
+ * namespace_text
+ * redirect
+ * source_text
+ * suggest
+ * timestamp
+ * title
+ * text
+ * text_bytes
+ */
+ return [];
+ }
}
class TextContentHandler extends ContentHandler {
// @codingStandardsIgnoreStart bug 57585
- public function __construct( $modelId = CONTENT_MODEL_TEXT,
- $formats = [ CONTENT_FORMAT_TEXT ] ) {
+ public function __construct( $modelId = CONTENT_MODEL_TEXT, $formats = [ CONTENT_FORMAT_TEXT ] ) {
parent::__construct( $modelId, $formats );
}
// @codingStandardsIgnoreEnd
* Returns the content's text as-is.
*
* @param Content $content
- * @param string $format The serialization format to check
+ * @param string $format The serialization format to check
*
* @return mixed
*/
return true;
}
+	/**
+	 * Declare the search index fields contributed by this handler.
+	 *
+	 * @param SearchEngine $engine Engine used to build the field definition
+	 * @return SearchIndexField[] A single 'language' keyword field, keyed by field name
+	 */
+	public function getFieldsForSearchIndex( SearchEngine $engine ) {
+		return [
+			'language' => $engine->makeSearchFieldMapping(
+				'language',
+				SearchIndexField::INDEX_TYPE_KEYWORD
+			),
+		];
+	}
}
return true;
}
+	/**
+	 * Declare the search index fields contributed by this handler.
+	 *
+	 * Each field is created through the engine; some of them additionally get
+	 * a single flag set right after creation.
+	 *
+	 * @param SearchEngine $engine Engine used to build the field definitions
+	 * @return SearchIndexField[] Field definitions keyed by field name
+	 */
+	public function getFieldsForSearchIndex( SearchEngine $engine ) {
+		$text = SearchIndexField::INDEX_TYPE_TEXT;
+		$keyword = SearchIndexField::INDEX_TYPE_KEYWORD;
+
+		// Field name => [ index type, flag to set or null for none ].
+		// The order here is the order in which the fields are created and returned.
+		$layout = [
+			'category' => [ $text, SearchIndexField::FLAG_CASEFOLD ],
+			'external_link' => [ $keyword, null ],
+			'heading' => [ $text, SearchIndexField::FLAG_SCORING ],
+			'auxiliary_text' => [ $text, null ],
+			'opening_text' => [ $text, SearchIndexField::FLAG_SCORING ],
+			'outgoing_link' => [ $keyword, null ],
+			'template' => [ $keyword, SearchIndexField::FLAG_CASEFOLD ],
+			// FIXME: this really belongs in separate file handler but files
+			// do not have separate handler. Sadness.
+			'file_text' => [ $text, null ],
+		];
+
+		$fields = [];
+		foreach ( $layout as $name => $spec ) {
+			list( $type, $flag ) = $spec;
+			$fields[$name] = $engine->makeSearchFieldMapping( $name, $type );
+			if ( $flag !== null ) {
+				$fields[$name]->setFlag( $flag );
+			}
+		}
+
+		return $fields;
+	}
+
}
--- /dev/null
+<?php
+
+/**
+ * Null index field - means search engine does not implement this field.
+ *
+ * Every operation is a no-op: there is no mapping, no flag is ever stored,
+ * and merging simply yields the other definition.
+ */
+class NullIndexField implements SearchIndexField {
+
+	/**
+	 * Get mapping for specific search engine
+	 * @param SearchEngine $engine
+	 * @return array|null Null means this field does not map to anything
+	 */
+	public function getMapping( SearchEngine $engine ) {
+		return null;
+	}
+
+	/**
+	 * Set global flag for this field.
+	 *
+	 * The null field keeps no flags, so the arguments are ignored.
+	 *
+	 * @param int $flag Bit flag to set/unset
+	 * @param bool $unset True if flag should be unset, false by default
+	 * @return $this
+	 */
+	public function setFlag( $flag, $unset = false ) {
+		// Nothing to record, but still return the instance as documented so
+		// that chained calls work the same as on a real field definition.
+		return $this;
+	}
+
+	/**
+	 * Check if flag is set.
+	 * @param int $flag
+	 * @return int 0 if unset, !=0 if set; always 0 for the null field
+	 */
+	public function checkFlag( $flag ) {
+		return 0;
+	}
+
+	/**
+	 * Merge two field definitions if possible.
+	 *
+	 * The null field never conflicts: the other definition is returned as is.
+	 *
+	 * @param SearchIndexField $that
+	 * @return SearchIndexField|false New definition or false if not mergeable.
+	 */
+	public function merge( SearchIndexField $that ) {
+		return $that;
+	}
+}
return null;
}
+ /**
+ * Create a search field definition.
+ * Specific search engines should override this method to create search fields.
+ * This base implementation ignores both arguments and returns a NullIndexField.
+ * @param string $name Field name
+ * @param int $type Index type of the field
+ * @return SearchIndexField
+ * @since 1.28
+ */
+ public function makeSearchFieldMapping( $name, $type ) {
+ return new NullIndexField();
+ }
+
+	/**
+	 * Collect search index field definitions from all content handlers.
+	 *
+	 * The handler of every content model is asked for its fields. When a field
+	 * name has already been collected, the existing definition is merged with
+	 * the new one; a refused merge is reported as an error. Finally the
+	 * 'SearchIndexFields' hook is run with the collected fields.
+	 *
+	 * @since 1.28
+	 * @return SearchIndexField[] Index field definitions for all content handlers
+	 * @throws InvalidArgumentException When two definitions of one field cannot be merged
+	 */
+	public function getSearchIndexFields() {
+		$fields = [];
+		foreach ( ContentHandler::getContentModels() as $model ) {
+			$perModel = ContentHandler::getForModelID( $model )->getFieldsForSearchIndex( $this );
+			foreach ( $perModel as $fieldName => $fieldData ) {
+				if ( empty( $fields[$fieldName] ) ) {
+					// First definition seen under this name: take it as is.
+					$fields[$fieldName] = $fieldData;
+					continue;
+				}
+				// TODO: do we allow some clashes with the same type or reject all of them?
+				$merged = $fields[$fieldName]->merge( $fieldData );
+				if ( !$merged ) {
+					throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" );
+				}
+				$fields[$fieldName] = $merged;
+			}
+		}
+		// Hook to allow extensions to produce search mapping fields
+		Hooks::run( 'SearchIndexFields', [ &$fields, $this ] );
+		return $fields;
+	}
}
/**
--- /dev/null
+<?php
+/**
+ * Definition of a mapping for the search index field.
+ *
+ * Declares the index type and flag constants plus the operations every
+ * field definition has to provide.
+ * @since 1.28
+ */
+interface SearchIndexField {
+ /**
+ * Field types
+ */
+ const INDEX_TYPE_TEXT = 0;
+ const INDEX_TYPE_KEYWORD = 1;
+ const INDEX_TYPE_INTEGER = 2;
+ const INDEX_TYPE_NUMBER = 3;
+ const INDEX_TYPE_DATETIME = 4;
+ const INDEX_TYPE_NESTED = 5;
+ const INDEX_TYPE_BOOL = 6;
+ // Generic field flags. The values are distinct bits, so flags can be combined.
+ /**
+ * This field is case-insensitive.
+ */
+ const FLAG_CASEFOLD = 1;
+ /**
+ * This field is for scoring only.
+ */
+ const FLAG_SCORING = 2;
+ /**
+ * This field does not need highlight handling.
+ */
+ const FLAG_NO_HIGHLIGHT = 4;
+ /**
+ * Do not index this field.
+ */
+ const FLAG_NO_INDEX = 8;
+ /**
+ * Get mapping for specific search engine
+ * @param SearchEngine $engine
+ * @return array|null Null means this field does not map to anything
+ */
+ public function getMapping( SearchEngine $engine );
+ /**
+ * Set global flag for this field.
+ *
+ * @param int $flag Bit flag to set/unset
+ * @param bool $unset True if flag should be unset, false by default
+ * @return $this
+ */
+ public function setFlag( $flag, $unset = false );
+ /**
+ * Check if flag is set.
+ * @param int $flag Bit flag to test
+ * @return int 0 if unset, !=0 if set
+ */
+ public function checkFlag( $flag );
+ /**
+ * Merge two field definitions if possible.
+ *
+ * @param SearchIndexField $that
+ * @return SearchIndexField|false New definition or false if not mergeable.
+ */
+ public function merge( SearchIndexField $that );
+}
--- /dev/null
+<?php
+
+/**
+ * Basic infrastructure of the field definition.
+ *
+ * Stores name, index type, flags and subfields. getMapping() is not
+ * implemented here, so specific engines have to provide at least that
+ * method and can reuse the rest.
+ * @since 1.28
+ */
+abstract class SearchIndexFieldDefinition implements SearchIndexField {
+	/**
+	 * Name of the field
+	 *
+	 * @var string
+	 */
+	protected $name;
+
+	/**
+	 * Type of the field, as passed to the constructor
+	 * (the tests use the SearchIndexField::INDEX_TYPE_* constants)
+	 *
+	 * @var int
+	 */
+	protected $type;
+
+	/**
+	 * Bit flags for the field.
+	 *
+	 * @var int
+	 */
+	protected $flags = 0;
+
+	/**
+	 * Subfields
+	 * @var SearchIndexFieldDefinition[]
+	 */
+	protected $subfields = [];
+
+	/**
+	 * SearchIndexFieldDefinition constructor.
+	 * @param string $name Field name
+	 * @param int $type Index type
+	 */
+	public function __construct( $name, $type ) {
+		$this->name = $name;
+		$this->type = $type;
+	}
+
+	/**
+	 * Get field name
+	 * @return string
+	 */
+	public function getName() {
+		return $this->name;
+	}
+
+	/**
+	 * Get index type
+	 * @return int
+	 */
+	public function getIndexType() {
+		return $this->type;
+	}
+
+	/**
+	 * Set or clear a flag on this field.
+	 *
+	 * @param int $flag Bit flag to set/unset
+	 * @param bool $unset True if flag should be unset, false by default
+	 * @return $this
+	 */
+	public function setFlag( $flag, $unset = false ) {
+		$this->flags = $unset
+			? ( $this->flags & ~$flag )
+			: ( $this->flags | $flag );
+		return $this;
+	}
+
+	/**
+	 * Check if flag is set.
+	 * @param int $flag
+	 * @return int 0 if unset, !=0 if set
+	 */
+	public function checkFlag( $flag ) {
+		return $this->flags & $flag;
+	}
+
+	/**
+	 * Merge two field definitions if possible.
+	 *
+	 * Merging succeeds only when the other definition is also a
+	 * SearchIndexFieldDefinition with identical type and flags, and the
+	 * type is not INDEX_TYPE_NESTED. The other definition is then returned.
+	 *
+	 * @param SearchIndexField $that
+	 * @return SearchIndexField|false New definition or false if not mergeable.
+	 */
+	public function merge( SearchIndexField $that ) {
+		// TODO: which definitions may be compatible?
+		if ( !( $that instanceof self ) ) {
+			return false;
+		}
+		if ( $this->type === self::INDEX_TYPE_NESTED ) {
+			return false;
+		}
+		if ( $this->type !== $that->type || $this->flags !== $that->flags ) {
+			return false;
+		}
+		return $that;
+	}
+
+	/**
+	 * Get subfields
+	 * @return SearchIndexFieldDefinition[]
+	 */
+	public function getSubfields() {
+		return $this->subfields;
+	}
+
+	/**
+	 * Set subfields
+	 * @param SearchIndexFieldDefinition[] $subfields
+	 * @return $this
+	 */
+	public function setSubfields( array $subfields ) {
+		$this->subfields = $subfields;
+		return $this;
+	}
+}
$this->assertTrue( $handler->supportsDirectEditing(), 'direct editing is supported' );
}
+	/**
+	 * The text handler must expose a 'language' field built through the engine.
+	 *
+	 * @covers SearchEngine::makeSearchFieldMapping
+	 * @covers ContentHandler::getFieldsForSearchIndex
+	 */
+	public function testFieldsForIndex() {
+		// Builds a field mock whose mapping echoes back the requested name and type.
+		$fieldFactory = function ( $name, $type ) {
+			$stub = $this->getMockBuilder( 'SearchIndexFieldDefinition' )
+				->setConstructorArgs( [ $name, $type ] )
+				->getMock();
+			$stub->expects( $this->atLeastOnce() )
+				->method( 'getMapping' )
+				->willReturn( [
+					'testData' => 'test',
+					'name' => $name,
+					'type' => $type,
+				] );
+			return $stub;
+		};
+
+		/**
+		 * @var $engine SearchEngine
+		 */
+		$engine = $this->getMock( 'SearchEngine' );
+		$engine->expects( $this->atLeastOnce() )
+			->method( 'makeSearchFieldMapping' )
+			->willReturnCallback( $fieldFactory );
+
+		$handler = new TextContentHandler();
+		$mappedFields = [];
+		foreach ( $handler->getFieldsForSearchIndex( $engine ) as $name => $field ) {
+			/**
+			 * @var $field SearchIndexField
+			 */
+			$this->assertInstanceOf( 'SearchIndexField', $field );
+			$mappedFields[$name] = $field->getMapping( $engine );
+		}
+
+		$this->assertArrayHasKey( 'language', $mappedFields );
+		$this->assertEquals( 'test', $mappedFields['language']['testData'] );
+		$this->assertEquals( 'language', $mappedFields['language']['name'] );
+	}
+
}
"Title power search failed" );
}
+	/**
+	 * Fields from the content handlers and from the 'SearchIndexFields' hook
+	 * must all show up in the engine's field list.
+	 *
+	 * @covers SearchEngine::getSearchIndexFields
+	 */
+	public function testSearchIndexFields() {
+		/**
+		 * @var $mockEngine SearchEngine
+		 */
+		$mockEngine = $this->getMock( 'SearchEngine', [ 'makeSearchFieldMapping' ] );
+
+		// Builds a field mock whose mapping echoes back the requested name and type.
+		$mockFieldBuilder = function ( $name, $type ) {
+			$mockField =
+				$this->getMockBuilder( 'SearchIndexFieldDefinition' )->setConstructorArgs( [
+					$name,
+					$type
+				] )->getMock();
+			$mockField->expects( $this->any() )->method( 'getMapping' )->willReturn( [
+				'testData' => 'test',
+				'name' => $name,
+				'type' => $type,
+			] );
+			return $mockField;
+		};
+
+		$mockEngine->expects( $this->atLeastOnce() )
+			->method( 'makeSearchFieldMapping' )
+			->willReturnCallback( $mockFieldBuilder );
+
+		// Not using mock since PHPUnit mocks do not work properly with references in params
+		$this->mergeMwGlobalArrayValue( 'wgHooks',
+			[ 'SearchIndexFields' => [ [ $this, 'hookSearchIndexFields', $mockFieldBuilder ] ] ] );
+
+		$fields = $mockEngine->getSearchIndexFields();
+		$this->assertArrayHasKey( 'language', $fields );
+		$this->assertArrayHasKey( 'category', $fields );
+		// Check the key first so that a hook which did not run is reported as a
+		// failed assertion instead of an undefined index error.
+		$this->assertArrayHasKey( 'testField', $fields );
+		$this->assertInstanceOf( 'SearchIndexField', $fields['testField'] );
+
+		$mapping = $fields['testField']->getMapping( $mockEngine );
+		$this->assertArrayHasKey( 'testData', $mapping );
+		$this->assertEquals( 'test', $mapping['testData'] );
+	}
+
+ /**
+ * Handler registered for the 'SearchIndexFields' hook by testSearchIndexFields().
+ * Adds a 'testField' entry created with the supplied builder.
+ *
+ * @param callable $mockFieldBuilder Extra value from the hook registration;
+ * it arrives ahead of the hook's own arguments
+ * @param array &$fields Field list being assembled, modified in place
+ * @param SearchEngine $engine Engine that runs the hook (not used here)
+ * @return bool Always true
+ */
+ public function hookSearchIndexFields( $mockFieldBuilder, &$fields, SearchEngine $engine ) {
+ $fields['testField'] = $mockFieldBuilder( "testField", SearchIndexField::INDEX_TYPE_TEXT );
+ return true;
+ }
}
--- /dev/null
+<?php
+
+/**
+ * @group Search
+ * @covers SearchIndexFieldDefinition
+ */
+class SearchIndexFieldTest extends MediaWikiTestCase {
+
+	/**
+	 * Cases for testMerge(): type and name of the first field, type and name
+	 * of the second field, and whether the merge is expected to succeed.
+	 *
+	 * @return array
+	 */
+	public function getMergeCases() {
+		$nested = SearchIndexField::INDEX_TYPE_NESTED;
+
+		return [
+			[ 0, 'test', 0, 'test', true ],
+			[ $nested, 'test', $nested, 'test', false ],
+			[ 0, 'test', 0, 'test2', true ],
+			[ 0, 'test', 1, 'test', false ],
+		];
+	}
+
+	/**
+	 * Build a field definition in which only getMapping() is mocked, so the
+	 * real flag and merge logic is exercised.
+	 *
+	 * @param string $name Field name
+	 * @param int $type Index type
+	 * @return SearchIndexFieldDefinition
+	 */
+	private function newField( $name, $type ) {
+		return $this->getMockBuilder( 'SearchIndexFieldDefinition' )
+			->setMethods( [ 'getMapping' ] )
+			->setConstructorArgs( [ $name, $type ] )
+			->getMock();
+	}
+
+	/**
+	 * @dataProvider getMergeCases
+	 */
+	public function testMerge( $t1, $n1, $t2, $n2, $result ) {
+		$field1 = $this->newField( $n1, $t1 );
+		$field2 = $this->newField( $n2, $t2 );
+
+		$merged = $field1->merge( $field2 );
+		if ( !$result ) {
+			$this->assertFalse( $merged );
+		} else {
+			$this->assertNotFalse( $merged );
+		}
+
+		// Once the flags differ the definitions must no longer merge.
+		$field1->setFlag( 0xFF );
+		$this->assertFalse( $field1->merge( $field2 ) );
+	}
+}