From 86b55ad03c6bd74c6447cf10b245d534928d6574 Mon Sep 17 00:00:00 2001 From: Stanislav Malyshev Date: Thu, 12 May 2016 17:10:52 -0700 Subject: [PATCH] Create API to allow content handlers to handle structured data definitions Change-Id: Ia1738803c42f6114575587c1c838fec62b6f54aa Bug: T89733 --- autoload.php | 3 + includes/content/CodeContentHandler.php | 8 ++ includes/content/ContentHandler.php | 22 ++++ includes/content/TextContentHandler.php | 11 +- includes/content/WikitextContentHandler.php | 36 ++++++ includes/search/NullIndexField.php | 45 +++++++ includes/search/SearchEngine.php | 40 ++++++ includes/search/SearchIndexField.php | 63 ++++++++++ .../search/SearchIndexFieldDefinition.php | 118 ++++++++++++++++++ .../content/TextContentHandlerTest.php | 41 ++++++ .../includes/search/SearchEngineTest.php | 45 +++++++ .../includes/search/SearchIndexFieldTest.php | 39 ++++++ 12 files changed, 468 insertions(+), 3 deletions(-) create mode 100644 includes/search/NullIndexField.php create mode 100644 includes/search/SearchIndexField.php create mode 100644 includes/search/SearchIndexFieldDefinition.php create mode 100644 tests/phpunit/includes/search/SearchIndexFieldTest.php diff --git a/autoload.php b/autoload.php index 0211c6d076..16d69d0768 100644 --- a/autoload.php +++ b/autoload.php @@ -956,6 +956,7 @@ $wgAutoloadLocalClasses = [ 'NukePage' => __DIR__ . '/maintenance/nukePage.php', 'NullFileJournal' => __DIR__ . '/includes/filebackend/filejournal/FileJournal.php', 'NullFileOp' => __DIR__ . '/includes/filebackend/FileOp.php', + 'NullIndexField' => __DIR__ . '/includes/search/NullIndexField.php', 'NullJob' => __DIR__ . '/includes/jobqueue/jobs/NullJob.php', 'NullLockManager' => __DIR__ . '/includes/filebackend/lockmanager/LockManager.php', 'NullRepo' => __DIR__ . '/includes/filerepo/NullRepo.php', @@ -1209,6 +1210,8 @@ $wgAutoloadLocalClasses = [ 'SearchEngineFactory' => __DIR__ . '/includes/search/SearchEngineFactory.php', 'SearchExactMatchRescorer' => __DIR__ . '/includes/search/SearchExactMatchRescorer.php', 'SearchHighlighter' => __DIR__ . '/includes/search/SearchHighlighter.php', + 'SearchIndexField' => __DIR__ . '/includes/search/SearchIndexField.php', + 'SearchIndexFieldDefinition' => __DIR__ . '/includes/search/SearchIndexFieldDefinition.php', 'SearchMssql' => __DIR__ . '/includes/search/SearchMssql.php', 'SearchMySQL' => __DIR__ . '/includes/search/SearchMySQL.php', 'SearchNearMatchResultSet' => __DIR__ . '/includes/search/SearchNearMatchResultSet.php', diff --git a/includes/content/CodeContentHandler.php b/includes/content/CodeContentHandler.php index 694b633e7b..2bbf6ca7b2 100644 --- a/includes/content/CodeContentHandler.php +++ b/includes/content/CodeContentHandler.php @@ -63,4 +63,12 @@ abstract class CodeContentHandler extends TextContentHandler { protected function getContentClass() { throw new MWException( 'Subclass must override' ); } + + /** + * @param SearchEngine $engine + * @return array + */ + public function getFieldsForSearchIndex( SearchEngine $engine ) { + return []; + } } diff --git a/includes/content/ContentHandler.php b/includes/content/ContentHandler.php index e225fb783f..1ecd6145c7 100644 --- a/includes/content/ContentHandler.php +++ b/includes/content/ContentHandler.php @@ -1248,4 +1248,26 @@ abstract class ContentHandler { return $ok; } + + /** + * Get fields definition for search index + * @param SearchEngine $engine + * @return SearchIndexField[] List of fields this content handler can provide. + * @since 1.28 + */ + public function getFieldsForSearchIndex( SearchEngine $engine ) { + /* Default fields: + /* + * namespace + * namespace_text + * redirect + * source_text + * suggest + * timestamp + * title + * text + * text_bytes + */ + return []; + } } diff --git a/includes/content/TextContentHandler.php b/includes/content/TextContentHandler.php index ad40cd9077..748c81069f 100644 --- a/includes/content/TextContentHandler.php +++ b/includes/content/TextContentHandler.php @@ -31,8 +31,7 @@ class TextContentHandler extends ContentHandler { // @codingStandardsIgnoreStart bug 57585 - public function __construct( $modelId = CONTENT_MODEL_TEXT, - $formats = [ CONTENT_FORMAT_TEXT ] ) { + public function __construct( $modelId = CONTENT_MODEL_TEXT, $formats = [ CONTENT_FORMAT_TEXT ] ) { parent::__construct( $modelId, $formats ); } // @codingStandardsIgnoreEnd @@ -41,7 +40,7 @@ class TextContentHandler extends ContentHandler { * Returns the content's text as-is. * * @param Content $content - * @param string $format The serialization format to check + * @param string $format The serialization format to check * * @return mixed */ @@ -143,4 +142,10 @@ class TextContentHandler extends ContentHandler { return true; } + public function getFieldsForSearchIndex( SearchEngine $engine ) { + $fields = []; + $fields['language'] = + $engine->makeSearchFieldMapping( 'language', SearchIndexField::INDEX_TYPE_KEYWORD ); + return $fields; + } } diff --git a/includes/content/WikitextContentHandler.php b/includes/content/WikitextContentHandler.php index 0701a0f3de..86f0d50574 100644 --- a/includes/content/WikitextContentHandler.php +++ b/includes/content/WikitextContentHandler.php @@ -108,4 +108,40 @@ class WikitextContentHandler extends TextContentHandler { return true; } + public function getFieldsForSearchIndex( SearchEngine $engine ) { + $fields = []; + + $fields['category'] = + $engine->makeSearchFieldMapping( 'category', SearchIndexField::INDEX_TYPE_TEXT ); + $fields['category']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + + $fields['external_link'] = + $engine->makeSearchFieldMapping( 'external_link', SearchIndexField::INDEX_TYPE_KEYWORD ); + + $fields['heading'] = + $engine->makeSearchFieldMapping( 'heading', SearchIndexField::INDEX_TYPE_TEXT ); + $fields['heading']->setFlag( SearchIndexField::FLAG_SCORING ); + + $fields['auxiliary_text'] = + $engine->makeSearchFieldMapping( 'auxiliary_text', SearchIndexField::INDEX_TYPE_TEXT ); + + $fields['opening_text'] = + $engine->makeSearchFieldMapping( 'opening_text', SearchIndexField::INDEX_TYPE_TEXT ); + $fields['opening_text']->setFlag( SearchIndexField::FLAG_SCORING ); + + $fields['outgoing_link'] = + $engine->makeSearchFieldMapping( 'outgoing_link', SearchIndexField::INDEX_TYPE_KEYWORD ); + + $fields['template'] = + $engine->makeSearchFieldMapping( 'template', SearchIndexField::INDEX_TYPE_KEYWORD ); + $fields['template']->setFlag( SearchIndexField::FLAG_CASEFOLD ); + + // FIXME: this really belongs in separate file handler but files + // do not have separate handler. Sadness. + $fields['file_text'] = + $engine->makeSearchFieldMapping( 'file_text', SearchIndexField::INDEX_TYPE_TEXT ); + + return $fields; + } + } diff --git a/includes/search/NullIndexField.php b/includes/search/NullIndexField.php new file mode 100644 index 0000000000..933e0ad332 --- /dev/null +++ b/includes/search/NullIndexField.php @@ -0,0 +1,45 @@ +getFieldsForSearchIndex( $this ); + foreach ( $handlerFields as $fieldName => $fieldData ) { + if ( empty( $fields[$fieldName] ) ) { + $fields[$fieldName] = $fieldData; + } else { + // TODO: do we allow some clashes with the same type or reject all of them? + $mergeDef = $fields[$fieldName]->merge( $fieldData ); + if ( !$mergeDef ) { + throw new InvalidArgumentException( "Duplicate field $fieldName for model $model" ); + } + $fields[$fieldName] = $mergeDef; + } + } + } + // Hook to allow extensions to produce search mapping fields + Hooks::run( 'SearchIndexFields', [ &$fields, $this ] ); + return $fields; + } } /** diff --git a/includes/search/SearchIndexField.php b/includes/search/SearchIndexField.php new file mode 100644 index 0000000000..2ea255f400 --- /dev/null +++ b/includes/search/SearchIndexField.php @@ -0,0 +1,63 @@ +name = $name; + $this->type = $type; + } + + /** + * Get field name + * @return string + */ + public function getName() { + return $this->name; + } + + /** + * Get index type + * @return int + */ + public function getIndexType() { + return $this->type; + } + + /** + * Set global flag for this field. + * + * @param int $flag Bit flag to set/unset + * @param bool $unset True if flag should be unset, false by default + * @return $this + */ + public function setFlag( $flag, $unset = false ) { + if ( $unset ) { + $this->flags &= ~$flag; + } else { + $this->flags |= $flag; + } + return $this; + } + + /** + * Check if flag is set. + * @param $flag + * @return int 0 if unset, !=0 if set + */ + public function checkFlag( $flag ) { + return $this->flags & $flag; + } + + /** + * Merge two field definitions if possible. + * + * @param SearchIndexField $that + * @return SearchIndexField|false New definition or false if not mergeable. + */ + public function merge( SearchIndexField $that ) { + // TODO: which definitions may be compatible? + if ( ( $that instanceof self ) && $this->type === $that->type && + $this->flags === $that->flags && $this->type !== self::INDEX_TYPE_NESTED + ) { + return $that; + } + return false; + } + + /** + * Get subfields + * @return SearchIndexFieldDefinition[] + */ + public function getSubfields() { + return $this->subfields; + } + + /** + * Set subfields + * @param SearchIndexFieldDefinition[] $subfields + * @return $this + */ + public function setSubfields( array $subfields ) { + $this->subfields = $subfields; + return $this; + } +} diff --git a/tests/phpunit/includes/content/TextContentHandlerTest.php b/tests/phpunit/includes/content/TextContentHandlerTest.php index 492fec6b68..e8681c7658 100644 --- a/tests/phpunit/includes/content/TextContentHandlerTest.php +++ b/tests/phpunit/includes/content/TextContentHandlerTest.php @@ -9,4 +9,45 @@ class TextContentHandlerTest extends MediaWikiLangTestCase { $this->assertTrue( $handler->supportsDirectEditing(), 'direct editing is supported' ); } + /** + * @covers SearchEngine::makeSearchFieldMapping + * @covers ContentHandler::getFieldsForSearchIndex + */ + public function testFieldsForIndex() { + $handler = new TextContentHandler(); + + $mockEngine = $this->getMock( 'SearchEngine' ); + + $mockEngine->expects( $this->atLeastOnce() ) + ->method( 'makeSearchFieldMapping' ) + ->willReturnCallback( function ( $name, $type ) { + $mockField = + $this->getMockBuilder( 'SearchIndexFieldDefinition' ) + ->setConstructorArgs( [ $name, $type ] ) + ->getMock(); + $mockField->expects( $this->atLeastOnce() )->method( 'getMapping' )->willReturn( [ + 'testData' => 'test', + 'name' => $name, + 'type' => $type, + ] ); + return $mockField; + } ); + + /** + * @var $mockEngine SearchEngine + */ + $fields = $handler->getFieldsForSearchIndex( $mockEngine ); + $mappedFields = []; + foreach ( $fields as $name => $field ) { + $this->assertInstanceOf( 'SearchIndexField', $field ); + /** + * @var $field SearchIndexField + */ + $mappedFields[$name] = $field->getMapping( $mockEngine ); + } + $this->assertArrayHasKey( 'language', $mappedFields ); + $this->assertEquals( 'test', $mappedFields['language']['testData'] ); + $this->assertEquals( 'language', $mappedFields['language']['name'] ); + } + } diff --git a/tests/phpunit/includes/search/SearchEngineTest.php b/tests/phpunit/includes/search/SearchEngineTest.php index 40a33d9ab5..f084c6461c 100644 --- a/tests/phpunit/includes/search/SearchEngineTest.php +++ b/tests/phpunit/includes/search/SearchEngineTest.php @@ -157,4 +157,49 @@ class SearchEngineTest extends MediaWikiLangTestCase { "Title power search failed" ); } + /** + * @covers SearchEngine::getSearchIndexFields + */ + public function testSearchIndexFields() { + /** + * @var $mockEngine SearchEngine + */ + $mockEngine = $this->getMock( 'SearchEngine', [ 'makeSearchFieldMapping' ] ); + + $mockFieldBuilder = function ( $name, $type ) { + $mockField = + $this->getMockBuilder( 'SearchIndexFieldDefinition' )->setConstructorArgs( [ + $name, + $type + ] )->getMock(); + $mockField->expects( $this->any() )->method( 'getMapping' )->willReturn( [ + 'testData' => 'test', + 'name' => $name, + 'type' => $type, + ] ); + return $mockField; + }; + + $mockEngine->expects( $this->atLeastOnce() ) + ->method( 'makeSearchFieldMapping' ) + ->willReturnCallback( $mockFieldBuilder ); + + // Not using mock since PHPUnit mocks do not work properly with references in params + $this->mergeMwGlobalArrayValue( 'wgHooks', + [ 'SearchIndexFields' => [ [ $this, 'hookSearchIndexFields', $mockFieldBuilder ] ] ] ); + + $fields = $mockEngine->getSearchIndexFields(); + $this->assertArrayHasKey( 'language', $fields ); + $this->assertArrayHasKey( 'category', $fields ); + $this->assertInstanceOf( 'SearchIndexField', $fields['testField'] ); + + $mapping = $fields['testField']->getMapping( $mockEngine ); + $this->assertArrayHasKey( 'testData', $mapping ); + $this->assertEquals( 'test', $mapping['testData'] ); + } + + public function hookSearchIndexFields( $mockFieldBuilder, &$fields, SearchEngine $engine ) { + $fields['testField'] = $mockFieldBuilder( "testField", SearchIndexField::INDEX_TYPE_TEXT ); + return true; + } } diff --git a/tests/phpunit/includes/search/SearchIndexFieldTest.php b/tests/phpunit/includes/search/SearchIndexFieldTest.php new file mode 100644 index 0000000000..ec046a767f --- /dev/null +++ b/tests/phpunit/includes/search/SearchIndexFieldTest.php @@ -0,0 +1,39 @@ +getMockBuilder( 'SearchIndexFieldDefinition' ) + ->setMethods( [ 'getMapping' ] ) + ->setConstructorArgs( [ $n1, $t1 ] )->getMock(); + $field2 = $this->getMockBuilder( 'SearchIndexFieldDefinition' ) + ->setMethods( [ 'getMapping' ] ) + ->setConstructorArgs( [ $n2, $t2 ] )->getMock(); + + if ( $result ) { + $this->assertNotFalse( $field1->merge( $field2 ) ); + } else { + $this->assertFalse( $field1->merge( $field2 ) ); + } + + $field1->setFlag( 0xFF ); + $this->assertFalse( $field1->merge( $field2 ) ); + } +} -- 2.20.1