From d5bd85c5d18f7e0488a1b9dbc4f4baea7b660969 Mon Sep 17 00:00:00 2001 From: Amir Sarabadani Date: Thu, 1 Jun 2017 20:09:21 +0430 Subject: [PATCH] Start a very basic version of Special:PageData Bug: T163923 Change-Id: I2a1a12f20a38d8d3c50a8f6c9a363be1cb656c70 --- autoload.php | 2 + .../linkeddata/PageDataRequestHandler.php | 152 ++++++++++ includes/specialpage/SpecialPageFactory.php | 1 + includes/specials/SpecialPageData.php | 87 ++++++ languages/i18n/en.json | 6 +- languages/i18n/qqq.json | 6 +- languages/messages/MessagesEn.php | 1 + .../linkeddata/PageDataRequestHandlerTest.php | 270 ++++++++++++++++++ .../includes/specials/SpecialPageDataTest.php | 155 ++++++++++ 9 files changed, 678 insertions(+), 2 deletions(-) create mode 100644 includes/linkeddata/PageDataRequestHandler.php create mode 100644 includes/specials/SpecialPageData.php create mode 100644 tests/phpunit/includes/linkeddata/PageDataRequestHandlerTest.php create mode 100644 tests/phpunit/includes/specials/SpecialPageDataTest.php diff --git a/autoload.php b/autoload.php index 3d97a749cc..33de777471 100644 --- a/autoload.php +++ b/autoload.php @@ -1071,6 +1071,7 @@ $wgAutoloadLocalClasses = [ 'PackedOverlayImageGallery' => __DIR__ . '/includes/gallery/PackedOverlayImageGallery.php', 'Page' => __DIR__ . '/includes/page/Page.php', 'PageArchive' => __DIR__ . '/includes/page/PageArchive.php', + 'PageDataRequestHandler' => __DIR__ . '/includes/linkeddata/PageDataRequestHandler.php', 'PageExists' => __DIR__ . '/maintenance/pageExists.php', 'PageLangLogFormatter' => __DIR__ . '/includes/logging/PageLangLogFormatter.php', 'PageProps' => __DIR__ . '/includes/PageProps.php', @@ -1377,6 +1378,7 @@ $wgAutoloadLocalClasses = [ 'SpecialNewpages' => __DIR__ . '/includes/specials/SpecialNewpages.php', 'SpecialPage' => __DIR__ . '/includes/specialpage/SpecialPage.php', 'SpecialPageAction' => __DIR__ . '/includes/actions/SpecialPageAction.php', + 'SpecialPageData' => __DIR__ . 
'/includes/specials/SpecialPageData.php', 'SpecialPageFactory' => __DIR__ . '/includes/specialpage/SpecialPageFactory.php', 'SpecialPageLanguage' => __DIR__ . '/includes/specials/SpecialPageLanguage.php', 'SpecialPagesWithProp' => __DIR__ . '/includes/specials/SpecialPagesWithProp.php', diff --git a/includes/linkeddata/PageDataRequestHandler.php b/includes/linkeddata/PageDataRequestHandler.php new file mode 100644 index 0000000000..3da20bf1a3 --- /dev/null +++ b/includes/linkeddata/PageDataRequestHandler.php @@ -0,0 +1,152 @@ +getText( 'title', '' ) === '' ) { + return false; + } + } + + return true; + } + + /** + * Main method for handling requests. + * + * @param string $title Page title + * @param WebRequest $request The request parameters. Known parameters are: + * - title: the page title + * - format: the format + * - oldid|revision: the revision ID + * @param OutputPage $output + * + * @note: Instead of an output page, a WebResponse could be sufficient, but + * redirect logic is currently implemented in OutputPage. + * + * @throws HttpError + */ + public function handleRequest( $title, WebRequest $request, OutputPage $output ) { + // No matter what: The response is always public + $output->getRequest()->response()->header( 'Access-Control-Allow-Origin: *' ); + + $revision = 0; + + $title = $request->getText( 'title', $title ); + $revision = $request->getInt( 'oldid', $revision ); + $revision = $request->getInt( 'revision', $revision ); + + if ( $title === null || $title === '' ) { + //TODO: different error message? + throw new HttpError( 400, wfMessage( 'pagedata-bad-title', $title ) ); + } + + try { + $title = Title::newFromTextThrow( $title ); + } catch ( MalformedTitleException $ex ) { + throw new HttpError( 400, wfMessage( 'pagedata-bad-title', $title ) ); + } + + $this->httpContentNegotiation( $request, $output, $title, $revision ); + } + + /** + * Applies HTTP content negotiation. 
+ * If the negotiation is successful, this method will set the appropriate redirect + * in the OutputPage object and return. Otherwise, an HttpError is thrown. + * + * @param WebRequest $request + * @param OutputPage $output + * @param Title $title + * @param int $revision The desired revision + * + * @throws HttpError + */ + public function httpContentNegotiation( + WebRequest $request, + OutputPage $output, + Title $title, + $revision = 0 + ) { + $contentHandler = ContentHandler::getForTitle( $title ); + $mimeTypes = $contentHandler->getSupportedFormats(); + + $headers = $request->getAllHeaders(); + if ( isset( $headers['ACCEPT'] ) ) { + $parser = new HttpAcceptParser(); + $accept = $parser->parseWeights( $headers['ACCEPT'] ); + } else { + // anything goes + $accept = [ + '*' => 0.1 // just to make extra sure + ]; + // prefer the default + $accept[$mimeTypes[0]] = 1; + } + + $negotiator = new HttpAcceptNegotiator( $mimeTypes ); + $format = $negotiator->getBestSupportedKey( $accept, null ); + + if ( $format === null ) { + $format = isset( $accept['text/html'] ) ? 'text/html' : null; + } + + if ( $format === null ) { + $msg = wfMessage( 'pagedata-not-acceptable', implode( ', ', $mimeTypes ) ); + throw new HttpError( 406, $msg ); + } + + $url = $this->getDocUrl( $title, $format, $revision ); + $output->redirect( $url, 303 ); + } + + /** + * Returns a url representing the given title. 
+ * + * @param Title $title + * @param string|null $format The (normalized) format name, or '' + * @param int $revision + * @return string + */ + private function getDocUrl( Title $title, $format = '', $revision = 0 ) { + $params = []; + + if ( $revision > 0 ) { + $params['oldid'] = $revision; + } + + if ( $format === 'text/html' ) { + return $title->getFullURL( $params ); + } + + $params[ 'action' ] = 'raw'; + + return $title->getFullURL( $params ); + } + +} diff --git a/includes/specialpage/SpecialPageFactory.php b/includes/specialpage/SpecialPageFactory.php index 84d3b08095..81e2b7ef2c 100644 --- a/includes/specialpage/SpecialPageFactory.php +++ b/includes/specialpage/SpecialPageFactory.php @@ -186,6 +186,7 @@ class SpecialPageFactory { 'Revisiondelete' => 'SpecialRevisionDelete', 'RunJobs' => 'SpecialRunJobs', 'Specialpages' => 'SpecialSpecialpages', + 'PageData' => 'SpecialPageData' ]; private static $list; diff --git a/includes/specials/SpecialPageData.php b/includes/specials/SpecialPageData.php new file mode 100644 index 0000000000..f7084a870e --- /dev/null +++ b/includes/specials/SpecialPageData.php @@ -0,0 +1,87 @@ +. + * + * @license GPL-2.0+ + */ +class SpecialPageData extends SpecialPage { + + /** + * @var PageDataRequestHandler|null + */ + private $requestHandler = null; + + public function __construct() { + parent::__construct( 'PageData' ); + } + + /** + * Sets the request handler to be used by the special page. + * May be used when a particular instance of PageDataRequestHandler is already + * known, e.g. during testing. + * + * If no request handler is set using this method, a default handler is created + * on demand by initDependencies(). + * + * @param PageDataRequestHandler $requestHandler + */ + public function setRequestHandler( PageDataRequestHandler $requestHandler ) { + $this->requestHandler = $requestHandler; + } + + /** + * Initialize any un-initialized members from global context. 
+ * In particular, this initializes $this->requestHandler + */ + protected function initDependencies() { + if ( $this->requestHandler === null ) { + $this->requestHandler = $this->newDefaultRequestHandler(); + } + } + + /** + * Creates a PageDataRequestHandler based on global defaults. + * + * @return PageDataRequestHandler + */ + private function newDefaultRequestHandler() { + + return new PageDataRequestHandler(); + } + + /** + * @see SpecialPage::execute + * + * @param string|null $subPage Subpage part of the URL; passed to the request handler as the page title + * + * @throws HttpError + */ + public function execute( $subPage ) { + $this->initDependencies(); + + // If there is no title, show an HTML form + // TODO: Don't do this if HTML is not acceptable according to HTTP headers. + if ( !$this->requestHandler->canHandleRequest( $subPage, $this->getRequest() ) ) { + $this->showForm(); + return; + } + + $this->requestHandler->handleRequest( $subPage, $this->getRequest(), $this->getOutput() ); + } + + /** + * Shows an informative page to the user; Called when there is no page to output. + */ + public function showForm() { + $this->getOutput()->showErrorPage( 'pagedata-title', 'pagedata-text' ); + } + + public function isListed() { + // Do not list this page in Special:SpecialPages + return false; + } + +} diff --git a/languages/i18n/en.json b/languages/i18n/en.json index 22f274555d..ed31e70d06 100644 --- a/languages/i18n/en.json +++ b/languages/i18n/en.json @@ -4327,5 +4327,9 @@ "gotointerwiki-invalid": "The specified title is invalid.", "gotointerwiki-external": "You are about to leave {{SITENAME}} to visit [[$2]], which is a separate website.\n\n'''[$1 Continue to $1]'''", "undelete-cantedit": "You cannot undelete this page as you are not allowed to edit this page.", - "undelete-cantcreate": "You cannot undelete this page as there is no existing page with this name and you are not allowed to create this page."
+ "undelete-cantcreate": "You cannot undelete this page as there is no existing page with this name and you are not allowed to create this page.", + "pagedata-title": "Page data", + "pagedata-text": "This page provides a data interface to pages. Please provide the page title in the URL, using subpage syntax.\n* Content negotiation applies based on you client's Accept header. This means that the page data will be provided in the format preferred by your client.", + "pagedata-not-acceptable": "No matching format found. Supported MIME types: $1", + "pagedata-bad-title": "Invalid title: $1." } diff --git a/languages/i18n/qqq.json b/languages/i18n/qqq.json index 3c9791e1c5..e09eeade09 100644 --- a/languages/i18n/qqq.json +++ b/languages/i18n/qqq.json @@ -4515,5 +4515,9 @@ "gotointerwiki-invalid": "Message shown on Special:GoToInterwiki if given an invalid title.", "gotointerwiki-external": "Message shown on Special:GoToInterwiki if given a external interwiki link (e.g. [[Special:GoToInterwiki/Google:Foo]]). $1 is the full url the user is trying to get to. $2 is the text of the interwiki link (e.g. \"Google:foo\").", "undelete-cantedit": "Shown if the user tries to undelete a page that they cannot edit", - "undelete-cantcreate": "Shown if the user tries to undelete a page which currently does not exist, and they are not allowed to create it. This could for example happen on a wiki with custom protection levels where the page name has been create-protected and the user has the right to undelete but not the right to edit protected pages." + "undelete-cantcreate": "Shown if the user tries to undelete a page which currently does not exist, and they are not allowed to create it. 
This could for example happen on a wiki with custom protection levels where the page name has been create-protected and the user has the right to undelete but not the right to edit protected pages.", + "pagedata-title": "Title shown on the special page when a form or text is presented", + "pagedata-text": "Text shown on Special:PageData when no page title was given, explaining how to use the special page.", + "pagedata-not-acceptable": "Error shown when none of the formats acceptable to the client is supported (HTTP error 406). Parameters:\n* $1 - the list of supported MIME types", + "pagedata-bad-title": "Error shown when the requested title is invalid. Parameters:\n* $1: the malformed title" } diff --git a/languages/messages/MessagesEn.php b/languages/messages/MessagesEn.php index ac9f1b79c6..17b00b7dd0 100644 --- a/languages/messages/MessagesEn.php +++ b/languages/messages/MessagesEn.php @@ -466,6 +466,7 @@ $specialPageAliases = [ 'Newimages' => [ 'NewFiles', 'NewImages' ], 'Newpages' => [ 'NewPages' ], 'PagesWithProp' => [ 'PagesWithProp', 'Pageswithprop', 'PagesByProp', 'Pagesbyprop' ], + 'PageData' => [ 'Pagedata' ], 'PageLanguage' => [ 'PageLanguage' ], 'PasswordReset' => [ 'PasswordReset' ], 'PermanentLink' => [ 'PermanentLink', 'PermaLink' ], diff --git a/tests/phpunit/includes/linkeddata/PageDataRequestHandlerTest.php b/tests/phpunit/includes/linkeddata/PageDataRequestHandlerTest.php new file mode 100644 index 0000000000..eb67fae955 --- /dev/null +++ b/tests/phpunit/includes/linkeddata/PageDataRequestHandlerTest.php @@ -0,0 +1,270 @@ +interfaceTitle = Title::newFromText( "Special:PageDataRequestHandlerTest" ); + + $this->obLevel = ob_get_level(); + } + + protected function tearDown() { + $obLevel = ob_get_level(); + + while ( ob_get_level() > $this->obLevel ) { + ob_end_clean(); + } + + if ( $obLevel !== $this->obLevel ) { + $this->fail( "Test changed output buffer level: was {$this->obLevel}" . + "before test, but $obLevel after test."
+ ); + } + + parent::tearDown(); + } + + /** + * @return PageDataRequestHandler + */ + protected function newHandler() { + return new PageDataRequestHandler( 'json' ); + } + + /** + * @param array $params + * @param string[] $headers + * + * @return OutputPage + */ + protected function makeOutputPage( array $params, array $headers ) { + // construct request + $request = new FauxRequest( $params ); + $request->response()->header( 'Status: 200 OK', true, 200 ); // init/reset + + foreach ( $headers as $name => $value ) { + $request->setHeader( strtoupper( $name ), $value ); + } + + // construct Context and OutputPage + $context = new DerivativeContext( RequestContext::getMain() ); + $context->setRequest( $request ); + + $output = new OutputPage( $context ); + $output->setTitle( $this->interfaceTitle ); + $context->setOutput( $output ); + + return $output; + } + + public function handleRequestProvider() { + + $cases = []; + + $cases[] = [ '', [], [], '!!', 400 ]; + + $cases[] = [ '', [ 'title' => 'Helsinki' ], [], '!!', 303, [ 'Location' => '!.+!' ] ]; + + $subpageCases = []; + foreach ( $cases as $c ) { + $case = $c; + $case[0] = ''; + + if ( isset( $case[1]['title'] ) ) { + $case[0] .= $case[1]['title']; + unset( $case[1]['title'] ); + } + + $subpageCases[] = $case; + } + + $cases = array_merge( $cases, $subpageCases ); + + $cases[] = [ + '', + [ 'title' => 'Helsinki' ], + [ 'Accept' => 'text/HTML' ], + '!!', + 303, + [ 'Location' => '!Helsinki$!' ] + ]; + + $cases[] = [ + '', + [ + 'title' => 'Helsinki', + 'revision' => '4242', + ], + [ 'Accept' => 'text/HTML' ], + '!!', + 303, + [ 'Location' => '!Helsinki(\?|&)oldid=4242!' ] + ]; + + $cases[] = [ + 'Helsinki', + [], + [], + '!!', + 303, + [ 'Location' => '!Helsinki&action=raw!' 
] + ]; + + // An unsupported type in the Accept header ("Accept: text/foobar") triggers a 406 + $cases[] = [ + 'Helsinki', + [], + [ 'Accept' => 'text/foobar' ], + '!!', + 406, + [], + ]; + + $cases[] = [ + 'Helsinki', + [], + [ 'Accept' => 'text/HTML' ], + '!!', + 303, + [ 'Location' => '!Helsinki$!' ] + ]; + + return $cases; + } + + /** + * @dataProvider handleRequestProvider + * + * @param string $subpage The subpage to request (or '') + * @param array $params Request parameters + * @param array $headers Request headers + * @param string $expectedOutput Regex to match the output against. + * @param int $expectedStatusCode Expected HTTP status code. + * @param string[] $expectedHeaders Expected HTTP response headers. + */ + public function testHandleRequest( + $subpage, + array $params, + array $headers, + $expectedOutput, + $expectedStatusCode = 200, + array $expectedHeaders = [] + ) { + $output = $this->makeOutputPage( $params, $headers ); + $request = $output->getRequest(); + + /* @var FauxResponse $response */ + $response = $request->response(); + + // construct handler + $handler = $this->newHandler(); + + try { + ob_start(); + $handler->handleRequest( $subpage, $request, $output ); + + if ( $output->getRedirect() !== '' ) { + // hack to apply redirect to web response + $output->output(); + } + + $text = ob_get_contents(); + ob_end_clean(); + + $this->assertEquals( $expectedStatusCode, $response->getStatusCode(), 'status code' ); + $this->assertRegExp( $expectedOutput, $text, 'output' ); + + foreach ( $expectedHeaders as $name => $exp ) { + $value = $response->getHeader( $name ); + $this->assertNotNull( $value, "header: $name" ); + $this->assertInternalType( 'string', $value, "header: $name" ); + $this->assertRegExp( $exp, $value, "header: $name" ); + } + } catch ( HttpError $e ) { + ob_end_clean(); + $this->assertEquals( $expectedStatusCode, $e->getStatusCode(), 'status code' ); + $this->assertRegExp( $expectedOutput, $e->getHTML(), 'error output' ); + } + + // We always set
"Access-Control-Allow-Origin: *" + $this->assertSame( '*', $response->getHeader( 'Access-Control-Allow-Origin' ) ); + } + + public function provideHttpContentNegotiation() { + $helsinki = Title::newFromText( 'Helsinki' ); + return [ + 'Accept Header of HTML' => [ + $helsinki, + [ 'ACCEPT' => 'text/html' ], // headers + 'Helsinki' + ], + 'Accept Header without weights' => [ + $helsinki, + [ 'ACCEPT' => '*/*, text/html, text/x-wiki' ], + 'Helsinki&action=raw' + ], + 'Accept Header with weights' => [ + $helsinki, + [ 'ACCEPT' => 'text/*; q=0.5, text/json; q=0.7, application/rdf+xml; q=0.8' ], + 'Helsinki&action=raw' + ], + 'Accept Header accepting evertyhing and HTML' => [ + $helsinki, + [ 'ACCEPT' => 'text/html, */*' ], + 'Helsinki&action=raw' + ], + 'No Accept Header' => [ + $helsinki, + [], + 'Helsinki&action=raw' + ], + ]; + } + + /** + * @dataProvider provideHttpContentNegotiation + * + * @param Title $title + * @param array $headers Request headers + * @param string $expectedRedirectSuffix Expected suffix of the HTTP Location header. 
+ * + * @throws HttpError + */ + public function testHttpContentNegotiation( + Title $title, + array $headers, + $expectedRedirectSuffix + ) { + /* @var FauxResponse $response */ + $output = $this->makeOutputPage( [], $headers ); + $request = $output->getRequest(); + + $handler = $this->newHandler(); + $handler->httpContentNegotiation( $request, $output, $title ); + + $this->assertStringEndsWith( + $expectedRedirectSuffix, + $output->getRedirect(), + 'redirect target' + ); + } +} diff --git a/tests/phpunit/includes/specials/SpecialPageDataTest.php b/tests/phpunit/includes/specials/SpecialPageDataTest.php new file mode 100644 index 0000000000..7569d63192 --- /dev/null +++ b/tests/phpunit/includes/specials/SpecialPageDataTest.php @@ -0,0 +1,155 @@ +getContext()->setOutput( new OutputPage( $page->getContext() ) ); + + $page->setRequestHandler( new PageDataRequestHandler() ); + + return $page; + } + + public function provideExecute() { + $cases = []; + + $cases['Empty request'] = [ '', [], [], '!!', 200 ]; + + $cases['Only title specified'] = [ + '', + [ 'title' => 'Helsinki' ], + [], + '!!', + 303, + [ 'Location' => '!.+!' ] + ]; + + $subpageCases = []; + foreach ( $cases as $c ) { + $case = $c; + $case[0] = ''; + + if ( isset( $case[1]['title'] ) ) { + $case[0] .= $case[1]['title']; + unset( $case[1]['title'] ); + } + + $subpageCases[] = $case; + } + + $cases = array_merge( $cases, $subpageCases ); + + $cases['Accept only HTML'] = [ + '', + [ 'title' => 'Helsinki' ], + [ 'Accept' => 'text/HTML' ], + '!!', + 303, + [ 'Location' => '!Helsinki$!' ] + ]; + + $cases['Accept only HTML with revid'] = [ + '', + [ + 'title' => 'Helsinki', + 'revision' => '4242', + ], + [ 'Accept' => 'text/HTML' ], + '!!', + 303, + [ 'Location' => '!Helsinki(\?|&)oldid=4242!' ] + ]; + + $cases['Nothing specified'] = [ + 'Helsinki', + [], + [], + '!!', + 303, + [ 'Location' => '!Helsinki&action=raw!' 
] + ]; + + $cases['Invalid Accept header'] = [ + 'Helsinki', + [], + [ 'Accept' => 'text/foobar' ], + '!!', + 406, + [], + ]; + + return $cases; + } + + /** + * @dataProvider provideExecute + * + * @param string $subpage The subpage to request (or '') + * @param array $params Request parameters + * @param array $headers Request headers + * @param string $expRegExp Regex to match the output against. + * @param int $expCode Expected HTTP status code + * @param array $expHeaders Expected HTTP response headers + */ + public function testExecute( + $subpage, + array $params, + array $headers, + $expRegExp, + $expCode = 200, + array $expHeaders = [] + ) { + $request = new FauxRequest( $params ); + $request->response()->header( 'Status: 200 OK', true, 200 ); // init/reset + + foreach ( $headers as $name => $value ) { + $request->setHeader( strtoupper( $name ), $value ); + } + + try { + /* @var FauxResponse $response */ + list( $output, $response ) = $this->executeSpecialPage( $subpage, $request ); + + $this->assertEquals( $expCode, $response->getStatusCode(), "status code" ); + $this->assertRegExp( $expRegExp, $output, "output" ); + + foreach ( $expHeaders as $name => $exp ) { + $value = $response->getHeader( $name ); + $this->assertNotNull( $value, "header: $name" ); + $this->assertInternalType( 'string', $value, "header: $name" ); + $this->assertRegExp( $exp, $value, "header: $name" ); + } + } catch ( HttpError $e ) { + $this->assertEquals( $expCode, $e->getStatusCode(), "status code" ); + $this->assertRegExp( $expRegExp, $e->getHTML(), "error output" ); + } + } + + public function testSpecialPageWithoutParameters() { + $this->setContentLang( Language::factory( 'en' ) ); + $request = new FauxRequest(); + $request->response()->header( 'Status: 200 OK', true, 200 ); // init/reset + + list( $output, ) = $this->executeSpecialPage( '', $request ); + + $this->assertContains( + "Content negotiation applies based on you client's Accept header.", + $output, + "output" + ); + } + 
+} -- 2.20.1