'PackedOverlayImageGallery' => __DIR__ . '/includes/gallery/PackedOverlayImageGallery.php',
'Page' => __DIR__ . '/includes/page/Page.php',
'PageArchive' => __DIR__ . '/includes/page/PageArchive.php',
+ 'PageDataRequestHandler' => __DIR__ . '/includes/linkeddata/PageDataRequestHandler.php',
'PageExists' => __DIR__ . '/maintenance/pageExists.php',
'PageLangLogFormatter' => __DIR__ . '/includes/logging/PageLangLogFormatter.php',
'PageProps' => __DIR__ . '/includes/PageProps.php',
'SpecialNewpages' => __DIR__ . '/includes/specials/SpecialNewpages.php',
'SpecialPage' => __DIR__ . '/includes/specialpage/SpecialPage.php',
'SpecialPageAction' => __DIR__ . '/includes/actions/SpecialPageAction.php',
+ 'SpecialPageData' => __DIR__ . '/includes/specials/SpecialPageData.php',
'SpecialPageFactory' => __DIR__ . '/includes/specialpage/SpecialPageFactory.php',
'SpecialPageLanguage' => __DIR__ . '/includes/specials/SpecialPageLanguage.php',
'SpecialPagesWithProp' => __DIR__ . '/includes/specials/SpecialPagesWithProp.php',
--- /dev/null
+<?php
+
+use MediaWiki\Http\HttpAcceptParser;
+use MediaWiki\Http\HttpAcceptNegotiator;
+
+/**
+ * Request handler implementing a data interface for MediaWiki pages.
+ *
+ * A request names a page (and optionally a revision); the handler answers with
+ * an HTTP 303 redirect to a URL for that page, chosen via HTTP content
+ * negotiation against the formats supported by the page's content handler.
+ *
+ * @license GPL-2.0+
+ * @author Daniel Kinzler
+ * @author Amir Sarabadanai
+ */
+class PageDataRequestHandler {
+
+	/**
+	 * Checks whether the request is complete, i.e. whether it contains all information needed
+	 * to reply with page data.
+	 *
+	 * A request is complete if a non-empty title is given either as $title or as the
+	 * "title" request parameter. This does not check whether the request is valid and
+	 * will actually produce a successful response.
+	 *
+	 * @param string|null $title Page title
+	 * @param WebRequest $request
+	 *
+	 * @return bool
+	 */
+	public function canHandleRequest( $title, WebRequest $request ) {
+		if ( $title !== null && $title !== '' ) {
+			return true;
+		}
+
+		// No explicit title given: fall back to the "title" request parameter.
+		return $request->getText( 'title', '' ) !== '';
+	}
+
+	/**
+	 * Main method for handling requests.
+	 *
+	 * @param string|null $title Page title; used if the request has no "title" parameter
+	 * @param WebRequest $request The request parameters. Known parameters are:
+	 *        - title: the page title
+	 *        - format: the format
+	 *        - oldid|revision: the revision ID
+	 * @param OutputPage $output
+	 *
+	 * @note: Instead of an output page, a WebResponse could be sufficient, but
+	 *        redirect logic is currently implemented in OutputPage.
+	 *
+	 * @throws HttpError 400 if the title is missing or malformed, 406 if no acceptable
+	 *         format could be negotiated
+	 */
+	public function handleRequest( $title, WebRequest $request, OutputPage $output ) {
+		// No matter what: The response is always public
+		$output->getRequest()->response()->header( 'Access-Control-Allow-Origin: *' );
+
+		// The "title" parameter wins over the $title argument,
+		// and "revision" wins over "oldid".
+		$titleText = $request->getText( 'title', $title );
+		$revision = $request->getInt( 'oldid', 0 );
+		$revision = $request->getInt( 'revision', $revision );
+
+		if ( $titleText === null || $titleText === '' ) {
+			//TODO: different error message?
+			throw new HttpError( 400, wfMessage( 'pagedata-bad-title', $titleText ) );
+		}
+
+		try {
+			$target = Title::newFromTextThrow( $titleText );
+		} catch ( MalformedTitleException $ex ) {
+			throw new HttpError( 400, wfMessage( 'pagedata-bad-title', $titleText ) );
+		}
+
+		$this->httpContentNegotiation( $request, $output, $target, $revision );
+	}
+
+	/**
+	 * Applies HTTP content negotiation.
+	 * If the negotiation is successful, this method will set the appropriate redirect
+	 * in the OutputPage object and return. Otherwise, an HttpError is thrown.
+	 *
+	 * @param WebRequest $request
+	 * @param OutputPage $output
+	 * @param Title $title
+	 * @param int $revision The desired revision
+	 *
+	 * @throws HttpError 406 if none of the supported formats (nor text/html) is acceptable
+	 */
+	public function httpContentNegotiation(
+		WebRequest $request,
+		OutputPage $output,
+		Title $title,
+		$revision = 0
+	) {
+		$contentHandler = ContentHandler::getForTitle( $title );
+		$mimeTypes = $contentHandler->getSupportedFormats();
+
+		$headers = $request->getAllHeaders();
+		if ( isset( $headers['ACCEPT'] ) ) {
+			$parser = new HttpAcceptParser();
+			$accept = $parser->parseWeights( $headers['ACCEPT'] );
+		} else {
+			// anything goes
+			$accept = [
+				'*' => 0.1 // just to make extra sure
+			];
+
+			// Prefer the default (first) format. Guarded so that a content handler
+			// reporting no formats does not trigger an undefined-offset error.
+			if ( isset( $mimeTypes[0] ) ) {
+				$accept[$mimeTypes[0]] = 1;
+			}
+		}
+
+		$negotiator = new HttpAcceptNegotiator( $mimeTypes );
+		$format = $negotiator->getBestSupportedKey( $accept, null );
+
+		// Nothing from the content handler matched: HTML is still fine if the
+		// client explicitly listed it.
+		if ( $format === null && isset( $accept['text/html'] ) ) {
+			$format = 'text/html';
+		}
+
+		if ( $format === null ) {
+			$msg = wfMessage( 'pagedata-not-acceptable', implode( ', ', $mimeTypes ) );
+			throw new HttpError( 406, $msg );
+		}
+
+		$url = $this->getDocUrl( $title, $format, $revision );
+		$output->redirect( $url, 303 );
+	}
+
+	/**
+	 * Returns a url representing the given title.
+	 *
+	 * For 'text/html' this is the plain page URL; for every other format
+	 * "action=raw" is added to the query.
+	 *
+	 * @param Title $title
+	 * @param string|null $format The (normalized) format name, or ''
+	 * @param int $revision Revision ID; only added to the URL (as "oldid") if positive
+	 * @return string
+	 */
+	private function getDocUrl( Title $title, $format = '', $revision = 0 ) {
+		$params = [];
+
+		if ( $revision > 0 ) {
+			$params['oldid'] = $revision;
+		}
+
+		if ( $format !== 'text/html' ) {
+			$params['action'] = 'raw';
+		}
+
+		return $title->getFullURL( $params );
+	}
+
+}
'Revisiondelete' => 'SpecialRevisionDelete',
'RunJobs' => 'SpecialRunJobs',
'Specialpages' => 'SpecialSpecialpages',
+ 'PageData' => 'SpecialPageData'
];
private static $list;
--- /dev/null
+<?php
+
+/**
+ * Special page to act as an endpoint for accessing raw page data.
+ * The web server should generally be configured to make this accessible via a canonical URL/URI,
+ * such as <http://my.domain.org/data/main/Foo>.
+ *
+ * The actual work is delegated to a PageDataRequestHandler.
+ *
+ * @license GPL-2.0+
+ */
+class SpecialPageData extends SpecialPage {
+
+	/**
+	 * @var PageDataRequestHandler|null Handler doing the actual work; created lazily
+	 */
+	private $requestHandler = null;
+
+	public function __construct() {
+		parent::__construct( 'PageData' );
+	}
+
+	/**
+	 * Injects the request handler this special page delegates to.
+	 * Useful when a specific PageDataRequestHandler instance is already at hand,
+	 * e.g. during testing.
+	 *
+	 * When no handler is injected, initDependencies() creates a default one on demand.
+	 *
+	 * @param PageDataRequestHandler $requestHandler
+	 */
+	public function setRequestHandler( PageDataRequestHandler $requestHandler ) {
+		$this->requestHandler = $requestHandler;
+	}
+
+	/**
+	 * Fills in members that have not been set yet.
+	 * Currently this only concerns $this->requestHandler.
+	 */
+	protected function initDependencies() {
+		if ( $this->requestHandler !== null ) {
+			return;
+		}
+
+		$this->requestHandler = $this->newDefaultRequestHandler();
+	}
+
+	/**
+	 * Builds the PageDataRequestHandler used when none was injected.
+	 *
+	 * @return PageDataRequestHandler
+	 */
+	private function newDefaultRequestHandler() {
+		$handler = new PageDataRequestHandler();
+
+		return $handler;
+	}
+
+	/**
+	 * @see SpecialPage::execute
+	 *
+	 * @param string|null $subPage
+	 *
+	 * @throws HttpError
+	 */
+	public function execute( $subPage ) {
+		$this->initDependencies();
+
+		$request = $this->getRequest();
+
+		if ( $this->requestHandler->canHandleRequest( $subPage, $request ) ) {
+			$this->requestHandler->handleRequest( $subPage, $request, $this->getOutput() );
+			return;
+		}
+
+		// If there is no title, show an HTML form
+		// TODO: Don't do this if HTML is not acceptable according to HTTP headers.
+		$this->showForm();
+	}
+
+	/**
+	 * Presents an informative page to the user; used when there is no page to output.
+	 */
+	public function showForm() {
+		$out = $this->getOutput();
+		$out->showErrorPage( 'pagedata-title', 'pagedata-text' );
+	}
+
+	/**
+	 * Keeps this page out of the listing on Special:SpecialPages.
+	 *
+	 * @return bool Always false
+	 */
+	public function isListed() {
+		return false;
+	}
+
+}
"gotointerwiki-invalid": "The specified title is invalid.",
"gotointerwiki-external": "You are about to leave {{SITENAME}} to visit [[$2]], which is a separate website.\n\n'''[$1 Continue to $1]'''",
"undelete-cantedit": "You cannot undelete this page as you are not allowed to edit this page.",
- "undelete-cantcreate": "You cannot undelete this page as there is no existing page with this name and you are not allowed to create this page."
+ "undelete-cantcreate": "You cannot undelete this page as there is no existing page with this name and you are not allowed to create this page.",
+ "pagedata-title": "Page data",
+ "pagedata-text": "This page provides a data interface to pages. Please provide the page title in the URL, using subpage syntax.\n* Content negotiation applies based on you client's Accept header. This means that the page data will be provided in the format preferred by your client.",
+ "pagedata-not-acceptable": "No matching format found. Supported MIME types: $1",
+ "pagedata-bad-title": "Invalid title: $1."
}
"gotointerwiki-invalid": "Message shown on Special:GoToInterwiki if given an invalid title.",
"gotointerwiki-external": "Message shown on Special:GoToInterwiki if given a external interwiki link (e.g. [[Special:GoToInterwiki/Google:Foo]]). $1 is the full url the user is trying to get to. $2 is the text of the interwiki link (e.g. \"Google:foo\").",
"undelete-cantedit": "Shown if the user tries to undelete a page that they cannot edit",
- "undelete-cantcreate": "Shown if the user tries to undelete a page which currently does not exist, and they are not allowed to create it. This could for example happen on a wiki with custom protection levels where the page name has been create-protected and the user has the right to undelete but not the right to edit protected pages."
+ "undelete-cantcreate": "Shown if the user tries to undelete a page which currently does not exist, and they are not allowed to create it. This could for example happen on a wiki with custom protection levels where the page name has been create-protected and the user has the right to undelete but not the right to edit protected pages.",
+ "pagedata-title": "Title shown on the special page when a form or text is presented",
+ "pagedata-text": "Informative text shown on the special page when no page title was provided in the request.",
+ "pagedata-not-acceptable": "Error shown when none of the formats acceptable to the client is supported (HTTP error 406). Parameters:\n* $1 - the list of supported MIME types",
+ "pagedata-bad-title": "Error shown when the requested title is invalid. Parameters:\n* $1 - the malformed title"
}
'Newimages' => [ 'NewFiles', 'NewImages' ],
'Newpages' => [ 'NewPages' ],
'PagesWithProp' => [ 'PagesWithProp', 'Pageswithprop', 'PagesByProp', 'Pagesbyprop' ],
+ 'PageData' => [ 'Pagedata' ],
'PageLanguage' => [ 'PageLanguage' ],
'PasswordReset' => [ 'PasswordReset' ],
'PermanentLink' => [ 'PermanentLink', 'PermaLink' ],
--- /dev/null
+<?php
+
+/**
+ * Tests for PageDataRequestHandler: request handling (status codes, redirects,
+ * CORS header) and HTTP content negotiation.
+ *
+ * @covers PageDataRequestHandler
+ *
+ * @group PageData
+ *
+ * @license GPL-2.0+
+ */
+class PageDataRequestHandlerTest extends \MediaWikiTestCase {
+
+	/**
+	 * @var Title Title set on the OutputPage used by the tests
+	 */
+	private $interfaceTitle;
+
+	/**
+	 * @var int Output buffer nesting level recorded before each test
+	 */
+	private $obLevel;
+
+	protected function setUp() {
+		parent::setUp();
+
+		$this->interfaceTitle = Title::newFromText( "Special:PageDataRequestHandlerTest" );
+
+		$this->obLevel = ob_get_level();
+	}
+
+	/**
+	 * Discards any output buffers a test left open, and fails the test if the
+	 * buffer level differs from the one recorded in setUp().
+	 */
+	protected function tearDown() {
+		$obLevel = ob_get_level();
+
+		while ( ob_get_level() > $this->obLevel ) {
+			ob_end_clean();
+		}
+
+		if ( $obLevel !== $this->obLevel ) {
+			$this->fail( "Test changed output buffer level: was {$this->obLevel} " .
+				"before test, but $obLevel after test."
+			);
+		}
+
+		parent::tearDown();
+	}
+
+	/**
+	 * @return PageDataRequestHandler
+	 */
+	protected function newHandler() {
+		// PageDataRequestHandler declares no constructor, so no arguments are passed.
+		return new PageDataRequestHandler();
+	}
+
+	/**
+	 * Builds an OutputPage whose context carries a FauxRequest with the given
+	 * parameters and headers.
+	 *
+	 * @param array $params
+	 * @param string[] $headers
+	 *
+	 * @return OutputPage
+	 */
+	protected function makeOutputPage( array $params, array $headers ) {
+		// construct request
+		$request = new FauxRequest( $params );
+		$request->response()->header( 'Status: 200 OK', true, 200 ); // init/reset
+
+		foreach ( $headers as $name => $value ) {
+			$request->setHeader( strtoupper( $name ), $value );
+		}
+
+		// construct Context and OutputPage
+		$context = new DerivativeContext( RequestContext::getMain() );
+		$context->setRequest( $request );
+
+		$output = new OutputPage( $context );
+		$output->setTitle( $this->interfaceTitle );
+		$context->setOutput( $output );
+
+		return $output;
+	}
+
+	/**
+	 * Data sets for testHandleRequest(), in the form
+	 * [ subpage, params, headers, output regex, status code, expected headers ].
+	 *
+	 * @return array[]
+	 */
+	public function handleRequestProvider() {
+		$cases = [];
+
+		$cases[] = [ '', [], [], '!!', 400 ];
+
+		$cases[] = [ '', [ 'title' => 'Helsinki' ], [], '!!', 303, [ 'Location' => '!.+!' ] ];
+
+		// Repeat the cases above with the title moved from the "title"
+		// request parameter into the subpage.
+		$subpageCases = [];
+		foreach ( $cases as $case ) {
+			$case[0] = '';
+
+			if ( isset( $case[1]['title'] ) ) {
+				$case[0] .= $case[1]['title'];
+				unset( $case[1]['title'] );
+			}
+
+			$subpageCases[] = $case;
+		}
+
+		$cases = array_merge( $cases, $subpageCases );
+
+		$cases[] = [
+			'',
+			[ 'title' => 'Helsinki' ],
+			[ 'Accept' => 'text/HTML' ],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki$!' ]
+		];
+
+		$cases[] = [
+			'',
+			[
+				'title' => 'Helsinki',
+				'revision' => '4242',
+			],
+			[ 'Accept' => 'text/HTML' ],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki(\?|&)oldid=4242!' ]
+		];
+
+		$cases[] = [
+			'Helsinki',
+			[],
+			[],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki&action=raw!' ]
+		];
+
+		// Subpage "Helsinki" with "Accept: text/foobar" triggers a 406
+		$cases[] = [
+			'Helsinki',
+			[],
+			[ 'Accept' => 'text/foobar' ],
+			'!!',
+			406,
+			[],
+		];
+
+		$cases[] = [
+			'Helsinki',
+			[],
+			[ 'Accept' => 'text/HTML' ],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki$!' ]
+		];
+
+		return $cases;
+	}
+
+	/**
+	 * @dataProvider handleRequestProvider
+	 *
+	 * @param string $subpage The subpage to request (or '')
+	 * @param array $params Request parameters
+	 * @param array $headers Request headers
+	 * @param string $expectedOutput Regex to match the output against.
+	 * @param int $expectedStatusCode Expected HTTP status code.
+	 * @param string[] $expectedHeaders Expected HTTP response headers.
+	 */
+	public function testHandleRequest(
+		$subpage,
+		array $params,
+		array $headers,
+		$expectedOutput,
+		$expectedStatusCode = 200,
+		array $expectedHeaders = []
+	) {
+		$output = $this->makeOutputPage( $params, $headers );
+		$request = $output->getRequest();
+
+		/* @var FauxResponse $response */
+		$response = $request->response();
+
+		// construct handler
+		$handler = $this->newHandler();
+
+		try {
+			ob_start();
+			$handler->handleRequest( $subpage, $request, $output );
+
+			if ( $output->getRedirect() !== '' ) {
+				// hack to apply redirect to web response
+				$output->output();
+			}
+
+			$text = ob_get_contents();
+			ob_end_clean();
+
+			$this->assertEquals( $expectedStatusCode, $response->getStatusCode(), 'status code' );
+			$this->assertRegExp( $expectedOutput, $text, 'output' );
+
+			foreach ( $expectedHeaders as $name => $exp ) {
+				$value = $response->getHeader( $name );
+				$this->assertNotNull( $value, "header: $name" );
+				$this->assertInternalType( 'string', $value, "header: $name" );
+				$this->assertRegExp( $exp, $value, "header: $name" );
+			}
+		} catch ( HttpError $e ) {
+			// The handler threw before the buffer was closed above.
+			ob_end_clean();
+			$this->assertEquals( $expectedStatusCode, $e->getStatusCode(), 'status code' );
+			$this->assertRegExp( $expectedOutput, $e->getHTML(), 'error output' );
+		}
+
+		// We always set "Access-Control-Allow-Origin: *"
+		$this->assertSame( '*', $response->getHeader( 'Access-Control-Allow-Origin' ) );
+	}
+
+	/**
+	 * Data sets for testHttpContentNegotiation(), in the form
+	 * [ title, request headers, expected suffix of the redirect URL ].
+	 *
+	 * @return array[]
+	 */
+	public function provideHttpContentNegotiation() {
+		$helsinki = Title::newFromText( 'Helsinki' );
+		return [
+			'Accept Header of HTML' => [
+				$helsinki,
+				[ 'ACCEPT' => 'text/html' ], // headers
+				'Helsinki'
+			],
+			'Accept Header without weights' => [
+				$helsinki,
+				[ 'ACCEPT' => '*/*, text/html, text/x-wiki' ],
+				'Helsinki&action=raw'
+			],
+			'Accept Header with weights' => [
+				$helsinki,
+				[ 'ACCEPT' => 'text/*; q=0.5, text/json; q=0.7, application/rdf+xml; q=0.8' ],
+				'Helsinki&action=raw'
+			],
+			'Accept Header accepting evertyhing and HTML' => [
+				$helsinki,
+				[ 'ACCEPT' => 'text/html, */*' ],
+				'Helsinki&action=raw'
+			],
+			'No Accept Header' => [
+				$helsinki,
+				[],
+				'Helsinki&action=raw'
+			],
+		];
+	}
+
+	/**
+	 * @dataProvider provideHttpContentNegotiation
+	 *
+	 * @param Title $title
+	 * @param array $headers Request headers
+	 * @param string $expectedRedirectSuffix Expected suffix of the HTTP Location header.
+	 *
+	 * @throws HttpError
+	 */
+	public function testHttpContentNegotiation(
+		Title $title,
+		array $headers,
+		$expectedRedirectSuffix
+	) {
+		$output = $this->makeOutputPage( [], $headers );
+		$request = $output->getRequest();
+
+		$handler = $this->newHandler();
+		$handler->httpContentNegotiation( $request, $output, $title );
+
+		$this->assertStringEndsWith(
+			$expectedRedirectSuffix,
+			$output->getRedirect(),
+			'redirect target'
+		);
+	}
+}
--- /dev/null
+<?php
+
+/**
+ * Tests for the Special:PageData special page.
+ *
+ * @covers SpecialPageData
+ *
+ * @group Database
+ *
+ * @group SpecialPage
+ *
+ * @license GPL-2.0+
+ * @author Daniel Kinzler
+ */
+class SpecialPageDataTest extends SpecialPageTestBase {
+
+	/**
+	 * @return SpecialPageData Page under test, wired with a fresh OutputPage and
+	 *         an explicitly injected request handler
+	 */
+	protected function newSpecialPage() {
+		$specialPage = new SpecialPageData();
+		$context = $specialPage->getContext();
+
+		// why is this needed?
+		$context->setOutput( new OutputPage( $specialPage->getContext() ) );
+
+		$specialPage->setRequestHandler( new PageDataRequestHandler() );
+
+		return $specialPage;
+	}
+
+	/**
+	 * Data sets for testExecute(), in the form
+	 * [ subpage, params, headers, output regex, status code, expected headers ].
+	 *
+	 * @return array[]
+	 */
+	public function provideExecute() {
+		$cases = [];
+
+		$cases['Empty request'] = [ '', [], [], '!!', 200 ];
+
+		$cases['Only title specified'] = [
+			'',
+			[ 'title' => 'Helsinki' ],
+			[],
+			'!!',
+			303,
+			[ 'Location' => '!.+!' ]
+		];
+
+		// Derive a variant of each case above in which the title travels as
+		// the subpage instead of the "title" request parameter.
+		$subpageCases = [];
+		foreach ( $cases as $variant ) {
+			$variant[0] = '';
+
+			if ( isset( $variant[1]['title'] ) ) {
+				$variant[0] .= $variant[1]['title'];
+				unset( $variant[1]['title'] );
+			}
+
+			$subpageCases[] = $variant;
+		}
+
+		$cases = array_merge( $cases, $subpageCases );
+
+		$cases['Accept only HTML'] = [
+			'',
+			[ 'title' => 'Helsinki' ],
+			[ 'Accept' => 'text/HTML' ],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki$!' ]
+		];
+
+		$cases['Accept only HTML with revid'] = [
+			'',
+			[
+				'title' => 'Helsinki',
+				'revision' => '4242',
+			],
+			[ 'Accept' => 'text/HTML' ],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki(\?|&)oldid=4242!' ]
+		];
+
+		$cases['Nothing specified'] = [
+			'Helsinki',
+			[],
+			[],
+			'!!',
+			303,
+			[ 'Location' => '!Helsinki&action=raw!' ]
+		];
+
+		$cases['Invalid Accept header'] = [
+			'Helsinki',
+			[],
+			[ 'Accept' => 'text/foobar' ],
+			'!!',
+			406,
+			[],
+		];
+
+		return $cases;
+	}
+
+	/**
+	 * Executes the special page and checks status code, output and response
+	 * headers; an HttpError thrown by the page is checked against the same
+	 * expected status code and output pattern.
+	 *
+	 * @dataProvider provideExecute
+	 *
+	 * @param string $subpage The subpage to request (or '')
+	 * @param array $params Request parameters
+	 * @param array $headers Request headers
+	 * @param string $expRegExp Regex to match the output against.
+	 * @param int $expCode Expected HTTP status code
+	 * @param array $expHeaders Expected HTTP response headers
+	 */
+	public function testExecute(
+		$subpage,
+		array $params,
+		array $headers,
+		$expRegExp,
+		$expCode = 200,
+		array $expHeaders = []
+	) {
+		$request = new FauxRequest( $params );
+		$request->response()->header( 'Status: 200 OK', true, 200 ); // init/reset
+
+		foreach ( $headers as $headerName => $headerValue ) {
+			$request->setHeader( strtoupper( $headerName ), $headerValue );
+		}
+
+		try {
+			/* @var FauxResponse $response */
+			list( $html, $response ) = $this->executeSpecialPage( $subpage, $request );
+
+			$this->assertEquals( $expCode, $response->getStatusCode(), "status code" );
+			$this->assertRegExp( $expRegExp, $html, "output" );
+
+			foreach ( $expHeaders as $name => $pattern ) {
+				$actual = $response->getHeader( $name );
+				$this->assertNotNull( $actual, "header: $name" );
+				$this->assertInternalType( 'string', $actual, "header: $name" );
+				$this->assertRegExp( $pattern, $actual, "header: $name" );
+			}
+		} catch ( HttpError $error ) {
+			$this->assertEquals( $expCode, $error->getStatusCode(), "status code" );
+			$this->assertRegExp( $expRegExp, $error->getHTML(), "error output" );
+		}
+	}
+
+	/**
+	 * Without a title, the page shows the informative text.
+	 */
+	public function testSpecialPageWithoutParameters() {
+		$this->setContentLang( Language::factory( 'en' ) );
+
+		$request = new FauxRequest();
+		$request->response()->header( 'Status: 200 OK', true, 200 ); // init/reset
+
+		list( $html, ) = $this->executeSpecialPage( '', $request );
+
+		$this->assertContains(
+			"Content negotiation applies based on you client's Accept header.",
+			$html,
+			"output"
+		);
+	}
+
+}