From 63e93db2d8b56833ec6047ac7dd96d2a418d4d36 Mon Sep 17 00:00:00 2001 From: Daniel Friesen Date: Sat, 26 Nov 2011 16:29:25 +0000 Subject: [PATCH] Implement path routing code. - Makes extending paths with extensions simpler. - Should fix bug 32621 by parsing paths based on pattern weight rather than pattern order. --- RELEASE-NOTES-1.19 | 2 + docs/hooks.txt | 10 +- includes/AutoLoader.php | 1 + includes/PathRouter.php | 269 ++++++++++++++++++++++ includes/WebRequest.php | 33 ++- tests/phpunit/includes/PathRouterTest.php | 155 +++++++++++++ 6 files changed, 450 insertions(+), 20 deletions(-) create mode 100644 includes/PathRouter.php create mode 100644 tests/phpunit/includes/PathRouterTest.php diff --git a/RELEASE-NOTES-1.19 b/RELEASE-NOTES-1.19 index ee8a54bef7..5085fdaab5 100644 --- a/RELEASE-NOTES-1.19 +++ b/RELEASE-NOTES-1.19 @@ -93,6 +93,8 @@ production. * (bug 24037) Add byte length of revision to Special:Contributions. * (bug 1672) Added $wgDisableUploadScriptChecks to allow uploading of files containing HTML or JS. DISABLING THESE CHECKS IS VERY DANGEROUS. +* New path mappings can be added using the WebRequestPathInfoRouter hook + and adding paths to the PathRouter. === Bug fixes in 1.19 === * $wgUploadNavigationUrl should be used for file redlinks if. diff --git a/docs/hooks.txt b/docs/hooks.txt index 72f65765cf..4e1cae8292 100644 --- a/docs/hooks.txt +++ b/docs/hooks.txt @@ -2186,14 +2186,8 @@ $title: Title object $redirect: whether the page is a redirect $skin: Skin object -'WebRequestGetPathInfoRequestURI': while extracting path info from REQUEST_URI. - Allows an extension to extend the extraction of titles from paths. - Implementing hooks should follow the pattern used in core: - * Use the `$matches = WebRequest::extractTitle` pattern - * Ensure that you test `if ( !$matches ) {` before you try extracting a title - from the path so that you don't override an already found match. -$path: The request path to extract a title from. -&$matches: The array to apply matches to. +'WebRequestPathInfoRouter': While building the PathRouter to parse the REQUEST_URI. +$router: The PathRouter instance 'WikiExporter::dumpStableQuery': Get the SELECT query for "stable" revisions dumps diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index 307326a266..e7a9577fb7 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -163,6 +163,7 @@ $wgAutoloadLocalClasses = array( 'PageQueryPage' => 'includes/PageQueryPage.php', 'Pager' => 'includes/Pager.php', 'PasswordError' => 'includes/User.php', + 'PathRouter' => 'includes/PathRouter.php', 'PermissionsError' => 'includes/Exception.php', 'PhpHttpRequest' => 'includes/HttpFunctions.php', 'PoolCounter' => 'includes/PoolCounter.php', diff --git a/includes/PathRouter.php b/includes/PathRouter.php new file mode 100644 index 0000000000..5dbcfb1a00 --- /dev/null +++ b/includes/PathRouter.php @@ -0,0 +1,269 @@ +add( "/wiki/$1" ); + * - Matches /wiki/Foo style urls and extracts the title + * $router->add( array( 'edit' => "/edit/$1" ), array( 'action' => '$key' ) ); + * - Matches /edit/Foo style urls and sets action=edit + * $router->add( '/$2/$1', + * array( 'variant' => '$2' ), + * array( '$2' => array( 'zh-hant', 'zh-hans' ) + * ); + * - Matches /zh-hant/Foo or /zh-hans/Foo + * $router->addStrict( "/foo/Bar", array( 'title' => 'Baz' ) ); + * - Matches /foo/Bar explicitly and uses "Baz" as the title + * $router->add( '/help/$1', array( 'title' => 'Help:$1' ) ); + * - Matches /help/Foo with "Help:Foo" as the title + * $router->add( '/help/$1', array( 'title' => 'Help:$1' ) ); + * - Matches + * $router->add( '/$1', array( 'foo' => array( 'value' => 'bar$2' ) ); + * - Matches /Foo and sets 'foo=bar$2' without $2 being replaced + * $router->add( '/$1', array( 'data:foo' => 'bar' ), array( 'callback' => 'functionname' ) ); + * - Matches /Foo, adds the key 'foo' with the value 'bar' to the data array + * and calls functionname( &$matches, $data ); + * + * Path patterns: + * - Paths may contain $# patterns such as $1, $2, etc... + * - $1 will match 0 or more while the rest will match 1 or more + * - Unless you use addStrict "/wiki" and "/wiki/" will be expanded to "/wiki/$1" + * + * Params: + * - In a pattern $1, $2, etc... will be replaced with the relevant contents + * - If you used a keyed array as a path pattern $key will be replaced with the relevant contents + * - The default behavior is equivalent to `array( 'title' => '$1' )`, if you don't want the title parameter you can explicitly use `array( 'title' => false )` + * - You can specify a value that won't have replacements in it using `'foo' => array( 'value' => 'bar' );` + * + * Options: + * - The option keys $1, $2, etc... can be specified to restrict the possible values of that variable. + * A string can be used for a single value, or an array for multiple. + * - When the option key 'strict' is set (Using addStrict is simpler than doing this directly) + * the path won't have $1 implicitly added to it. + * - The option key 'callback' can specify a callback that will be run when a path is matched. + * The callback will have the arguments ( &$matches, $data ) and the matches array can be modified. + * + * @since 1.19 + * @author Daniel Friesen + */ +class PathRouter { + + protected function doAdd( $path, $params, $options, $key = null ) { + if ( $path[0] !== '/' ) { + $path = '/' . $path; + } + + if ( !isset( $options['strict'] ) || !$options['strict'] ) { + // Unless this is a strict path make sure that the path has a $1 + if ( strpos( $path, '$1' ) === false ) { + if ( substr( $path, -1 ) !== '/' ) { + $path .= '/'; + } + $path .= '$1'; + } + } + + if ( !isset( $params['title'] ) && strpos( $path, '$1' ) !== false ) { + $params['title'] = '$1'; + } + if ( isset( $params['title'] ) && $params['title'] === false ) { + unset( $params['title'] ); + } + + foreach ( $params as $paramName => $paramData ) { + if ( is_string( $paramData ) ) { + if ( preg_match( '/\$(\d+|key)/', $paramData ) ) { + $paramArrKey = 'pattern'; + } else { + // If there's no replacement use a value instead + // of a pattern for a little more efficiency + $paramArrKey = 'value'; + } + $params[$paramName] = array( + $paramArrKey => $paramData + ); + } + } + + foreach ( $options as $optionName => $optionData ) { + if ( preg_match( '/^\$\d+$/', $optionName ) ) { + if ( !is_array( $optionData ) ) { + $options[$optionName] = array( $optionData ); + } + } + } + + $pattern = (object)array( + 'path' => $path, + 'params' => $params, + 'options' => $options, + 'key' => $key, + ); + $pattern->weight = self::makeWeight( $pattern ); + $this->patterns[] = $pattern; + } + + /** + * Add a new path pattern to the path router + * + * @param $path The path pattern to add + * @param $params The params for this path pattern + * @param $options The options for this path pattern + */ + public function add( $path, $params = array(), $options = array() ) { + if ( is_array( $path ) ) { + foreach ( $path as $key => $onePath ) { + $this->doAdd( $onePath, $params, $options, $key ); + } + } else { + $this->doAdd( $path, $params, $options ); + } + } + + /** + * Add a new path pattern to the path router with the strict option on + * @see self::add + */ + public function addStrict( $path, $params = array(), $options = array() ) { + $options['strict'] = true; + $this->add( $path, $params, $options ); + } + + protected function sortByWeight() { + $weights = array(); + foreach( $this->patterns as $key => $pattern ) { + $weights[$key] = $pattern->weight; + } + array_multisort( $weights, SORT_DESC, SORT_NUMERIC, $this->patterns ); + } + + public static function makeWeight( $pattern ) { + # Start with a weight of 0 + $weight = 0; + + // Explode the path to work with + $path = explode( '/', $pattern->path ); + + # For each level of the path + foreach( $path as $piece ) { + if ( preg_match( '/^\$(\d+|key)$/', $piece ) ) { + # For a piece that is only a $1 variable add 1 points of weight + $weight += 1; + } elseif ( preg_match( '/\$(\d+|key)/', $piece ) ) { + # For a piece that simply contains a $1 variable add 2 points of weight + $weight += 2; + } else { + # For a solid piece add a full 3 points of weight + $weight += 3; + } + } + + foreach ( $pattern->options as $key => $option ) { + if ( preg_match( '/^\$\d+$/', $key ) ) { + # Add 0.5 for restrictions to values + # This way given two separate "/$2/$1" patterns the + # one with a limited set of $2 values will dominate + # the one that'll match more loosely + $weight += 0.5; + } + } + + return $weight; + } + + /** + * Parse a path and return the query matches for the path + * + * @param $path The path to parse + * @return Array The array of matches for the path + */ + public function parse( $path ) { + $this->sortByWeight(); + + $matches = null; + + foreach ( $this->patterns as $pattern ) { + $matches = self::extractTitle( $path, $pattern ); + if ( !is_null( $matches ) ) { + break; + } + } + + return is_null( $matches ) ? array() : $matches; + } + + protected static function extractTitle( $path, $pattern ) { + $regexp = preg_quote( $pattern->path, '#' ); + $regexp = preg_replace( '#\\\\\$1#', '(?P.*)', $regexp ); + $regexp = preg_replace( '#\\\\\$(\d+)#', '(?P.+?)', $regexp ); + $regexp = "#^{$regexp}$#"; + + $matches = array(); + $data = array(); + + if ( preg_match( $regexp, $path, $m ) ) { + foreach ( $pattern->options as $key => $option ) { + if ( preg_match( '/^\$\d+$/', $key ) ) { + $n = intval( substr( $key, 1 ) ); + $value = $m["par{$n}"]; + if ( !in_array( $value, $option ) ) { + return null; + } + } + } + + foreach ( $m as $matchKey => $matchValue ) { + if ( preg_match( '/^par\d+$/', $matchKey ) ) { + $n = intval( substr( $matchKey, 3 ) ); + $data['$'.$n] = $matchValue; + } + } + + foreach ( $pattern->params as $paramName => $paramData ) { + $value = null; + if ( preg_match( '/^data:/', $paramName ) ) { + $isData = true; + $key = substr( $paramName, 5 ); + } else { + $isData = false; + $key = $paramName; + } + + if ( isset( $paramData['value'] ) ) { + $value = $paramData['value']; + } elseif ( isset( $paramData['pattern'] ) ) { + $value = $paramData['pattern']; + foreach ( $m as $matchKey => $matchValue ) { + if ( preg_match( '/^par\d+$/', $matchKey ) ) { + $n = intval( substr( $matchKey, 3 ) ); + $value = str_replace( '$' . $n, $matchValue, $value ); + } + } + if ( isset( $pattern->key ) ) { + $value = str_replace( '$key', $pattern->key, $value ); + } + if ( preg_match( '/\$(\d+|key)/', $value ) ) { + // Still contains $# or $key patterns after replacement + // Seams like we don't have all the data, abort + return null; + } + } + + if ( $isData ) { + $data[$key] = $value; + } else { + $matches[$key] = $value; + } + } + + if ( isset( $pattern->options['callback'] ) ) { + call_user_func_array( $pattern->options['callback'], array( &$matches, $data ) ); + } + } else { + return null; + } + return $matches; + } + +} diff --git a/includes/WebRequest.php b/includes/WebRequest.php index 9f6d277f8d..7b43c8d1b4 100644 --- a/includes/WebRequest.php +++ b/includes/WebRequest.php @@ -93,40 +93,49 @@ class WebRequest { // Abort to keep from breaking... return $matches; } + + $router = new PathRouter; + // Raw PATH_INFO style - $matches = self::extractTitle( $path, "$wgScript/$1" ); + $router->add( "$wgScript/$1" ); - if( !$matches - && isset( $_SERVER['SCRIPT_NAME'] ) + if( isset( $_SERVER['SCRIPT_NAME'] ) && preg_match( '/\.php5?/', $_SERVER['SCRIPT_NAME'] ) ) { # Check for SCRIPT_NAME, we handle index.php explicitly # But we do have some other .php files such as img_auth.php # Don't let root article paths clober the parsing for them - $matches = self::extractTitle( $path, $_SERVER['SCRIPT_NAME'] . "/$1" ); + $router->add( $_SERVER['SCRIPT_NAME'] . "/$1" ); } global $wgArticlePath; - if( !$matches && $wgArticlePath ) { - $matches = self::extractTitle( $path, $wgArticlePath ); + if( $wgArticlePath ) { + $router->add( $wgArticlePath ); } global $wgActionPaths; - if( !$matches && $wgActionPaths ) { - $matches = self::extractTitle( $path, $wgActionPaths, 'action' ); + if( $wgActionPaths ) { + $router->add( $wgActionPaths, array( 'action' => '$key' ) ); } global $wgVariantArticlePath, $wgContLang; - if( !$matches && $wgVariantArticlePath ) { - $variantPaths = array(); + if( $wgVariantArticlePath ) { + /*$variantPaths = array(); foreach( $wgContLang->getVariants() as $variant ) { $variantPaths[$variant] = str_replace( '$2', $variant, $wgVariantArticlePath ); } - $matches = self::extractTitle( $path, $variantPaths, 'variant' ); + $router->add( $variantPaths, array( 'parameter' => 'variant' ) );*/ + // Maybe actually this? + $router->add( $wgVariantArticlePath, + array( 'variant' => '$2'), + array( '$2' => $wgContLang->getVariants() ) + ); } - wfRunHooks( 'WebRequestGetPathInfoRequestURI', array( $path, &$matches ) ); + wfRunHooks( 'WebRequestPathInfoRouter', array( $router ) ); + + $matches = $router->parse( $path ); } } elseif ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) { // Mangled PATH_INFO diff --git a/tests/phpunit/includes/PathRouterTest.php b/tests/phpunit/includes/PathRouterTest.php new file mode 100644 index 0000000000..07fa65d3c6 --- /dev/null +++ b/tests/phpunit/includes/PathRouterTest.php @@ -0,0 +1,155 @@ +add("/$1"); + $matches = $router->parse( "/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo" ) ); + } + + /** + * Test loose path auto-$1 + */ + public function testLoose() { + $router = new PathRouter; + $router->add("/"); # Should be the same as "/$1" + $matches = $router->parse( "/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo" ) ); + + $router = new PathRouter; + $router->add("/wiki"); # Should be the same as /wiki/$1 + $matches = $router->parse( "/wiki/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo" ) ); + + $router = new PathRouter; + $router->add("/wiki/"); # Should be the same as /wiki/$1 + $matches = $router->parse( "/wiki/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo" ) ); + } + + /** + * Test to ensure that path is based on specifity, not order + */ + public function testOrder() { + $router = new PathRouter; + $router->add("/$1"); + $router->add("/a/$1"); + $router->add("/b/$1"); + $matches = $router->parse( "/a/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo" ) ); + + $router = new PathRouter; + $router->add("/b/$1"); + $router->add("/a/$1"); + $router->add("/$1"); + $matches = $router->parse( "/a/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo" ) ); + } + + /** + * Test the handling of key based arrays with a url parameter + */ + public function testKeyParameter() { + $router = new PathRouter; + $router->add( array( 'edit' => "/edit/$1" ), array( 'action' => '$key' ) ); + $matches = $router->parse( "/edit/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo", 'action' => 'edit' ) ); + } + + /** + * Test the handling of $2 inside paths + */ + public function testAdditionalParameter() { + // Basic $2 + $router = new PathRouter; + $router->add( '/$2/$1', array( 'test' => '$2' ) ); + $matches = $router->parse( "/asdf/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo", 'test' => 'asdf' ) ); + } + + /** + * Test additional restricted value parameter + */ + public function testRestrictedValue() { + $router = new PathRouter; + $router->add( '/$2/$1', + array( 'test' => '$2' ), + array( '$2' => array( 'a', 'b' ) ) + ); + $router->add( '/$2/$1', + array( 'test2' => '$2' ), + array( '$2' => 'c' ) + ); + $router->add( '/$1' ); + + $matches = $router->parse( "/asdf/Foo" ); + $this->assertEquals( $matches, array( 'title' => "asdf/Foo" ) ); + + $matches = $router->parse( "/a/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo", 'test' => 'a' ) ); + + $matches = $router->parse( "/c/Foo" ); + $this->assertEquals( $matches, array( 'title' => "Foo", 'test2' => 'c' ) ); + } + + public function callbackForTest( &$matches, $data ) { + $matches['x'] = $data['$1']; + $matches['foo'] = $data['foo']; + } + + public function testCallback() { + $router = new PathRouter; + $router->add( "/$1", + array( 'a' => 'b', 'data:foo' => 'bar' ), + array( 'callback' => array( $this, 'callbackForTest' ) ) + ); + $matches = $router->parse( '/Foo'); + $this->assertEquals( $matches, array( + 'title' => "Foo", + 'x' => 'Foo', + 'a' => 'b', + 'foo' => 'bar' + ) ); + } + + /** + * Test to ensure weight of paths is handled correctly + */ + public function testWeight() { + $router = new PathRouter; + $router->addStrict( "/Bar", array( 'ping' => 'pong' ) ); + $router->add( "/asdf-$1", array( 'title' => 'qwerty-$1' ) ); + $router->add( "/$1" ); + $router->add( "/qwerty-$1", array( 'title' => 'asdf-$1' ) ); + $router->addStrict( "/Baz", array( 'marco' => 'polo' ) ); + $router->add( "/a/$1" ); + $router->add( "/asdf/$1" ); + $router->add( "/$2/$1", array( 'unrestricted' => '$2' ) ); + $router->add( array( 'qwerty' => "/qwerty/$1" ), array( 'qwerty' => '$key' ) ); + $router->add( "/$2/$1", array( 'restricted-to-y' => '$2' ), array( '$2' => 'y' ) ); + + foreach( array( + "/Foo" => array( 'title' => "Foo" ), + "/Bar" => array( 'ping' => 'pong' ), + "/Baz" => array( 'marco' => 'polo' ), + "/asdf-foo" => array( 'title' => "qwerty-foo" ), + "/qwerty-bar" => array( 'title' => "asdf-bar" ), + "/a/Foo" => array( 'title' => "Foo" ), + "/asdf/Foo" => array( 'title' => "Foo" ), + "/qwerty/Foo" => array( 'title' => "Foo", 'qwerty' => 'qwerty' ), + "/baz/Foo" => array( 'title' => "Foo", 'unrestricted' => 'baz' ), + "/y/Foo" => array( 'title' => "Foo", 'restricted-to-y' => 'y' ), + ) as $path => $result ) { + $this->assertEquals( $router->parse( $path ), $result ); + } + } + +} -- 2.20.1