From e87c20e7a3243c359f7e189ddcc45b3f744a797b Mon Sep 17 00:00:00 2001
From: Brion Vibber
Date: Tue, 23 Nov 2004 05:36:40 +0000
Subject: [PATCH] Skip the strtr() in do_html_entity_decode() when a
 preg_match() pre-check finds no entity, which is much faster on the common
 case (no match). On pages with a large number of links (hundreds) this
 actually made up about 1/10 of the time spent in
 Parser::replaceInternalLinks().

The cached translation table is now rebuilt when $quote_style changes,
not only when $charset does.

---
Changes from the first version of this patch:
 - no preg_replace() with the /e (eval) modifier, which PHP 7 removed;
   the regexp now only decides whether strtr() has to run at all
 - entity names go through preg_quote() before they are joined
 - $snip is initialised before the loop
 - the static cache is keyed on $quote_style as well as $charset
 - the index line below still carries the blob ids of the first version

 includes/GlobalFunctions.php | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

diff --git a/includes/GlobalFunctions.php b/includes/GlobalFunctions.php
index dd889f34d8..3fc9231208 100644
--- a/includes/GlobalFunctions.php
+++ b/includes/GlobalFunctions.php
@@ -91,17 +91,55 @@ if ( !function_exists( 'mb_substr' ) ) {
 function do_html_entity_decode( $string, $quote_style=ENT_COMPAT, $charset='ISO-8859-1' ) {
+	# Decode the HTML entities listed by
+	# get_html_translation_table( HTML_ENTITIES, $quote_style ) in $string.
+	#
+	# $string      - text that may contain entities such as '&amp;'
+	# $quote_style - passed to get_html_translation_table() to select the table
+	# $charset     - 'utf-8' (any case) runs the replacement characters through
+	#                utf8_encode(); any other value uses the table as returned
+	#
+	# Returns $string with every listed entity replaced; a string containing
+	# none of them is returned as is.
 	static $trans;
 	static $savedCharset;
+	static $savedQuoteStyle;
+	static $regexp;
-	if( !isset( $trans ) || $savedCharset != $charset ) {
+	# The table depends on both arguments, so rebuild it when either changes.
+	if( !isset( $trans ) || $savedCharset != $charset || $savedQuoteStyle != $quote_style ) {
 		$trans = array_flip( get_html_translation_table( HTML_ENTITIES, $quote_style ) );
 		$savedCharset = $charset;
-		# Assumes $charset will always be the same through a run, and only understands
-		# utf-8 or default. Note - mixing latin1 named entities and unicode numbered
+		$savedQuoteStyle = $quote_style;
+
+		# Note - mixing latin1 named entities and unicode numbered
 		# ones will result in a bad link.
 		if( strcasecmp( 'utf-8', $charset ) == 0 ) {
 			$trans = array_map( 'utf8_encode', $trans );
 		}
+
+		/**
+		 * Most links will _not_ contain these fun guys,
+		 * and on long pages with many links we can get
+		 * called a lot.
+		 *
+		 * A regular expression search is faster than
+		 * a strtr or str_replace with a hundred-ish
+		 * entries, so it is used as a cheap pre-check
+		 * and strtr only runs when something matched.
+		 *
+		 * They all look like '&xxxx;'...
+		 */
+		$snip = array();
+		foreach( $trans as $key => $val ) {
+			# Strip the '&' and ';', and escape anything special to PCRE.
+			$snip[] = preg_quote( substr( $key, 1, -1 ), '/' );
+		}
+		$regexp = '/&(?:' . implode( '|', $snip ) . ');/';
 	}
+
+	if( !preg_match( $regexp, $string ) ) {
+		# Common case: nothing to decode.
+		return $string;
+	}
 	return strtr( $string, $trans );
 }
 
 
-- 
2.20.1