From 930efa63e1db7634f69b3e798da80933ab42db4a Mon Sep 17 00:00:00 2001
From: "C. Scott Ananian"
Date: Tue, 2 Jul 2019 14:02:33 -0400
Subject: [PATCH] Improve LanguageConverter performance on pages with many HTML tags

We were concatenating a single character to the end of the wikitext
source (which copies the entire string) every time through an inner
loop; when the page was large and the loop count was large this took
an excessive amount of time.

Bug: T223969
Change-Id: Ib80306b0bc6c73b750d492764f0e2dfd3a7a5450
---
 languages/LanguageConverter.php | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php
index c5ff9d65a7..7fd3631e75 100644
--- a/languages/LanguageConverter.php
+++ b/languages/LanguageConverter.php
@@ -426,8 +426,9 @@ class LanguageConverter {
 
 		// We add a marker (\004) at the end of text, to ensure we always match the
 		// entire text (Otherwise, pcre.backtrack_limit might cause silent failure)
+		$textWithMarker = $text . "\004";
 		while ( $startPos < strlen( $text ) ) {
-			if ( preg_match( $reg, $text . "\004", $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
+			if ( preg_match( $reg, $textWithMarker, $markupMatches, PREG_OFFSET_CAPTURE, $startPos ) ) {
 				$elementPos = $markupMatches[0][1];
 				$element = $markupMatches[0][0];
 				if ( $element === "\004" ) {
-- 
2.20.1