From 25512652d289b02fa90268f9ff06ba094af6781d Mon Sep 17 00:00:00 2001 From: "C. Scott Ananian" Date: Tue, 2 Jul 2019 14:04:39 -0400 Subject: [PATCH] LanguageConverter performance: Reuse the same string object for regexp The regular expression used by LanguageConverter::autoConvert() is a constant, but it is being created on-the-fly by every invocation. This causes an expensive full-string comparison when the compiled regular expression is fetched from the cache -- since the regex is 332 bytes long, the time taken for this comparison can add up quickly: on a page with a lot of tags, the regexp cache may spend more time looking up the regexp than it takes to execute it. Bug: T223969 Change-Id: I53c3e631e47a791cf3f0844dd79d4357605c59e3 --- languages/LanguageConverter.php | 45 ++++++++++++++++++--------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/languages/LanguageConverter.php b/languages/LanguageConverter.php index 7fd3631e75..9fc7d73f0e 100644 --- a/languages/LanguageConverter.php +++ b/languages/LanguageConverter.php @@ -391,27 +391,30 @@ class LanguageConverter { IMPORTANT: Beware of failure from pcre.backtrack_limit (T124404). Minimize use of backtracking where possible. */ - $marker = '|' . Parser::MARKER_PREFIX . '[^\x7f]++\x7f'; - - // this one is needed when the text is inside an HTML markup - $htmlfix = '|<[^>\004]++(?=\004$)|^[^<>]*+>'; - - // Optimize for the common case where these tags have - // few or no children. Thus try and possesively get as much as - // possible, and only engage in backtracking when we hit a '<'. - - // disable convert to variants between tags - $codefix = '[^<]*+(?:(?:(?!<\/code>).)[^<]*+)*+<\/code>|'; - // disable conversion of