From 0449e2d7a4b79a40a8c6557a12f72c003efae281 Mon Sep 17 00:00:00 2001 From: Jens Frank Date: Sun, 29 Feb 2004 13:33:51 +0000 Subject: [PATCH] extended tokenizer to handle prefixed links --- includes/Parser.php | 13 +++---------- includes/Tokenizer.php | 43 ++++++++++++++++++++++++++++++++---------- 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/includes/Parser.php b/includes/Parser.php index 310bf4b206..ee1aa7f152 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -583,6 +583,7 @@ class Parser $lastToken = array_pop( $tokenStack ); } $txt = $linkText ."]]"; + $prefix = $lastToken["text"]; $nextToken = $tokenizer->previewToken(); if ( $nextToken["type"] == "text" ) { @@ -590,8 +591,7 @@ class Parser $nextToken = $tokenizer->nextToken(); $txt .= $nextToken["text"]; } - $txt = $this->handleInternalLink( $txt ); - #$txt = "<" . $txt . ">"; + $txt = $this->handleInternalLink( $txt, $prefix ); } $tagIsOpen = (count( $tokenStack ) != 0); break; @@ -643,7 +643,7 @@ class Parser return $s; } - /* private */ function handleInternalLink( $line ) + /* private */ function handleInternalLink( $line, $prefix ) { global $wgLang, $wgLinkCache; global $wgNamespacesWithSubpages, $wgLanguageCode; @@ -679,13 +679,6 @@ class Parser wfProfileOut( "$fname-setup" ); - $prefix = $new_prefix; - if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $line, $m ) ) { - $new_prefix = $m[2]; - $line = $m[1]; - } else { - $new_prefix = ""; - } if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt $text = $m[2]; $trail = $m[3]; diff --git a/includes/Tokenizer.php b/includes/Tokenizer.php index f0e9b08a3b..d7eb080b73 100644 --- a/includes/Tokenizer.php +++ b/includes/Tokenizer.php @@ -25,22 +25,31 @@ class Tokenizer { function preParse() { - $this->mCount = preg_match_all( "/(\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/", - $this->mText, $this->mMatch, + global $wgLang; + if ( $wgLang->linkPrefixExtension() ) { + $regex = 
"/(([a-zA-Z\x80-\xff]+)\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/"; + # 000000000000000000000000000000000000000000000000000000 + # 1111111111111111111111111111111111111111111111111111 + # 222222222222222222 + # which $this->mMatch[...] will contain the match. + } else { + $regex = "/(\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/"; + } + + $this->mCount = preg_match_all( $regex, $this->mText, $this->mMatch, PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE); $this->mMatchPos=0; + # print( "<pre>" );
+		# print_r( $this->mMatch );
+		# print( "</pre>" ); } function nextToken() { $token = $this->previewToken(); if ( $token ) { - if ( $token["type"] == "text" ) { - $this->mPos = $token["mPos"]; - } else { - $this->mMatchPos = $token["mMatchPos"]; - $this->mPos = $token["mPos"]; - } + $this->mMatchPos = $token["mMatchPos"]; + $this->mPos = $token["mPos"]; } return $token; } @@ -54,16 +63,30 @@ class Tokenizer { $token["type"] = "text"; $token["text"] = substr( $this->mText, $this->mPos, $this->mMatch[0][$this->mMatchPos][1] - $this->mPos ); + # What the pointers would change to if this would not just be a preview + $token["mMatchPos"] = $this->mMatchPos; $token["mPos"] = $this->mMatch[0][$this->mMatchPos][1]; } else { - $token["type"] = $this->mMatch[0][$this->mMatchPos][0]; - $token["mPos"] = $this->mPos + strlen($token["type"]); + # If linkPrefixExtension is set, $this->mMatch[2][$this->mMatchPos][0] + # contains the link prefix, or is null if no link prefix exist. + if ( $this->mMatch[2][$this->mMatchPos][0] ) + { + # prefixed link open tag, [0] is "prefix[[" + $token["type"] = "[["; + $token["text"] = $this->mMatch[2][$this->mMatchPos][0]; # the prefix + } else { + $token["type"] = $this->mMatch[0][$this->mMatchPos][0]; + } + # What the pointers would change to if this would not just be a preview + $token["mPos"] = $this->mPos + strlen( $this->mMatch[0][$this->mMatchPos][0] ); $token["mMatchPos"] = $this->mMatchPos + 1; } } elseif ( $this->mPos < $this->mTextLength ) { $token["type"] = "text"; $token["text"] = substr( $this->mText, $this->mPos ); + # What the pointers would change to if this would not just be a preview $token["mPos"] = $this->mTextLength; + $token["mMatchPos"] = $this->mMatchPos; } else { $token = FALSE; } -- 2.20.1