extended tokenizer to handle prefixed links
author Jens Frank <jeluf@users.mediawiki.org>
Sun, 29 Feb 2004 13:33:51 +0000 (13:33 +0000)
committer Jens Frank <jeluf@users.mediawiki.org>
Sun, 29 Feb 2004 13:33:51 +0000 (13:33 +0000)
includes/Parser.php
includes/Tokenizer.php

index 310bf4b..ee1aa7f 100644 (file)
@@ -583,6 +583,7 @@ class Parser
                                                        $lastToken = array_pop( $tokenStack );
                                                }
                                                $txt = $linkText ."]]";
+                                               $prefix = $lastToken["text"];
                                                $nextToken = $tokenizer->previewToken();
                                                if ( $nextToken["type"] == "text" ) 
                                                {
@@ -590,8 +591,7 @@ class Parser
                                                        $nextToken = $tokenizer->nextToken();
                                                        $txt .= $nextToken["text"];
                                                }
-                                               $txt = $this->handleInternalLink( $txt );
-                                               #$txt = "<font color=\"#00FF00\"><b>&lt;" . $txt . "&gt;</b></font>";
+                                               $txt = $this->handleInternalLink( $txt, $prefix );
                                        }
                                        $tagIsOpen = (count( $tokenStack ) != 0);
                                        break;
@@ -643,7 +643,7 @@ class Parser
                return $s;
        }
 
-       /* private */ function handleInternalLink( $line )
+       /* private */ function handleInternalLink( $line, $prefix )
        {
                global $wgLang, $wgLinkCache;
                global $wgNamespacesWithSubpages, $wgLanguageCode;
@@ -679,13 +679,6 @@ class Parser
 
                wfProfileOut( "$fname-setup" );
 
-               $prefix = $new_prefix;
-               if ( $wgLang->linkPrefixExtension() && preg_match( $e2, $line, $m ) ) {
-                       $new_prefix = $m[2];
-                       $line = $m[1];
-               } else {
-                       $new_prefix = "";
-               }
                if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt
                        $text = $m[2];
                        $trail = $m[3];                         
index f0e9b08..d7eb080 100644 (file)
@@ -25,22 +25,31 @@ class Tokenizer {
 
        function preParse()
        {
-               $this->mCount = preg_match_all( "/(\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/",
-                                               $this->mText, $this->mMatch,
+               global $wgLang;
+               if (  $wgLang->linkPrefixExtension() ) {
+                       $regex = "/(([a-zA-Z\x80-\xff]+)\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/";
+                       #          000000000000000000000000000000000000000000000000000000
+                       #           1111111111111111111111111111111111111111111111111111
+                       #            222222222222222222
+                       # which $this->mMatch[...] will contain the match.
+               } else {
+                       $regex = "/(\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/";
+               }
+
+               $this->mCount = preg_match_all( $regex, $this->mText, $this->mMatch,
                                                PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE);
                $this->mMatchPos=0;
+               # print( "<pre>" );
+               # print_r( $this->mMatch );
+               # print( "</pre>" );
        }
 
        function nextToken()
        {
                $token = $this->previewToken();
                if ( $token ) {
-                       if ( $token["type"] == "text" ) {
-                               $this->mPos = $token["mPos"];
-                       } else {
-                               $this->mMatchPos = $token["mMatchPos"];
-                               $this->mPos = $token["mPos"];
-                       }
+                       $this->mMatchPos = $token["mMatchPos"];
+                       $this->mPos = $token["mPos"];
                }
                return $token;
        }
@@ -54,16 +63,30 @@ class Tokenizer {
                                $token["type"] = "text";
                                $token["text"] = substr( $this->mText, $this->mPos,
                                                         $this->mMatch[0][$this->mMatchPos][1] - $this->mPos );
+                               # What the pointers would change to if this would not just be a preview
+                               $token["mMatchPos"] = $this->mMatchPos; 
                                $token["mPos"] = $this->mMatch[0][$this->mMatchPos][1];
                        } else {
-                               $token["type"] = $this->mMatch[0][$this->mMatchPos][0];
-                               $token["mPos"] = $this->mPos + strlen($token["type"]);
+                               # If linkPrefixExtension is set,  $this->mMatch[2][$this->mMatchPos][0]
+                               # contains the link prefix, or is null if no link prefix exist.
+                               if ( $this->mMatch[2][$this->mMatchPos][0] )
+                               {
+                                       # prefixed link open tag, [0] is "prefix[["
+                                       $token["type"] = "[[";
+                                       $token["text"] = $this->mMatch[2][$this->mMatchPos][0]; # the prefix
+                               } else {
+                                       $token["type"] = $this->mMatch[0][$this->mMatchPos][0];
+                               }
+                               # What the pointers would change to if this would not just be a preview
+                               $token["mPos"] = $this->mPos + strlen( $this->mMatch[0][$this->mMatchPos][0] );
                                $token["mMatchPos"] = $this->mMatchPos + 1;
                        }
                } elseif ( $this->mPos < $this->mTextLength ) {
                        $token["type"] = "text";
                        $token["text"] = substr( $this->mText, $this->mPos );
+                       # What the pointers would change to if this would not just be a preview
                        $token["mPos"] = $this->mTextLength;
+                       $token["mMatchPos"] = $this->mMatchPos;
                } else {
                        $token = FALSE;
                }