processing.
Using this hook, added conversion of ordinary spaces to non-breaking
spaces for the French Wikipedia (before : » ! ?, after «, and between digits).
Moved the ----- -> <hr> conversion from a preg_replace into the tokenizer.
$text = $this->removeHTMLtags( $text );
$text = $this->replaceVariables( $text );
- $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
+ # $text = preg_replace( "/(^|\n)-----*/", "\\1<hr>", $text );
$text = str_replace ( "<HR>", "<hr>", $text );
$text = $this->doHeadings( $text );
/* private */ function replaceInternalLinks( $str )
{
+ global $wgLang; # for language specific parser hook
+
$tokenizer=Tokenizer::newFromString( $str );
$tokenStack = array();
}
$tagIsOpen = (count( $tokenStack ) != 0);
break;
+ case "----":
+ $txt = "\n<hr>\n";
+ break;
case "'''":
# This and the three next ones handle quotes
$txt = $this->handle3Quotes( $state, $token );
$txt="";
break;
default:
- # An unkown token. Highlight.
- $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
- $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
+ # Call language specific Hook.
+ $txt = $wgLang->processToken( $token, $tokenStack );
+ if ( NULL == $txt ) {
+ # An unknown token. Highlight.
+ $txt = "<font color=\"#FF0000\"><b>".$token["type"]."</b></font>";
+ $txt .= "<font color=\"#FFFF00\"><b>".$token["text"]."</b></font>";
+ }
break;
}
# If we're parsing the interior of a link, don't append the interior to $s,
function preParse()
{
global $wgLang;
+
+ # Build the tokenizer regex step by step; every step appends one more alternative.
+ # Base alternatives: quote runs for <em>/<strong>, and a newline followed by four or more hyphens for <hr>.
+ $regex = "\'\'\'\'\'|\'\'\'|\'\'|\n-----*";
+ # Link opener: if the language uses link prefixes, also capture the word characters directly before [[.
if ( $wgLang->linkPrefixExtension() ) {
- $regex = "/(([a-zA-Z\x80-\xff]+)\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/";
- # 000000000000000000000000000000000000000000000000000000
- # 1111111111111111111111111111111111111111111111111111
- # 222222222222222222
- # which $this->mMatch[...] will contain the match.
+ $regex .= "|([a-zA-Z\x80-\xff]+)\[\[";
} else {
- $regex = "/(\[\[|\]\]|\'\'\'\'\'|\'\'\'|\'\')/";
+ $regex .= "|\[\[";
}
+ # Link closer: ]]
+ $regex .= "|\]\]";
+ # Language-specific alternatives, appended verbatim, so a non-empty value must bring its own leading |.
+ $regex .= $wgLang->tokenizerRegex();
+ # Wrap all alternatives in a single capturing group and add the / delimiters.
+ $regex = "/(" . $regex . ")/";
+ # Collect every match in $this->mText together with its string offset (PREG_OFFSET_CAPTURE).
$this->mCount = preg_match_all( $regex, $this->mText, $this->mMatch,
PREG_PATTERN_ORDER|PREG_OFFSET_CAPTURE);
$this->mMatchPos=0;
- # print( "<pre>" );
- # print_r( $this->mMatch );
- # print( "</pre>" );
}
function nextToken()
$token["text"] = $this->mMatch[2][$this->mMatchPos][0]; # the prefix
} else {
$token["type"] = $this->mMatch[0][$this->mMatchPos][0];
+ if ( substr($token["type"],1,4) == "----" )
+ {
+ # any number of hyphens bigger than four is a <HR>.
+ # strip down to four.
+ $token["type"]="----";
+ }
}
# What the pointers would change to if this would not just be a preview
$token["mPos"] = $this->mPos + strlen( $this->mMatch[0][$this->mMatchPos][0] );
{
return "<em>$text</em>";
}
+
+ # Returns extra alternatives for the tokenizer regex. This default adds none (empty string); see LanguageFr.php for an example.
+ function tokenizerRegex()
+ {
+ return "";
+ }
+
+ # Hook for tokens produced by the tokenizerRegex() alternatives. Returns the text to add to the
+ # output, or NULL if the token is unknown. This default recognises no token and always returns NULL.
+ function processToken( &$token , &$tokenStack)
+ {
+ return NULL;
+ }
+
}
@include_once( "Language" . ucfirst( $wgLanguageCode ) . ".php" );
else return $m;
}
+
+ # Returns extra tokenizer alternatives, each with a leading |: a space before : » ! or ?, a « followed by a space, and a space between two digits.
+ function tokenizerRegex()
+ {
+ return "| [:»!?]|« |[0-9] [0-9]";
+ }
+
+ # Turns the tokens matched by this class's tokenizerRegex() into output text; returns NULL for any other token.
+ # NOTE(review): the replacement literals below presumably should carry non-breaking spaces; they show up as plain spaces here - confirm the actual bytes (or use &nbsp;).
+ function processToken( &$token , &$tokenStack)
+ {
+ if ( preg_match( "/ ([:»!?])/", $token["type"], $m ) ) # space followed by : » ! or ?
+ {
+ $txt = " " . $m[1];
+ } elseif ( "« " == $token["type"] ) # opening guillemet followed by a space
+ {
+ $txt = "« ";
+ } elseif ( preg_match( "/([0-9]) ([0-9])/", $token["type"], $m ) ) # space between two digits
+ {
+ $txt = $m[1] . " " . $m[2];
+ } else
+ {
+ $txt = NULL; # not one of this language's tokens
+ }
+ return $txt;
+ }
}
?>