X-Git-Url: http://git.cyclocoop.org/?a=blobdiff_plain;f=includes%2FParser.php;h=5d6a7c7bcc94e6aecc81a4c6004b244fafba8e1d;hb=3545cddd9bafa492146a5433353a9a6312d749cb;hp=4404e7d6d3778a090fe4986693bbd7df703fc954;hpb=bce772029ecc898a50dfd6064beeaa88e647370e;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Parser.php b/includes/Parser.php index 4404e7d6d3..5d6a7c7bcc 100644 --- a/includes/Parser.php +++ b/includes/Parser.php @@ -1,13 +1,6 @@ mTagHooks = array(); $this->clearState(); } - function clearState() - { + function clearState() { $this->mOutput = new ParserOutput; $this->mAutonumber = 0; $this->mLastSection = ""; @@ -77,6 +72,7 @@ class Parser $this->mIncludeCount = array(); $this->mStripState = array(); $this->mArgStack = array(); + $this->mInPre = false; } # First pass--just handle sections, pass the rest off @@ -84,8 +80,7 @@ class Parser # # Returns a ParserOutput # - function parse( $text, &$title, $options, $linestart = true, $clearState = true ) - { + function parse( $text, &$title, $options, $linestart = true, $clearState = true ) { global $wgUseTidy; $fname = "Parser::parse"; wfProfileIn( $fname ); @@ -105,10 +100,15 @@ class Parser # Clean up special characters, only run once, next-to-last before doBlockLevels if(!$wgUseTidy) { $fixtags = array( - "/
/i" => '
', - "/
/i" => '
', - "/
/i"=>'
', - "/<\\/center *>/i" => '
', + # french spaces, last one Guillemet-left + # only if there is something before the space + '/(.) (\\?|:|!|\\302\\273)/i' => '\\1 \\2', + # french spaces, Guillemet-right + "/(\\302\\253) /i"=>"\\1 ", + '/
/i' => '
', + '/
/i' => '
', + '/
/i' => '
', + '/<\\/center *>/i' => '
', # Clean up spare ampersands; note that we probably ought to be # more careful about named entities. '/&(?!:amp;|#[Xx][0-9A-fa-f]+;|#[0-9]+;|[a-zA-Z0-9]+;)/' => '&' @@ -116,13 +116,18 @@ class Parser $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); } else { $fixtags = array( - "/
/i"=>'
', - "/<\\/center *>/i" => '
' + # french spaces, last one Guillemet-left + '/ (\\?|:|!|\\302\\273)/i' => ' \\1', + # french spaces, Guillemet-right + '/(\\302\\253) /i' => '\\1 ', + '/
/i' => '
', + '/<\\/center *>/i' => '
' ); $text = preg_replace( array_keys($fixtags), array_values($fixtags), $text ); } # only once and last $text = $this->doBlockLevels( $text, $linestart ); + $text = $this->unstripNoWiki( $text, $this->mStripState ); if($wgUseTidy) { $text = $this->tidy($text); } @@ -131,8 +136,7 @@ class Parser return $this->mOutput; } - /* static */ function getRandomString() - { + /* static */ function getRandomString() { return dechex(mt_rand(0, 0x7fffffff)) . dechex(mt_rand(0, 0x7fffffff)); } @@ -152,24 +156,24 @@ class Parser $content = array( ); } $n = 1; - $stripped = ""; + $stripped = ''; - while ( "" != $text ) { + while ( '' != $text ) { if($tag==STRIP_COMMENTS) { - $p = preg_split( "//i", $p[1], 2 ); + $q = preg_split( '/-->/i', $p[1], 2 ); } else { $q = preg_split( "/<\\/\\s*$tag\\s*>/i", $p[1], 2 ); } - $marker = $rnd . sprintf("%08X", $n++); + $marker = $rnd . sprintf('%08X', $n++); $content[$marker] = $q[0]; $stripped .= $marker; $text = $q[1]; @@ -187,20 +191,21 @@ class Parser # will be stripped in addition to other tags. This is important # for section editing, where these comments cause confusion when # counting the sections in the wikisource - function strip( $text, &$state, $stripcomments = false ) - { + function strip( $text, &$state, $stripcomments = false ) { $render = ($this->mOutputType == OT_HTML); $nowiki_content = array(); - $hiero_content = array(); $math_content = array(); $pre_content = array(); $comment_content = array(); - + $ext_content = array(); + # Replace any instances of the placeholders $uniq_prefix = UNIQ_PREFIX; #$text = str_replace( $uniq_prefix, wfHtmlEscapeFirst( $uniq_prefix ), $text ); - $text = Parser::extractTags("nowiki", $text, $nowiki_content, $uniq_prefix); + + # nowiki + $text = Parser::extractTags('nowiki', $text, $nowiki_content, $uniq_prefix); foreach( $nowiki_content as $marker => $content ){ if( $render ){ $nowiki_content[$marker] = wfEscapeHTMLTagsOnly( $content ); @@ -209,16 +214,8 @@ class Parser } } - $text = Parser::extractTags("hiero", $text, $hiero_content, $uniq_prefix); - foreach( $hiero_content as $marker => $content ){ - if( $render && $GLOBALS['wgUseWikiHiero']){ - $hiero_content[$marker] = WikiHiero( $content, WH_MODE_HTML); - } else { - $hiero_content[$marker] = "$content"; - } - } - - $text = Parser::extractTags("math", $text, $math_content, $uniq_prefix); + # math + $text = Parser::extractTags('math', $text, $math_content, $uniq_prefix); foreach( $math_content as $marker => $content ){ if( $render ) { if( $this->mOptions->getUseTeX() ) { @@ -231,14 +228,17 @@ class Parser } } - $text = Parser::extractTags("pre", $text, $pre_content, $uniq_prefix); + # pre + $text = Parser::extractTags('pre', $text, $pre_content, $uniq_prefix); foreach( $pre_content as $marker => $content ){ if( $render ){ - $pre_content[$marker] = "
" . wfEscapeHTMLTagsOnly( $content ) . "
"; + $pre_content[$marker] = '
' . wfEscapeHTMLTagsOnly( $content ) . '
'; } else { $pre_content[$marker] = "
$content
"; } } + + # Comments if($stripcomments) { $text = Parser::extractTags(STRIP_COMMENTS, $text, $comment_content, $uniq_prefix); foreach( $comment_content as $marker => $content ){ @@ -246,49 +246,75 @@ class Parser } } + # Extensions + foreach ( $this->mTagHooks as $tag => $callback ) { + $ext_contents[$tag] = array(); + $text = Parser::extractTags( $tag, $text, $ext_content[$tag], $uniq_prefix ); + foreach( $ext_content[$tag] as $marker => $content ) { + if ( $render ) { + $ext_content[$tag][$marker] = $callback( $content ); + } else { + $ext_content[$tag][$marker] = "<$tag>$content"; + } + } + } + # Merge state with the pre-existing state, if there is one if ( $state ) { $state['nowiki'] = $state['nowiki'] + $nowiki_content; - $state['hiero'] = $state['hiero'] + $hiero_content; $state['math'] = $state['math'] + $math_content; $state['pre'] = $state['pre'] + $pre_content; $state['comment'] = $state['comment'] + $comment_content; + + foreach( $ext_content as $tag => $array ) { + if ( array_key_exists( $tag, $state ) ) { + $state[$tag] = $state[$tag] + $array; + } + } } else { $state = array( 'nowiki' => $nowiki_content, - 'hiero' => $hiero_content, 'math' => $math_content, 'pre' => $pre_content, - 'comment' => $comment_content - ); + 'comment' => $comment_content, + ) + $ext_content; } return $text; } - function unstrip( $text, &$state ) - { + # always call unstripNoWiki() after this one + function unstrip( $text, &$state ) { # Must expand in reverse order, otherwise nested tags will be corrupted $contentDict = end( $state ); for ( $contentDict = end( $state ); $contentDict !== false; $contentDict = prev( $state ) ) { - for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) { - $text = str_replace( key( $contentDict ), $content, $text ); + if( key($state) != 'nowiki') { + for ( $content = end( $contentDict ); $content !== false; $content = prev( $contentDict ) ) { + $text = str_replace( key( $contentDict ), $content, $text ); + } } } return $text; } + # always call this after unstrip() to preserve the order + function unstripNoWiki( $text, &$state ) { + # Must expand in reverse order, otherwise nested tags will be corrupted + for ( $content = end($state['nowiki']); $content !== false; $content = prev( $state['nowiki'] ) ) { + $text = str_replace( key( $state['nowiki'] ), $content, $text ); + } + + return $text; + } # Add an item to the strip state # Returns the unique tag which must be inserted into the stripped text # The tag will be replaced with the original text in unstrip() - function insertStripItem( $text, &$state ) - { + function insertStripItem( $text, &$state ) { $rnd = UNIQ_PREFIX . '-item' . Parser::getRandomString(); if ( !$state ) { $state = array( 'nowiki' => array(), - 'hiero' => array(), 'math' => array(), 'pre' => array() ); @@ -297,9 +323,24 @@ class Parser return $rnd; } + # categoryMagic + # generate a list of subcategories and pages for a category + # depending on wfMsg("usenewcategorypage") it either calls the new + # or the old code. The new code will not work properly for some + # languages due to sorting issues, so they might want to turn it + # off. + function categoryMagic() { + $msg = wfMsg('usenewcategorypage'); + if ( '0' == @$msg[0] ) + { + return $this->oldCategoryMagic(); + } else { + return $this->newCategoryMagic(); + } + } + # This method generates the list of subcategories and pages for a category - function categoryMagic () - { + function oldCategoryMagic () { global $wgLang , $wgUser ; if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all @@ -339,66 +380,247 @@ class Parser # Showing subcategories if ( count ( $children ) > 0 ) { - $r .= "

".wfMsg("subcategories")."

\n" ; - $r .= implode ( ", " , $children ) ; + $r .= '

'.wfMsg('subcategories')."

\n" ; + $r .= implode ( ', ' , $children ) ; } # Showing pages in this category if ( count ( $articles ) > 0 ) { $ti = $this->mTitle->getText() ; - $h = wfMsg( "category_header", $ti ); + $h = wfMsg( 'category_header', $ti ); $r .= "

{$h}

\n" ; - $r .= implode ( ", " , $articles ) ; + $r .= implode ( ', ' , $articles ) ; } return $r ; } - function getHTMLattrs () - { + + + function newCategoryMagic () { + global $wgLang , $wgUser ; + if ( !$this->mOptions->getUseCategoryMagic() ) return ; # Doesn't use categories at all + + $cns = Namespace::getCategory() ; + if ( $this->mTitle->getNamespace() != $cns ) return '' ; # This ain't a category page + + $r = "
\n"; + + + $sk =& $wgUser->getSkin() ; + + $articles = array() ; + $articles_start_char = array(); + $children = array() ; + $children_start_char = array(); + $data = array () ; + $id = $this->mTitle->getArticleID() ; + + # FIXME: add limits + $t = wfStrencode( $this->mTitle->getDBKey() ); + $sql = "SELECT DISTINCT cur_title,cur_namespace,cl_sortkey FROM +cur,categorylinks WHERE cl_to='$t' AND cl_from=cur_id ORDER BY +cl_sortkey" ; + $res = wfQuery ( $sql, DB_READ ) ; + while ( $x = wfFetchObject ( $res ) ) + { + $t = $ns = $wgLang->getNsText ( $x->cur_namespace ) ; + if ( $t != '' ) $t .= ':' ; + $t .= $x->cur_title ; + + if ( $x->cur_namespace == $cns ) { + $ctitle = str_replace( '_',' ',$x->cur_title ); + array_push ( $children, $sk->makeKnownLink ( $t, $ctitle ) ) ; # Subcategory + + // If there's a link from Category:A to Category:B, the sortkey of the resulting + // entry in the categorylinks table is Category:A, not A, which it SHOULD be. + // Workaround: If sortkey == "Category:".$title, than use $title for sorting, + // else use sortkey... + if ( ($ns.":".$ctitle) == $x->cl_sortkey ) { + array_push ( $children_start_char, $wgLang->firstChar( $x->cur_title ) ); + } else { + array_push ( $children_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ; + } + } else { + array_push ( $articles , $sk->makeLink ( $t ) ) ; # Page in this category + array_push ( $articles_start_char, $wgLang->firstChar( $x->cl_sortkey ) ) ; + } + } + wfFreeResult ( $res ) ; + + $ti = $this->mTitle->getText() ; + + # Don't show subcategories section if there are none. + if ( count ( $children ) > 0 ) + { + # Showing subcategories + $r .= '

' . wfMsg( 'subcategories' ) . "

\n" + . wfMsg( 'subcategorycount', count( $children ) ); + if ( count ( $children ) > 6 ) { + + // divide list into three equal chunks + $chunk = (int) (count ( $children ) / 3); + + // get and display header + $r .= ''; + + $startChunk = 0; + $endChunk = $chunk; + + // loop through the chunks + for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0; + $chunkIndex < 3; + $chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1) + { + + $r .= ''; + + + } + $r .= '
    '; + // output all subcategories to category + for ($index = $startChunk ; + $index < $endChunk && $index < count($children); + $index++ ) + { + // check for change of starting letter or begging of chunk + if ( ($children_start_char[$index] != $children_start_char[$index - 1]) + || ($index == $startChunk) ) + { + $r .= "

{$children_start_char[$index]}

\n
    "; + } + + $r .= "
  • {$children[$index]}
  • "; + } + $r .= '
'; + } else { + // for short lists of subcategories to category. + + $r .= "

{$children_start_char[0]}

\n"; + $r .= '
  • '.$children[0].'
  • '; + for ($index = 1; $index < count($children); $index++ ) + { + if ($children_start_char[$index] != $children_start_char[$index - 1]) + { + $r .= "

{$children_start_char[$index]}

\n
    "; + } + + $r .= "
  • {$children[$index]}
  • "; + } + $r .= '
'; + } + } # END of if ( count($children) > 0 ) + + $r .= '

' . wfMsg( 'category_header', $ti ) . "

\n" . + wfMsg( 'categoryarticlecount', count( $articles ) ); + + # Showing articles in this category + if ( count ( $articles ) > 6) { + $ti = $this->mTitle->getText() ; + + // divide list into three equal chunks + $chunk = (int) (count ( $articles ) / 3); + + // get and display header + $r .= ''; + + // loop through the chunks + for($startChunk = 0, $endChunk = $chunk, $chunkIndex = 0; + $chunkIndex < 3; + $chunkIndex++, $startChunk = $endChunk, $endChunk += $chunk + 1) + { + + $r .= ''; + + + } + $r .= '
    '; + + // output all articles in category + for ($index = $startChunk ; + $index < $endChunk && $index < count($articles); + $index++ ) + { + // check for change of starting letter or begging of chunk + if ( ($articles_start_char[$index] != $articles_start_char[$index - 1]) + || ($index == $startChunk) ) + { + $r .= "

{$articles_start_char[$index]}

\n
    "; + } + + $r .= "
  • {$articles[$index]}
  • "; + } + $r .= '
'; + } elseif ( count ( $articles ) > 0) { + // for short lists of articles in categories. + $ti = $this->mTitle->getText() ; + + $r .= '

'.$articles_start_char[0]."

\n"; + $r .= '
  • '.$articles[0].'
  • '; + for ($index = 1; $index < count($articles); $index++ ) + { + if ($articles_start_char[$index] != $articles_start_char[$index - 1]) + { + $r .= "

{$articles_start_char[$index]}

\n
    "; + } + + $r .= "
  • {$articles[$index]}
  • "; + } + $r .= '
'; + } + + + return $r ; + } + + # Return allowed HTML attributes + function getHTMLattrs () { $htmlattrs = array( # Allowed attributes--no scripting, etc. - "title", "align", "lang", "dir", "width", "height", - "bgcolor", "clear", /* BR */ "noshade", /* HR */ - "cite", /* BLOCKQUOTE, Q */ "size", "face", "color", - /* FONT */ "type", "start", "value", "compact", + 'title', 'align', 'lang', 'dir', 'width', 'height', + 'bgcolor', 'clear', /* BR */ 'noshade', /* HR */ + 'cite', /* BLOCKQUOTE, Q */ 'size', 'face', 'color', + /* FONT */ 'type', 'start', 'value', 'compact', /* For various lists, mostly deprecated but safe */ - "summary", "width", "border", "frame", "rules", - "cellspacing", "cellpadding", "valign", "char", - "charoff", "colgroup", "col", "span", "abbr", "axis", - "headers", "scope", "rowspan", "colspan", /* Tables */ - "id", "class", "name", "style" /* For CSS */ + 'summary', 'width', 'border', 'frame', 'rules', + 'cellspacing', 'cellpadding', 'valign', 'char', + 'charoff', 'colgroup', 'col', 'span', 'abbr', 'axis', + 'headers', 'scope', 'rowspan', 'colspan', /* Tables */ + 'id', 'class', 'name', 'style' /* For CSS */ ); return $htmlattrs ; } - function fixTagAttributes ( $t ) - { - if ( trim ( $t ) == "" ) return "" ; # Saves runtime ;-) + # Remove non approved attributes and javascript in css + function fixTagAttributes ( $t ) { + if ( trim ( $t ) == '' ) return '' ; # Saves runtime ;-) $htmlattrs = $this->getHTMLattrs() ; # Strip non-approved attributes from the tag $t = preg_replace( - "/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e", + '/(\\w+)(\\s*=\\s*([^\\s\">]+|\"[^\">]*\"))?/e', "(in_array(strtolower(\"\$1\"),\$htmlattrs)?(\"\$1\".((\"x\$3\" != \"x\")?\"=\$3\":'')):'')", $t); # Strip javascript "expression" from stylesheets. Brute force approach: # If anythin offensive is found, all attributes of the HTML tag are dropped if( preg_match( - "/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is", + '/style\\s*=.*(expression|tps*:\/\/|url\\s*\().*/is', wfMungeToUtf8( $t ) ) ) { - $t=""; + $t=''; } return trim ( $t ) ; } - /* interface with html tidy, used if $wgUseTidy = true */ + # interface with html tidy, used if $wgUseTidy = true function tidy ( $text ) { global $wgTidyConf, $wgTidyBin, $wgTidyOpts; global $wgInputEncoding, $wgOutputEncoding; + $fname = 'Parser::tidy'; + wfProfileIn( $fname ); + $cleansource = ''; switch(strtoupper($wgOutputEncoding)) { case 'ISO-8859-1': @@ -411,17 +633,17 @@ class Parser $wgTidyOpts .= ' -raw'; } - $text = ''. 'test'.$text.''; $descriptorspec = array( - 0 => array("pipe", "r"), - 1 => array("pipe", "w"), - 2 => array("file", "/dev/null", "a") + 0 => array('pipe', 'r'), + 1 => array('pipe', 'w'), + 2 => array('file', '/dev/null', 'a') ); $process = proc_open("$wgTidyBin -config $wgTidyConf $wgTidyOpts", $descriptorspec, $pipes); if (is_resource($process)) { - fwrite($pipes[0], $text); + fwrite($pipes[0], $wrappedtext); fclose($pipes[0]); while (!feof($pipes[1])) { $cleansource .= fgets($pipes[1], 1024); @@ -429,15 +651,19 @@ class Parser fclose($pipes[1]); $return_value = proc_close($process); } + + wfProfileOut( $fname ); + if( $cleansource == '' && $text != '') { - return '

'.wfMsg('seriousxhtmlerrors').'

'.htmlspecialchars($text).'
'; + wfDebug( "Tidy error detected!\n" ); + return $text . "\n\n"; } else { return $cleansource; } } - function doTableStuff ( $t ) - { + # parse the wiki syntax used to render tables + function doTableStuff ( $t ) { $t = explode ( "\n" , $t ) ; $td = array () ; # Is currently a td tag open? $ltd = array () ; # Was it TD or TH? @@ -447,20 +673,20 @@ class Parser { $x = trim ( $x ) ; $fc = substr ( $x , 0 , 1 ) ; - if ( "{|" == substr ( $x , 0 , 2 ) ) + if ( '{|' == substr ( $x , 0 , 2 ) ) { - $t[$k] = "\nfixTagAttributes ( substr ( $x , 3 ) ) . ">" ; + $t[$k] = "\n
fixTagAttributes ( substr ( $x , 3 ) ) . '>' ; array_push ( $td , false ) ; - array_push ( $ltd , "" ) ; + array_push ( $ltd , '' ) ; array_push ( $tr , false ) ; - array_push ( $ltr , "" ) ; + array_push ( $ltr , '' ) ; } else if ( count ( $td ) == 0 ) { } # Don't do any of the following - else if ( "|}" == substr ( $x , 0 , 2 ) ) + else if ( '|}' == substr ( $x , 0 , 2 ) ) { $z = "
\n" ; $l = array_pop ( $ltd ) ; - if ( array_pop ( $tr ) ) $z = "" . $z ; + if ( array_pop ( $tr ) ) $z = '' . $z ; if ( array_pop ( $td ) ) $z = "" . $z ; array_pop ( $ltr ) ; $t[$k] = $z ; @@ -470,51 +696,51 @@ class Parser $z = trim ( substr ( $x , 2 ) ) ; $t[$k] = "{$z}\n" ; }*/ - else if ( "|-" == substr ( $x , 0 , 2 ) ) # Allows for |--------------- + else if ( '|-' == substr ( $x , 0 , 2 ) ) # Allows for |--------------- { $x = substr ( $x , 1 ) ; - while ( $x != "" && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ; - $z = "" ; + while ( $x != '' && substr ( $x , 0 , 1 ) == '-' ) $x = substr ( $x , 1 ) ; + $z = '' ; $l = array_pop ( $ltd ) ; - if ( array_pop ( $tr ) ) $z = "" . $z ; + if ( array_pop ( $tr ) ) $z = '' . $z ; if ( array_pop ( $td ) ) $z = "" . $z ; array_pop ( $ltr ) ; $t[$k] = $z ; array_push ( $tr , false ) ; array_push ( $td , false ) ; - array_push ( $ltd , "" ) ; + array_push ( $ltd , '' ) ; array_push ( $ltr , $this->fixTagAttributes ( $x ) ) ; } - else if ( "|" == $fc || "!" == $fc || "|+" == substr ( $x , 0 , 2 ) ) # Caption + else if ( '|' == $fc || '!' == $fc || '|+' == substr ( $x , 0 , 2 ) ) # Caption { - if ( "|+" == substr ( $x , 0 , 2 ) ) + if ( '|+' == substr ( $x , 0 , 2 ) ) { - $fc = "+" ; + $fc = '+' ; $x = substr ( $x , 1 ) ; } $after = substr ( $x , 1 ) ; - if ( $fc == "!" ) $after = str_replace ( "!!" , "||" , $after ) ; - $after = explode ( "||" , $after ) ; - $t[$k] = "" ; + if ( $fc == '!' ) $after = str_replace ( '!!' , '||' , $after ) ; + $after = explode ( '||' , $after ) ; + $t[$k] = '' ; foreach ( $after AS $theline ) { - $z = "" ; - if ( $fc != "+" ) + $z = '' ; + if ( $fc != '+' ) { $tra = array_pop ( $ltr ) ; if ( !array_pop ( $tr ) ) $z = "\n" ; array_push ( $tr , true ) ; - array_push ( $ltr , "" ) ; + array_push ( $ltr , '' ) ; } $l = array_pop ( $ltd ) ; if ( array_pop ( $td ) ) $z = "" . $z ; - if ( $fc == "|" ) $l = "td" ; - else if ( $fc == "!" ) $l = "th" ; - else if ( $fc == "+" ) $l = "caption" ; - else $l = "" ; + if ( $fc == '|' ) $l = 'td' ; + else if ( $fc == '!' ) $l = 'th' ; + else if ( $fc == '+' ) $l = 'caption' ; + else $l = '' ; array_push ( $ltd , $l ) ; - $y = explode ( "|" , $theline , 2 ) ; + $y = explode ( '|' , $theline , 2 ) ; if ( count ( $y ) == 1 ) $y = "{$z}<{$l}>{$y[0]}" ; else $y = $y = "{$z}<{$l} ".$this->fixTagAttributes($y[0]).">{$y[1]}" ; $t[$k] .= $y ; @@ -526,9 +752,9 @@ class Parser # Closing open td, tr && table while ( count ( $td ) > 0 ) { - if ( array_pop ( $td ) ) $t[] = "" ; - if ( array_pop ( $tr ) ) $t[] = "" ; - $t[] = "" ; + if ( array_pop ( $td ) ) $t[] = '' ; + if ( array_pop ( $tr ) ) $t[] = '' ; + $t[] = '' ; } $t = implode ( "\n" , $t ) ; @@ -538,34 +764,35 @@ class Parser # Parses the text and adds the result to the strip state # Returns the strip tag - function stripParse( $text, $linestart, $args ) + function stripParse( $text, $newline, $args ) { $text = $this->strip( $text, $this->mStripState ); - $text = $this->internalParse( $text, $linestart, $args, false ); - if( $linestart ) { - $text = "\n" . $text; - } - return $this->insertStripItem( $text, $this->mStripState ); + $text = $this->internalParse( $text, (bool)$newline, $args, false ); + return $newline.$this->insertStripItem( $text, $this->mStripState ); } - function internalParse( $text, $linestart, $args = array(), $isMain=true ) - { - $fname = "Parser::internalParse"; + function internalParse( $text, $linestart, $args = array(), $isMain=true ) { + $fname = 'Parser::internalParse'; wfProfileIn( $fname ); $text = $this->removeHTMLtags( $text ); $text = $this->replaceVariables( $text, $args ); - # $text = preg_replace( "/(^|\n)-----*/", "\\1
", $text ); + $text = preg_replace( '/(^|\n)-----*/', '\\1
', $text ); $text = $this->doHeadings( $text ); if($this->mOptions->getUseDynamicDates()) { global $wgDateFormatter; $text = $wgDateFormatter->reformat( $this->mOptions->getDateFormat(), $text ); } + $text = $this->doAllQuotes( $text ); $text = $this->replaceExternalLinks( $text ); - $text = $this->doTokenizedParser ( $text ); + $text = $this->replaceInternalLinks ( $text ); + $text = $this->replaceInternalLinks ( $text ); + //$text = $this->doTokenizedParser ( $text ); $text = $this->doTableStuff ( $text ) ; + $text = $this->magicISBN( $text ); + $text = $this->magicRFC( $text ); $text = $this->formatHeadings( $text, $isMain ); $sk =& $this->mOptions->getSkin(); $text = $sk->transformContent( $text ); @@ -579,39 +806,99 @@ class Parser return $text; } - - /* private */ function doHeadings( $text ) - { + # Parse headers and return html + /* private */ function doHeadings( $text ) { + $fname = 'Parser::doHeadings'; + wfProfileIn( $fname ); for ( $i = 6; $i >= 1; --$i ) { - $h = substr( "======", 0, $i ); + $h = substr( '======', 0, $i ); $text = preg_replace( "/^{$h}(.+){$h}(\\s|$)/m", "\\1\\2", $text ); } + wfProfileOut( $fname ); return $text; } + /* private */ function doAllQuotes( $text ) { + $fname = 'Parser::doAllQuotes'; + wfProfileIn( $fname ); + $outtext = ''; + $lines = explode( "\n", $text ); + foreach ( $lines as $line ) { + $outtext .= $this->doQuotes ( '', $line, '' ) . "\n"; + } + $outtext = substr($outtext, 0,-1); + wfProfileOut( $fname ); + return $outtext; + } + + /* private */ function doQuotes( $pre, $text, $mode ) { + if ( preg_match( "/^(.*)''(.*)$/sU", $text, $m ) ) { + $m1_strong = ($m[1] == "") ? "" : "{$m[1]}"; + $m1_em = ($m[1] == "") ? "" : "{$m[1]}"; + if ( substr ($m[2], 0, 1) == '\'' ) { + $m[2] = substr ($m[2], 1); + if ($mode == 'em') { + return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'emstrong' ); + } else if ($mode == 'strong') { + return $m1_strong . $this->doQuotes ( '', $m[2], '' ); + } else if (($mode == 'emstrong') || ($mode == 'both')) { + return $this->doQuotes ( '', $pre.$m1_strong.$m[2], 'em' ); + } else if ($mode == 'strongem') { + return "{$pre}{$m1_em}" . $this->doQuotes ( '', $m[2], 'em' ); + } else { + return $m[1] . $this->doQuotes ( '', $m[2], 'strong' ); + } + } else { + if ($mode == 'strong') { + return $this->doQuotes ( $m[1], $m[2], ($m[1] == '') ? 'both' : 'strongem' ); + } else if ($mode == 'em') { + return $m1_em . $this->doQuotes ( '', $m[2], '' ); + } else if ($mode == 'emstrong') { + return "{$pre}{$m1_strong}" . $this->doQuotes ( '', $m[2], 'strong' ); + } else if (($mode == 'strongem') || ($mode == 'both')) { + return $this->doQuotes ( '', $pre.$m1_em.$m[2], 'strong' ); + } else { + return $m[1] . $this->doQuotes ( '', $m[2], 'em' ); + } + } + } else { + $text_strong = ($text == '') ? '' : "{$text}"; + $text_em = ($text == '') ? '' : "{$text}"; + if ($mode == '') { + return $pre . $text; + } else if ($mode == 'em') { + return $pre . $text_em; + } else if ($mode == 'strong') { + return $pre . $text_strong; + } else if ($mode == 'strongem') { + return (($pre == '') && ($text == '')) ? '' : "{$pre}{$text_em}"; + } else { + return (($pre == '') && ($text == '')) ? '' : "{$pre}{$text_strong}"; + } + } + } + # Note: we have to do external links before the internal ones, # and otherwise take great care in the order of things here, so # that we don't end up interpreting some URLs twice. - /* private */ function replaceExternalLinks( $text ) - { - $fname = "Parser::replaceExternalLinks"; + /* private */ function replaceExternalLinks( $text ) { + $fname = 'Parser::replaceExternalLinks'; wfProfileIn( $fname ); - $text = $this->subReplaceExternalLinks( $text, "http", true ); - $text = $this->subReplaceExternalLinks( $text, "https", true ); - $text = $this->subReplaceExternalLinks( $text, "ftp", false ); - $text = $this->subReplaceExternalLinks( $text, "irc", false ); - $text = $this->subReplaceExternalLinks( $text, "gopher", false ); - $text = $this->subReplaceExternalLinks( $text, "news", false ); - $text = $this->subReplaceExternalLinks( $text, "mailto", false ); + $text = $this->subReplaceExternalLinks( $text, 'http', true ); + $text = $this->subReplaceExternalLinks( $text, 'https', true ); + $text = $this->subReplaceExternalLinks( $text, 'ftp', false ); + $text = $this->subReplaceExternalLinks( $text, 'irc', false ); + $text = $this->subReplaceExternalLinks( $text, 'gopher', false ); + $text = $this->subReplaceExternalLinks( $text, 'news', false ); + $text = $this->subReplaceExternalLinks( $text, 'mailto', false ); wfProfileOut( $fname ); return $text; } - /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) - { - $unique = "4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3"; + /* private */ function subReplaceExternalLinks( $s, $protocol, $autonumber ) { + $unique = '4jzAfzB8hNvf4sqyO9Edd8pSmk9rE2in0Tgw3'; $uc = "A-Za-z0-9_\\/~%\\-+&*#?!=()@\\x80-\\xFF"; # this is the list of separators that should be ignored if they @@ -620,8 +907,8 @@ class Parser # in this case, the last comma should not become part of the URL, # but in "www.foo.com/123,2342,32.htm" it should. $sep = ",;\.:"; - $fnc = "A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF"; - $images = "gif|png|jpg|jpeg"; + $fnc = 'A-Za-z0-9_.,~%\\-+&;#*?!=()@\\x80-\\xFF'; + $images = 'gif|png|jpg|jpeg'; # PLEASE NOTE: The curly braces { } are not part of the regex, # they are interpreted as part of the string (used to tell PHP @@ -633,13 +920,13 @@ class Parser $sk =& $this->mOptions->getSkin(); if ( $autonumber and $this->mOptions->getAllowExternalImages() ) { # Use img tags only for HTTP urls - $s = preg_replace( $e1, "\\1" . $sk->makeImage( "{$unique}:\\3" . - "/\\4.\\5", "\\4.\\5" ) . "\\6", $s ); + $s = preg_replace( $e1, '\\1' . $sk->makeImage( "{$unique}:\\3" . + '/\\4.\\5', '\\4.\\5' ) . '\\6', $s ); } - $s = preg_replace( $e2, "\\1" . "getExternalLinkAttributes( "{$unique}:\\3", wfEscapeHTML( "{$unique}:\\3" ) ) . ">" . wfEscapeHTML( "{$unique}:\\3" ) . - "\\5", $s ); + '\\5', $s ); $s = str_replace( $unique, $protocol, $s ); $a = explode( "[{$protocol}:", " " . $s ); @@ -664,7 +951,7 @@ class Parser continue; } if( $link == $text || preg_match( "!$protocol://" . preg_quote( $text, "/" ) . "/?$!", $link ) ) { - $paren = ""; + $paren = ''; } else { # Expand the URL for printable version $paren = " (" . htmlspecialchars ( $link ) . ")"; @@ -676,302 +963,31 @@ class Parser return $s; } - /* private */ function handle4Quotes( &$state, $token ) - { - /* This one makes some assumptions. - * '''Caesar''''s army => Caesar's army - * ''''Caesar'''' was a roman emperor => 'Caesar' was a roman emperor - * These assumptions might be wrong, but any other assumption might be wrong, too. - * So here we go */ - if ( $state["strong"] !== false ) { - return $this->handle3Quotes( $state, $token ) . "'"; - } else { - return "'" . $this->handle3Quotes( $state, $token ); - } - } - - - /* private */ function handle3Quotes( &$state, $token ) - { - if ( $state["strong"] !== false ) { - if ( $state["em"] !== false && $state["em"] > $state["strong"] ) - { - # ''' lala ''lala ''' - $s = ""; - } else { - $s = ""; - } - $state["strong"] = FALSE; - } else { - $s = ""; - $state["strong"] = $token["pos"]; - } - return $s; - } - - /* private */ function handle2Quotes( &$state, $token ) - { - if ( $state["em"] !== false ) { - if ( $state["strong"] !== false && $state["strong"] > $state["em"] ) - { - # ''lala'''lala'' ....''' - $s = ""; - } else { - $s = ""; - } - $state["em"] = FALSE; - } else { - $s = ""; - $state["em"] = $token["pos"]; - - } - return $s; - } - - /* private */ function handle5Quotes( &$state, $token ) - { - $s = ""; - if ( $state["em"] !== false && $state["strong"] !== false ) { - if ( $state["em"] < $state["strong"] ) { - $s .= ""; - } else { - $s .= ""; - } - $state["strong"] = $state["em"] = FALSE; - } elseif ( $state["em"] !== false ) { - $s .= ""; - $state["em"] = FALSE; - $state["strong"] = $token["pos"]; - } elseif ( $state["strong"] !== false ) { - $s .= ""; - $state["strong"] = FALSE; - $state["em"] = $token["pos"]; - } else { # not $em and not $strong - $s .= ""; - $state["strong"] = $state["em"] = $token["pos"]; - } - return $s; - } - - /* private */ function doTokenizedParser( $str ) - { - global $wgLang; # for language specific parser hook - global $wgUploadDirectory, $wgUseTimeline; - - $tokenizer=Tokenizer::newFromString( $str ); - $tokenStack = array(); - - $s=""; - $state["em"] = FALSE; - $state["strong"] = FALSE; - $tagIsOpen = FALSE; - $threeopen = false; - - # The tokenizer splits the text into tokens and returns them one by one. - # Every call to the tokenizer returns a new token. - while ( $token = $tokenizer->nextToken() ) - { - switch ( $token["type"] ) - { - case "text": - # simple text with no further markup - $txt = $token["text"]; - break; - case "blank": - # Text that contains blanks that have to be converted to - # non-breakable spaces for French. - # U+202F NARROW NO-BREAK SPACE might be a better choice, but - # browser support for Unicode spacing is poor. - $txt = str_replace( " ", " ", $token["text"] ); - break; - case "[[[": - # remember the tag opened with 3 [ - $threeopen = true; - case "[[": - # link opening tag. - # FIXME : Treat orphaned open tags (stack not empty when text is over) - $tagIsOpen = TRUE; - array_push( $tokenStack, $token ); - $txt=""; - break; - - case "]]]": - case "]]": - # link close tag. - # get text from stack, glue it together, and call the code to handle a - # link - - if ( count( $tokenStack ) == 0 ) - { - # stack empty. Found a ]] without an opening [[ - $txt = "]]"; - } else { - $linkText = ""; - $lastToken = array_pop( $tokenStack ); - while ( !(($lastToken["type"] == "[[[") or ($lastToken["type"] == "[[")) ) - { - if( !empty( $lastToken["text"] ) ) { - $linkText = $lastToken["text"] . $linkText; - } - $lastToken = array_pop( $tokenStack ); - } - - $txt = $linkText ."]]"; - - if( isset( $lastToken["text"] ) ) { - $prefix = $lastToken["text"]; - } else { - $prefix = ""; - } - $nextToken = $tokenizer->previewToken(); - if ( $nextToken["type"] == "text" ) - { - # Preview just looks at it. Now we have to fetch it. - $nextToken = $tokenizer->nextToken(); - $txt .= $nextToken["text"]; - } - $txt = $this->handleInternalLink( $this->unstrip($txt,$this->mStripState), $prefix ); - # did the tag start with 3 [ ? - if($threeopen) { - # show the first as text - $txt = "[".$txt; - $threeopen=false; - } - - } - $tagIsOpen = (count( $tokenStack ) != 0); - break; - case "----": - $txt = "\n
\n"; - break; - case "'''": - # This and the four next ones handle quotes - $txt = $this->handle3Quotes( $state, $token ); - break; - case "''": - $txt = $this->handle2Quotes( $state, $token ); - break; - case "'''''": - $txt = $this->handle5Quotes( $state, $token ); - break; - case "''''": - $txt = $this->handle4Quotes( $state, $token ); - break; - case "": - # empty token - $txt=""; - break; - case "h": - #heading- used to close all unbalanced bold or em tags in this section - $txt = ''; - if( $state['em'] !== false and - ( $state['strong'] === false or $state['em'] > $state['strong'] ) ) - { - $s .= '
'; - $state['em'] = false; - } - if ( $state['strong'] !== false ) $txt .= '
'; - if ( $state['em'] !== false ) $txt .= '
'; - $state['strong'] = $state['em'] = false; - break; - case "RFC ": - if ( $tagIsOpen ) { - $txt = "RFC "; - } else { - $txt = $this->doMagicRFC( $tokenizer ); - } - break; - case "ISBN ": - if ( $tagIsOpen ) { - $txt = "ISBN "; - } else { - $txt = $this->doMagicISBN( $tokenizer ); - } - break; - case "": - if ( $wgUseTimeline && - "" != ( $timelinesrc = $tokenizer->readAllUntil("</timeline>") ) ) - { - $txt = renderTimeline( $timelinesrc ); - } else { - $txt=$token["text"]; - } - break; - default: - # Call language specific Hook. - $txt = $wgLang->processToken( $token, $tokenStack ); - if ( NULL == $txt ) { - # An unkown token. Highlight. - $txt = "".$token["type"].""; - $txt .= "".$token["text"].""; - } - break; - } - # If we're parsing the interior of a link, don't append the interior to $s, - # but push it to the stack so it can be processed when a ]] token is found. - if ( $tagIsOpen && $txt != "" ) { - $token["type"] = "text"; - $token["text"] = $txt; - array_push( $tokenStack, $token ); - } else { - $s .= $txt; - } - } #end while - - # make 100% sure all strong and em tags are closed - # doBlockLevels often messes the last bit up though, but invalid nesting is better than unclosed tags - # tidy solves this though - if( $state['em'] !== false and - ( $state['strong'] === false or $state['em'] > $state['strong'] ) ) - { - $s .= ''; - $state['em'] = false; - } - if ( $state['strong'] !== false ) $s .= ''; - if ( $state['em'] !== false ) $s .= ''; - - if ( count( $tokenStack ) != 0 ) - { - # still objects on stack. opened [[ tag without closing ]] tag. - $txt = ""; - while ( $lastToken = array_pop( $tokenStack ) ) - { - if ( $lastToken["type"] == "text" ) - { - $txt = $lastToken["text"] . $txt; - } else { - $txt = $lastToken["type"] . $txt; - } - } - $s .= $txt; - } - return $s; - } - - /* private */ function handleInternalLink( $line, $prefix ) - { + /* private */ function replaceInternalLinks( $s ) { global $wgLang, $wgLinkCache; global $wgNamespacesWithSubpages, $wgLanguageCode; - static $fname = "Parser::handleInternalLink" ; + static $fname = 'Parser::replaceInternalLinks' ; wfProfileIn( $fname ); - wfProfileIn( "$fname-setup" ); + wfProfileIn( $fname.'-setup' ); static $tc = FALSE; # the % is needed to support urlencoded titles as well - if ( !$tc ) { $tc = Title::legalChars() . "#%"; } + if ( !$tc ) { $tc = Title::legalChars() . '#%'; } $sk =& $this->mOptions->getSkin(); + $a = explode( '[[', ' ' . $s ); + $s = array_shift( $a ); + $s = substr( $s, 1 ); + # Match a link having the form [[namespace:link|alternate]]trail static $e1 = FALSE; if ( !$e1 ) { $e1 = "/^([{$tc}]+)(?:\\|([^]]+))?]](.*)\$/sD"; } # Match the end of a line for a word that's not followed by whitespace, # e.g. in the case of 'The Arab al[[Razi]]', 'al' will be matched - #$e2 = "/^(.*)\\b(\\w+)\$/suD"; - #$e2 = "/^(.*\\s)(\\S+)\$/suD"; - static $e2 = '/^(.*\s)([a-zA-Z\x80-\xff]+)$/sD'; - + static $e2 = '/^(.*?)([a-zA-Z\x80-\xff]+)$/sD'; + $useLinkPrefixExtension = $wgLang->linkPrefixExtension(); # Special and Media are pseudo-namespaces; no pages actually exist in them static $image = FALSE; static $special = FALSE; @@ -984,122 +1000,148 @@ class Parser $nottalk = !Namespace::isTalk( $this->mTitle->getNamespace() ); - wfProfileOut( "$fname-setup" ); - $s = ""; - - if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt - $text = $m[2]; - # fix up urlencoded title texts - if(preg_match("/%/", $m[1] )) $m[1] = urldecode($m[1]); - $trail = $m[3]; - } else { # Invalid form; output directly - $s .= $prefix . "[[" . $line ; - return $s; + if ( $useLinkPrefixExtension ) { + if ( preg_match( $e2, $s, $m ) ) { + $first_prefix = $m[2]; + $s = $m[1]; + } else { + $first_prefix = false; + } + } else { + $prefix = ''; } - /* Valid link forms: - Foobar -- normal - :Foobar -- override special treatment of prefix (images, language links) - /Foobar -- convert to CurrentPage/Foobar - /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text - */ - $c = substr($m[1],0,1); - $noforce = ($c != ":"); - if( $c == "/" ) { # subpage - if(substr($m[1],-1,1)=="/") { # / at end means we don't want the slash to be shown - $m[1]=substr($m[1],1,strlen($m[1])-2); - $noslash=$m[1]; - } else { - $noslash=substr($m[1],1); + wfProfileOut( $fname.'-setup' ); + + foreach ( $a as $line ) { + wfProfileIn( $fname.'-prefixhandling' ); + if ( $useLinkPrefixExtension ) { + if ( preg_match( $e2, $s, $m ) ) { + $prefix = $m[2]; + $s = $m[1]; + } else { + $prefix=''; + } + # first link + if($first_prefix) { + $prefix = $first_prefix; + $first_prefix = false; + } + } + wfProfileOut( $fname.'-prefixhandling' ); + + if ( preg_match( $e1, $line, $m ) ) { # page with normal text or alt + $text = $m[2]; + # fix up urlencoded title texts + if(preg_match('/%/', $m[1] )) $m[1] = urldecode($m[1]); + $trail = $m[3]; + } else { # Invalid form; output directly + $s .= $prefix . '[[' . $line ; + continue; } - if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here - $link = $this->mTitle->getPrefixedText(). "/" . trim($noslash); - if( "" == $text ) { - $text= $m[1]; - } # this might be changed for ugliness reasons + + /* Valid link forms: + Foobar -- normal + :Foobar -- override special treatment of prefix (images, language links) + /Foobar -- convert to CurrentPage/Foobar + /Foobar/ -- convert to CurrentPage/Foobar, strip the initial / from text + */ + $c = substr($m[1],0,1); + $noforce = ($c != ':'); + if( $c == '/' ) { # subpage + if(substr($m[1],-1,1)=='/') { # / at end means we don't want the slash to be shown + $m[1]=substr($m[1],1,strlen($m[1])-2); + $noslash=$m[1]; + } else { + $noslash=substr($m[1],1); + } + if(!empty($wgNamespacesWithSubpages[$this->mTitle->getNamespace()])) { # subpages allowed here + $link = $this->mTitle->getPrefixedText(). '/' . trim($noslash); + if( '' == $text ) { + $text= $m[1]; + } # this might be changed for ugliness reasons + } else { + $link = $noslash; # no subpage allowed, use standard link + } + } elseif( $noforce ) { # no subpage + $link = $m[1]; } else { - $link = $noslash; # no subpage allowed, use standard link + $link = substr( $m[1], 1 ); } - } elseif( $noforce ) { # no subpage - $link = $m[1]; - } else { - $link = substr( $m[1], 1 ); - } - $wasblank = ( "" == $text ); - if( $wasblank ) + $wasblank = ( '' == $text ); + if( $wasblank ) $text = $link; - $nt = Title::newFromText( $link ); - if( !$nt ) { - $s .= $prefix . "[[" . $line; - return $s; - } - $ns = $nt->getNamespace(); - $iw = $nt->getInterWiki(); - if( $noforce ) { - if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) { - array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() ); - $s .= $prefix . $trail ; - return (trim($s) == '')? '': $s; - } - if( $ns == $image ) { - $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail; - $wgLinkCache->addImageLinkObj( $nt ); - return $s; + $nt = Title::newFromText( $link ); + if( !$nt ) { + $s .= $prefix . '[[' . $line; + continue; } - if ( $ns == $category ) { - $t = $nt->getText() ; - $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ; - - $wgLinkCache->suspend(); # Don't save in links/brokenlinks - $t = $sk->makeLinkObj( $nnt, $t, "", "" , $prefix ); - $wgLinkCache->resume(); - - $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text; - $wgLinkCache->addCategoryLinkObj( $nt, $sortkey ); - $this->mOutput->mCategoryLinks[] = $t ; - $s .= $prefix . $trail ; - return $s ; + $ns = $nt->getNamespace(); + $iw = $nt->getInterWiki(); + if( $noforce ) { + if( $iw && $this->mOptions->getInterwikiMagic() && $nottalk && $wgLang->getLanguageName( $iw ) ) { + array_push( $this->mOutput->mLanguageLinks, $nt->getPrefixedText() ); + $tmp = $prefix . $trail ; + $s .= (trim($tmp) == '')? '': $tmp; + continue; + } + if ( $ns == $image ) { + $s .= $prefix . $sk->makeImageLinkObj( $nt, $text ) . $trail; + $wgLinkCache->addImageLinkObj( $nt ); + continue; + } + if ( $ns == $category ) { + $t = $nt->getText() ; + $nnt = Title::newFromText ( Namespace::getCanonicalName($category).":".$t ) ; + + $wgLinkCache->suspend(); # Don't save in links/brokenlinks + $t = $sk->makeLinkObj( $nnt, $t, '', '' , $prefix ); + $wgLinkCache->resume(); + + $sortkey = $wasblank ? $this->mTitle->getPrefixedText() : $text; + $wgLinkCache->addCategoryLinkObj( $nt, $sortkey ); + $this->mOutput->mCategoryLinks[] = $t ; + $s .= $prefix . $trail ; + continue; + } + } + if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) && + ( strpos( $link, '#' ) == FALSE ) ) { + # Self-links are handled specially; generally de-link and change to bold. + $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, '', $trail ); + continue; } - } - if( ( $nt->getPrefixedText() == $this->mTitle->getPrefixedText() ) && - ( strpos( $link, "#" ) == FALSE ) ) { - # Self-links are handled specially; generally de-link and change to bold. - $s .= $prefix . $sk->makeSelfLinkObj( $nt, $text, "", $trail ); - return $s; - } - if( $ns == $media ) { - $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail; - $wgLinkCache->addImageLinkObj( $nt ); - return $s; - } elseif( $ns == $special ) { - $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, "", $trail ); - return $s; + if( $ns == $media ) { + $s .= $prefix . $sk->makeMediaLinkObj( $nt, $text ) . $trail; + $wgLinkCache->addImageLinkObj( $nt ); + continue; + } elseif( $ns == $special ) { + $s .= $prefix . $sk->makeKnownLinkObj( $nt, $text, '', $trail ); + continue; + } + $s .= $sk->makeLinkObj( $nt, $text, '', $trail, $prefix ); } - $s .= $sk->makeLinkObj( $nt, $text, "", $trail , $prefix ); - wfProfileOut( $fname ); return $s; } # Some functions here used by doBlockLevels() # - /* private */ function closeParagraph() - { - $result = ""; + /* private */ function closeParagraph() { + $result = ''; if ( '' != $this->mLastSection ) { - $result = "mLastSection . ">\n"; + $result = 'mLastSection . ">\n"; } $this->mInPre = false; - $this->mLastSection = ""; + $this->mLastSection = ''; return $result; } # getCommon() returns the length of the longest common substring # of both arguments, starting at the beginning of both. # - /* private */ function getCommon( $st1, $st2 ) - { + /* private */ function getCommon( $st1, $st2 ) { $fl = strlen( $st1 ); $shorter = strlen( $st2 ); if ( $fl < $shorter ) { $shorter = $fl; } @@ -1116,53 +1158,51 @@ class Parser { $result = $this->closeParagraph(); - if ( "*" == $char ) { $result .= "
  • "; } - else if ( "#" == $char ) { $result .= "
    1. "; } - else if ( ":" == $char ) { $result .= "
      "; } - else if ( ";" == $char ) { - $result .= "
      "; + if ( '*' == $char ) { $result .= '
      • '; } + else if ( '#' == $char ) { $result .= '
        1. '; } + else if ( ':' == $char ) { $result .= '
          '; } + else if ( ';' == $char ) { + $result .= '
          '; $this->mDTopen = true; } - else { $result = ""; } + else { $result = ''; } return $result; } - /* private */ function nextItem( $char ) - { - if ( "*" == $char || "#" == $char ) { return "
        2. "; } - else if ( ":" == $char || ";" == $char ) { + /* private */ function nextItem( $char ) { + if ( '*' == $char || '#' == $char ) { return '
        3. '; } + else if ( ':' == $char || ';' == $char ) { $close = "
      "; - if ( $this->mDTopen ) { $close = ""; } - if ( ";" == $char ) { + if ( $this->mDTopen ) { $close = ''; } + if ( ';' == $char ) { $this->mDTopen = true; - return $close . "
      "; + return $close . '
      '; } else { $this->mDTopen = false; - return $close . "
      "; + return $close . '
      '; } } - return ""; + return ''; } - /* private */function closeList( $char ) - { - if ( "*" == $char ) { $text = "
"; } - else if ( "#" == $char ) { $text = ""; } - else if ( ":" == $char ) { + /* private */function closeList( $char ) { + if ( '*' == $char ) { $text = ''; } + else if ( '#' == $char ) { $text = ''; } + else if ( ':' == $char ) { if ( $this->mDTopen ) { $this->mDTopen = false; - $text = ""; + $text = ''; } else { - $text = ""; + $text = ''; } } - else { return ""; } + else { return ''; } return $text."\n"; } /* private */ function doBlockLevels( $text, $linestart ) { - $fname = "Parser::doBlockLevels"; + $fname = 'Parser::doBlockLevels'; wfProfileIn( $fname ); # Parsing through the text line by line. The main thing @@ -1188,11 +1228,11 @@ class Parser } if ( !$this->mInPre ) { # Multiple prefixes may abut each other for nested lists. - $prefixLength = strspn( $oLine, "*#:;" ); + $prefixLength = strspn( $oLine, '*#:;' ); $pref = substr( $oLine, 0, $prefixLength ); # eh? - $pref2 = str_replace( ";", ":", $pref ); + $pref2 = str_replace( ';', ':', $pref ); $t = substr( $oLine, $prefixLength ); } else { # Don't interpret any other prefixes in preformatted text @@ -1215,7 +1255,7 @@ class Parser # FIXME: This is not foolproof. Something better in Tokenizer might help. if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) { $term = $match[1]; - $output .= $term . $this->nextItem( ":" ); + $output .= $term . $this->nextItem( ':' ); $t = $match[2]; } } @@ -1235,7 +1275,7 @@ class Parser $char = substr( $pref, $commonPrefixLength, 1 ); $output .= $this->openList( $char ); - if ( ";" == $char ) { + if ( ';' == $char ) { # FIXME: This is dupe of code above if( preg_match( '/^(.*?(?:\s| )):(.*)$/', $t, $match ) ) { $term = $match[1]; @@ -1251,10 +1291,10 @@ class Parser # No prefix (not in list)--go to paragraph mode $uniq_prefix = UNIQ_PREFIX; // XXX: use a stack for nestable elements like span, table and div - $openmatch = preg_match("/(closeParagraph(); @@ -1278,14 +1318,14 @@ class Parser // paragraph if ( '' == trim($t) ) { if ( $paragraphStack ) { - $output .= $paragraphStack.'
'; + $output .= $paragraphStack.'
'; $paragraphStack = false; $this->mLastSection = 'p'; } else { if ($this->mLastSection != 'p' ) { $output .= $this->closeParagraph(); $this->mLastSection = ''; - $paragraphStack = "

"; + $paragraphStack = '

'; } else { $paragraphStack = '

'; } @@ -1311,36 +1351,37 @@ class Parser $output .= $this->closeList( $pref2{$prefixLength-1} ); --$prefixLength; } - if ( "" != $this->mLastSection ) { - $output .= "mLastSection . ">"; - $this->mLastSection = ""; + if ( '' != $this->mLastSection ) { + $output .= 'mLastSection . '>'; + $this->mLastSection = ''; } wfProfileOut( $fname ); return $output; } + # Return value of a magic variable (like PAGENAME) function getVariableValue( $index ) { global $wgLang, $wgSitename, $wgServer; switch ( $index ) { case MAG_CURRENTMONTH: - return date( "m" ); + return date( 'm' ); case MAG_CURRENTMONTHNAME: - return $wgLang->getMonthName( date("n") ); + return $wgLang->getMonthName( date('n') ); case MAG_CURRENTMONTHNAMEGEN: - return $wgLang->getMonthNameGen( date("n") ); + return $wgLang->getMonthNameGen( date('n') ); case MAG_CURRENTDAY: - return date("j"); + return date('j'); case MAG_PAGENAME: return $this->mTitle->getText(); case MAG_NAMESPACE: # return Namespace::getCanonicalName($this->mTitle->getNamespace()); return $wgLang->getNsText($this->mTitle->getNamespace()); // Patch by Dori case MAG_CURRENTDAYNAME: - return $wgLang->getWeekdayName( date("w")+1 ); + return $wgLang->getWeekdayName( date('w')+1 ); case MAG_CURRENTYEAR: - return date( "Y" ); + return date( 'Y' ); case MAG_CURRENTTIME: return $wgLang->time( wfTimestampNow(), false ); case MAG_NUMBEROFARTICLES: @@ -1354,8 +1395,8 @@ class Parser } } - function initialiseVariables() - { + # initialise the magic variables (like CURRENTMONTHNAME) + function initialiseVariables() { global $wgVariableIDs; $this->mVariables = array(); foreach ( $wgVariableIDs as $id ) { @@ -1364,11 +1405,10 @@ class Parser } } - /* private */ function replaceVariables( $text, $args = array() ) - { + /* private */ function replaceVariables( $text, $args = array() ) { global $wgLang, $wgScript, $wgArticlePath; - $fname = "Parser::replaceVariables"; + $fname = 'Parser::replaceVariables'; wfProfileIn( $fname ); $bail = false; @@ -1376,31 +1416,45 @@ class Parser $this->initialiseVariables(); } $titleChars = Title::legalChars(); + $nonBraceChars = str_replace( array( '{', '}' ), array( '', '' ), $titleChars ); # This function is called recursively. To keep track of arguments we need a stack: array_push( $this->mArgStack, $args ); # PHP global rebinding syntax is a bit weird, need to use the GLOBALS array $GLOBALS['wgCurParser'] =& $this; + - # Argument substitution if ( $this->mOutputType == OT_HTML ) { - $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", "wfArgSubstitution", $text ); + # Variable substitution + $text = preg_replace_callback( "/{{([$nonBraceChars]*?)}}/", 'wfVariableSubstitution', $text ); + + # Argument substitution + $text = preg_replace_callback( "/(\\n?){{{([$titleChars]*?)}}}/", 'wfArgSubstitution', $text ); } - - # Double brace substitution - $regex = "/(\\n?){{([$titleChars]*?)(\\|.*?|)}}/s"; - $text = preg_replace_callback( $regex, "wfBraceSubstitution", $text ); + # Template substitution + $regex = '/(\\n?){{(['.$nonBraceChars.']*)(\\|.*?|)}}/s'; + $text = preg_replace_callback( $regex, 'wfBraceSubstitution', $text ); array_pop( $this->mArgStack ); + wfProfileOut( $fname ); return $text; } - function braceSubstitution( $matches ) - { + function variableSubstitution( $matches ) { + if ( array_key_exists( $matches[1], $this->mVariables ) ) { + $text = $this->mVariables[$matches[1]]; + $this->mOutput->mContainsOldMagic = true; + } else { + $text = $matches[0]; + } + return $text; + } + + function braceSubstitution( $matches ) { global $wgLinkCache, $wgLang; - $fname = "Parser::braceSubstitution"; + $fname = 'Parser::braceSubstitution'; $found = false; $nowiki = false; $noparse = false; @@ -1414,15 +1468,15 @@ class Parser $newline = $matches[1]; $part1 = $matches[2]; # If the third subpattern matched anything, it will start with | - if ( $matches[3] !== "" ) { - $args = explode( "|", substr( $matches[3], 1 ) ); + if ( $matches[3] !== '' ) { + $args = explode( '|', substr( $matches[3], 1 ) ); } else { $args = array(); } $argc = count( $args ); # {{{}}} - if ( strpos( $matches[0], "{{{" ) !== false ) { + if ( strpos( $matches[0], '{{{' ) !== false ) { $text = $matches[0]; $found = true; $noparse = true; @@ -1461,7 +1515,7 @@ class Parser # Check if it is an internal message $mwInt =& MagicWord::get( MAG_INT ); if ( $mwInt->matchStartAndRemove( $part1 ) ) { - if ( $this->incrementIncludeCount( "int:$part1" ) ) { + if ( $this->incrementIncludeCount( 'int:'.$part1 ) ) { $text = wfMsgReal( $part1, $args, true ); $found = true; } @@ -1544,7 +1598,7 @@ class Parser # If the title is valid but undisplayable, make a link to it if ( $this->mOutputType == OT_HTML && !$found ) { - $text = "[[" . $title->getPrefixedText() . "]]"; + $text = '[[' . $title->getPrefixedText() . ']]'; $found = true; } } @@ -1559,14 +1613,14 @@ class Parser $assocArgs = array(); $index = 1; foreach( $args as $arg ) { - $eqpos = strpos( $arg, "=" ); + $eqpos = strpos( $arg, '=' ); if ( $eqpos === false ) { $assocArgs[$index++] = $arg; } else { $name = trim( substr( $arg, 0, $eqpos ) ); $value = trim( substr( $arg, $eqpos+1 ) ); if ( $value === false ) { - $value = ""; + $value = ''; } if ( $name !== false ) { $assocArgs[$name] = $value; @@ -1580,7 +1634,7 @@ class Parser } # Run full parser on the included text - $text = $this->stripParse( $text, (bool)$newline, $assocArgs ); + $text = $this->stripParse( $text, $newline, $assocArgs ); # Resume the link cache and register the inclusion as a link if ( !is_null( $title ) ) { @@ -1597,23 +1651,21 @@ class Parser } # Triple brace replacement -- used for template arguments - function argSubstitution( $matches ) - { + function argSubstitution( $matches ) { $newline = $matches[1]; $arg = trim( $matches[2] ); $text = $matches[0]; $inputArgs = end( $this->mArgStack ); if ( array_key_exists( $arg, $inputArgs ) ) { - $text = $this->stripParse( $inputArgs[$arg], (bool)$newline, array() ); + $text = $this->stripParse( $inputArgs[$arg], $newline, array() ); } return $text; } # Returns true if the function is allowed to include this entity - function incrementIncludeCount( $dbk ) - { + function incrementIncludeCount( $dbk ) { if ( !array_key_exists( $dbk, $this->mIncludeCount ) ) { $this->mIncludeCount[$dbk] = 0; } @@ -1626,29 +1678,28 @@ class Parser # Cleans up HTML, removes dangerous tags and attributes - /* private */ function removeHTMLtags( $text ) - { + /* private */ function removeHTMLtags( $text ) { global $wgUseTidy, $wgUserHtml; - $fname = "Parser::removeHTMLtags"; + $fname = 'Parser::removeHTMLtags'; wfProfileIn( $fname ); if( $wgUserHtml ) { $htmlpairs = array( # Tags that must be closed - "b", "del", "i", "ins", "u", "font", "big", "small", "sub", "sup", "h1", - "h2", "h3", "h4", "h5", "h6", "cite", "code", "em", "s", - "strike", "strong", "tt", "var", "div", "center", - "blockquote", "ol", "ul", "dl", "table", "caption", "pre", - "ruby", "rt" , "rb" , "rp", "p" + 'b', 'del', 'i', 'ins', 'u', 'font', 'big', 'small', 'sub', 'sup', 'h1', + 'h2', 'h3', 'h4', 'h5', 'h6', 'cite', 'code', 'em', 's', + 'strike', 'strong', 'tt', 'var', 'div', 'center', + 'blockquote', 'ol', 'ul', 'dl', 'table', 'caption', 'pre', + 'ruby', 'rt' , 'rb' , 'rp', 'p' ); $htmlsingle = array( - "br", "hr", "li", "dt", "dd" + 'br', 'hr', 'li', 'dt', 'dd' ); $htmlnest = array( # Tags that can be nested--?? - "table", "tr", "td", "th", "div", "blockquote", "ol", "ul", - "dl", "font", "big", "small", "sub", "sup" + 'table', 'tr', 'td', 'th', 'div', 'blockquote', 'ol', 'ul', + 'dl', 'font', 'big', 'small', 'sub', 'sup' ); $tabletags = array( # Can only appear inside table - "td", "th", "tr" + 'td', 'th', 'tr' ); } else { $htmlpairs = array(); @@ -1663,15 +1714,15 @@ class Parser $htmlattrs = $this->getHTMLattrs () ; # Remove HTML comments - $text = preg_replace( "/(\\n * *(?=\\n)|)/sU", "$2", $text ); + $text = preg_replace( '/(\\n * *(?=\\n)|)/sU', '$2', $text ); - $bits = explode( "<", $text ); + $bits = explode( '<', $text ); $text = array_shift( $bits ); if(!$wgUseTidy) { $tagstack = array(); $tablestack = array(); foreach ( $bits as $x ) { $prev = error_reporting( E_ALL & ~( E_NOTICE | E_WARNING ) ); - preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", + preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/', $x, $regs ); list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; error_reporting( $prev ); @@ -1682,25 +1733,25 @@ class Parser if ( $slash ) { # Closing a tag... if ( ! in_array( $t, $htmlsingle ) && - ( count($tagstack) && $ot = array_pop( $tagstack ) ) != $t ) { - if(!empty($ot)) array_push( $tagstack, $ot ); + ( $ot = @array_pop( $tagstack ) ) != $t ) { + @array_push( $tagstack, $ot ); $badtag = 1; } else { - if ( $t == "table" ) { + if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); } - $newparams = ""; + $newparams = ''; } } else { # Keep track for later if ( in_array( $t, $tabletags ) && - ! in_array( "table", $tagstack ) ) { + ! in_array( 'table', $tagstack ) ) { $badtag = 1; } else if ( in_array( $t, $tagstack ) && ! in_array ( $t , $htmlnest ) ) { $badtag = 1 ; } else if ( ! in_array( $t, $htmlsingle ) ) { - if ( $t == "table" ) { + if ( $t == 'table' ) { array_push( $tablestack, $tagstack ); $tagstack = array(); } @@ -1711,30 +1762,30 @@ class Parser } if ( ! $badtag ) { - $rest = str_replace( ">", ">", $rest ); + $rest = str_replace( '>', '>', $rest ); $text .= "<$slash$t $newparams$brace$rest"; continue; } } - $text .= "<" . str_replace( ">", ">", $x); + $text .= '<' . str_replace( '>', '>', $x); } # Close off any remaining tags - while ( $t = array_pop( $tagstack ) ) { + while ( is_array( $tagstack ) && ($t = array_pop( $tagstack )) ) { $text .= "\n"; - if ( $t == "table" ) { $tagstack = array_pop( $tablestack ); } + if ( $t == 'table' ) { $tagstack = array_pop( $tablestack ); } } } else { # this might be possible using tidy itself foreach ( $bits as $x ) { - preg_match( "/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/", + preg_match( '/^(\\/?)(\\w+)([^>]*)(\\/{0,1}>)([^<]*)$/', $x, $regs ); @list( $qbar, $slash, $t, $params, $brace, $rest ) = $regs; if ( in_array( $t = strtolower( $t ), $htmlelements ) ) { $newparams = $this->fixTagAttributes($params); - $rest = str_replace( ">", ">", $rest ); + $rest = str_replace( '>', '>', $rest ); $text .= "<$slash$t $newparams$brace$rest"; } else { - $text .= "<" . str_replace( ">", ">", $x); + $text .= '<' . str_replace( '>', '>', $x); } } } @@ -1756,8 +1807,7 @@ class Parser * */ - /* private */ function formatHeadings( $text, $isMain=true ) - { + /* private */ function formatHeadings( $text, $isMain=true ) { global $wgInputEncoding; $doNumberHeadings = $this->mOptions->getNumberHeadings(); @@ -1784,13 +1834,13 @@ class Parser # never add the TOC to the Main Page. This is an entry page that should not # be more than 1-2 screens large anyway - if( $this->mTitle->getPrefixedText() == wfMsg("mainpage") ) { + if( $this->mTitle->getPrefixedText() == wfMsg('mainpage') ) { $doShowToc = 0; } # Get all headlines for numbering them and adding funky stuff like [edit] # links - this is for later, but we need the number of headlines right now - $numMatches = preg_match_all( "/)(.*?)<\/H[1-6]>/i", $text, $matches ); + $numMatches = preg_match_all( '/)(.*?)<\/H[1-6]>/i', $text, $matches ); # if there are fewer than 4 headlines in the article, do not show TOC if( $numMatches < 4 ) { @@ -1814,14 +1864,14 @@ class Parser # Ugh .. the TOC should have neat indentation levels which can be # passed to the skin functions. These are determined here $toclevel = 0; - $toc = ""; - $full = ""; + $toc = ''; + $full = ''; $head = array(); $sublevelCount = array(); $level = 0; $prevlevel = 0; foreach( $matches[3] as $headline ) { - $numbering = ""; + $numbering = ''; if( $level ) { $prevlevel = $level; } @@ -1845,7 +1895,7 @@ class Parser for( $i = 1; $i <= $level; $i++ ) { if( !empty( $sublevelCount[$i] ) ) { if( $dot ) { - $numbering .= "."; + $numbering .= '.'; } $numbering .= $sublevelCount[$i]; $dot = 1; @@ -1856,13 +1906,17 @@ class Parser # The canonized header is a version of the header text safe to use for links # Avoid insertion of weird stuff like by expanding the relevant sections $canonized_headline = $this->unstrip( $headline, $this->mStripState ); + $canonized_headline = $this->unstripNoWiki( $headline, $this->mStripState ); # strip out HTML - $canonized_headline = preg_replace( "/<.*?" . ">/","",$canonized_headline ); + $canonized_headline = preg_replace( '/<.*?' . '>/','',$canonized_headline ); $tocline = trim( $canonized_headline ); - $canonized_headline = preg_replace("/[ \\?&\\/<>\\(\\)\\[\\]=,+']+/", '_', urlencode( do_html_entity_decode( $tocline, ENT_COMPAT, $wgInputEncoding ) ) ); - # strip out urlencoded   (inserted for french spaces, e.g. first space in 'something : something') - $canonized_headline = str_replace('%C2%A0','_', $canonized_headline); + $canonized_headline = urlencode( do_html_entity_decode( str_replace(' ', '_', $tocline), ENT_COMPAT, $wgInputEncoding ) ); + $replacearray = array( + '%3A' => ':', + '%' => '.' + ); + $canonized_headline = str_replace(array_keys($replacearray),array_values($replacearray),$canonized_headline); $refer[$headlineCount] = $canonized_headline; # count how many in assoc. array so we can track dupes in anchors @@ -1872,26 +1926,26 @@ class Parser # Prepend the number to the heading text if( $doNumberHeadings || $doShowToc ) { - $tocline = $numbering . " " . $tocline; + $tocline = $numbering . ' ' . $tocline; # Don't number the heading if it is the only one (looks silly) if( $doNumberHeadings && count( $matches[3] ) > 1) { # the two are different if the line contains a link - $headline=$numbering . " " . $headline; + $headline=$numbering . ' ' . $headline; } } # Create the anchor for linking from the TOC to the section $anchor = $canonized_headline; if($refcount[$headlineCount] > 1 ) { - $anchor .= "_" . $refcount[$headlineCount]; + $anchor .= '_' . $refcount[$headlineCount]; } if( $doShowToc ) { $toc .= $sk->tocLine($anchor,$tocline,$toclevel); } if( $showEditLink ) { if ( empty( $head[$headlineCount] ) ) { - $head[$headlineCount] = ""; + $head[$headlineCount] = ''; } $head[$headlineCount] .= $sk->editSectionLink($headlineCount+1); } @@ -1915,7 +1969,7 @@ class Parser # split up and insert constructed headlines - $blocks = preg_split( "/.*?<\/H[1-6]>/i", $text ); + $blocks = preg_split( '/.*?<\/H[1-6]>/i', $text ); $i = 0; foreach( $blocks as $block ) { @@ -1942,73 +1996,54 @@ class Parser return $full; } - /* private */ function doMagicISBN( &$tokenizer ) - { + # Return an HTML link for the "ISBN 123456" text + /* private */ function magicISBN( $text ) { global $wgLang; - # Check whether next token is a text token - # If yes, fetch it and convert the text into a - # Special::BookSources link - $token = $tokenizer->previewToken(); - while ( $token["type"] == "" ) - { - $tokenizer->nextToken(); - $token = $tokenizer->previewToken(); - } - if ( $token["type"] == "text" ) - { - $token = $tokenizer->nextToken(); - $x = $token["text"]; - $valid = "0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + $a = split( 'ISBN ', " $text" ); + if ( count ( $a ) < 2 ) return $text; + $text = substr( array_shift( $a ), 1); + $valid = '0123456789-ABCDEFGHIJKLMNOPQRSTUVWXYZ'; - $isbn = $blank = "" ; - while ( " " == $x{0} ) { - $blank .= " "; + foreach ( $a as $x ) { + $isbn = $blank = '' ; + while ( ' ' == $x{0} ) { + $blank .= ' '; $x = substr( $x, 1 ); } while ( strstr( $valid, $x{0} ) != false ) { $isbn .= $x{0}; $x = substr( $x, 1 ); } - $num = str_replace( "-", "", $isbn ); - $num = str_replace( " ", "", $num ); + $num = str_replace( '-', '', $isbn ); + $num = str_replace( ' ', '', $num ); - if ( "" == $num ) { - $text = "ISBN $blank$x"; + if ( '' == $num ) { + $text .= "ISBN $blank$x"; } else { - $titleObj = Title::makeTitle( NS_SPECIAL, "Booksources" ); - $text = "escapeLocalUrl( "isbn={$num}" ) . "\" class=\"internal\">ISBN $isbn"; $text .= $x; } - } else { - $text = "ISBN "; } return $text; } - /* private */ function doMagicRFC( &$tokenizer ) - { + + # Return an HTML link for the "RFC 1234" text + /* private */ function magicRFC( $text ) { global $wgLang; - # Check whether next token is a text token - # If yes, fetch it and convert the text into a - # link to an RFC source - $token = $tokenizer->previewToken(); - while ( $token["type"] == "" ) - { - $tokenizer->nextToken(); - $token = $tokenizer->previewToken(); - } - if ( $token["type"] == "text" ) - { - $token = $tokenizer->nextToken(); - $x = $token["text"]; - $valid = "0123456789"; + $a = split( 'RFC ', ' '.$text ); + if ( count ( $a ) < 2 ) return $text; + $text = substr( array_shift( $a ), 1); + $valid = '0123456789'; - $rfc = $blank = "" ; - while ( " " == $x{0} ) { - $blank .= " "; + foreach ( $a as $x ) { + $rfc = $blank = '' ; + while ( ' ' == $x{0} ) { + $blank .= ' '; $x = substr( $x, 1 ); } while ( strstr( $valid, $x{0} ) != false ) { @@ -2016,23 +2051,20 @@ class Parser $x = substr( $x, 1 ); } - if ( "" == $rfc ) { + if ( '' == $rfc ) { $text .= "RFC $blank$x"; } else { - $url = wfmsg( "rfcurl" ); - $url = str_replace( "$1", $rfc, $url); + $url = wfmsg( 'rfcurl' ); + $url = str_replace( '$1', $rfc, $url); $sk =& $this->mOptions->getSkin(); $la = $sk->getExternalLinkAttributes( $url, "RFC {$rfc}" ); - $text = "RFC {$rfc}{$x}"; + $text .= "RFC {$rfc}{$x}"; } - } else { - $text = "RFC "; } return $text; } - function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) - { + function preSaveTransform( $text, &$title, &$user, $options, $clearState = true ) { $this->mOptions = $options; $this->mTitle =& $title; $this->mOutputType = OT_WIKI; @@ -2047,19 +2079,21 @@ class Parser ); $text = str_replace(array_keys($pairs), array_values($pairs), $text); // now with regexes + /* $pairs = array( "//i" => '
', - "/
/i" => "
", + "/
/i" => "
", ); $text = preg_replace(array_keys($pairs), array_values($pairs), $text); + */ $text = $this->strip( $text, $stripState, false ); $text = $this->pstPass2( $text, $user ); $text = $this->unstrip( $text, $stripState ); + $text = $this->unstripNoWiki( $text, $stripState ); return $text; } - /* private */ function pstPass2( $text, &$user ) - { + /* private */ function pstPass2( $text, &$user ) { global $wgLang, $wgLocaltimezone, $wgCurParser; # Variable replacement @@ -2069,20 +2103,20 @@ class Parser # Signatures # $n = $user->getName(); - $k = $user->getOption( "nickname" ); - if ( "" == $k ) { $k = $n; } + $k = $user->getOption( 'nickname' ); + if ( '' == $k ) { $k = $n; } if(isset($wgLocaltimezone)) { - $oldtz = getenv("TZ"); putenv("TZ=$wgLocaltimezone"); + $oldtz = getenv('TZ'); putenv('TZ='.$wgLocaltimezone); } /* Note: this is an ugly timezone hack for the European wikis */ - $d = $wgLang->timeanddate( date( "YmdHis" ), false ) . - " (" . date( "T" ) . ")"; - if(isset($wgLocaltimezone)) putenv("TZ=$oldtz"); + $d = $wgLang->timeanddate( date( 'YmdHis' ), false ) . + ' (' . date( 'T' ) . ')'; + if(isset($wgLocaltimezone)) putenv('TZ='.$oldtzs); - $text = preg_replace( "/~~~~~/", $d, $text ); - $text = preg_replace( "/~~~~/", "[[" . $wgLang->getNsText( + $text = preg_replace( '/~~~~~/', $d, $text ); + $text = preg_replace( '/~~~~/', '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]] $d", $text ); - $text = preg_replace( "/~~~/", "[[" . $wgLang->getNsText( + $text = preg_replace( '/~~~/', '[[' . $wgLang->getNsText( Namespace::getUser() ) . ":$n|$k]]", $text ); # Context links: [[|name]] and [[name (context)|]] @@ -2102,12 +2136,12 @@ class Parser if ( preg_match( $conpat, $t, $m ) ) { $context = $m[2]; } - $text = preg_replace( $p4, "[[\\1:\\2 (\\3)|\\2]]", $text ); - $text = preg_replace( $p1, "[[\\1 (\\2)|\\1]]", $text ); - $text = preg_replace( $p3, "[[\\1:\\2|\\2]]", $text ); + $text = preg_replace( $p4, '[[\\1:\\2 (\\3)|\\2]]', $text ); + $text = preg_replace( $p1, '[[\\1 (\\2)|\\1]]', $text ); + $text = preg_replace( $p3, '[[\\1:\\2|\\2]]', $text ); - if ( "" == $context ) { - $text = preg_replace( $p2, "[[\\1]]", $text ); + if ( '' == $context ) { + $text = preg_replace( $p2, '[[\\1]]', $text ); } else { $text = preg_replace( $p2, "[[\\1 ({$context})|\\1]]", $text ); } @@ -2131,8 +2165,7 @@ class Parser # Set up some variables which are usually set up in parse() # so that an external function can call some class members with confidence - function startExternalParse( &$title, $options, $outputType, $clearState = true ) - { + function startExternalParse( &$title, $options, $outputType, $clearState = true ) { $this->mTitle =& $title; $this->mOptions = $options; $this->mOutputType = $outputType; @@ -2160,11 +2193,21 @@ class Parser $executing = false; return $text; } + + # Create an HTML-style tag, e.g. special text + # Callback will be called with the text within + # Transform and return the text within + function setHook( $tag, $callback ) { + $oldVal = @$this->mTagHooks[$tag]; + $this->mTagHooks[$tag] = $callback; + return $oldVal; + } } class ParserOutput { var $mText, $mLanguageLinks, $mCategoryLinks, $mContainsOldMagic; + var $mCacheTime; # Used in ParserCache function ParserOutput( $text = "", $languageLinks = array(), $categoryLinks = array(), $containsOldMagic = false ) @@ -2173,16 +2216,19 @@ class ParserOutput $this->mLanguageLinks = $languageLinks; $this->mCategoryLinks = $categoryLinks; $this->mContainsOldMagic = $containsOldMagic; + $this->mCacheTime = ""; } function getText() { return $this->mText; } function getLanguageLinks() { return $this->mLanguageLinks; } function getCategoryLinks() { return $this->mCategoryLinks; } + function getCacheTime() { return $this->mCacheTime; } function containsOldMagic() { return $this->mContainsOldMagic; } function setText( $text ) { return wfSetVar( $this->mText, $text ); } function setLanguageLinks( $ll ) { return wfSetVar( $this->mLanguageLinks, $ll ); } function setCategoryLinks( $cl ) { return wfSetVar( $this->mCategoryLinks, $cl ); } function setContainsOldMagic( $com ) { return wfSetVar( $this->mContainsOldMagic, $com ); } + function setCacheTime( $t ) { return wfSetVar( $this->mCacheTime, $t ); } function merge( $other ) { $this->mLanguageLinks = array_merge( $this->mLanguageLinks, $other->mLanguageLinks ); @@ -2207,39 +2253,38 @@ class ParserOptions var $mNumberHeadings; # Automatically number headings var $mShowToc; # Show table of contents - function getUseTeX() { return $this->mUseTeX; } - function getUseCategoryMagic() { return $this->mUseCategoryMagic; } - function getUseDynamicDates() { return $this->mUseDynamicDates; } - function getInterwikiMagic() { return $this->mInterwikiMagic; } - function getAllowExternalImages() { return $this->mAllowExternalImages; } - function getSkin() { return $this->mSkin; } - function getDateFormat() { return $this->mDateFormat; } - function getEditSection() { return $this->mEditSection; } - function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; } - function getNumberHeadings() { return $this->mNumberHeadings; } - function getShowToc() { return $this->mShowToc; } - - function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); } - function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); } - function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); } - function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); } - function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); } - function setSkin( $x ) { return wfSetRef( $this->mSkin, $x ); } - function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); } - function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); } - function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); } - function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); } - function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); } - - /* static */ function newFromUser( &$user ) - { + function getUseTeX() { return $this->mUseTeX; } + function getUseCategoryMagic() { return $this->mUseCategoryMagic; } + function getUseDynamicDates() { return $this->mUseDynamicDates; } + function getInterwikiMagic() { return $this->mInterwikiMagic; } + function getAllowExternalImages() { return $this->mAllowExternalImages; } + function getSkin() { return $this->mSkin; } + function getDateFormat() { return $this->mDateFormat; } + function getEditSection() { return $this->mEditSection; } + function getEditSectionOnRightClick() { return $this->mEditSectionOnRightClick; } + function getNumberHeadings() { return $this->mNumberHeadings; } + function getShowToc() { return $this->mShowToc; } + + function setUseTeX( $x ) { return wfSetVar( $this->mUseTeX, $x ); } + function setUseCategoryMagic( $x ) { return wfSetVar( $this->mUseCategoryMagic, $x ); } + function setUseDynamicDates( $x ) { return wfSetVar( $this->mUseDynamicDates, $x ); } + function setInterwikiMagic( $x ) { return wfSetVar( $this->mInterwikiMagic, $x ); } + function setAllowExternalImages( $x ) { return wfSetVar( $this->mAllowExternalImages, $x ); } + function setDateFormat( $x ) { return wfSetVar( $this->mDateFormat, $x ); } + function setEditSection( $x ) { return wfSetVar( $this->mEditSection, $x ); } + function setEditSectionOnRightClick( $x ) { return wfSetVar( $this->mEditSectionOnRightClick, $x ); } + function setNumberHeadings( $x ) { return wfSetVar( $this->mNumberHeadings, $x ); } + function setShowToc( $x ) { return wfSetVar( $this->mShowToc, $x ); } + + function setSkin( &$x ) { $this->mSkin =& $x; } + + /* static */ function newFromUser( &$user ) { $popts = new ParserOptions; $popts->initialiseFromUser( $user ); return $popts; } - function initialiseFromUser( &$userInput ) - { + function initialiseFromUser( &$userInput ) { global $wgUseTeX, $wgUseCategoryMagic, $wgUseDynamicDates, $wgInterwikiMagic, $wgAllowExternalImages; if ( !$userInput ) { @@ -2255,11 +2300,11 @@ class ParserOptions $this->mInterwikiMagic = $wgInterwikiMagic; $this->mAllowExternalImages = $wgAllowExternalImages; $this->mSkin =& $user->getSkin(); - $this->mDateFormat = $user->getOption( "date" ); - $this->mEditSection = $user->getOption( "editsection" ); - $this->mEditSectionOnRightClick = $user->getOption( "editsectiononrightclick" ); - $this->mNumberHeadings = $user->getOption( "numberheadings" ); - $this->mShowToc = $user->getOption( "showtoc" ); + $this->mDateFormat = $user->getOption( 'date' ); + $this->mEditSection = $user->getOption( 'editsection' ); + $this->mEditSectionOnRightClick = $user->getOption( 'editsectiononrightclick' ); + $this->mNumberHeadings = $user->getOption( 'numberheadings' ); + $this->mShowToc = $user->getOption( 'showtoc' ); } @@ -2278,4 +2323,10 @@ function wfArgSubstitution( $matches ) return $wgCurParser->argSubstitution( $matches ); } +function wfVariableSubstitution( $matches ) +{ + global $wgCurParser; + return $wgCurParser->variableSubstitution( $matches ); +} + ?>