(X)HTML parser
 * Based on work by Jan Hidders and Magnus Manske
 * To use, set
 *    $wgUseXMLparser = true ;
 *    $wgEnableParserCache = false ;
 *    $wgWiki2xml to the path and executable of the command line version (cli)
 * in LocalSettings.php
 * @package MediaWiki
 * @subpackage Experimental
 */

/**
 * The base class for an element of the wikiXML tree.
 *
 * Every node carries an upper-case element name (e.g. "LINK", "PARAGRAPH"),
 * a map of attributes keyed by upper-case attribute name (this class reads
 * NAME, LEVEL, TYPE and FORCEDLINK) and a list of children. A child is
 * either a plain string (text) or another element object.
 */
class element {
	var $name = '';
	var $attrs = array();
	var $children = array();

	/**
	 * Finds the ATTRS child element and returns its ATTR sub-children as a
	 * single attribute string. If several ATTRS children exist, the last wins.
	 *
	 * @return string attribute string, or "" if there is no ATTRS child
	 */
	function getSourceAttrs () {
		$ret = "" ;
		foreach ( $this->children as $child ) {
			if ( !is_string ( $child ) AND $child->name == "ATTRS" ) {
				# makeXHTML() answers an ATTRS element with getTheseAttrs() and
				# nothing else, so call it directly; no parser is needed for this
				$ret = $child->getTheseAttrs () ;
			}
		}
		return $ret ;
	}

	/**
	 * Collects the ATTR children of this (ATTRS) element for getSourceAttrs().
	 * Each ATTR child becomes NAME='value', where value is its first child.
	 *
	 * NOTE(review): the value is emitted verbatim inside single quotes;
	 * presumably wiki2xml delivers it already escaped - confirm.
	 *
	 * @return string the attributes, separated by single blanks
	 */
	function getTheseAttrs () {
		$ret = array() ;
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) OR $child->name != "ATTR" ) continue ;
			# Missing name/value render as empty strings instead of raising notices
			$attrName = isset ( $child->attrs["NAME"] ) ? $child->attrs["NAME"] : "" ;
			$attrValue = isset ( $child->children[0] ) ? $child->children[0] : "" ;
			$ret[] = $attrName . "='" . $attrValue . "'" ;
		}
		return implode ( " " , $ret ) ;
	}

	/**
	 * Moves a "link tail" into the link: if child $key is a LINK and the next
	 * child is a string, the leading run of lower-case letters (a-z, ä, ö, ü,
	 * ß) is cut off that string and appended to the link's display text.
	 *
	 * Currently not called from this class; see the note in sub_makeXHTML().
	 *
	 * @param object $parser parser object, passed through to sub_makeXHTML()
	 * @param mixed  $key    key of the (possible) LINK element in $this->children
	 */
	function fixLinkTails ( &$parser , $key ) {
		$k2 = $key + 1 ;
		if ( !isset ( $this->children[$key] ) ) return ;
		if ( !isset ( $this->children[$k2] ) ) return ;
		if ( !is_string ( $this->children[$k2] ) ) return ;
		if ( is_string ( $this->children[$key] ) ) return ;
		if ( $this->children[$key]->name != "LINK" ) return ;

		$n = $this->children[$k2] ;
		$s = "" ;

		# The non-ASCII letters may occupy more than one byte (UTF-8), so they
		# are matched as string prefixes and not against the single byte $n[0]
		$specialLetters = array ( 'ä' , 'ö' , 'ü' , 'ß' ) ;
		while ( $n !== "" ) {
			$len = 0 ;
			if ( $n[0] >= 'a' AND $n[0] <= 'z' ) {
				$len = 1 ;
			} else {
				foreach ( $specialLetters as $letter ) {
					if ( strncmp ( $n , $letter , strlen ( $letter ) ) == 0 ) {
						$len = strlen ( $letter ) ;
						break ;
					}
				}
			}
			if ( $len == 0 ) break ; # First character that is not part of the tail
			$s .= substr ( $n , 0 , $len ) ;
			$n = (string) substr ( $n , $len ) ;
		}
		$this->children[$k2] = $n ;

		if ( count ( $this->children[$key]->children ) > 1 ) {
			# The link already has options: append the tail to the last child
			$kl = array_keys ( $this->children[$key]->children ) ;
			$kl = array_pop ( $kl ) ;
			$this->children[$key]->children[$kl]->children[] = $s ;
		} else {
			# Only one child: add a LINKOPTION holding the link's text plus tail
			$e = new element ;
			$e->name = "LINKOPTION" ;
			$t = $this->children[$key]->sub_makeXHTML ( $parser ) ;
			$e->children[] = trim ( $t ) . $s ;
			$this->children[$key]->children[] = $e ;
		}
	}

	/**
	 * This function generates the XHTML for the entire subtree
	 *
	 * @param object $parser parser object, handed on to the children
	 * @param string $tag    XHTML tag to wrap the subtree in; "" for no wrapper
	 * @param string $attr   attributes for that tag; the attributes of an
	 *                       ATTRS child are appended to them
	 * @return string
	 */
	function sub_makeXHTML ( &$parser , $tag = "" , $attr = "" ) {
		$ret = "" ;

		# Merge the caller's attributes with those given in the wiki source
		$attr2 = $this->getSourceAttrs () ;
		if ( $attr != "" AND $attr2 != "" ) $attr .= " " ;
		$attr .= $attr2 ;

		if ( $tag != "" ) {
			$ret .= "<" . $tag ;
			if ( $attr != "" ) $ret .= " " . $attr ;
			$ret .= ">" ;
		}

		# THIS SHOULD BE DONE IN THE WIKI2XML-PARSER INSTEAD
		# foreach ( array_keys ( $this->children ) AS $x )
		#	$this->fixLinkTails ( $parser , $x ) ;

		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) {
				$ret .= $child ;
			} else if ( $child->name != "ATTRS" ) {
				# ATTRS was already consumed by getSourceAttrs() above
				$ret .= $child->makeXHTML ( $parser ) ;
			}
		}

		if ( $tag != "" ) $ret .= "</" . $tag . ">\n" ;
		return $ret ;
	}

	/**
	 * Link functions
	 */

	/**
	 * Renders an internal link through the user's skin. A target in the image
	 * namespace (unless the link is forced via FORCEDLINK="yes") is rendered
	 * as an image; everything else becomes a plain link.
	 *
	 * @param object $parser        parser object (not used here)
	 * @param string $target        link target as written in the wiki source
	 * @param string $display_title text to show for the link
	 * @param array  $options       remaining link options (used for images)
	 * @return string
	 */
	function createInternalLink ( &$parser , $target , $display_title , $options ) {
		global $wgUser ;
		$skin = $wgUser->getSkin() ;
		$tp = explode ( ":" , $target ) ; # tp = target parts

		# Placeholders of the unfinished link analysis; none is filled in yet
		$title = "" ; # The plain title
		$language = "" ; # The language/meta/etc. part
		$namespace = "" ; # The namespace, if any
		$subtarget = "" ; # The '#' thingy

		$nt = Title::newFromText ( $target ) ;
		$fl = isset ( $this->attrs["FORCEDLINK"] )
			&& strtoupper ( $this->attrs["FORCEDLINK"] ) == "YES" ;

		if ( $fl || count ( $tp ) == 1 ) {
			$title = $target ; # Plain and simple case
		} else { # There's stuff missing here...
			# is_object() guards against targets that do not yield a Title
			if ( is_object ( $nt ) && $nt->getNamespace() == NS_IMAGE ) {
				$options[] = $display_title ;
				return $skin->makeImageLinkObj ( $nt , implode ( "|" , $options ) ) ;
			} else {
				$title = $target ; # Default
			}
		}

		if ( $language != "" ) { # External link within the WikiMedia project
			return "{language link}" ;
		} else if ( $namespace != "" ) { # Link to another namespace, check for image/media stuff
			return "{namespace link}" ;
		} else {
			return $skin->makeLink ( $target , $display_title ) ;
		}
	}

	/**
	 * Renders this LINK element: the LINKTARGET child supplies the target,
	 * every other element child is an option, and the last option serves as
	 * display title (the target itself if there are no options).
	 *
	 * @param object $parser parser object
	 * @return string
	 */
	function makeInternalLink ( &$parser ) {
		$target = "" ;
		$option = array () ;
		foreach ( $this->children as $child ) {
			if ( is_string ( $child ) ) continue ; # This shouldn't be the case!
			$rendered = trim ( $child->makeXHTML ( $parser ) ) ;
			if ( $child->name == "LINKTARGET" ) $target = $rendered ;
			else $option[] = $rendered ;
		}

		if ( count ( $option ) == 0 ) $option[] = $target ; # Create dummy display title
		$display_title = array_pop ( $option ) ;
		return $this->createInternalLink ( $parser , $target , $display_title , $option ) ;
	}

	/**
	 * Fetches a template page and has the parser expand it with the given
	 * parameters. A title without a ":" is looked up in the template namespace.
	 *
	 * @param string $title  template title as written in the wiki source
	 * @param array  $parts  parameters, each either "value" or "key=value";
	 *                       every parameter advances the position counter, and
	 *                       one without "=" is stored under its position (1, 2, ...)
	 * @param object $parser parser object; its plain_parse() is called on the text
	 * @return string the template text after plain_parse(), or a broken link
	 *                if the page does not exist
	 */
	function getTemplateXHTML ( $title , $parts , &$parser ) {
		global $wgLang , $wgUser ;
		$skin = $wgUser->getSkin() ;
		$ot = $title ; # Original title
		if ( count ( explode ( ":" , $title ) ) == 1 )
			$title = $wgLang->getNsText ( NS_TEMPLATE ) . ":" . $title ;

		$nt = Title::newFromText ( $title ) ;
		# A title that cannot be turned into a Title object is treated like a
		# missing page instead of causing a call on a non-object
		$id = is_object ( $nt ) ? $nt->getArticleID() : 0 ;
		if ( $id == 0 ) { # No/non-existing page
			return $skin->makeBrokenLink ( $title , $ot ) ;
		}

		$a = 0 ;
		$tv = array () ; # Template variables
		foreach ( $parts AS $part ) {
			$a++ ;
			$x = explode ( "=" , $part , 2 ) ;
			if ( count ( $x ) == 1 ) $key = "{$a}" ;
			else $key = $x[0] ;
			$value = array_pop ( $x ) ;
			$tv[$key] = $value ;
		}

		$art = new Article ( $nt ) ;
		$text = $art->getContent ( false ) ;
		$parser->plain_parse ( $text , true , $tv ) ;
		return $text ;
	}

	/**
	 * This function actually converts wikiXML into XHTML tags
	 *
	 * The parser object is read and written here: its nowiki counter, its
	 * mListType (type of the list being rendered) and its
	 * mCurrentTemplateOptions (values for TEMPLATEVAR).
	 *
	 * @param object $parser parser object
	 * @return string XHTML for this element; for an ATTRS element the bare
	 *                attribute string
	 */
	function makeXHTML ( &$parser ) {
		# HTML tags allowed in wiki text (delivered as EXTENSION elements),
		# each mapped to the wiki element it is rendered as
		static $htmlToWiki = array (
			"B" => "BOLD" ,
			"STRONG" => "BOLD" ,
			"I" => "ITALICS" ,
			"EM" => "ITALICS" ,
			"U" => "UNDERLINED" , # Hey, virtual wiki tag! ;-)
			"S" => "STRIKE" ,
			"P" => "PARAGRAPH" ,
			"TABLE" => "TABLE" ,
			"TR" => "TABLEROW" ,
			"TD" => "TABLECELL" ,
			"TH" => "TABLEHEAD" ,
			"CAPTION" => "CAPTION" ,
			"NOWIKI" => "NOWIKI" ,
		) ;

		# Elements rendered as their subtree wrapped in one XHTML tag;
		# an empty tag means "the subtree only"
		static $simpleTags = array (
			"ARTICLE" => "" ,
			"PARAGRAPH" => "p" ,
			"BOLD" => "strong" ,
			"ITALICS" => "em" ,
			"UNDERLINED" => "u" , # This one and STRIKE don't exist as wiki markup
			"STRIKE" => "strike" ,
			"LINKTARGET" => "" ,
			"LINKOPTION" => "" ,
			"IGNORE" => "" , # Internal use, not generated by wiki2xml parser
			"TABLE" => "table" ,
			"TABLEROW" => "tr" ,
			"TABLECELL" => "td" ,
			"TABLEHEAD" => "th" ,
			"CAPTION" => "caption" ,
		) ;

		$ret = "" ;
		$n = $this->name ; # Shortcut

		if ( $n == "EXTENSION" ) { # Fix allowed HTML
			$ext = isset ( $this->attrs["NAME"] ) ? strtoupper ( $this->attrs["NAME"] ) : "" ;
			if ( isset ( $htmlToWiki[$ext] ) ) {
				$n = $htmlToWiki[$ext] ;
				unset ( $this->attrs["NAME"] ) ; # Cleanup
			} else if ( $parser->nowiki > 0 ) {
				# No "real" wiki tags allowed in nowiki section
				# NOTE(review): the empty name falls through to the last branch
				# below and is emitted as an empty-named tag - confirm intended
				$n = "" ;
			}
		}

		if ( isset ( $simpleTags[$n] ) ) {
			$ret .= $this->sub_makeXHTML ( $parser , $simpleTags[$n] ) ;
		} else if ( $n == "HEADING" ) {
			$level = isset ( $this->attrs["LEVEL"] ) ? $this->attrs["LEVEL"] : "" ;
			$ret .= $this->sub_makeXHTML ( $parser , "h" . $level ) ;
		} else if ( $n == "COMMENT" ) {
			# HTML comment: comments are parsed out
		} else if ( $n == "LINK" ) {
			$ret .= $this->makeInternalLink ( $parser ) ;
		} else if ( $n == "TEMPLATE" ) {
			# The subtree renders to "title|part|part|..."
			$parts = $this->sub_makeXHTML ( $parser ) ;
			$parts = explode ( "|" , $parts ) ;
			$title = array_shift ( $parts ) ;
			# getTemplateXHTML() declares $parser by reference itself;
			# call-time "&" is a fatal error as of PHP 5.4
			$ret .= $this->getTemplateXHTML ( $title , $parts , $parser ) ;
		} else if ( $n == "TEMPLATEVAR" ) {
			$x = $this->sub_makeXHTML ( $parser ) ;
			if ( isset ( $parser->mCurrentTemplateOptions[$x] ) )
				$ret .= $parser->mCurrentTemplateOptions[$x] ;
		} else if ( $n == "NOWIKI" ) {
			# Count the nesting so that descendants know they are inside nowiki
			$parser->nowiki++ ;
			$ret .= $this->sub_makeXHTML ( $parser , "" ) ;
			$parser->nowiki-- ;
		} else if ( $n == "EXTENSION" ) {
			# Unknown HTML extension. This is currently a dummy!!!
			$ext = isset ( $this->attrs["NAME"] ) ? $this->attrs["NAME"] : "" ;
			$ret .= "<" . $ext . ">" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "</" . $ext . "> " ;
		} else if ( $n == "ATTRS" ) {
			# SPECIAL CASE : returning attributes (without the newline wrapping below)
			return $this->getTheseAttrs () ;
		} else if ( $n == "LISTITEM" ) {
			if ( $parser->mListType == "dl" ) $ret .= $this->sub_makeXHTML ( $parser , "dd" ) ;
			else $ret .= $this->sub_makeXHTML ( $parser , "li" ) ;
		} else if ( $n == "LIST" ) {
			$listType = isset ( $this->attrs["TYPE"] ) ? $this->attrs["TYPE"] : "" ;
			$type = "ol" ; # Default
			if ( $listType == "bullet" ) $type = "ul" ;
			else if ( $listType == "indent" ) $type = "dl" ;

			# Let the items see the list type, then restore the enclosing list's type
			$oldtype = $parser->mListType ;
			$parser->mListType = $type ;
			$ret .= $this->sub_makeXHTML ( $parser , $type ) ;
			$parser->mListType = $oldtype ;
		} else {
			# Something else entirely
			$ret .= "<" . $n . ">" ;
			$ret .= $this->sub_makeXHTML ( $parser ) ;
			$ret .= "</" . $n . "> " ;
		}

		# Put the element on lines of its own and collapse doubled newlines
		$ret = "\n{$ret}\n" ;
		$ret = str_replace ( "\n\n" , "\n" , $ret ) ;
		return $ret ;
	}

	/**
	 * A function for additional debugging output
	 */
	function myPrint() {
		$ret = "