Made strong/em handling more forgiving against unbalanced ticks
[lhc/web/wiklou.git] / includes / Parser.php
index e488b50..05c6786 100644 (file)
@@ -547,7 +547,7 @@ class Parser
                return $t ;
        }
 
-       function internalParse( $text, $linestart, $args = array() )
+       function internalParse( $text, $linestart, $args = array(), $isMain=true )
        {
                $fname = "Parser::internalParse";
                wfProfileIn( $fname );
@@ -565,7 +565,7 @@ class Parser
                $text = $this->replaceExternalLinks( $text );
                $text = $this->doTokenizedParser ( $text );
                $text = $this->doTableStuff ( $text ) ;
-               $text = $this->formatHeadings( $text );
+               $text = $this->formatHeadings( $text, $isMain );
                $sk =& $this->mOptions->getSkin();
                $text = $sk->transformContent( $text );
 
@@ -675,6 +675,21 @@ class Parser
                return $s;
        }
 
+       /* private */ function handle4Quotes( &$state, $token )
+       {
+               /* Four ticks are ambiguous; they are treated as a strong toggle plus one literal tick:
+                * '''Caesar''''s army  => <strong>Caesar</strong>'s army
+                * ''''Caesar'''' was a Roman emperor => '<strong>Caesar</strong>' was a Roman emperor
+                * If strong is already open, the tick is appended after handle3Quotes' output; otherwise
+                * it is prepended. Any other guess could be wrong too, so this one is used. */
+               if ( $state["strong"] !== false ) {
+                       return $this->handle3Quotes( $state, $token ) . "'";
+               } else {
+                       return "'" . $this->handle3Quotes( $state, $token );
+               }
+       }
+
+
        /* private */ function handle3Quotes( &$state, $token )
        {
                if ( $state["strong"] !== false ) {
@@ -688,7 +703,7 @@ class Parser
                        $state["strong"] = FALSE;
                } else {
                        $s = "<strong>";
-                       $state["strong"] = isset($token["pos"]) ? $token["pos"] : true;
+                       $state["strong"] = $token["pos"];
                }
                return $s;
        }
@@ -706,7 +721,7 @@ class Parser
                        $state["em"] = FALSE;
                } else {
                        $s = "<em>";
-                       $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
+                       $state["em"] = $token["pos"];
 
                }
                return $s;
@@ -729,10 +744,10 @@ class Parser
                } elseif ( $state["strong"] !== false ) {
                        $s .= "</strong><em>";
                        $state["strong"] = FALSE;
-                       $state["em"] = @$token["pos"];
+                       $state["em"] = $token["pos"];
                } else { # not $em and not $strong
                        $s .= "<strong><em>";
-                       $state["strong"] = $state["em"] = isset($token["pos"]) ? $token["pos"] : true;
+                       $state["strong"] = $state["em"] = $token["pos"];
                }
                return $s;
        }
@@ -830,7 +845,7 @@ class Parser
                                        $txt = "\n<hr />\n";
                                        break;
                                case "'''":
-                                       # This and the three next ones handle quotes
+                                       # This and the next three cases handle quotes
                                        $txt = $this->handle3Quotes( $state, $token );
                                        break;
                                case "''":
@@ -839,10 +854,26 @@ class Parser
                                case "'''''":
                                        $txt = $this->handle5Quotes( $state, $token );
                                        break;
+                               case "''''":
+                                       $txt = $this->handle4Quotes( $state, $token );
+                                       break;
                                case "":
                                        # empty token
                                        $txt="";
                                        break;
+                               case "h": 
+                                       # heading: close any unbalanced strong or em tags still open in this section
+                                       $txt = '';
+                                       if( $state['em'] !== false and 
+                                       ( $state['strong'] === false or $state['em'] > $state['strong'] ) )
+                                       { 
+                                               $s .= '</em>';
+                                               $state['em'] = false;
+                                       }
+                                       if ( $state['strong'] !== false ) $txt .= '</strong>';
+                                       if ( $state['em'] !== false ) $txt .= '</em>';
+                                       $state['strong'] = $state['em'] = false;
+                                       break;
                                case "RFC ":
                                        if ( $tagIsOpen ) {
                                                $txt = "RFC ";
@@ -886,6 +917,19 @@ class Parser
                                $s .= $txt;
                        }
                } #end while
+
+               # Make sure every strong and em tag that is still open gets closed.
+               # doBlockLevels often messes up this last bit, but invalid nesting is better than unclosed tags,
+               # and tidy fixes the nesting afterwards.
+               if( $state['em'] !== false and 
+               ( $state['strong'] === false or $state['em'] > $state['strong'] ) )
+               { 
+                       $s .= '</em>';
+                       $state['em'] = false;
+               }
+               if ( $state['strong'] !== false ) $s .= '</strong>';
+               if ( $state['em'] !== false ) $s .= '</em>';
+
                if ( count( $tokenStack ) != 0 )
                {
                        # still objects on stack. opened [[ tag without closing ]] tag.
@@ -1507,7 +1551,7 @@ class Parser
 
                        # Run full parser on the included text
                        $text = $this->strip( $text, $this->mStripState );
-                       $text = $this->internalParse( $text, (bool)$newline, $assocArgs );
+                       $text = $this->internalParse( $text, (bool)$newline, $assocArgs, false );
 
                        # Add the result to the strip state for re-inclusion after
                        # the rest of the processing
@@ -1672,7 +1716,7 @@ class Parser
  *
  */
 
-       /* private */ function formatHeadings( $text )
+       /* private */ function formatHeadings( $text, $isMain=true )
        {
                global $wgInputEncoding;
                
@@ -1842,7 +1886,7 @@ class Parser
                                # $full .= $sk->editSectionLink(0);
                        }
                        $full .= $block;
-                       if( $doShowToc && !$i) {
+                       if( $doShowToc && !$i && $isMain) {
                        # Top anchor now in skin
                                $full = $full.$toc;
                        }