+ /**
+  * Helper function for parse() that transforms half-parsed HTML into fully
+  * parsed HTML.
+  *
+  * The steps below are order-sensitive. In sequence, this method:
+  *  - unstrips general strip markers,
+  *  - applies the "french spaces" fix-ups (no-break space before certain
+  *    punctuation and after a left guillemet),
+  *  - runs doBlockLevels() and replaceLinkHolders(),
+  *  - runs language conversion unless it is disabled for this parse,
+  *  - unstrips <nowiki> markers,
+  *  - runs the ParserBeforeTidy hook (main parse only),
+  *  - replaces transparent tags and unstrips general markers again,
+  *  - normalizes character references,
+  *  - tidies the HTML (MWTidy, or a small regex-based fallback),
+  *  - runs the ParserAfterTidy hook (main parse only).
+  *
+  * @param string $text Half-parsed HTML
+  * @param bool $isMain Whether the ParserBeforeTidy and ParserAfterTidy
+  *   hooks should be run
+  * @param bool $linestart Passed through to doBlockLevels()
+  * @return string Fully parsed HTML
+  */
+ private function internalParseHalfParsed( $text, $isMain = true, $linestart = true ) {
+     global $wgUseTidy, $wgAlwaysUseTidy;
+
+     $text = $this->mStripState->unstripGeneral( $text );
+
+     # Clean up special characters, only run once, next-to-last before doBlockLevels.
+     # The no-break space is written as the explicit entity &#160; rather than
+     # as a literal character, so that it is visible in the source and cannot
+     # be silently turned into a plain space (which would make these rules no-ops).
+     # preg_replace() applies the rules in array order, so the !important rule
+     # sees the output of the first rule.
+     $fixtags = array(
+         # french spaces, last one Guillemet-left
+         # only if there is something before the space
+         '/(.) (?=\\?|:|;|!|%|\\302\\273)/' => '\\1&#160;',
+         # french spaces, Guillemet-right
+         '/(\\302\\253) /' => '\\1&#160;',
+         # Beware of CSS magic word !important, bug #11874: undo the no-break
+         # space that the first rule inserts before the "!".
+         '/&#160;(!\s*important)/' => ' \\1',
+     );
+     $text = preg_replace( array_keys( $fixtags ), array_values( $fixtags ), $text );
+
+     $text = $this->doBlockLevels( $text, $linestart );
+
+     $this->replaceLinkHolders( $text );
+
+     /**
+      * The input doesn't get language converted if
+      * a) It's disabled
+      * b) Content isn't converted
+      * c) It's a conversion table
+      * d) it is an interface message (which is in the user language)
+      *
+      * NOTE(review): only the "disabled" option, the 'nocontentconvert'
+      * double underscore and the interface-message option are checked in
+      * this method; case (c) is presumably handled elsewhere - confirm.
+      */
+     if ( !( $this->mOptions->getDisableContentConversion()
+         || isset( $this->mDoubleUnderscores['nocontentconvert'] ) )
+     ) {
+         if ( !$this->mOptions->getInterfaceMessage() ) {
+             # The position of the convert() call should not be changed. It
+             # assumes that the links are all replaced and the only thing left
+             # is the <nowiki> mark.
+             $text = $this->getConverterLanguage()->convert( $text );
+         }
+     }
+
+     $text = $this->mStripState->unstripNoWiki( $text );
+
+     if ( $isMain ) {
+         # Pass a local alias instead of &$this: PHP 7.1+ no longer provides a
+         # real reference to $this, which can trigger "expected to be a
+         # reference, value given" warnings in handlers declaring &$parser.
+         $parser = $this;
+         wfRunHooks( 'ParserBeforeTidy', array( &$parser, &$text ) );
+     }
+
+     $text = $this->replaceTransparentTags( $text );
+     $text = $this->mStripState->unstripGeneral( $text );
+
+     $text = Sanitizer::normalizeCharReferences( $text );
+
+     if ( ( $wgUseTidy && $this->mOptions->getTidy() ) || $wgAlwaysUseTidy ) {
+         $text = MWTidy::tidy( $text );
+     } else {
+         # attempt to sanitize at least some nesting problems
+         # (bug #2702 and quite a few others)
+         $tidyregs = array(
+             # ''Something [http://www.cool.com cool''] -->
+             # <i>Something</i><a href="http://www.cool.com"..><i>cool</i></a>
+             '/(<([bi])>)(<([bi])>)?([^<]*)(<\/?a[^<]*>)([^<]*)(<\/\\4>)?(<\/\\2>)/' =>
+             '\\1\\3\\5\\8\\9\\6\\1\\3\\7\\8\\9',
+             # fix up an anchor inside another anchor, at least
+             # for a single nested link (bug 3695)
+             '/(<a[^>]+>)([^<]*)(<a[^>]+>[^<]*)<\/a>(.*)<\/a>/' =>
+             '\\1\\2</a>\\3</a>\\1\\4</a>',
+             # fix div inside inline elements - doBlockLevels won't wrap a line which
+             # contains a div, so fix it up here; replace
+             # div with escaped text
+             '/(<([aib]) [^>]+>)([^<]*)(<div([^>]*)>)(.*)(<\/div>)([^<]*)(<\/\\2>)/' =>
+             '\\1\\3&lt;div\\5&gt;\\6&lt;/div&gt;\\8\\9',
+             # remove empty italic or bold tag pairs, some
+             # introduced by rules above
+             '/<([bi])><\/\\1>/' => '',
+         );
+
+         $text = preg_replace(
+             array_keys( $tidyregs ),
+             array_values( $tidyregs ),
+             $text );
+     }
+
+     if ( $isMain ) {
+         # Re-alias here so this hook never sees a value that a
+         # ParserBeforeTidy handler may have written through its reference.
+         $parser = $this;
+         wfRunHooks( 'ParserAfterTidy', array( &$parser, &$text ) );
+     }
+
+     return $text;
+ }
+