4 * This is the part of the wikitext parser which handles automatic paragraphs
5 * and conversion of start-of-line prefixes to HTML lists.
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
20 * http://www.gnu.org/copyleft/gpl.html
25 class BlockLevelPass
{
# True while the most recently opened definition-list item is a <dt>;
# set by openList()/nextItem() and consulted by closeList().
26 private $mDTopen = false;
# True while the lines being processed are inside a <pre> element.
27 private $mInPre = false;
# Tag name of the section currently open ('p', 'pre', or '' for none);
# closeParagraph() uses it to build the matching end tag.
28 private $mLastSection = '';
32 # State constants for the definition list colon extraction
# findColonNoLinks() switches on several of these by literal number, so
# the values must not be renumbered without updating that switch.
33 const COLON_STATE_TEXT
= 0;
34 const COLON_STATE_TAG
= 1;
35 const COLON_STATE_TAGSTART
= 2;
36 const COLON_STATE_CLOSETAG
= 3;
37 const COLON_STATE_TAGSLASH
= 4;
38 const COLON_STATE_COMMENT
= 5;
39 const COLON_STATE_COMMENTDASH
= 6;
40 const COLON_STATE_COMMENTDASHDASH
= 7;
43 * Make lists from lines starting with ':', '*', '#', etc.
46 * @param bool $linestart Whether or not this is at the start of a line.
47 * @return string The lists rendered as HTML
49 public static function doBlockLevels( $text, $linestart ) {
# The constructor is private, so this static wrapper is the entry point:
# build a one-shot pass object and return whatever execute() produces.
50 $pass = new self( $text, $linestart );
51 return $pass->execute();
54 private function __construct( $text, $linestart ) {
# Remember whether processing begins at the start of a line; execute()
# tests this flag and then sets it to true.
# NOTE(review): no use of $text is visible in this excerpt -- confirm it
# is stored on the object for execute().
56 $this->linestart
= $linestart;
62 private function closeParagraph() {
# If a section (e.g. 'p' or 'pre') is recorded as open, build its end
# tag followed by a newline.
64 if ( $this->mLastSection
!= '' ) {
65 $result = '</' . $this->mLastSection
. ">\n";
# Reset the <pre> flag and forget the open section so later lines start
# from a clean state.
67 $this->mInPre
= false;
68 $this->mLastSection
= '';
73 * getCommon() returns the length of the longest common prefix
74 * of both arguments, i.e. how many leading characters they share.
81 private function getCommon( $st1, $st2 ) {
# $shorter bounds the comparison loop below; it starts as the length of
# $st2 and is compared against $fl.
# NOTE(review): the assignment of $fl is not visible in this excerpt;
# presumably it is strlen( $st1 ) -- confirm.
83 $shorter = strlen( $st2 );
84 if ( $fl < $shorter ) {
# Walk both strings in step, comparing them one character at a time.
88 for ( $i = 0; $i < $shorter; ++
$i ) {
89 if ( $st1[$i] != $st2[$i] ) {
97 * These next three functions open, continue, and close the list
98 * element appropriate to the prefix character passed into them.
100 * @param string $char
104 private function openList( $char ) {
# Close any open paragraph/pre section first; its end tag (if any)
# becomes the start of the returned markup.
105 $result = $this->closeParagraph();
# Map the wikitext prefix character to the opening list and item tags.
107 if ( '*' === $char ) {
108 $result .= "<ul><li>";
109 } elseif ( '#' === $char ) {
110 $result .= "<ol><li>";
111 } elseif ( ':' === $char ) {
112 $result .= "<dl><dd>";
113 } elseif ( ';' === $char ) {
114 $result .= "<dl><dt>";
# Record that a <dt> is open so nextItem()/closeList() emit </dt>.
115 $this->mDTopen
= true;
# Presumably the fallback for an unrecognised prefix character. Note
# that this assigns rather than appends, so the closeParagraph() output
# collected above is discarded on this path.
117 $result = '<!-- ERR 1 -->';
125 * @param string $char
129 private function nextItem( $char ) {
# Bullet and numbered lists: end the current <li> and start the next.
130 if ( '*' === $char ||
'#' === $char ) {
131 return "</li>\n<li>";
132 } elseif ( ':' === $char ||
';' === $char ) {
# Definition lists: the end tag to emit depends on whether a <dt> is
# currently open, which mDTopen tracks.
# NOTE(review): the assignments to $close are not visible in this excerpt.
134 if ( $this->mDTopen
) {
# ';' opens a term (<dt>); otherwise a definition (<dd>) is opened.
# mDTopen is updated to match whichever tag was just opened.
137 if ( ';' === $char ) {
138 $this->mDTopen
= true;
139 return $close . '<dt>';
141 $this->mDTopen
= false;
142 return $close . '<dd>';
# Presumably reached only for an unrecognised prefix character.
145 return '<!-- ERR 2 -->';
150 * @param string $char
154 private function closeList( $char ) {
# execute() passes characters taken from a prefix in which ';' has
# already been replaced by ':', so ':' covers both definition-list markers.
155 if ( '*' === $char ) {
156 $text = "</li></ul>";
157 } elseif ( '#' === $char ) {
158 $text = "</li></ol>";
159 } elseif ( ':' === $char ) {
# End the term or the definition, whichever mDTopen says is open.
160 if ( $this->mDTopen
) {
161 $this->mDTopen
= false;
162 $text = "</dt></dl>";
164 $text = "</dd></dl>";
# Presumably the fallback for an unrecognised prefix character.
167 return '<!-- ERR 3 -->';
173 private function execute() {
# Processes the text one line at a time, accumulating HTML in $output.
# NOTE(review): $text is used below but its assignment is not visible in
# this excerpt -- confirm where it comes from.
175 # Parsing through the text line by line. The main thing
176 # happening here is handling of block-level elements p, pre,
177 # and making lists from lines starting with * # : etc.
178 $textLines = StringUtils
::explode( "\n", $text );
# $lastPrefix is the list prefix of the previous line, with ';' already
# normalised to ':' (it is assigned from $prefix2 further down).
180 $lastPrefix = $output = '';
181 $this->mDTopen
= $inBlockElem = false;
# $paragraphStack holds a pending '<p>' or '</p><p>' that has not yet been
# written to $output, or false when nothing is pending.
183 $paragraphStack = false;
184 $inBlockquote = false;
186 foreach ( $textLines as $oLine ) {
# The linestart flag is forced to true once it has been seen false.
188 if ( !$this->linestart
) {
190 $this->linestart
= true;
198 $lastPrefixLength = strlen( $lastPrefix );
# Case-insensitive check for <pre ...> and </pre anywhere on the line.
199 $preCloseMatch = preg_match( '/<\\/pre/i', $oLine );
200 $preOpenMatch = preg_match( '/<pre/i', $oLine );
201 # If not in a <pre> element, scan for and figure out what prefixes are there.
202 if ( !$this->mInPre
) {
203 # Multiple prefixes may abut each other for nested lists.
204 $prefixLength = strspn( $oLine, '*#:;' );
205 $prefix = substr( $oLine, 0, $prefixLength );
208 # ; and : are both from definition-lists, so they're equivalent
209 # for the purposes of determining whether or not we need to open/close
# list elements.
211 $prefix2 = str_replace( ';', ':', $prefix );
# $t is the line content with the list prefix stripped off.
212 $t = substr( $oLine, $prefixLength );
213 $this->mInPre
= (bool)$preOpenMatch;
215 # Don't interpret any other prefixes in preformatted text
217 $prefix = $prefix2 = '';
222 if ( $prefixLength && $lastPrefix === $prefix2 ) {
223 # Same as the last item, so no need to deal with nesting or opening stuff
224 $output .= $this->nextItem( substr( $prefix, -1 ) );
225 $paragraphStack = false;
227 if ( substr( $prefix, -1 ) === ';' ) {
228 # The one nasty exception: definition lists work like this:
229 # ; title : definition text
230 # So we check for : in the remainder text to split up the
231 # title and definition, without b0rking links.
233 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
235 $output .= $term . $this->nextItem( ':' );
238 } elseif ( $prefixLength ||
$lastPrefixLength ) {
239 # We need to open or close prefixes, or both.
241 # Either open or close a level...
242 $commonPrefixLength = $this->getCommon( $prefix, $lastPrefix );
243 $paragraphStack = false;
245 # Close all the prefixes which aren't shared.
246 while ( $commonPrefixLength < $lastPrefixLength ) {
247 $output .= $this->closeList( $lastPrefix[$lastPrefixLength - 1] );
251 # Continue the current prefix if appropriate.
252 if ( $prefixLength <= $commonPrefixLength && $commonPrefixLength > 0 ) {
253 $output .= $this->nextItem( $prefix[$commonPrefixLength - 1] );
256 # Open prefixes where appropriate.
257 if ( $lastPrefix && $prefixLength > $commonPrefixLength ) {
# Open one list level per prefix character beyond the shared part.
260 while ( $prefixLength > $commonPrefixLength ) {
261 $char = substr( $prefix, $commonPrefixLength, 1 );
262 $output .= $this->openList( $char );
264 if ( ';' === $char ) {
265 # @todo FIXME: This is dupe of code above
266 if ( $this->findColonNoLinks( $t, $term, $t2 ) !== false ) {
268 $output .= $term . $this->nextItem( ':' );
271 ++
$commonPrefixLength;
273 if ( !$prefixLength && $lastPrefix ) {
# Remember this line's normalised prefix for comparison with the next line.
276 $lastPrefix = $prefix2;
279 # If we have no prefixes, go to paragraph mode.
280 if ( 0 == $prefixLength ) {
281 # No prefix (not in list)--go to paragraph mode
282 # XXX: use a stack for nestable elements like span, table and div
283 $openmatch = preg_match(
284 '/(?:<table|<h1|<h2|<h3|<h4|<h5|<h6|<pre|<tr|'
285 . '<p|<ul|<ol|<dl|<li|<\\/tr|<\\/td|<\\/th)/iS',
288 $closematch = preg_match(
289 '/(?:<\\/table|<\\/h1|<\\/h2|<\\/h3|<\\/h4|<\\/h5|<\\/h6|'
290 . '<td|<th|<\\/?blockquote|<\\/?div|<hr|<\\/pre|<\\/p|<\\/mw:|'
291 . Parser
::MARKER_PREFIX
292 . '-pre|<\\/li|<\\/ul|<\\/ol|<\\/dl|<\\/?center)/iS',
296 if ( $openmatch ||
$closematch ) {
297 $paragraphStack = false;
298 # @todo bug 5718: paragraph closed
299 $output .= $this->closeParagraph();
# closeParagraph() clears mInPre, so restore it when this line opens a
# <pre> without also closing it.
300 if ( $preOpenMatch && !$preCloseMatch ) {
301 $this->mInPre
= true;
# Scan every <blockquote> / </blockquote> tag on the line, resuming each
# search just past the previous match; $inBlockquote ends up reflecting
# the last such tag found.
304 while ( preg_match( '/<(\\/?)blockquote[\s>]/i', $t,
305 $bqMatch, PREG_OFFSET_CAPTURE
, $bqOffset )
307 $inBlockquote = !$bqMatch[1][0]; // is this a close tag?
308 $bqOffset = $bqMatch[0][1] +
strlen( $bqMatch[0][0] );
310 $inBlockElem = !$closematch;
311 } elseif ( !$inBlockElem && !$this->mInPre
) {
# A line starting with a space is treated as preformatted: it continues
# an open 'pre' section, or starts one if the line is not blank.
312 if ( ' ' == substr( $t, 0, 1 )
313 && ( $this->mLastSection
=== 'pre' ||
trim( $t ) != '' )
317 if ( $this->mLastSection
!== 'pre' ) {
318 $paragraphStack = false;
319 $output .= $this->closeParagraph() . '<pre>';
320 $this->mLastSection
= 'pre';
# Drop the single leading space that marked the line as preformatted.
322 $t = substr( $t, 1 );
# Blank line: either flush a pending paragraph tag followed by <br />,
# or queue a paragraph tag in $paragraphStack for a later line.
325 if ( trim( $t ) === '' ) {
326 if ( $paragraphStack ) {
327 $output .= $paragraphStack . '<br />';
328 $paragraphStack = false;
329 $this->mLastSection
= 'p';
331 if ( $this->mLastSection
!== 'p' ) {
332 $output .= $this->closeParagraph();
333 $this->mLastSection
= '';
334 $paragraphStack = '<p>';
336 $paragraphStack = '</p><p>';
# Emit a pending paragraph tag if there is one; otherwise open a <p>
# unless a 'p' section is already open.
340 if ( $paragraphStack ) {
341 $output .= $paragraphStack;
342 $paragraphStack = false;
343 $this->mLastSection
= 'p';
344 } elseif ( $this->mLastSection
!== 'p' ) {
345 $output .= $this->closeParagraph() . '<p>';
346 $this->mLastSection
= 'p';
352 # somewhere above we forget to get out of pre block (bug 785)
353 if ( $preCloseMatch && $this->mInPre
) {
354 $this->mInPre
= false;
356 if ( $paragraphStack === false ) {
358 if ( $prefixLength === 0 ) {
# Close the list levels still open, starting from the last prefix character.
363 while ( $prefixLength ) {
364 $output .= $this->closeList( $prefix2[$prefixLength - 1] );
366 if ( !$prefixLength ) {
# Close whichever section is still recorded as open.
370 if ( $this->mLastSection
!= '' ) {
371 $output .= '</' . $this->mLastSection
. '>';
372 $this->mLastSection
= '';
379 * Split up a string on ':', ignoring any occurrences inside tags
380 * to prevent illegal overlapping.
382 * @param string $str The string to split
383 * @param string &$before Set to everything before the ':'
384 * @param string &$after Set to everything after the ':'
385 * @throws MWException
386 * @return int|false The position of the ':', or false if none found
388 private function findColonNoLinks( $str, &$before, &$after ) {
# Look for the first colon; the branch below handles there being none.
389 $pos = strpos( $str, ':' );
390 if ( $pos === false ) {
# If no '<' occurs before the first colon, that colon cannot be inside a
# tag, so the string can be split there directly.
395 $lt = strpos( $str, '<' );
396 if ( $lt === false ||
$lt > $pos ) {
397 # Easy; no tag nesting to worry about
398 $before = substr( $str, 0, $pos );
399 $after = substr( $str, $pos +
1 );
403 # Ugly state machine to walk through avoiding tags.
404 $state = self
::COLON_STATE_TEXT
;
# Scan the string one byte at a time; $state selects the case below.
# NOTE(review): $stack appears to track tag nesting depth (see the debug
# messages), but its initialisation and updates are not visible here.
406 $len = strlen( $str );
407 for ( $i = 0; $i < $len; $i++
) {
411 # (Using the number is a performance hack for common cases)
412 case 0: # self::COLON_STATE_TEXT:
415 # Could be either a <start> tag or an </end> tag
416 $state = self
::COLON_STATE_TAGSTART
;
421 $before = substr( $str, 0, $i );
422 $after = substr( $str, $i +
1 );
425 # Embedded in a tag; don't break it.
428 # Skip ahead looking for something interesting
429 $colon = strpos( $str, ':', $i );
430 if ( $colon === false ) {
431 # Nothing else interesting
434 $lt = strpos( $str, '<', $i );
# At nesting depth 0, a colon that comes before the next '<' is a valid
# split point.
435 if ( $stack === 0 ) {
436 if ( $lt === false ||
$colon < $lt ) {
438 $before = substr( $str, 0, $colon );
439 $after = substr( $str, $colon +
1 );
443 if ( $lt === false ) {
444 # Nothing else interesting to find; abort!
445 # We're nested, but there's no close tags left. Abort!
448 # Skip ahead to next tag start
450 $state = self
::COLON_STATE_TAGSTART
;
453 case 1: # self::COLON_STATE_TAG:
458 $state = self
::COLON_STATE_TEXT
;
461 # Slash may be followed by >?
462 $state = self
::COLON_STATE_TAGSLASH
;
468 case 2: # self::COLON_STATE_TAGSTART:
471 $state = self
::COLON_STATE_CLOSETAG
;
474 $state = self
::COLON_STATE_COMMENT
;
477 # Illegal early close? This shouldn't happen D:
478 $state = self
::COLON_STATE_TEXT
;
481 $state = self
::COLON_STATE_TAG
;
484 case 3: # self::COLON_STATE_CLOSETAG:
489 wfDebug( __METHOD__
. ": Invalid input; too many close tags\n" );
492 $state = self
::COLON_STATE_TEXT
;
495 case self
::COLON_STATE_TAGSLASH
:
497 # Yes, a self-closed tag <blah/>
498 $state = self
::COLON_STATE_TEXT
;
500 # Probably we're jumping the gun, and this is an attribute
501 $state = self
::COLON_STATE_TAG
;
504 case 5: # self::COLON_STATE_COMMENT:
506 $state = self
::COLON_STATE_COMMENTDASH
;
509 case self
::COLON_STATE_COMMENTDASH
:
511 $state = self
::COLON_STATE_COMMENTDASHDASH
;
513 $state = self
::COLON_STATE_COMMENT
;
516 case self
::COLON_STATE_COMMENTDASHDASH
:
518 $state = self
::COLON_STATE_TEXT
;
520 $state = self
::COLON_STATE_COMMENT
;
# A state value matching none of the cases is a programming error.
524 throw new MWException( "State machine error in " . __METHOD__
);
# Logs that the input had unbalanced tags, reporting the stack and state values.
528 wfDebug( __METHOD__
. ": Invalid input; not enough close tags (stack $stack, state $state)\n" );