[PLUGINS] +abomailman
[ptitvelo/web/www.git] / www / plugins / facteur / lib / markdownify / markdownify_extra.php
1 <?php
2 /**
3 * Class to convert HTML to Markdown with PHP Markdown Extra syntax support.
4 *
5 * @version 1.0.0 alpha
6 * @author Milian Wolff (<mail@milianw.de>, <http://milianw.de>)
7 * @license LGPL, see LICENSE_LGPL.txt and the summary below
8 * @copyright (C) 2007 Milian Wolff
9 *
10 * This library is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU Lesser General Public
12 * License as published by the Free Software Foundation; either
13 * version 2.1 of the License, or (at your option) any later version.
14 *
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18 * Lesser General Public License for more details.
19 *
20 * You should have received a copy of the GNU Lesser General Public
21 * License along with this library; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23 */
24
25 /**
26 * standard Markdownify class
27 */
28 require_once dirname(__FILE__) . '/markdownify.php';
29
30 class Markdownify_Extra extends Markdownify {
/**
 * table data, including rows with content and the maximum width of each col
 *
 * @var array
 */
var $table = array();
/**
 * index of the current column; reset to -1 whenever a row is opened
 *
 * @var int
 */
var $col = -1;
/**
 * index of the current row
 *
 * @var int
 */
var $row = 0;
/**
 * lookahead RegEx for the header row of a convertible table (built in the constructor)
 *
 * @var string
 */
var $tableLookaheadHeader = '';
/**
 * RegEx fragment for the contents and closing tag of one body cell (built in the constructor)
 *
 * @var string
 */
var $tdSubstitute = '';
/**
 * sprintf() template for the body lookahead RegEx; %s receives the per-column cell patterns
 *
 * @var string
 */
var $tableLookaheadBody = '';
/**
 * constructor entry point for current PHP versions
 *
 * A method named after its class is no longer run by `new` on PHP 8, so the
 * setup is delegated explicitly. The class-named method below is kept so
 * existing callers of it keep working.
 *
 * @param mixed $linksAfterEachParagraph forwarded unchanged
 * @param mixed $bodyWidth forwarded unchanged
 * @param mixed $keepHTML forwarded unchanged
 */
function __construct($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
	$this->Markdownify_Extra($linksAfterEachParagraph, $bodyWidth, $keepHTML);
}
/**
 * legacy constructor, see Markdownify::Markdownify() for more information
 *
 * Registers the additional Markdown Extra tags (header ids, tables, definition
 * lists, footnotes, abbreviations) and builds the table lookahead expressions.
 *
 * @param mixed $linksAfterEachParagraph forwarded to Markdownify::Markdownify()
 * @param mixed $bodyWidth forwarded to Markdownify::Markdownify()
 * @param mixed $keepHTML forwarded to Markdownify::Markdownify()
 */
function Markdownify_Extra($linksAfterEachParagraph = MDFY_LINKS_EACH_PARAGRAPH, $bodyWidth = MDFY_BODYWIDTH, $keepHTML = MDFY_KEEPHTML) {
	parent::Markdownify($linksAfterEachParagraph, $bodyWidth, $keepHTML);

	### new markdownable tags & attributes
	# header ids: # foo {bar}
	foreach (array('h1', 'h2', 'h3', 'h4', 'h5', 'h6') as $header) {
		$this->isMarkdownable[$header]['id'] = 'optional';
	}
	# tables
	$this->isMarkdownable['table'] = array();
	$this->isMarkdownable['th'] = array(
		'align' => 'optional',
	);
	$this->isMarkdownable['td'] = array(
		'align' => 'optional',
	);
	$this->isMarkdownable['tr'] = array();
	array_push($this->ignore, 'thead', 'tbody', 'tfoot');
	# definition lists
	$this->isMarkdownable['dl'] = array();
	$this->isMarkdownable['dd'] = array();
	$this->isMarkdownable['dt'] = array();
	# footnotes (custom tags produced by parseString() / _makeFootnotes())
	$this->isMarkdownable['fnref'] = array(
		'target' => 'required',
	);
	$this->isMarkdownable['footnotes'] = array();
	$this->isMarkdownable['fn'] = array(
		'name' => 'required',
	);
	$this->parser->blockElements['fnref'] = false;
	$this->parser->blockElements['fn'] = true;
	$this->parser->blockElements['footnotes'] = true;
	# abbr
	$this->isMarkdownable['abbr'] = array(
		'title' => 'required',
	);
	# build RegEx lookahead to decide whether a table can be parsed or not;
	# every tag whose blockElements entry is false is allowed inside a cell
	$inlineTags = array_keys($this->parser->blockElements, false);
	$colContents = '(?:[^<]|<(?:'.implode('|', $inlineTags).'|[^a-z]))+';
	$this->tableLookaheadHeader = '{
		^\s*(?:<thead\s*>)?\s* # open optional thead
		<tr\s*>\s*(?: # start required row with headers
		<th(?:\s+align=("|\')(?:left|center|right)\1)?\s*> # header with optional align
		\s*'.$colContents.'\s* # contents
		</th>\s* # close header
		)+</tr> # close row with headers
		\s*(?:</thead>)? # close optional thead
	}sxi';
	# the line break after the comment is required: it ends the x-mode comment
	$this->tdSubstitute = '\s*'.$colContents.'\s* # contents
		</td>\s*';
	$this->tableLookaheadBody = '{
		\s*(?:<tbody\s*>)?\s* # open optional tbody
		(?:<tr\s*>\s* # start row
		%s # cols to be substituted
		</tr>)+ # close row
		\s*(?:</tbody>)? # close optional tbody
		\s*</table> # close table
	}sxi';
}
/**
 * handle header tags (<h1> - <h6>)
 *
 * Remembers the id attribute seen on the opening tag and writes it as
 * ` {#id}` when the closing tag arrives, then defers to the parent handler.
 *
 * @param int $level 1-6
 * @return void
 */
function handleHeader($level) {
	# id of the header currently open, kept between the start and end tag call
	static $pendingId = null;
	if (!$this->parser->isStartTag) {
		if ($pendingId !== null) {
			$this->out(' {#'.$pendingId.'}');
			$pendingId = null;
		}
	} elseif (isset($this->parser->tagAttributes['id'])) {
		$pendingId = $this->parser->tagAttributes['id'];
	}
	parent::handleHeader($level);
}
/**
 * handle <abbr> tags
 *
 * The opening tag is stacked and its contents are buffered. On the closing
 * tag the buffered text is written out unchanged and the tag is kept on
 * $this->stack['abbr'] (once per distinct text) for flushStacked_abbr().
 *
 * @param void
 * @return void
 */
function handleTag_abbr() {
	if ($this->parser->isStartTag) {
		$this->stack();
		$this->buffer();
		return;
	}
	$tag = $this->unstack();
	$tag['text'] = $this->unbuffer();
	$alreadyKnown = false;
	foreach ($this->stack['abbr'] as $stacked) {
		# strict comparison: with == numeric-looking texts such as "1e3" and
		# "1000" would be treated as the same abbreviation
		if ($stacked['text'] === $tag['text']) {
			/** TODO: differing abbr definitions, i.e. different titles for same text **/
			$alreadyKnown = true;
			break;
		}
	}
	$this->out($tag['text']);
	if (!$alreadyKnown) {
		array_push($this->stack['abbr'], $tag);
	}
}
/**
 * flush stacked abbr tags
 *
 * Writes one ` *[text]: title` definition line for every stacked abbreviation
 * that has not been written before and marks it as 'unstacked'.
 *
 * @param void
 * @return void
 */
function flushStacked_abbr() {
	$definitions = array();
	foreach ($this->stack['abbr'] as $index => $entry) {
		if (isset($entry['unstacked'])) {
			# already written by an earlier flush
			continue;
		}
		$definitions[] = ' *['.$entry['text'].']: '.$entry['title'];
		$entry['unstacked'] = true;
		$this->stack['abbr'][$index] = $entry;
	}
	if (count($definitions) > 0) {
		$this->out("\n\n".implode("\n", $definitions));
	}
}
/**
 * handle <table> tags
 *
 * Start tag: resets the table state. When $this->keepHTML is set, the upcoming
 * markup is first checked against the lookahead expressions built in the
 * constructor; a table that does not match is handed to handleTagToText().
 * End tag: writes the collected rows in Markdown Extra table syntax.
 *
 * @param void
 * @return void
 */
function handleTag_table() {
	if ($this->parser->isStartTag) {
		$aligns = array();
		if ($this->keepHTML) {
			# check if upcoming table can be converted
			if (!preg_match($this->tableLookaheadHeader, $this->parser->html, $matches)) {
				# non markdownable table
				$this->handleTagToText();
				return;
			}
			# header seems good, now check body
			# get align & number of cols
			preg_match_all('#<th(?:\s+align=("|\')(left|right|center)\1)?\s*>#si', $matches[0], $cols);
			$regEx = '';
			# every column contributes exactly one capture group (its quote
			# character), so the column number doubles as backreference number
			$i = 1;
			foreach ($cols[2] as $align) {
				$align = strtolower($align);
				array_push($aligns, $align);
				if (empty($align)) {
					$align = 'left'; # default value
				}
				$td = '\s+align=("|\')'.$align.'\\'.$i;
				$i++;
				if ($align == 'left') {
					# look for empty align or left
					$td = '(?:'.$td.')?';
				}
				$td = '<td'.$td.'\s*>';
				$regEx .= $td.$this->tdSubstitute;
			}
			$regEx = sprintf($this->tableLookaheadBody, $regEx);
			# continue searching right after the header that was just matched;
			# flags must be an int (0), null is deprecated as of PHP 8.1
			$headerLength = strlen($matches[0]);
			if (!preg_match($regEx, $this->parser->html, $matches, 0, $headerLength)) {
				# non markdownable table
				$this->handleTagToText();
				return;
			}
			# this is a markdownable table tag!
		}
		$this->table = array(
			'rows' => array(),
			'col_widths' => array(),
			'aligns' => $aligns,
		);
		$this->row = 0;
		return;
	}

	if (empty($this->table['rows'])) {
		# no cells were collected: there is no header row to print, and
		# implode() on the missing row would fail
		$this->table = array();
		$this->setLineBreaks(2);
		return;
	}

	# finally build the table in Markdown Extra syntax
	$separator = array();
	# separator with correct align identifiers
	foreach ($this->table['aligns'] as $col => $align) {
		if (!$this->keepHTML && !isset($this->table['col_widths'][$col])) {
			break;
		}
		$left = ' ';
		$right = ' ';
		switch ($align) {
			case 'left':
				$left = ':';
				break;
			case 'center':
				$left = ':';
				$right = ':';
				break;
			case 'right':
				$right = ':';
				break;
		}
		array_push($separator, $left.str_repeat('-', $this->table['col_widths'][$col]).$right);
	}
	$separator = '|'.implode('|', $separator).'|';

	$rows = array();
	# add padding
	array_walk_recursive($this->table['rows'], array($this, 'alignTdContent'));
	# the first collected row is printed as the header row
	$header = array_shift($this->table['rows']);
	array_push($rows, '| '.implode(' | ', $header).' |');
	array_push($rows, $separator);
	foreach ($this->table['rows'] as $row) {
		array_push($rows, '| '.implode(' | ', $row).' |');
	}
	$this->out(implode("\n".$this->indent, $rows));
	$this->table = array();
	$this->setLineBreaks(2);
}
/**
 * properly pad content so it is aligned as wished
 * should be used with array_walk_recursive on $this->table['rows']
 *
 * The cell is padded with spaces up to the width recorded for its column.
 * Columns without a recorded alignment are padded like left-aligned ones.
 *
 * @param string &$content cell text, modified in place
 * @param int $col column index (the array key supplied by array_walk_recursive)
 * @return void
 */
function alignTdContent(&$content, $col) {
	$width = isset($this->table['col_widths'][$col]) ? $this->table['col_widths'][$col] : 0;
	# never negative: str_repeat() rejects a negative count
	$padding = max(0, $width - $this->strlen($content));
	# a column may have no alignment entry (e.g. a body row wider than the header)
	$align = isset($this->table['aligns'][$col]) ? $this->table['aligns'][$col] : '';
	switch ($align) {
		case 'right':
			$content = str_repeat(' ', $padding).$content;
			break;
		case 'center':
			# the odd space, if any, goes to the right-hand side
			$left = (int) floor($padding / 2);
			$right = $padding - $left;
			$content = str_repeat(' ', $left).$content.str_repeat(' ', $right);
			break;
		case 'left':
		default:
			$content .= str_repeat(' ', $padding);
			break;
	}
}
/**
 * handle <tr> tags
 *
 * Opening a row resets the column counter; closing it advances the row counter.
 *
 * @param void
 * @return void
 */
function handleTag_tr() {
	if (!$this->parser->isStartTag) {
		$this->row++;
		return;
	}
	$this->col = -1;
}
/**
 * handle <td> tags
 *
 * Start tag: moves to the next column and starts buffering the cell contents.
 * End tag: stores the trimmed cell text and widens the column if needed.
 *
 * @param void
 * @return void
 */
function handleTag_td() {
	if (!$this->parser->isStartTag) {
		$cell = trim($this->unbuffer());
		$widestSoFar = $this->table['col_widths'][$this->col];
		$this->table['col_widths'][$this->col] = max($widestSoFar, $this->strlen($cell));
		$this->table['rows'][$this->row][$this->col] = $cell;
		return;
	}
	$this->col++;
	if (!isset($this->table['col_widths'][$this->col])) {
		# first cell seen in this column
		$this->table['col_widths'][$this->col] = 0;
	}
	$this->buffer();
}
/**
 * handle <th> tags
 *
 * When HTML is not kept, the alignment of each header cell of the first row is
 * recorded here (with keepHTML it is taken from the lookahead in
 * handleTag_table()). The cell itself is then processed like a <td>.
 *
 * @param void
 * @return void
 */
function handleTag_th() {
	# handleTag_td() increments $this->col afterwards, so on the opening tag
	# col + 1 is this header's own column. On the closing tag it would point at
	# the NEXT column, so recording there would block that column's real align.
	$column = $this->col + 1;
	if ($this->parser->isStartTag
		&& !$this->keepHTML
		&& !isset($this->table['rows'][1])
		&& !isset($this->table['aligns'][$column])
	) {
		if (isset($this->parser->tagAttributes['align'])) {
			# lower-case so the value matches the 'left'/'center'/'right' cases used when rendering
			$this->table['aligns'][$column] = strtolower($this->parser->tagAttributes['align']);
		} else {
			$this->table['aligns'][$column] = '';
		}
	}
	$this->handleTag_td();
}
/**
 * handle <dl> tags
 *
 * Nothing is done for the opening tag; the closing tag requests two line breaks.
 *
 * @param void
 * @return void
 */
function handleTag_dl() {
	if ($this->parser->isStartTag) {
		return;
	}
	$this->setLineBreaks(2);
}
/**
 * handle <dt> tags
 *
 * Nothing is done for the opening tag; the closing tag requests one line break.
 *
 * @param void
 * @return void
 */
function handleTag_dt() {
	if ($this->parser->isStartTag) {
		return;
	}
	$this->setLineBreaks(1);
}
/**
 * handle <dd> tags
 *
 * Start tag: writes the ': ' definition marker and calls indent() with false
 * as second argument. End tag: requests line breaks depending on whether
 * another <dt> follows, then calls indent() with the same string again.
 *
 * NOTE(review): the ': ' and ' ' literals are reproduced exactly as found;
 * confirm against upstream that they were not whitespace-collapsed.
 *
 * @param void
 * @return void
 */
function handleTag_dd() {
	$upcoming = ltrim($this->parser->html);
	if (!$this->parser->isStartTag) {
		# lookahead for next dt
		$breaks = (substr($upcoming, 0, 4) == '<dt>') ? 2 : 1;
		$this->setLineBreaks($breaks);
		$this->indent(' ');
		return;
	}
	if (substr($upcoming, 0, 3) == '<p>') {
		# next comes a paragraph, so we'll need an extra line
		$this->out("\n".$this->indent);
	} elseif (substr($this->output, -2) == "\n\n") {
		# drop one of the two trailing line breaks
		$this->output = substr($this->output, 0, -1);
	}
	$this->out(': ');
	$this->indent(' ', false);
}
/**
 * handle <fnref /> tags (custom footnote references, see markdownify_extra::parseString())
 *
 * Writes the reference as `[^target]`.
 *
 * @param void
 * @return void
 */
function handleTag_fnref() {
	$target = $this->parser->tagAttributes['target'];
	$this->out('[^'.$target.']');
}
/**
 * handle <fn> tags (custom footnotes, see markdownify_extra::parseString()
 * and markdownify_extra::_makeFootnotes())
 *
 * Start tag: writes the `[^name]:` label and requests one line break.
 * End tag: requests two line breaks. indent() is called in both cases.
 *
 * @param void
 * @return void
 */
function handleTag_fn() {
	$opening = $this->parser->isStartTag;
	if ($opening) {
		$label = $this->parser->tagAttributes['name'];
		$this->out('[^'.$label.']:');
	}
	$this->setLineBreaks($opening ? 1 : 2);
	$this->indent(' ');
}
/**
 * handle <footnotes> tag (custom footnotes, see markdownify_extra::parseString()
 * and markdownify_extra::_makeFootnotes())
 *
 * Nothing is done for the opening tag; the closing tag requests two line breaks.
 *
 * @param void
 * @return void
 */
function handleTag_footnotes() {
	if ($this->parser->isStartTag) {
		return;
	}
	$this->setLineBreaks(2);
}
/**
 * parse a HTML string, clean up footnotes prior
 *
 * Footnote references and the footnote list are first rewritten into the
 * custom <fnref>, <footnotes> and <fn> tags, then the parent parser runs.
 *
 * @param string $html input
 * @return string Markdown formatted output
 */
function parseString($html) {
	/** TODO: custom markdown-extra options, e.g. titles & classes **/
	# <sup id="fnref:..."><a href="#fn:..." rel="footnote">...</a></sup>
	# => <fnref target="..." />
	$referencePattern = '@<sup id="fnref:([^"]+)">\s*<a href="#fn:\1" rel="footnote">\s*\d+\s*</a>\s*</sup>@Us';
	$html = preg_replace($referencePattern, '<fnref target="$1" />', $html);
	# <div class="footnotes">
	#   <hr />
	#   <ol>
	#     <li id="fn:...">...</li>
	#     ...
	#   </ol>
	# </div>
	# =>
	# <footnotes>
	#   <fn name="...">...</fn>
	#   ...
	# </footnotes>
	$listPattern = '#<div class="footnotes">\s*<hr />\s*<ol>\s*(.+)\s*</ol>\s*</div>#Us';
	$html = preg_replace_callback($listPattern, array($this, '_makeFootnotes'), $html);
	return parent::parseString($html);
}
/**
 * replace HTML representation of footnotes with something more easily parsable
 *
 * @note this is a callback to be used in parseString()
 *
 * @param array $matches RegEx match; index 1 holds the <li> items of the footnote list
 * @return string the items rewritten as <footnotes><fn name="...">...</fn>...</footnotes>
 */
function _makeFootnotes($matches) {
	# <li id="fn:1">
	# ...
	# <a href="#fnref:block" rev="footnote">&#8617;</a></p>
	# </li>
	# => <fn name="1">...</fn>
	# remove footnote link
	$fns = preg_replace('@\s*(&#160;\s*)?<a href="#fnref:[^"]+" rev="footnote"[^>]*>&#8617;</a>\s*@s', '', $matches[1]);
	# remove empty paragraph
	$fns = preg_replace('@<p>\s*</p>@s', '', $fns);
	# <li id="fn:1"> -> <fn name="1"> (the rest of the opening tag is left as is)
	$fns = str_replace('<li id="fn:', '<fn name="', $fns);

	$fns = '<footnotes>'.$fns.'</footnotes>';
	# close each item with </fn>: a </li> belongs to a footnote when it is
	# followed by the next <fn or by the end of the list; the lookahead
	# captures nothing, so the replacement is the plain closing tag
	return preg_replace('#</li>\s*(?=(?:<fn|</footnotes>))#s', '</fn>', $fns);
}
489 }