3 * Copyright © 2000, 2001 Geoffrey T. Dairiki <dairiki@dairiki.org>
4 * You may copy this code freely under the conditions of the GPL.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
22 * @ingroup DifferenceEngine
23 * @defgroup DifferenceEngine DifferenceEngine
26 use MediaWiki\Diff\ComplexityException
;
27 use MediaWiki\Diff\WordAccumulator
;
30 * Performs a word-level diff on several lines
32 * @ingroup DifferenceEngine
34 class WordLevelDiff
extends \Diff
{
38 protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed
41 * @param string[] $linesBefore
42 * @param string[] $linesAfter
// Build a word-level diff between two sets of lines.
//
// Both inputs are tokenised with split(), which returns a pair: the full
// word list and a "stripped" word list. The parent diff is computed on the
// stripped lists; afterwards each edit's word arrays are replaced with the
// corresponding slices of the unstripped lists.
//
// NOTE(review): this view of the constructor is incomplete -- the opening of
// the try block, the initialisation of $xi / $yi and the closing braces are
// not visible here. Confirm against the full file before relying on the
// control flow described below.
44 public function __construct( $linesBefore, $linesAfter ) {
45 list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
46 list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );
// Diff the stripped word lists via the parent constructor.
49 parent
::__construct( $wordsBeforeStripped, $wordsAfterStripped );
// If the parent constructor throws ComplexityException, fall back to a single
// change operation covering the original (unsplit) lines.
50 } catch ( ComplexityException
$ex ) {
51 // Too hard to diff, just show whole paragraph(s) as changed
52 $this->edits
= [ new DiffOpChange( $linesBefore, $linesAfter ) ];
// Walk every edit. $orig and $closing are taken by reference, so the
// assignments below rewrite the edit objects in place.
56 $editCount = count( $this->edits
);
57 for ( $i = 0; $i < $editCount; $i++
) {
58 $orig = &$this->edits
[$i]->orig
;
59 if ( is_array( $orig ) ) {
// Replace the edit's "orig" words with the same number of words from
// $wordsBefore, starting at offset $xi, then advance $xi past them.
60 $orig = array_slice( $wordsBefore, $xi, count( $orig ) );
61 $xi +
= count( $orig );
64 $closing = &$this->edits
[$i]->closing
;
65 if ( is_array( $closing ) ) {
// Same substitution for the "closing" side, using $wordsAfter and $yi.
66 $closing = array_slice( $wordsAfter, $yi, count( $closing ) );
67 $yi +
= count( $closing );
73 * @param string[] $lines
// Tokenise an array of lines into words.
//
// Returns a two-element array [ $words, $stripped ]. $stripped is filled
// from capture group 1 of each regex match. $words is presumably filled from
// the full matches ($m[0]) -- the assignment itself is not visible in this
// view, TODO confirm.
//
// NOTE(review): the initialisation of $words / $stripped, the remaining
// preg_match_all() arguments and the closing braces are not visible here.
77 private function split( $lines ) {
81 foreach ( $lines as $line ) {
// Pattern (x = ignore pattern whitespace, s = "." also matches newline):
//   group 1: a run of non-newline whitespace, OR a run of the bytes
//            [0-9_A-Za-z\x80-\xff], OR any single character;
//   then optionally one further non-newline whitespace character, which is
//   part of the full match ($m[0]) but not of group 1 ($m[1]).
// NOTE(review): "(?!< \n)" is a negative LOOKAHEAD for "<" followed by a
// newline. The atom right after it must match a whitespace character, so the
// lookahead can never change the outcome. A lookbehind "(?<! \n)" was
// presumably intended -- confirm before changing, as it would alter output.
89 if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
91 foreach ( $m[0] as $word ) {
94 foreach ( $m[1] as $stripped_word ) {
95 $stripped[] = $stripped_word;
100 return [ $words, $stripped ];
// Collect the "orig" side of every edit into a WordAccumulator and fetch the
// accumulated lines with getLines().
//
// NOTE(review): the closing braces and the return statement are not visible
// in this view; presumably $lines is returned -- TODO confirm.
106 public function orig() {
107 $orig = new WordAccumulator
;
109 foreach ( $this->edits
as $edit ) {
// Edits of type 'copy' are added without a tag.
110 if ( $edit->type
== 'copy' ) {
111 $orig->addWords( $edit->orig
);
// Any other edit with a truthy "orig" is added with the 'del' tag.
112 } elseif ( $edit->orig
) {
113 $orig->addWords( $edit->orig
, 'del' );
116 $lines = $orig->getLines();
124 public function closing() {
125 $closing = new WordAccumulator
;
127 foreach ( $this->edits
as $edit ) {
128 if ( $edit->type
== 'copy' ) {
129 $closing->addWords( $edit->closing
);
130 } elseif ( $edit->closing
) {
131 $closing->addWords( $edit->closing
, 'ins' );
134 $lines = $closing->getLines();