changes to languages because of Phabricator reports.
=== Other changes in 1.28 ===
-
+* (T128697) Improved handling of large diffs.
== Compatibility ==
'MediaWiki\\Auth\\Throttler' => __DIR__ . '/includes/auth/Throttler.php',
'MediaWiki\\Auth\\UserDataAuthenticationRequest' => __DIR__ . '/includes/auth/UserDataAuthenticationRequest.php',
'MediaWiki\\Auth\\UsernameAuthenticationRequest' => __DIR__ . '/includes/auth/UsernameAuthenticationRequest.php',
+ 'MediaWiki\\Diff\\ComplexityException' => __DIR__ . '/includes/diff/ComplexityException.php',
'MediaWiki\\Diff\\WordAccumulator' => __DIR__ . '/includes/diff/WordAccumulator.php',
'MediaWiki\\Interwiki\\ClassicInterwikiLookup' => __DIR__ . '/includes/interwiki/ClassicInterwikiLookup.php',
'MediaWiki\\Interwiki\\InterwikiLookup' => __DIR__ . '/includes/interwiki/InterwikiLookup.php',
--- /dev/null
+<?php
+/**
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup DifferenceEngine
+ */
+
+namespace MediaWiki\Diff;
+
+use Exception;
+/**
+ * Exception signalling that a diff is too complex to generate.
+ *
+ * It always carries the same fixed message, so the constructor takes
+ * no arguments.
+ */
+class ComplexityException extends Exception {
+ public function __construct() {
+ parent::__construct( 'Diff is too complex to generate' );
+ }
+}
*/
public $edits;
+ /**
+ * @var int If this diff complexity is exceeded, a ComplexityException is thrown.
+ * 0 means no limit.
+ */
+ protected $bailoutComplexity = 0;
+
/**
* Constructor.
* Computes diff between sequences of strings.
+ * The bailout complexity configured on this object is handed to the
+ * DiffEngine before diffing; the resulting edit list is stored in $edits.
* @param string[] $from_lines An array of strings.
* Typically these are lines from a file.
* @param string[] $to_lines An array of strings.
+ * @throws \MediaWiki\Diff\ComplexityException When a non-zero bailout
+ * complexity is set and the engine finds the diff exceeds it.
*/
public function __construct( $from_lines, $to_lines ) {
$eng = new DiffEngine;
+ $eng->setBailoutComplexity( $this->bailoutComplexity );
$this->edits = $eng->diff( $from_lines, $to_lines );
}
* @file
* @ingroup DifferenceEngine
*/
+use MediaWiki\Diff\ComplexityException;
/**
* This diff implementation is mainly lifted from the LCS algorithm of the Eclipse project which
private $tooLong;
private $powLimit;
+ protected $bailoutComplexity = 0;
+
// State variables
private $maxDifferences;
private $lcsLengthCorrectedForHeuristic = false;
*
* @param string[] $from_lines
* @param string[] $to_lines
+ * @throws ComplexityException
*
* @return DiffOp[]
*/
return $edits;
}
+ /**
+ * Sets the complexity (in comparison operations) that can't be exceeded.
+ *
+ * The value is only stored here. diffInternal() compares it against the
+ * product of the two sequence lengths (m * n) and throws a
+ * ComplexityException when that product is larger. A value of 0, which
+ * is the default, disables the check.
+ *
+ * @param int $value Maximum allowed complexity, or 0 for no limit
+ */
+ public function setBailoutComplexity( $value ) {
+ $this->bailoutComplexity = $value;
+ }
+
/**
* Adjust inserts/deletes of identical lines to join changes
* as much as possible.
/**
* @param string[] $from
* @param string[] $to
+ * @throws ComplexityException
*/
protected function diffInternal( array $from, array $to ) {
// remember initial lengths
$this->m = count( $this->from );
$this->n = count( $this->to );
+ if ( $this->bailoutComplexity > 0 && $this->m * $this->n > $this->bailoutComplexity ) {
+ throw new ComplexityException();
+ }
+
$this->removed = $this->m > 0 ? array_fill( 0, $this->m, true ) : [];
$this->added = $this->n > 0 ? array_fill( 0, $this->n, true ) : [];
* @defgroup DifferenceEngine DifferenceEngine
*/
+use MediaWiki\Diff\ComplexityException;
use MediaWiki\Diff\WordAccumulator;
/**
* @ingroup DifferenceEngine
*/
class WordLevelDiff extends \Diff {
- const MAX_LINE_LENGTH = 10000;
+ /**
+ * @inheritdoc
+ */
+ protected $bailoutComplexity = 40000000; // Roughly 6K x 6K words changed
/**
* @param string[] $linesBefore
list( $wordsBefore, $wordsBeforeStripped ) = $this->split( $linesBefore );
list( $wordsAfter, $wordsAfterStripped ) = $this->split( $linesAfter );
- parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
+ try {
+ parent::__construct( $wordsBeforeStripped, $wordsAfterStripped );
+ } catch ( ComplexityException $ex ) {
+ // Too hard to diff, just show whole paragraph(s) as changed
+ $this->edits = [ new DiffOpChange( $linesBefore, $linesAfter ) ];
+ }
$xi = $yi = 0;
$editCount = count( $this->edits );
$stripped = [];
$first = true;
foreach ( $lines as $line ) {
- # If the line is too long, just pretend the entire line is one big word
- # This prevents resource exhaustion problems
if ( $first ) {
$first = false;
} else {
$words[] = "\n";
$stripped[] = "\n";
}
- if ( strlen( $line ) > self::MAX_LINE_LENGTH ) {
- $words[] = $line;
- $stripped[] = $line;
- } else {
- $m = [];
- if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
- $line, $m )
- ) {
- foreach ( $m[0] as $word ) {
- $words[] = $word;
- }
- foreach ( $m[1] as $stripped_word ) {
- $stripped[] = $stripped_word;
- }
+ $m = [];
+ if ( preg_match_all( '/ ( [^\S\n]+ | [0-9_A-Za-z\x80-\xff]+ | . ) (?: (?!< \n) [^\S\n])? /xs',
+ $line, $m ) ) {
+ foreach ( $m[0] as $word ) {
+ $words[] = $word;
+ }
+ foreach ( $m[1] as $stripped_word ) {
+ $stripped[] = $stripped_word;
}
}
}