3 * Renders a slot diff by doing a text diff on the native representation.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
21 * @ingroup DifferenceEngine
24 use MediaWiki\Shell\Shell
;
25 use Wikimedia\Assert\Assert
;
28 * Renders a slot diff by doing a text diff on the native representation.
30 * If you want to use this without content objects (to call getTextDiff() on some
31 * non-content-related texts), obtain an instance with
32 * ContentHandler::getForModelID( CONTENT_MODEL_TEXT )
33 * ->getSlotDiffRenderer( RequestContext::getMain() )
35 * @ingroup DifferenceEngine
37 class TextSlotDiffRenderer
extends SlotDiffRenderer
{
39 /** Use the PHP diff implementation (DiffEngine). */
40 const ENGINE_PHP
= 'php';
42 /** Use the wikidiff2 PHP module. */
43 const ENGINE_WIKIDIFF2
= 'wikidiff2';
45 /** Use an external executable. */
46 const ENGINE_EXTERNAL
= 'external';
48 /** @var IBufferingStatsdDataFactory|null */
49 private $statsdDataFactory;
51 /** @var Language|null The language this content is in. */
55 * Number of paragraph moves the algorithm should attempt to detect.
56 * Only used with the wikidiff2 engine.
58 * @see $wgWikiDiff2MovedParagraphDetectionCutoff
60 private $wikiDiff2MovedParagraphDetectionCutoff = 0;
62 /** @var string One of the ENGINE_* constants. */
63 private $engine = self
::ENGINE_PHP
;
65 /** @var string Path to an executable to be used as the diff engine. */
66 private $externalEngine;
/**
 * Static shortcut for diffing two plain strings without building a renderer by hand.
 *
 * Fetches the slot diff renderer of the CONTENT_MODEL_TEXT content handler for the
 * main request context and hands both texts to its getTextDiff().
 *
 * @param string $oldText
 * @param string $newText
 * @return string Result of getTextDiff() for the two texts (presumably the rendered diff).
 */
public static function diff( $oldText, $newText ) {
	$textHandler = ContentHandler::getForModelID( CONTENT_MODEL_TEXT );
	/** @var TextSlotDiffRenderer $renderer */
	$renderer = $textHandler->getSlotDiffRenderer( RequestContext::getMain() );

	return $renderer->getTextDiff( $oldText, $newText );
}
/**
 * Inject the stats factory. When one is set, getTextDiff() reports a
 * 'diff_time' timing to it after each diff.
 *
 * @param IBufferingStatsdDataFactory $statsdDataFactory
 */
public function setStatsdDataFactory( IBufferingStatsdDataFactory $statsdDataFactory ) {
	$this->statsdDataFactory = $statsdDataFactory;
}
/**
 * Set the language of the content. The PHP engine uses it to call
 * segmentForDiff() on the input and unsegmentForDiff() on the output.
 *
 * @param Language $language
 */
public function setLanguage( Language $language ) {
	$this->language = $language;
}
/**
 * Set the number of paragraph moves the wikidiff2 engine should attempt to detect.
 *
 * @see $wgWikiDiff2MovedParagraphDetectionCutoff
 * @param int $cutoff Checked with Assert::parameterType( 'integer', ... ).
 */
public function setWikiDiff2MovedParagraphDetectionCutoff( $cutoff ) {
	Assert::parameterType( 'integer', $cutoff, '$cutoff' );

	$this->wikiDiff2MovedParagraphDetectionCutoff = $cutoff;
}
/**
 * Choose the diff engine used by this renderer.
 *
 * Both arguments are validated before any state is changed: $type must be one of
 * the ENGINE_* constants, and $executable must be an executable path for
 * ENGINE_EXTERNAL and null for every other engine.
 *
 * @param string $type One of the ENGINE_* constants.
 * @param string|null $executable Path to an external executable, only when type is ENGINE_EXTERNAL.
 */
public function setEngine( $type, $executable = null ) {
	$knownEngines = [ self::ENGINE_PHP, self::ENGINE_WIKIDIFF2, self::ENGINE_EXTERNAL ];
	Assert::parameter(
		in_array( $type, $knownEngines, true ),
		'$type',
		'must be one of the TextSlotDiffRenderer::ENGINE_* constants'
	);

	if ( $type !== self::ENGINE_EXTERNAL ) {
		// Built-in engines take no executable at all.
		Assert::parameter(
			$executable === null,
			'$executable',
			'must not be set unless $type is ENGINE_EXTERNAL'
		);
	} else {
		$isRunnablePath = is_string( $executable ) && is_executable( $executable );
		Assert::parameter(
			$isRunnablePath,
			'$executable',
			'must be a path to a valid executable'
		);
	}

	$this->engine = $type;
	$this->externalEngine = $executable;
}
/**
 * Diff two content objects by comparing their serialized text.
 *
 * Both arguments are first passed to normalizeContents() together with
 * TextContent::class, then serialized (old first, new second) and handed to
 * getTextDiff().
 * NOTE(review): normalizeContents() is defined outside this view; presumably it
 * replaces null with empty content and enforces the content type - confirm.
 *
 * @param Content|null $oldContent
 * @param Content|null $newContent
 * @return string Result of getTextDiff() on the two serialized texts.
 */
public function getDiff( Content $oldContent = null, Content $newContent = null ) {
	$this->normalizeContents( $oldContent, $newContent, TextContent::class );

	return $this->getTextDiff( $oldContent->serialize(), $newContent->serialize() );
}
128 * Diff the text representations of two content objects (or just two pieces of text in general).
129 * @param string $oldText
130 * @param string $newText
133 public function getTextDiff( $oldText, $newText ) {
// Wraps getTextDiffInternal() with timing and, for large inputs, PoolCounter-based limiting.
// Both inputs must already be strings; anything else is rejected by the assertions below.
134 Assert
::parameterType( 'string', $oldText, '$oldText' );
135 Assert
::parameterType( 'string', $newText, '$newText' );
// Callback that performs the actual diff and measures how long it took.
137 $diff = function () use ( $oldText, $newText ) {
138 $time = microtime( true );
140 $result = $this->getTextDiffInternal( $oldText, $newText );
// Elapsed wall-clock time, converted from seconds to whole milliseconds.
142 $time = intval( ( microtime( true ) - $time ) * 1000 );
// The timing is only reported when a stats factory was injected via setStatsdDataFactory().
143 if ( $this->statsdDataFactory
) {
144 $this->statsdDataFactory
->timing( 'diff_time', $time );
// NOTE(review): the slow-diff logging below reads mOldPage, mNewPage, mOldid and mNewid,
// none of which are declared in the visible part of this class - confirm it is disabled
// (or supply those values) before relying on it.
147 // TODO reimplement this using T142313
149 // Log requests slower than 99th percentile
150 if ( $time > 100 && $this->mOldPage && $this->mNewPage ) {
152 "$time ms diff: {$this->mOldid} -> {$this->mNewid} {$this->mNewPage}" );
160 * @param Status $status
// Error callback for the pool work: turns the status into a FatalError carrying its wikitext.
163 $error = function ( $status ) {
164 throw new FatalError( $status->getWikiText() );
167 // Use PoolCounter if the diff looks like it can be expensive
// "Expensive" here means the combined byte length (strlen) of both texts exceeds 20000.
168 if ( strlen( $oldText ) +
strlen( $newText ) > 20000 ) {
// The second constructor argument is the md5 of the old text concatenated with the md5 of the new text.
169 $work = new PoolCounterWorkViaCallback( 'diff',
170 md5( $oldText ) . md5( $newText ),
171 [ 'doWork' => $diff, 'error' => $error ]
173 return $work->execute();
180 * Diff the text representations of two content objects (or just two pieces of text in general).
181 * This does the actual diffing, getTextDiff() wraps it with logging and resource limiting.
182 * @param string $oldText
183 * @param string $newText
187 protected function getTextDiffInternal( $oldText, $newText ) {
// Produces the diff with whichever engine $this->engine selects (wikidiff2, external
// executable, or the PHP implementation); any other value ends in a LogicException.
188 // TODO move most of this into three parallel implementations of a text diff generator
189 // class, choose which one to use via dependency injection
// Normalize CRLF line endings to LF in both texts before diffing.
191 $oldText = str_replace( "\r\n", "\n", $oldText );
192 $newText = str_replace( "\r\n", "\n", $newText );
194 // Better external diff engine, the 2 may some day be dropped
195 // This one does the escaping and segmenting itself
196 if ( $this->engine
=== self
::ENGINE_WIKIDIFF2
) {
// phpversion() yields false when the wikidiff2 extension is not loaded, hence the !== false check.
197 $wikidiff2Version = phpversion( 'wikidiff2' );
// The moved-paragraph cutoff is only passed for wikidiff2 versions >= 1.5.0 and < 1.8.0.
199 $wikidiff2Version !== false &&
200 version_compare( $wikidiff2Version, '1.5.0', '>=' ) &&
201 version_compare( $wikidiff2Version, '1.8.0', '<' )
203 $text = wikidiff2_do_diff(
207 $this->wikiDiff2MovedParagraphDetectionCutoff
210 // Don't pass the 4th parameter introduced in version 1.5.0 and removed in version 1.8.0
211 $text = wikidiff2_do_diff(
217 // Log a warning in case the configuration value is set to not silently ignore it
218 if ( $this->wikiDiff2MovedParagraphDetectionCutoff
> 0 ) {
219 wfLogWarning( '$wgWikiDiff2MovedParagraphDetectionCutoff is set but has no
220 effect since the used version of WikiDiff2 does not support it.' );
225 } elseif ( $this->engine
=== self
::ENGINE_EXTERNAL
) {
// External engine: each text is written to its own temporary file under wfTempDir(),
// and the two file paths are passed as arguments to the configured executable.
227 $tmpDir = wfTempDir();
228 $tempName1 = tempnam( $tmpDir, 'diff_' );
229 $tempName2 = tempnam( $tmpDir, 'diff_' );
231 $tempFile1 = fopen( $tempName1, "w" );
235 $tempFile2 = fopen( $tempName2, "w" );
239 fwrite( $tempFile1, $oldText );
240 fwrite( $tempFile2, $newText );
241 fclose( $tempFile1 );
242 fclose( $tempFile2 );
// The command is built as an array (executable, old file, new file) rather than a shell string.
243 $cmd = [ $this->externalEngine
, $tempName1, $tempName2 ];
244 $result = Shell
::command( $cmd )
246 $exitCode = $result->getExitCode();
// NOTE(review): on a non-zero exit code the exception is thrown before the unlink() calls
// below, so both temporary files appear to be left behind on that path - confirm and
// consider moving the cleanup into a try/finally.
247 if ( $exitCode !== 0 ) {
248 throw new Exception( "External diff command returned code {$exitCode}. Stderr: "
249 . wfEscapeWikiText( $result->getStderr() )
// The diff is whatever the executable wrote to stdout; the temporary files are removed afterwards.
252 $difftext = $result->getStdout();
253 unlink( $tempName1 );
254 unlink( $tempName2 );
257 } elseif ( $this->engine
=== self
::ENGINE_PHP
) {
// PHP engine: when a language is set, its segmentForDiff() is applied to both inputs
// here and unsegmentForDiff() to the formatted output further down.
258 if ( $this->language
) {
259 $oldText = $this->language
->segmentForDiff( $oldText );
260 $newText = $this->language
->segmentForDiff( $newText );
// The diff works on arrays of lines, rendered through TableDiffFormatter.
262 $ota = explode( "\n", $oldText );
263 $nta = explode( "\n", $newText );
264 $diffs = new Diff( $ota, $nta );
265 $formatter = new TableDiffFormatter();
266 $difftext = $formatter->format( $diffs );
267 if ( $this->language
) {
268 $difftext = $this->language
->unsegmentForDiff( $difftext );
// Reached only when $this->engine matched none of the three branches above.
273 throw new LogicException( 'Invalid engine: ' . $this->engine
);