Handle comment truncation in CommentStore
[lhc/web/wiklou.git] / includes / CommentStore.php
1 <?php
2 /**
3 * Manage storage of comments in the database
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 */
22
23 use Wikimedia\Rdbms\IDatabase;
24
25 /**
26 * CommentStore handles storage of comments (edit summaries, log reasons, etc)
27 * in the database.
28 * @since 1.30
29 */
class CommentStore {

	/** Maximum length of a comment. Longer comments will be truncated. */
	const MAX_COMMENT_LENGTH = 65535;

	/** Maximum length of serialized data. Longer data will result in an exception. */
	const MAX_DATA_LENGTH = 65535;

	/**
	 * Define fields that use temporary tables for transitional purposes
	 * @var array Keys are '$key', values are arrays with four fields:
	 *  - table: Temporary table name
	 *  - pk: Temporary table column referring to the main table's primary key
	 *  - field: Temporary table column referring comment.comment_id
	 *  - joinPK: Main table's primary key
	 */
	protected static $tempTables = [
		'rev_comment' => [
			'table' => 'revision_comment_temp',
			'pk' => 'revcomment_rev',
			'field' => 'revcomment_comment_id',
			'joinPK' => 'rev_id',
		],
		'img_description' => [
			'table' => 'image_comment_temp',
			'pk' => 'imgcomment_name',
			'field' => 'imgcomment_description_id',
			'joinPK' => 'img_name',
		],
	];

	/**
	 * Fields that formerly used $tempTables
	 * @var array Key is '$key', value is the MediaWiki version in which it was
	 *  removed from $tempTables.
	 */
	protected static $formerTempTables = [];

	/** @var string */
	protected $key;

	/** @var int One of the MIGRATION_* constants */
	protected $stage;

	/** @var array|null Cache for `self::getJoin()` */
	protected $joinCache = null;

	/** @var Language Language to use for comment truncation */
	protected $lang;

	/**
	 * @param string $key A key such as "rev_comment" identifying the comment
	 *  field being fetched.
	 * @param Language $lang Language to use for comment truncation. Defaults
	 *  to $wgContLang.
	 */
	public function __construct( $key, Language $lang = null ) {
		global $wgCommentTableSchemaMigrationStage, $wgContLang;

		$this->key = $key;
		$this->stage = $wgCommentTableSchemaMigrationStage;
		$this->lang = $lang ?: $wgContLang;
	}

	/**
	 * Static constructor for easier chaining
	 * @param string $key A key such as "rev_comment" identifying the comment
	 *  field being fetched.
	 * @return CommentStore
	 */
	public static function newKey( $key ) {
		return new self( $key );
	}

	/**
	 * Get SELECT fields for the comment key
	 *
	 * Each resulting row should be passed to `self::getCommentLegacy()` to get the
	 * actual comment.
	 *
	 * @note Use of this method may require a subsequent database query to
	 *  actually fetch the comment. If possible, use `self::getJoin()` instead.
	 * @return string[] to include in the `$vars` to `IDatabase->select()`. All
	 *  fields are aliased, so `+` is safe to use.
	 */
	public function getFields() {
		$fields = [];
		if ( $this->stage === MIGRATION_OLD ) {
			// Old schema: the comment lives directly in the main table's column.
			$fields["{$this->key}_text"] = $this->key;
			$fields["{$this->key}_data"] = 'NULL';
			$fields["{$this->key}_cid"] = 'NULL';
		} else {
			if ( $this->stage < MIGRATION_NEW ) {
				// While migrating, also select the old column so it can serve
				// as a fallback when no comment row is found.
				$fields["{$this->key}_old"] = $this->key;
			}
			if ( isset( self::$tempTables[$this->key] ) ) {
				// The comment ID is reached through the temporary table, which
				// is looked up by the main table's primary key.
				$fields["{$this->key}_pk"] = self::$tempTables[$this->key]['joinPK'];
			} else {
				$fields["{$this->key}_id"] = "{$this->key}_id";
			}
		}
		return $fields;
	}

	/**
	 * Get SELECT fields and joins for the comment key
	 *
	 * Each resulting row should be passed to `self::getComment()` to get the
	 * actual comment.
	 *
	 * The result is computed once per instance and cached.
	 *
	 * @return array With three keys:
	 *   - tables: (string[]) to include in the `$table` to `IDatabase->select()`
	 *   - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
	 *   - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
	 *  All tables, fields, and joins are aliased, so `+` is safe to use.
	 */
	public function getJoin() {
		if ( $this->joinCache === null ) {
			$tables = [];
			$fields = [];
			$joins = [];

			if ( $this->stage === MIGRATION_OLD ) {
				$fields["{$this->key}_text"] = $this->key;
				$fields["{$this->key}_data"] = 'NULL';
				$fields["{$this->key}_cid"] = 'NULL';
			} else {
				// Only the fully migrated stage uses an inner join; earlier
				// stages use LEFT JOIN and fall back to the old column below.
				$join = $this->stage === MIGRATION_NEW ? 'JOIN' : 'LEFT JOIN';

				if ( isset( self::$tempTables[$this->key] ) ) {
					$t = self::$tempTables[$this->key];
					$alias = "temp_$this->key";
					$tables[$alias] = $t['table'];
					$joins[$alias] = [ $join, "{$alias}.{$t['pk']} = {$t['joinPK']}" ];
					$joinField = "{$alias}.{$t['field']}";
				} else {
					$joinField = "{$this->key}_id";
				}

				$alias = "comment_$this->key";
				$tables[$alias] = 'comment';
				$joins[$alias] = [ $join, "{$alias}.comment_id = {$joinField}" ];

				if ( $this->stage === MIGRATION_NEW ) {
					$fields["{$this->key}_text"] = "{$alias}.comment_text";
				} else {
					$fields["{$this->key}_text"] = "COALESCE( {$alias}.comment_text, $this->key )";
				}
				$fields["{$this->key}_data"] = "{$alias}.comment_data";
				$fields["{$this->key}_cid"] = "{$alias}.comment_id";
			}

			$this->joinCache = [
				'tables' => $tables,
				'fields' => $fields,
				'joins' => $joins,
			];
		}

		return $this->joinCache;
	}

	/**
	 * Extract the comment from a row
	 *
	 * Shared implementation for getComment() and getCommentLegacy()
	 *
	 * @param IDatabase|null $db Database handle for getCommentLegacy(), or null for getComment()
	 * @param object|array $row
	 * @param bool $fallback
	 * @return CommentStoreComment
	 * @throws InvalidArgumentException If $row lacks the fields needed to
	 *  resolve the comment (and no fallback applies), or if it only has the
	 *  getCommentLegacy() fields while no database handle was supplied.
	 */
	private function getCommentInternal( IDatabase $db = null, $row, $fallback = false ) {
		$key = $this->key;
		$row = (array)$row;
		if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) {
			// Row produced by getJoin() (or faked by the caller).
			$cid = isset( $row["{$key}_cid"] ) ? $row["{$key}_cid"] : null;
			$text = $row["{$key}_text"];
			$data = $row["{$key}_data"];
		} elseif ( $this->stage === MIGRATION_OLD ) {
			$cid = null;
			if ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				$text = $row[$key];
			} else {
				wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" );
				$text = '';
			}
			$data = null;
		} else {
			// Row produced by getFields(): the comment has to be looked up,
			// either through the temporary table or directly by comment ID.
			$temp = isset( self::$tempTables[$key] ) ? self::$tempTables[$key] : null;
			$lookupField = $temp ? "{$key}_pk" : "{$key}_id";
			$id = null;

			if ( array_key_exists( $lookupField, $row ) ) {
				if ( !$db ) {
					throw new InvalidArgumentException(
						"\$row does not contain fields needed for comment $key and getComment(), but "
						. "does have fields for getCommentLegacy()"
					);
				}
				$id = $row[$lookupField];
				if ( $temp ) {
					$row2 = $db->selectRow(
						[ $temp['table'], 'comment' ],
						[ 'comment_id', 'comment_text', 'comment_data' ],
						[ $temp['pk'] => $id ],
						__METHOD__,
						[],
						[ 'comment' => [ 'JOIN', [ "comment_id = {$temp['field']}" ] ] ]
					);
				} else {
					$row2 = $db->selectRow(
						'comment',
						[ 'comment_id', 'comment_text', 'comment_data' ],
						[ 'comment_id' => $id ],
						__METHOD__
					);
				}
			} elseif ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				// The synthetic row carries an explicit null comment_id so that
				// reading it below does not touch an undefined property.
				$row2 = (object)[
					'comment_id' => null,
					'comment_text' => $row[$key],
					'comment_data' => null,
				];
			} else {
				throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
			}

			if ( $row2 ) {
				$cid = $row2->comment_id;
				$text = $row2->comment_text;
				$data = $row2->comment_data;
			} elseif ( $this->stage < MIGRATION_NEW && array_key_exists( "{$key}_old", $row ) ) {
				// No comment row yet: use the old column selected by getFields().
				$cid = null;
				$text = $row["{$key}_old"];
				$data = null;
			} else {
				// @codeCoverageIgnoreStart
				wfLogWarning( "Missing comment row for $key, id=$id" );
				$cid = null;
				$text = '';
				$data = null;
				// @codeCoverageIgnoreEnd
			}
		}

		$msg = null;
		if ( $data !== null ) {
			// Keep the undecoded string so a decode failure can report it.
			$rawData = $data;
			$data = FormatJson::decode( $rawData );
			if ( !is_object( $data ) ) {
				// @codeCoverageIgnoreStart
				wfLogWarning( "Invalid JSON object in comment: $rawData" );
				$data = null;
				// @codeCoverageIgnoreEnd
			} else {
				$data = (array)$data;
				if ( isset( $data['_message'] ) ) {
					$msg = self::decodeMessage( $data['_message'] )
						->setInterfaceMessageFlag( true );
				}
				if ( !empty( $data['_null'] ) ) {
					// '_null' marks stored data that stands for "no caller data".
					$data = null;
				} else {
					// Keys starting with '_' are reserved for internal use and
					// are not handed back to the caller.
					foreach ( array_keys( $data ) as $k ) {
						if ( substr( $k, 0, 1 ) === '_' ) {
							unset( $data[$k] );
						}
					}
				}
			}
		}

		return new CommentStoreComment( $cid, $text, $msg, $data );
	}

	/**
	 * Extract the comment from a row
	 *
	 * Use `self::getJoin()` to ensure the row contains the needed data.
	 *
	 * If you need to fake a comment in a row for some reason, set fields
	 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
	 *
	 * @param object|array $row Result row.
	 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
	 * @return CommentStoreComment
	 */
	public function getComment( $row, $fallback = false ) {
		return $this->getCommentInternal( null, $row, $fallback );
	}

	/**
	 * Extract the comment from a row, with legacy lookups.
	 *
	 * If `$row` might have been generated using `self::getFields()` rather
	 * than `self::getJoin()`, use this. Prefer `self::getComment()` if you
	 * know callers used `self::getJoin()` for the row fetch.
	 *
	 * If you need to fake a comment in a row for some reason, set fields
	 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
	 *
	 * @param IDatabase $db Database handle to use for lookup
	 * @param object|array $row Result row.
	 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
	 * @return CommentStoreComment
	 */
	public function getCommentLegacy( IDatabase $db, $row, $fallback = false ) {
		return $this->getCommentInternal( $db, $row, $fallback );
	}

	/**
	 * Create a new CommentStoreComment, inserting it into the database if necessary
	 *
	 * If a comment is going to be passed to `self::insert()` or the like
	 * multiple times, it will be more efficient to pass a CommentStoreComment
	 * once rather than making `self::insert()` do it every time through.
	 *
	 * @note When passing a CommentStoreComment, this may set `$comment->id` if
	 *  it's not already set. If `$comment->id` is already set, it will not be
	 *  verified that the specified comment actually exists or that it
	 *  corresponds to the comment text, message, and/or data in the
	 *  CommentStoreComment.
	 * @note The comment text is truncated to self::MAX_COMMENT_LENGTH using
	 *  the language given to the constructor; when a CommentStoreComment is
	 *  passed in, its `text` is updated in place.
	 * @param IDatabase $dbw Database handle to insert on. Unused if `$comment`
	 *  is a CommentStoreComment and `$comment->id` is set.
	 * @param string|Message|CommentStoreComment $comment Comment text or Message object, or
	 *  a CommentStoreComment.
	 * @param array|null $data Structured data to store. Keys beginning with '_' are reserved.
	 *  Ignored if $comment is a CommentStoreComment.
	 * @return CommentStoreComment
	 * @throws InvalidArgumentException If $data contains a key beginning with '_'
	 * @throws OverflowException If the serialized data exceeds self::MAX_DATA_LENGTH bytes
	 */
	public function createComment( IDatabase $dbw, $comment, array $data = null ) {
		global $wgContLang;

		if ( !$comment instanceof CommentStoreComment ) {
			if ( $data !== null ) {
				foreach ( array_keys( $data ) as $k ) {
					if ( substr( $k, 0, 1 ) === '_' ) {
						throw new InvalidArgumentException( 'Keys in $data beginning with "_" are reserved' );
					}
				}
			}
			if ( $comment instanceof Message ) {
				// Work on a copy so the caller's Message is left untouched.
				$message = clone $comment;
				$text = $message->inLanguage( $wgContLang ) // Avoid $wgForceUIMsgAsContentMsg
					->setInterfaceMessageFlag( true )
					->text();
				$comment = new CommentStoreComment( null, $text, $message, $data );
			} else {
				$comment = new CommentStoreComment( null, $comment, null, $data );
			}
		}

		# Truncate comment in a Unicode-sensitive manner
		$comment->text = $this->lang->truncate( $comment->text, self::MAX_COMMENT_LENGTH );

		if ( $this->stage > MIGRATION_OLD && !$comment->id ) {
			$dbData = $comment->data;
			if ( !$comment->message instanceof RawMessage ) {
				// A real message is stored alongside the data under the reserved
				// '_message' key; '_null' records that the caller passed no data.
				if ( $dbData === null ) {
					$dbData = [ '_null' => true ];
				}
				$dbData['_message'] = self::encodeMessage( $comment->message );
			}
			if ( $dbData !== null ) {
				$dbData = FormatJson::encode( (object)$dbData, false, FormatJson::ALL_OK );
				$len = strlen( $dbData );
				if ( $len > self::MAX_DATA_LENGTH ) {
					$max = self::MAX_DATA_LENGTH;
					throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" );
				}
			}

			// Reuse an existing identical comment row if there is one. The
			// text and data are compared as well as the hash.
			$hash = self::hash( $comment->text, $dbData );
			$comment->id = $dbw->selectField(
				'comment',
				'comment_id',
				[
					'comment_hash' => $hash,
					'comment_text' => $comment->text,
					'comment_data' => $dbData,
				],
				__METHOD__
			);
			if ( !$comment->id ) {
				$comment->id = $dbw->nextSequenceValue( 'comment_comment_id_seq' );
				$dbw->insert(
					'comment',
					[
						'comment_id' => $comment->id,
						'comment_hash' => $hash,
						'comment_text' => $comment->text,
						'comment_data' => $dbData,
					],
					__METHOD__
				);
				$comment->id = $dbw->insertId();
			}
		}

		return $comment;
	}

	/**
	 * Implementation for `self::insert()` and `self::insertWithTempTable()`
	 * @param IDatabase $dbw
	 * @param string|Message|CommentStoreComment $comment
	 * @param array|null $data
	 * @return array [ array $fields, callable|null $callback ] The callback is
	 *  null unless the key uses a temporary table and the new schema is written.
	 */
	private function insertInternal( IDatabase $dbw, $comment, $data ) {
		$fields = [];
		$callback = null;

		$comment = $this->createComment( $dbw, $comment, $data );

		if ( $this->stage <= MIGRATION_WRITE_BOTH ) {
			// The old column receives a copy truncated to 255.
			// NOTE(review): presumably the old column's size limit; confirm against the schema.
			$fields[$this->key] = $this->lang->truncate( $comment->text, 255 );
		}

		if ( $this->stage >= MIGRATION_WRITE_BOTH ) {
			if ( isset( self::$tempTables[$this->key] ) ) {
				$t = self::$tempTables[$this->key];
				$func = __METHOD__;
				$commentId = $comment->id;
				// The temporary-table row needs the main row's primary key, so
				// the insert is deferred to a callback the caller invokes with it.
				$callback = function ( $id ) use ( $dbw, $commentId, $t, $func ) {
					$dbw->insert(
						$t['table'],
						[
							$t['pk'] => $id,
							$t['field'] => $commentId,
						],
						$func
					);
				};
			} else {
				$fields["{$this->key}_id"] = $comment->id;
			}
		}

		return [ $fields, $callback ];
	}

	/**
	 * Prepare for the insertion of a row with a comment
	 *
	 * @note It's recommended to include both the call to this method and the
	 *  row insert in the same transaction.
	 * @param IDatabase $dbw Database handle to insert on
	 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
	 * @param array|null $data As for `self::createComment()`
	 * @return array Fields for the insert or update
	 * @throws InvalidArgumentException If the key requires insertWithTempTable()
	 */
	public function insert( IDatabase $dbw, $comment, $data = null ) {
		if ( isset( self::$tempTables[$this->key] ) ) {
			throw new InvalidArgumentException( "Must use insertWithTempTable() for $this->key" );
		}

		list( $fields ) = $this->insertInternal( $dbw, $comment, $data );
		return $fields;
	}

	/**
	 * Prepare for the insertion of a row with a comment and temporary table
	 *
	 * This is currently needed for "rev_comment" and "img_description". In the
	 * future that requirement will be removed.
	 *
	 * @note It's recommended to include both the call to this method and the
	 *  row insert in the same transaction.
	 * @param IDatabase $dbw Database handle to insert on
	 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
	 * @param array|null $data As for `self::createComment()`
	 * @return array Two values:
	 *  - array Fields for the insert or update
	 *  - callable Function to call when the primary key of the row being
	 *    inserted/updated is known. Pass it that primary key.
	 * @throws InvalidArgumentException If the key must use insert() instead
	 */
	public function insertWithTempTable( IDatabase $dbw, $comment, $data = null ) {
		if ( isset( self::$formerTempTables[$this->key] ) ) {
			wfDeprecated( __METHOD__ . " for $this->key", self::$formerTempTables[$this->key] );
		} elseif ( !isset( self::$tempTables[$this->key] ) ) {
			throw new InvalidArgumentException( "Must use insert() for $this->key" );
		}

		list( $fields, $callback ) = $this->insertInternal( $dbw, $comment, $data );
		if ( !$callback ) {
			// Always hand back something callable so callers need not check.
			$callback = function () {
				// Do nothing.
			};
		}
		return [ $fields, $callback ];
	}

	/**
	 * Encode a Message as a PHP data structure
	 *
	 * The result is a list whose first element is the message key (or the
	 * list of keys to try, when there is more than one), followed by the
	 * parameters. Parameters that are themselves Message objects are encoded
	 * recursively as `[ 'message' => ... ]`.
	 *
	 * @param Message $msg
	 * @return array
	 */
	protected static function encodeMessage( Message $msg ) {
		$keys = $msg->getKeysToTry();
		$key = count( $keys ) > 1 ? $keys : $msg->getKey();
		$params = $msg->getParams();
		foreach ( $params as &$param ) {
			if ( $param instanceof Message ) {
				$param = [
					'message' => self::encodeMessage( $param )
				];
			}
		}
		// Break the reference left behind by the by-reference loop.
		unset( $param );
		array_unshift( $params, $key );
		return $params;
	}

	/**
	 * Decode a message that was encoded by self::encodeMessage()
	 * @param array $data
	 * @return Message
	 */
	protected static function decodeMessage( $data ) {
		$key = array_shift( $data );
		foreach ( $data as &$param ) {
			if ( is_object( $param ) ) {
				$param = (array)$param;
			}
			if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) {
				$param = self::decodeMessage( $param['message'] );
			}
		}
		// Break the reference left behind by the by-reference loop.
		unset( $param );
		return new Message( $key, $data );
	}

	/**
	 * Hashing function for comment storage
	 *
	 * The hash is the XOR of the CRC-32 of the text and the CRC-32 of the
	 * data (null data is hashed as the empty string).
	 *
	 * @param string $text Comment text
	 * @param string|null $data Comment data
	 * @return int 32-bit signed integer
	 */
	public static function hash( $text, $data ) {
		$hash = crc32( $text ) ^ crc32( (string)$data );

		// 64-bit PHP returns an unsigned CRC, change it to signed for
		// insertion into the database.
		if ( $hash >= 0x80000000 ) {
			$hash |= -1 << 32;
		}

		return $hash;
	}

}