3 * Manage storage of comments in the database
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 use Wikimedia\Rdbms\IDatabase
;
26 * CommentStore handles storage of comments (edit summaries, log reasons, etc)
33 * Maximum length of a comment in UTF-8 characters. Longer comments will be truncated.
34 * @note This must be at least 255 and not greater than floor( MAX_COMMENT_LENGTH / 4 ).
36 const COMMENT_CHARACTER_LIMIT
= 1000;
39 * Maximum length of a comment in bytes. Longer comments will be truncated.
40 * @note This value is determined by the size of the underlying database field,
41 * currently BLOB in MySQL/MariaDB.
43 const MAX_COMMENT_LENGTH
= 65535;
46 * Maximum length of serialized data in bytes. Longer data will result in an exception.
47 * @note This value is determined by the size of the underlying database field,
48 * currently BLOB in MySQL/MariaDB.
50 const MAX_DATA_LENGTH
= 65535;
53 * Define fields that use temporary tables for transitional purposes
54 * @var array Keys are '$key', values are arrays with four fields:
55 * - table: Temporary table name
56 * - pk: Temporary table column referring to the main table's primary key
57 * - field: Temporary table column referring to comment.comment_id
58 * - joinPK: Main table's primary key
60 protected static $tempTables = [
62 'table' => 'revision_comment_temp',
63 'pk' => 'revcomment_rev',
64 'field' => 'revcomment_comment_id',
67 'img_description' => [
68 'table' => 'image_comment_temp',
69 'pk' => 'imgcomment_name',
70 'field' => 'imgcomment_description_id',
71 'joinPK' => 'img_name',
76 * Fields that formerly used $tempTables
77 * @var array Key is '$key', value is the MediaWiki version in which it was
78 * removed from $tempTables.
80 protected static $formerTempTables = [];
85 /** @var int One of the MIGRATION_* constants */
88 /** @var array|null Cache for `self::getJoin()` */
89 protected $joinCache = null;
91 /** @var Language Language to use for comment truncation */
95 * @param string $key A key such as "rev_comment" identifying the comment
96 * field being fetched.
97 * @param Language $lang Language to use for comment truncation. Defaults to $wgContLang.
100 public function __construct( $key, Language
$lang = null ) {
// Both the migration stage and the default language come from global configuration.
101 global $wgCommentTableSchemaMigrationStage, $wgContLang;
// Snapshot the schema migration stage; the other methods of this class branch on $this->stage.
104 $this->stage
= $wgCommentTableSchemaMigrationStage;
// The short ternary falls back to $wgContLang whenever $lang is null (or otherwise falsy).
105 $this->lang
= $lang ?
: $wgContLang;
109 * Static constructor for easier chaining
110 * @param string $key A key such as "rev_comment" identifying the comment
111 * field being fetched.
112 * @return CommentStore
114 public static function newKey( $key ) {
// Same as `new CommentStore( $key )`: no language is passed, so the constructor's default applies.
115 return new CommentStore( $key );
119 * Get SELECT fields for the comment key
121 * Each resulting row should be passed to `self::getCommentLegacy()` to get the comment.
124 * @note Use of this method may require a subsequent database query to
125 * actually fetch the comment. If possible, use `self::getJoin()` instead.
126 * @return string[] to include in the `$vars` to `IDatabase->select()`. All
127 * fields are aliased, so `+` is safe to use.
129 public function getFields() {
// Builds an alias => expression map; which aliases are present depends on the migration stage.
131 if ( $this->stage
=== MIGRATION_OLD
) {
// Old schema: the text comes straight from the legacy column named by $this->key, and the
// string 'NULL' is used as the expression for the data and comment-ID aliases.
132 $fields["{$this->key}_text"] = $this->key
;
133 $fields["{$this->key}_data"] = 'NULL';
134 $fields["{$this->key}_cid"] = 'NULL';
// While the stage is below MIGRATION_NEW, the legacy column is selected under the `_old` alias.
136 if ( $this->stage
< MIGRATION_NEW
) {
137 $fields["{$this->key}_old"] = $this->key
;
// Keys listed in self::$tempTables select the main table's primary key as `{$key}_pk`;
// the `{$key}_id` column is selected as-is below. getCommentInternal() reads these two aliases.
139 if ( isset( self
::$tempTables[$this->key
] ) ) {
140 $fields["{$this->key}_pk"] = self
::$tempTables[$this->key
]['joinPK'];
142 $fields["{$this->key}_id"] = "{$this->key}_id";
149 * Get SELECT fields and joins for the comment key
151 * Each resulting row should be passed to `self::getComment()` to get the comment.
154 * @return array With three keys:
155 * - tables: (string[]) to include in the `$table` to `IDatabase->select()`
156 * - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
157 * - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
158 * All tables, fields, and joins are aliased, so `+` is safe to use.
160 public function getJoin() {
// The join description is only computed while nothing is cached in $this->joinCache.
161 if ( $this->joinCache
=== null ) {
166 if ( $this->stage
=== MIGRATION_OLD
) {
// Old schema: no join is described here; the legacy column supplies the text and the
// string 'NULL' is used as the expression for the data and comment-ID aliases.
167 $fields["{$this->key}_text"] = $this->key
;
168 $fields["{$this->key}_data"] = 'NULL';
169 $fields["{$this->key}_cid"] = 'NULL';
// MIGRATION_NEW uses a plain JOIN; any other stage reaching this line uses LEFT JOIN,
// which pairs with the COALESCE fallback to the legacy column further down.
171 $join = $this->stage
=== MIGRATION_NEW ?
'JOIN' : 'LEFT JOIN';
173 if ( isset( self
::$tempTables[$this->key
] ) ) {
// Temp-table keys: join the temp table (aliased "temp_<key>") on the main table's primary
// key, and take the comment ID from the temp table's 'field' column.
174 $t = self
::$tempTables[$this->key
];
175 $alias = "temp_$this->key";
176 $tables[$alias] = $t['table'];
177 $joins[$alias] = [ $join, "{$alias}.{$t['pk']} = {$t['joinPK']}" ];
178 $joinField = "{$alias}.{$t['field']}";
// Otherwise the comment ID is the `{$key}_id` column.
180 $joinField = "{$this->key}_id";
// Join the comment table under a per-key alias ("comment_<key>"); $alias is reused for it from here on.
183 $alias = "comment_$this->key";
184 $tables[$alias] = 'comment';
185 $joins[$alias] = [ $join, "{$alias}.comment_id = {$joinField}" ];
187 if ( $this->stage
=== MIGRATION_NEW
) {
188 $fields["{$this->key}_text"] = "{$alias}.comment_text";
// The COALESCE form uses the legacy column when the joined comment_text is NULL.
190 $fields["{$this->key}_text"] = "COALESCE( {$alias}.comment_text, $this->key )";
192 $fields["{$this->key}_data"] = "{$alias}.comment_data";
193 $fields["{$this->key}_cid"] = "{$alias}.comment_id";
// Callers always receive the cached value.
203 return $this->joinCache
;
207 * Extract the comment from a row
209 * Shared implementation for getComment() and getCommentLegacy()
211 * @param IDatabase|null $db Database handle for getCommentLegacy(), or null for getComment()
212 * @param object|array $row
213 * @param bool $fallback
214 * @return CommentStoreComment
216 private function getCommentInternal( IDatabase
$db = null, $row, $fallback = false ) {
// First choice: the row already carries the `{$key}_text` and `{$key}_data` aliases.
// array_key_exists() is used so that a NULL value still counts as "present".
219 if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) {
// The comment ID is optional in this case; it is null when `{$key}_cid` is absent or NULL.
220 $cid = isset( $row["{$key}_cid"] ) ?
$row["{$key}_cid"] : null;
221 $text = $row["{$key}_text"];
222 $data = $row["{$key}_data"];
223 } elseif ( $this->stage
=== MIGRATION_OLD
) {
// Old schema without the aliases: a warning is logged in either case; the fallback case
// applies only when $fallback is set and the legacy column is present in the row.
225 if ( $fallback && isset( $row[$key] ) ) {
226 wfLogWarning( "Using deprecated fallback handling for comment $key" );
229 wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" );
// Temp-table keys: the comment is looked up via the main table's primary key (`{$key}_pk`).
234 if ( isset( self
::$tempTables[$key] ) ) {
235 if ( array_key_exists( "{$key}_pk", $row ) ) {
// NOTE(review): presumably this exception is guarded by a "no $db" check, since
// getComment() passes null for $db and the lookup below needs a handle — confirm.
237 throw new InvalidArgumentException(
238 "\$row does not contain fields needed for comment $key and getComment(), but "
239 . "does have fields for getCommentLegacy()"
242 $t = self
::$tempTables[$key];
243 $id = $row["{$key}_pk"];
// Query the temp table joined to the comment table on the temp table's comment-ID column.
244 $row2 = $db->selectRow(
245 [ $t['table'], 'comment' ],
246 [ 'comment_id', 'comment_text', 'comment_data' ],
250 [ 'comment' => [ 'JOIN', [ "comment_id = {$t['field']}" ] ] ]
252 } elseif ( $fallback && isset( $row[$key] ) ) {
253 wfLogWarning( "Using deprecated fallback handling for comment $key" );
// Stand-in row built from the legacy column, with no structured data.
// NOTE(review): this object has no comment_id property, yet comment_id is read from
// $row2 further down — confirm that does not raise an undefined-property notice.
254 $row2 = (object)[ 'comment_text' => $row[$key], 'comment_data' => null ];
256 throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
// Non-temp-table keys: same pattern, but the comment ID comes directly from `{$key}_id`.
259 if ( array_key_exists( "{$key}_id", $row ) ) {
// NOTE(review): presumably guarded by the same "no $db" check as above — confirm.
261 throw new InvalidArgumentException(
262 "\$row does not contain fields needed for comment $key and getComment(), but "
263 . "does have fields for getCommentLegacy()"
266 $id = $row["{$key}_id"];
267 $row2 = $db->selectRow(
269 [ 'comment_id', 'comment_text', 'comment_data' ],
270 [ 'comment_id' => $id ],
273 } elseif ( $fallback && isset( $row[$key] ) ) {
274 wfLogWarning( "Using deprecated fallback handling for comment $key" );
// NOTE(review): same stand-in row as above, again without a comment_id property.
275 $row2 = (object)[ 'comment_text' => $row[$key], 'comment_data' => null ];
277 throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
// Copy the looked-up (or stand-in) row into the local result variables.
282 $cid = $row2->comment_id
;
283 $text = $row2->comment_text
;
284 $data = $row2->comment_data
;
// No usable comment row: below MIGRATION_NEW the legacy text selected as `{$key}_old` is used.
285 } elseif ( $this->stage
< MIGRATION_NEW
&& array_key_exists( "{$key}_old", $row ) ) {
287 $text = $row["{$key}_old"];
290 // @codeCoverageIgnoreStart
291 wfLogWarning( "Missing comment row for $key, id=$id" );
295 // @codeCoverageIgnoreEnd
// Structured data is stored as JSON; decode it and pull out the reserved entries.
300 if ( $data !== null ) {
301 $data = FormatJson
::decode( $data );
302 if ( !is_object( $data ) ) {
303 // @codeCoverageIgnoreStart
304 wfLogWarning( "Invalid JSON object in comment: $data" );
306 // @codeCoverageIgnoreEnd
308 $data = (array)$data;
// '_message' holds a message in the format accepted by self::decodeMessage().
309 if ( isset( $data['_message'] ) ) {
310 $msg = self
::decodeMessage( $data['_message'] )
311 ->setInterfaceMessageFlag( true );
// '_null' is the marker createComment() stores when the caller's data was null.
313 if ( !empty( $data['_null'] ) ) {
// Entries whose key starts with '_' get separate treatment in this loop.
316 foreach ( $data as $k => $v ) {
317 if ( substr( $k, 0, 1 ) === '_' ) {
325 return new CommentStoreComment( $cid, $text, $msg, $data );
329 * Extract the comment from a row
331 * Use `self::getJoin()` to ensure the row contains the needed data.
333 * If you need to fake a comment in a row for some reason, set fields
334 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
336 * @param object|array $row Result row.
337 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
338 * @return CommentStoreComment
340 public function getComment( $row, $fallback = false ) {
// No database handle is passed, so the shared implementation receives null for $db.
341 return $this->getCommentInternal( null, $row, $fallback );
345 * Extract the comment from a row, with legacy lookups.
347 * If `$row` might have been generated using `self::getFields()` rather
348 * than `self::getJoin()`, use this. Prefer `self::getComment()` if you
349 * know callers used `self::getJoin()` for the row fetch.
351 * If you need to fake a comment in a row for some reason, set fields
352 * `{$key}_text` (string) and `{$key}_data` (JSON string or null).
354 * @param IDatabase $db Database handle to use for lookup
355 * @param object|array $row Result row.
356 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
357 * @return CommentStoreComment
359 public function getCommentLegacy( IDatabase
$db, $row, $fallback = false ) {
// Same shared implementation as getComment(), but with a handle it can use for selectRow() lookups.
360 return $this->getCommentInternal( $db, $row, $fallback );
364 * Create a new CommentStoreComment, inserting it into the database if necessary
366 * If a comment is going to be passed to `self::insert()` or the like
367 * multiple times, it will be more efficient to pass a CommentStoreComment
368 * once rather than making `self::insert()` do it every time through.
370 * @note When passing a CommentStoreComment, this may set `$comment->id` if
371 * it's not already set. If `$comment->id` is already set, it will not be
372 * verified that the specified comment actually exists or that it
373 * corresponds to the comment text, message, and/or data in the
374 * CommentStoreComment.
375 * @param IDatabase $dbw Database handle to insert on. Unused if `$comment`
376 * is a CommentStoreComment and `$comment->id` is set.
377 * @param string|Message|CommentStoreComment $comment Comment text or Message object, or
378 * a CommentStoreComment.
379 * @param array|null $data Structured data to store. Keys beginning with '_' are reserved.
380 * Ignored if $comment is a CommentStoreComment.
381 * @return CommentStoreComment
383 public function createComment( IDatabase
$dbw, $comment, array $data = null ) {
// Normalize the input through CommentStoreComment::newUnsavedComment(); the result's
// text, message, data and id properties are what the rest of this method works with.
384 $comment = CommentStoreComment
::newUnsavedComment( $comment, $data );
386 # Truncate comment in a Unicode-sensitive manner
// Two limits apply in turn: the MAX_COMMENT_LENGTH limit via the language's truncate(),
// then the COMMENT_CHARACTER_LIMIT character limit via mb_* functions.
387 $comment->text
= $this->lang
->truncate( $comment->text
, self
::MAX_COMMENT_LENGTH
);
388 if ( mb_strlen( $comment->text
, 'UTF-8' ) > self
::COMMENT_CHARACTER_LIMIT
) {
// NOTE(review): the ellipsis is fetched with ->escaped() and then appended to the stored
// text — confirm that escaped output is what should be stored.
389 $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this->lang
)->escaped();
// NOTE(review): this mb_strlen() call omits the 'UTF-8' argument that the other mb_* calls
// in this method pass, so it relies on the configured internal encoding — confirm.
390 if ( mb_strlen( $ellipsis ) >= self
::COMMENT_CHARACTER_LIMIT
) {
// Leave room for the ellipsis so the final text stays within the character limit.
394 $maxLength = self
::COMMENT_CHARACTER_LIMIT
- mb_strlen( $ellipsis, 'UTF-8' );
395 $comment->text
= mb_substr( $comment->text
, 0, $maxLength, 'UTF-8' ) . $ellipsis;
// A comment row is only looked up or created past MIGRATION_OLD, and only when the
// comment does not already carry an ID.
398 if ( $this->stage
> MIGRATION_OLD
&& !$comment->id
) {
399 $dbData = $comment->data
;
// Messages other than RawMessage are stored inside the data under the reserved '_message' key.
400 if ( !$comment->message
instanceof RawMessage
) {
// '_null' records that the caller supplied no data, so reading code can tell the cases apart.
401 if ( $dbData === null ) {
402 $dbData = [ '_null' => true ];
404 $dbData['_message'] = self
::encodeMessage( $comment->message
);
// Serialize to JSON and enforce the MAX_DATA_LENGTH limit; strlen() counts bytes.
406 if ( $dbData !== null ) {
407 $dbData = FormatJson
::encode( (object)$dbData, false, FormatJson
::ALL_OK
);
408 $len = strlen( $dbData );
409 if ( $len > self
::MAX_DATA_LENGTH
) {
410 $max = self
::MAX_DATA_LENGTH
;
411 throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" );
// Look for an existing row matching hash, text and data, so identical comments share one ID.
415 $hash = self
::hash( $comment->text
, $dbData );
416 $comment->id
= $dbw->selectField(
420 'comment_hash' => $hash,
421 'comment_text' => $comment->text
,
422 'comment_data' => $dbData,
// Nothing matched: a new row is written and its ID is read back from the handle.
426 if ( !$comment->id
) {
430 'comment_hash' => $hash,
431 'comment_text' => $comment->text
,
432 'comment_data' => $dbData,
436 $comment->id
= $dbw->insertId();
444 * Implementation for `self::insert()` and `self::insertWithTempTable()`
445 * @param IDatabase $dbw
446 * @param string|Message|CommentStoreComment $comment
447 * @param array|null $data
448 * @return array [ array $fields, callable $callback ]
450 private function insertInternal( IDatabase
$dbw, $comment, $data ) {
// createComment() returns a CommentStoreComment; its text and id are used below.
454 $comment = $this->createComment( $dbw, $comment, $data );
// Up to and including MIGRATION_WRITE_BOTH the legacy column is still written, truncated to 255.
456 if ( $this->stage
<= MIGRATION_WRITE_BOTH
) {
457 $fields[$this->key
] = $this->lang
->truncate( $comment->text
, 255 );
// From MIGRATION_WRITE_BOTH onwards the comment ID is recorded as well.
460 if ( $this->stage
>= MIGRATION_WRITE_BOTH
) {
461 if ( isset( self
::$tempTables[$this->key
] ) ) {
// Temp-table keys: the link row needs the main row's primary key, which is passed to the
// callback as $id, so the write is deferred into a closure.
462 $t = self
::$tempTables[$this->key
];
464 $commentId = $comment->id
;
// NOTE(review): $func is captured here, so it must be assigned before this closure is created — confirm.
465 $callback = function ( $id ) use ( $dbw, $commentId, $t, $func ) {
470 $t['field'] => $commentId,
// Other keys store the comment ID directly in the main row's `{$key}_id` column.
476 $fields["{$this->key}_id"] = $comment->id
;
480 return [ $fields, $callback ];
484 * Insert a comment in preparation for a row that references it
486 * @note It's recommended to include both the call to this method and the
487 * row insert in the same transaction.
488 * @param IDatabase $dbw Database handle to insert on
489 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
490 * @param array|null $data As for `self::createComment()`
491 * @return array Fields for the insert or update
493 public function insert( IDatabase
$dbw, $comment, $data = null ) {
// Keys that still use a temp table are rejected here and must go through insertWithTempTable().
494 if ( isset( self
::$tempTables[$this->key
] ) ) {
495 throw new InvalidArgumentException( "Must use insertWithTempTable() for $this->key" );
// Only the first element of the pair (the fields) is taken; the callback is discarded.
498 list( $fields ) = $this->insertInternal( $dbw, $comment, $data );
503 * Insert a comment in a temporary table in preparation for a row that references it
505 * This is currently needed for "rev_comment" and "img_description". In the
506 * future that requirement will be removed.
508 * @note It's recommended to include both the call to this method and the
509 * row insert in the same transaction.
510 * @param IDatabase $dbw Database handle to insert on
511 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
512 * @param array|null $data As for `self::createComment()`
513 * @return array Two values:
514 * - array Fields for the insert or update
515 * - callable Function to call when the primary key of the row being
516 * inserted/updated is known. Pass it that primary key.
518 public function insertWithTempTable( IDatabase
$dbw, $comment, $data = null ) {
// Keys that formerly used a temp table trigger a deprecation notice carrying the version
// stored for them in self::$formerTempTables.
519 if ( isset( self
::$formerTempTables[$this->key
] ) ) {
520 wfDeprecated( __METHOD__
. " for $this->key", self
::$formerTempTables[$this->key
] );
// Keys that never used a temp table are rejected and must go through insert().
521 } elseif ( !isset( self
::$tempTables[$this->key
] ) ) {
522 throw new InvalidArgumentException( "Must use insert() for $this->key" );
525 list( $fields, $callback ) = $this->insertInternal( $dbw, $comment, $data );
// NOTE(review): $callback is replaced by a parameterless closure here, presumably only when
// insertInternal() produced none, so callers can always invoke the result — confirm.
527 $callback = function () {
531 return [ $fields, $callback ];
535 * Encode a Message as a PHP data structure
536 * @param Message $msg
539 protected static function encodeMessage( Message
$msg ) {
// With more than one key to try, the whole key list is kept; otherwise just the single key.
540 $key = count( $msg->getKeysToTry() ) > 1 ?
$msg->getKeysToTry() : $msg->getKey();
541 $params = $msg->getParams();
// Parameters are visited by reference so that nested Message parameters can be encoded
// recursively under a 'message' key.
// NOTE(review): $param remains a reference after a by-reference foreach; no unset() is
// visible before the array_unshift() below — confirm.
542 foreach ( $params as &$param ) {
543 if ( $param instanceof Message
) {
545 'message' => self
::encodeMessage( $param )
// The key goes in front of the parameters; decodeMessage() takes it back off with array_shift().
549 array_unshift( $params, $key );
554 * Decode a message that was encoded by self::encodeMessage()
558 protected static function decodeMessage( $data ) {
// The first element is the key (or key list) that encodeMessage() put in front; the rest are parameters.
559 $key = array_shift( $data );
// NOTE(review): by-reference foreach; $param still references the last element afterwards.
560 foreach ( $data as &$param ) {
// Object parameters are cast to arrays so the shape check below applies to both forms.
561 if ( is_object( $param ) ) {
562 $param = (array)$param;
// An array holding exactly one 'message' entry is a nested message; decode it recursively.
564 if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) {
565 $param = self
::decodeMessage( $param['message'] );
568 return new Message( $key, $data );
572 * Hashing function for comment storage
573 * @param string $text Comment text
574 * @param string|null $data Comment data
575 * @return int 32-bit signed integer
577 public static function hash( $text, $data ) {
578 $hash = crc32( $text ) ^
crc32( (string)$data );
580 // 64-bit PHP returns an unsigned CRC, change it to signed for
581 // insertion into the database.
582 if ( $hash >= 0x80000000 ) {