3 * Manage storage of comments in the database
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
23 use Wikimedia\Rdbms\IDatabase
;
26 * CommentStore handles storage of comments (edit summaries, log reasons, etc)
32 /** Maximum length of a comment. Longer comments will be truncated. */
33 const MAX_COMMENT_LENGTH
= 65535;
35 /** Maximum length of serialized data. Longer data will result in an exception. */
36 const MAX_DATA_LENGTH
= 65535;
39 * Define fields that use temporary tables for transitional purposes
40 * @var array Keys are '$key', values are arrays with four fields:
41 * - table: Temporary table name
42 * - pk: Temporary table column referring to the main table's primary key
43 * - field: Temporary table column referring comment.comment_id
44 * - joinPK: Main table's primary key
46 protected static $tempTables = [
48 'table' => 'revision_comment_temp',
49 'pk' => 'revcomment_rev',
50 'field' => 'revcomment_comment_id',
53 'img_description' => [
54 'table' => 'image_comment_temp',
55 'pk' => 'imgcomment_name',
56 'field' => 'imgcomment_description_id',
57 'joinPK' => 'img_name',
62 * Fields that formerly used $tempTables
63 * @var array Key is '$key', value is the MediaWiki version in which it was
64 * removed from $tempTables.
66 protected static $formerTempTables = [];
71 /** @var int One of the MIGRATION_* constants */
74 /** @var array|null Cache for `self::getJoin()` */
75 protected $joinCache = null;
77 /** @var Language Language to use for comment truncation */
/**
 * Bind a new store to a single comment field.
 *
 * The schema migration stage is read once, here, from the global
 * $wgCommentTableSchemaMigrationStage and kept for the object's lifetime.
 *
 * @param string $key A key such as "rev_comment" identifying the comment
 *  field being fetched.
 * @param Language $lang Language to use for comment truncation. Defaults
 *  to $wgContLang when omitted.
 */
public function __construct( $key, Language $lang = null ) {
	global $wgCommentTableSchemaMigrationStage, $wgContLang;

	// Fall back to the content language when the caller did not supply one.
	if ( !$lang ) {
		$lang = $wgContLang;
	}

	$this->key = $key;
	$this->stage = $wgCommentTableSchemaMigrationStage;
	$this->lang = $lang;
}
/**
 * Static factory, convenient for call chaining
 *
 * @param string $key A key such as "rev_comment" identifying the comment
 *  field being fetched.
 * @return CommentStore A store for $key using the default language.
 */
public static function newKey( $key ) {
	$store = new CommentStore( $key );
	return $store;
}
/**
 * Build the SELECT fields for this comment key, without any joins
 *
 * Rows fetched with these fields should be passed to
 * `self::getCommentLegacy()` to obtain the comment.
 *
 * @note Use of this method may require a subsequent database query to
 *  actually fetch the comment. If possible, use `self::getJoin()` instead.
 * @return string[] to include in the `$vars` to `IDatabase->select()`. All
 *  fields are aliased, so `+` is safe to use.
 */
public function getFields() {
	$key = $this->key;

	// Old schema: the text lives in the main table column; there is no
	// structured data and no comment-table ID.
	if ( $this->stage === MIGRATION_OLD ) {
		return [
			"{$key}_text" => $key,
			"{$key}_data" => 'NULL',
			"{$key}_cid" => 'NULL',
		];
	}

	$fields = [];

	// While the old column is still around, select it as a fallback value.
	if ( $this->stage < MIGRATION_NEW ) {
		$fields["{$key}_old"] = $key;
	}

	// Select whichever column lets the comment row be looked up later: the
	// main table's primary key for temp-table fields, otherwise the ID column.
	if ( isset( self::$tempTables[$key] ) ) {
		$fields["{$key}_pk"] = self::$tempTables[$key]['joinPK'];
	} else {
		$fields["{$key}_id"] = "{$key}_id";
	}

	return $fields;
}
/**
 * Build the tables, SELECT fields and join conditions for this comment key
 *
 * Rows fetched with this information should be passed to
 * `self::getComment()` to obtain the comment. The result is computed once
 * per object and then served from `$this->joinCache`.
 *
 * @return array With three keys:
 *   - tables: (string[]) to include in the `$table` to `IDatabase->select()`
 *   - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
 *   - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
 *  All tables, fields, and joins are aliased, so `+` is safe to use.
 */
public function getJoin() {
	if ( $this->joinCache !== null ) {
		return $this->joinCache;
	}

	$key = $this->key;
	$tables = [];
	$joins = [];

	if ( $this->stage === MIGRATION_OLD ) {
		// Old schema: no joins at all, the text is the main table column.
		$fields = [
			"{$key}_text" => $key,
			"{$key}_data" => 'NULL',
			"{$key}_cid" => 'NULL',
		];
	} else {
		// Until migration is complete a comment row may not exist yet, so
		// only the final stage can use an inner join.
		$joinType = 'LEFT JOIN';
		if ( $this->stage === MIGRATION_NEW ) {
			$joinType = 'JOIN';
		}

		if ( isset( self::$tempTables[$key] ) ) {
			// Reach the comment ID through the temporary linking table.
			$temp = self::$tempTables[$key];
			$tempAlias = "temp_{$key}";
			$tables[$tempAlias] = $temp['table'];
			$joins[$tempAlias] = [ $joinType, "{$tempAlias}.{$temp['pk']} = {$temp['joinPK']}" ];
			$idColumn = "{$tempAlias}.{$temp['field']}";
		} else {
			$idColumn = "{$key}_id";
		}

		$commentAlias = "comment_{$key}";
		$tables[$commentAlias] = 'comment';
		$joins[$commentAlias] = [ $joinType, "{$commentAlias}.comment_id = {$idColumn}" ];

		// During migration, prefer the comment table but fall back to the
		// old column when the LEFT JOIN found nothing.
		$textExpr = $this->stage === MIGRATION_NEW
			? "{$commentAlias}.comment_text"
			: "COALESCE( {$commentAlias}.comment_text, {$key} )";

		$fields = [
			"{$key}_text" => $textExpr,
			"{$key}_data" => "{$commentAlias}.comment_data",
			"{$key}_cid" => "{$commentAlias}.comment_id",
		];
	}

	$this->joinCache = [
		'tables' => $tables,
		'fields' => $fields,
		'joins' => $joins,
	];

	return $this->joinCache;
}
/**
 * Extract the comment from a row
 *
 * Shared implementation for getComment() and getCommentLegacy(). The row is
 * inspected in this order:
 *  1. `{$key}_text` and `{$key}_data` present: use them directly.
 *  2. Migration stage is MIGRATION_OLD: use the bare `$key` column if
 *     $fallback allows it, otherwise log a warning and use an empty text.
 *  3. Otherwise look the comment up in the database via `{$key}_pk` (fields
 *     listed in self::$tempTables) or `{$key}_id`, which needs $db.
 *
 * @param IDatabase|null $db Database handle for getCommentLegacy(), or null for getComment()
 * @param object|array $row
 * @param bool $fallback If true, fall back to the bare `$key` column where
 *  possible instead of throwing.
 * @return CommentStoreComment
 * @throws InvalidArgumentException If the row lacks the needed fields, or has
 *  only the legacy lookup fields while no database handle was given.
 */
private function getCommentInternal( IDatabase $db = null, $row, $fallback = false ) {
	$key = $this->key;
	$row = (array)$row;

	if ( array_key_exists( "{$key}_text", $row ) && array_key_exists( "{$key}_data", $row ) ) {
		$cid = isset( $row["{$key}_cid"] ) ? $row["{$key}_cid"] : null;
		$text = $row["{$key}_text"];
		$data = $row["{$key}_data"];
	} elseif ( $this->stage === MIGRATION_OLD ) {
		$cid = null;
		if ( $fallback && isset( $row[$key] ) ) {
			wfLogWarning( "Using deprecated fallback handling for comment $key" );
			$text = $row[$key];
		} else {
			wfLogWarning( "Missing {$key}_text and {$key}_data fields in row with MIGRATION_OLD" );
			$text = '';
		}
		$data = null;
	} else {
		if ( isset( self::$tempTables[$key] ) ) {
			if ( array_key_exists( "{$key}_pk", $row ) ) {
				if ( !$db ) {
					throw new InvalidArgumentException(
						"\$row does not contain fields needed for comment $key and getComment(), but "
						. "does have fields for getCommentLegacy()"
					);
				}
				$t = self::$tempTables[$key];
				$id = $row["{$key}_pk"];
				$row2 = $db->selectRow(
					[ $t['table'], 'comment' ],
					[ 'comment_id', 'comment_text', 'comment_data' ],
					[ $t['pk'] => $id ],
					__METHOD__,
					[],
					[ 'comment' => [ 'JOIN', [ "comment_id = {$t['field']}" ] ] ]
				);
			} elseif ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				// Include comment_id so the property read below does not hit
				// an undefined property on this synthetic row.
				$row2 = (object)[
					'comment_id' => null,
					'comment_text' => $row[$key],
					'comment_data' => null,
				];
			} else {
				throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
			}
		} else {
			if ( array_key_exists( "{$key}_id", $row ) ) {
				if ( !$db ) {
					throw new InvalidArgumentException(
						"\$row does not contain fields needed for comment $key and getComment(), but "
						. "does have fields for getCommentLegacy()"
					);
				}
				$id = $row["{$key}_id"];
				$row2 = $db->selectRow(
					'comment',
					[ 'comment_id', 'comment_text', 'comment_data' ],
					[ 'comment_id' => $id ],
					__METHOD__
				);
			} elseif ( $fallback && isset( $row[$key] ) ) {
				wfLogWarning( "Using deprecated fallback handling for comment $key" );
				// Same as above: keep the synthetic row shaped like a real one.
				$row2 = (object)[
					'comment_id' => null,
					'comment_text' => $row[$key],
					'comment_data' => null,
				];
			} else {
				throw new InvalidArgumentException( "\$row does not contain fields needed for comment $key" );
			}
		}

		if ( $row2 ) {
			$cid = $row2->comment_id;
			$text = $row2->comment_text;
			$data = $row2->comment_data;
		} elseif ( $this->stage < MIGRATION_NEW && array_key_exists( "{$key}_old", $row ) ) {
			// No comment row yet, but the old column was selected: use it.
			$cid = null;
			$text = $row["{$key}_old"];
			$data = null;
		} else {
			// @codeCoverageIgnoreStart
			// Only reachable after a failed selectRow(), so $id is set here.
			wfLogWarning( "Missing comment row for $key, id=$id" );
			$cid = null;
			$text = '';
			$data = null;
			// @codeCoverageIgnoreEnd
		}
	}

	$msg = null;
	if ( $data !== null ) {
		// Keep the raw string: after decoding, $data may be an array or
		// null and is useless (or noisy) inside a log message.
		$rawData = $data;
		$data = FormatJson::decode( $rawData );
		if ( !is_object( $data ) ) {
			// @codeCoverageIgnoreStart
			wfLogWarning( "Invalid JSON object in comment: $rawData" );
			$data = null;
			// @codeCoverageIgnoreEnd
		} else {
			$data = (array)$data;
			if ( isset( $data['_message'] ) ) {
				$msg = self::decodeMessage( $data['_message'] )
					->setInterfaceMessageFlag( true );
			}
			if ( !empty( $data['_null'] ) ) {
				// '_null' marks "no caller data"; only reserved keys were stored.
				$data = null;
			} else {
				// Strip the reserved '_'-prefixed keys before handing data back.
				foreach ( $data as $k => $v ) {
					if ( substr( $k, 0, 1 ) === '_' ) {
						unset( $data[$k] );
					}
				}
			}
		}
	}

	return new CommentStoreComment( $cid, $text, $msg, $data );
}
/**
 * Read the comment out of a result row
 *
 * The row is expected to have been fetched with `self::getJoin()`, so that
 * it already carries everything needed; no database handle is passed on.
 *
 * To fake a comment in a row, set fields `{$key}_text` (string) and
 * `{$key}_data` (JSON string or null).
 *
 * @param object|array $row Result row.
 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
 * @return CommentStoreComment
 */
public function getComment( $row, $fallback = false ) {
	// Null handle: the shared implementation must not query the database.
	$noDatabase = null;
	$comment = $this->getCommentInternal( $noDatabase, $row, $fallback );
	return $comment;
}
/**
 * Read the comment out of a result row, looking it up in the database when needed
 *
 * Use this when `$row` might have been fetched with `self::getFields()`
 * rather than `self::getJoin()`. If callers are known to have used
 * `self::getJoin()`, prefer `self::getComment()`.
 *
 * To fake a comment in a row, set fields `{$key}_text` (string) and
 * `{$key}_data` (JSON string or null).
 *
 * @param IDatabase $db Database handle to use for lookup
 * @param object|array $row Result row.
 * @param bool $fallback If true, fall back as well as possible instead of throwing an exception.
 * @return CommentStoreComment
 */
public function getCommentLegacy( IDatabase $db, $row, $fallback = false ) {
	$comment = $this->getCommentInternal( $db, $row, $fallback );
	return $comment;
}
/**
 * Produce a CommentStoreComment, storing a `comment` row for it if needed
 *
 * When the same comment will be handed to `self::insert()` or similar
 * several times, build the CommentStoreComment once with this method and
 * pass that object around, instead of having each call redo the work.
 *
 * @note When passing a CommentStoreComment, this may set `$comment->id` if
 *  it's not already set. If `$comment->id` is already set, it will not be
 *  verified that the specified comment actually exists or that it
 *  corresponds to the comment text, message, and/or data in the
 *  CommentStoreComment. The object's text is also replaced by its
 *  truncated form.
 * @param IDatabase $dbw Database handle to insert on. Unused if `$comment`
 *  is a CommentStoreComment and `$comment->id` is set.
 * @param string|Message|CommentStoreComment $comment Comment text or Message object, or
 *  a CommentStoreComment.
 * @param array|null $data Structured data to store. Keys beginning with '_' are reserved.
 *  Ignored if $comment is a CommentStoreComment.
 * @return CommentStoreComment
 * @throws InvalidArgumentException If $data uses a reserved key.
 * @throws OverflowException If the serialized data exceeds MAX_DATA_LENGTH bytes.
 */
public function createComment( IDatabase $dbw, $comment, array $data = null ) {
	global $wgContLang;

	if ( !( $comment instanceof CommentStoreComment ) ) {
		if ( $data !== null ) {
			foreach ( array_keys( $data ) as $name ) {
				if ( substr( $name, 0, 1 ) === '_' ) {
					throw new InvalidArgumentException( 'Keys in $data beginning with "_" are reserved' );
				}
			}
		}

		if ( $comment instanceof Message ) {
			// Work on a copy so the caller's Message is left untouched.
			$message = clone $comment;
			$text = $message->inLanguage( $wgContLang ) // Avoid $wgForceUIMsgAsContentMsg
				->setInterfaceMessageFlag( true )
				->text();
			$comment = new CommentStoreComment( null, $text, $message, $data );
		} else {
			$comment = new CommentStoreComment( null, $comment, null, $data );
		}
	}

	// Truncate comment in a Unicode-sensitive manner
	$comment->text = $this->lang->truncate( $comment->text, self::MAX_COMMENT_LENGTH );

	// Nothing to store when the comment table is not written yet, or when
	// the comment already knows its row ID.
	if ( $this->stage <= MIGRATION_OLD || $comment->id ) {
		return $comment;
	}

	$serialized = $comment->data;
	if ( !( $comment->message instanceof RawMessage ) ) {
		// '_null' records that the caller supplied no data of their own.
		if ( $serialized === null ) {
			$serialized = [ '_null' => true ];
		}
		$serialized['_message'] = self::encodeMessage( $comment->message );
	}
	if ( $serialized !== null ) {
		$serialized = FormatJson::encode( (object)$serialized, false, FormatJson::ALL_OK );
		$len = strlen( $serialized );
		$max = self::MAX_DATA_LENGTH;
		if ( $len > $max ) {
			throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" );
		}
	}

	$hash = self::hash( $comment->text, $serialized );

	// The same column values identify an existing row and populate a new one.
	$rowValues = [
		'comment_hash' => $hash,
		'comment_text' => $comment->text,
		'comment_data' => $serialized,
	];

	$comment->id = $dbw->selectField( 'comment', 'comment_id', $rowValues, __METHOD__ );
	if ( !$comment->id ) {
		$comment->id = $dbw->nextSequenceValue( 'comment_comment_id_seq' );
		$dbw->insert(
			'comment',
			[ 'comment_id' => $comment->id ] + $rowValues,
			__METHOD__
		);
		$comment->id = $dbw->insertId();
	}

	return $comment;
}
/**
 * Shared implementation behind `self::insert()` and `self::insertWithTempTable()`
 *
 * @param IDatabase $dbw
 * @param string|Message|CommentStoreComment $comment
 * @param array|null $data
 * @return array [ array $fields, callable|null $callback ]. $callback is
 *  only set for keys listed in self::$tempTables; it takes the primary key
 *  of the main row and inserts the linking row into the temporary table.
 */
private function insertInternal( IDatabase $dbw, $comment, $data ) {
	$key = $this->key;
	$fields = [];
	$callback = null;

	$comment = $this->createComment( $dbw, $comment, $data );

	// The old column is still written up to and including WRITE_BOTH; it
	// only holds 255 bytes.
	if ( $this->stage <= MIGRATION_WRITE_BOTH ) {
		$fields[$key] = $this->lang->truncate( $comment->text, 255 );
	}

	if ( $this->stage < MIGRATION_WRITE_BOTH ) {
		return [ $fields, $callback ];
	}

	if ( !isset( self::$tempTables[$key] ) ) {
		$fields["{$key}_id"] = $comment->id;
		return [ $fields, $callback ];
	}

	// Temp-table field: the link can only be written once the main row's
	// primary key exists, so defer it to a callback.
	$temp = self::$tempTables[$key];
	$caller = __METHOD__;
	$commentId = $comment->id;
	$callback = function ( $id ) use ( $dbw, $commentId, $temp, $caller ) {
		$dbw->insert(
			$temp['table'],
			[
				$temp['pk'] => $id,
				$temp['field'] => $commentId,
			],
			$caller
		);
	};

	return [ $fields, $callback ];
}
/**
 * Get the fields to write when inserting a row that carries a comment
 *
 * @note It's recommended to include both the call to this method and the
 *  row insert in the same transaction.
 * @param IDatabase $dbw Database handle to insert on
 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
 * @param array|null $data As for `self::createComment()`
 * @return array Fields for the insert or update
 * @throws InvalidArgumentException If this key is listed in self::$tempTables.
 */
public function insert( IDatabase $dbw, $comment, $data = null ) {
	$usesTempTable = isset( self::$tempTables[$this->key] );
	if ( $usesTempTable ) {
		throw new InvalidArgumentException( "Must use insertWithTempTable() for $this->key" );
	}

	// Only the fields matter here; no callback is produced for this key.
	$result = $this->insertInternal( $dbw, $comment, $data );
	return $result[0];
}
/**
 * Get the fields and the follow-up callback for inserting a row whose
 * comment is linked through a temporary table
 *
 * This is currently needed for "rev_comment" and "img_description". In the
 * future that requirement will be removed.
 *
 * @note It's recommended to include both the call to this method and the
 *  row insert in the same transaction.
 * @param IDatabase $dbw Database handle to insert on
 * @param string|Message|CommentStoreComment $comment As for `self::createComment()`
 * @param array|null $data As for `self::createComment()`
 * @return array Two values:
 *  - array Fields for the insert or update
 *  - callable Function to call when the primary key of the row being
 *    inserted/updated is known. Pass it that primary key.
 * @throws InvalidArgumentException If this key never used a temporary table.
 */
public function insertWithTempTable( IDatabase $dbw, $comment, $data = null ) {
	$key = $this->key;

	if ( isset( self::$formerTempTables[$key] ) ) {
		// Still works, but the key no longer needs a temp table.
		wfDeprecated( __METHOD__ . " for $key", self::$formerTempTables[$key] );
	} elseif ( !isset( self::$tempTables[$key] ) ) {
		throw new InvalidArgumentException( "Must use insert() for $key" );
	}

	$result = $this->insertInternal( $dbw, $comment, $data );
	$fields = $result[0];
	$callback = $result[1];

	// Callers always get something callable, even when there is nothing to do.
	if ( !$callback ) {
		$callback = function () {
			// Do nothing.
		};
	}

	return [ $fields, $callback ];
}
/**
 * Encode a Message as a PHP data structure
 *
 * The result is a list whose first element is the message key (or the
 * array of keys to try, when there is more than one) followed by the
 * message parameters. Parameters that are themselves Message objects are
 * encoded recursively as `[ 'message' => <encoded> ]`.
 *
 * @param Message $msg
 * @return array
 */
protected static function encodeMessage( Message $msg ) {
	$key = count( $msg->getKeysToTry() ) > 1 ? $msg->getKeysToTry() : $msg->getKey();
	$params = $msg->getParams();
	foreach ( $params as &$param ) {
		if ( $param instanceof Message ) {
			$param = [
				'message' => self::encodeMessage( $param )
			];
		}
	}
	// Break the reference so the last element is a plain value again
	// before the array is modified and returned.
	unset( $param );
	array_unshift( $params, $key );
	return $params;
}
/**
 * Decode a message that was encoded by self::encodeMessage()
 *
 * The first element of $data is taken as the message key; the remaining
 * elements are the parameters. A parameter that is an object is cast to an
 * array first, and a single-element `[ 'message' => ... ]` parameter is
 * decoded recursively into a Message.
 *
 * @param array $data
 * @return Message
 */
protected static function decodeMessage( $data ) {
	$key = array_shift( $data );
	foreach ( $data as &$param ) {
		if ( is_object( $param ) ) {
			$param = (array)$param;
		}
		if ( is_array( $param ) && count( $param ) === 1 && isset( $param['message'] ) ) {
			$param = self::decodeMessage( $param['message'] );
		}
	}
	// Break the reference so $data is handed to Message without a
	// reference-bound last element.
	unset( $param );
	return new Message( $key, $data );
}
570 * Hashing function for comment storage
571 * @param string $text Comment text
572 * @param string|null $data Comment data
573 * @return int 32-bit signed integer
575 public static function hash( $text, $data ) {
576 $hash = crc32( $text ) ^
crc32( (string)$data );
578 // 64-bit PHP returns an unsigned CRC, change it to signed for
579 // insertion into the database.
580 if ( $hash >= 0x80000000 ) {