From: Brad Jorsch Date: Fri, 1 Sep 2017 18:30:53 +0000 (-0400) Subject: Handle comment truncation in CommentStore X-Git-Tag: 1.31.0-rc.0~2210^2 X-Git-Url: http://git.cyclocoop.org/%22%20.%20generer_url_ecrire%28%22calendrier%22%2C%22type=semaine%22%29%20.%20%22?a=commitdiff_plain;h=6ec1a3150246524b6e0bf4c11729fd905f50cc57;p=lhc%2Fweb%2Fwiklou.git Handle comment truncation in CommentStore Since the caller doesn't (and shouldn't) know whether CommentStore is using the old or the new schema, it should leave truncation of comments to CommentStore. Change-Id: I92954c922514271d774518d6a6c28a01f33c88c2 --- diff --git a/includes/CommentStore.php b/includes/CommentStore.php index 0c86c1e871..0dee961951 100644 --- a/includes/CommentStore.php +++ b/includes/CommentStore.php @@ -29,6 +29,12 @@ use Wikimedia\Rdbms\IDatabase; */ class CommentStore { + /** Maximum length of a comment. Longer comments will be truncated. */ + const MAX_COMMENT_LENGTH = 65535; + + /** Maximum length of serialized data. Longer data will result in an exception. */ + const MAX_DATA_LENGTH = 65535; + /** * Define fields that use temporary tables for transitional purposes * @var array Keys are '$key', values are arrays with four fields: @@ -68,15 +74,21 @@ class CommentStore { /** @var array|null Cache for `self::getJoin()` */ protected $joinCache = null; + /** @var Language Language to use for comment truncation */ + protected $lang; + /** * @param string $key A key such as "rev_comment" identifying the comment * field being fetched. + * @param Language $lang Language to use for comment truncation. Defaults + * to $wgContLang. */ - public function __construct( $key ) { - global $wgCommentTableSchemaMigrationStage; + public function __construct( $key, Language $lang = null ) { + global $wgCommentTableSchemaMigrationStage, $wgContLang; $this->key = $key; $this->stage = $wgCommentTableSchemaMigrationStage; + $this->lang = $lang ?: $wgContLang; } /** @@ -376,6 +388,9 @@ class CommentStore { } } + # Truncate comment in a Unicode-sensitive manner + $comment->text = $this->lang->truncate( $comment->text, self::MAX_COMMENT_LENGTH ); + if ( $this->stage > MIGRATION_OLD && !$comment->id ) { $dbData = $comment->data; if ( !$comment->message instanceof RawMessage ) { @@ -386,6 +401,11 @@ class CommentStore { } if ( $dbData !== null ) { $dbData = FormatJson::encode( (object)$dbData, false, FormatJson::ALL_OK ); + $len = strlen( $dbData ); + if ( $len > self::MAX_DATA_LENGTH ) { + $max = self::MAX_DATA_LENGTH; + throw new OverflowException( "Comment data is too long ($len bytes, maximum is $max)" ); + } } $hash = self::hash( $comment->text, $dbData ); @@ -432,7 +452,7 @@ class CommentStore { $comment = $this->createComment( $dbw, $comment, $data ); if ( $this->stage <= MIGRATION_WRITE_BOTH ) { - $fields[$this->key] = $comment->text; + $fields[$this->key] = $this->lang->truncate( $comment->text, 255 ); } if ( $this->stage >= MIGRATION_WRITE_BOTH ) { diff --git a/includes/EditPage.php b/includes/EditPage.php index 2860664e4c..8dd84ad8bd 100644 --- a/includes/EditPage.php +++ b/includes/EditPage.php @@ -850,7 +850,7 @@ class EditPage { * @throws ErrorPageError */ public function importFormData( &$request ) { - global $wgContLang, $wgUser; + global $wgUser; # Section edit can come from either the form or a link $this->section = $request->getVal( 'wpSection', $request->getVal( 'section' ) ); @@ -876,8 +876,7 @@ class EditPage { } } - # Truncate for whole multibyte characters - $this->summary = $wgContLang->truncate( $request->getText( 'wpSummary' ), 255 ); + $this->summary = $request->getText( 'wpSummary' ); # If the summary consists of a heading, e.g. '==Foobar==', extract the title from the # header syntax, e.g. 'Foobar'. This is mainly an issue when we are using wpSummary for @@ -889,7 +888,7 @@ class EditPage { # currently doing double duty as both edit summary and section title. Right now this # is just to allow API edits to work around this limitation, but this should be # incorporated into the actual edit form when EditPage is rewritten (Bugs 18654, 26312). - $this->sectiontitle = $wgContLang->truncate( $request->getText( 'wpSectionTitle' ), 255 ); + $this->sectiontitle = $request->getText( 'wpSectionTitle' ); $this->sectiontitle = preg_replace( '/^\s*=+\s*(.*?)\s*=+\s*$/', '$1', $this->sectiontitle ); $this->edittime = $request->getVal( 'wpEdittime' ); diff --git a/includes/MovePage.php b/includes/MovePage.php index 39dc6424db..2f8255ba38 100644 --- a/includes/MovePage.php +++ b/includes/MovePage.php @@ -442,7 +442,6 @@ class MovePage { private function moveToInternal( User $user, &$nt, $reason = '', $createRedirect = true, array $changeTags = [] ) { - global $wgContLang; if ( $nt->exists() ) { $moveOverRedirect = true; $logType = 'move_redir'; @@ -520,8 +519,6 @@ class MovePage { if ( $reason ) { $comment .= wfMessage( 'colon-separator' )->inContentLanguage()->text() . $reason; } - # Truncate for whole multibyte characters. - $comment = $wgContLang->truncate( $comment, 255 ); $dbw = wfGetDB( DB_MASTER ); diff --git a/includes/Revision.php b/includes/Revision.php index ff4a284386..fcb9d9c263 100644 --- a/includes/Revision.php +++ b/includes/Revision.php @@ -1706,7 +1706,7 @@ class Revision implements IDBAccessObject { * @return Revision|null Revision or null on error */ public static function newNullRevision( $dbw, $pageId, $summary, $minor, $user = null ) { - global $wgContentHandlerUseDB, $wgContLang; + global $wgContentHandlerUseDB; $fields = [ 'page_latest', 'page_namespace', 'page_title', 'rev_text_id', 'rev_len', 'rev_sha1' ]; @@ -1733,9 +1733,6 @@ class Revision implements IDBAccessObject { $user = $wgUser; } - // Truncate for whole multibyte characters - $summary = $wgContLang->truncate( $summary, 255 ); - $row = [ 'page' => $pageId, 'user_text' => $user->getName(), diff --git a/includes/changes/RecentChange.php b/includes/changes/RecentChange.php index 588f602e64..e62d2e7b32 100644 --- a/includes/changes/RecentChange.php +++ b/includes/changes/RecentChange.php @@ -284,7 +284,7 @@ class RecentChange { * @param bool $noudp */ public function save( $noudp = false ) { - global $wgPutIPinRC, $wgUseEnotif, $wgShowUpdatedMarker, $wgContLang; + global $wgPutIPinRC, $wgUseEnotif, $wgShowUpdatedMarker; $dbw = wfGetDB( DB_MASTER ); if ( !is_array( $this->mExtra ) ) { @@ -315,9 +315,6 @@ class RecentChange { # Trim spaces on user supplied text $this->mAttribs['rc_comment'] = trim( $this->mAttribs['rc_comment'] ); - # Make sure summary is truncated (whole multibyte characters) - $this->mAttribs['rc_comment'] = $wgContLang->truncate( $this->mAttribs['rc_comment'], 255 ); - # Fixup database timestamps $this->mAttribs['rc_timestamp'] = $dbw->timestamp( $this->mAttribs['rc_timestamp'] ); $this->mAttribs['rc_id'] = $dbw->nextSequenceValue( 'recentchanges_rc_id_seq' ); diff --git a/includes/filerepo/file/LocalFile.php b/includes/filerepo/file/LocalFile.php index 904c932d58..d2e7f89c1a 100644 --- a/includes/filerepo/file/LocalFile.php +++ b/includes/filerepo/file/LocalFile.php @@ -1195,8 +1195,6 @@ class LocalFile extends File { function upload( $src, $comment, $pageText, $flags = 0, $props = false, $timestamp = false, $user = null, $tags = [] ) { - global $wgContLang; - if ( $this->getRepo()->getReadOnlyReason() !== false ) { return $this->readOnlyFatalStatus(); } @@ -1230,9 +1228,6 @@ class LocalFile extends File { // Trim spaces on user supplied text $comment = trim( $comment ); - // Truncate nicely or the DB will do it for us - // non-nicely (dangling multi-byte chars, non-truncated version in cache). - $comment = $wgContLang->truncate( $comment, 255 ); $this->lock(); // begin $status = $this->publish( $src, $flags, $options ); diff --git a/includes/logging/LogEntry.php b/includes/logging/LogEntry.php index 6587304ffb..75617d9a0d 100644 --- a/includes/logging/LogEntry.php +++ b/includes/logging/LogEntry.php @@ -593,8 +593,6 @@ class ManualLogEntry extends LogEntryBase { * @throws MWException */ public function insert( IDatabase $dbw = null ) { - global $wgContLang; - $dbw = $dbw ?: wfGetDB( DB_MASTER ); $id = $dbw->nextSequenceValue( 'logging_log_id_seq' ); @@ -605,9 +603,6 @@ class ManualLogEntry extends LogEntryBase { // Trim spaces on user supplied text $comment = trim( $this->getComment() ); - // Truncate for whole multibyte characters. - $comment = $wgContLang->truncate( $comment, 255 ); - $params = $this->getParameters(); $relations = $this->relations; diff --git a/includes/logging/LogPage.php b/includes/logging/LogPage.php index 257f76d0bb..53ef828ad5 100644 --- a/includes/logging/LogPage.php +++ b/includes/logging/LogPage.php @@ -329,8 +329,6 @@ class LogPage { * @return int The log_id of the inserted log entry */ public function addEntry( $action, $target, $comment, $params = [], $doer = null ) { - global $wgContLang; - if ( !is_array( $params ) ) { $params = [ $params ]; } @@ -342,9 +340,6 @@ class LogPage { # Trim spaces on user supplied text $comment = trim( $comment ); - # Truncate for whole multibyte characters. - $comment = $wgContLang->truncate( $comment, 255 ); - $this->action = $action; $this->target = $target; $this->comment = $comment; diff --git a/includes/page/WikiPage.php b/includes/page/WikiPage.php index 790845e35e..d6754127bf 100644 --- a/includes/page/WikiPage.php +++ b/includes/page/WikiPage.php @@ -2299,7 +2299,7 @@ class WikiPage implements Page, IDBAccessObject { public function doUpdateRestrictions( array $limit, array $expiry, &$cascade, $reason, User $user, $tags = null ) { - global $wgCascadingRestrictionLevels, $wgContLang; + global $wgCascadingRestrictionLevels; if ( wfReadOnly() ) { return Status::newFatal( wfMessage( 'readonlytext', wfReadOnlyReason() ) ); @@ -2372,9 +2372,6 @@ class WikiPage implements Page, IDBAccessObject { $logAction = 'protect'; } - // Truncate for whole multibyte characters - $reason = $wgContLang->truncate( $reason, 255 ); - $logRelationsValues = []; $logRelationsField = null; $logParamsDetails = []; @@ -3148,9 +3145,6 @@ class WikiPage implements Page, IDBAccessObject { // Trim spaces on user supplied text $summary = trim( $summary ); - // Truncate for whole multibyte characters. - $summary = $wgContLang->truncate( $summary, 255 ); - // Save $flags = EDIT_UPDATE | EDIT_INTERNAL; diff --git a/includes/specials/SpecialBlock.php b/includes/specials/SpecialBlock.php index 66e4fbeda2..252dc68c3b 100644 --- a/includes/specials/SpecialBlock.php +++ b/includes/specials/SpecialBlock.php @@ -618,7 +618,7 @@ class SpecialBlock extends FormSpecialPage { * @return bool|string */ public static function processForm( array $data, IContextSource $context ) { - global $wgBlockAllowsUTEdit, $wgHideUserContribLimit, $wgContLang; + global $wgBlockAllowsUTEdit, $wgHideUserContribLimit; $performer = $context->getUser(); @@ -720,8 +720,7 @@ class SpecialBlock extends FormSpecialPage { $block = new Block(); $block->setTarget( $target ); $block->setBlocker( $performer ); - # Truncate reason for whole multibyte characters - $block->mReason = $wgContLang->truncate( $data['Reason'][0], 255 ); + $block->mReason = $data['Reason'][0]; $block->mExpiry = $expiryTime; $block->prevents( 'createaccount', $data['CreateAccount'] ); $block->prevents( 'editownusertalk', ( !$wgBlockAllowsUTEdit || $data['DisableUTEdit'] ) ); diff --git a/includes/specials/SpecialChangeContentModel.php b/includes/specials/SpecialChangeContentModel.php index bee6a39832..87c899f4e0 100644 --- a/includes/specials/SpecialChangeContentModel.php +++ b/includes/specials/SpecialChangeContentModel.php @@ -154,8 +154,6 @@ class SpecialChangeContentModel extends FormSpecialPage { } public function onSubmit( array $data ) { - global $wgContLang; - if ( $data['pagetitle'] === '' ) { // Initial form view of special page, pass return false; @@ -240,8 +238,6 @@ class SpecialChangeContentModel extends FormSpecialPage { if ( $data['reason'] !== '' ) { $reason .= $this->msg( 'colon-separator' )->inContentLanguage()->text() . $data['reason']; } - # Truncate for whole multibyte characters. - $reason = $wgContLang->truncate( $reason, 255 ); // Run edit filters $derivativeContext = new DerivativeContext( $this->getContext() ); diff --git a/tests/phpunit/includes/CommentStoreTest.php b/tests/phpunit/includes/CommentStoreTest.php index b65136aded..25914e275e 100644 --- a/tests/phpunit/includes/CommentStoreTest.php +++ b/tests/phpunit/includes/CommentStoreTest.php @@ -616,6 +616,31 @@ class CommentStoreTest extends MediaWikiLangTestCase { $this->assertTrue( is_callable( $callback ) ); } + public function testInsertTruncation() { + $comment = str_repeat( '💣', 16400 ); + $truncated1 = str_repeat( '💣', 63 ) . '...'; + $truncated2 = str_repeat( '💣', 16383 ) . '...'; + + $store = $this->makeStore( MIGRATION_WRITE_BOTH, 'ipb_reason' ); + $fields = $store->insert( $this->db, $comment ); + $this->assertSame( $truncated1, $fields['ipb_reason'] ); + $stored = $this->db->selectField( + 'comment', 'comment_text', [ 'comment_id' => $fields['ipb_reason_id'] ], __METHOD__ + ); + $this->assertSame( $truncated2, $stored ); + } + + /** + * @expectedException OverflowException + * @expectedExceptionMessage Comment data is too long (65611 bytes, maximum is 65535) + */ + public function testInsertTooMuchData() { + $store = $this->makeStore( MIGRATION_WRITE_BOTH, 'ipb_reason' ); + $store->insert( $this->db, 'foo', [ + 'long' => str_repeat( '💣', 16400 ) + ] ); + } + public function testConstructor() { $this->assertInstanceOf( CommentStore::class, CommentStore::newKey( 'dummy' ) ); }