+ /**
+ * The maximum uncompressed size before the object becomes sad
+ * Should be less than max_allowed_packet
+ */
+ var $mMaxSize = 10000000;
+
+ /**
+ * The maximum number of text items before the object becomes sad
+ */
+ var $mMaxCount = 100;
+
+ /** Constants from xdiff.h */
+ const XDL_BDOP_INS = 1;
+ const XDL_BDOP_CPY = 2;
+ const XDL_BDOP_INSB = 3;
+
+ function __construct() {
+ if ( !function_exists( 'gzdeflate' ) ) {
+ throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
+ }
+ }
+
+ /**
+ * @throws MWException
+ * @param $text string
+ * @return int
+ */
+ function addItem( $text ) {
+ if ( $this->mFrozen ) {
+ throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
+ }
+
+ $this->mItems[] = $text;
+ $this->mSize += strlen( $text );
+ $this->mDiffs = null; // later
+ return count( $this->mItems ) - 1;
+ }
+
+ /**
+ * @param $key string
+ * @return string
+ */
+ function getItem( $key ) {
+ return $this->mItems[$key];
+ }
+
+ /**
+ * @param $text string
+ */
+ function setText( $text ) {
+ $this->mDefaultKey = $this->addItem( $text );
+ }
+
+ /**
+ * @return string
+ */
+ function getText() {
+ return $this->getItem( $this->mDefaultKey );
+ }
+
+ /**
+ * @throws MWException
+ */
+ function compress() {
+ if ( !function_exists( 'xdiff_string_rabdiff' ) ){
+ throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
+ }
+ if ( isset( $this->mDiffs ) ) {
+ // Already compressed
+ return;
+ }
+ if ( !count( $this->mItems ) ) {
+ // Empty
+ return;
+ }
+
+ // Create two diff sequences: one for main text and one for small text
+ $sequences = array(
+ 'small' => array(
+ 'tail' => '',
+ 'diffs' => array(),
+ 'map' => array(),
+ ),
+ 'main' => array(
+ 'tail' => '',
+ 'diffs' => array(),
+ 'map' => array(),
+ ),
+ );
+ $smallFactor = 0.5;
+
+ for ( $i = 0; $i < count( $this->mItems ); $i++ ) {
+ $text = $this->mItems[$i];
+ if ( $i == 0 ) {
+ $seqName = 'main';
+ } else {
+ $mainTail = $sequences['main']['tail'];
+ if ( strlen( $text ) < strlen( $mainTail ) * $smallFactor ) {
+ $seqName = 'small';
+ } else {
+ $seqName = 'main';
+ }
+ }
+ $seq =& $sequences[$seqName];
+ $tail = $seq['tail'];
+ $diff = $this->diff( $tail, $text );
+ $seq['diffs'][] = $diff;
+ $seq['map'][] = $i;
+ $seq['tail'] = $text;
+ }
+ unset( $seq ); // unlink dangerous alias
+
+ // Knit the sequences together
+ $tail = '';
+ $this->mDiffs = array();
+ $this->mDiffMap = array();
+ foreach ( $sequences as $seq ) {
+ if ( !count( $seq['diffs'] ) ) {
+ continue;
+ }
+ if ( $tail === '' ) {
+ $this->mDiffs[] = $seq['diffs'][0];
+ } else {
+ $head = $this->patch( '', $seq['diffs'][0] );
+ $this->mDiffs[] = $this->diff( $tail, $head );
+ }
+ $this->mDiffMap[] = $seq['map'][0];
+ for ( $i = 1; $i < count( $seq['diffs'] ); $i++ ) {
+ $this->mDiffs[] = $seq['diffs'][$i];
+ $this->mDiffMap[] = $seq['map'][$i];
+ }
+ $tail = $seq['tail'];
+ }
+ }
+
+ /**
+ * @param $t1
+ * @param $t2
+ * @return string
+ */
+ function diff( $t1, $t2 ) {
+ # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
+ # "String is not zero-terminated"
+ wfSuppressWarnings();
+ $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
+ wfRestoreWarnings();
+ return $diff;
+ }
+
+ /**
+ * @param $base
+ * @param $diff
+ * @return bool|string
+ */
+ function patch( $base, $diff ) {
+ if ( function_exists( 'xdiff_string_bpatch' ) ) {
+ wfSuppressWarnings();
+ $text = xdiff_string_bpatch( $base, $diff ) . '';
+ wfRestoreWarnings();
+ return $text;
+ }
+
+ # Pure PHP implementation
+
+ $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
+
+ # Check the checksum if mhash is available
+ if ( extension_loaded( 'mhash' ) ) {
+ $ofp = mhash( MHASH_ADLER32, $base );
+ if ( $ofp !== substr( $diff, 0, 4 ) ) {
+ wfDebug( __METHOD__. ": incorrect base checksum\n" );
+ return false;
+ }
+ }
+ if ( $header['csize'] != strlen( $base ) ) {
+ wfDebug( __METHOD__. ": incorrect base length\n" );
+ return false;
+ }
+
+ $p = 8;
+ $out = '';
+ while ( $p < strlen( $diff ) ) {
+ $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
+ $op = $x['op'];
+ ++$p;
+ switch ( $op ) {
+ case self::XDL_BDOP_INS:
+ $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
+ $p++;
+ $out .= substr( $diff, $p, $x['size'] );
+ $p += $x['size'];
+ break;
+ case self::XDL_BDOP_INSB:
+ $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
+ $p += 4;
+ $out .= substr( $diff, $p, $x['csize'] );
+ $p += $x['csize'];
+ break;
+ case self::XDL_BDOP_CPY:
+ $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
+ $p += 8;
+ $out .= substr( $base, $x['off'], $x['csize'] );
+ break;
+ default:
+ wfDebug( __METHOD__.": invalid op\n" );
+ return false;
+ }
+ }
+ return $out;
+ }
+
+ function uncompress() {
+ if ( !$this->mDiffs ) {
+ return;
+ }
+ $tail = '';
+ for ( $diffKey = 0; $diffKey < count( $this->mDiffs ); $diffKey++ ) {
+ $textKey = $this->mDiffMap[$diffKey];
+ $text = $this->patch( $tail, $this->mDiffs[$diffKey] );
+ $this->mItems[$textKey] = $text;
+ $tail = $text;
+ }
+ }
+
+ /**
+ * @return array
+ */
+ function __sleep() {
+ $this->compress();
+ if ( !count( $this->mItems ) ) {
+ // Empty object
+ $info = false;
+ } else {
+ // Take forward differences to improve the compression ratio for sequences
+ $map = '';
+ $prev = 0;
+ foreach ( $this->mDiffMap as $i ) {
+ if ( $map !== '' ) {
+ $map .= ',';
+ }
+ $map .= $i - $prev;
+ $prev = $i;
+ }
+ $info = array(
+ 'diffs' => $this->mDiffs,
+ 'map' => $map
+ );
+ }
+ if ( isset( $this->mDefaultKey ) ) {
+ $info['default'] = $this->mDefaultKey;
+ }
+ $this->mCompressed = gzdeflate( serialize( $info ) );
+ return array( 'mCompressed' );
+ }
+
+ function __wakeup() {
+ // addItem() doesn't work if mItems is partially filled from mDiffs
+ $this->mFrozen = true;
+ $info = unserialize( gzinflate( $this->mCompressed ) );
+ unset( $this->mCompressed );
+
+ if ( !$info ) {
+ // Empty object
+ return;
+ }
+
+ if ( isset( $info['default'] ) ) {
+ $this->mDefaultKey = $info['default'];
+ }
+ $this->mDiffs = $info['diffs'];
+ if ( isset( $info['base'] ) ) {
+ // Old format
+ $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
+ array_unshift( $this->mDiffs,
+ pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
+ $info['base'] );
+ } else {
+ // New format
+ $map = explode( ',', $info['map'] );
+ $cur = 0;
+ $this->mDiffMap = array();
+ foreach ( $map as $i ) {
+ $cur += $i;
+ $this->mDiffMap[] = $cur;
+ }
+ }
+ $this->uncompress();
+ }
+
+ /**
+ * Helper function for compression jobs
+ * Returns true until the object is "full" and ready to be committed
+ *
+ * @return bool
+ */
+ function isHappy() {
+ return $this->mSize < $this->mMaxSize
+ && count( $this->mItems ) < $this->mMaxCount;
+ }
+
+}