From: Tim Starling Date: Fri, 1 Nov 2013 06:17:45 +0000 (+1100) Subject: Introduce includes/utils directory X-Git-Tag: 1.31.0-rc.0~18303 X-Git-Url: http://git.cyclocoop.org//%27%40script%40/%27?a=commitdiff_plain;h=e4bcbe722ebbe9a5e8409d9fb3e26dbd51c88ecc;p=lhc%2Fweb%2Fwiklou.git Introduce includes/utils directory These are classes that provide facilities for use by any caller, are independent of user interface, and have a limited set of dependencies on the rest of MediaWiki. See the README file for a more precise definition. These classes cannot go in includes/libs because of a dependency on the MediaWiki framework, such as wfDebug() or MWException, but they are otherwise similar. I thought it would be useful to put them in their own directory, to make them more discoverable, and as part of a general program of reducing clutter in the base includes/ directory. I've probably missed a few classes which could be included here, but the following classes were considered and were rejected for now: * Fallback: single caller only * GitInfo: getViewers() has inappropriate dependencies * HttpFunctions: depends on configuration, $wgTitle * PoolCounter: depends on configuration * CacheHelper: depends on IContextSource, wfMemc() Also moved a couple of classes into libs/ instead, where that seemed to be more appropriate. Change-Id: I274cff805b7d694b728a89b764a049cd62d320fe --- diff --git a/includes/ArrayUtils.php b/includes/ArrayUtils.php deleted file mode 100644 index 97a56e1ce9..0000000000 --- a/includes/ArrayUtils.php +++ /dev/null @@ -1,68 +0,0 @@ - $w ) { - $sum += $w; - # Do not return keys if they have 0 weight. 
- # Note that the "all 0 weight" case is handed above - if ( $w > 0 && $sum >= $rand ) { - break; - } - } - return $i; - } -} diff --git a/includes/AutoLoader.php b/includes/AutoLoader.php index da854388c1..1417c774db 100644 --- a/includes/AutoLoader.php +++ b/includes/AutoLoader.php @@ -33,7 +33,6 @@ $wgAutoloadLocalClasses = array( 'AjaxDispatcher' => 'includes/AjaxDispatcher.php', 'AjaxResponse' => 'includes/AjaxResponse.php', 'AlphabeticPager' => 'includes/Pager.php', - 'ArrayUtils' => 'includes/ArrayUtils.php', 'Article' => 'includes/Article.php', 'AtomFeed' => 'includes/Feed.php', 'AuthPlugin' => 'includes/AuthPlugin.php', @@ -47,28 +46,17 @@ $wgAutoloadLocalClasses = array( 'Categoryfinder' => 'includes/Categoryfinder.php', 'CategoryPage' => 'includes/CategoryPage.php', 'CategoryViewer' => 'includes/CategoryViewer.php', - 'CdbFunctions' => 'includes/Cdb_PHP.php', - 'CdbReader' => 'includes/Cdb.php', - 'CdbReader_DBA' => 'includes/Cdb.php', - 'CdbReader_PHP' => 'includes/Cdb_PHP.php', - 'CdbWriter' => 'includes/Cdb.php', - 'CdbWriter_DBA' => 'includes/Cdb.php', - 'CdbWriter_PHP' => 'includes/Cdb_PHP.php', 'ChangesFeed' => 'includes/ChangesFeed.php', 'ChangeTags' => 'includes/ChangeTags.php', 'ChannelFeed' => 'includes/Feed.php', 'Collation' => 'includes/Collation.php', 'ConcatenatedGzipHistoryBlob' => 'includes/HistoryBlob.php', - 'ConfEditor' => 'includes/ConfEditor.php', - 'ConfEditorParseError' => 'includes/ConfEditor.php', - 'ConfEditorToken' => 'includes/ConfEditor.php', 'Cookie' => 'includes/Cookie.php', 'CookieJar' => 'includes/Cookie.php', 'CurlHttpRequest' => 'includes/HttpFunctions.php', 'DeprecatedGlobal' => 'includes/DeprecatedGlobal.php', 'DerivativeRequest' => 'includes/WebRequest.php', 'DiffHistoryBlob' => 'includes/HistoryBlob.php', - 'DoubleReplacer' => 'includes/StringUtils.php', 'DummyLinker' => 'includes/Linker.php', 'Dump7ZipOutput' => 'includes/Export.php', 'DumpBZip2Output' => 'includes/Export.php', @@ -84,7 +72,6 @@ 
$wgAutoloadLocalClasses = array( 'EditPage' => 'includes/EditPage.php', 'EmailNotification' => 'includes/UserMailer.php', 'ErrorPageError' => 'includes/Exception.php', - 'ExplodeIterator' => 'includes/StringUtils.php', 'FakeTitle' => 'includes/FakeTitle.php', 'Fallback' => 'includes/Fallback.php', 'FatalError' => 'includes/Exception.php', @@ -99,8 +86,6 @@ $wgAutoloadLocalClasses = array( 'FormOptions' => 'includes/FormOptions.php', 'FormSpecialPage' => 'includes/SpecialPage.php', 'GitInfo' => 'includes/GitInfo.php', - 'HashRing' => 'includes/HashRing.php', - 'HashtableReplacer' => 'includes/StringUtils.php', 'HistoryBlob' => 'includes/HistoryBlob.php', 'HistoryBlobCurStub' => 'includes/HistoryBlob.php', 'HistoryBlobStub' => 'includes/HistoryBlob.php', @@ -142,7 +127,6 @@ $wgAutoloadLocalClasses = array( 'IncludableSpecialPage' => 'includes/SpecialPage.php', 'IndexPager' => 'includes/Pager.php', 'Interwiki' => 'includes/interwiki/Interwiki.php', - 'IP' => 'includes/IP.php', 'LCStore' => 'includes/cache/LocalisationCache.php', 'LCStore_Accel' => 'includes/cache/LocalisationCache.php', 'LCStore_CDB' => 'includes/cache/LocalisationCache.php', @@ -157,16 +141,13 @@ $wgAutoloadLocalClasses = array( 'MagicWord' => 'includes/MagicWord.php', 'MagicWordArray' => 'includes/MagicWord.php', 'MailAddress' => 'includes/UserMailer.php', - 'MappedIterator' => 'includes/MappedIterator.php', 'MediaWiki' => 'includes/Wiki.php', 'MediaWiki_I18N' => 'includes/SkinTemplate.php', 'Message' => 'includes/Message.php', 'MessageBlobStore' => 'includes/MessageBlobStore.php', 'MimeMagic' => 'includes/MimeMagic.php', - 'MWCryptRand' => 'includes/MWCryptRand.php', 'MWException' => 'includes/Exception.php', 'MWExceptionHandler' => 'includes/Exception.php', - 'MWFunction' => 'includes/MWFunction.php', 'MWHookException' => 'includes/Hooks.php', 'MWHttpRequest' => 'includes/HttpFunctions.php', 'MWInit' => 'includes/Init.php', @@ -196,9 +177,6 @@ $wgAutoloadLocalClasses = array( 'ReadOnlyError' => 
'includes/Exception.php', 'RedirectSpecialArticle' => 'includes/SpecialPage.php', 'RedirectSpecialPage' => 'includes/SpecialPage.php', - 'RegexlikeReplacer' => 'includes/StringUtils.php', - 'ReplacementArray' => 'includes/StringUtils.php', - 'Replacer' => 'includes/StringUtils.php', 'ReverseChronologicalPager' => 'includes/Pager.php', 'RevisionItem' => 'includes/RevisionList.php', 'RevisionItemBase' => 'includes/RevisionList.php', @@ -207,8 +185,6 @@ $wgAutoloadLocalClasses = array( 'RevisionList' => 'includes/RevisionList.php', 'RSSFeed' => 'includes/Feed.php', 'Sanitizer' => 'includes/Sanitizer.php', - 'ScopedCallback' => 'includes/ScopedCallback.php', - 'ScopedPHPTimeout' => 'includes/ScopedPHPTimeout.php', 'SiteConfiguration' => 'includes/SiteConfiguration.php', 'SiteStats' => 'includes/SiteStats.php', 'SiteStatsInit' => 'includes/SiteStats.php', @@ -230,7 +206,6 @@ $wgAutoloadLocalClasses = array( 'StatCounter' => 'includes/StatCounter.php', 'Status' => 'includes/Status.php', 'StreamFile' => 'includes/StreamFile.php', - 'StringUtils' => 'includes/StringUtils.php', 'StubContLang' => 'includes/StubObject.php', 'StubObject' => 'includes/StubObject.php', 'StubUserLang' => 'includes/StubObject.php', @@ -241,7 +216,6 @@ $wgAutoloadLocalClasses = array( 'TitleArray' => 'includes/TitleArray.php', 'TitleArrayFromResult' => 'includes/TitleArray.php', 'ThrottledError' => 'includes/Exception.php', - 'UIDGenerator' => 'includes/UIDGenerator.php', 'UnlistedSpecialPage' => 'includes/SpecialPage.php', 'UploadSourceAdapter' => 'includes/Import.php', 'UppercaseCollation' => 'includes/Collation.php', @@ -274,10 +248,7 @@ $wgAutoloadLocalClasses = array( 'XmlJsCode' => 'includes/Xml.php', 'XMLReader2' => 'includes/Import.php', 'XmlSelect' => 'includes/Xml.php', - 'XmlTypeCheck' => 'includes/XmlTypeCheck.php', 'ZhClient' => 'includes/ZhClient.php', - 'ZipDirectoryReader' => 'includes/ZipDirectoryReader.php', - 'ZipDirectoryReaderError' => 'includes/ZipDirectoryReader.php', # 
includes/actions 'CachedAction' => 'includes/actions/CachedAction.php', @@ -711,6 +682,8 @@ $wgAutoloadLocalClasses = array( 'JSParser' => 'includes/libs/jsminplus.php', 'JSToken' => 'includes/libs/jsminplus.php', 'JSTokenizer' => 'includes/libs/jsminplus.php', + 'ScopedPHPTimeout' => 'includes/libs/ScopedPHPTimeout.php', + 'XmlTypeCheck' => 'includes/libs/XmlTypeCheck.php', # includes/libs/lessphp 'lessc' => 'includes/libs/lessc.inc.php', @@ -1069,6 +1042,35 @@ $wgAutoloadLocalClasses = array( 'UploadStashWrongOwnerException' => 'includes/upload/UploadStash.php', 'UploadStashNoSuchKeyException' => 'includes/upload/UploadStash.php', + # includes/utils + 'ArrayUtils' => 'includes/utils/ArrayUtils.php', + 'CdbFunctions' => 'includes/utils/Cdb_PHP.php', + 'CdbReader' => 'includes/utils/Cdb.php', + 'CdbReader_DBA' => 'includes/utils/Cdb.php', + 'CdbReader_PHP' => 'includes/utils/Cdb_PHP.php', + 'CdbWriter' => 'includes/utils/Cdb.php', + 'CdbWriter_DBA' => 'includes/utils/Cdb.php', + 'CdbWriter_PHP' => 'includes/utils/Cdb_PHP.php', + 'ConfEditor' => 'includes/utils/ConfEditor.php', + 'ConfEditorParseError' => 'includes/utils/ConfEditor.php', + 'ConfEditorToken' => 'includes/utils/ConfEditor.php', + 'DoubleReplacer' => 'includes/utils/StringUtils.php', + 'ExplodeIterator' => 'includes/utils/StringUtils.php', + 'HashRing' => 'includes/utils/HashRing.php', + 'HashtableReplacer' => 'includes/utils/StringUtils.php', + 'IP' => 'includes/utils/IP.php', + 'MWCryptRand' => 'includes/utils/MWCryptRand.php', + 'MWFunction' => 'includes/utils/MWFunction.php', + 'MappedIterator' => 'includes/utils/MappedIterator.php', + 'RegexlikeReplacer' => 'includes/utils/StringUtils.php', + 'ReplacementArray' => 'includes/utils/StringUtils.php', + 'Replacer' => 'includes/utils/StringUtils.php', + 'ScopedCallback' => 'includes/utils/ScopedCallback.php', + 'StringUtils' => 'includes/utils/StringUtils.php', + 'UIDGenerator' => 'includes/utils/UIDGenerator.php', + 'ZipDirectoryReader' => 
'includes/utils/ZipDirectoryReader.php', + 'ZipDirectoryReaderError' => 'includes/utils/ZipDirectoryReader.php', + # languages 'ConverterRule' => 'languages/LanguageConverter.php', 'FakeConverter' => 'languages/Language.php', diff --git a/includes/Cdb.php b/includes/Cdb.php deleted file mode 100644 index 81c0afe171..0000000000 --- a/includes/Cdb.php +++ /dev/null @@ -1,184 +0,0 @@ -handle = dba_open( $fileName, 'r-', 'cdb' ); - if ( !$this->handle ) { - throw new MWException( 'Unable to open CDB file "' . $fileName . '"' ); - } - } - - function close() { - if ( isset( $this->handle ) ) { - dba_close( $this->handle ); - } - unset( $this->handle ); - } - - function get( $key ) { - return dba_fetch( $key, $this->handle ); - } -} - -/** - * Writer class which uses the DBA extension - */ -class CdbWriter_DBA { - var $handle, $realFileName, $tmpFileName; - - function __construct( $fileName ) { - $this->realFileName = $fileName; - $this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff ); - $this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' ); - if ( !$this->handle ) { - throw new MWException( 'Unable to open CDB file for write "' . $fileName . '"' ); - } - } - - function set( $key, $value ) { - return dba_insert( $key, $value, $this->handle ); - } - - function close() { - if ( isset( $this->handle ) ) { - dba_close( $this->handle ); - } - if ( wfIsWindows() ) { - unlink( $this->realFileName ); - } - if ( !rename( $this->tmpFileName, $this->realFileName ) ) { - throw new MWException( 'Unable to move the new CDB file into place.' ); - } - unset( $this->handle ); - } - - function __destruct() { - if ( isset( $this->handle ) ) { - $this->close(); - } - } -} diff --git a/includes/Cdb_PHP.php b/includes/Cdb_PHP.php deleted file mode 100644 index a38b9a86b8..0000000000 --- a/includes/Cdb_PHP.php +++ /dev/null @@ -1,493 +0,0 @@ -> $b ) | ( 0x40000000 >> ( $b - 1 ) ); - } else { - return $a >> $b; - } - } - - /** - * The CDB hash function. 
- * - * @param $s string - * - * @return - */ - public static function hash( $s ) { - $h = 5381; - for ( $i = 0; $i < strlen( $s ); $i++ ) { - $h5 = ( $h << 5 ) & 0xffffffff; - // Do a 32-bit sum - // Inlined here for speed - $sum = ( $h & 0x3fffffff ) + ( $h5 & 0x3fffffff ); - $h = - ( - ( $sum & 0x40000000 ? 1 : 0 ) - + ( $h & 0x80000000 ? 2 : 0 ) - + ( $h & 0x40000000 ? 1 : 0 ) - + ( $h5 & 0x80000000 ? 2 : 0 ) - + ( $h5 & 0x40000000 ? 1 : 0 ) - ) << 30 - | ( $sum & 0x3fffffff ); - $h ^= ord( $s[$i] ); - $h &= 0xffffffff; - } - return $h; - } -} - -/** - * CDB reader class - */ -class CdbReader_PHP extends CdbReader { - /** The filename */ - var $fileName; - - /** The file handle */ - var $handle; - - /* number of hash slots searched under this key */ - var $loop; - - /* initialized if loop is nonzero */ - var $khash; - - /* initialized if loop is nonzero */ - var $kpos; - - /* initialized if loop is nonzero */ - var $hpos; - - /* initialized if loop is nonzero */ - var $hslots; - - /* initialized if findNext() returns true */ - var $dpos; - - /* initialized if cdb_findnext() returns 1 */ - var $dlen; - - /** - * @param $fileName string - * @throws MWException - */ - function __construct( $fileName ) { - $this->fileName = $fileName; - $this->handle = fopen( $fileName, 'rb' ); - if ( !$this->handle ) { - throw new MWException( 'Unable to open CDB file "' . $this->fileName . '".' 
); - } - $this->findStart(); - } - - function close() { - if ( isset( $this->handle ) ) { - fclose( $this->handle ); - } - unset( $this->handle ); - } - - /** - * @param $key - * @return bool|string - */ - public function get( $key ) { - // strval is required - if ( $this->find( strval( $key ) ) ) { - return $this->read( $this->dlen, $this->dpos ); - } else { - return false; - } - } - - /** - * @param $key - * @param $pos - * @return bool - */ - protected function match( $key, $pos ) { - $buf = $this->read( strlen( $key ), $pos ); - return $buf === $key; - } - - protected function findStart() { - $this->loop = 0; - } - - /** - * @throws MWException - * @param $length - * @param $pos - * @return string - */ - protected function read( $length, $pos ) { - if ( fseek( $this->handle, $pos ) == -1 ) { - // This can easily happen if the internal pointers are incorrect - throw new MWException( - 'Seek failed, file "' . $this->fileName . '" may be corrupted.' ); - } - - if ( $length == 0 ) { - return ''; - } - - $buf = fread( $this->handle, $length ); - if ( $buf === false || strlen( $buf ) !== $length ) { - throw new MWException( - 'Read from CDB file failed, file "' . $this->fileName . '" may be corrupted.' ); - } - return $buf; - } - - /** - * Unpack an unsigned integer and throw an exception if it needs more than 31 bits - * @param $s - * @throws MWException - * @return mixed - */ - protected function unpack31( $s ) { - $data = unpack( 'V', $s ); - if ( $data[1] > 0x7fffffff ) { - throw new MWException( - 'Error in CDB file "' . $this->fileName . '", integer too big.' 
); - } - return $data[1]; - } - - /** - * Unpack a 32-bit signed integer - * @param $s - * @return int - */ - protected function unpackSigned( $s ) { - $data = unpack( 'va/vb', $s ); - return $data['a'] | ( $data['b'] << 16 ); - } - - /** - * @param $key - * @return bool - */ - protected function findNext( $key ) { - if ( !$this->loop ) { - $u = CdbFunctions::hash( $key ); - $buf = $this->read( 8, ( $u << 3 ) & 2047 ); - $this->hslots = $this->unpack31( substr( $buf, 4 ) ); - if ( !$this->hslots ) { - return false; - } - $this->hpos = $this->unpack31( substr( $buf, 0, 4 ) ); - $this->khash = $u; - $u = CdbFunctions::unsignedShiftRight( $u, 8 ); - $u = CdbFunctions::unsignedMod( $u, $this->hslots ); - $u <<= 3; - $this->kpos = $this->hpos + $u; - } - - while ( $this->loop < $this->hslots ) { - $buf = $this->read( 8, $this->kpos ); - $pos = $this->unpack31( substr( $buf, 4 ) ); - if ( !$pos ) { - return false; - } - $this->loop += 1; - $this->kpos += 8; - if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) { - $this->kpos = $this->hpos; - } - $u = $this->unpackSigned( substr( $buf, 0, 4 ) ); - if ( $u === $this->khash ) { - $buf = $this->read( 8, $pos ); - $keyLen = $this->unpack31( substr( $buf, 0, 4 ) ); - if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) { - // Found - $this->dlen = $this->unpack31( substr( $buf, 4 ) ); - $this->dpos = $pos + 8 + $keyLen; - return true; - } - } - } - return false; - } - - /** - * @param $key - * @return bool - */ - protected function find( $key ) { - $this->findStart(); - return $this->findNext( $key ); - } -} - -/** - * CDB writer class - */ -class CdbWriter_PHP extends CdbWriter { - var $handle, $realFileName, $tmpFileName; - - var $hplist; - var $numentries, $pos; - - /** - * @param $fileName string - */ - function __construct( $fileName ) { - $this->realFileName = $fileName; - $this->tmpFileName = $fileName . '.tmp.' . 
mt_rand( 0, 0x7fffffff ); - $this->handle = fopen( $this->tmpFileName, 'wb' ); - if ( !$this->handle ) { - $this->throwException( - 'Unable to open CDB file "' . $this->tmpFileName . '" for write.' ); - } - $this->hplist = array(); - $this->numentries = 0; - $this->pos = 2048; // leaving space for the pointer array, 256 * 8 - if ( fseek( $this->handle, $this->pos ) == -1 ) { - $this->throwException( 'fseek failed in file "' . $this->tmpFileName . '".' ); - } - } - - function __destruct() { - if ( isset( $this->handle ) ) { - $this->close(); - } - } - - /** - * @param $key - * @param $value - * @return - */ - public function set( $key, $value ) { - if ( strval( $key ) === '' ) { - // DBA cross-check hack - return; - } - $this->addbegin( strlen( $key ), strlen( $value ) ); - $this->write( $key ); - $this->write( $value ); - $this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) ); - } - - /** - * @throws MWException - */ - public function close() { - $this->finish(); - if ( isset( $this->handle ) ) { - fclose( $this->handle ); - } - if ( wfIsWindows() && file_exists( $this->realFileName ) ) { - unlink( $this->realFileName ); - } - if ( !rename( $this->tmpFileName, $this->realFileName ) ) { - $this->throwException( 'Unable to move the new CDB file into place.' ); - } - unset( $this->handle ); - } - - /** - * @throws MWException - * @param $buf - */ - protected function write( $buf ) { - $len = fwrite( $this->handle, $buf ); - if ( $len !== strlen( $buf ) ) { - $this->throwException( 'Error writing to CDB file "' . $this->tmpFileName . '".' ); - } - } - - /** - * @throws MWException - * @param $len - */ - protected function posplus( $len ) { - $newpos = $this->pos + $len; - if ( $newpos > 0x7fffffff ) { - $this->throwException( - 'A value in the CDB file "' . $this->tmpFileName . '" is too large.' 
); - } - $this->pos = $newpos; - } - - /** - * @param $keylen - * @param $datalen - * @param $h - */ - protected function addend( $keylen, $datalen, $h ) { - $this->hplist[] = array( - 'h' => $h, - 'p' => $this->pos - ); - - $this->numentries++; - $this->posplus( 8 ); - $this->posplus( $keylen ); - $this->posplus( $datalen ); - } - - /** - * @throws MWException - * @param $keylen - * @param $datalen - */ - protected function addbegin( $keylen, $datalen ) { - if ( $keylen > 0x7fffffff ) { - $this->throwException( 'Key length too long in file "' . $this->tmpFileName . '".' ); - } - if ( $datalen > 0x7fffffff ) { - $this->throwException( 'Data length too long in file "' . $this->tmpFileName . '".' ); - } - $buf = pack( 'VV', $keylen, $datalen ); - $this->write( $buf ); - } - - /** - * @throws MWException - */ - protected function finish() { - // Hack for DBA cross-check - $this->hplist = array_reverse( $this->hplist ); - - // Calculate the number of items that will be in each hashtable - $counts = array_fill( 0, 256, 0 ); - foreach ( $this->hplist as $item ) { - ++ $counts[255 & $item['h']]; - } - - // Fill in $starts with the *end* indexes - $starts = array(); - $pos = 0; - for ( $i = 0; $i < 256; ++$i ) { - $pos += $counts[$i]; - $starts[$i] = $pos; - } - - // Excessively clever and indulgent code to simultaneously fill $packedTables - // with the packed hashtables, and adjust the elements of $starts - // to actually point to the starts instead of the ends. - $packedTables = array_fill( 0, $this->numentries, false ); - foreach ( $this->hplist as $item ) { - $packedTables[--$starts[255 & $item['h']]] = $item; - } - - $final = ''; - for ( $i = 0; $i < 256; ++$i ) { - $count = $counts[$i]; - - // The size of the hashtable will be double the item count. - // The rest of the slots will be empty. 
- $len = $count + $count; - $final .= pack( 'VV', $this->pos, $len ); - - $hashtable = array(); - for ( $u = 0; $u < $len; ++$u ) { - $hashtable[$u] = array( 'h' => 0, 'p' => 0 ); - } - - // Fill the hashtable, using the next empty slot if the hashed slot - // is taken. - for ( $u = 0; $u < $count; ++$u ) { - $hp = $packedTables[$starts[$i] + $u]; - $where = CdbFunctions::unsignedMod( - CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len ); - while ( $hashtable[$where]['p'] ) { - if ( ++$where == $len ) { - $where = 0; - } - } - $hashtable[$where] = $hp; - } - - // Write the hashtable - for ( $u = 0; $u < $len; ++$u ) { - $buf = pack( 'vvV', - $hashtable[$u]['h'] & 0xffff, - CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ), - $hashtable[$u]['p'] ); - $this->write( $buf ); - $this->posplus( 8 ); - } - } - - // Write the pointer array at the start of the file - rewind( $this->handle ); - if ( ftell( $this->handle ) != 0 ) { - $this->throwException( 'Error rewinding to start of file "' . $this->tmpFileName . '".' ); - } - $this->write( $final ); - } - - /** - * Clean up the temp file and throw an exception - * - * @param $msg string - * @throws MWException - */ - protected function throwException( $msg ) { - if ( $this->handle ) { - fclose( $this->handle ); - unlink( $this->tmpFileName ); - } - throw new MWException( $msg ); - } -} diff --git a/includes/ConfEditor.php b/includes/ConfEditor.php deleted file mode 100644 index 67cb87db1e..0000000000 --- a/includes/ConfEditor.php +++ /dev/null @@ -1,1109 +0,0 @@ -", or false if there isn't one - */ - var $pathStack; - - /** - * The elements of the top of the pathStack for every path encountered, indexed - * by slash-separated path. - */ - var $pathInfo; - - /** - * Next serial number for whitespace placeholder paths (\@extra-N) - */ - var $serial; - - /** - * Editor state. This consists of the internal copy/insert operations which - * are applied to the source string to obtain the destination string. 
- */ - var $edits; - - /** - * Simple entry point for command-line testing - * - * @param $text string - * - * @return string - */ - static function test( $text ) { - try { - $ce = new self( $text ); - $ce->parse(); - } catch ( ConfEditorParseError $e ) { - return $e->getMessage() . "\n" . $e->highlight( $text ); - } - return "OK"; - } - - /** - * Construct a new parser - */ - public function __construct( $text ) { - $this->text = $text; - } - - /** - * Edit the text. Returns the edited text. - * @param array $ops of operations. - * - * Operations are given as an associative array, with members: - * type: One of delete, set, append or insert (required) - * path: The path to operate on (required) - * key: The array key to insert/append, with PHP quotes - * value: The value, with PHP quotes - * - * delete - * Deletes an array element or statement with the specified path. - * e.g. - * array('type' => 'delete', 'path' => '$foo/bar/baz' ) - * is equivalent to the runtime PHP code: - * unset( $foo['bar']['baz'] ); - * - * set - * Sets the value of an array element. If the element doesn't exist, it - * is appended to the array. If it does exist, the value is set, with - * comments and indenting preserved. - * - * append - * Appends a new element to the end of the array. Adds a trailing comma. - * e.g. - * array( 'type' => 'append', 'path', '$foo/bar', - * 'key' => 'baz', 'value' => "'x'" ) - * is like the PHP code: - * $foo['bar']['baz'] = 'x'; - * - * insert - * Insert a new element at the start of the array. - * - * @throws MWException - * @return string - */ - public function edit( $ops ) { - $this->parse(); - - $this->edits = array( - array( 'copy', 0, strlen( $this->text ) ) - ); - foreach ( $ops as $op ) { - $type = $op['type']; - $path = $op['path']; - $value = isset( $op['value'] ) ? $op['value'] : null; - $key = isset( $op['key'] ) ? 
$op['key'] : null; - - switch ( $type ) { - case 'delete': - list( $start, $end ) = $this->findDeletionRegion( $path ); - $this->replaceSourceRegion( $start, $end, false ); - break; - case 'set': - if ( isset( $this->pathInfo[$path] ) ) { - list( $start, $end ) = $this->findValueRegion( $path ); - $encValue = $value; // var_export( $value, true ); - $this->replaceSourceRegion( $start, $end, $encValue ); - break; - } - // No existing path, fall through to append - $slashPos = strrpos( $path, '/' ); - $key = var_export( substr( $path, $slashPos + 1 ), true ); - $path = substr( $path, 0, $slashPos ); - // Fall through - case 'append': - // Find the last array element - $lastEltPath = $this->findLastArrayElement( $path ); - if ( $lastEltPath === false ) { - throw new MWException( "Can't find any element of array \"$path\"" ); - } - $lastEltInfo = $this->pathInfo[$lastEltPath]; - - // Has it got a comma already? - if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) { - // No comma, insert one after the value region - list( , $end ) = $this->findValueRegion( $lastEltPath ); - $this->replaceSourceRegion( $end - 1, $end - 1, ',' ); - } - - // Make the text to insert - list( $start, $end ) = $this->findDeletionRegion( $lastEltPath ); - - if ( $key === null ) { - list( $indent, ) = $this->getIndent( $start ); - $textToInsert = "$indent$value,"; - } else { - list( $indent, $arrowIndent ) = - $this->getIndent( $start, $key, $lastEltInfo['arrowByte'] ); - $textToInsert = "$indent$key$arrowIndent=> $value,"; - } - $textToInsert .= ( $indent === false ? 
' ' : "\n" ); - - // Insert the item - $this->replaceSourceRegion( $end, $end, $textToInsert ); - break; - case 'insert': - // Find first array element - $firstEltPath = $this->findFirstArrayElement( $path ); - if ( $firstEltPath === false ) { - throw new MWException( "Can't find array element of \"$path\"" ); - } - list( $start, ) = $this->findDeletionRegion( $firstEltPath ); - $info = $this->pathInfo[$firstEltPath]; - - // Make the text to insert - if ( $key === null ) { - list( $indent, ) = $this->getIndent( $start ); - $textToInsert = "$indent$value,"; - } else { - list( $indent, $arrowIndent ) = - $this->getIndent( $start, $key, $info['arrowByte'] ); - $textToInsert = "$indent$key$arrowIndent=> $value,"; - } - $textToInsert .= ( $indent === false ? ' ' : "\n" ); - - // Insert the item - $this->replaceSourceRegion( $start, $start, $textToInsert ); - break; - default: - throw new MWException( "Unrecognised operation: \"$type\"" ); - } - } - - // Do the edits - $out = ''; - foreach ( $this->edits as $edit ) { - if ( $edit[0] == 'copy' ) { - $out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] ); - } else { // if ( $edit[0] == 'insert' ) - $out .= $edit[1]; - } - } - - // Do a second parse as a sanity check - $this->text = $out; - try { - $this->parse(); - } catch ( ConfEditorParseError $e ) { - throw new MWException( - "Sorry, ConfEditor broke the file during editing and it won't parse anymore: " . 
- $e->getMessage() ); - } - return $out; - } - - /** - * Get the variables defined in the text - * @return array( varname => value ) - */ - function getVars() { - $vars = array(); - $this->parse(); - foreach ( $this->pathInfo as $path => $data ) { - if ( $path[0] != '$' ) { - continue; - } - $trimmedPath = substr( $path, 1 ); - $name = $data['name']; - if ( $name[0] == '@' ) { - continue; - } - if ( $name[0] == '$' ) { - $name = substr( $name, 1 ); - } - $parentPath = substr( $trimmedPath, 0, - strlen( $trimmedPath ) - strlen( $name ) ); - if ( substr( $parentPath, -1 ) == '/' ) { - $parentPath = substr( $parentPath, 0, -1 ); - } - - $value = substr( $this->text, $data['valueStartByte'], - $data['valueEndByte'] - $data['valueStartByte'] - ); - $this->setVar( $vars, $parentPath, $name, - $this->parseScalar( $value ) ); - } - return $vars; - } - - /** - * Set a value in an array, unless it's set already. For instance, - * setVar( $arr, 'foo/bar', 'baz', 3 ); will set - * $arr['foo']['bar']['baz'] = 3; - * @param $array array - * @param string $path slash-delimited path - * @param $key mixed Key - * @param $value mixed Value - */ - function setVar( &$array, $path, $key, $value ) { - $pathArr = explode( '/', $path ); - $target =& $array; - if ( $path !== '' ) { - foreach ( $pathArr as $p ) { - if ( !isset( $target[$p] ) ) { - $target[$p] = array(); - } - $target =& $target[$p]; - } - } - if ( !isset( $target[$key] ) ) { - $target[$key] = $value; - } - } - - /** - * Parse a scalar value in PHP - * @return mixed Parsed value - */ - function parseScalar( $str ) { - if ( $str !== '' && $str[0] == '\'' ) { - // Single-quoted string - // @todo FIXME: trim() call is due to mystery bug where whitespace gets - // appended to the token; without it we ended up reading in the - // extra quote on the end! 
- return strtr( substr( trim( $str ), 1, -1 ), - array( '\\\'' => '\'', '\\\\' => '\\' ) ); - } - if ( $str !== '' && $str[0] == '"' ) { - // Double-quoted string - // @todo FIXME: trim() call is due to mystery bug where whitespace gets - // appended to the token; without it we ended up reading in the - // extra quote on the end! - return stripcslashes( substr( trim( $str ), 1, -1 ) ); - } - if ( substr( $str, 0, 4 ) == 'true' ) { - return true; - } - if ( substr( $str, 0, 5 ) == 'false' ) { - return false; - } - if ( substr( $str, 0, 4 ) == 'null' ) { - return null; - } - // Must be some kind of numeric value, so let PHP's weak typing - // be useful for a change - return $str; - } - - /** - * Replace the byte offset region of the source with $newText. - * Works by adding elements to the $this->edits array. - */ - function replaceSourceRegion( $start, $end, $newText = false ) { - // Split all copy operations with a source corresponding to the region - // in question. - $newEdits = array(); - foreach ( $this->edits as $edit ) { - if ( $edit[0] !== 'copy' ) { - $newEdits[] = $edit; - continue; - } - $copyStart = $edit[1]; - $copyEnd = $edit[2]; - if ( $start >= $copyEnd || $end <= $copyStart ) { - // Outside this region - $newEdits[] = $edit; - continue; - } - if ( ( $start < $copyStart && $end > $copyStart ) - || ( $start < $copyEnd && $end > $copyEnd ) - ) { - throw new MWException( "Overlapping regions found, can't do the edit" ); - } - // Split the copy - $newEdits[] = array( 'copy', $copyStart, $start ); - if ( $newText !== false ) { - $newEdits[] = array( 'insert', $newText ); - } - $newEdits[] = array( 'copy', $end, $copyEnd ); - } - $this->edits = $newEdits; - } - - /** - * Finds the source byte region which you would want to delete, if $pathName - * was to be deleted. Includes the leading spaces and tabs, the trailing line - * break, and any comments in between. 
- * @param $pathName - * @throws MWException - * @return array - */ - function findDeletionRegion( $pathName ) { - if ( !isset( $this->pathInfo[$pathName] ) ) { - throw new MWException( "Can't find path \"$pathName\"" ); - } - $path = $this->pathInfo[$pathName]; - // Find the start - $this->firstToken(); - while ( $this->pos != $path['startToken'] ) { - $this->nextToken(); - } - $regionStart = $path['startByte']; - for ( $offset = -1; $offset >= -$this->pos; $offset-- ) { - $token = $this->getTokenAhead( $offset ); - if ( !$token->isSkip() ) { - // If there is other content on the same line, don't move the start point - // back, because that will cause the regions to overlap. - $regionStart = $path['startByte']; - break; - } - $lfPos = strrpos( $token->text, "\n" ); - if ( $lfPos === false ) { - $regionStart -= strlen( $token->text ); - } else { - // The line start does not include the LF - $regionStart -= strlen( $token->text ) - $lfPos - 1; - break; - } - } - // Find the end - while ( $this->pos != $path['endToken'] ) { - $this->nextToken(); - } - $regionEnd = $path['endByte']; // past the end - for ( $offset = 0; $offset < count( $this->tokens ) - $this->pos; $offset++ ) { - $token = $this->getTokenAhead( $offset ); - if ( !$token->isSkip() ) { - break; - } - $lfPos = strpos( $token->text, "\n" ); - if ( $lfPos === false ) { - $regionEnd += strlen( $token->text ); - } else { - // This should point past the LF - $regionEnd += $lfPos + 1; - break; - } - } - return array( $regionStart, $regionEnd ); - } - - /** - * Find the byte region in the source corresponding to the value part. - * This includes the quotes, but does not include the trailing comma - * or semicolon. - * - * The end position is the past-the-end (end + 1) value as per convention. 
- * @param $pathName - * @throws MWException - * @return array - */ - function findValueRegion( $pathName ) { - if ( !isset( $this->pathInfo[$pathName] ) ) { - throw new MWException( "Can't find path \"$pathName\"" ); - } - $path = $this->pathInfo[$pathName]; - if ( $path['valueStartByte'] === false || $path['valueEndByte'] === false ) { - throw new MWException( "Can't find value region for path \"$pathName\"" ); - } - return array( $path['valueStartByte'], $path['valueEndByte'] ); - } - - /** - * Find the path name of the last element in the array. - * If the array is empty, this will return the \@extra interstitial element. - * If the specified path is not found or is not an array, it will return false. - * @return bool|int|string - */ - function findLastArrayElement( $path ) { - // Try for a real element - $lastEltPath = false; - foreach ( $this->pathInfo as $candidatePath => $info ) { - $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 ); - $part2 = substr( $candidatePath, strlen( $path ) + 1, 1 ); - if ( $part2 == '@' ) { - // Do nothing - } elseif ( $part1 == "$path/" ) { - $lastEltPath = $candidatePath; - } elseif ( $lastEltPath !== false ) { - break; - } - } - if ( $lastEltPath !== false ) { - return $lastEltPath; - } - - // Try for an interstitial element - $extraPath = false; - foreach ( $this->pathInfo as $candidatePath => $info ) { - $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 ); - if ( $part1 == "$path/" ) { - $extraPath = $candidatePath; - } elseif ( $extraPath !== false ) { - break; - } - } - return $extraPath; - } - - /** - * Find the path name of first element in the array. - * If the array is empty, this will return the \@extra interstitial element. - * If the specified path is not found or is not an array, it will return false. 
- * @return bool|int|string - */ - function findFirstArrayElement( $path ) { - // Try for an ordinary element - foreach ( $this->pathInfo as $candidatePath => $info ) { - $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 ); - $part2 = substr( $candidatePath, strlen( $path ) + 1, 1 ); - if ( $part1 == "$path/" && $part2 != '@' ) { - return $candidatePath; - } - } - - // Try for an interstitial element - foreach ( $this->pathInfo as $candidatePath => $info ) { - $part1 = substr( $candidatePath, 0, strlen( $path ) + 1 ); - if ( $part1 == "$path/" ) { - return $candidatePath; - } - } - return false; - } - - /** - * Get the indent string which sits after a given start position. - * Returns false if the position is not at the start of the line. - * @return array - */ - function getIndent( $pos, $key = false, $arrowPos = false ) { - $arrowIndent = ' '; - if ( $pos == 0 || $this->text[$pos - 1] == "\n" ) { - $indentLength = strspn( $this->text, " \t", $pos ); - $indent = substr( $this->text, $pos, $indentLength ); - } else { - $indent = false; - } - if ( $indent !== false && $arrowPos !== false ) { - $arrowIndentLength = $arrowPos - $pos - $indentLength - strlen( $key ); - if ( $arrowIndentLength > 0 ) { - $arrowIndent = str_repeat( ' ', $arrowIndentLength ); - } - } - return array( $indent, $arrowIndent ); - } - - /** - * Run the parser on the text. Throws an exception if the string does not - * match our defined subset of PHP syntax. - */ - public function parse() { - $this->initParse(); - $this->pushState( 'file' ); - $this->pushPath( '@extra-' . 
( $this->serial++ ) ); - $token = $this->firstToken(); - - while ( !$token->isEnd() ) { - $state = $this->popState(); - if ( !$state ) { - $this->error( 'internal error: empty state stack' ); - } - - switch ( $state ) { - case 'file': - $this->expect( T_OPEN_TAG ); - $token = $this->skipSpace(); - if ( $token->isEnd() ) { - break 2; - } - $this->pushState( 'statement', 'file 2' ); - break; - case 'file 2': - $token = $this->skipSpace(); - if ( $token->isEnd() ) { - break 2; - } - $this->pushState( 'statement', 'file 2' ); - break; - case 'statement': - $token = $this->skipSpace(); - if ( !$this->validatePath( $token->text ) ) { - $this->error( "Invalid variable name \"{$token->text}\"" ); - } - $this->nextPath( $token->text ); - $this->expect( T_VARIABLE ); - $this->skipSpace(); - $arrayAssign = false; - if ( $this->currentToken()->type == '[' ) { - $this->nextToken(); - $token = $this->skipSpace(); - if ( !$token->isScalar() ) { - $this->error( "expected a string or number for the array key" ); - } - if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) { - $text = $this->parseScalar( $token->text ); - } else { - $text = $token->text; - } - if ( !$this->validatePath( $text ) ) { - $this->error( "Invalid associative array name \"$text\"" ); - } - $this->pushPath( $text ); - $this->nextToken(); - $this->skipSpace(); - $this->expect( ']' ); - $this->skipSpace(); - $arrayAssign = true; - } - $this->expect( '=' ); - $this->skipSpace(); - $this->startPathValue(); - if ( $arrayAssign ) { - $this->pushState( 'expression', 'array assign end' ); - } else { - $this->pushState( 'expression', 'statement end' ); - } - break; - case 'array assign end': - case 'statement end': - $this->endPathValue(); - if ( $state == 'array assign end' ) { - $this->popPath(); - } - $this->skipSpace(); - $this->expect( ';' ); - $this->nextPath( '@extra-' . 
( $this->serial++ ) ); - break; - case 'expression': - $token = $this->skipSpace(); - if ( $token->type == T_ARRAY ) { - $this->pushState( 'array' ); - } elseif ( $token->isScalar() ) { - $this->nextToken(); - } elseif ( $token->type == T_VARIABLE ) { - $this->nextToken(); - } else { - $this->error( "expected simple expression" ); - } - break; - case 'array': - $this->skipSpace(); - $this->expect( T_ARRAY ); - $this->skipSpace(); - $this->expect( '(' ); - $this->skipSpace(); - $this->pushPath( '@extra-' . ( $this->serial++ ) ); - if ( $this->isAhead( ')' ) ) { - // Empty array - $this->pushState( 'array end' ); - } else { - $this->pushState( 'element', 'array end' ); - } - break; - case 'array end': - $this->skipSpace(); - $this->popPath(); - $this->expect( ')' ); - break; - case 'element': - $token = $this->skipSpace(); - // Look ahead to find the double arrow - if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) { - // Found associative element - $this->pushState( 'assoc-element', 'element end' ); - } else { - // Not associative - $this->nextPath( '@next' ); - $this->startPathValue(); - $this->pushState( 'expression', 'element end' ); - } - break; - case 'element end': - $token = $this->skipSpace(); - if ( $token->type == ',' ) { - $this->endPathValue(); - $this->markComma(); - $this->nextToken(); - $this->nextPath( '@extra-' . 
( $this->serial++ ) ); - // Look ahead to find ending bracket - if ( $this->isAhead( ")" ) ) { - // Found ending bracket, no continuation - $this->skipSpace(); - } else { - // No ending bracket, continue to next element - $this->pushState( 'element' ); - } - } elseif ( $token->type == ')' ) { - // End array - $this->endPathValue(); - } else { - $this->error( "expected the next array element or the end of the array" ); - } - break; - case 'assoc-element': - $token = $this->skipSpace(); - if ( !$token->isScalar() ) { - $this->error( "expected a string or number for the array key" ); - } - if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) { - $text = $this->parseScalar( $token->text ); - } else { - $text = $token->text; - } - if ( !$this->validatePath( $text ) ) { - $this->error( "Invalid associative array name \"$text\"" ); - } - $this->nextPath( $text ); - $this->nextToken(); - $this->skipSpace(); - $this->markArrow(); - $this->expect( T_DOUBLE_ARROW ); - $this->skipSpace(); - $this->startPathValue(); - $this->pushState( 'expression' ); - break; - } - } - if ( count( $this->stateStack ) ) { - $this->error( 'unexpected end of file' ); - } - $this->popPath(); - } - - /** - * Initialise a parse. - */ - protected function initParse() { - $this->tokens = token_get_all( $this->text ); - $this->stateStack = array(); - $this->pathStack = array(); - $this->firstToken(); - $this->pathInfo = array(); - $this->serial = 1; - } - - /** - * Set the parse position. Do not call this except from firstToken() and - * nextToken(), there is more to update than just the position. 
- */ - protected function setPos( $pos ) { - $this->pos = $pos; - if ( $this->pos >= count( $this->tokens ) ) { - $this->currentToken = ConfEditorToken::newEnd(); - } else { - $this->currentToken = $this->newTokenObj( $this->tokens[$this->pos] ); - } - return $this->currentToken; - } - - /** - * Create a ConfEditorToken from an element of token_get_all() - * @return ConfEditorToken - */ - function newTokenObj( $internalToken ) { - if ( is_array( $internalToken ) ) { - return new ConfEditorToken( $internalToken[0], $internalToken[1] ); - } else { - return new ConfEditorToken( $internalToken, $internalToken ); - } - } - - /** - * Reset the parse position - */ - function firstToken() { - $this->setPos( 0 ); - $this->prevToken = ConfEditorToken::newEnd(); - $this->lineNum = 1; - $this->colNum = 1; - $this->byteNum = 0; - return $this->currentToken; - } - - /** - * Get the current token - */ - function currentToken() { - return $this->currentToken; - } - - /** - * Advance the current position and return the resulting next token - */ - function nextToken() { - if ( $this->currentToken ) { - $text = $this->currentToken->text; - $lfCount = substr_count( $text, "\n" ); - if ( $lfCount ) { - $this->lineNum += $lfCount; - $this->colNum = strlen( $text ) - strrpos( $text, "\n" ); - } else { - $this->colNum += strlen( $text ); - } - $this->byteNum += strlen( $text ); - } - $this->prevToken = $this->currentToken; - $this->setPos( $this->pos + 1 ); - return $this->currentToken; - } - - /** - * Get the token $offset steps ahead of the current position. - * $offset may be negative, to get tokens behind the current position. 
- * @return ConfEditorToken - */ - function getTokenAhead( $offset ) { - $pos = $this->pos + $offset; - if ( $pos >= count( $this->tokens ) || $pos < 0 ) { - return ConfEditorToken::newEnd(); - } else { - return $this->newTokenObj( $this->tokens[$pos] ); - } - } - - /** - * Advances the current position past any whitespace or comments - */ - function skipSpace() { - while ( $this->currentToken && $this->currentToken->isSkip() ) { - $this->nextToken(); - } - return $this->currentToken; - } - - /** - * Throws an error if the current token is not of the given type, and - * then advances to the next position. - */ - function expect( $type ) { - if ( $this->currentToken && $this->currentToken->type == $type ) { - return $this->nextToken(); - } else { - $this->error( "expected " . $this->getTypeName( $type ) . - ", got " . $this->getTypeName( $this->currentToken->type ) ); - } - } - - /** - * Push a state or two on to the state stack. - */ - function pushState( $nextState, $stateAfterThat = null ) { - if ( $stateAfterThat !== null ) { - $this->stateStack[] = $stateAfterThat; - } - $this->stateStack[] = $nextState; - } - - /** - * Pop a state from the state stack. - * @return mixed - */ - function popState() { - return array_pop( $this->stateStack ); - } - - /** - * Returns true if the user input path is valid. - * This exists to allow "/" and "@" to be reserved for string path keys - * @return bool - */ - function validatePath( $path ) { - return strpos( $path, '/' ) === false && substr( $path, 0, 1 ) != '@'; - } - - /** - * Internal function to update some things at the end of a path region. Do - * not call except from popPath() or nextPath(). 
- */ - function endPath() { - $key = ''; - foreach ( $this->pathStack as $pathInfo ) { - if ( $key !== '' ) { - $key .= '/'; - } - $key .= $pathInfo['name']; - } - $pathInfo['endByte'] = $this->byteNum; - $pathInfo['endToken'] = $this->pos; - $this->pathInfo[$key] = $pathInfo; - } - - /** - * Go up to a new path level, for example at the start of an array. - */ - function pushPath( $path ) { - $this->pathStack[] = array( - 'name' => $path, - 'level' => count( $this->pathStack ) + 1, - 'startByte' => $this->byteNum, - 'startToken' => $this->pos, - 'valueStartToken' => false, - 'valueStartByte' => false, - 'valueEndToken' => false, - 'valueEndByte' => false, - 'nextArrayIndex' => 0, - 'hasComma' => false, - 'arrowByte' => false - ); - } - - /** - * Go down a path level, for example at the end of an array. - */ - function popPath() { - $this->endPath(); - array_pop( $this->pathStack ); - } - - /** - * Go to the next path on the same level. This ends the current path and - * starts a new one. If $path is \@next, the new path is set to the next - * numeric array element. - */ - function nextPath( $path ) { - $this->endPath(); - $i = count( $this->pathStack ) - 1; - if ( $path == '@next' ) { - $nextArrayIndex =& $this->pathStack[$i]['nextArrayIndex']; - $this->pathStack[$i]['name'] = $nextArrayIndex; - $nextArrayIndex++; - } else { - $this->pathStack[$i]['name'] = $path; - } - $this->pathStack[$i] = - array( - 'startByte' => $this->byteNum, - 'startToken' => $this->pos, - 'valueStartToken' => false, - 'valueStartByte' => false, - 'valueEndToken' => false, - 'valueEndByte' => false, - 'hasComma' => false, - 'arrowByte' => false, - ) + $this->pathStack[$i]; - } - - /** - * Mark the start of the value part of a path. - */ - function startPathValue() { - $path =& $this->pathStack[count( $this->pathStack ) - 1]; - $path['valueStartToken'] = $this->pos; - $path['valueStartByte'] = $this->byteNum; - } - - /** - * Mark the end of the value part of a path. 
- */ - function endPathValue() { - $path =& $this->pathStack[count( $this->pathStack ) - 1]; - $path['valueEndToken'] = $this->pos; - $path['valueEndByte'] = $this->byteNum; - } - - /** - * Mark the comma separator in an array element - */ - function markComma() { - $path =& $this->pathStack[count( $this->pathStack ) - 1]; - $path['hasComma'] = true; - } - - /** - * Mark the arrow separator in an associative array element - */ - function markArrow() { - $path =& $this->pathStack[count( $this->pathStack ) - 1]; - $path['arrowByte'] = $this->byteNum; - } - - /** - * Generate a parse error - */ - function error( $msg ) { - throw new ConfEditorParseError( $this, $msg ); - } - - /** - * Get a readable name for the given token type. - * @return string - */ - function getTypeName( $type ) { - if ( is_int( $type ) ) { - return token_name( $type ); - } else { - return "\"$type\""; - } - } - - /** - * Looks ahead to see if the given type is the next token type, starting - * from the current position plus the given offset. Skips any intervening - * whitespace. - * @return bool - */ - function isAhead( $type, $offset = 0 ) { - $ahead = $offset; - $token = $this->getTokenAhead( $offset ); - while ( !$token->isEnd() ) { - if ( $token->isSkip() ) { - $ahead++; - $token = $this->getTokenAhead( $ahead ); - continue; - } elseif ( $token->type == $type ) { - // Found the type - return true; - } else { - // Not found - return false; - } - } - return false; - } - - /** - * Get the previous token object - */ - function prevToken() { - return $this->prevToken; - } - - /** - * Echo a reasonably readable representation of the tokenizer array. - */ - function dumpTokens() { - $out = ''; - foreach ( $this->tokens as $token ) { - $obj = $this->newTokenObj( $token ); - $out .= sprintf( "%-28s %s\n", - $this->getTypeName( $obj->type ), - addcslashes( $obj->text, "\0..\37" ) ); - } - echo "
<pre>" . htmlspecialchars( $out ) . "</pre>
"; - } -} - -/** - * Exception class for parse errors - */ -class ConfEditorParseError extends MWException { - var $lineNum, $colNum; - function __construct( $editor, $msg ) { - $this->lineNum = $editor->lineNum; - $this->colNum = $editor->colNum; - parent::__construct( "Parse error on line {$editor->lineNum} " . - "col {$editor->colNum}: $msg" ); - } - - function highlight( $text ) { - $lines = StringUtils::explode( "\n", $text ); - foreach ( $lines as $lineNum => $line ) { - if ( $lineNum == $this->lineNum - 1 ) { - return "$line\n" . str_repeat( ' ', $this->colNum - 1 ) . "^\n"; - } - } - return ''; - } - -} - -/** - * Class to wrap a token from the tokenizer. - */ -class ConfEditorToken { - var $type, $text; - - static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING ); - static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT ); - - static function newEnd() { - return new self( 'END', '' ); - } - - function __construct( $type, $text ) { - $this->type = $type; - $this->text = $text; - } - - function isSkip() { - return in_array( $this->type, self::$skipTypes ); - } - - function isScalar() { - return in_array( $this->type, self::$scalarTypes ); - } - - function isEnd() { - return $this->type == 'END'; - } -} diff --git a/includes/HashRing.php b/includes/HashRing.php deleted file mode 100644 index 930f8c0aa1..0000000000 --- a/includes/HashRing.php +++ /dev/null @@ -1,142 +0,0 @@ - weight) */ - protected $sourceMap = array(); - /** @var Array (location => (start, end)) */ - protected $ring = array(); - - const RING_SIZE = 268435456; // 2^28 - - /** - * @param array $map (location => weight) - */ - public function __construct( array $map ) { - $map = array_filter( $map, function( $w ) { return $w > 0; } ); - if ( !count( $map ) ) { - throw new MWException( "Ring is empty or all weights are zero." 
); - } - $this->sourceMap = $map; - // Sort the locations based on the hash of their names - $hashes = array(); - foreach ( $map as $location => $weight ) { - $hashes[$location] = sha1( $location ); - } - uksort( $map, function ( $a, $b ) use ( $hashes ) { - return strcmp( $hashes[$a], $hashes[$b] ); - } ); - // Fit the map to weight-proportionate one with a space of size RING_SIZE - $sum = array_sum( $map ); - $standardMap = array(); - foreach ( $map as $location => $weight ) { - $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE ); - } - // Build a ring of RING_SIZE spots, with each location at a spot in location hash order - $index = 0; - foreach ( $standardMap as $location => $weight ) { - // Location covers half-closed interval [$index,$index + $weight) - $this->ring[$location] = array( $index, $index + $weight ); - $index += $weight; - } - // Make sure the last location covers what is left - end( $this->ring ); - $this->ring[key( $this->ring )][1] = self::RING_SIZE; - } - - /** - * Get the location of an item on the ring - * - * @param string $item - * @return string Location - */ - public function getLocation( $item ) { - $locations = $this->getLocations( $item, 1 ); - return $locations[0]; - } - - /** - * Get the location of an item on the ring, as well as the next clockwise locations - * - * @param string $item - * @param integer $limit Maximum number of locations to return - * @return array List of locations - */ - public function getLocations( $item, $limit ) { - $locations = array(); - $primaryLocation = null; - $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits - foreach ( $this->ring as $location => $range ) { - if ( count( $locations ) >= $limit ) { - break; - } - // The $primaryLocation is the location the item spot is in. - // After that is reached, keep appending the next locations. 
- if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) { - if ( $primaryLocation === null ) { - $primaryLocation = $location; - } - $locations[] = $location; - } - } - // If more locations are requested, wrap-around and keep adding them - reset( $this->ring ); - while ( count( $locations ) < $limit ) { - list( $location, ) = each( $this->ring ); - if ( $location === $primaryLocation ) { - break; // don't go in circles - } - $locations[] = $location; - } - return $locations; - } - - /** - * Get the map of locations to weight (ignores 0-weight items) - * - * @return array - */ - public function getLocationWeights() { - return $this->sourceMap; - } - - /** - * Get a new hash ring with a location removed from the ring - * - * @param string $location - * @return HashRing|bool Returns false if no non-zero weighted spots are left - */ - public function newWithoutLocation( $location ) { - $map = $this->sourceMap; - unset( $map[$location] ); - if ( count( $map ) ) { - return new self( $map ); - } - return false; - } -} diff --git a/includes/IP.php b/includes/IP.php deleted file mode 100644 index 73834a5950..0000000000 --- a/includes/IP.php +++ /dev/null @@ -1,761 +0,0 @@ -", Aaron Schulz - */ - -// Some regex definition to "play" with IP address and IP address blocks - -// An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255 -define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' ); -define( 'RE_IP_ADD', RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE ); -// An IPv4 block is an IP address and a prefix (d1 to d32) -define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' ); -define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX ); - -// An IPv6 address is made up of 8 words (each x0000 to xFFFF). -// However, the "::" abbreviation can be used on consecutive x0000 words. 
-define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' ); -define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' ); -define( 'RE_IPV6_ADD', - '(?:' . // starts with "::" (including "::") - ':(?::|(?::' . RE_IPV6_WORD . '){1,7})' . - '|' . // ends with "::" (except "::") - RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,6}::' . - '|' . // contains one "::" in the middle (the ^ makes the test fail if none found) - RE_IPV6_WORD . '(?::((?(-1)|:))?' . RE_IPV6_WORD . '){1,6}(?(-2)|^)' . - '|' . // contains no "::" - RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){7}' . - ')' -); -// An IPv6 block is an IP address and a prefix (d1 to d128) -define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX ); -// For IPv6 canonicalization (NOT for strict validation; these are quite lax!) -define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' ); -define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' ); - -// This might be useful for regexps used elsewhere, matches any IPv6 or IPv6 address or network -define( 'IP_ADDRESS_STRING', - '(?:' . - RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?' . // IPv4 - '|' . - RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?' . // IPv6 - ')' -); - -/** - * A collection of public static functions to play with IP address - * and IP blocks. - */ -class IP { - /** - * Determine if a string is as valid IP address or network (CIDR prefix). - * SIIT IPv4-translated addresses are rejected. - * Note: canonicalize() tries to convert translated addresses to IPv4. - * - * @param string $ip possible IP address - * @return Boolean - */ - public static function isIPAddress( $ip ) { - return (bool)preg_match( '/^' . IP_ADDRESS_STRING . '$/', $ip ); - } - - /** - * Given a string, determine if it as valid IP in IPv6 only. - * Note: Unlike isValid(), this looks for networks too. - * - * @param string $ip possible IP address - * @return Boolean - */ - public static function isIPv6( $ip ) { - return (bool)preg_match( '/^' . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . 
')?$/', $ip ); - } - - /** - * Given a string, determine if it as valid IP in IPv4 only. - * Note: Unlike isValid(), this looks for networks too. - * - * @param string $ip possible IP address - * @return Boolean - */ - public static function isIPv4( $ip ) { - return (bool)preg_match( '/^' . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?$/', $ip ); - } - - /** - * Validate an IP address. Ranges are NOT considered valid. - * SIIT IPv4-translated addresses are rejected. - * Note: canonicalize() tries to convert translated addresses to IPv4. - * - * @param $ip String - * @return Boolean: True if it is valid. - */ - public static function isValid( $ip ) { - return ( preg_match( '/^' . RE_IP_ADD . '$/', $ip ) - || preg_match( '/^' . RE_IPV6_ADD . '$/', $ip ) ); - } - - /** - * Validate an IP Block (valid address WITH a valid prefix). - * SIIT IPv4-translated addresses are rejected. - * Note: canonicalize() tries to convert translated addresses to IPv4. - * - * @param $ipblock String - * @return Boolean: True if it is valid. - */ - public static function isValidBlock( $ipblock ) { - return ( preg_match( '/^' . RE_IPV6_BLOCK . '$/', $ipblock ) - || preg_match( '/^' . RE_IP_BLOCK . '$/', $ipblock ) ); - } - - /** - * Convert an IP into a verbose, uppercase, normalized form. - * IPv6 addresses in octet notation are expanded to 8 words. - * IPv4 addresses are just trimmed. - * - * @param string $ip IP address in quad or octet form (CIDR or not). - * @return String - */ - public static function sanitizeIP( $ip ) { - $ip = trim( $ip ); - if ( $ip === '' ) { - return null; - } - if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) { - return $ip; // nothing else to do for IPv4 addresses or invalid ones - } - // Remove any whitespaces, convert to upper case - $ip = strtoupper( $ip ); - // Expand zero abbreviations - $abbrevPos = strpos( $ip, '::' ); - if ( $abbrevPos !== false ) { - // We know this is valid IPv6. Find the last index of the - // address before any CIDR number (e.g. 
"a:b:c::/24"). - $CIDRStart = strpos( $ip, "/" ); - $addressEnd = ( $CIDRStart !== false ) - ? $CIDRStart - 1 - : strlen( $ip ) - 1; - // If the '::' is at the beginning... - if ( $abbrevPos == 0 ) { - $repeat = '0:'; - $extra = ( $ip == '::' ) ? '0' : ''; // for the address '::' - $pad = 9; // 7+2 (due to '::') - // If the '::' is at the end... - } elseif ( $abbrevPos == ( $addressEnd - 1 ) ) { - $repeat = ':0'; - $extra = ''; - $pad = 9; // 7+2 (due to '::') - // If the '::' is in the middle... - } else { - $repeat = ':0'; - $extra = ':'; - $pad = 8; // 6+2 (due to '::') - } - $ip = str_replace( '::', - str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra, - $ip - ); - } - // Remove leading zeros from each bloc as needed - $ip = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip ); - return $ip; - } - - /** - * Prettify an IP for display to end users. - * This will make it more compact and lower-case. - * - * @param $ip string - * @return string - */ - public static function prettifyIP( $ip ) { - $ip = self::sanitizeIP( $ip ); // normalize (removes '::') - if ( self::isIPv6( $ip ) ) { - // Split IP into an address and a CIDR - if ( strpos( $ip, '/' ) !== false ) { - list( $ip, $cidr ) = explode( '/', $ip, 2 ); - } else { - list( $ip, $cidr ) = array( $ip, '' ); - } - // Get the largest slice of words with multiple zeros - $offset = 0; - $longest = $longestPos = false; - while ( preg_match( - '!(?:^|:)0(?::0)+(?:$|:)!', $ip, $m, PREG_OFFSET_CAPTURE, $offset - ) ) { - list( $match, $pos ) = $m[0]; // full match - if ( strlen( $match ) > strlen( $longest ) ) { - $longest = $match; - $longestPos = $pos; - } - $offset = ( $pos + strlen( $match ) ); // advance - } - if ( $longest !== false ) { - // Replace this portion of the string with the '::' abbreviation - $ip = substr_replace( $ip, '::', $longestPos, strlen( $longest ) ); - } - // Add any CIDR back on - if ( $cidr !== '' ) { - $ip = "{$ip}/{$cidr}"; - } - // Convert to lower case to make it 
more readable - $ip = strtolower( $ip ); - } - return $ip; - } - - /** - * Given a host/port string, like one might find in the host part of a URL - * per RFC 2732, split the hostname part and the port part and return an - * array with an element for each. If there is no port part, the array will - * have false in place of the port. If the string was invalid in some way, - * false is returned. - * - * This was easy with IPv4 and was generally done in an ad-hoc way, but - * with IPv6 it's somewhat more complicated due to the need to parse the - * square brackets and colons. - * - * A bare IPv6 address is accepted despite the lack of square brackets. - * - * @param string $both The string with the host and port - * @return array - */ - public static function splitHostAndPort( $both ) { - if ( substr( $both, 0, 1 ) === '[' ) { - if ( preg_match( '/^\[(' . RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/', $both, $m ) ) { - if ( isset( $m['port'] ) ) { - return array( $m[1], intval( $m['port'] ) ); - } else { - return array( $m[1], false ); - } - } else { - // Square bracket found but no IPv6 - return false; - } - } - $numColons = substr_count( $both, ':' ); - if ( $numColons >= 2 ) { - // Is it a bare IPv6 address? - if ( preg_match( '/^' . RE_IPV6_ADD . '$/', $both ) ) { - return array( $both, false ); - } else { - // Not valid IPv6, but too many colons for anything else - return false; - } - } - if ( $numColons >= 1 ) { - // Host:port? - $bits = explode( ':', $both ); - if ( preg_match( '/^\d+/', $bits[1] ) ) { - return array( $bits[0], intval( $bits[1] ) ); - } else { - // Not a valid port - return false; - } - } - // Plain hostname - return array( $both, false ); - } - - /** - * Given a host name and a port, combine them into host/port string like - * you might find in a URL. If the host contains a colon, wrap it in square - * brackets like in RFC 2732. 
If the port matches the default port, omit - * the port specification - * - * @param $host string - * @param $port int - * @param $defaultPort bool|int - * @return string - */ - public static function combineHostAndPort( $host, $port, $defaultPort = false ) { - if ( strpos( $host, ':' ) !== false ) { - $host = "[$host]"; - } - if ( $defaultPort !== false && $port == $defaultPort ) { - return $host; - } else { - return "$host:$port"; - } - } - - /** - * Given an unsigned integer, returns an IPv6 address in octet notation - * - * @param $ip_int String: IP address. - * @return String - */ - public static function toOctet( $ip_int ) { - return self::hexToOctet( wfBaseConvert( $ip_int, 10, 16, 32, false ) ); - } - - /** - * Convert an IPv4 or IPv6 hexadecimal representation back to readable format - * - * @param string $hex number, with "v6-" prefix if it is IPv6 - * @return String: quad-dotted (IPv4) or octet notation (IPv6) - */ - public static function formatHex( $hex ) { - if ( substr( $hex, 0, 3 ) == 'v6-' ) { // IPv6 - return self::hexToOctet( substr( $hex, 3 ) ); - } else { // IPv4 - return self::hexToQuad( $hex ); - } - } - - /** - * Converts a hexadecimal number to an IPv6 address in octet notation - * - * @param $ip_hex String: pure hex (no v6- prefix) - * @return String (of format a:b:c:d:e:f:g:h) - */ - public static function hexToOctet( $ip_hex ) { - // Pad hex to 32 chars (128 bits) - $ip_hex = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT ); - // Separate into 8 words - $ip_oct = substr( $ip_hex, 0, 4 ); - for ( $n = 1; $n < 8; $n++ ) { - $ip_oct .= ':' . substr( $ip_hex, 4 * $n, 4 ); - } - // NO leading zeroes - $ip_oct = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . 
')/', '$1$2', $ip_oct ); - return $ip_oct; - } - - /** - * Converts a hexadecimal number to an IPv4 address in quad-dotted notation - * - * @param $ip_hex String: pure hex - * @return String (of format a.b.c.d) - */ - public static function hexToQuad( $ip_hex ) { - // Pad hex to 8 chars (32 bits) - $ip_hex = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT ); - // Separate into four quads - $s = ''; - for ( $i = 0; $i < 4; $i++ ) { - if ( $s !== '' ) { - $s .= '.'; - } - $s .= base_convert( substr( $ip_hex, $i * 2, 2 ), 16, 10 ); - } - return $s; - } - - /** - * Determine if an IP address really is an IP address, and if it is public, - * i.e. not RFC 1918 or similar - * Comes from ProxyTools.php - * - * @param $ip String - * @return Boolean - */ - public static function isPublic( $ip ) { - if ( self::isIPv6( $ip ) ) { - return self::isPublic6( $ip ); - } - $n = self::toUnsigned( $ip ); - if ( !$n ) { - return false; - } - - // ip2long accepts incomplete addresses, as well as some addresses - // followed by garbage characters. Check that it's really valid. - if ( $ip != long2ip( $n ) ) { - return false; - } - - static $privateRanges = false; - if ( !$privateRanges ) { - $privateRanges = array( - array( '10.0.0.0', '10.255.255.255' ), # RFC 1918 (private) - array( '172.16.0.0', '172.31.255.255' ), # RFC 1918 (private) - array( '192.168.0.0', '192.168.255.255' ), # RFC 1918 (private) - array( '0.0.0.0', '0.255.255.255' ), # this network - array( '127.0.0.0', '127.255.255.255' ), # loopback - ); - } - - foreach ( $privateRanges as $r ) { - $start = self::toUnsigned( $r[0] ); - $end = self::toUnsigned( $r[1] ); - if ( $n >= $start && $n <= $end ) { - return false; - } - } - return true; - } - - /** - * Determine if an IPv6 address really is an IP address, and if it is public, - * i.e. 
not RFC 4193 or similar - * - * @param $ip String - * @return Boolean - */ - private static function isPublic6( $ip ) { - static $privateRanges = false; - if ( !$privateRanges ) { - $privateRanges = array( - array( 'fc00::', 'fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' ), # RFC 4193 (local) - array( '0:0:0:0:0:0:0:1', '0:0:0:0:0:0:0:1' ), # loopback - ); - } - $n = self::toHex( $ip ); - foreach ( $privateRanges as $r ) { - $start = self::toHex( $r[0] ); - $end = self::toHex( $r[1] ); - if ( $n >= $start && $n <= $end ) { - return false; - } - } - return true; - } - - /** - * Return a zero-padded upper case hexadecimal representation of an IP address. - * - * Hexadecimal addresses are used because they can easily be extended to - * IPv6 support. To separate the ranges, the return value from this - * function for an IPv6 address will be prefixed with "v6-", a non- - * hexadecimal string which sorts after the IPv4 addresses. - * - * @param string $ip quad dotted/octet IP address. - * @return String - */ - public static function toHex( $ip ) { - if ( self::isIPv6( $ip ) ) { - $n = 'v6-' . self::IPv6ToRawHex( $ip ); - } else { - $n = self::toUnsigned( $ip ); - if ( $n !== false ) { - $n = wfBaseConvert( $n, 10, 16, 8, false ); - } - } - return $n; - } - - /** - * Given an IPv6 address in octet notation, returns a pure hex string. - * - * @param string $ip octet ipv6 IP address. - * @return String: pure hex (uppercase) - */ - private static function IPv6ToRawHex( $ip ) { - $ip = self::sanitizeIP( $ip ); - if ( !$ip ) { - return null; - } - $r_ip = ''; - foreach ( explode( ':', $ip ) as $v ) { - $r_ip .= str_pad( $v, 4, 0, STR_PAD_LEFT ); - } - return $r_ip; - } - - /** - * Given an IP address in dotted-quad/octet notation, returns an unsigned integer. - * Like ip2long() except that it actually works and has a consistent error return value. - * Comes from ProxyTools.php - * - * @param string $ip quad dotted IP address. 
- * @return Mixed: string/int/false - */ - public static function toUnsigned( $ip ) { - if ( self::isIPv6( $ip ) ) { - $n = self::toUnsigned6( $ip ); - } else { - $n = ip2long( $ip ); - if ( $n < 0 ) { - $n += pow( 2, 32 ); - # On 32-bit platforms (and on Windows), 2^32 does not fit into an int, - # so $n becomes a float. We convert it to string instead. - if ( is_float( $n ) ) { - $n = (string)$n; - } - } - } - return $n; - } - - /** - * @param $ip - * @return String - */ - private static function toUnsigned6( $ip ) { - return wfBaseConvert( self::IPv6ToRawHex( $ip ), 16, 10 ); - } - - /** - * Convert a network specification in CIDR notation - * to an integer network and a number of bits - * - * @param string $range IP with CIDR prefix - * @return array(int or string, int) - */ - public static function parseCIDR( $range ) { - if ( self::isIPv6( $range ) ) { - return self::parseCIDR6( $range ); - } - $parts = explode( '/', $range, 2 ); - if ( count( $parts ) != 2 ) { - return array( false, false ); - } - list( $network, $bits ) = $parts; - $network = ip2long( $network ); - if ( $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32 ) { - if ( $bits == 0 ) { - $network = 0; - } else { - $network &= ~( ( 1 << ( 32 - $bits ) ) - 1 ); - } - # Convert to unsigned - if ( $network < 0 ) { - $network += pow( 2, 32 ); - } - } else { - $network = false; - $bits = false; - } - return array( $network, $bits ); - } - - /** - * Given a string range in a number of formats, - * return the start and end of the range in hexadecimal. 
- * - * Formats are: - * 1.2.3.4/24 CIDR - * 1.2.3.4 - 1.2.3.5 Explicit range - * 1.2.3.4 Single IP - * - * 2001:0db8:85a3::7344/96 CIDR - * 2001:0db8:85a3::7344 - 2001:0db8:85a3::7344 Explicit range - * 2001:0db8:85a3::7344 Single IP - * @param string $range IP range - * @return array(string, string) - */ - public static function parseRange( $range ) { - // CIDR notation - if ( strpos( $range, '/' ) !== false ) { - if ( self::isIPv6( $range ) ) { - return self::parseRange6( $range ); - } - list( $network, $bits ) = self::parseCIDR( $range ); - if ( $network === false ) { - $start = $end = false; - } else { - $start = sprintf( '%08X', $network ); - $end = sprintf( '%08X', $network + pow( 2, ( 32 - $bits ) ) - 1 ); - } - // Explicit range - } elseif ( strpos( $range, '-' ) !== false ) { - list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) ); - if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) { - return self::parseRange6( $range ); - } - if ( self::isIPv4( $start ) && self::isIPv4( $end ) ) { - $start = self::toUnsigned( $start ); - $end = self::toUnsigned( $end ); - if ( $start > $end ) { - $start = $end = false; - } else { - $start = sprintf( '%08X', $start ); - $end = sprintf( '%08X', $end ); - } - } else { - $start = $end = false; - } - } else { - # Single IP - $start = $end = self::toHex( $range ); - } - if ( $start === false || $end === false ) { - return array( false, false ); - } else { - return array( $start, $end ); - } - } - - /** - * Convert a network specification in IPv6 CIDR notation to an - * integer network and a number of bits - * - * @param $range - * - * @return array(string, int) - */ - private static function parseCIDR6( $range ) { - # Explode into - $parts = explode( '/', IP::sanitizeIP( $range ), 2 ); - if ( count( $parts ) != 2 ) { - return array( false, false ); - } - list( $network, $bits ) = $parts; - $network = self::IPv6ToRawHex( $network ); - if ( $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 
128 ) { - if ( $bits == 0 ) { - $network = "0"; - } else { - # Native 32 bit functions WONT work here!!! - # Convert to a padded binary number - $network = wfBaseConvert( $network, 16, 2, 128 ); - # Truncate the last (128-$bits) bits and replace them with zeros - $network = str_pad( substr( $network, 0, $bits ), 128, 0, STR_PAD_RIGHT ); - # Convert back to an integer - $network = wfBaseConvert( $network, 2, 10 ); - } - } else { - $network = false; - $bits = false; - } - return array( $network, (int)$bits ); - } - - /** - * Given a string range in a number of formats, return the - * start and end of the range in hexadecimal. For IPv6. - * - * Formats are: - * 2001:0db8:85a3::7344/96 CIDR - * 2001:0db8:85a3::7344 - 2001:0db8:85a3::7344 Explicit range - * 2001:0db8:85a3::7344/96 Single IP - * - * @param $range - * - * @return array(string, string) - */ - private static function parseRange6( $range ) { - # Expand any IPv6 IP - $range = IP::sanitizeIP( $range ); - // CIDR notation... - if ( strpos( $range, '/' ) !== false ) { - list( $network, $bits ) = self::parseCIDR6( $range ); - if ( $network === false ) { - $start = $end = false; - } else { - $start = wfBaseConvert( $network, 10, 16, 32, false ); - # Turn network to binary (again) - $end = wfBaseConvert( $network, 10, 2, 128 ); - # Truncate the last (128-$bits) bits and replace them with ones - $end = str_pad( substr( $end, 0, $bits ), 128, 1, STR_PAD_RIGHT ); - # Convert to hex - $end = wfBaseConvert( $end, 2, 16, 32, false ); - # see toHex() comment - $start = "v6-$start"; - $end = "v6-$end"; - } - // Explicit range notation... 
- } elseif ( strpos( $range, '-' ) !== false ) { - list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) ); - $start = self::toUnsigned6( $start ); - $end = self::toUnsigned6( $end ); - if ( $start > $end ) { - $start = $end = false; - } else { - $start = wfBaseConvert( $start, 10, 16, 32, false ); - $end = wfBaseConvert( $end, 10, 16, 32, false ); - } - # see toHex() comment - $start = "v6-$start"; - $end = "v6-$end"; - } else { - # Single IP - $start = $end = self::toHex( $range ); - } - if ( $start === false || $end === false ) { - return array( false, false ); - } else { - return array( $start, $end ); - } - } - - /** - * Determine if a given IPv4/IPv6 address is in a given CIDR network - * - * @param string $addr the address to check against the given range. - * @param string $range the range to check the given address against. - * @return Boolean: whether or not the given address is in the given range. - */ - public static function isInRange( $addr, $range ) { - $hexIP = self::toHex( $addr ); - list( $start, $end ) = self::parseRange( $range ); - return ( strcmp( $hexIP, $start ) >= 0 && - strcmp( $hexIP, $end ) <= 0 ); - } - - /** - * Convert some unusual representations of IPv4 addresses to their - * canonical dotted quad representation. - * - * This currently only checks a few IPV4-to-IPv6 related cases. More - * unusual representations may be added later. - * - * @param string $addr something that might be an IP address - * @return String: valid dotted quad IPv4 address or null - */ - public static function canonicalize( $addr ) { - // remove zone info (bug 35738) - $addr = preg_replace( '/\%.*/', '', $addr ); - - if ( self::isValid( $addr ) ) { - return $addr; - } - // Turn mapped addresses from ::ce:ffff:1.2.3.4 to 1.2.3.4 - if ( strpos( $addr, ':' ) !== false && strpos( $addr, '.' 
) !== false ) { - $addr = substr( $addr, strrpos( $addr, ':' ) + 1 ); - if ( self::isIPv4( $addr ) ) { - return $addr; - } - } - // IPv6 loopback address - $m = array(); - if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $addr, $m ) ) { - return '127.0.0.1'; - } - // IPv4-mapped and IPv4-compatible IPv6 addresses - if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i', $addr, $m ) ) { - return $m[1]; - } - if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD . - ':' . RE_IPV6_WORD . '$/i', $addr, $m ) ) - { - return long2ip( ( hexdec( $m[1] ) << 16 ) + hexdec( $m[2] ) ); - } - - return null; // give up - } - - /** - * Gets rid of unneeded numbers in quad-dotted/octet IP strings - * For example, 127.111.113.151/24 -> 127.111.113.0/24 - * @param string $range IP address to normalize - * @return string - */ - public static function sanitizeRange( $range ) { - list( /*...*/, $bits ) = self::parseCIDR( $range ); - list( $start, /*...*/ ) = self::parseRange( $range ); - $start = self::formatHex( $start ); - if ( $bits === false ) { - return $start; // wasn't actually a range - } - return "$start/$bits"; - } -} diff --git a/includes/MWCryptRand.php b/includes/MWCryptRand.php deleted file mode 100644 index bac018e896..0000000000 --- a/includes/MWCryptRand.php +++ /dev/null @@ -1,497 +0,0 @@ - $v ) { - if ( is_numeric( $k ) ) { - unset( $k ); - } - } - // The absolute filename itself will differ from install to install so don't leave it out - if ( ( $path = realpath( $file ) ) !== false ) { - $state .= $path; - } else { - $state .= $file; - } - $state .= implode( '', $stat ); - } else { - // The fact that the file isn't there is worth at least a - // minuscule amount of entropy. 
- $state .= '0'; - } - } - - // Try and make this a little more unstable by including the varying process - // id of the php process we are running inside of if we are able to access it - if ( function_exists( 'getmypid' ) ) { - $state .= getmypid(); - } - - // If available try to increase the instability of the data by throwing in - // the precise amount of memory that we happen to be using at the moment. - if ( function_exists( 'memory_get_usage' ) ) { - $state .= memory_get_usage( true ); - } - - // It's mostly worthless but throw the wiki's id into the data for a little more variance - $state .= wfWikiID(); - - // If we have a secret key or proxy key set then throw it into the state as well - global $wgSecretKey, $wgProxyKey; - if ( $wgSecretKey ) { - $state .= $wgSecretKey; - } elseif ( $wgProxyKey ) { - $state .= $wgProxyKey; - } - - return $state; - } - - /** - * Randomly hash data while mixing in clock drift data for randomness - * - * @param string $data The data to randomly hash. 
- * @return String The hashed bytes - * @author Tim Starling - */ - protected function driftHash( $data ) { - // Minimum number of iterations (to avoid slow operations causing the loop to gather little entropy) - $minIterations = self::MIN_ITERATIONS; - // Duration of time to spend doing calculations (in seconds) - $duration = ( self::MSEC_PER_BYTE / 1000 ) * $this->hashLength(); - // Create a buffer to use to trigger memory operations - $bufLength = 10000000; - $buffer = str_repeat( ' ', $bufLength ); - $bufPos = 0; - - // Iterate for $duration seconds or at least $minIterations number of iterations - $iterations = 0; - $startTime = microtime( true ); - $currentTime = $startTime; - while ( $iterations < $minIterations || $currentTime - $startTime < $duration ) { - // Trigger some memory writing to trigger some bus activity - // This may create variance in the time between iterations - $bufPos = ( $bufPos + 13 ) % $bufLength; - $buffer[$bufPos] = ' '; - // Add the drift between this iteration and the last in as entropy - $nextTime = microtime( true ); - $delta = (int)( ( $nextTime - $currentTime ) * 1000000 ); - $data .= $delta; - // Every 100 iterations hash the data and entropy - if ( $iterations % 100 === 0 ) { - $data = sha1( $data ); - } - $currentTime = $nextTime; - $iterations++; - } - $timeTaken = $currentTime - $startTime; - $data = $this->hash( $data ); - - wfDebug( __METHOD__ . ": Clock drift calculation " . - "(time-taken=" . ( $timeTaken * 1000 ) . "ms, " . - "iterations=$iterations, " . - "time-per-iteration=" . ( $timeTaken / $iterations * 1e6 ) . 
"us)\n" ); - return $data; - } - - /** - * Return a rolling random state initially build using data from unstable sources - * @return string A new weak random state - */ - protected function randomState() { - static $state = null; - if ( is_null( $state ) ) { - // Initialize the state with whatever unstable data we can find - // It's important that this data is hashed right afterwards to prevent - // it from being leaked into the output stream - $state = $this->hash( $this->initialRandomState() ); - } - // Generate a new random state based on the initial random state or previous - // random state by combining it with clock drift - $state = $this->driftHash( $state ); - return $state; - } - - /** - * Decide on the best acceptable hash algorithm we have available for hash() - * @throws MWException - * @return String A hash algorithm - */ - protected function hashAlgo() { - if ( !is_null( $this->algo ) ) { - return $this->algo; - } - - $algos = hash_algos(); - $preference = array( 'whirlpool', 'sha256', 'sha1', 'md5' ); - - foreach ( $preference as $algorithm ) { - if ( in_array( $algorithm, $algos ) ) { - $this->algo = $algorithm; - wfDebug( __METHOD__ . ": Using the {$this->algo} hash algorithm.\n" ); - return $this->algo; - } - } - - // We only reach here if no acceptable hash is found in the list, this should - // be a technical impossibility since most of php's hash list is fixed and - // some of the ones we list are available as their own native functions - // But since we already require at least 5.2 and hash() was default in - // 5.1.2 we don't bother falling back to methods like sha1 and md5. - throw new MWException( "Could not find an acceptable hashing function in hash_algos()" ); - } - - /** - * Return the byte-length output of the hash algorithm we are - * using in self::hash and self::hmac. 
- * - * @return int Number of bytes the hash outputs - */ - protected function hashLength() { - if ( is_null( $this->hashLength ) ) { - $this->hashLength = strlen( $this->hash( '' ) ); - } - return $this->hashLength; - } - - /** - * Generate an acceptably unstable one-way-hash of some text - * making use of the best hash algorithm that we have available. - * - * @param $data string - * @return String A raw hash of the data - */ - protected function hash( $data ) { - return hash( $this->hashAlgo(), $data, true ); - } - - /** - * Generate an acceptably unstable one-way-hmac of some text - * making use of the best hash algorithm that we have available. - * - * @param $data string - * @param $key string - * @return String A raw hash of the data - */ - protected function hmac( $data, $key ) { - return hash_hmac( $this->hashAlgo(), $data, $key, true ); - } - - /** - * @see self::wasStrong() - */ - public function realWasStrong() { - if ( is_null( $this->strong ) ) { - throw new MWException( __METHOD__ . ' called before generation of random data' ); - } - return $this->strong; - } - - /** - * @see self::generate() - */ - public function realGenerate( $bytes, $forceStrong = false ) { - wfProfileIn( __METHOD__ ); - - wfDebug( __METHOD__ . ": Generating cryptographic random bytes for " . wfGetAllCallers( 5 ) . 
"\n" ); - - $bytes = floor( $bytes ); - static $buffer = ''; - if ( is_null( $this->strong ) ) { - // Set strength to false initially until we know what source data is coming from - $this->strong = true; - } - - if ( strlen( $buffer ) < $bytes ) { - // If available make use of mcrypt_create_iv URANDOM source to generate randomness - // On unix-like systems this reads from /dev/urandom but does it without any buffering - // and bypasses openbasedir restrictions, so it's preferable to reading directly - // On Windows starting in PHP 5.3.0 Windows' native CryptGenRandom is used to generate - // entropy so this is also preferable to just trying to read urandom because it may work - // on Windows systems as well. - if ( function_exists( 'mcrypt_create_iv' ) ) { - wfProfileIn( __METHOD__ . '-mcrypt' ); - $rem = $bytes - strlen( $buffer ); - $iv = mcrypt_create_iv( $rem, MCRYPT_DEV_URANDOM ); - if ( $iv === false ) { - wfDebug( __METHOD__ . ": mcrypt_create_iv returned false.\n" ); - } else { - $buffer .= $iv; - wfDebug( __METHOD__ . ": mcrypt_create_iv generated " . strlen( $iv ) . " bytes of randomness.\n" ); - } - wfProfileOut( __METHOD__ . '-mcrypt' ); - } - } - - if ( strlen( $buffer ) < $bytes ) { - // If available make use of openssl's random_pseudo_bytes method to attempt to generate randomness. - // However don't do this on Windows with PHP < 5.3.4 due to a bug: - // http://stackoverflow.com/questions/1940168/openssl-random-pseudo-bytes-is-slow-php - // http://git.php.net/?p=php-src.git;a=commitdiff;h=cd62a70863c261b07f6dadedad9464f7e213cad5 - if ( function_exists( 'openssl_random_pseudo_bytes' ) - && ( !wfIsWindows() || version_compare( PHP_VERSION, '5.3.4', '>=' ) ) - ) { - wfProfileIn( __METHOD__ . '-openssl' ); - $rem = $bytes - strlen( $buffer ); - $openssl_bytes = openssl_random_pseudo_bytes( $rem, $openssl_strong ); - if ( $openssl_bytes === false ) { - wfDebug( __METHOD__ . 
": openssl_random_pseudo_bytes returned false.\n" ); - } else { - $buffer .= $openssl_bytes; - wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes generated " . strlen( $openssl_bytes ) . " bytes of " . ( $openssl_strong ? "strong" : "weak" ) . " randomness.\n" ); - } - if ( strlen( $buffer ) >= $bytes ) { - // openssl tells us if the random source was strong, if some of our data was generated - // using it use it's say on whether the randomness is strong - $this->strong = !!$openssl_strong; - } - wfProfileOut( __METHOD__ . '-openssl' ); - } - } - - // Only read from urandom if we can control the buffer size or were passed forceStrong - if ( strlen( $buffer ) < $bytes && ( function_exists( 'stream_set_read_buffer' ) || $forceStrong ) ) { - wfProfileIn( __METHOD__ . '-fopen-urandom' ); - $rem = $bytes - strlen( $buffer ); - if ( !function_exists( 'stream_set_read_buffer' ) && $forceStrong ) { - wfDebug( __METHOD__ . ": Was forced to read from /dev/urandom without control over the buffer size.\n" ); - } - // /dev/urandom is generally considered the best possible commonly - // available random source, and is available on most *nix systems. - wfSuppressWarnings(); - $urandom = fopen( "/dev/urandom", "rb" ); - wfRestoreWarnings(); - - // Attempt to read all our random data from urandom - // php's fread always does buffered reads based on the stream's chunk_size - // so in reality it will usually read more than the amount of data we're - // asked for and not storing that risks depleting the system's random pool. - // If stream_set_read_buffer is available set the chunk_size to the amount - // of data we need. Otherwise read 8k, php's default chunk_size. 
- if ( $urandom ) { - // php's default chunk_size is 8k - $chunk_size = 1024 * 8; - if ( function_exists( 'stream_set_read_buffer' ) ) { - // If possible set the chunk_size to the amount of data we need - stream_set_read_buffer( $urandom, $rem ); - $chunk_size = $rem; - } - $random_bytes = fread( $urandom, max( $chunk_size, $rem ) ); - $buffer .= $random_bytes; - fclose( $urandom ); - wfDebug( __METHOD__ . ": /dev/urandom generated " . strlen( $random_bytes ) . " bytes of randomness.\n" ); - if ( strlen( $buffer ) >= $bytes ) { - // urandom is always strong, set to true if all our data was generated using it - $this->strong = true; - } - } else { - wfDebug( __METHOD__ . ": /dev/urandom could not be opened.\n" ); - } - wfProfileOut( __METHOD__ . '-fopen-urandom' ); - } - - // If we cannot use or generate enough data from a secure source - // use this loop to generate a good set of pseudo random data. - // This works by initializing a random state using a pile of unstable data - // and continually shoving it through a hash along with a variable salt. - // We hash the random state with more salt to avoid the state from leaking - // out and being used to predict the /randomness/ that follows. - if ( strlen( $buffer ) < $bytes ) { - wfDebug( __METHOD__ . ": Falling back to using a pseudo random state to generate randomness.\n" ); - } - while ( strlen( $buffer ) < $bytes ) { - wfProfileIn( __METHOD__ . '-fallback' ); - $buffer .= $this->hmac( $this->randomState(), mt_rand() ); - // This code is never really cryptographically strong, if we use it - // at all, then set strong to false. - $this->strong = false; - wfProfileOut( __METHOD__ . 
'-fallback' ); - } - - // Once the buffer has been filled up with enough random data to fulfill - // the request shift off enough data to handle the request and leave the - // unused portion left inside the buffer for the next request for random data - $generated = substr( $buffer, 0, $bytes ); - $buffer = substr( $buffer, $bytes ); - - wfDebug( __METHOD__ . ": " . strlen( $buffer ) . " bytes of randomness leftover in the buffer.\n" ); - - wfProfileOut( __METHOD__ ); - return $generated; - } - - /** - * @see self::generateHex() - */ - public function realGenerateHex( $chars, $forceStrong = false ) { - // hex strings are 2x the length of raw binary so we divide the length in half - // odd numbers will result in a .5 that leads the generate() being 1 character - // short, so we use ceil() to ensure that we always have enough bytes - $bytes = ceil( $chars / 2 ); - // Generate the data and then convert it to a hex string - $hex = bin2hex( $this->generate( $bytes, $forceStrong ) ); - // A bit of paranoia here, the caller asked for a specific length of string - // here, and it's possible (eg when given an odd number) that we may actually - // have at least 1 char more than they asked for. Just in case they made this - // call intending to insert it into a database that does truncation we don't - // want to give them too much and end up with their database and their live - // code having two different values because part of what we gave them is truncated - // hence, we strip out any run of characters longer than what we were asked for. 
- return substr( $hex, 0, $chars ); - } - - /** Publicly exposed static methods **/ - - /** - * Return a singleton instance of MWCryptRand - * @return MWCryptRand - */ - protected static function singleton() { - if ( is_null( self::$singleton ) ) { - self::$singleton = new self; - } - return self::$singleton; - } - - /** - * Return a boolean indicating whether or not the source used for cryptographic - * random bytes generation in the previously run generate* call - * was cryptographically strong. - * - * @return bool Returns true if the source was strong, false if not. - */ - public static function wasStrong() { - return self::singleton()->realWasStrong(); - } - - /** - * Generate a run of (ideally) cryptographically random data and return - * it in raw binary form. - * You can use MWCryptRand::wasStrong() if you wish to know if the source used - * was cryptographically strong. - * - * @param int $bytes the number of bytes of random data to generate - * @param bool $forceStrong Pass true if you want generate to prefer cryptographically - * strong sources of entropy even if reading from them may steal - * more entropy from the system than optimal. - * @return String Raw binary random data - */ - public static function generate( $bytes, $forceStrong = false ) { - return self::singleton()->realGenerate( $bytes, $forceStrong ); - } - - /** - * Generate a run of (ideally) cryptographically random data and return - * it in hexadecimal string format. - * You can use MWCryptRand::wasStrong() if you wish to know if the source used - * was cryptographically strong. - * - * @param int $chars the number of hex chars of random data to generate - * @param bool $forceStrong Pass true if you want generate to prefer cryptographically - * strong sources of entropy even if reading from them may steal - * more entropy from the system than optimal. 
- * @return String Hexadecimal random data - */ - public static function generateHex( $chars, $forceStrong = false ) { - return self::singleton()->realGenerateHex( $chars, $forceStrong ); - } - -} diff --git a/includes/MWFunction.php b/includes/MWFunction.php deleted file mode 100644 index 6d11d17813..0000000000 --- a/includes/MWFunction.php +++ /dev/null @@ -1,61 +0,0 @@ -newInstanceArgs( $args ); - } - -} diff --git a/includes/MappedIterator.php b/includes/MappedIterator.php deleted file mode 100644 index 70d20327df..0000000000 --- a/includes/MappedIterator.php +++ /dev/null @@ -1,114 +0,0 @@ -vCallback = $vCallback; - $this->aCallback = isset( $options['accept'] ) ? $options['accept'] : null; - } - - public function next() { - $this->cache = array(); - parent::next(); - } - - public function rewind() { - $this->rewound = true; - $this->cache = array(); - parent::rewind(); - } - - public function accept() { - $value = call_user_func( $this->vCallback, $this->getInnerIterator()->current() ); - $ok = ( $this->aCallback ) ? call_user_func( $this->aCallback, $value ) : true; - if ( $ok ) { - $this->cache['current'] = $value; - } - return $ok; - } - - public function key() { - $this->init(); - return parent::key(); - } - - public function valid() { - $this->init(); - return parent::valid(); - } - - public function current() { - $this->init(); - if ( parent::valid() ) { - return $this->cache['current']; - } else { - return null; // out of range - } - } - - /** - * Obviate the usual need for rewind() before using a FilterIterator in a manual loop - */ - protected function init() { - if ( !$this->rewound ) { - $this->rewind(); - } - } -} diff --git a/includes/ScopedCallback.php b/includes/ScopedCallback.php deleted file mode 100644 index ef22e0a30d..0000000000 --- a/includes/ScopedCallback.php +++ /dev/null @@ -1,73 +0,0 @@ -callback = $callback; - } - - /** - * Trigger a scoped callback and destroy it. - * This is the same is just setting it to null. 
- * - * @param ScopedCallback $sc - */ - public static function consume( ScopedCallback &$sc = null ) { - $sc = null; - } - - /** - * Destroy a scoped callback without triggering it - * - * @param ScopedCallback $sc - */ - public static function cancel( ScopedCallback &$sc = null ) { - if ( $sc ) { - $sc->callback = null; - } - $sc = null; - } - - /** - * Trigger the callback when this leaves scope - */ - function __destruct() { - if ( $this->callback !== null ) { - call_user_func( $this->callback ); - } - } -} diff --git a/includes/ScopedPHPTimeout.php b/includes/ScopedPHPTimeout.php deleted file mode 100644 index d1493c30b7..0000000000 --- a/includes/ScopedPHPTimeout.php +++ /dev/null @@ -1,84 +0,0 @@ - 0 ) { // CLI uses 0 - if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) { - trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." ); - } elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) { - trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." ); - } elseif ( self::$stackDepth > 0 ) { // recursion guard - trigger_error( "Resursive invocation of " . __CLASS__ . " attempted." ); - } else { - $this->oldIgnoreAbort = ignore_user_abort( true ); - $this->oldTimeout = ini_set( 'max_execution_time', $seconds ); - $this->startTime = microtime( true ); - ++self::$stackDepth; - ++self::$totalCalls; // proof against < 1us scopes - } - } - } - - /** - * Restore the original timeout. - * This does not account for the timer value on __construct(). - */ - public function __destruct() { - if ( $this->oldTimeout ) { - $elapsed = microtime( true ) - $this->startTime; - // Note: a limit of 0 is treated as "forever" - set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) ); - // If each scoped timeout is for less than one second, we end up - // restoring the original timeout without any decrease in value. - // Thus web scripts in an infinite loop can run forever unless we - // take some measures to prevent this. 
Track total time and calls. - self::$totalElapsed += $elapsed; - --self::$stackDepth; - ignore_user_abort( $this->oldIgnoreAbort ); - } - } -} diff --git a/includes/StringUtils.php b/includes/StringUtils.php deleted file mode 100644 index c1545e6ef5..0000000000 --- a/includes/StringUtils.php +++ /dev/null @@ -1,606 +0,0 @@ -cb(), $subject, $flags ); - } - - /** - * More or less "markup-safe" explode() - * Ignores any instances of the separator inside <...> - * @param string $separator - * @param string $text - * @return array - */ - static function explodeMarkup( $separator, $text ) { - $placeholder = "\x00"; - - // Remove placeholder instances - $text = str_replace( $placeholder, '', $text ); - - // Replace instances of the separator inside HTML-like tags with the placeholder - $replacer = new DoubleReplacer( $separator, $placeholder ); - $cleaned = StringUtils::delimiterReplaceCallback( '<', '>', $replacer->cb(), $text ); - - // Explode, then put the replaced separators back in - $items = explode( $separator, $cleaned ); - foreach ( $items as $i => $str ) { - $items[$i] = str_replace( $placeholder, $separator, $str ); - } - - return $items; - } - - /** - * Escape a string to make it suitable for inclusion in a preg_replace() - * replacement parameter. - * - * @param string $string - * @return string - */ - static function escapeRegexReplacement( $string ) { - $string = str_replace( '\\', '\\\\', $string ); - $string = str_replace( '$', '\\$', $string ); - return $string; - } - - /** - * Workalike for explode() with limited memory usage. 
- * Returns an Iterator - * @param string $separator - * @param string $subject - * @return ArrayIterator|ExplodeIterator - */ - static function explode( $separator, $subject ) { - if ( substr_count( $subject, $separator ) > 1000 ) { - return new ExplodeIterator( $separator, $subject ); - } else { - return new ArrayIterator( explode( $separator, $subject ) ); - } - } -} - -/** - * Base class for "replacers", objects used in preg_replace_callback() and - * StringUtils::delimiterReplaceCallback() - */ -class Replacer { - - /** - * @return array - */ - function cb() { - return array( &$this, 'replace' ); - } -} - -/** - * Class to replace regex matches with a string similar to that used in preg_replace() - */ -class RegexlikeReplacer extends Replacer { - var $r; - - /** - * @param string $r - */ - function __construct( $r ) { - $this->r = $r; - } - - /** - * @param array $matches - * @return string - */ - function replace( $matches ) { - $pairs = array(); - foreach ( $matches as $i => $match ) { - $pairs["\$$i"] = $match; - } - return strtr( $this->r, $pairs ); - } - -} - -/** - * Class to perform secondary replacement within each replacement string - */ -class DoubleReplacer extends Replacer { - - /** - * @param $from - * @param $to - * @param int $index - */ - function __construct( $from, $to, $index = 0 ) { - $this->from = $from; - $this->to = $to; - $this->index = $index; - } - - /** - * @param array $matches - * @return mixed - */ - function replace( $matches ) { - return str_replace( $this->from, $this->to, $matches[$this->index] ); - } -} - -/** - * Class to perform replacement based on a simple hashtable lookup - */ -class HashtableReplacer extends Replacer { - var $table, $index; - - /** - * @param $table - * @param int $index - */ - function __construct( $table, $index = 0 ) { - $this->table = $table; - $this->index = $index; - } - - /** - * @param array $matches - * @return mixed - */ - function replace( $matches ) { - return 
$this->table[$matches[$this->index]]; - } -} - -/** - * Replacement array for FSS with fallback to strtr() - * Supports lazy initialisation of FSS resource - */ -class ReplacementArray { - /*mostly private*/ var $data = false; - /*mostly private*/ var $fss = false; - - /** - * Create an object with the specified replacement array - * The array should have the same form as the replacement array for strtr() - * @param array $data - */ - function __construct( $data = array() ) { - $this->data = $data; - } - - /** - * @return array - */ - function __sleep() { - return array( 'data' ); - } - - function __wakeup() { - $this->fss = false; - } - - /** - * Set the whole replacement array at once - * @param array $data - */ - function setArray( $data ) { - $this->data = $data; - $this->fss = false; - } - - /** - * @return array|bool - */ - function getArray() { - return $this->data; - } - - /** - * Set an element of the replacement array - * @param string $from - * @param string $to - */ - function setPair( $from, $to ) { - $this->data[$from] = $to; - $this->fss = false; - } - - /** - * @param array $data - */ - function mergeArray( $data ) { - $this->data = array_merge( $this->data, $data ); - $this->fss = false; - } - - /** - * @param ReplacementArray $other - */ - function merge( $other ) { - $this->data = array_merge( $this->data, $other->data ); - $this->fss = false; - } - - /** - * @param string $from - */ - function removePair( $from ) { - unset( $this->data[$from] ); - $this->fss = false; - } - - /** - * @param array $data - */ - function removeArray( $data ) { - foreach ( $data as $from => $to ) { - $this->removePair( $from ); - } - $this->fss = false; - } - - /** - * @param string $subject - * @return string - */ - function replace( $subject ) { - if ( function_exists( 'fss_prep_replace' ) ) { - wfProfileIn( __METHOD__ . 
'-fss' ); - if ( $this->fss === false ) { - $this->fss = fss_prep_replace( $this->data ); - } - $result = fss_exec_replace( $this->fss, $subject ); - wfProfileOut( __METHOD__ . '-fss' ); - } else { - wfProfileIn( __METHOD__ . '-strtr' ); - $result = strtr( $subject, $this->data ); - wfProfileOut( __METHOD__ . '-strtr' ); - } - return $result; - } -} - -/** - * An iterator which works exactly like: - * - * foreach ( explode( $delim, $s ) as $element ) { - * ... - * } - * - * Except it doesn't use 193 byte per element - */ -class ExplodeIterator implements Iterator { - // The subject string - var $subject, $subjectLength; - - // The delimiter - var $delim, $delimLength; - - // The position of the start of the line - var $curPos; - - // The position after the end of the next delimiter - var $endPos; - - // The current token - var $current; - - /** - * Construct a DelimIterator - * @param string $delim - * @param string $subject - */ - function __construct( $delim, $subject ) { - $this->subject = $subject; - $this->delim = $delim; - - // Micro-optimisation (theoretical) - $this->subjectLength = strlen( $subject ); - $this->delimLength = strlen( $delim ); - - $this->rewind(); - } - - function rewind() { - $this->curPos = 0; - $this->endPos = strpos( $this->subject, $this->delim ); - $this->refreshCurrent(); - } - - function refreshCurrent() { - if ( $this->curPos === false ) { - $this->current = false; - } elseif ( $this->curPos >= $this->subjectLength ) { - $this->current = ''; - } elseif ( $this->endPos === false ) { - $this->current = substr( $this->subject, $this->curPos ); - } else { - $this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos ); - } - } - - function current() { - return $this->current; - } - - /** - * @return int|bool Current position or boolean false if invalid - */ - function key() { - return $this->curPos; - } - - /** - * @return string - */ - function next() { - if ( $this->endPos === false ) { - $this->curPos = 
false; - } else { - $this->curPos = $this->endPos + $this->delimLength; - if ( $this->curPos >= $this->subjectLength ) { - $this->endPos = false; - } else { - $this->endPos = strpos( $this->subject, $this->delim, $this->curPos ); - } - } - $this->refreshCurrent(); - return $this->current; - } - - /** - * @return bool - */ - function valid() { - return $this->curPos !== false; - } -} diff --git a/includes/UIDGenerator.php b/includes/UIDGenerator.php deleted file mode 100644 index 963e51a4d3..0000000000 --- a/includes/UIDGenerator.php +++ /dev/null @@ -1,337 +0,0 @@ -nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 ); - $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 ); - // If different processes run as different users, they may have different temp dirs. - // This is dealt with by initializing the clock sequence number and counters randomly. - $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88'; - $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128'; - } - - /** - * @return UIDGenerator - */ - protected static function singleton() { - if ( self::$instance === null ) { - self::$instance = new self(); - } - return self::$instance; - } - - /** - * Get a statistically unique 88-bit unsigned integer ID string. - * The bits of the UID are prefixed with the time (down to the millisecond). - * - * These IDs are suitable as values for the shard key of distributed data. - * If a column uses these as values, it should be declared UNIQUE to handle collisions. - * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. - * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL. - * - * UID generation is serialized on each server (as the node ID is for the whole machine). 
- * - * @param $base integer Specifies a base other than 10 - * @return string Number - * @throws MWException - */ - public static function newTimestampedUID88( $base = 10 ) { - if ( !is_integer( $base ) || $base > 36 || $base < 2 ) { - throw new MWException( "Base must an integer be between 2 and 36" ); - } - $gen = self::singleton(); - $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 ); - return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base ); - } - - /** - * @param array $time (UIDGenerator::millitime(), clock sequence) - * @return string 88 bits - */ - protected function getTimestampedID88( array $info ) { - list( $time, $counter ) = $info; - // Take the 46 MSBs of "milliseconds since epoch" - $id_bin = $this->millisecondsSinceEpochBinary( $time ); - // Add a 10 bit counter resulting in 56 bits total - $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ); - // Add the 32 bit node ID resulting in 88 bits total - $id_bin .= $this->nodeId32; - // Convert to a 1-27 digit integer string - if ( strlen( $id_bin ) !== 88 ) { - throw new MWException( "Detected overflow for millisecond timestamp." ); - } - return $id_bin; - } - - /** - * Get a statistically unique 128-bit unsigned integer ID string. - * The bits of the UID are prefixed with the time (down to the millisecond). - * - * These IDs are suitable as globally unique IDs, without any enforced uniqueness. - * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. - * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL. - * - * UID generation is serialized on each server (as the node ID is for the whole machine). 
- * - * @param $base integer Specifies a base other than 10 - * @return string Number - * @throws MWException - */ - public static function newTimestampedUID128( $base = 10 ) { - if ( !is_integer( $base ) || $base > 36 || $base < 2 ) { - throw new MWException( "Base must be an integer between 2 and 36" ); - } - $gen = self::singleton(); - $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 ); - return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base ); - } - - /** - * @param array $info (UIDGenerator::millitime(), counter, clock sequence) - * @return string 128 bits - */ - protected function getTimestampedID128( array $info ) { - list( $time, $counter, $clkSeq ) = $info; - // Take the 46 MSBs of "milliseconds since epoch" - $id_bin = $this->millisecondsSinceEpochBinary( $time ); - // Add a 20 bit counter resulting in 66 bits total - $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ); - // Add a 14 bit clock sequence number resulting in 80 bits total - $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ); - // Add the 48 bit node ID resulting in 128 bits total - $id_bin .= $this->nodeId48; - // Convert to a 1-39 digit integer string - if ( strlen( $id_bin ) !== 128 ) { - throw new MWException( "Detected overflow for millisecond timestamp." ); - } - return $id_bin; - } - - /** - * Return an RFC4122 compliant v4 UUID - * - * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND) - * @return string - * @throws MWException - */ - public static function newUUIDv4( $flags = 0 ) { - $hex = ( $flags & self::QUICK_RAND ) - ? wfRandomString( 31 ) - : MWCryptRand::generateHex( 31 ); - - return sprintf( '%s-%s-%s-%s-%s', - // "time_low" (32 bits) - substr( $hex, 0, 8 ), - // "time_mid" (16 bits) - substr( $hex, 8, 4 ), - // "time_hi_and_version" (16 bits) - '4' . substr( $hex, 12, 3 ), - // "clk_seq_hi_res (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits) - dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . 
$hex[16] . substr( $hex, 17, 2 ), - // "node" (48 bits) - substr( $hex, 19, 12 ) - ); - } - - /** - * Return an RFC4122 compliant v4 UUID - * - * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND) - * @return string 32 hex characters with no hyphens - * @throws MWException - */ - public static function newRawUUIDv4( $flags = 0 ) { - return str_replace( '-', '', self::newUUIDv4( $flags ) ); - } - - /** - * Get a (time,counter,clock sequence) where (time,counter) is higher - * than any previous (time,counter) value for the given clock sequence. - * This is useful for making UIDs sequential on a per-node bases. - * - * @param string $lockFile Name of a local lock file - * @param $clockSeqSize integer The number of possible clock sequence values - * @param $counterSize integer The number of possible counter values - * @return Array (result of UIDGenerator::millitime(), counter, clock sequence) - * @throws MWException - */ - protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) { - // Get the UID lock file handle - if ( isset( $this->fileHandles[$lockFile] ) ) { - $handle = $this->fileHandles[$lockFile]; - } else { - $handle = fopen( $this->$lockFile, 'cb+' ); - $this->fileHandles[$lockFile] = $handle ?: null; // cache - } - // Acquire the UID lock file - if ( $handle === false ) { - throw new MWException( "Could not open '{$this->$lockFile}'." ); - } elseif ( !flock( $handle, LOCK_EX ) ) { - throw new MWException( "Could not acquire '{$this->$lockFile}'." ); - } - // Get the current timestamp, clock sequence number, last time, and counter - rewind( $handle ); - $data = explode( ' ', fgets( $handle ) ); // " " - $clockChanged = false; // clock set back significantly? 
- if ( count( $data ) == 5 ) { // last UID info already initialized - $clkSeq = (int)$data[0] % $clockSeqSize; - $prevTime = array( (int)$data[1], (int)$data[2] ); - $offset = (int)$data[4] % $counterSize; // random counter offset - $counter = 0; // counter for UIDs with the same timestamp - // Delay until the clock reaches the time of the last ID. - // This detects any microtime() drift among processes. - $time = $this->timeWaitUntil( $prevTime ); - if ( !$time ) { // too long to delay? - $clockChanged = true; // bump clock sequence number - $time = self::millitime(); - } elseif ( $time == $prevTime ) { - // Bump the counter if there are timestamp collisions - $counter = (int)$data[3] % $counterSize; - if ( ++$counter >= $counterSize ) { // sanity (starts at 0) - flock( $handle, LOCK_UN ); // abort - throw new MWException( "Counter overflow for timestamp value." ); - } - } - } else { // last UID info not initialized - $clkSeq = mt_rand( 0, $clockSeqSize - 1 ); - $counter = 0; - $offset = mt_rand( 0, $counterSize - 1 ); - $time = self::millitime(); - } - // microtime() and gettimeofday() can drift from time() at least on Windows. - // The drift is immediate for processes running while the system clock changes. - // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659. - if ( abs( time() - $time[0] ) >= 2 ) { - // We don't want processes using too high or low timestamps to avoid duplicate - // UIDs and clock sequence number churn. This process should just be restarted. - flock( $handle, LOCK_UN ); // abort - throw new MWException( "Process clock is outdated or drifted." ); - } - // If microtime() is synced and a clock change was detected, then the clock went back - if ( $clockChanged ) { - // Bump the clock sequence number and also randomize the counter offset, - // which is useful for UIDs that do not include the clock sequence number. 
- $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize; - $offset = mt_rand( 0, $counterSize - 1 ); - trigger_error( "Clock was set back; sequence number incremented." ); - } - // Update the (clock sequence number, timestamp, counter) - ftruncate( $handle, 0 ); - rewind( $handle ); - fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" ); - fflush( $handle ); - // Release the UID lock file - flock( $handle, LOCK_UN ); - - return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq ); - } - - /** - * Wait till the current timestamp reaches $time and return the current - * timestamp. This returns false if it would have to wait more than 10ms. - * - * @param array $time Result of UIDGenerator::millitime() - * @return Array|bool UIDGenerator::millitime() result or false - */ - protected function timeWaitUntil( array $time ) { - do { - $ct = self::millitime(); - if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php - return $ct; // current timestamp is higher than $time - } - } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 ); - - return false; - } - - /** - * @param array $time Result of UIDGenerator::millitime() - * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201) - */ - protected function millisecondsSinceEpochBinary( array $time ) { - list( $sec, $msec ) = $time; - $ts = 1000 * $sec + $msec; - if ( $ts > pow( 2, 52 ) ) { - throw new MWException( __METHOD__ . 
- ': sorry, this function doesn\'t work after the year 144680' ); - } - return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 ); - } - - /** - * @return Array (current time in seconds, milliseconds since then) - */ - protected static function millitime() { - list( $msec, $sec ) = explode( ' ', microtime() ); - return array( (int)$sec, (int)( $msec * 1000 ) ); - } - - function __destruct() { - array_map( 'fclose', $this->fileHandles ); - } -} diff --git a/includes/XmlTypeCheck.php b/includes/XmlTypeCheck.php deleted file mode 100644 index 92ca7d8010..0000000000 --- a/includes/XmlTypeCheck.php +++ /dev/null @@ -1,184 +0,0 @@ -filterMatch - * @param boolean $isFile (optional) indicates if the first parameter is a - * filename (default, true) or if it is a string (false) - */ - function __construct( $input, $filterCallback = null, $isFile = true ) { - $this->filterCallback = $filterCallback; - if ( $isFile ) { - $this->validateFromFile( $input ); - } else { - $this->validateFromString( $input ); - } - } - - /** - * Alternative constructor: from filename - * - * @param string $fname the filename of an XML document - * @param callable $filterCallback (optional) - * Function to call to do additional custom validity checks from the - * SAX element handler event. This gives you access to the element - * namespace, name, and attributes, but not to text contents. - * Filter should return 'true' to toggle on $this->filterMatch - * @return XmlTypeCheck - */ - public static function newFromFilename( $fname, $filterCallback = null ) { - return new self( $fname, $filterCallback, true ); - } - - /** - * Alternative constructor: from string - * - * @param string $string a string containing an XML element - * @param callable $filterCallback (optional) - * Function to call to do additional custom validity checks from the - * SAX element handler event. This gives you access to the element - * namespace, name, and attributes, but not to text contents. 
- * Filter should return 'true' to toggle on $this->filterMatch - * @return XmlTypeCheck - */ - public static function newFromString( $string, $filterCallback = null ) { - return new self( $string, $filterCallback, false ); - } - - /** - * Get the root element. Simple accessor to $rootElement - * - * @return string - */ - public function getRootElement() { - return $this->rootElement; - } - - /** - * Get an XML parser with the root element handler. - * @see XmlTypeCheck::rootElementOpen() - * @return resource a resource handle for the XML parser - */ - private function getParser() { - $parser = xml_parser_create_ns( 'UTF-8' ); - // case folding violates XML standard, turn it off - xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); - xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false ); - return $parser; - } - - /** - * @param string $fname the filename - */ - private function validateFromFile( $fname ) { - $parser = $this->getParser(); - - if ( file_exists( $fname ) ) { - $file = fopen( $fname, "rb" ); - if ( $file ) { - do { - $chunk = fread( $file, 32768 ); - $ret = xml_parse( $parser, $chunk, feof( $file ) ); - if ( $ret == 0 ) { - $this->wellFormed = false; - fclose( $file ); - xml_parser_free( $parser ); - return; - } - } while ( !feof( $file ) ); - - fclose( $file ); - } - } - $this->wellFormed = true; - - xml_parser_free( $parser ); - } - - /** - * - * @param string $string the XML-input-string to be checked. 
- */ - private function validateFromString( $string ) { - $parser = $this->getParser(); - $ret = xml_parse( $parser, $string, true ); - xml_parser_free( $parser ); - if ( $ret == 0 ) { - $this->wellFormed = false; - return; - } - $this->wellFormed = true; - } - - /** - * @param $parser - * @param $name - * @param $attribs - */ - private function rootElementOpen( $parser, $name, $attribs ) { - $this->rootElement = $name; - - if ( is_callable( $this->filterCallback ) ) { - xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false ); - $this->elementOpen( $parser, $name, $attribs ); - } else { - // We only need the first open element - xml_set_element_handler( $parser, false, false ); - } - } - - /** - * @param $parser - * @param $name - * @param $attribs - */ - private function elementOpen( $parser, $name, $attribs ) { - if ( call_user_func( $this->filterCallback, $name, $attribs ) ) { - // Filter hit! - $this->filterMatch = true; - } - } -} diff --git a/includes/ZipDirectoryReader.php b/includes/ZipDirectoryReader.php deleted file mode 100644 index 307efcea8d..0000000000 --- a/includes/ZipDirectoryReader.php +++ /dev/null @@ -1,712 +0,0 @@ -execute(); - } - - /** The file name */ - var $fileName; - - /** The opened file resource */ - var $file; - - /** The cached length of the file, or null if it has not been loaded yet. 
*/ - var $fileLength; - - /** A segmented cache of the file contents */ - var $buffer; - - /** The file data callback */ - var $callback; - - /** The ZIP64 mode */ - var $zip64 = false; - - /** Stored headers */ - var $eocdr, $eocdr64, $eocdr64Locator; - - var $data; - - /** The "extra field" ID for ZIP64 central directory entries */ - const ZIP64_EXTRA_HEADER = 0x0001; - - /** The segment size for the file contents cache */ - const SEGSIZE = 16384; - - /** The index of the "general field" bit for UTF-8 file names */ - const GENERAL_UTF8 = 11; - - /** The index of the "general field" bit for central directory encryption */ - const GENERAL_CD_ENCRYPTED = 13; - - /** - * Private constructor - */ - protected function __construct( $fileName, $callback, $options ) { - $this->fileName = $fileName; - $this->callback = $callback; - - if ( isset( $options['zip64'] ) ) { - $this->zip64 = $options['zip64']; - } - } - - /** - * Read the directory according to settings in $this. - * - * @return Status - */ - function execute() { - $this->file = fopen( $this->fileName, 'r' ); - $this->data = array(); - if ( !$this->file ) { - return Status::newFatal( 'zip-file-open-error' ); - } - - $status = Status::newGood(); - try { - $this->readEndOfCentralDirectoryRecord(); - if ( $this->zip64 ) { - list( $offset, $size ) = $this->findZip64CentralDirectory(); - $this->readCentralDirectory( $offset, $size ); - } else { - if ( $this->eocdr['CD size'] == 0xffffffff - || $this->eocdr['CD offset'] == 0xffffffff - || $this->eocdr['CD entries total'] == 0xffff ) - { - $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' . - 'but we are in legacy mode. Rejecting this upload is necessary to avoid ' . - 'opening vulnerabilities on clients using OpenJDK 7 or later.' 
); - } - - list( $offset, $size ) = $this->findOldCentralDirectory(); - $this->readCentralDirectory( $offset, $size ); - } - } catch ( ZipDirectoryReaderError $e ) { - $status->fatal( $e->getErrorCode() ); - } - - fclose( $this->file ); - return $status; - } - - /** - * Throw an error, and log a debug message - */ - function error( $code, $debugMessage ) { - wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" ); - throw new ZipDirectoryReaderError( $code ); - } - - /** - * Read the header which is at the end of the central directory, - * unimaginatively called the "end of central directory record" by the ZIP - * spec. - */ - function readEndOfCentralDirectoryRecord() { - $info = array( - 'signature' => 4, - 'disk' => 2, - 'CD start disk' => 2, - 'CD entries this disk' => 2, - 'CD entries total' => 2, - 'CD size' => 4, - 'CD offset' => 4, - 'file comment length' => 2, - ); - $structSize = $this->getStructSize( $info ); - $startPos = $this->getFileLength() - 65536 - $structSize; - if ( $startPos < 0 ) { - $startPos = 0; - } - - $block = $this->getBlock( $startPos ); - $sigPos = strrpos( $block, "PK\x05\x06" ); - if ( $sigPos === false ) { - $this->error( 'zip-wrong-format', - "zip file lacks EOCDR signature. It probably isn't a zip file." 
); - } - - $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info ); - $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length']; - - if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) { - $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' ); - } - if ( $this->eocdr['disk'] !== 0 - || $this->eocdr['CD start disk'] !== 0 ) - { - $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' ); - } - $this->eocdr += $this->unpack( - $block, - array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ), - $sigPos + $structSize ); - $this->eocdr['position'] = $startPos + $sigPos; - } - - /** - * Read the header called the "ZIP64 end of central directory locator". An - * error will be raised if it does not exist. - */ - function readZip64EndOfCentralDirectoryLocator() { - $info = array( - 'signature' => array( 'string', 4 ), - 'eocdr64 start disk' => 4, - 'eocdr64 offset' => 8, - 'number of disks' => 4, - ); - $structSize = $this->getStructSize( $info ); - - $block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size'] - - $structSize, $structSize ); - $this->eocdr64Locator = $data = $this->unpack( $block, $info ); - - if ( $data['signature'] !== "PK\x06\x07" ) { - // Note: Java will allow this and continue to read the - // EOCDR64, so we have to reject the upload, we can't - // just use the EOCDR header instead. - $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' ); - } - } - - /** - * Read the header called the "ZIP64 end of central directory record". It - * may replace the regular "end of central directory record" in ZIP64 files. 
- */ - function readZip64EndOfCentralDirectoryRecord() { - if ( $this->eocdr64Locator['eocdr64 start disk'] != 0 - || $this->eocdr64Locator['number of disks'] != 0 ) - { - $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' ); - } - - $info = array( - 'signature' => array( 'string', 4 ), - 'EOCDR64 size' => 8, - 'version made by' => 2, - 'version needed' => 2, - 'disk' => 4, - 'CD start disk' => 4, - 'CD entries this disk' => 8, - 'CD entries total' => 8, - 'CD size' => 8, - 'CD offset' => 8 - ); - $structSize = $this->getStructSize( $info ); - $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize ); - $this->eocdr64 = $data = $this->unpack( $block, $info ); - if ( $data['signature'] !== "PK\x06\x06" ) { - $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' ); - } - if ( $data['disk'] !== 0 - || $data['CD start disk'] !== 0 ) - { - $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' ); - } - } - - /** - * Find the location of the central directory, as would be seen by a - * non-ZIP64 reader. - * - * @return List containing offset, size and end position. - */ - function findOldCentralDirectory() { - $size = $this->eocdr['CD size']; - $offset = $this->eocdr['CD offset']; - $endPos = $this->eocdr['position']; - - // Some readers use the EOCDR position instead of the offset field - // to find the directory, so to be safe, we check if they both agree. - if ( $offset + $size != $endPos ) { - $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' . - 'of central directory record' ); - } - return array( $offset, $size ); - } - - /** - * Find the location of the central directory, as would be seen by a - * ZIP64-compliant reader. - * - * @return array List containing offset, size and end position. 
- */ - function findZip64CentralDirectory() { - // The spec is ambiguous about the exact rules of precedence between the - // ZIP64 headers and the original headers. Here we follow zip_util.c - // from OpenJDK 7. - $size = $this->eocdr['CD size']; - $offset = $this->eocdr['CD offset']; - $numEntries = $this->eocdr['CD entries total']; - $endPos = $this->eocdr['position']; - if ( $size == 0xffffffff - || $offset == 0xffffffff - || $numEntries == 0xffff ) - { - $this->readZip64EndOfCentralDirectoryLocator(); - - if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) { - $this->readZip64EndOfCentralDirectoryRecord(); - if ( isset( $this->eocdr64['CD offset'] ) ) { - $size = $this->eocdr64['CD size']; - $offset = $this->eocdr64['CD offset']; - $endPos = $this->eocdr64Locator['eocdr64 offset']; - } - } - } - // Some readers use the EOCDR position instead of the offset field - // to find the directory, so to be safe, we check if they both agree. - if ( $offset + $size != $endPos ) { - $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' . 
- 'of central directory record' ); - } - return array( $offset, $size ); - } - - /** - * Read the central directory at the given location - */ - function readCentralDirectory( $offset, $size ) { - $block = $this->getBlock( $offset, $size ); - - $fixedInfo = array( - 'signature' => array( 'string', 4 ), - 'version made by' => 2, - 'version needed' => 2, - 'general bits' => 2, - 'compression method' => 2, - 'mod time' => 2, - 'mod date' => 2, - 'crc-32' => 4, - 'compressed size' => 4, - 'uncompressed size' => 4, - 'name length' => 2, - 'extra field length' => 2, - 'comment length' => 2, - 'disk number start' => 2, - 'internal attrs' => 2, - 'external attrs' => 4, - 'local header offset' => 4, - ); - $fixedSize = $this->getStructSize( $fixedInfo ); - - $pos = 0; - while ( $pos < $size ) { - $data = $this->unpack( $block, $fixedInfo, $pos ); - $pos += $fixedSize; - - if ( $data['signature'] !== "PK\x01\x02" ) { - $this->error( 'zip-bad', 'Invalid signature found in directory entry' ); - } - - $variableInfo = array( - 'name' => array( 'string', $data['name length'] ), - 'extra field' => array( 'string', $data['extra field length'] ), - 'comment' => array( 'string', $data['comment length'] ), - ); - $data += $this->unpack( $block, $variableInfo, $pos ); - $pos += $this->getStructSize( $variableInfo ); - - if ( $this->zip64 && ( - $data['compressed size'] == 0xffffffff - || $data['uncompressed size'] == 0xffffffff - || $data['local header offset'] == 0xffffffff ) ) - { - $zip64Data = $this->unpackZip64Extra( $data['extra field'] ); - if ( $zip64Data ) { - $data = $zip64Data + $data; - } - } - - if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) { - $this->error( 'zip-unsupported', 'central directory encryption is not supported' ); - } - - // Convert the timestamp into MediaWiki format - // For the format, please see the MS-DOS 2.0 Programmer's Reference, - // pages 3-5 and 3-6. 
- $time = $data['mod time']; - $date = $data['mod date']; - - $year = 1980 + ( $date >> 9 ); - $month = ( $date >> 5 ) & 15; - $day = $date & 31; - $hour = ( $time >> 11 ) & 31; - $minute = ( $time >> 5 ) & 63; - $second = ( $time & 31 ) * 2; - $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d", - $year, $month, $day, $hour, $minute, $second ); - - // Convert the character set in the file name - if ( !function_exists( 'iconv' ) - || $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) - { - $name = $data['name']; - } else { - $name = iconv( 'CP437', 'UTF-8', $data['name'] ); - } - - // Compile a data array for the user, with a sensible format - $userData = array( - 'name' => $name, - 'mtime' => $timestamp, - 'size' => $data['uncompressed size'], - ); - call_user_func( $this->callback, $userData ); - } - } - - /** - * Interpret ZIP64 "extra field" data and return an associative array. - * @return array|bool - */ - function unpackZip64Extra( $extraField ) { - $extraHeaderInfo = array( - 'id' => 2, - 'size' => 2, - ); - $extraHeaderSize = $this->getStructSize( $extraHeaderInfo ); - - $zip64ExtraInfo = array( - 'uncompressed size' => 8, - 'compressed size' => 8, - 'local header offset' => 8, - 'disk number start' => 4, - ); - - $extraPos = 0; - while ( $extraPos < strlen( $extraField ) ) { - $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos ); - $extraPos += $extraHeaderSize; - $extra += $this->unpack( $extraField, - array( 'data' => array( 'string', $extra['size'] ) ), - $extraPos ); - $extraPos += $extra['size']; - - if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) { - return $this->unpack( $extra['data'], $zip64ExtraInfo ); - } - } - - return false; - } - - /** - * Get the length of the file. - */ - function getFileLength() { - if ( $this->fileLength === null ) { - $stat = fstat( $this->file ); - $this->fileLength = $stat['size']; - } - return $this->fileLength; - } - - /** - * Get the file contents from a given offset. 
If there are not enough bytes - * in the file to satisfy the request, an exception will be thrown. - * - * @param int $start The byte offset of the start of the block. - * @param int $length The number of bytes to return. If omitted, the remainder - * of the file will be returned. - * - * @return string - */ - function getBlock( $start, $length = null ) { - $fileLength = $this->getFileLength(); - if ( $start >= $fileLength ) { - $this->error( 'zip-bad', "getBlock() requested position $start, " . - "file length is $fileLength" ); - } - if ( $length === null ) { - $length = $fileLength - $start; - } - $end = $start + $length; - if ( $end > $fileLength ) { - $this->error( 'zip-bad', "getBlock() requested end position $end, " . - "file length is $fileLength" ); - } - $startSeg = floor( $start / self::SEGSIZE ); - $endSeg = ceil( $end / self::SEGSIZE ); - - $block = ''; - for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) { - $block .= $this->getSegment( $segIndex ); - } - - $block = substr( $block, - $start - $startSeg * self::SEGSIZE, - $length ); - - if ( strlen( $block ) < $length ) { - $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' ); - } - - return $block; - } - - /** - * Get a section of the file starting at position $segIndex * self::SEGSIZE, - * of length self::SEGSIZE. The result is cached. This is a helper function - * for getBlock(). - * - * If there are not enough bytes in the file to satisfy the request, the - * return value will be truncated. If a request is made for a segment beyond - * the end of the file, an empty string will be returned. 
- * @return string - */ - function getSegment( $segIndex ) { - if ( !isset( $this->buffer[$segIndex] ) ) { - $bytePos = $segIndex * self::SEGSIZE; - if ( $bytePos >= $this->getFileLength() ) { - $this->buffer[$segIndex] = ''; - return ''; - } - if ( fseek( $this->file, $bytePos ) ) { - $this->error( 'zip-bad', "seek to $bytePos failed" ); - } - $seg = fread( $this->file, self::SEGSIZE ); - if ( $seg === false ) { - $this->error( 'zip-bad', "read from $bytePos failed" ); - } - $this->buffer[$segIndex] = $seg; - } - return $this->buffer[$segIndex]; - } - - /** - * Get the size of a structure in bytes. See unpack() for the format of $struct. - * @return int - */ - function getStructSize( $struct ) { - $size = 0; - foreach ( $struct as $type ) { - if ( is_array( $type ) ) { - list( , $fieldSize ) = $type; - $size += $fieldSize; - } else { - $size += $type; - } - } - return $size; - } - - /** - * Unpack a binary structure. This is like the built-in unpack() function - * except nicer. - * - * @param string $string The binary data input - * - * @param array $struct An associative array giving structure members and their - * types. In the key is the field name. The value may be either an - * integer, in which case the field is a little-endian unsigned integer - * encoded in the given number of bytes, or an array, in which case the - * first element of the array is the type name, and the subsequent - * elements are type-dependent parameters. Only one such type is defined: - * - "string": The second array element gives the length of string. - * Not null terminated. - * - * @param int $offset The offset into the string at which to start unpacking. - * - * @throws MWException - * @return array Unpacked associative array. Note that large integers in the input - * may be represented as floating point numbers in the return value, so - * the use of weak comparison is advised. 
- */ - function unpack( $string, $struct, $offset = 0 ) { - $size = $this->getStructSize( $struct ); - if ( $offset + $size > strlen( $string ) ) { - $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' ); - } - - $data = array(); - $pos = $offset; - foreach ( $struct as $key => $type ) { - if ( is_array( $type ) ) { - list( $typeName, $fieldSize ) = $type; - switch ( $typeName ) { - case 'string': - $data[$key] = substr( $string, $pos, $fieldSize ); - $pos += $fieldSize; - break; - default: - throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" ); - } - } else { - // Unsigned little-endian integer - $length = intval( $type ); - - // Calculate the value. Use an algorithm which automatically - // upgrades the value to floating point if necessary. - $value = 0; - for ( $i = $length - 1; $i >= 0; $i-- ) { - $value *= 256; - $value += ord( $string[$pos + $i] ); - } - - // Throw an exception if there was loss of precision - if ( $value > pow( 2, 52 ) ) { - $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' . - 'This could happen if we tried to unpack a 64-bit structure ' . - 'at an invalid location.' ); - } - $data[$key] = $value; - $pos += $length; - } - } - - return $data; - } - - /** - * Returns a bit from a given position in an integer value, converted to - * boolean. - * - * @param $value integer - * @param int $bitIndex The index of the bit, where 0 is the LSB. - * @return bool - */ - function testBit( $value, $bitIndex ) { - return (bool)( ( $value >> $bitIndex ) & 1 ); - } - - /** - * Debugging helper function which dumps a string in hexdump -C format. 
- */ - function hexDump( $s ) { - $n = strlen( $s ); - for ( $i = 0; $i < $n; $i += 16 ) { - printf( "%08X ", $i ); - for ( $j = 0; $j < 16; $j++ ) { - print " "; - if ( $j == 8 ) { - print " "; - } - if ( $i + $j >= $n ) { - print " "; - } else { - printf( "%02X", ord( $s[$i + $j] ) ); - } - } - - print " |"; - for ( $j = 0; $j < 16; $j++ ) { - if ( $i + $j >= $n ) { - print " "; - } elseif ( ctype_print( $s[$i + $j] ) ) { - print $s[$i + $j]; - } else { - print '.'; - } - } - print "|\n"; - } - } -} - -/** - * Internal exception class. Will be caught by private code. - */ -class ZipDirectoryReaderError extends Exception { - var $errorCode; - - function __construct( $code ) { - $this->errorCode = $code; - parent::__construct( "ZipDirectoryReader error: $code" ); - } - - /** - * @return mixed - */ - function getErrorCode() { - return $this->errorCode; - } -} diff --git a/includes/libs/ScopedPHPTimeout.php b/includes/libs/ScopedPHPTimeout.php new file mode 100644 index 0000000000..d1493c30b7 --- /dev/null +++ b/includes/libs/ScopedPHPTimeout.php @@ -0,0 +1,84 @@ + 0 ) { // CLI uses 0 + if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) { + trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." ); + } elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) { + trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." ); + } elseif ( self::$stackDepth > 0 ) { // recursion guard + trigger_error( "Resursive invocation of " . __CLASS__ . " attempted." ); + } else { + $this->oldIgnoreAbort = ignore_user_abort( true ); + $this->oldTimeout = ini_set( 'max_execution_time', $seconds ); + $this->startTime = microtime( true ); + ++self::$stackDepth; + ++self::$totalCalls; // proof against < 1us scopes + } + } + } + + /** + * Restore the original timeout. + * This does not account for the timer value on __construct(). 
+ */ + public function __destruct() { + if ( $this->oldTimeout ) { + $elapsed = microtime( true ) - $this->startTime; + // Note: a limit of 0 is treated as "forever" + set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) ); + // If each scoped timeout is for less than one second, we end up + // restoring the original timeout without any decrease in value. + // Thus web scripts in an infinite loop can run forever unless we + // take some measures to prevent this. Track total time and calls. + self::$totalElapsed += $elapsed; + --self::$stackDepth; + ignore_user_abort( $this->oldIgnoreAbort ); + } + } +} diff --git a/includes/libs/XmlTypeCheck.php b/includes/libs/XmlTypeCheck.php new file mode 100644 index 0000000000..92ca7d8010 --- /dev/null +++ b/includes/libs/XmlTypeCheck.php @@ -0,0 +1,184 @@ +filterMatch + * @param boolean $isFile (optional) indicates if the first parameter is a + * filename (default, true) or if it is a string (false) + */ + function __construct( $input, $filterCallback = null, $isFile = true ) { + $this->filterCallback = $filterCallback; + if ( $isFile ) { + $this->validateFromFile( $input ); + } else { + $this->validateFromString( $input ); + } + } + + /** + * Alternative constructor: from filename + * + * @param string $fname the filename of an XML document + * @param callable $filterCallback (optional) + * Function to call to do additional custom validity checks from the + * SAX element handler event. This gives you access to the element + * namespace, name, and attributes, but not to text contents. 
+ * Filter should return 'true' to toggle on $this->filterMatch + * @return XmlTypeCheck + */ + public static function newFromFilename( $fname, $filterCallback = null ) { + return new self( $fname, $filterCallback, true ); + } + + /** + * Alternative constructor: from string + * + * @param string $string a string containing an XML element + * @param callable $filterCallback (optional) + * Function to call to do additional custom validity checks from the + * SAX element handler event. This gives you access to the element + * namespace, name, and attributes, but not to text contents. + * Filter should return 'true' to toggle on $this->filterMatch + * @return XmlTypeCheck + */ + public static function newFromString( $string, $filterCallback = null ) { + return new self( $string, $filterCallback, false ); + } + + /** + * Get the root element. Simple accessor to $rootElement + * + * @return string + */ + public function getRootElement() { + return $this->rootElement; + } + + /** + * Get an XML parser with the root element handler. 
+ * @see XmlTypeCheck::rootElementOpen() + * @return resource a resource handle for the XML parser + */ + private function getParser() { + $parser = xml_parser_create_ns( 'UTF-8' ); + // case folding violates XML standard, turn it off + xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false ); + xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false ); + return $parser; + } + + /** + * @param string $fname the filename + */ + private function validateFromFile( $fname ) { + $parser = $this->getParser(); + + if ( file_exists( $fname ) ) { + $file = fopen( $fname, "rb" ); + if ( $file ) { + do { + $chunk = fread( $file, 32768 ); + $ret = xml_parse( $parser, $chunk, feof( $file ) ); + if ( $ret == 0 ) { + $this->wellFormed = false; + fclose( $file ); + xml_parser_free( $parser ); + return; + } + } while ( !feof( $file ) ); + + fclose( $file ); + } + } + $this->wellFormed = true; + + xml_parser_free( $parser ); + } + + /** + * + * @param string $string the XML-input-string to be checked. + */ + private function validateFromString( $string ) { + $parser = $this->getParser(); + $ret = xml_parse( $parser, $string, true ); + xml_parser_free( $parser ); + if ( $ret == 0 ) { + $this->wellFormed = false; + return; + } + $this->wellFormed = true; + } + + /** + * @param $parser + * @param $name + * @param $attribs + */ + private function rootElementOpen( $parser, $name, $attribs ) { + $this->rootElement = $name; + + if ( is_callable( $this->filterCallback ) ) { + xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false ); + $this->elementOpen( $parser, $name, $attribs ); + } else { + // We only need the first open element + xml_set_element_handler( $parser, false, false ); + } + } + + /** + * @param $parser + * @param $name + * @param $attribs + */ + private function elementOpen( $parser, $name, $attribs ) { + if ( call_user_func( $this->filterCallback, $name, $attribs ) ) { + // Filter hit! 
+ $this->filterMatch = true; + } + } +} diff --git a/includes/utils/ArrayUtils.php b/includes/utils/ArrayUtils.php new file mode 100644 index 0000000000..97a56e1ce9 --- /dev/null +++ b/includes/utils/ArrayUtils.php @@ -0,0 +1,68 @@ + $w ) { + $sum += $w; + # Do not return keys if they have 0 weight. + # Note that the "all 0 weight" case is handed above + if ( $w > 0 && $sum >= $rand ) { + break; + } + } + return $i; + } +} diff --git a/includes/utils/Cdb.php b/includes/utils/Cdb.php new file mode 100644 index 0000000000..81c0afe171 --- /dev/null +++ b/includes/utils/Cdb.php @@ -0,0 +1,184 @@ +handle = dba_open( $fileName, 'r-', 'cdb' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open CDB file "' . $fileName . '"' ); + } + } + + function close() { + if ( isset( $this->handle ) ) { + dba_close( $this->handle ); + } + unset( $this->handle ); + } + + function get( $key ) { + return dba_fetch( $key, $this->handle ); + } +} + +/** + * Writer class which uses the DBA extension + */ +class CdbWriter_DBA { + var $handle, $realFileName, $tmpFileName; + + function __construct( $fileName ) { + $this->realFileName = $fileName; + $this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff ); + $this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open CDB file for write "' . $fileName . '"' ); + } + } + + function set( $key, $value ) { + return dba_insert( $key, $value, $this->handle ); + } + + function close() { + if ( isset( $this->handle ) ) { + dba_close( $this->handle ); + } + if ( wfIsWindows() ) { + unlink( $this->realFileName ); + } + if ( !rename( $this->tmpFileName, $this->realFileName ) ) { + throw new MWException( 'Unable to move the new CDB file into place.' 
); + } + unset( $this->handle ); + } + + function __destruct() { + if ( isset( $this->handle ) ) { + $this->close(); + } + } +} diff --git a/includes/utils/Cdb_PHP.php b/includes/utils/Cdb_PHP.php new file mode 100644 index 0000000000..a38b9a86b8 --- /dev/null +++ b/includes/utils/Cdb_PHP.php @@ -0,0 +1,493 @@ +> $b ) | ( 0x40000000 >> ( $b - 1 ) ); + } else { + return $a >> $b; + } + } + + /** + * The CDB hash function. + * + * @param $s string + * + * @return + */ + public static function hash( $s ) { + $h = 5381; + for ( $i = 0; $i < strlen( $s ); $i++ ) { + $h5 = ( $h << 5 ) & 0xffffffff; + // Do a 32-bit sum + // Inlined here for speed + $sum = ( $h & 0x3fffffff ) + ( $h5 & 0x3fffffff ); + $h = + ( + ( $sum & 0x40000000 ? 1 : 0 ) + + ( $h & 0x80000000 ? 2 : 0 ) + + ( $h & 0x40000000 ? 1 : 0 ) + + ( $h5 & 0x80000000 ? 2 : 0 ) + + ( $h5 & 0x40000000 ? 1 : 0 ) + ) << 30 + | ( $sum & 0x3fffffff ); + $h ^= ord( $s[$i] ); + $h &= 0xffffffff; + } + return $h; + } +} + +/** + * CDB reader class + */ +class CdbReader_PHP extends CdbReader { + /** The filename */ + var $fileName; + + /** The file handle */ + var $handle; + + /* number of hash slots searched under this key */ + var $loop; + + /* initialized if loop is nonzero */ + var $khash; + + /* initialized if loop is nonzero */ + var $kpos; + + /* initialized if loop is nonzero */ + var $hpos; + + /* initialized if loop is nonzero */ + var $hslots; + + /* initialized if findNext() returns true */ + var $dpos; + + /* initialized if cdb_findnext() returns 1 */ + var $dlen; + + /** + * @param $fileName string + * @throws MWException + */ + function __construct( $fileName ) { + $this->fileName = $fileName; + $this->handle = fopen( $fileName, 'rb' ); + if ( !$this->handle ) { + throw new MWException( 'Unable to open CDB file "' . $this->fileName . '".' 
); + } + $this->findStart(); + } + + function close() { + if ( isset( $this->handle ) ) { + fclose( $this->handle ); + } + unset( $this->handle ); + } + + /** + * @param $key + * @return bool|string + */ + public function get( $key ) { + // strval is required + if ( $this->find( strval( $key ) ) ) { + return $this->read( $this->dlen, $this->dpos ); + } else { + return false; + } + } + + /** + * @param $key + * @param $pos + * @return bool + */ + protected function match( $key, $pos ) { + $buf = $this->read( strlen( $key ), $pos ); + return $buf === $key; + } + + protected function findStart() { + $this->loop = 0; + } + + /** + * @throws MWException + * @param $length + * @param $pos + * @return string + */ + protected function read( $length, $pos ) { + if ( fseek( $this->handle, $pos ) == -1 ) { + // This can easily happen if the internal pointers are incorrect + throw new MWException( + 'Seek failed, file "' . $this->fileName . '" may be corrupted.' ); + } + + if ( $length == 0 ) { + return ''; + } + + $buf = fread( $this->handle, $length ); + if ( $buf === false || strlen( $buf ) !== $length ) { + throw new MWException( + 'Read from CDB file failed, file "' . $this->fileName . '" may be corrupted.' ); + } + return $buf; + } + + /** + * Unpack an unsigned integer and throw an exception if it needs more than 31 bits + * @param $s + * @throws MWException + * @return mixed + */ + protected function unpack31( $s ) { + $data = unpack( 'V', $s ); + if ( $data[1] > 0x7fffffff ) { + throw new MWException( + 'Error in CDB file "' . $this->fileName . '", integer too big.' 
); + } + return $data[1]; + } + + /** + * Unpack a 32-bit signed integer + * @param $s + * @return int + */ + protected function unpackSigned( $s ) { + $data = unpack( 'va/vb', $s ); + return $data['a'] | ( $data['b'] << 16 ); + } + + /** + * @param $key + * @return bool + */ + protected function findNext( $key ) { + if ( !$this->loop ) { + $u = CdbFunctions::hash( $key ); + $buf = $this->read( 8, ( $u << 3 ) & 2047 ); + $this->hslots = $this->unpack31( substr( $buf, 4 ) ); + if ( !$this->hslots ) { + return false; + } + $this->hpos = $this->unpack31( substr( $buf, 0, 4 ) ); + $this->khash = $u; + $u = CdbFunctions::unsignedShiftRight( $u, 8 ); + $u = CdbFunctions::unsignedMod( $u, $this->hslots ); + $u <<= 3; + $this->kpos = $this->hpos + $u; + } + + while ( $this->loop < $this->hslots ) { + $buf = $this->read( 8, $this->kpos ); + $pos = $this->unpack31( substr( $buf, 4 ) ); + if ( !$pos ) { + return false; + } + $this->loop += 1; + $this->kpos += 8; + if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) { + $this->kpos = $this->hpos; + } + $u = $this->unpackSigned( substr( $buf, 0, 4 ) ); + if ( $u === $this->khash ) { + $buf = $this->read( 8, $pos ); + $keyLen = $this->unpack31( substr( $buf, 0, 4 ) ); + if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) { + // Found + $this->dlen = $this->unpack31( substr( $buf, 4 ) ); + $this->dpos = $pos + 8 + $keyLen; + return true; + } + } + } + return false; + } + + /** + * @param $key + * @return bool + */ + protected function find( $key ) { + $this->findStart(); + return $this->findNext( $key ); + } +} + +/** + * CDB writer class + */ +class CdbWriter_PHP extends CdbWriter { + var $handle, $realFileName, $tmpFileName; + + var $hplist; + var $numentries, $pos; + + /** + * @param $fileName string + */ + function __construct( $fileName ) { + $this->realFileName = $fileName; + $this->tmpFileName = $fileName . '.tmp.' . 
mt_rand( 0, 0x7fffffff ); + $this->handle = fopen( $this->tmpFileName, 'wb' ); + if ( !$this->handle ) { + $this->throwException( + 'Unable to open CDB file "' . $this->tmpFileName . '" for write.' ); + } + $this->hplist = array(); + $this->numentries = 0; + $this->pos = 2048; // leaving space for the pointer array, 256 * 8 + if ( fseek( $this->handle, $this->pos ) == -1 ) { + $this->throwException( 'fseek failed in file "' . $this->tmpFileName . '".' ); + } + } + + function __destruct() { + if ( isset( $this->handle ) ) { + $this->close(); + } + } + + /** + * @param $key + * @param $value + * @return + */ + public function set( $key, $value ) { + if ( strval( $key ) === '' ) { + // DBA cross-check hack + return; + } + $this->addbegin( strlen( $key ), strlen( $value ) ); + $this->write( $key ); + $this->write( $value ); + $this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) ); + } + + /** + * @throws MWException + */ + public function close() { + $this->finish(); + if ( isset( $this->handle ) ) { + fclose( $this->handle ); + } + if ( wfIsWindows() && file_exists( $this->realFileName ) ) { + unlink( $this->realFileName ); + } + if ( !rename( $this->tmpFileName, $this->realFileName ) ) { + $this->throwException( 'Unable to move the new CDB file into place.' ); + } + unset( $this->handle ); + } + + /** + * @throws MWException + * @param $buf + */ + protected function write( $buf ) { + $len = fwrite( $this->handle, $buf ); + if ( $len !== strlen( $buf ) ) { + $this->throwException( 'Error writing to CDB file "' . $this->tmpFileName . '".' ); + } + } + + /** + * @throws MWException + * @param $len + */ + protected function posplus( $len ) { + $newpos = $this->pos + $len; + if ( $newpos > 0x7fffffff ) { + $this->throwException( + 'A value in the CDB file "' . $this->tmpFileName . '" is too large.' 
); + } + $this->pos = $newpos; + } + + /** + * @param $keylen + * @param $datalen + * @param $h + */ + protected function addend( $keylen, $datalen, $h ) { + $this->hplist[] = array( + 'h' => $h, + 'p' => $this->pos + ); + + $this->numentries++; + $this->posplus( 8 ); + $this->posplus( $keylen ); + $this->posplus( $datalen ); + } + + /** + * @throws MWException + * @param $keylen + * @param $datalen + */ + protected function addbegin( $keylen, $datalen ) { + if ( $keylen > 0x7fffffff ) { + $this->throwException( 'Key length too long in file "' . $this->tmpFileName . '".' ); + } + if ( $datalen > 0x7fffffff ) { + $this->throwException( 'Data length too long in file "' . $this->tmpFileName . '".' ); + } + $buf = pack( 'VV', $keylen, $datalen ); + $this->write( $buf ); + } + + /** + * @throws MWException + */ + protected function finish() { + // Hack for DBA cross-check + $this->hplist = array_reverse( $this->hplist ); + + // Calculate the number of items that will be in each hashtable + $counts = array_fill( 0, 256, 0 ); + foreach ( $this->hplist as $item ) { + ++ $counts[255 & $item['h']]; + } + + // Fill in $starts with the *end* indexes + $starts = array(); + $pos = 0; + for ( $i = 0; $i < 256; ++$i ) { + $pos += $counts[$i]; + $starts[$i] = $pos; + } + + // Excessively clever and indulgent code to simultaneously fill $packedTables + // with the packed hashtables, and adjust the elements of $starts + // to actually point to the starts instead of the ends. + $packedTables = array_fill( 0, $this->numentries, false ); + foreach ( $this->hplist as $item ) { + $packedTables[--$starts[255 & $item['h']]] = $item; + } + + $final = ''; + for ( $i = 0; $i < 256; ++$i ) { + $count = $counts[$i]; + + // The size of the hashtable will be double the item count. + // The rest of the slots will be empty. 
+ $len = $count + $count; + $final .= pack( 'VV', $this->pos, $len ); + + $hashtable = array(); + for ( $u = 0; $u < $len; ++$u ) { + $hashtable[$u] = array( 'h' => 0, 'p' => 0 ); + } + + // Fill the hashtable, using the next empty slot if the hashed slot + // is taken. + for ( $u = 0; $u < $count; ++$u ) { + $hp = $packedTables[$starts[$i] + $u]; + $where = CdbFunctions::unsignedMod( + CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len ); + while ( $hashtable[$where]['p'] ) { + if ( ++$where == $len ) { + $where = 0; + } + } + $hashtable[$where] = $hp; + } + + // Write the hashtable + for ( $u = 0; $u < $len; ++$u ) { + $buf = pack( 'vvV', + $hashtable[$u]['h'] & 0xffff, + CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ), + $hashtable[$u]['p'] ); + $this->write( $buf ); + $this->posplus( 8 ); + } + } + + // Write the pointer array at the start of the file + rewind( $this->handle ); + if ( ftell( $this->handle ) != 0 ) { + $this->throwException( 'Error rewinding to start of file "' . $this->tmpFileName . '".' ); + } + $this->write( $final ); + } + + /** + * Clean up the temp file and throw an exception + * + * @param $msg string + * @throws MWException + */ + protected function throwException( $msg ) { + if ( $this->handle ) { + fclose( $this->handle ); + unlink( $this->tmpFileName ); + } + throw new MWException( $msg ); + } +} diff --git a/includes/utils/ConfEditor.php b/includes/utils/ConfEditor.php new file mode 100644 index 0000000000..67cb87db1e --- /dev/null +++ b/includes/utils/ConfEditor.php @@ -0,0 +1,1109 @@ +", or false if there isn't one + */ + var $pathStack; + + /** + * The elements of the top of the pathStack for every path encountered, indexed + * by slash-separated path. + */ + var $pathInfo; + + /** + * Next serial number for whitespace placeholder paths (\@extra-N) + */ + var $serial; + + /** + * Editor state. 
This consists of the internal copy/insert operations which + * are applied to the source string to obtain the destination string. + */ + var $edits; + + /** + * Simple entry point for command-line testing + * + * @param $text string + * + * @return string + */ + static function test( $text ) { + try { + $ce = new self( $text ); + $ce->parse(); + } catch ( ConfEditorParseError $e ) { + return $e->getMessage() . "\n" . $e->highlight( $text ); + } + return "OK"; + } + + /** + * Construct a new parser + */ + public function __construct( $text ) { + $this->text = $text; + } + + /** + * Edit the text. Returns the edited text. + * @param array $ops of operations. + * + * Operations are given as an associative array, with members: + * type: One of delete, set, append or insert (required) + * path: The path to operate on (required) + * key: The array key to insert/append, with PHP quotes + * value: The value, with PHP quotes + * + * delete + * Deletes an array element or statement with the specified path. + * e.g. + * array('type' => 'delete', 'path' => '$foo/bar/baz' ) + * is equivalent to the runtime PHP code: + * unset( $foo['bar']['baz'] ); + * + * set + * Sets the value of an array element. If the element doesn't exist, it + * is appended to the array. If it does exist, the value is set, with + * comments and indenting preserved. + * + * append + * Appends a new element to the end of the array. Adds a trailing comma. + * e.g. + * array( 'type' => 'append', 'path', '$foo/bar', + * 'key' => 'baz', 'value' => "'x'" ) + * is like the PHP code: + * $foo['bar']['baz'] = 'x'; + * + * insert + * Insert a new element at the start of the array. + * + * @throws MWException + * @return string + */ + public function edit( $ops ) { + $this->parse(); + + $this->edits = array( + array( 'copy', 0, strlen( $this->text ) ) + ); + foreach ( $ops as $op ) { + $type = $op['type']; + $path = $op['path']; + $value = isset( $op['value'] ) ? $op['value'] : null; + $key = isset( $op['key'] ) ? 
$op['key'] : null; + + switch ( $type ) { + case 'delete': + list( $start, $end ) = $this->findDeletionRegion( $path ); + $this->replaceSourceRegion( $start, $end, false ); + break; + case 'set': + if ( isset( $this->pathInfo[$path] ) ) { + list( $start, $end ) = $this->findValueRegion( $path ); + $encValue = $value; // var_export( $value, true ); + $this->replaceSourceRegion( $start, $end, $encValue ); + break; + } + // No existing path, fall through to append + $slashPos = strrpos( $path, '/' ); + $key = var_export( substr( $path, $slashPos + 1 ), true ); + $path = substr( $path, 0, $slashPos ); + // Fall through + case 'append': + // Find the last array element + $lastEltPath = $this->findLastArrayElement( $path ); + if ( $lastEltPath === false ) { + throw new MWException( "Can't find any element of array \"$path\"" ); + } + $lastEltInfo = $this->pathInfo[$lastEltPath]; + + // Has it got a comma already? + if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) { + // No comma, insert one after the value region + list( , $end ) = $this->findValueRegion( $lastEltPath ); + $this->replaceSourceRegion( $end - 1, $end - 1, ',' ); + } + + // Make the text to insert + list( $start, $end ) = $this->findDeletionRegion( $lastEltPath ); + + if ( $key === null ) { + list( $indent, ) = $this->getIndent( $start ); + $textToInsert = "$indent$value,"; + } else { + list( $indent, $arrowIndent ) = + $this->getIndent( $start, $key, $lastEltInfo['arrowByte'] ); + $textToInsert = "$indent$key$arrowIndent=> $value,"; + } + $textToInsert .= ( $indent === false ? 
' ' : "\n" ); + + // Insert the item + $this->replaceSourceRegion( $end, $end, $textToInsert ); + break; + case 'insert': + // Find first array element + $firstEltPath = $this->findFirstArrayElement( $path ); + if ( $firstEltPath === false ) { + throw new MWException( "Can't find array element of \"$path\"" ); + } + list( $start, ) = $this->findDeletionRegion( $firstEltPath ); + $info = $this->pathInfo[$firstEltPath]; + + // Make the text to insert + if ( $key === null ) { + list( $indent, ) = $this->getIndent( $start ); + $textToInsert = "$indent$value,"; + } else { + list( $indent, $arrowIndent ) = + $this->getIndent( $start, $key, $info['arrowByte'] ); + $textToInsert = "$indent$key$arrowIndent=> $value,"; + } + $textToInsert .= ( $indent === false ? ' ' : "\n" ); + + // Insert the item + $this->replaceSourceRegion( $start, $start, $textToInsert ); + break; + default: + throw new MWException( "Unrecognised operation: \"$type\"" ); + } + } + + // Do the edits + $out = ''; + foreach ( $this->edits as $edit ) { + if ( $edit[0] == 'copy' ) { + $out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] ); + } else { // if ( $edit[0] == 'insert' ) + $out .= $edit[1]; + } + } + + // Do a second parse as a sanity check + $this->text = $out; + try { + $this->parse(); + } catch ( ConfEditorParseError $e ) { + throw new MWException( + "Sorry, ConfEditor broke the file during editing and it won't parse anymore: " . 
+ $e->getMessage() ); + } + return $out; + } + + /** + * Get the variables defined in the text + * @return array( varname => value ) + */ + function getVars() { + $vars = array(); + $this->parse(); + foreach ( $this->pathInfo as $path => $data ) { + if ( $path[0] != '$' ) { + continue; + } + $trimmedPath = substr( $path, 1 ); + $name = $data['name']; + if ( $name[0] == '@' ) { + continue; + } + if ( $name[0] == '$' ) { + $name = substr( $name, 1 ); + } + $parentPath = substr( $trimmedPath, 0, + strlen( $trimmedPath ) - strlen( $name ) ); + if ( substr( $parentPath, -1 ) == '/' ) { + $parentPath = substr( $parentPath, 0, -1 ); + } + + $value = substr( $this->text, $data['valueStartByte'], + $data['valueEndByte'] - $data['valueStartByte'] + ); + $this->setVar( $vars, $parentPath, $name, + $this->parseScalar( $value ) ); + } + return $vars; + } + + /** + * Set a value in an array, unless it's set already. For instance, + * setVar( $arr, 'foo/bar', 'baz', 3 ); will set + * $arr['foo']['bar']['baz'] = 3; + * @param $array array + * @param string $path slash-delimited path + * @param $key mixed Key + * @param $value mixed Value + */ + function setVar( &$array, $path, $key, $value ) { + $pathArr = explode( '/', $path ); + $target =& $array; + if ( $path !== '' ) { + foreach ( $pathArr as $p ) { + if ( !isset( $target[$p] ) ) { + $target[$p] = array(); + } + $target =& $target[$p]; + } + } + if ( !isset( $target[$key] ) ) { + $target[$key] = $value; + } + } + + /** + * Parse a scalar value in PHP + * @return mixed Parsed value + */ + function parseScalar( $str ) { + if ( $str !== '' && $str[0] == '\'' ) { + // Single-quoted string + // @todo FIXME: trim() call is due to mystery bug where whitespace gets + // appended to the token; without it we ended up reading in the + // extra quote on the end! 
+ return strtr( substr( trim( $str ), 1, -1 ), + array( '\\\'' => '\'', '\\\\' => '\\' ) ); + } + if ( $str !== '' && $str[0] == '"' ) { + // Double-quoted string + // @todo FIXME: trim() call is due to mystery bug where whitespace gets + // appended to the token; without it we ended up reading in the + // extra quote on the end! + return stripcslashes( substr( trim( $str ), 1, -1 ) ); + } + if ( substr( $str, 0, 4 ) == 'true' ) { + return true; + } + if ( substr( $str, 0, 5 ) == 'false' ) { + return false; + } + if ( substr( $str, 0, 4 ) == 'null' ) { + return null; + } + // Must be some kind of numeric value, so let PHP's weak typing + // be useful for a change + return $str; + } + + /** + * Replace the byte offset region of the source with $newText. + * Works by adding elements to the $this->edits array. + */ + function replaceSourceRegion( $start, $end, $newText = false ) { + // Split all copy operations with a source corresponding to the region + // in question. + $newEdits = array(); + foreach ( $this->edits as $edit ) { + if ( $edit[0] !== 'copy' ) { + $newEdits[] = $edit; + continue; + } + $copyStart = $edit[1]; + $copyEnd = $edit[2]; + if ( $start >= $copyEnd || $end <= $copyStart ) { + // Outside this region + $newEdits[] = $edit; + continue; + } + if ( ( $start < $copyStart && $end > $copyStart ) + || ( $start < $copyEnd && $end > $copyEnd ) + ) { + throw new MWException( "Overlapping regions found, can't do the edit" ); + } + // Split the copy + $newEdits[] = array( 'copy', $copyStart, $start ); + if ( $newText !== false ) { + $newEdits[] = array( 'insert', $newText ); + } + $newEdits[] = array( 'copy', $end, $copyEnd ); + } + $this->edits = $newEdits; + } + + /** + * Finds the source byte region which you would want to delete, if $pathName + * was to be deleted. Includes the leading spaces and tabs, the trailing line + * break, and any comments in between. 
+ * @param $pathName + * @throws MWException + * @return array + */ + function findDeletionRegion( $pathName ) { + if ( !isset( $this->pathInfo[$pathName] ) ) { + throw new MWException( "Can't find path \"$pathName\"" ); + } + $path = $this->pathInfo[$pathName]; + // Find the start + $this->firstToken(); + while ( $this->pos != $path['startToken'] ) { + $this->nextToken(); + } + $regionStart = $path['startByte']; + for ( $offset = -1; $offset >= -$this->pos; $offset-- ) { + $token = $this->getTokenAhead( $offset ); + if ( !$token->isSkip() ) { + // If there is other content on the same line, don't move the start point + // back, because that will cause the regions to overlap. + $regionStart = $path['startByte']; + break; + } + $lfPos = strrpos( $token->text, "\n" ); + if ( $lfPos === false ) { + $regionStart -= strlen( $token->text ); + } else { + // The line start does not include the LF + $regionStart -= strlen( $token->text ) - $lfPos - 1; + break; + } + } + // Find the end + while ( $this->pos != $path['endToken'] ) { + $this->nextToken(); + } + $regionEnd = $path['endByte']; // past the end + for ( $offset = 0; $offset < count( $this->tokens ) - $this->pos; $offset++ ) { + $token = $this->getTokenAhead( $offset ); + if ( !$token->isSkip() ) { + break; + } + $lfPos = strpos( $token->text, "\n" ); + if ( $lfPos === false ) { + $regionEnd += strlen( $token->text ); + } else { + // This should point past the LF + $regionEnd += $lfPos + 1; + break; + } + } + return array( $regionStart, $regionEnd ); + } + + /** + * Find the byte region in the source corresponding to the value part. + * This includes the quotes, but does not include the trailing comma + * or semicolon. + * + * The end position is the past-the-end (end + 1) value as per convention. 
* @param $pathName
	 * @throws MWException
	 * @return array
	 */
	function findValueRegion( $pathName ) {
		if ( !isset( $this->pathInfo[$pathName] ) ) {
			throw new MWException( "Can't find path \"$pathName\"" );
		}
		$info = $this->pathInfo[$pathName];
		$valueStart = $info['valueStartByte'];
		$valueEnd = $info['valueEndByte'];
		// Either marker being false means no value was recorded for this path
		if ( $valueStart === false || $valueEnd === false ) {
			throw new MWException( "Can't find value region for path \"$pathName\"" );
		}
		return array( $valueStart, $valueEnd );
	}

	/**
	 * Find the path name of the last element in the array.
	 * If the array is empty, this will return the \@extra interstitial element.
	 * If the specified path is not found or is not an array, it will return false.
	 * @return bool|int|string
	 */
	function findLastArrayElement( $path ) {
		$prefix = "$path/";
		$prefixLength = strlen( $path ) + 1;

		// First pass: the last real (non-interstitial) element below $path
		$lastReal = false;
		foreach ( array_keys( $this->pathInfo ) as $candidatePath ) {
			if ( substr( $candidatePath, $prefixLength, 1 ) == '@' ) {
				// Interstitial candidates are ignored in this pass
				continue;
			}
			if ( substr( $candidatePath, 0, $prefixLength ) == $prefix ) {
				$lastReal = $candidatePath;
			} elseif ( $lastReal !== false ) {
				// Already past the run of matching paths
				break;
			}
		}
		if ( $lastReal !== false ) {
			return $lastReal;
		}

		// Second pass: the last element of any kind below $path
		$lastAny = false;
		foreach ( array_keys( $this->pathInfo ) as $candidatePath ) {
			if ( substr( $candidatePath, 0, $prefixLength ) == $prefix ) {
				$lastAny = $candidatePath;
			} elseif ( $lastAny !== false ) {
				break;
			}
		}
		return $lastAny;
	}

	/**
	 * Find the path name of first element in the array.
	 * If the array is empty, this will return the \@extra interstitial element.
	 * If the specified path is not found or is not an array, it will return false.
* @return bool|int|string
	 */
	function findFirstArrayElement( $path ) {
		// Try for an ordinary element: the first path below "$path/" whose
		// next component does not start with "@"
		foreach ( $this->pathInfo as $candidatePath => $info ) {
			$part1 = substr( $candidatePath, 0, strlen( $path ) + 1 );
			$part2 = substr( $candidatePath, strlen( $path ) + 1, 1 );
			if ( $part1 == "$path/" && $part2 != '@' ) {
				return $candidatePath;
			}
		}

		// Try for an interstitial element: the first path of any kind below "$path/"
		foreach ( $this->pathInfo as $candidatePath => $info ) {
			$part1 = substr( $candidatePath, 0, strlen( $path ) + 1 );
			if ( $part1 == "$path/" ) {
				return $candidatePath;
			}
		}
		return false;
	}

	/**
	 * Get the indent string which sits after a given start position.
	 *
	 * The first element of the result is the run of spaces and tabs starting
	 * at $pos, or false if $pos is not at the start of a line. The second
	 * element is a string of spaces to put before the arrow; it is a single
	 * space unless $arrowPos is given and a positive width can be computed.
	 *
	 * @param $pos Byte offset into $this->text
	 * @param $key Key text; its length is subtracted when computing the arrow indent
	 * @param $arrowPos Byte offset of the arrow, or false
	 * @return array ( indent string or false, arrow indent string )
	 */
	function getIndent( $pos, $key = false, $arrowPos = false ) {
		$arrowIndent = ' ';
		// Only measure the indent if $pos is the first byte of a line
		if ( $pos == 0 || $this->text[$pos - 1] == "\n" ) {
			$indentLength = strspn( $this->text, " \t", $pos );
			$indent = substr( $this->text, $pos, $indentLength );
		} else {
			$indent = false;
		}
		if ( $indent !== false && $arrowPos !== false ) {
			// Whatever lies between the end of the key and the arrow
			$arrowIndentLength = $arrowPos - $pos - $indentLength - strlen( $key );
			if ( $arrowIndentLength > 0 ) {
				$arrowIndent = str_repeat( ' ', $arrowIndentLength );
			}
		}
		return array( $indent, $arrowIndent );
	}

	/**
	 * Run the parser on the text. Throws an exception if the string does not
	 * match our defined subset of PHP syntax.
	 *
	 * This is a state machine driven by $this->stateStack: each iteration
	 * pops one state, consumes tokens for it and pushes the states that
	 * should follow. Path regions are recorded as a side effect through
	 * pushPath(), nextPath() and popPath().
	 */
	public function parse() {
		$this->initParse();
		$this->pushState( 'file' );
		// Interstitial path covering whatever precedes the first statement
		$this->pushPath( '@extra-' . ( $this->serial++ ) );
		$token = $this->firstToken();

		while ( !$token->isEnd() ) {
			$state = $this->popState();
			if ( !$state ) {
				$this->error( 'internal error: empty state stack' );
			}

			switch ( $state ) {
				case 'file':
					// Start of the file: open tag, then statements until the end
					$this->expect( T_OPEN_TAG );
					$token = $this->skipSpace();
					if ( $token->isEnd() ) {
						break 2;
					}
					$this->pushState( 'statement', 'file 2' );
					break;
				case 'file 2':
					// Between statements: leave the loop at the end of the
					// input, otherwise parse another statement
					$token = $this->skipSpace();
					if ( $token->isEnd() ) {
						break 2;
					}
					$this->pushState( 'statement', 'file 2' );
					break;
				case 'statement':
					// "$var = expression;" or "$var[key] = expression;"
					$token = $this->skipSpace();
					if ( !$this->validatePath( $token->text ) ) {
						$this->error( "Invalid variable name \"{$token->text}\"" );
					}
					$this->nextPath( $token->text );
					$this->expect( T_VARIABLE );
					$this->skipSpace();
					$arrayAssign = false;
					if ( $this->currentToken()->type == '[' ) {
						$this->nextToken();
						$token = $this->skipSpace();
						if ( !$token->isScalar() ) {
							$this->error( "expected a string or number for the array key" );
						}
						if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
							$text = $this->parseScalar( $token->text );
						} else {
							$text = $token->text;
						}
						if ( !$this->validatePath( $text ) ) {
							$this->error( "Invalid associative array name \"$text\"" );
						}
						// The key becomes a path level below the variable
						$this->pushPath( $text );
						$this->nextToken();
						$this->skipSpace();
						$this->expect( ']' );
						$this->skipSpace();
						$arrayAssign = true;
					}
					$this->expect( '=' );
					$this->skipSpace();
					$this->startPathValue();
					if ( $arrayAssign ) {
						$this->pushState( 'expression', 'array assign end' );
					} else {
						$this->pushState( 'expression', 'statement end' );
					}
					break;
				case 'array assign end':
				case 'statement end':
					$this->endPathValue();
					if ( $state == 'array assign end' ) {
						// Leave the path level pushed for the array key
						$this->popPath();
					}
					$this->skipSpace();
					$this->expect( ';' );
					// Interstitial path covering whatever follows the statement
					$this->nextPath( '@extra-' . ( $this->serial++ ) );
					break;
				case 'expression':
					// Only array(), scalars and plain variables are accepted
					$token = $this->skipSpace();
					if ( $token->type == T_ARRAY ) {
						$this->pushState( 'array' );
					} elseif ( $token->isScalar() ) {
						$this->nextToken();
					} elseif ( $token->type == T_VARIABLE ) {
						$this->nextToken();
					} else {
						$this->error( "expected simple expression" );
					}
					break;
				case 'array':
					$this->skipSpace();
					$this->expect( T_ARRAY );
					$this->skipSpace();
					$this->expect( '(' );
					$this->skipSpace();
					// New path level for the array contents, starting with an
					// interstitial path
					$this->pushPath( '@extra-' . ( $this->serial++ ) );
					if ( $this->isAhead( ')' ) ) {
						// Empty array
						$this->pushState( 'array end' );
					} else {
						$this->pushState( 'element', 'array end' );
					}
					break;
				case 'array end':
					$this->skipSpace();
					$this->popPath();
					$this->expect( ')' );
					break;
				case 'element':
					$token = $this->skipSpace();
					// Look ahead to find the double arrow
					if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) {
						// Found associative element
						$this->pushState( 'assoc-element', 'element end' );
					} else {
						// Not associative
						$this->nextPath( '@next' );
						$this->startPathValue();
						$this->pushState( 'expression', 'element end' );
					}
					break;
				case 'element end':
					$token = $this->skipSpace();
					if ( $token->type == ',' ) {
						$this->endPathValue();
						$this->markComma();
						$this->nextToken();
						$this->nextPath( '@extra-' . ( $this->serial++ ) );
						// Look ahead to find ending bracket
						if ( $this->isAhead( ")" ) ) {
							// Found ending bracket, no continuation
							$this->skipSpace();
						} else {
							// No ending bracket, continue to next element
							$this->pushState( 'element' );
						}
					} elseif ( $token->type == ')' ) {
						// End array
						$this->endPathValue();
					} else {
						$this->error( "expected the next array element or the end of the array" );
					}
					break;
				case 'assoc-element':
					// "key => expression"
					$token = $this->skipSpace();
					if ( !$token->isScalar() ) {
						$this->error( "expected a string or number for the array key" );
					}
					if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
						$text = $this->parseScalar( $token->text );
					} else {
						$text = $token->text;
					}
					if ( !$this->validatePath( $text ) ) {
						$this->error( "Invalid associative array name \"$text\"" );
					}
					$this->nextPath( $text );
					$this->nextToken();
					$this->skipSpace();
					$this->markArrow();
					$this->expect( T_DOUBLE_ARROW );
					$this->skipSpace();
					$this->startPathValue();
					$this->pushState( 'expression' );
					break;
			}
		}
		// States left over here mean the input ended inside a construct
		if ( count( $this->stateStack ) ) {
			$this->error( 'unexpected end of file' );
		}
		$this->popPath();
	}

	/**
	 * Initialise a parse: tokenize $this->text and reset the state stack,
	 * path stack, token position, recorded path info and the serial counter.
	 */
	protected function initParse() {
		$this->tokens = token_get_all( $this->text );
		$this->stateStack = array();
		$this->pathStack = array();
		$this->firstToken();
		$this->pathInfo = array();
		$this->serial = 1;
	}

	/**
	 * Set the parse position. Do not call this except from firstToken() and
	 * nextToken(), there is more to update than just the position.
*/
	protected function setPos( $pos ) {
		$this->pos = $pos;
		// Past the last token there is only the synthetic end token
		$this->currentToken = ( $pos >= count( $this->tokens ) )
			? ConfEditorToken::newEnd()
			: $this->newTokenObj( $this->tokens[$pos] );
		return $this->currentToken;
	}

	/**
	 * Wrap one element of the token_get_all() result in a ConfEditorToken.
	 * Array elements carry the type and text separately; for plain string
	 * elements the string is used as both.
	 * @return ConfEditorToken
	 */
	function newTokenObj( $internalToken ) {
		if ( !is_array( $internalToken ) ) {
			return new ConfEditorToken( $internalToken, $internalToken );
		}
		return new ConfEditorToken( $internalToken[0], $internalToken[1] );
	}

	/**
	 * Move back to the first token and reset the line, column and byte counters
	 */
	function firstToken() {
		$this->setPos( 0 );
		$this->byteNum = 0;
		$this->colNum = 1;
		$this->lineNum = 1;
		$this->prevToken = ConfEditorToken::newEnd();
		return $this->currentToken;
	}

	/**
	 * Accessor for the token at the current position
	 */
	function currentToken() {
		return $this->currentToken;
	}

	/**
	 * Step to the following token, accounting for the text of the token being
	 * left in the line, column and byte counters. Returns the new current token.
	 */
	function nextToken() {
		$leaving = $this->currentToken;
		if ( $leaving ) {
			$text = $leaving->text;
			$length = strlen( $text );
			$lfCount = substr_count( $text, "\n" );
			if ( $lfCount ) {
				$this->lineNum += $lfCount;
				// Column restarts after the last line feed in the token
				$this->colNum = $length - strrpos( $text, "\n" );
			} else {
				$this->colNum += $length;
			}
			$this->byteNum += $length;
		}
		$this->prevToken = $leaving;
		$this->setPos( $this->pos + 1 );
		return $this->currentToken;
	}

	/**
	 * Get the token $offset steps ahead of the current position.
	 * $offset may be negative, to get tokens behind the current position.
* @return ConfEditorToken
	 */
	function getTokenAhead( $offset ) {
		$target = $this->pos + $offset;
		// Anything outside the token list is reported as the end token
		if ( $target < 0 || $target >= count( $this->tokens ) ) {
			return ConfEditorToken::newEnd();
		}
		return $this->newTokenObj( $this->tokens[$target] );
	}

	/**
	 * Move forward until the current token is not a skippable one
	 * (see ConfEditorToken::isSkip()), and return that token
	 */
	function skipSpace() {
		while ( true ) {
			$token = $this->currentToken;
			if ( !$token || !$token->isSkip() ) {
				return $token;
			}
			$this->nextToken();
		}
	}

	/**
	 * Require the current token to be of the given type. On a match the
	 * position is advanced and the next token returned; otherwise a parse
	 * error naming both the expected and the actual type is raised.
	 */
	function expect( $type ) {
		$matches = $this->currentToken && $this->currentToken->type == $type;
		if ( $matches ) {
			return $this->nextToken();
		}
		$expected = $this->getTypeName( $type );
		$actual = $this->getTypeName( $this->currentToken->type );
		$this->error( "expected " . $expected . ", got " . $actual );
	}

	/**
	 * Schedule one or two states. $nextState ends up on top of the stack so
	 * it is popped first; $stateAfterThat, if given, is popped after it.
	 */
	function pushState( $nextState, $stateAfterThat = null ) {
		if ( $stateAfterThat !== null ) {
			array_push( $this->stateStack, $stateAfterThat );
		}
		array_push( $this->stateStack, $nextState );
	}

	/**
	 * Remove and return the state on top of the state stack.
	 * @return mixed
	 */
	function popState() {
		$state = array_pop( $this->stateStack );
		return $state;
	}

	/**
	 * Check a user-supplied path component: it may not contain "/" and may
	 * not start with "@", because both are reserved for internal path keys.
	 * @return bool
	 */
	function validatePath( $path ) {
		if ( strpos( $path, '/' ) !== false ) {
			return false;
		}
		return substr( $path, 0, 1 ) != '@';
	}

	/**
	 * Internal function to update some things at the end of a path region. Do
	 * not call except from popPath() or nextPath().
*/
	function endPath() {
		// The key is the names of all path levels joined with "/"
		$key = '';
		foreach ( $this->pathStack as $pathInfo ) {
			$key = ( $key !== '' ? $key . '/' : $key ) . $pathInfo['name'];
		}
		// $pathInfo is now a copy of the innermost level; complete and store it
		$pathInfo['endByte'] = $this->byteNum;
		$pathInfo['endToken'] = $this->pos;
		$this->pathInfo[$key] = $pathInfo;
	}

	/**
	 * Open a new, deeper path level named $path, starting at the current
	 * token and byte position. Used for example at the start of an array.
	 */
	function pushPath( $path ) {
		$level = count( $this->pathStack ) + 1;
		$this->pathStack[] = array(
			'name' => $path,
			'level' => $level,
			'startByte' => $this->byteNum,
			'startToken' => $this->pos,
			'valueStartToken' => false,
			'valueStartByte' => false,
			'valueEndToken' => false,
			'valueEndByte' => false,
			'nextArrayIndex' => 0,
			'hasComma' => false,
			'arrowByte' => false
		);
	}

	/**
	 * Record the innermost path as ended and drop its level, for example at
	 * the end of an array.
	 */
	function popPath() {
		$this->endPath();
		array_pop( $this->pathStack );
	}

	/**
	 * End the current path and start a sibling on the same level. When $path
	 * is \@next the sibling is named after the level's next numeric array
	 * index, which is then incremented.
	 */
	function nextPath( $path ) {
		$this->endPath();
		$top = count( $this->pathStack ) - 1;
		if ( $path == '@next' ) {
			$path = $this->pathStack[$top]['nextArrayIndex']++;
		}
		$this->pathStack[$top]['name'] = $path;
		// Fresh position markers take precedence; the remaining keys (name,
		// level, nextArrayIndex) carry over from the previous sibling
		$fresh = array(
			'startByte' => $this->byteNum,
			'startToken' => $this->pos,
			'valueStartToken' => false,
			'valueStartByte' => false,
			'valueEndToken' => false,
			'valueEndByte' => false,
			'hasComma' => false,
			'arrowByte' => false,
		);
		$this->pathStack[$top] = $fresh + $this->pathStack[$top];
	}

	/**
	 * Record the current position as the start of the innermost path's value.
	 */
	function startPathValue() {
		$top = count( $this->pathStack ) - 1;
		$this->pathStack[$top]['valueStartToken'] = $this->pos;
		$this->pathStack[$top]['valueStartByte'] = $this->byteNum;
	}

	/**
	 * Mark the end of the value part of a path.
+ */ + function endPathValue() { + $path =& $this->pathStack[count( $this->pathStack ) - 1]; + $path['valueEndToken'] = $this->pos; + $path['valueEndByte'] = $this->byteNum; + } + + /** + * Mark the comma separator in an array element + */ + function markComma() { + $path =& $this->pathStack[count( $this->pathStack ) - 1]; + $path['hasComma'] = true; + } + + /** + * Mark the arrow separator in an associative array element + */ + function markArrow() { + $path =& $this->pathStack[count( $this->pathStack ) - 1]; + $path['arrowByte'] = $this->byteNum; + } + + /** + * Generate a parse error + */ + function error( $msg ) { + throw new ConfEditorParseError( $this, $msg ); + } + + /** + * Get a readable name for the given token type. + * @return string + */ + function getTypeName( $type ) { + if ( is_int( $type ) ) { + return token_name( $type ); + } else { + return "\"$type\""; + } + } + + /** + * Looks ahead to see if the given type is the next token type, starting + * from the current position plus the given offset. Skips any intervening + * whitespace. + * @return bool + */ + function isAhead( $type, $offset = 0 ) { + $ahead = $offset; + $token = $this->getTokenAhead( $offset ); + while ( !$token->isEnd() ) { + if ( $token->isSkip() ) { + $ahead++; + $token = $this->getTokenAhead( $ahead ); + continue; + } elseif ( $token->type == $type ) { + // Found the type + return true; + } else { + // Not found + return false; + } + } + return false; + } + + /** + * Get the previous token object + */ + function prevToken() { + return $this->prevToken; + } + + /** + * Echo a reasonably readable representation of the tokenizer array. + */ + function dumpTokens() { + $out = ''; + foreach ( $this->tokens as $token ) { + $obj = $this->newTokenObj( $token ); + $out .= sprintf( "%-28s %s\n", + $this->getTypeName( $obj->type ), + addcslashes( $obj->text, "\0..\37" ) ); + } + echo "
" . htmlspecialchars( $out ) . "
"; + } +} + +/** + * Exception class for parse errors + */ +class ConfEditorParseError extends MWException { + var $lineNum, $colNum; + function __construct( $editor, $msg ) { + $this->lineNum = $editor->lineNum; + $this->colNum = $editor->colNum; + parent::__construct( "Parse error on line {$editor->lineNum} " . + "col {$editor->colNum}: $msg" ); + } + + function highlight( $text ) { + $lines = StringUtils::explode( "\n", $text ); + foreach ( $lines as $lineNum => $line ) { + if ( $lineNum == $this->lineNum - 1 ) { + return "$line\n" . str_repeat( ' ', $this->colNum - 1 ) . "^\n"; + } + } + return ''; + } + +} + +/** + * Class to wrap a token from the tokenizer. + */ +class ConfEditorToken { + var $type, $text; + + static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING ); + static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT ); + + static function newEnd() { + return new self( 'END', '' ); + } + + function __construct( $type, $text ) { + $this->type = $type; + $this->text = $text; + } + + function isSkip() { + return in_array( $this->type, self::$skipTypes ); + } + + function isScalar() { + return in_array( $this->type, self::$scalarTypes ); + } + + function isEnd() { + return $this->type == 'END'; + } +} diff --git a/includes/utils/HashRing.php b/includes/utils/HashRing.php new file mode 100644 index 0000000000..930f8c0aa1 --- /dev/null +++ b/includes/utils/HashRing.php @@ -0,0 +1,142 @@ + weight) */ + protected $sourceMap = array(); + /** @var Array (location => (start, end)) */ + protected $ring = array(); + + const RING_SIZE = 268435456; // 2^28 + + /** + * @param array $map (location => weight) + */ + public function __construct( array $map ) { + $map = array_filter( $map, function( $w ) { return $w > 0; } ); + if ( !count( $map ) ) { + throw new MWException( "Ring is empty or all weights are zero." 
); + } + $this->sourceMap = $map; + // Sort the locations based on the hash of their names + $hashes = array(); + foreach ( $map as $location => $weight ) { + $hashes[$location] = sha1( $location ); + } + uksort( $map, function ( $a, $b ) use ( $hashes ) { + return strcmp( $hashes[$a], $hashes[$b] ); + } ); + // Fit the map to weight-proportionate one with a space of size RING_SIZE + $sum = array_sum( $map ); + $standardMap = array(); + foreach ( $map as $location => $weight ) { + $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE ); + } + // Build a ring of RING_SIZE spots, with each location at a spot in location hash order + $index = 0; + foreach ( $standardMap as $location => $weight ) { + // Location covers half-closed interval [$index,$index + $weight) + $this->ring[$location] = array( $index, $index + $weight ); + $index += $weight; + } + // Make sure the last location covers what is left + end( $this->ring ); + $this->ring[key( $this->ring )][1] = self::RING_SIZE; + } + + /** + * Get the location of an item on the ring + * + * @param string $item + * @return string Location + */ + public function getLocation( $item ) { + $locations = $this->getLocations( $item, 1 ); + return $locations[0]; + } + + /** + * Get the location of an item on the ring, as well as the next clockwise locations + * + * @param string $item + * @param integer $limit Maximum number of locations to return + * @return array List of locations + */ + public function getLocations( $item, $limit ) { + $locations = array(); + $primaryLocation = null; + $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits + foreach ( $this->ring as $location => $range ) { + if ( count( $locations ) >= $limit ) { + break; + } + // The $primaryLocation is the location the item spot is in. + // After that is reached, keep appending the next locations. 
+ if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) { + if ( $primaryLocation === null ) { + $primaryLocation = $location; + } + $locations[] = $location; + } + } + // If more locations are requested, wrap-around and keep adding them + reset( $this->ring ); + while ( count( $locations ) < $limit ) { + list( $location, ) = each( $this->ring ); + if ( $location === $primaryLocation ) { + break; // don't go in circles + } + $locations[] = $location; + } + return $locations; + } + + /** + * Get the map of locations to weight (ignores 0-weight items) + * + * @return array + */ + public function getLocationWeights() { + return $this->sourceMap; + } + + /** + * Get a new hash ring with a location removed from the ring + * + * @param string $location + * @return HashRing|bool Returns false if no non-zero weighted spots are left + */ + public function newWithoutLocation( $location ) { + $map = $this->sourceMap; + unset( $map[$location] ); + if ( count( $map ) ) { + return new self( $map ); + } + return false; + } +} diff --git a/includes/utils/IP.php b/includes/utils/IP.php new file mode 100644 index 0000000000..73834a5950 --- /dev/null +++ b/includes/utils/IP.php @@ -0,0 +1,761 @@ +", Aaron Schulz + */ + +// Some regex definition to "play" with IP address and IP address blocks + +// An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255 +define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' ); +define( 'RE_IP_ADD', RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE ); +// An IPv4 block is an IP address and a prefix (d1 to d32) +define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' ); +define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX ); + +// An IPv6 address is made up of 8 words (each x0000 to xFFFF). +// However, the "::" abbreviation can be used on consecutive x0000 words. 
define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' );
define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' );
define( 'RE_IPV6_ADD',
	'(?:' . // starts with "::" (including "::")
		':(?::|(?::' . RE_IPV6_WORD . '){1,7})' .
	'|' . // ends with "::" (except "::")
		RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,6}::' .
	'|' . // contains one "::" in the middle (the ^ makes the test fail if none found)
		RE_IPV6_WORD . '(?::((?(-1)|:))?' . RE_IPV6_WORD . '){1,6}(?(-2)|^)' .
	'|' . // contains no "::"
		RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){7}' .
	')'
);
// An IPv6 block is an IP address and a prefix (d0 to d128)
define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX );
// For IPv6 canonicalization (NOT for strict validation; these are quite lax!)
define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' );
define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' );

// This might be useful for regexps used elsewhere, matches any IPv4 or IPv6 address or network
define( 'IP_ADDRESS_STRING',
	'(?:' .
		RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?' . // IPv4
	'|' .
		RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?' . // IPv6
	')'
);

/**
 * A collection of public static functions to play with IP address
 * and IP blocks.
 */
class IP {
	/**
	 * Determine if a string is a valid IP address or network (CIDR prefix).
	 * SIIT IPv4-translated addresses are rejected.
	 * Note: canonicalize() tries to convert translated addresses to IPv4.
	 *
	 * @param string $ip possible IP address
	 * @return Boolean
	 */
	public static function isIPAddress( $ip ) {
		return (bool)preg_match( '/^' . IP_ADDRESS_STRING . '$/', $ip );
	}

	/**
	 * Given a string, determine if it is a valid IPv6 address or network.
	 * Note: Unlike isValid(), this looks for networks too.
	 *
	 * @param string $ip possible IP address
	 * @return Boolean
	 */
	public static function isIPv6( $ip ) {
		return (bool)preg_match( '/^' . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?$/', $ip );
	}

	/**
	 * Given a string, determine if it is a valid IPv4 address or network.
	 * Note: Unlike isValid(), this looks for networks too.
	 *
	 * @param string $ip possible IP address
	 * @return Boolean
	 */
	public static function isIPv4( $ip ) {
		return (bool)preg_match( '/^' . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?$/', $ip );
	}

	/**
	 * Validate an IP address. Ranges are NOT considered valid.
	 * SIIT IPv4-translated addresses are rejected.
	 * Note: canonicalize() tries to convert translated addresses to IPv4.
	 *
	 * @param $ip String
	 * @return Boolean: True if it is valid.
	 */
	public static function isValid( $ip ) {
		return ( preg_match( '/^' . RE_IP_ADD . '$/', $ip )
			|| preg_match( '/^' . RE_IPV6_ADD . '$/', $ip ) );
	}

	/**
	 * Validate an IP Block (valid address WITH a valid prefix).
	 * SIIT IPv4-translated addresses are rejected.
	 * Note: canonicalize() tries to convert translated addresses to IPv4.
	 *
	 * @param $ipblock String
	 * @return Boolean: True if it is valid.
	 */
	public static function isValidBlock( $ipblock ) {
		return ( preg_match( '/^' . RE_IPV6_BLOCK . '$/', $ipblock )
			|| preg_match( '/^' . RE_IP_BLOCK . '$/', $ipblock ) );
	}

	/**
	 * Convert an IP into a verbose, uppercase, normalized form.
	 * IPv6 addresses in octet notation are expanded to 8 words.
	 * IPv4 addresses and strings that are not IP addresses are just trimmed.
	 *
	 * @param string $ip IP address in quad or octet form (CIDR or not).
	 * @return String|null Null if $ip is empty after trimming
	 */
	public static function sanitizeIP( $ip ) {
		$ip = trim( $ip );
		if ( $ip === '' ) {
			return null;
		}
		if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) {
			return $ip; // nothing else to do for IPv4 addresses or invalid ones
		}
		// Convert to upper case
		$ip = strtoupper( $ip );
		// Expand zero abbreviations
		$abbrevPos = strpos( $ip, '::' );
		if ( $abbrevPos !== false ) {
			// We know this is valid IPv6. Find the last index of the
			// address before any CIDR number (e.g. "a:b:c::/24").
			$CIDRStart = strpos( $ip, "/" );
			$addressEnd = ( $CIDRStart !== false )
				? $CIDRStart - 1
				: strlen( $ip ) - 1;
			// If the '::' is at the beginning...
			if ( $abbrevPos == 0 ) {
				$repeat = '0:';
				$extra = ( $ip == '::' ) ? '0' : ''; // for the address '::'
				$pad = 9; // 7+2 (due to '::')
			// If the '::' is at the end...
			} elseif ( $abbrevPos == ( $addressEnd - 1 ) ) {
				$repeat = ':0';
				$extra = '';
				$pad = 9; // 7+2 (due to '::')
			// If the '::' is in the middle...
			} else {
				$repeat = ':0';
				$extra = ':';
				$pad = 8; // 6+2 (due to '::')
			}
			// Replace '::' with as many zero words as there are colons missing
			$ip = str_replace( '::',
				str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
				$ip
			);
		}
		// Remove leading zeros from each bloc as needed
		$ip = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip );
		return $ip;
	}

	/**
	 * Prettify an IP for display to end users.
	 * This will make it more compact and lower-case.
	 *
	 * @param $ip string
	 * @return string
	 */
	public static function prettifyIP( $ip ) {
		$ip = self::sanitizeIP( $ip ); // normalize (removes '::')
		if ( self::isIPv6( $ip ) ) {
			// Split IP into an address and a CIDR
			if ( strpos( $ip, '/' ) !== false ) {
				list( $ip, $cidr ) = explode( '/', $ip, 2 );
			} else {
				list( $ip, $cidr ) = array( $ip, '' );
			}
			// Get the largest slice of words with multiple zeros
			$offset = 0;
			$longest = $longestPos = false;
			while ( preg_match(
				'!(?:^|:)0(?::0)+(?:$|:)!', $ip, $m, PREG_OFFSET_CAPTURE, $offset
			) ) {
				list( $match, $pos ) = $m[0]; // full match
				if ( strlen( $match ) > strlen( $longest ) ) {
					$longest = $match;
					$longestPos = $pos;
				}
				$offset = ( $pos + strlen( $match ) ); // advance
			}
			if ( $longest !== false ) {
				// Replace this portion of the string with the '::' abbreviation
				$ip = substr_replace( $ip, '::', $longestPos, strlen( $longest ) );
			}
			// Add any CIDR back on
			if ( $cidr !== '' ) {
				$ip = "{$ip}/{$cidr}";
			}
			// Convert to lower case to make it more readable
			$ip = strtolower( $ip );
		}
		return $ip;
	}

	/**
	 * Given a host/port string, like one might find in the host part of a URL
	 * per RFC 2732, split the hostname part and the port part and return an
	 * array with an element for each. If there is no port part, the array will
	 * have false in place of the port. If the string was invalid in some way,
	 * false is returned.
	 *
	 * This was easy with IPv4 and was generally done in an ad-hoc way, but
	 * with IPv6 it's somewhat more complicated due to the need to parse the
	 * square brackets and colons.
	 *
	 * A bare IPv6 address is accepted despite the lack of square brackets.
	 *
	 * @param string $both The string with the host and port
	 * @return array|bool array( host, port or false ), or false if invalid
	 */
	public static function splitHostAndPort( $both ) {
		if ( substr( $both, 0, 1 ) === '[' ) {
			// Bracketed IPv6 literal, optionally followed by ":port". The port
			// is a named group so that it can be looked up in $m by name.
			if ( preg_match( '/^\[(' . RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/', $both, $m ) ) {
				if ( isset( $m['port'] ) ) {
					return array( $m[1], intval( $m['port'] ) );
				} else {
					return array( $m[1], false );
				}
			} else {
				// Square bracket found but no IPv6
				return false;
			}
		}
		$numColons = substr_count( $both, ':' );
		if ( $numColons >= 2 ) {
			// Is it a bare IPv6 address?
			if ( preg_match( '/^' . RE_IPV6_ADD . '$/', $both ) ) {
				return array( $both, false );
			} else {
				// Not valid IPv6, but too many colons for anything else
				return false;
			}
		}
		if ( $numColons >= 1 ) {
			// Host:port?
			$bits = explode( ':', $both );
			// The whole port part must be digits, so that trailing garbage
			// such as "80abc" is rejected instead of being read as port 80
			if ( preg_match( '/^\d+$/', $bits[1] ) ) {
				return array( $bits[0], intval( $bits[1] ) );
			} else {
				// Not a valid port
				return false;
			}
		}
		// Plain hostname
		return array( $both, false );
	}

	/**
	 * Given a host name and a port, combine them into host/port string like
	 * you might find in a URL. If the host contains a colon, wrap it in square
	 * brackets like in RFC 2732.
If the port matches the default port, omit
	 * the port specification
	 *
	 * @param $host string
	 * @param $port int
	 * @param $defaultPort bool|int
	 * @return string
	 */
	public static function combineHostAndPort( $host, $port, $defaultPort = false ) {
		// A colon in the host means it needs square brackets
		$hostPart = ( strpos( $host, ':' ) === false ) ? $host : "[$host]";
		$isDefaultPort = $defaultPort !== false && $port == $defaultPort;
		return $isDefaultPort ? $hostPart : "$hostPart:$port";
	}

	/**
	 * Turn an unsigned decimal integer into an IPv6 address in octet notation
	 *
	 * @param $ip_int String: IP address.
	 * @return String
	 */
	public static function toOctet( $ip_int ) {
		$hex = wfBaseConvert( $ip_int, 10, 16, 32, false );
		return self::hexToOctet( $hex );
	}

	/**
	 * Turn the hexadecimal form of an IPv4 or IPv6 address back into the
	 * readable one
	 *
	 * @param string $hex number, with "v6-" prefix if it is IPv6
	 * @return String: quad-dotted (IPv4) or octet notation (IPv6)
	 */
	public static function formatHex( $hex ) {
		$isIPv6 = substr( $hex, 0, 3 ) == 'v6-';
		return $isIPv6
			? self::hexToOctet( substr( $hex, 3 ) )
			: self::hexToQuad( $hex );
	}

	/**
	 * Turn a hexadecimal number into an IPv6 address in octet notation
	 *
	 * @param $ip_hex String: pure hex (no v6- prefix)
	 * @return String (of format a:b:c:d:e:f:g:h)
	 */
	public static function hexToOctet( $ip_hex ) {
		// Left-pad with zeros to 32 hex digits (128 bits)
		$padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
		// The first eight groups of four digits are the words
		$words = array_slice( str_split( $padded, 4 ), 0, 8 );
		$octets = implode( ':', $words );
		// Strip leading zeros from each word
		return preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $octets );
	}

	/**
	 * Turn a hexadecimal number into an IPv4 address in quad-dotted notation
	 *
	 * @param $ip_hex String: pure hex
	 * @return String (of format a.b.c.d)
	 */
	public static function hexToQuad( $ip_hex ) {
		// Left-pad with zeros to 8 hex digits (32 bits)
		$padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
		// Each pair of hex digits is one decimal quad
		$quads = array();
		for ( $start = 0; $start < 8; $start += 2 ) {
			$quads[] = base_convert( substr( $padded, $start, 2 ), 16, 10 );
		}
		return implode( '.', $quads );
	}

	/**
	 * Check that a string really is an IP address and that it lies outside
	 * the private, "this network" and loopback ranges listed below
	 * Comes from ProxyTools.php
	 *
	 * @param $ip String
	 * @return Boolean
	 */
	public static function isPublic( $ip ) {
		static $privateRanges = array(
			array( '10.0.0.0', '10.255.255.255' ), # RFC 1918 (private)
			array( '172.16.0.0', '172.31.255.255' ), # RFC 1918 (private)
			array( '192.168.0.0', '192.168.255.255' ), # RFC 1918 (private)
			array( '0.0.0.0', '0.255.255.255' ), # this network
			array( '127.0.0.0', '127.255.255.255' ), # loopback
		);

		if ( self::isIPv6( $ip ) ) {
			return self::isPublic6( $ip );
		}

		$n = self::toUnsigned( $ip );
		if ( !$n ) {
			return false;
		}
		// ip2long accepts incomplete addresses, as well as some addresses
		// followed by garbage characters. Check that it's really valid.
		if ( $ip != long2ip( $n ) ) {
			return false;
		}

		foreach ( $privateRanges as $range ) {
			$low = self::toUnsigned( $range[0] );
			$high = self::toUnsigned( $range[1] );
			if ( $n >= $low && $n <= $high ) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Determine if an IPv6 address really is an IP address, and if it is public,
	 * i.e.
/**
 * IPv6 counterpart of isPublic(): reports whether an address lies outside
 * the non-public ranges listed below (RFC 4193 local addresses, loopback).
 *
 * @param $ip String
 * @return Boolean
 */
private static function isPublic6( $ip ) {
	static $reservedBlocks = false;
	if ( !$reservedBlocks ) {
		$reservedBlocks = array(
			array( 'fc00::', 'fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' ), # RFC 4193 (local)
			array( '0:0:0:0:0:0:0:1', '0:0:0:0:0:0:0:1' ), # loopback
		);
	}

	// Addresses and bounds are compared in their "v6-" hex form
	$n = self::toHex( $ip );
	foreach ( $reservedBlocks as $block ) {
		list( $first, $last ) = $block;
		$lower = self::toHex( $first );
		$upper = self::toHex( $last );
		if ( $n >= $lower && $n <= $upper ) {
			return false;
		}
	}

	return true;
}

/**
 * Build the zero-padded upper case hexadecimal form of an IP address.
 *
 * IPv6 results carry a "v6-" prefix, a non-hexadecimal string that sorts
 * after the plain IPv4 values and so keeps the two families apart.
 *
 * @param string $ip quad dotted/octet IP address.
 * @return String
 */
public static function toHex( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		return 'v6-' . self::IPv6ToRawHex( $ip );
	}

	$n = self::toUnsigned( $ip );

	// A failed conversion (false) is passed through unchanged
	return ( $n === false ) ? $n : wfBaseConvert( $n, 10, 16, 8, false );
}

/**
 * Flatten an IPv6 address in octet notation into a pure hex string.
 *
 * @param string $ip octet ipv6 IP address.
 * @return String: pure hex (uppercase), or null when sanitizeIP() yields
 *   an empty/false value
 */
private static function IPv6ToRawHex( $ip ) {
	$clean = self::sanitizeIP( $ip );
	if ( !$clean ) {
		return null;
	}

	// Pad every colon-separated word to 4 digits and glue them together
	$words = explode( ':', $clean );
	$hex = '';
	for ( $i = 0, $count = count( $words ); $i < $count; $i++ ) {
		$hex .= str_pad( $words[$i], 4, 0, STR_PAD_LEFT );
	}

	return $hex;
}

/**
 * Convert an IP address in dotted-quad/octet notation to an unsigned integer.
 * Like ip2long() except that negative results are shifted into the
 * unsigned range.
 * Comes from ProxyTools.php
 *
 * @param string $ip quad dotted IP address.
 * @return Mixed: string/int/false
 */
public static function toUnsigned( $ip ) {
	if ( self::isIPv6( $ip ) ) {
		return self::toUnsigned6( $ip );
	}

	$n = ip2long( $ip );
	if ( $n < 0 ) {
		$n += pow( 2, 32 );
		# On 32-bit platforms (and on Windows), 2^32 does not fit into an int,
		# so the sum is a float; hand it back as a string instead.
		if ( is_float( $n ) ) {
			$n = (string)$n;
		}
	}

	return $n;
}

/**
 * Convert an IPv6 address to its value as a decimal string.
 *
 * @param $ip
 * @return String
 */
private static function toUnsigned6( $ip ) {
	$rawHex = self::IPv6ToRawHex( $ip );

	return wfBaseConvert( $rawHex, 16, 10 );
}

/**
 * Split a CIDR network specification into an unsigned network number and
 * a prefix length. IPv6 input is handed to parseCIDR6().
 *
 * @param string $range IP with CIDR prefix
 * @return array(int or string, int), or array( false, false ) if invalid
 */
public static function parseCIDR( $range ) {
	if ( self::isIPv6( $range ) ) {
		return self::parseCIDR6( $range );
	}

	$parts = explode( '/', $range, 2 );
	if ( count( $parts ) != 2 ) {
		return array( false, false );
	}

	list( $address, $bits ) = $parts;
	$network = ip2long( $address );
	$usable = ( $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32 );
	if ( !$usable ) {
		return array( false, false );
	}

	if ( $bits == 0 ) {
		$network = 0;
	} else {
		// Clear the host part of the address
		$network &= ~( ( 1 << ( 32 - $bits ) ) - 1 );
	}
	# Convert to unsigned
	if ( $network < 0 ) {
		$network += pow( 2, 32 );
	}

	return array( $network, $bits );
}
/**
 * Given a string range in a number of formats,
 * return the start and end of the range in hexadecimal.
 *
 * Formats are:
 *     1.2.3.4/24          CIDR
 *     1.2.3.4 - 1.2.3.5   Explicit range
 *     1.2.3.4             Single IP
 *
 *     2001:0db8:85a3::7344/96                       CIDR
 *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
 *     2001:0db8:85a3::7344                          Single IP
 *
 * @param string $range IP range
 * @return array(string, string), or array( false, false ) if unparseable
 */
public static function parseRange( $range ) {
	// CIDR notation
	if ( strpos( $range, '/' ) !== false ) {
		if ( self::isIPv6( $range ) ) {
			return self::parseRange6( $range );
		}
		list( $network, $bits ) = self::parseCIDR( $range );
		if ( $network === false ) {
			$start = $end = false;
		} else {
			$start = sprintf( '%08X', $network );
			// Last address = network + block size - 1
			$end = sprintf( '%08X', $network + pow( 2, ( 32 - $bits ) ) - 1 );
		}
	// Explicit range
	} elseif ( strpos( $range, '-' ) !== false ) {
		list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
		if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) {
			return self::parseRange6( $range );
		}
		if ( self::isIPv4( $start ) && self::isIPv4( $end ) ) {
			$start = self::toUnsigned( $start );
			$end = self::toUnsigned( $end );
			if ( $start > $end ) {
				// Reversed ranges are rejected
				$start = $end = false;
			} else {
				$start = sprintf( '%08X', $start );
				$end = sprintf( '%08X', $end );
			}
		} else {
			$start = $end = false;
		}
	} else {
		# Single IP
		$start = $end = self::toHex( $range );
	}
	if ( $start === false || $end === false ) {
		return array( false, false );
	} else {
		return array( $start, $end );
	}
}

/**
 * Convert a network specification in IPv6 CIDR notation to an
 * integer network (as a decimal string) and a number of bits
 *
 * @param $range
 *
 * @return array(string, int); the network is false when the input
 *   cannot be parsed
 */
private static function parseCIDR6( $range ) {
	# Split into the network address and the prefix length
	$parts = explode( '/', IP::sanitizeIP( $range ), 2 );
	if ( count( $parts ) != 2 ) {
		return array( false, false );
	}
	list( $network, $bits ) = $parts;
	$network = self::IPv6ToRawHex( $network );
	// IPv6ToRawHex() reports failure with null, so testing only against
	// false would let an unparseable network through.
	if ( $network !== null && $network !== false
		&& is_numeric( $bits ) && $bits >= 0 && $bits <= 128
	) {
		if ( $bits == 0 ) {
			$network = "0";
		} else {
			# Native 32 bit functions WONT work here!!!
			# Convert to a padded binary number
			$network = wfBaseConvert( $network, 16, 2, 128 );
			# Truncate the last (128-$bits) bits and replace them with zeros
			$network = str_pad( substr( $network, 0, $bits ), 128, 0, STR_PAD_RIGHT );
			# Convert back to an integer
			$network = wfBaseConvert( $network, 2, 10 );
		}
	} else {
		$network = false;
		$bits = false;
	}
	return array( $network, (int)$bits );
}

/**
 * Given a string range in a number of formats, return the
 * start and end of the range in hexadecimal. For IPv6.
 *
 * Formats are:
 *     2001:0db8:85a3::7344/96                       CIDR
 *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
 *     2001:0db8:85a3::7344                          Single IP
 *
 * @param $range
 *
 * @return array(string, string), or array( false, false ) if unparseable
 */
private static function parseRange6( $range ) {
	# Expand any IPv6 IP
	$range = IP::sanitizeIP( $range );
	// CIDR notation...
	if ( strpos( $range, '/' ) !== false ) {
		list( $network, $bits ) = self::parseCIDR6( $range );
		if ( $network === false ) {
			$start = $end = false;
		} else {
			$start = wfBaseConvert( $network, 10, 16, 32, false );
			# Turn network to binary (again)
			$end = wfBaseConvert( $network, 10, 2, 128 );
			# Truncate the last (128-$bits) bits and replace them with ones
			$end = str_pad( substr( $end, 0, $bits ), 128, 1, STR_PAD_RIGHT );
			# Convert to hex
			$end = wfBaseConvert( $end, 2, 16, 32, false );
			# see toHex() comment
			$start = "v6-$start";
			$end = "v6-$end";
		}
	// Explicit range notation...
	} elseif ( strpos( $range, '-' ) !== false ) {
		list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
		// Compare the fixed-width (32 digit, upper case) hex forms with
		// strcmp(): the decimal strings are too large to compare with ">"
		// without going through lossy floats.
		$start = wfBaseConvert( self::toUnsigned6( $start ), 10, 16, 32, false );
		$end = wfBaseConvert( self::toUnsigned6( $end ), 10, 16, 32, false );
		if ( $start === false || $end === false || strcmp( $start, $end ) > 0 ) {
			// Leave both as false; adding the "v6-" prefix here would
			// hide the failure from the check at the bottom.
			$start = $end = false;
		} else {
			# see toHex() comment
			$start = "v6-$start";
			$end = "v6-$end";
		}
	} else {
		# Single IP
		$start = $end = self::toHex( $range );
	}
	if ( $start === false || $end === false ) {
		return array( false, false );
	} else {
		return array( $start, $end );
	}
}

/**
 * Determine if a given IPv4/IPv6 address is in a given range
 * (any format accepted by parseRange())
 *
 * @param string $addr the address to check against the given range.
 * @param string $range the range to check the given address against.
 * @return Boolean: whether or not the given address is in the given range.
 */
public static function isInRange( $addr, $range ) {
	$hexIP = self::toHex( $addr );
	list( $start, $end ) = self::parseRange( $range );
	// Both sides are hex strings, so a byte-wise comparison orders them
	return ( strcmp( $hexIP, $start ) >= 0 &&
		strcmp( $hexIP, $end ) <= 0 );
}

/**
 * Convert some unusual representations of IPv4 addresses to their
 * canonical dotted quad representation.
 *
 * This currently only checks a few IPV4-to-IPv6 related cases. More
 * unusual representations may be added later.
 *
 * @param string $addr something that might be an IP address
 * @return String: the address unchanged if already valid, a dotted quad
 *   IPv4 address for the recognised special cases, or null
 */
public static function canonicalize( $addr ) {
	// remove zone info (bug 35738)
	$addr = preg_replace( '/\%.*/', '', $addr );

	if ( self::isValid( $addr ) ) {
		return $addr;
	}
	// Turn mapped addresses from ::ce:ffff:1.2.3.4 to 1.2.3.4
	if ( strpos( $addr, ':' ) !== false && strpos( $addr, '.' ) !== false ) {
		$addr = substr( $addr, strrpos( $addr, ':' ) + 1 );
		if ( self::isIPv4( $addr ) ) {
			return $addr;
		}
	}
	// IPv6 loopback address
	$m = array();
	if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $addr, $m ) ) {
		return '127.0.0.1';
	}
	// IPv4-mapped and IPv4-compatible IPv6 addresses
	if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i', $addr, $m ) ) {
		return $m[1];
	}
	// NOTE(review): $m[1]/$m[2] below presumably come from capture groups
	// inside RE_IPV6_WORD, which is defined outside this view - confirm.
	if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD .
		':' . RE_IPV6_WORD . '$/i', $addr, $m ) )
	{
		// Two 16-bit hex words make up the 32-bit IPv4 address
		return long2ip( ( hexdec( $m[1] ) << 16 ) + hexdec( $m[2] ) );
	}

	return null; // give up
}

/**
 * Gets rid of unneeded numbers in quad-dotted/octet IP strings
 * For example, 127.111.113.151/24 -> 127.111.113.0/24
 * @param string $range IP address to normalize
 * @return string
 */
public static function sanitizeRange( $range ) {
	list( /*...*/, $bits ) = self::parseCIDR( $range );
	list( $start, /*...*/ ) = self::parseRange( $range );
	$start = self::formatHex( $start );
	if ( $bits === false ) {
		return $start; // wasn't actually a range
	}
	return "$start/$bits";
}
/**
 * Hash some data while folding in the measured time between loop
 * iterations as an additional source of variation.
 *
 * @param string $data The data to randomly hash.
 * @return String The hashed bytes
 * @author Tim Starling
 */
protected function driftHash( $data ) {
	// Lower bound on the number of rounds, so that a slow machine still
	// collects more than a handful of timing samples
	$roundFloor = self::MIN_ITERATIONS;
	// Time budget in seconds, scaled by the size of one hash output
	$budget = ( self::MSEC_PER_BYTE / 1000 ) * $this->hashLength();
	// Scratch buffer that is poked on every round to cause memory writes
	$scratchSize = 10000000;
	$scratch = str_repeat( ' ', $scratchSize );
	$cursor = 0;

	$began = microtime( true );
	$previous = $began;
	// Keep going until both the round floor and the time budget are met
	for ( $rounds = 0; $rounds < $roundFloor || $previous - $began < $budget; $rounds++ ) {
		// Write somewhere else in the buffer each round; this may vary
		// the time a round takes
		$cursor = ( $cursor + 13 ) % $scratchSize;
		$scratch[$cursor] = ' ';
		// Append the microseconds elapsed since the previous round
		$now = microtime( true );
		$data .= (int)( ( $now - $previous ) * 1000000 );
		// Collapse the accumulated string every 100 rounds
		if ( $rounds % 100 === 0 ) {
			$data = sha1( $data );
		}
		$previous = $now;
	}
	$elapsed = $previous - $began;
	$data = $this->hash( $data );

	wfDebug( __METHOD__ . ": Clock drift calculation " .
		"(time-taken=" . ( $elapsed * 1000 ) . "ms, " .
		"iterations=$rounds, " .
		"time-per-iteration=" . ( $elapsed / $rounds * 1e6 ) . "us)\n" );
	return $data;
}

/**
 * Advance and return the rolling random state. The state is seeded once
 * from initialRandomState() and re-hashed with clock drift on every call.
 * @return string A new weak random state
 */
protected function randomState() {
	static $state = null;
	if ( $state === null ) {
		// Seed from whatever unstable data is available, and hash it
		// straight away so the raw seed never reaches the output
		$seed = $this->initialRandomState();
		$state = $this->hash( $seed );
	}
	// Each call mixes the previous state with fresh clock drift
	$state = $this->driftHash( $state );
	return $state;
}

/**
 * Pick the most preferred hash algorithm that hash_algos() reports and
 * remember the choice for later calls.
 * @throws MWException
 * @return String A hash algorithm
 */
protected function hashAlgo() {
	if ( $this->algo !== null ) {
		return $this->algo;
	}

	// Most preferred first; array_intersect() keeps this order
	$wanted = array( 'whirlpool', 'sha256', 'sha1', 'md5' );
	$usable = array_intersect( $wanted, hash_algos() );

	if ( count( $usable ) > 0 ) {
		$this->algo = reset( $usable );
		wfDebug( __METHOD__ . ": Using the {$this->algo} hash algorithm.\n" );
		return $this->algo;
	}

	// None of the listed algorithms is available. No fallback to the
	// native sha1()/md5() functions is attempted.
	throw new MWException( "Could not find an acceptable hashing function in hash_algos()" );
}
/**
 * Return the byte-length output of the hash algorithm we are
 * using in self::hash and self::hmac.
 *
 * The length is measured once by hashing an empty string and then cached
 * in $this->hashLength.
 *
 * @return int Number of bytes the hash outputs
 */
protected function hashLength() {
	if ( is_null( $this->hashLength ) ) {
		$this->hashLength = strlen( $this->hash( '' ) );
	}
	return $this->hashLength;
}

/**
 * Generate an acceptably unstable one-way-hash of some text
 * making use of the best hash algorithm that we have available.
 *
 * @param $data string
 * @return String A raw (binary) hash of the data
 */
protected function hash( $data ) {
	return hash( $this->hashAlgo(), $data, true );
}

/**
 * Generate an acceptably unstable one-way-hmac of some text
 * making use of the best hash algorithm that we have available.
 *
 * @param $data string
 * @param $key string
 * @return String A raw (binary) HMAC of the data
 */
protected function hmac( $data, $key ) {
	return hash_hmac( $this->hashAlgo(), $data, $key, true );
}

/**
 * Instance implementation behind the static wasStrong().
 *
 * @see self::wasStrong()
 * @throws MWException if no random data has been generated yet
 * @return bool
 */
public function realWasStrong() {
	if ( is_null( $this->strong ) ) {
		throw new MWException( __METHOD__ . ' called before generation of random data' );
	}
	return $this->strong;
}

/**
 * Instance implementation behind the static generate().
 *
 * Fills a function-static buffer from the first source that works, in this
 * order: mcrypt_create_iv(), openssl_random_pseudo_bytes(), /dev/urandom,
 * and finally the hmac/clock-drift fallback. It then returns the first
 * $bytes bytes and keeps the remainder for the next call.
 *
 * @see self::generate()
 * @param int $bytes the number of bytes of random data to generate
 * @param bool $forceStrong allow reading /dev/urandom even when the read
 *   buffer size cannot be controlled
 * @return String Raw binary random data
 */
public function realGenerate( $bytes, $forceStrong = false ) {
	wfProfileIn( __METHOD__ );

	wfDebug( __METHOD__ . ": Generating cryptographic random bytes for " . wfGetAllCallers( 5 ) . "\n" );

	$bytes = floor( $bytes );
	// Leftover bytes survive between calls in this static buffer
	static $buffer = '';
	if ( is_null( $this->strong ) ) {
		// Start out assuming a strong source. The openssl branch replaces
		// this with openssl's own verdict, and the fallback loop at the
		// bottom sets it to false.
		$this->strong = true;
	}

	if ( strlen( $buffer ) < $bytes ) {
		// If available make use of mcrypt_create_iv URANDOM source to generate randomness
		// On unix-like systems this reads from /dev/urandom but does it without any buffering
		// and bypasses openbasedir restrictions, so it's preferable to reading directly
		// On Windows starting in PHP 5.3.0 Windows' native CryptGenRandom is used to generate
		// entropy so this is also preferable to just trying to read urandom because it may work
		// on Windows systems as well.
		if ( function_exists( 'mcrypt_create_iv' ) ) {
			wfProfileIn( __METHOD__ . '-mcrypt' );
			$rem = $bytes - strlen( $buffer );
			$iv = mcrypt_create_iv( $rem, MCRYPT_DEV_URANDOM );
			if ( $iv === false ) {
				wfDebug( __METHOD__ . ": mcrypt_create_iv returned false.\n" );
			} else {
				$buffer .= $iv;
				wfDebug( __METHOD__ . ": mcrypt_create_iv generated " . strlen( $iv ) . " bytes of randomness.\n" );
			}
			wfProfileOut( __METHOD__ . '-mcrypt' );
		}
	}

	if ( strlen( $buffer ) < $bytes ) {
		// If available make use of openssl's random_pseudo_bytes method to attempt to generate randomness.
		// However don't do this on Windows with PHP < 5.3.4 due to a bug:
		// http://stackoverflow.com/questions/1940168/openssl-random-pseudo-bytes-is-slow-php
		// http://git.php.net/?p=php-src.git;a=commitdiff;h=cd62a70863c261b07f6dadedad9464f7e213cad5
		if ( function_exists( 'openssl_random_pseudo_bytes' )
			&& ( !wfIsWindows() || version_compare( PHP_VERSION, '5.3.4', '>=' ) )
		) {
			wfProfileIn( __METHOD__ . '-openssl' );
			$rem = $bytes - strlen( $buffer );
			$openssl_bytes = openssl_random_pseudo_bytes( $rem, $openssl_strong );
			if ( $openssl_bytes === false ) {
				wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes returned false.\n" );
			} else {
				$buffer .= $openssl_bytes;
				wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes generated " . strlen( $openssl_bytes ) . " bytes of " . ( $openssl_strong ? "strong" : "weak" ) . " randomness.\n" );
			}
			if ( strlen( $buffer ) >= $bytes ) {
				// openssl tells us if the random source was strong, if some of our data was generated
				// using it use its say on whether the randomness is strong
				$this->strong = !!$openssl_strong;
			}
			wfProfileOut( __METHOD__ . '-openssl' );
		}
	}

	// Only read from urandom if we can control the buffer size or were passed forceStrong
	if ( strlen( $buffer ) < $bytes && ( function_exists( 'stream_set_read_buffer' ) || $forceStrong ) ) {
		wfProfileIn( __METHOD__ . '-fopen-urandom' );
		$rem = $bytes - strlen( $buffer );
		if ( !function_exists( 'stream_set_read_buffer' ) && $forceStrong ) {
			wfDebug( __METHOD__ . ": Was forced to read from /dev/urandom without control over the buffer size.\n" );
		}
		// /dev/urandom is generally considered the best possible commonly
		// available random source, and is available on most *nix systems.
		// Warnings are suppressed because the device may not exist.
		wfSuppressWarnings();
		$urandom = fopen( "/dev/urandom", "rb" );
		wfRestoreWarnings();

		// Attempt to read all our random data from urandom
		// php's fread always does buffered reads based on the stream's chunk_size
		// so in reality it will usually read more than the amount of data we're
		// asked for and not storing that risks depleting the system's random pool.
		// If stream_set_read_buffer is available set the chunk_size to the amount
		// of data we need. Otherwise read 8k, php's default chunk_size.
		if ( $urandom ) {
			// php's default chunk_size is 8k
			$chunk_size = 1024 * 8;
			if ( function_exists( 'stream_set_read_buffer' ) ) {
				// If possible set the chunk_size to the amount of data we need
				stream_set_read_buffer( $urandom, $rem );
				$chunk_size = $rem;
			}
			$random_bytes = fread( $urandom, max( $chunk_size, $rem ) );
			$buffer .= $random_bytes;
			fclose( $urandom );
			wfDebug( __METHOD__ . ": /dev/urandom generated " . strlen( $random_bytes ) . " bytes of randomness.\n" );
			if ( strlen( $buffer ) >= $bytes ) {
				// urandom is always strong, set to true if all our data was generated using it
				$this->strong = true;
			}
		} else {
			wfDebug( __METHOD__ . ": /dev/urandom could not be opened.\n" );
		}
		wfProfileOut( __METHOD__ . '-fopen-urandom' );
	}

	// If we cannot use or generate enough data from a secure source
	// use this loop to generate a good set of pseudo random data.
	// This works by initializing a random state using a pile of unstable data
	// and continually shoving it through a hash along with a variable salt.
	// We hash the random state with more salt to avoid the state from leaking
	// out and being used to predict the /randomness/ that follows.
	if ( strlen( $buffer ) < $bytes ) {
		wfDebug( __METHOD__ . ": Falling back to using a pseudo random state to generate randomness.\n" );
	}
	while ( strlen( $buffer ) < $bytes ) {
		wfProfileIn( __METHOD__ . '-fallback' );
		$buffer .= $this->hmac( $this->randomState(), mt_rand() );
		// This code is never really cryptographically strong, if we use it
		// at all, then set strong to false.
		$this->strong = false;
		wfProfileOut( __METHOD__ . '-fallback' );
	}

	// Once the buffer has been filled up with enough random data to fulfill
	// the request shift off enough data to handle the request and leave the
	// unused portion left inside the buffer for the next request for random data
	$generated = substr( $buffer, 0, $bytes );
	$buffer = substr( $buffer, $bytes );

	wfDebug( __METHOD__ . ": " . strlen( $buffer ) . " bytes of randomness leftover in the buffer.\n" );

	wfProfileOut( __METHOD__ );
	return $generated;
}

/**
 * Instance implementation behind the static generateHex().
 *
 * @see self::generateHex()
 * @param int $chars the number of hex chars of random data to generate
 * @param bool $forceStrong passed through to generate()
 * @return String Hexadecimal random data, exactly $chars characters long
 */
public function realGenerateHex( $chars, $forceStrong = false ) {
	// hex strings are 2x the length of raw binary so we divide the length in half
	// odd numbers will result in a .5 that leads the generate() being 1 character
	// short, so we use ceil() to ensure that we always have enough bytes
	$bytes = ceil( $chars / 2 );
	// Generate the data and then convert it to a hex string
	$hex = bin2hex( $this->generate( $bytes, $forceStrong ) );
	// A bit of paranoia here, the caller asked for a specific length of string
	// here, and it's possible (eg when given an odd number) that we may actually
	// have at least 1 char more than they asked for. Just in case they made this
	// call intending to insert it into a database that does truncation we don't
	// want to give them too much and end up with their database and their live
	// code having two different values because part of what we gave them is truncated
	// hence, we strip out any run of characters longer than what we were asked for.
	return substr( $hex, 0, $chars );
}

/** Publicly exposed static methods **/

/**
 * Return a singleton instance of MWCryptRand, creating it on first use
 * @return MWCryptRand
 */
protected static function singleton() {
	if ( is_null( self::$singleton ) ) {
		self::$singleton = new self;
	}
	return self::$singleton;
}

/**
 * Return a boolean indicating whether or not the source used for cryptographic
 * random bytes generation in the previously run generate* call
 * was cryptographically strong.
 *
 * @throws MWException if called before any random data was generated
 * @return bool Returns true if the source was strong, false if not.
 */
public static function wasStrong() {
	return self::singleton()->realWasStrong();
}

/**
 * Generate a run of (ideally) cryptographically random data and return
 * it in raw binary form.
 * You can use MWCryptRand::wasStrong() if you wish to know if the source used
 * was cryptographically strong.
 *
 * @param int $bytes the number of bytes of random data to generate
 * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
 *                          strong sources of entropy even if reading from them may steal
 *                          more entropy from the system than optimal.
 * @return String Raw binary random data
 */
public static function generate( $bytes, $forceStrong = false ) {
	return self::singleton()->realGenerate( $bytes, $forceStrong );
}
/**
 * Move to the next accepted element, discarding the cached mapped value.
 */
public function next() {
	$this->cache = array();
	parent::next();
}

/**
 * Restart iteration; also records that the iterator has been rewound so
 * that init() does not rewind it again.
 */
public function rewind() {
	$this->cache = array();
	$this->rewound = true;
	parent::rewind();
}

/**
 * Map the inner iterator's current element through the value callback and
 * decide whether to keep it. With no accept callback every element is
 * kept. The mapped value of a kept element is cached for current().
 *
 * @return mixed true, or whatever the accept callback returned
 */
public function accept() {
	$mapped = call_user_func( $this->vCallback, $this->getInnerIterator()->current() );

	if ( $this->aCallback ) {
		$keep = call_user_func( $this->aCallback, $mapped );
	} else {
		$keep = true;
	}

	if ( $keep ) {
		$this->cache['current'] = $mapped;
	}
	return $keep;
}

/**
 * @return mixed key of the current element
 */
public function key() {
	$this->init();

	return parent::key();
}

/**
 * @return bool whether the iterator currently points at an element
 */
public function valid() {
	$this->init();

	return parent::valid();
}

/**
 * @return mixed the cached mapped value, or null when out of range
 */
public function current() {
	$this->init();

	return parent::valid() ? $this->cache['current'] : null;
}

/**
 * Rewind once on first access, so callers driving the iterator by hand
 * need not call rewind() themselves.
 */
protected function init() {
	if ( $this->rewound ) {
		return;
	}
	$this->rewind();
}
/**
 * Release the caller's reference to a scoped callback.
 * This is the same as just setting the variable to null; the callback
 * itself is invoked from __destruct().
 *
 * @param ScopedCallback $sc
 */
public static function consume( ScopedCallback &$sc = null ) {
	$sc = null;
}

/**
 * Destroy a scoped callback without triggering it: the stored callback is
 * cleared first, then the caller's reference is released.
 *
 * @param ScopedCallback $sc
 */
public static function cancel( ScopedCallback &$sc = null ) {
	if ( $sc instanceof ScopedCallback ) {
		// With no callback left, __destruct() has nothing to run
		$sc->callback = null;
	}
	$sc = null;
}

/**
 * Run the stored callback, if any, when this object is destroyed
 */
function __destruct() {
	if ( $this->callback === null ) {
		return;
	}
	call_user_func( $this->callback );
}
/**
 * Base class for "replacers", objects used in preg_replace_callback() and
 * StringUtils::delimiterReplaceCallback()
 *
 * Subclasses provide a replace( $matches ) method; cb() wraps it in a
 * callable.
 */
class Replacer {

	/**
	 * Get a callable that invokes this object's replace() method.
	 *
	 * @return array
	 */
	function cb() {
		// Objects are passed by handle, so no reference to $this is needed
		return array( $this, 'replace' );
	}
}

/**
 * Class to replace regex matches with a string similar to that used in preg_replace()
 */
class RegexlikeReplacer extends Replacer {
	var $r;

	/**
	 * @param string $r Replacement template; "$0", "$1", ... are
	 *   substituted with the corresponding match groups
	 */
	function __construct( $r ) {
		$this->r = $r;
	}

	/**
	 * @param array $matches
	 * @return string
	 */
	function replace( $matches ) {
		// Build a "$<index>" => matched text map for strtr()
		$pairs = array();
		foreach ( $matches as $i => $match ) {
			$pairs["\$$i"] = $match;
		}
		return strtr( $this->r, $pairs );
	}

}

/**
 * Class to perform secondary replacement within each replacement string
 */
class DoubleReplacer extends Replacer {
	// Declared explicitly, like the sibling replacers, instead of being
	// created on the fly by the constructor
	var $from, $to, $index;

	/**
	 * @param $from Text (or array of texts) to search for inside the match
	 * @param $to Text (or array of texts) to put in its place
	 * @param int $index Which element of the match array to operate on
	 */
	function __construct( $from, $to, $index = 0 ) {
		$this->from = $from;
		$this->to = $to;
		$this->index = $index;
	}

	/**
	 * @param array $matches
	 * @return mixed
	 */
	function replace( $matches ) {
		return str_replace( $this->from, $this->to, $matches[$this->index] );
	}
}

/**
 * Class to perform replacement based on a simple hashtable lookup
 */
class HashtableReplacer extends Replacer {
	var $table, $index;

	/**
	 * @param $table Map from matched text to its replacement
	 * @param int $index Which element of the match array is the lookup key
	 */
	function __construct( $table, $index = 0 ) {
		$this->table = $table;
		$this->index = $index;
	}

	/**
	 * @param array $matches
	 * @return mixed
	 */
	function replace( $matches ) {
		return $this->table[$matches[$this->index]];
	}
}
merge( $other ) { + $this->data = array_merge( $this->data, $other->data ); + $this->fss = false; + } + + /** + * @param string $from + */ + function removePair( $from ) { + unset( $this->data[$from] ); + $this->fss = false; + } + + /** + * @param array $data + */ + function removeArray( $data ) { + foreach ( $data as $from => $to ) { + $this->removePair( $from ); + } + $this->fss = false; + } + + /** + * @param string $subject + * @return string + */ + function replace( $subject ) { + if ( function_exists( 'fss_prep_replace' ) ) { + wfProfileIn( __METHOD__ . '-fss' ); + if ( $this->fss === false ) { + $this->fss = fss_prep_replace( $this->data ); + } + $result = fss_exec_replace( $this->fss, $subject ); + wfProfileOut( __METHOD__ . '-fss' ); + } else { + wfProfileIn( __METHOD__ . '-strtr' ); + $result = strtr( $subject, $this->data ); + wfProfileOut( __METHOD__ . '-strtr' ); + } + return $result; + } +} + +/** + * An iterator which works exactly like: + * + * foreach ( explode( $delim, $s ) as $element ) { + * ... 
+ * } + * + * Except it doesn't use 193 bytes per element + */ +class ExplodeIterator implements Iterator { + // The subject string + var $subject, $subjectLength; + + // The delimiter + var $delim, $delimLength; + + // The position of the start of the line + var $curPos; + + // The position of the start of the next delimiter, or false if there is none + var $endPos; + + // The current token + var $current; + + /** + * Construct an ExplodeIterator + * @param string $delim + * @param string $subject + */ + function __construct( $delim, $subject ) { + $this->subject = $subject; + $this->delim = $delim; + + // Micro-optimisation (theoretical) + $this->subjectLength = strlen( $subject ); + $this->delimLength = strlen( $delim ); + + $this->rewind(); + } + + function rewind() { + $this->curPos = 0; + $this->endPos = strpos( $this->subject, $this->delim ); + $this->refreshCurrent(); + } + + function refreshCurrent() { + if ( $this->curPos === false ) { + $this->current = false; + } elseif ( $this->curPos >= $this->subjectLength ) { + $this->current = ''; + } elseif ( $this->endPos === false ) { + $this->current = substr( $this->subject, $this->curPos ); + } else { + $this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos ); + } + } + + function current() { + return $this->current; + } + + /** + * @return int|bool Current position or boolean false if invalid + */ + function key() { + return $this->curPos; + } + + /** + * @return string + */ + function next() { + if ( $this->endPos === false ) { + $this->curPos = false; + } else { + $this->curPos = $this->endPos + $this->delimLength; + if ( $this->curPos >= $this->subjectLength ) { + $this->endPos = false; + } else { + $this->endPos = strpos( $this->subject, $this->delim, $this->curPos ); + } + } + $this->refreshCurrent(); + return $this->current; + } + + /** + * @return bool + */ + function valid() { + return $this->curPos !== false; + } +} diff --git a/includes/utils/UIDGenerator.php b/includes/utils/UIDGenerator.php new 
file mode 100644 index 0000000000..963e51a4d3 --- /dev/null +++ b/includes/utils/UIDGenerator.php @@ -0,0 +1,337 @@ +nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 ); + $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 ); + // If different processes run as different users, they may have different temp dirs. + // This is dealt with by initializing the clock sequence number and counters randomly. + $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88'; + $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128'; + } + + /** + * @return UIDGenerator + */ + protected static function singleton() { + if ( self::$instance === null ) { + self::$instance = new self(); + } + return self::$instance; + } + + /** + * Get a statistically unique 88-bit unsigned integer ID string. + * The bits of the UID are prefixed with the time (down to the millisecond). + * + * These IDs are suitable as values for the shard key of distributed data. + * If a column uses these as values, it should be declared UNIQUE to handle collisions. + * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. + * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL. + * + * UID generation is serialized on each server (as the node ID is for the whole machine). 
+ * + * @param $base integer Specifies a base other than 10 + * @return string Number + * @throws MWException + */ + public static function newTimestampedUID88( $base = 10 ) { + if ( !is_integer( $base ) || $base > 36 || $base < 2 ) { + throw new MWException( "Base must an integer be between 2 and 36" ); + } + $gen = self::singleton(); + $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 ); + return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base ); + } + + /** + * @param array $info (UIDGenerator::millitime(), counter) + * @return string 88 bits + */ + protected function getTimestampedID88( array $info ) { + list( $time, $counter ) = $info; + // Take the 46 MSBs of "milliseconds since epoch" + $id_bin = $this->millisecondsSinceEpochBinary( $time ); + // Add a 10 bit counter resulting in 56 bits total + $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ); + // Add the 32 bit node ID resulting in 88 bits total + $id_bin .= $this->nodeId32; + // Convert to a 1-27 digit integer string + if ( strlen( $id_bin ) !== 88 ) { + throw new MWException( "Detected overflow for millisecond timestamp." ); + } + return $id_bin; + } + + /** + * Get a statistically unique 128-bit unsigned integer ID string. + * The bits of the UID are prefixed with the time (down to the millisecond). + * + * These IDs are suitable as globally unique IDs, without any enforced uniqueness. + * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast. + * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL. + * + * UID generation is serialized on each server (as the node ID is for the whole machine). 
+ * + * @param $base integer Specifies a base other than 10 + * @return string Number + * @throws MWException + */ + public static function newTimestampedUID128( $base = 10 ) { + if ( !is_integer( $base ) || $base > 36 || $base < 2 ) { + throw new MWException( "Base must be an integer between 2 and 36" ); + } + $gen = self::singleton(); + $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 ); + return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base ); + } + + /** + * @param array $info (UIDGenerator::millitime(), counter, clock sequence) + * @return string 128 bits + */ + protected function getTimestampedID128( array $info ) { + list( $time, $counter, $clkSeq ) = $info; + // Take the 46 MSBs of "milliseconds since epoch" + $id_bin = $this->millisecondsSinceEpochBinary( $time ); + // Add a 20 bit counter resulting in 66 bits total + $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ); + // Add a 14 bit clock sequence number resulting in 80 bits total + $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ); + // Add the 48 bit node ID resulting in 128 bits total + $id_bin .= $this->nodeId48; + // Convert to a 1-39 digit integer string + if ( strlen( $id_bin ) !== 128 ) { + throw new MWException( "Detected overflow for millisecond timestamp." ); + } + return $id_bin; + } + + /** + * Return an RFC4122 compliant v4 UUID + * + * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND) + * @return string + * @throws MWException + */ + public static function newUUIDv4( $flags = 0 ) { + $hex = ( $flags & self::QUICK_RAND ) + ? wfRandomString( 31 ) + : MWCryptRand::generateHex( 31 ); + + return sprintf( '%s-%s-%s-%s-%s', + // "time_low" (32 bits) + substr( $hex, 0, 8 ), + // "time_mid" (16 bits) + substr( $hex, 8, 4 ), + // "time_hi_and_version" (16 bits) + '4' . substr( $hex, 12, 3 ), + // "clk_seq_hi_res (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits) + dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . 
$hex[16] . substr( $hex, 17, 2 ), + // "node" (48 bits) + substr( $hex, 19, 12 ) + ); + } + + /** + * Return an RFC4122 compliant v4 UUID + * + * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND) + * @return string 32 hex characters with no hyphens + * @throws MWException + */ + public static function newRawUUIDv4( $flags = 0 ) { + return str_replace( '-', '', self::newUUIDv4( $flags ) ); + } + + /** + * Get a (time,counter,clock sequence) where (time,counter) is higher + * than any previous (time,counter) value for the given clock sequence. + * This is useful for making UIDs sequential on a per-node basis. + * + * @param string $lockFile Name of a local lock file + * @param $clockSeqSize integer The number of possible clock sequence values + * @param $counterSize integer The number of possible counter values + * @return Array (result of UIDGenerator::millitime(), counter, clock sequence) + * @throws MWException + */ + protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) { + // Get the UID lock file handle + if ( isset( $this->fileHandles[$lockFile] ) ) { + $handle = $this->fileHandles[$lockFile]; + } else { + $handle = fopen( $this->$lockFile, 'cb+' ); + $this->fileHandles[$lockFile] = $handle ?: null; // cache + } + // Acquire the UID lock file + if ( $handle === false ) { + throw new MWException( "Could not open '{$this->$lockFile}'." ); + } elseif ( !flock( $handle, LOCK_EX ) ) { + throw new MWException( "Could not acquire '{$this->$lockFile}'." ); + } + // Get the current timestamp, clock sequence number, last time, and counter + rewind( $handle ); + $data = explode( ' ', fgets( $handle ) ); // "clkSeq sec msec counter offset" + $clockChanged = false; // clock set back significantly? 
+ if ( count( $data ) == 5 ) { // last UID info already initialized + $clkSeq = (int)$data[0] % $clockSeqSize; + $prevTime = array( (int)$data[1], (int)$data[2] ); + $offset = (int)$data[4] % $counterSize; // random counter offset + $counter = 0; // counter for UIDs with the same timestamp + // Delay until the clock reaches the time of the last ID. + // This detects any microtime() drift among processes. + $time = $this->timeWaitUntil( $prevTime ); + if ( !$time ) { // too long to delay? + $clockChanged = true; // bump clock sequence number + $time = self::millitime(); + } elseif ( $time == $prevTime ) { + // Bump the counter if there are timestamp collisions + $counter = (int)$data[3] % $counterSize; + if ( ++$counter >= $counterSize ) { // sanity (starts at 0) + flock( $handle, LOCK_UN ); // abort + throw new MWException( "Counter overflow for timestamp value." ); + } + } + } else { // last UID info not initialized + $clkSeq = mt_rand( 0, $clockSeqSize - 1 ); + $counter = 0; + $offset = mt_rand( 0, $counterSize - 1 ); + $time = self::millitime(); + } + // microtime() and gettimeofday() can drift from time() at least on Windows. + // The drift is immediate for processes running while the system clock changes. + // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659. + if ( abs( time() - $time[0] ) >= 2 ) { + // We don't want processes using too high or low timestamps to avoid duplicate + // UIDs and clock sequence number churn. This process should just be restarted. + flock( $handle, LOCK_UN ); // abort + throw new MWException( "Process clock is outdated or drifted." ); + } + // If microtime() is synced and a clock change was detected, then the clock went back + if ( $clockChanged ) { + // Bump the clock sequence number and also randomize the counter offset, + // which is useful for UIDs that do not include the clock sequence number. 
+ $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize; + $offset = mt_rand( 0, $counterSize - 1 ); + trigger_error( "Clock was set back; sequence number incremented." ); + } + // Update the (clock sequence number, timestamp, counter) + ftruncate( $handle, 0 ); + rewind( $handle ); + fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" ); + fflush( $handle ); + // Release the UID lock file + flock( $handle, LOCK_UN ); + + return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq ); + } + + /** + * Wait till the current timestamp reaches $time and return the current + * timestamp. This returns false if it would have to wait more than 10ms. + * + * @param array $time Result of UIDGenerator::millitime() + * @return Array|bool UIDGenerator::millitime() result or false + */ + protected function timeWaitUntil( array $time ) { + do { + $ct = self::millitime(); + if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php + return $ct; // current timestamp is higher than $time + } + } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 ); + + return false; + } + + /** + * @param array $time Result of UIDGenerator::millitime() + * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201) + */ + protected function millisecondsSinceEpochBinary( array $time ) { + list( $sec, $msec ) = $time; + $ts = 1000 * $sec + $msec; + if ( $ts > pow( 2, 52 ) ) { + throw new MWException( __METHOD__ . 
+ ': sorry, this function doesn\'t work after the year 144680' ); + } + return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 ); + } + + /** + * @return Array (current time in seconds, milliseconds since then) + */ + protected static function millitime() { + list( $msec, $sec ) = explode( ' ', microtime() ); + return array( (int)$sec, (int)( $msec * 1000 ) ); + } + + function __destruct() { + array_map( 'fclose', $this->fileHandles ); + } +} diff --git a/includes/utils/ZipDirectoryReader.php b/includes/utils/ZipDirectoryReader.php new file mode 100644 index 0000000000..307efcea8d --- /dev/null +++ b/includes/utils/ZipDirectoryReader.php @@ -0,0 +1,712 @@ +execute(); + } + + /** The file name */ + var $fileName; + + /** The opened file resource */ + var $file; + + /** The cached length of the file, or null if it has not been loaded yet. */ + var $fileLength; + + /** A segmented cache of the file contents */ + var $buffer; + + /** The file data callback */ + var $callback; + + /** The ZIP64 mode */ + var $zip64 = false; + + /** Stored headers */ + var $eocdr, $eocdr64, $eocdr64Locator; + + var $data; + + /** The "extra field" ID for ZIP64 central directory entries */ + const ZIP64_EXTRA_HEADER = 0x0001; + + /** The segment size for the file contents cache */ + const SEGSIZE = 16384; + + /** The index of the "general field" bit for UTF-8 file names */ + const GENERAL_UTF8 = 11; + + /** The index of the "general field" bit for central directory encryption */ + const GENERAL_CD_ENCRYPTED = 13; + + /** + * Private constructor + */ + protected function __construct( $fileName, $callback, $options ) { + $this->fileName = $fileName; + $this->callback = $callback; + + if ( isset( $options['zip64'] ) ) { + $this->zip64 = $options['zip64']; + } + } + + /** + * Read the directory according to settings in $this. 
+ * + * @return Status + */ + function execute() { + $this->file = fopen( $this->fileName, 'r' ); + $this->data = array(); + if ( !$this->file ) { + return Status::newFatal( 'zip-file-open-error' ); + } + + $status = Status::newGood(); + try { + $this->readEndOfCentralDirectoryRecord(); + if ( $this->zip64 ) { + list( $offset, $size ) = $this->findZip64CentralDirectory(); + $this->readCentralDirectory( $offset, $size ); + } else { + if ( $this->eocdr['CD size'] == 0xffffffff + || $this->eocdr['CD offset'] == 0xffffffff + || $this->eocdr['CD entries total'] == 0xffff ) + { + $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' . + 'but we are in legacy mode. Rejecting this upload is necessary to avoid ' . + 'opening vulnerabilities on clients using OpenJDK 7 or later.' ); + } + + list( $offset, $size ) = $this->findOldCentralDirectory(); + $this->readCentralDirectory( $offset, $size ); + } + } catch ( ZipDirectoryReaderError $e ) { + $status->fatal( $e->getErrorCode() ); + } + + fclose( $this->file ); + return $status; + } + + /** + * Throw an error, and log a debug message + */ + function error( $code, $debugMessage ) { + wfDebug( __CLASS__ . ": Fatal error: $debugMessage\n" ); + throw new ZipDirectoryReaderError( $code ); + } + + /** + * Read the header which is at the end of the central directory, + * unimaginatively called the "end of central directory record" by the ZIP + * spec. 
+ */ + function readEndOfCentralDirectoryRecord() { + $info = array( + 'signature' => 4, + 'disk' => 2, + 'CD start disk' => 2, + 'CD entries this disk' => 2, + 'CD entries total' => 2, + 'CD size' => 4, + 'CD offset' => 4, + 'file comment length' => 2, + ); + $structSize = $this->getStructSize( $info ); + $startPos = $this->getFileLength() - 65536 - $structSize; + if ( $startPos < 0 ) { + $startPos = 0; + } + + $block = $this->getBlock( $startPos ); + $sigPos = strrpos( $block, "PK\x05\x06" ); + if ( $sigPos === false ) { + $this->error( 'zip-wrong-format', + "zip file lacks EOCDR signature. It probably isn't a zip file." ); + } + + $this->eocdr = $this->unpack( substr( $block, $sigPos ), $info ); + $this->eocdr['EOCDR size'] = $structSize + $this->eocdr['file comment length']; + + if ( $structSize + $this->eocdr['file comment length'] != strlen( $block ) - $sigPos ) { + $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' ); + } + if ( $this->eocdr['disk'] !== 0 + || $this->eocdr['CD start disk'] !== 0 ) + { + $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' ); + } + $this->eocdr += $this->unpack( + $block, + array( 'file comment' => array( 'string', $this->eocdr['file comment length'] ) ), + $sigPos + $structSize ); + $this->eocdr['position'] = $startPos + $sigPos; + } + + /** + * Read the header called the "ZIP64 end of central directory locator". An + * error will be raised if it does not exist. 
+ */ + function readZip64EndOfCentralDirectoryLocator() { + $info = array( + 'signature' => array( 'string', 4 ), + 'eocdr64 start disk' => 4, + 'eocdr64 offset' => 8, + 'number of disks' => 4, + ); + $structSize = $this->getStructSize( $info ); + + $block = $this->getBlock( $this->getFileLength() - $this->eocdr['EOCDR size'] + - $structSize, $structSize ); + $this->eocdr64Locator = $data = $this->unpack( $block, $info ); + + if ( $data['signature'] !== "PK\x06\x07" ) { + // Note: Java will allow this and continue to read the + // EOCDR64, so we have to reject the upload, we can't + // just use the EOCDR header instead. + $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' ); + } + } + + /** + * Read the header called the "ZIP64 end of central directory record". It + * may replace the regular "end of central directory record" in ZIP64 files. + */ + function readZip64EndOfCentralDirectoryRecord() { + if ( $this->eocdr64Locator['eocdr64 start disk'] != 0 + || $this->eocdr64Locator['number of disks'] != 0 ) + { + $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' ); + } + + $info = array( + 'signature' => array( 'string', 4 ), + 'EOCDR64 size' => 8, + 'version made by' => 2, + 'version needed' => 2, + 'disk' => 4, + 'CD start disk' => 4, + 'CD entries this disk' => 8, + 'CD entries total' => 8, + 'CD size' => 8, + 'CD offset' => 8 + ); + $structSize = $this->getStructSize( $info ); + $block = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $structSize ); + $this->eocdr64 = $data = $this->unpack( $block, $info ); + if ( $data['signature'] !== "PK\x06\x06" ) { + $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' ); + } + if ( $data['disk'] !== 0 + || $data['CD start disk'] !== 0 ) + { + $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' ); + } + } + + /** + * Find the location of the central directory, as would be seen by a + * non-ZIP64 reader. 
+ * + * @return array List containing offset and size. + */ + function findOldCentralDirectory() { + $size = $this->eocdr['CD size']; + $offset = $this->eocdr['CD offset']; + $endPos = $this->eocdr['position']; + + // Some readers use the EOCDR position instead of the offset field + // to find the directory, so to be safe, we check if they both agree. + if ( $offset + $size != $endPos ) { + $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' . + 'of central directory record' ); + } + return array( $offset, $size ); + } + + /** + * Find the location of the central directory, as would be seen by a + * ZIP64-compliant reader. + * + * @return array List containing offset and size. + */ + function findZip64CentralDirectory() { + // The spec is ambiguous about the exact rules of precedence between the + // ZIP64 headers and the original headers. Here we follow zip_util.c + // from OpenJDK 7. + $size = $this->eocdr['CD size']; + $offset = $this->eocdr['CD offset']; + $numEntries = $this->eocdr['CD entries total']; + $endPos = $this->eocdr['position']; + if ( $size == 0xffffffff + || $offset == 0xffffffff + || $numEntries == 0xffff ) + { + $this->readZip64EndOfCentralDirectoryLocator(); + + if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) { + $this->readZip64EndOfCentralDirectoryRecord(); + if ( isset( $this->eocdr64['CD offset'] ) ) { + $size = $this->eocdr64['CD size']; + $offset = $this->eocdr64['CD offset']; + $endPos = $this->eocdr64Locator['eocdr64 offset']; + } + } + } + // Some readers use the EOCDR position instead of the offset field + // to find the directory, so to be safe, we check if they both agree. + if ( $offset + $size != $endPos ) { + $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' . 
+ 'of central directory record' ); + } + return array( $offset, $size ); + } + + /** + * Read the central directory at the given location + */ + function readCentralDirectory( $offset, $size ) { + $block = $this->getBlock( $offset, $size ); + + $fixedInfo = array( + 'signature' => array( 'string', 4 ), + 'version made by' => 2, + 'version needed' => 2, + 'general bits' => 2, + 'compression method' => 2, + 'mod time' => 2, + 'mod date' => 2, + 'crc-32' => 4, + 'compressed size' => 4, + 'uncompressed size' => 4, + 'name length' => 2, + 'extra field length' => 2, + 'comment length' => 2, + 'disk number start' => 2, + 'internal attrs' => 2, + 'external attrs' => 4, + 'local header offset' => 4, + ); + $fixedSize = $this->getStructSize( $fixedInfo ); + + $pos = 0; + while ( $pos < $size ) { + $data = $this->unpack( $block, $fixedInfo, $pos ); + $pos += $fixedSize; + + if ( $data['signature'] !== "PK\x01\x02" ) { + $this->error( 'zip-bad', 'Invalid signature found in directory entry' ); + } + + $variableInfo = array( + 'name' => array( 'string', $data['name length'] ), + 'extra field' => array( 'string', $data['extra field length'] ), + 'comment' => array( 'string', $data['comment length'] ), + ); + $data += $this->unpack( $block, $variableInfo, $pos ); + $pos += $this->getStructSize( $variableInfo ); + + if ( $this->zip64 && ( + $data['compressed size'] == 0xffffffff + || $data['uncompressed size'] == 0xffffffff + || $data['local header offset'] == 0xffffffff ) ) + { + $zip64Data = $this->unpackZip64Extra( $data['extra field'] ); + if ( $zip64Data ) { + $data = $zip64Data + $data; + } + } + + if ( $this->testBit( $data['general bits'], self::GENERAL_CD_ENCRYPTED ) ) { + $this->error( 'zip-unsupported', 'central directory encryption is not supported' ); + } + + // Convert the timestamp into MediaWiki format + // For the format, please see the MS-DOS 2.0 Programmer's Reference, + // pages 3-5 and 3-6. 
+ $time = $data['mod time']; + $date = $data['mod date']; + + $year = 1980 + ( $date >> 9 ); + $month = ( $date >> 5 ) & 15; + $day = $date & 31; + $hour = ( $time >> 11 ) & 31; + $minute = ( $time >> 5 ) & 63; + $second = ( $time & 31 ) * 2; + $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d", + $year, $month, $day, $hour, $minute, $second ); + + // Convert the character set in the file name + if ( !function_exists( 'iconv' ) + || $this->testBit( $data['general bits'], self::GENERAL_UTF8 ) ) + { + $name = $data['name']; + } else { + $name = iconv( 'CP437', 'UTF-8', $data['name'] ); + } + + // Compile a data array for the user, with a sensible format + $userData = array( + 'name' => $name, + 'mtime' => $timestamp, + 'size' => $data['uncompressed size'], + ); + call_user_func( $this->callback, $userData ); + } + } + + /** + * Interpret ZIP64 "extra field" data and return an associative array. + * @return array|bool + */ + function unpackZip64Extra( $extraField ) { + $extraHeaderInfo = array( + 'id' => 2, + 'size' => 2, + ); + $extraHeaderSize = $this->getStructSize( $extraHeaderInfo ); + + $zip64ExtraInfo = array( + 'uncompressed size' => 8, + 'compressed size' => 8, + 'local header offset' => 8, + 'disk number start' => 4, + ); + + $extraPos = 0; + while ( $extraPos < strlen( $extraField ) ) { + $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos ); + $extraPos += $extraHeaderSize; + $extra += $this->unpack( $extraField, + array( 'data' => array( 'string', $extra['size'] ) ), + $extraPos ); + $extraPos += $extra['size']; + + if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) { + return $this->unpack( $extra['data'], $zip64ExtraInfo ); + } + } + + return false; + } + + /** + * Get the length of the file. + */ + function getFileLength() { + if ( $this->fileLength === null ) { + $stat = fstat( $this->file ); + $this->fileLength = $stat['size']; + } + return $this->fileLength; + } + + /** + * Get the file contents from a given offset. 
If there are not enough bytes + * in the file to satisfy the request, an exception will be thrown. + * + * @param int $start The byte offset of the start of the block. + * @param int $length The number of bytes to return. If omitted, the remainder + * of the file will be returned. + * + * @return string + */ + function getBlock( $start, $length = null ) { + $fileLength = $this->getFileLength(); + if ( $start >= $fileLength ) { + $this->error( 'zip-bad', "getBlock() requested position $start, " . + "file length is $fileLength" ); + } + if ( $length === null ) { + $length = $fileLength - $start; + } + $end = $start + $length; + if ( $end > $fileLength ) { + $this->error( 'zip-bad', "getBlock() requested end position $end, " . + "file length is $fileLength" ); + } + $startSeg = floor( $start / self::SEGSIZE ); + $endSeg = ceil( $end / self::SEGSIZE ); + + $block = ''; + for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) { + $block .= $this->getSegment( $segIndex ); + } + + $block = substr( $block, + $start - $startSeg * self::SEGSIZE, + $length ); + + if ( strlen( $block ) < $length ) { + $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' ); + } + + return $block; + } + + /** + * Get a section of the file starting at position $segIndex * self::SEGSIZE, + * of length self::SEGSIZE. The result is cached. This is a helper function + * for getBlock(). + * + * If there are not enough bytes in the file to satisfy the request, the + * return value will be truncated. If a request is made for a segment beyond + * the end of the file, an empty string will be returned. 
+ * @return string + */ + function getSegment( $segIndex ) { + if ( !isset( $this->buffer[$segIndex] ) ) { + $bytePos = $segIndex * self::SEGSIZE; + if ( $bytePos >= $this->getFileLength() ) { + $this->buffer[$segIndex] = ''; + return ''; + } + if ( fseek( $this->file, $bytePos ) ) { + $this->error( 'zip-bad', "seek to $bytePos failed" ); + } + $seg = fread( $this->file, self::SEGSIZE ); + if ( $seg === false ) { + $this->error( 'zip-bad', "read from $bytePos failed" ); + } + $this->buffer[$segIndex] = $seg; + } + return $this->buffer[$segIndex]; + } + + /** + * Get the size of a structure in bytes. See unpack() for the format of $struct. + * @return int + */ + function getStructSize( $struct ) { + $size = 0; + foreach ( $struct as $type ) { + if ( is_array( $type ) ) { + list( , $fieldSize ) = $type; + $size += $fieldSize; + } else { + $size += $type; + } + } + return $size; + } + + /** + * Unpack a binary structure. This is like the built-in unpack() function + * except nicer. + * + * @param string $string The binary data input + * + * @param array $struct An associative array giving structure members and their + * types. The key is the field name. The value may be either an + * integer, in which case the field is a little-endian unsigned integer + * encoded in the given number of bytes, or an array, in which case the + * first element of the array is the type name, and the subsequent + * elements are type-dependent parameters. Only one such type is defined: + * - "string": The second array element gives the length of the string. + * Not null terminated. + * + * @param int $offset The offset into the string at which to start unpacking. + * + * @throws MWException + * @return array Unpacked associative array. Note that large integers in the input + * may be represented as floating point numbers in the return value, so + * the use of weak comparison is advised. 
+ */ + function unpack( $string, $struct, $offset = 0 ) { + $size = $this->getStructSize( $struct ); + if ( $offset + $size > strlen( $string ) ) { + $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' ); + } + + $data = array(); + $pos = $offset; + foreach ( $struct as $key => $type ) { + if ( is_array( $type ) ) { + list( $typeName, $fieldSize ) = $type; + switch ( $typeName ) { + case 'string': + $data[$key] = substr( $string, $pos, $fieldSize ); + $pos += $fieldSize; + break; + default: + throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" ); + } + } else { + // Unsigned little-endian integer + $length = intval( $type ); + + // Calculate the value. Use an algorithm which automatically + // upgrades the value to floating point if necessary. + $value = 0; + for ( $i = $length - 1; $i >= 0; $i-- ) { + $value *= 256; + $value += ord( $string[$pos + $i] ); + } + + // Throw an exception if there was loss of precision + if ( $value > pow( 2, 52 ) ) { + $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' . + 'This could happen if we tried to unpack a 64-bit structure ' . + 'at an invalid location.' ); + } + $data[$key] = $value; + $pos += $length; + } + } + + return $data; + } + + /** + * Returns a bit from a given position in an integer value, converted to + * boolean. + * + * @param $value integer + * @param int $bitIndex The index of the bit, where 0 is the LSB. + * @return bool + */ + function testBit( $value, $bitIndex ) { + return (bool)( ( $value >> $bitIndex ) & 1 ); + } + + /** + * Debugging helper function which dumps a string in hexdump -C format. 
+ */ + function hexDump( $s ) { + $n = strlen( $s ); + for ( $i = 0; $i < $n; $i += 16 ) { + printf( "%08X ", $i ); + for ( $j = 0; $j < 16; $j++ ) { + print " "; + if ( $j == 8 ) { + print " "; + } + if ( $i + $j >= $n ) { + print " "; + } else { + printf( "%02X", ord( $s[$i + $j] ) ); + } + } + + print " |"; + for ( $j = 0; $j < 16; $j++ ) { + if ( $i + $j >= $n ) { + print " "; + } elseif ( ctype_print( $s[$i + $j] ) ) { + print $s[$i + $j]; + } else { + print '.'; + } + } + print "|\n"; + } + } +} + +/** + * Internal exception class. Will be caught by private code. + */ +class ZipDirectoryReaderError extends Exception { + var $errorCode; + + function __construct( $code ) { + $this->errorCode = $code; + parent::__construct( "ZipDirectoryReader error: $code" ); + } + + /** + * @return mixed + */ + function getErrorCode() { + return $this->errorCode; + } +}