+++ /dev/null
-<?php
-
-class ArrayUtils {
-	/**
-	 * Reorder an array pseudo-randomly, such that the resulting order is a
-	 * pure function of $key and of the element values. A typical use is
-	 * spreading load across servers that each keep a local cache.
-	 *
-	 * Every element is ranked by md5( element . separator . key ) and the
-	 * array is sorted by comparing those hex digests with strcmp(). The
-	 * array is sorted in place and its keys are preserved.
-	 *
-	 * Note: md5() is used rather than hash() on purpose. Benchmarks on
-	 * PHP 5.3 and 5.4 showed md5() to be only about 10% slower than
-	 * hash('joaat',...) for small strings, because function call overhead
-	 * dominates, so there is little reason to break installations compiled
-	 * with ./configure --disable-hash.
-	 *
-	 * @param array $array Array to sort. The elements are used as array
-	 *   keys internally, so they should be strings or integers.
-	 * @param string $key
-	 * @param string $separator Delimiter placed between each element and
-	 *   the key before hashing. It can be chosen to stay compatible with
-	 *   consistent hash implementations that predate this function.
-	 */
-	public static function consistentHashSort( &$array, $key, $separator = "\000" ) {
-		// The part appended to every element is the same, so build it once
-		$suffix = $separator . $key;
-
-		$digests = array();
-		foreach ( $array as $value ) {
-			$digests[$value] = md5( $value . $suffix );
-		}
-
-		$byDigest = function ( $left, $right ) use ( $digests ) {
-			return strcmp( $digests[$left], $digests[$right] );
-		};
-		uasort( $array, $byDigest );
-	}
-
-	/**
-	 * Pick one key at random from a map of key => weight, where the chance
-	 * of a key being picked is proportional to its weight. The weights do
-	 * not need to add up to 1.
-	 *
-	 * @param array $weights Map of key => non-normalised probability
-	 * @return bool|int|string The selected key, or false if $weights is not
-	 *   an array, is empty, or sums to zero
-	 */
-	public static function pickRandom( $weights ) {
-		if ( !is_array( $weights ) ) {
-			return false;
-		}
-		if ( count( $weights ) == 0 ) {
-			return false;
-		}
-
-		$total = array_sum( $weights );
-		if ( $total == 0 ) {
-			# Nothing carries any load.
-			# Older versions fell back to an unweighted random selection here;
-			# that was removed in April 2006 to allow for strict separation
-			# of query groups.
-			return false;
-		}
-
-		# Choose a point somewhere in [0, $total]
-		$randMax = mt_getrandmax();
-		$target = mt_rand( 0, $randMax ) / $randMax * $total;
-
-		$running = 0;
-		$lastKey = null;
-		foreach ( $weights as $candidate => $weight ) {
-			$lastKey = $candidate;
-			$running += $weight;
-			# Keys with zero weight are never returned from here; the case
-			# where every weight is zero was already handled above.
-			if ( $weight > 0 && $running >= $target ) {
-				return $candidate;
-			}
-		}
-
-		# No key reached the target: fall back to the last key visited
-		return $lastKey;
-	}
-}
'AjaxDispatcher' => 'includes/AjaxDispatcher.php',
'AjaxResponse' => 'includes/AjaxResponse.php',
'AlphabeticPager' => 'includes/Pager.php',
- 'ArrayUtils' => 'includes/ArrayUtils.php',
'Article' => 'includes/Article.php',
'AtomFeed' => 'includes/Feed.php',
'AuthPlugin' => 'includes/AuthPlugin.php',
'Categoryfinder' => 'includes/Categoryfinder.php',
'CategoryPage' => 'includes/CategoryPage.php',
'CategoryViewer' => 'includes/CategoryViewer.php',
- 'CdbFunctions' => 'includes/Cdb_PHP.php',
- 'CdbReader' => 'includes/Cdb.php',
- 'CdbReader_DBA' => 'includes/Cdb.php',
- 'CdbReader_PHP' => 'includes/Cdb_PHP.php',
- 'CdbWriter' => 'includes/Cdb.php',
- 'CdbWriter_DBA' => 'includes/Cdb.php',
- 'CdbWriter_PHP' => 'includes/Cdb_PHP.php',
'ChangesFeed' => 'includes/ChangesFeed.php',
'ChangeTags' => 'includes/ChangeTags.php',
'ChannelFeed' => 'includes/Feed.php',
'Collation' => 'includes/Collation.php',
'ConcatenatedGzipHistoryBlob' => 'includes/HistoryBlob.php',
- 'ConfEditor' => 'includes/ConfEditor.php',
- 'ConfEditorParseError' => 'includes/ConfEditor.php',
- 'ConfEditorToken' => 'includes/ConfEditor.php',
'Cookie' => 'includes/Cookie.php',
'CookieJar' => 'includes/Cookie.php',
'CurlHttpRequest' => 'includes/HttpFunctions.php',
'DeprecatedGlobal' => 'includes/DeprecatedGlobal.php',
'DerivativeRequest' => 'includes/WebRequest.php',
'DiffHistoryBlob' => 'includes/HistoryBlob.php',
- 'DoubleReplacer' => 'includes/StringUtils.php',
'DummyLinker' => 'includes/Linker.php',
'Dump7ZipOutput' => 'includes/Export.php',
'DumpBZip2Output' => 'includes/Export.php',
'EditPage' => 'includes/EditPage.php',
'EmailNotification' => 'includes/UserMailer.php',
'ErrorPageError' => 'includes/Exception.php',
- 'ExplodeIterator' => 'includes/StringUtils.php',
'FakeTitle' => 'includes/FakeTitle.php',
'Fallback' => 'includes/Fallback.php',
'FatalError' => 'includes/Exception.php',
'FormOptions' => 'includes/FormOptions.php',
'FormSpecialPage' => 'includes/SpecialPage.php',
'GitInfo' => 'includes/GitInfo.php',
- 'HashRing' => 'includes/HashRing.php',
- 'HashtableReplacer' => 'includes/StringUtils.php',
'HistoryBlob' => 'includes/HistoryBlob.php',
'HistoryBlobCurStub' => 'includes/HistoryBlob.php',
'HistoryBlobStub' => 'includes/HistoryBlob.php',
'IncludableSpecialPage' => 'includes/SpecialPage.php',
'IndexPager' => 'includes/Pager.php',
'Interwiki' => 'includes/interwiki/Interwiki.php',
- 'IP' => 'includes/IP.php',
'LCStore' => 'includes/cache/LocalisationCache.php',
'LCStore_Accel' => 'includes/cache/LocalisationCache.php',
'LCStore_CDB' => 'includes/cache/LocalisationCache.php',
'MagicWord' => 'includes/MagicWord.php',
'MagicWordArray' => 'includes/MagicWord.php',
'MailAddress' => 'includes/UserMailer.php',
- 'MappedIterator' => 'includes/MappedIterator.php',
'MediaWiki' => 'includes/Wiki.php',
'MediaWiki_I18N' => 'includes/SkinTemplate.php',
'Message' => 'includes/Message.php',
'MessageBlobStore' => 'includes/MessageBlobStore.php',
'MimeMagic' => 'includes/MimeMagic.php',
- 'MWCryptRand' => 'includes/MWCryptRand.php',
'MWException' => 'includes/Exception.php',
'MWExceptionHandler' => 'includes/Exception.php',
- 'MWFunction' => 'includes/MWFunction.php',
'MWHookException' => 'includes/Hooks.php',
'MWHttpRequest' => 'includes/HttpFunctions.php',
'MWInit' => 'includes/Init.php',
'ReadOnlyError' => 'includes/Exception.php',
'RedirectSpecialArticle' => 'includes/SpecialPage.php',
'RedirectSpecialPage' => 'includes/SpecialPage.php',
- 'RegexlikeReplacer' => 'includes/StringUtils.php',
- 'ReplacementArray' => 'includes/StringUtils.php',
- 'Replacer' => 'includes/StringUtils.php',
'ReverseChronologicalPager' => 'includes/Pager.php',
'RevisionItem' => 'includes/RevisionList.php',
'RevisionItemBase' => 'includes/RevisionList.php',
'RevisionList' => 'includes/RevisionList.php',
'RSSFeed' => 'includes/Feed.php',
'Sanitizer' => 'includes/Sanitizer.php',
- 'ScopedCallback' => 'includes/ScopedCallback.php',
- 'ScopedPHPTimeout' => 'includes/ScopedPHPTimeout.php',
'SiteConfiguration' => 'includes/SiteConfiguration.php',
'SiteStats' => 'includes/SiteStats.php',
'SiteStatsInit' => 'includes/SiteStats.php',
'StatCounter' => 'includes/StatCounter.php',
'Status' => 'includes/Status.php',
'StreamFile' => 'includes/StreamFile.php',
- 'StringUtils' => 'includes/StringUtils.php',
'StubContLang' => 'includes/StubObject.php',
'StubObject' => 'includes/StubObject.php',
'StubUserLang' => 'includes/StubObject.php',
'TitleArray' => 'includes/TitleArray.php',
'TitleArrayFromResult' => 'includes/TitleArray.php',
'ThrottledError' => 'includes/Exception.php',
- 'UIDGenerator' => 'includes/UIDGenerator.php',
'UnlistedSpecialPage' => 'includes/SpecialPage.php',
'UploadSourceAdapter' => 'includes/Import.php',
'UppercaseCollation' => 'includes/Collation.php',
'XmlJsCode' => 'includes/Xml.php',
'XMLReader2' => 'includes/Import.php',
'XmlSelect' => 'includes/Xml.php',
- 'XmlTypeCheck' => 'includes/XmlTypeCheck.php',
'ZhClient' => 'includes/ZhClient.php',
- 'ZipDirectoryReader' => 'includes/ZipDirectoryReader.php',
- 'ZipDirectoryReaderError' => 'includes/ZipDirectoryReader.php',
# includes/actions
'CachedAction' => 'includes/actions/CachedAction.php',
'JSParser' => 'includes/libs/jsminplus.php',
'JSToken' => 'includes/libs/jsminplus.php',
'JSTokenizer' => 'includes/libs/jsminplus.php',
+ 'ScopedPHPTimeout' => 'includes/libs/ScopedPHPTimeout.php',
+ 'XmlTypeCheck' => 'includes/libs/XmlTypeCheck.php',
# includes/libs/lessphp
'lessc' => 'includes/libs/lessc.inc.php',
'UploadStashWrongOwnerException' => 'includes/upload/UploadStash.php',
'UploadStashNoSuchKeyException' => 'includes/upload/UploadStash.php',
+ # includes/utils
+ 'ArrayUtils' => 'includes/utils/ArrayUtils.php',
+ 'CdbFunctions' => 'includes/utils/Cdb_PHP.php',
+ 'CdbReader' => 'includes/utils/Cdb.php',
+ 'CdbReader_DBA' => 'includes/utils/Cdb.php',
+ 'CdbReader_PHP' => 'includes/utils/Cdb_PHP.php',
+ 'CdbWriter' => 'includes/utils/Cdb.php',
+ 'CdbWriter_DBA' => 'includes/utils/Cdb.php',
+ 'CdbWriter_PHP' => 'includes/utils/Cdb_PHP.php',
+ 'ConfEditor' => 'includes/utils/ConfEditor.php',
+ 'ConfEditorParseError' => 'includes/utils/ConfEditor.php',
+ 'ConfEditorToken' => 'includes/utils/ConfEditor.php',
+ 'DoubleReplacer' => 'includes/utils/StringUtils.php',
+ 'ExplodeIterator' => 'includes/utils/StringUtils.php',
+ 'HashRing' => 'includes/utils/HashRing.php',
+ 'HashtableReplacer' => 'includes/utils/StringUtils.php',
+ 'IP' => 'includes/utils/IP.php',
+ 'MWCryptRand' => 'includes/utils/MWCryptRand.php',
+ 'MWFunction' => 'includes/utils/MWFunction.php',
+ 'MappedIterator' => 'includes/utils/MappedIterator.php',
+ 'RegexlikeReplacer' => 'includes/utils/StringUtils.php',
+ 'ReplacementArray' => 'includes/utils/StringUtils.php',
+ 'Replacer' => 'includes/utils/StringUtils.php',
+ 'ScopedCallback' => 'includes/utils/ScopedCallback.php',
+ 'StringUtils' => 'includes/utils/StringUtils.php',
+ 'UIDGenerator' => 'includes/utils/UIDGenerator.php',
+ 'ZipDirectoryReader' => 'includes/utils/ZipDirectoryReader.php',
+ 'ZipDirectoryReaderError' => 'includes/utils/ZipDirectoryReader.php',
+
# languages
'ConverterRule' => 'languages/LanguageConverter.php',
'FakeConverter' => 'languages/Language.php',
+++ /dev/null
-<?php
-/**
- * Native CDB file reader and writer.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Base class for CDB file readers (http://cr.yp.to/cdb.html).
- * Two implementations exist: one built on the native DBA extension and
- * one written in pure PHP.
- */
-abstract class CdbReader {
-	/**
-	 * Open a CDB file with the best available implementation.
-	 *
-	 * @param $fileName string
-	 *
-	 * @return CdbReader A CdbReader_DBA when the DBA extension supports
-	 *   CDB, otherwise a CdbReader_PHP
-	 */
-	public static function open( $fileName ) {
-		if ( !self::haveExtension() ) {
-			wfDebug( "Warning: no dba extension found, using emulation.\n" );
-			return new CdbReader_PHP( $fileName );
-		}
-		return new CdbReader_DBA( $fileName );
-	}
-
-	/**
-	 * Check whether the native DBA extension is loaded and provides both
-	 * the "cdb" (read) and "cdb_make" (write) handlers.
-	 *
-	 * @return bool
-	 */
-	public static function haveExtension() {
-		if ( !function_exists( 'dba_handlers' ) ) {
-			return false;
-		}
-		$available = dba_handlers();
-		return in_array( 'cdb', $available ) && in_array( 'cdb_make', $available );
-	}
-
-	/**
-	 * Create the reader and open the given file
-	 */
-	abstract function __construct( $fileName );
-
-	/**
-	 * Close the file. Calling this is optional; letting the object go out
-	 * of scope is enough.
-	 */
-	abstract function close();
-
-	/**
-	 * Look up the value stored under a key. Only string values are supported.
-	 *
-	 * @param $key string
-	 */
-	abstract public function get( $key );
-}
-
-/**
- * Base class for CDB file writers.
- * Two implementations exist: one built on the native DBA extension and
- * one written in pure PHP.
- */
-abstract class CdbWriter {
-	/**
-	 * Create a writer with the best available implementation.
-	 * Temporary files are created next to the target, so the user must have
-	 * write access to the directory.
-	 *
-	 * @param $fileName string
-	 *
-	 * @return CdbWriter_DBA|CdbWriter_PHP
-	 */
-	public static function open( $fileName ) {
-		if ( !CdbReader::haveExtension() ) {
-			wfDebug( "Warning: no dba extension found, using emulation.\n" );
-			return new CdbWriter_PHP( $fileName );
-		}
-		return new CdbWriter_DBA( $fileName );
-	}
-
-	/**
-	 * Create the writer and open the file
-	 *
-	 * @param $fileName string
-	 */
-	abstract function __construct( $fileName );
-
-	/**
-	 * Store a value under a key. The value will be converted to string.
-	 * @param $key string
-	 * @param $value string
-	 */
-	abstract public function set( $key, $value );
-
-	/**
-	 * Close the writer. Call this before the object goes out of scope so
-	 * that the final hashtables are written out.
-	 */
-	abstract public function close();
-}
-
-/**
- * Reader class which uses the DBA extension.
- *
- * This is the implementation returned by CdbReader::open() when the native
- * "cdb" handler is available, so it extends CdbReader to honour that
- * method's documented return type.
- */
-class CdbReader_DBA extends CdbReader {
-	/** DBA handle returned by dba_open(); unset once close() has run */
-	var $handle;
-
-	/**
-	 * Open the CDB file read-only, without locking.
-	 *
-	 * @param $fileName string
-	 * @throws MWException if the file cannot be opened
-	 */
-	function __construct( $fileName ) {
-		$this->handle = dba_open( $fileName, 'r-', 'cdb' );
-		if ( !$this->handle ) {
-			throw new MWException( 'Unable to open CDB file "' . $fileName . '"' );
-		}
-	}
-
-	/**
-	 * Close the DBA handle if it is still open.
-	 */
-	function close() {
-		if ( isset( $this->handle ) ) {
-			dba_close( $this->handle );
-		}
-		unset( $this->handle );
-	}
-
-	/**
-	 * Fetch the value stored under a key.
-	 *
-	 * @param $key string
-	 * @return bool|string Whatever dba_fetch() returns: the value, or false
-	 *   when the key is not present
-	 */
-	public function get( $key ) {
-		return dba_fetch( $key, $this->handle );
-	}
-}
-
-/**
- * Writer class which uses the DBA extension.
- *
- * Data is written to a temporary file next to the target and renamed into
- * place by close(). This is the implementation returned by
- * CdbWriter::open() when the native handlers are available, so it extends
- * CdbWriter.
- */
-class CdbWriter_DBA extends CdbWriter {
-	var $handle, $realFileName, $tmpFileName;
-
-	/**
-	 * Create the temporary file and open it for writing.
-	 *
-	 * @param $fileName string Final name of the CDB file
-	 * @throws MWException if the temporary file cannot be opened
-	 */
-	function __construct( $fileName ) {
-		$this->realFileName = $fileName;
-		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
-		$this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' );
-		if ( !$this->handle ) {
-			throw new MWException( 'Unable to open CDB file for write "' . $fileName . '"' );
-		}
-	}
-
-	/**
-	 * Set a key to a given value.
-	 *
-	 * @param $key string
-	 * @param $value string
-	 * @return bool Result of dba_insert()
-	 */
-	public function set( $key, $value ) {
-		return dba_insert( $key, $value, $this->handle );
-	}
-
-	/**
-	 * Finish writing and move the temporary file over the real file name.
-	 *
-	 * @throws MWException if the temporary file cannot be renamed
-	 */
-	public function close() {
-		if ( isset( $this->handle ) ) {
-			dba_close( $this->handle );
-		}
-		// Forget the handle before anything below can throw, so that
-		// __destruct() does not run close() again on a closed handle.
-		unset( $this->handle );
-
-		// rename() cannot overwrite an existing file on Windows. Only unlink
-		// when the target exists, to avoid a warning on first creation.
-		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
-			unlink( $this->realFileName );
-		}
-		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
-			throw new MWException( 'Unable to move the new CDB file into place.' );
-		}
-	}
-
-	/**
-	 * Make sure the file is finalised if the caller forgot to call close().
-	 */
-	function __destruct() {
-		if ( isset( $this->handle ) ) {
-			$this->close();
-		}
-	}
-}
+++ /dev/null
-<?php
-/**
- * This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
- * appears in PHP 5.3. Changes are:
- * * Error returns replaced with exceptions
- * * Exception thrown if sizes or offsets are between 2GB and 4GB
- * * Some variables renamed
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Common functions for readers and writers.
- *
- * All helpers treat their integer arguments as 32-bit unsigned values and
- * only use operations that stay within 31 bits per step.
- */
-class CdbFunctions {
-	/**
-	 * Take a modulo of a signed integer as if it were an unsigned integer.
-	 * $b must be less than 0x40000000 and greater than 0
-	 *
-	 * @param $a int
-	 * @param $b int
-	 *
-	 * @return int
-	 */
-	public static function unsignedMod( $a, $b ) {
-		if ( $a & 0x80000000 ) {
-			// Split off the top bit: 0x80000000 % $b is computed as
-			// 2 * ( 0x40000000 % $b ) so no step needs more than 31 bits
-			$m = ( $a & 0x7fffffff ) % $b + 2 * ( 0x40000000 % $b );
-			return $m % $b;
-		} else {
-			return $a % $b;
-		}
-	}
-
-	/**
-	 * Shift a signed integer right as if it were unsigned
-	 * @param $a int
-	 * @param $b int Number of bits to shift by
-	 * @return int
-	 */
-	public static function unsignedShiftRight( $a, $b ) {
-		if ( $b == 0 ) {
-			return $a;
-		}
-		if ( $a & 0x80000000 ) {
-			// Shift the low 31 bits, then put the top bit back at its
-			// shifted position
-			return ( ( $a & 0x7fffffff ) >> $b ) | ( 0x40000000 >> ( $b - 1 ) );
-		} else {
-			return $a >> $b;
-		}
-	}
-
-	/**
-	 * The CDB hash function: starting from 5381, each byte updates the hash
-	 * as h = ( ( h << 5 ) + h ) ^ byte, truncated to 32 bits.
-	 *
-	 * @param $s string
-	 *
-	 * @return int
-	 */
-	public static function hash( $s ) {
-		$h = 5381;
-		// The length does not change inside the loop, so compute it once
-		$len = strlen( $s );
-		for ( $i = 0; $i < $len; $i++ ) {
-			$h5 = ( $h << 5 ) & 0xffffffff;
-			// Do a 32-bit sum
-			// Inlined here for speed
-			$sum = ( $h & 0x3fffffff ) + ( $h5 & 0x3fffffff );
-			$h =
-				(
-					( $sum & 0x40000000 ? 1 : 0 )
-					+ ( $h & 0x80000000 ? 2 : 0 )
-					+ ( $h & 0x40000000 ? 1 : 0 )
-					+ ( $h5 & 0x80000000 ? 2 : 0 )
-					+ ( $h5 & 0x40000000 ? 1 : 0 )
-				) << 30
-				| ( $sum & 0x3fffffff );
-			$h ^= ord( $s[$i] );
-			$h &= 0xffffffff;
-		}
-		return $h;
-	}
-}
-
-/**
- * CDB reader class
- *
- * Pure PHP implementation which reads the file with fopen()/fseek()/fread().
- * A lookup hashes the key, reads one 8-byte entry from the table at the
- * start of the file to locate a hashtable, then probes that hashtable until
- * a record with a matching key is found or an empty slot is reached.
- */
-class CdbReader_PHP extends CdbReader {
- /** The filename */
- var $fileName;
-
- /** The file handle */
- var $handle;
-
- /* number of hash slots searched under this key */
- var $loop;
-
- /* hash of the key being searched; initialized if loop is nonzero */
- var $khash;
-
- /* byte offset of the next slot to examine; initialized if loop is nonzero */
- var $kpos;
-
- /* byte offset of the start of the hashtable; initialized if loop is nonzero */
- var $hpos;
-
- /* number of slots in the hashtable; initialized if loop is nonzero */
- var $hslots;
-
- /* byte offset of the found value; initialized if findNext() returns true */
- var $dpos;
-
- /* byte length of the found value; initialized if findNext() returns true */
- var $dlen;
-
- /**
- * Open the file for binary reading and reset the search state.
- *
- * @param $fileName string
- * @throws MWException
- */
- function __construct( $fileName ) {
- $this->fileName = $fileName;
- $this->handle = fopen( $fileName, 'rb' );
- if ( !$this->handle ) {
- throw new MWException( 'Unable to open CDB file "' . $this->fileName . '".' );
- }
- $this->findStart();
- }
-
- /**
- * Close the file handle if it is still set, then unset it.
- */
- function close() {
- if ( isset( $this->handle ) ) {
- fclose( $this->handle );
- }
- unset( $this->handle );
- }
-
- /**
- * Get the value stored under a key.
- *
- * @param $key
- * @return bool|string The value, or false if the key was not found
- */
- public function get( $key ) {
- // strval is required
- if ( $this->find( strval( $key ) ) ) {
- return $this->read( $this->dlen, $this->dpos );
- } else {
- return false;
- }
- }
-
- /**
- * Check whether the bytes stored at $pos are exactly $key.
- *
- * @param $key
- * @param $pos
- * @return bool
- */
- protected function match( $key, $pos ) {
- $buf = $this->read( strlen( $key ), $pos );
- return $buf === $key;
- }
-
- /**
- * Reset the search state so that the next findNext() starts a new lookup.
- */
- protected function findStart() {
- $this->loop = 0;
- }
-
- /**
- * Read exactly $length bytes starting at byte offset $pos.
- *
- * @throws MWException
- * @param $length
- * @param $pos
- * @return string
- */
- protected function read( $length, $pos ) {
- if ( fseek( $this->handle, $pos ) == -1 ) {
- // This can easily happen if the internal pointers are incorrect
- throw new MWException(
- 'Seek failed, file "' . $this->fileName . '" may be corrupted.' );
- }
-
- if ( $length == 0 ) {
- return '';
- }
-
- $buf = fread( $this->handle, $length );
- // A short read is treated as an error too
- if ( $buf === false || strlen( $buf ) !== $length ) {
- throw new MWException(
- 'Read from CDB file failed, file "' . $this->fileName . '" may be corrupted.' );
- }
- return $buf;
- }
-
- /**
- * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
- * @param $s string The first 4 bytes are read as a little-endian integer
- * @throws MWException
- * @return int
- */
- protected function unpack31( $s ) {
- $data = unpack( 'V', $s );
- if ( $data[1] > 0x7fffffff ) {
- throw new MWException(
- 'Error in CDB file "' . $this->fileName . '", integer too big.' );
- }
- return $data[1];
- }
-
- /**
- * Unpack a 32-bit signed integer
- * @param $s string The first 4 bytes are read as two little-endian 16-bit halves
- * @return int
- */
- protected function unpackSigned( $s ) {
- $data = unpack( 'va/vb', $s );
- return $data['a'] | ( $data['b'] << 16 );
- }
-
- /**
- * Continue the current search for $key, or begin one if none is in
- * progress. On success, $this->dpos and $this->dlen describe the value.
- *
- * @param $key
- * @return bool True if a matching record was found
- */
- protected function findNext( $key ) {
- if ( !$this->loop ) {
- // New search: the low 8 bits of the hash select one of the 8-byte
- // entries in the first 2048 bytes of the file
- $u = CdbFunctions::hash( $key );
- $buf = $this->read( 8, ( $u << 3 ) & 2047 );
- // Second half of the entry: number of slots in the hashtable
- $this->hslots = $this->unpack31( substr( $buf, 4 ) );
- if ( !$this->hslots ) {
- return false;
- }
- // First half of the entry: position of the hashtable
- $this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
- $this->khash = $u;
- // The remaining hash bits select the starting slot; slots are 8 bytes
- $u = CdbFunctions::unsignedShiftRight( $u, 8 );
- $u = CdbFunctions::unsignedMod( $u, $this->hslots );
- $u <<= 3;
- $this->kpos = $this->hpos + $u;
- }
-
- // Probe at most hslots slots in total
- while ( $this->loop < $this->hslots ) {
- $buf = $this->read( 8, $this->kpos );
- // Second half of the slot: position of the record; zero ends the search
- $pos = $this->unpack31( substr( $buf, 4 ) );
- if ( !$pos ) {
- return false;
- }
- $this->loop += 1;
- $this->kpos += 8;
- // Wrap around to the first slot after the last one
- if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
- $this->kpos = $this->hpos;
- }
- // First half of the slot: hash stored for the record
- $u = $this->unpackSigned( substr( $buf, 0, 4 ) );
- if ( $u === $this->khash ) {
- // Record header is key length then data length, followed by the key
- $buf = $this->read( 8, $pos );
- $keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
- if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
- // Found
- $this->dlen = $this->unpack31( substr( $buf, 4 ) );
- $this->dpos = $pos + 8 + $keyLen;
- return true;
- }
- }
- }
- return false;
- }
-
- /**
- * Start a fresh search for $key.
- *
- * @param $key
- * @return bool
- */
- protected function find( $key ) {
- $this->findStart();
- return $this->findNext( $key );
- }
-}
-
-/**
- * CDB writer class
- *
- * Pure PHP implementation. Records are appended to a temporary file after a
- * 2048-byte gap; close() then writes the hashtables, fills in the pointer
- * array at the start of the file and renames the file into place.
- */
-class CdbWriter_PHP extends CdbWriter {
-	var $handle, $realFileName, $tmpFileName;
-
-	/** List of array( 'h' => hash, 'p' => position ), one per record written */
-	var $hplist;
-
-	/** Number of records written, and the current write offset in the file */
-	var $numentries, $pos;
-
-	/**
-	 * Create the temporary file and seek past the space reserved for the
-	 * pointer array.
-	 *
-	 * @param $fileName string
-	 * @throws MWException
-	 */
-	function __construct( $fileName ) {
-		$this->realFileName = $fileName;
-		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
-		$this->handle = fopen( $this->tmpFileName, 'wb' );
-		if ( !$this->handle ) {
-			$this->throwException(
-				'Unable to open CDB file "' . $this->tmpFileName . '" for write.' );
-		}
-		$this->hplist = array();
-		$this->numentries = 0;
-		$this->pos = 2048; // leaving space for the pointer array, 256 * 8
-		if ( fseek( $this->handle, $this->pos ) == -1 ) {
-			$this->throwException( 'fseek failed in file "' . $this->tmpFileName . '".' );
-		}
-	}
-
-	/**
-	 * Finalise the file if the caller did not call close(). Nothing is done
-	 * when the handle has already been released, including after a failure.
-	 */
-	function __destruct() {
-		if ( isset( $this->handle ) ) {
-			$this->close();
-		}
-	}
-
-	/**
-	 * Append a record. Empty keys are silently ignored.
-	 *
-	 * @param $key
-	 * @param $value
-	 * @return
-	 */
-	public function set( $key, $value ) {
-		if ( strval( $key ) === '' ) {
-			// DBA cross-check hack
-			return;
-		}
-		$this->addbegin( strlen( $key ), strlen( $value ) );
-		$this->write( $key );
-		$this->write( $value );
-		$this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) );
-	}
-
-	/**
-	 * Write the hashtables and pointer array, close the temporary file and
-	 * move it over the real file name. Calling close() again once the
-	 * handle has been released does nothing.
-	 *
-	 * @throws MWException
-	 */
-	public function close() {
-		if ( !isset( $this->handle ) ) {
-			// Already closed, or already cleaned up after a failure
-			return;
-		}
-		$this->finish();
-		fclose( $this->handle );
-		// Release the handle before anything below can throw, so that
-		// neither throwException() nor __destruct() touches the closed stream.
-		unset( $this->handle );
-
-		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
-			unlink( $this->realFileName );
-		}
-		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
-			// throwException() only removes the temporary file while the
-			// handle is still open, so remove it here instead
-			if ( file_exists( $this->tmpFileName ) ) {
-				unlink( $this->tmpFileName );
-			}
-			$this->throwException( 'Unable to move the new CDB file into place.' );
-		}
-	}
-
-	/**
-	 * Write a buffer at the current file position; a short write is an error.
-	 *
-	 * @throws MWException
-	 * @param $buf
-	 */
-	protected function write( $buf ) {
-		$len = fwrite( $this->handle, $buf );
-		if ( $len !== strlen( $buf ) ) {
-			$this->throwException( 'Error writing to CDB file "' . $this->tmpFileName . '".' );
-		}
-	}
-
-	/**
-	 * Advance the tracked write offset, refusing offsets beyond 31 bits.
-	 *
-	 * @throws MWException
-	 * @param $len
-	 */
-	protected function posplus( $len ) {
-		$newpos = $this->pos + $len;
-		if ( $newpos > 0x7fffffff ) {
-			$this->throwException(
-				'A value in the CDB file "' . $this->tmpFileName . '" is too large.' );
-		}
-		$this->pos = $newpos;
-	}
-
-	/**
-	 * Register a record that has just been written: remember its hash and
-	 * start position, then advance the offset past its header, key and data.
-	 *
-	 * @param $keylen
-	 * @param $datalen
-	 * @param $h
-	 */
-	protected function addend( $keylen, $datalen, $h ) {
-		$this->hplist[] = array(
-			'h' => $h,
-			'p' => $this->pos
-		);
-
-		$this->numentries++;
-		$this->posplus( 8 );
-		$this->posplus( $keylen );
-		$this->posplus( $datalen );
-	}
-
-	/**
-	 * Write the 8-byte record header (key length, data length).
-	 *
-	 * @throws MWException
-	 * @param $keylen
-	 * @param $datalen
-	 */
-	protected function addbegin( $keylen, $datalen ) {
-		if ( $keylen > 0x7fffffff ) {
-			$this->throwException( 'Key length too long in file "' . $this->tmpFileName . '".' );
-		}
-		if ( $datalen > 0x7fffffff ) {
-			$this->throwException( 'Data length too long in file "' . $this->tmpFileName . '".' );
-		}
-		$buf = pack( 'VV', $keylen, $datalen );
-		$this->write( $buf );
-	}
-
-	/**
-	 * Write the 256 hashtables after the records, then the pointer array at
-	 * the start of the file.
-	 *
-	 * @throws MWException
-	 */
-	protected function finish() {
-		// Hack for DBA cross-check
-		$this->hplist = array_reverse( $this->hplist );
-
-		// Calculate the number of items that will be in each hashtable
-		$counts = array_fill( 0, 256, 0 );
-		foreach ( $this->hplist as $item ) {
-			++ $counts[255 & $item['h']];
-		}
-
-		// Fill in $starts with the *end* indexes
-		$starts = array();
-		$pos = 0;
-		for ( $i = 0; $i < 256; ++$i ) {
-			$pos += $counts[$i];
-			$starts[$i] = $pos;
-		}
-
-		// Excessively clever and indulgent code to simultaneously fill $packedTables
-		// with the packed hashtables, and adjust the elements of $starts
-		// to actually point to the starts instead of the ends.
-		$packedTables = array_fill( 0, $this->numentries, false );
-		foreach ( $this->hplist as $item ) {
-			$packedTables[--$starts[255 & $item['h']]] = $item;
-		}
-
-		$final = '';
-		for ( $i = 0; $i < 256; ++$i ) {
-			$count = $counts[$i];
-
-			// The size of the hashtable will be double the item count.
-			// The rest of the slots will be empty.
-			$len = $count + $count;
-			$final .= pack( 'VV', $this->pos, $len );
-
-			$hashtable = array();
-			for ( $u = 0; $u < $len; ++$u ) {
-				$hashtable[$u] = array( 'h' => 0, 'p' => 0 );
-			}
-
-			// Fill the hashtable, using the next empty slot if the hashed slot
-			// is taken.
-			for ( $u = 0; $u < $count; ++$u ) {
-				$hp = $packedTables[$starts[$i] + $u];
-				$where = CdbFunctions::unsignedMod(
-					CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
-				while ( $hashtable[$where]['p'] ) {
-					if ( ++$where == $len ) {
-						$where = 0;
-					}
-				}
-				$hashtable[$where] = $hp;
-			}
-
-			// Write the hashtable
-			for ( $u = 0; $u < $len; ++$u ) {
-				$buf = pack( 'vvV',
-					$hashtable[$u]['h'] & 0xffff,
-					CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
-					$hashtable[$u]['p'] );
-				$this->write( $buf );
-				$this->posplus( 8 );
-			}
-		}
-
-		// Write the pointer array at the start of the file
-		rewind( $this->handle );
-		if ( ftell( $this->handle ) != 0 ) {
-			$this->throwException( 'Error rewinding to start of file "' . $this->tmpFileName . '".' );
-		}
-		$this->write( $final );
-	}
-
-	/**
-	 * Clean up the temp file and throw an exception
-	 *
-	 * The handle is released before throwing, so that __destruct() does not
-	 * try to finalise a file that has already failed.
-	 *
-	 * @param $msg string
-	 * @throws MWException
-	 */
-	protected function throwException( $msg ) {
-		// empty() also covers the handle having been unset already
-		if ( !empty( $this->handle ) ) {
-			fclose( $this->handle );
-			unlink( $this->tmpFileName );
-		}
-		unset( $this->handle );
-		throw new MWException( $msg );
-	}
-}
+++ /dev/null
-<?php
-/**
- * Configuration file editor.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * This is a state machine style parser with two internal stacks:
- * * A next state stack, which determines the state the machine will progress to next
- * * A path stack, which keeps track of the logical location in the file.
- *
- * Reference grammar:
- *
- * file = T_OPEN_TAG *statement
- * statement = T_VARIABLE "=" expression ";"
- * expression = array / scalar / T_VARIABLE
- * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
- * element = assoc-element / expression
- * assoc-element = scalar T_DOUBLE_ARROW expression
- * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
- */
-class ConfEditor {
- /** The text to parse */
- var $text;
-
- /** The token array from token_get_all() */
- var $tokens;
-
- /** The current position in the token array */
- var $pos;
-
- /** The current 1-based line number */
- var $lineNum;
-
- /** The current 1-based column number */
- var $colNum;
-
- /** The current 0-based byte number */
- var $byteNum;
-
- /** The current ConfEditorToken object */
- var $currentToken;
-
- /** The previous ConfEditorToken object */
- var $prevToken;
-
- /**
- * The state machine stack. This is an array of strings where the topmost
- * element will be popped off and become the next parser state.
- */
- var $stateStack;
-
- /**
- * The path stack is a stack of associative arrays with the following elements:
- * name The name of top level of the path
- * level The level (number of elements) of the path
- * startByte The byte offset of the start of the path
- * startToken The token offset of the start
- * endByte The byte offset of the end, plus one
- * endToken The token offset of the end, plus one
- * valueStartToken The start token offset of the value part
- * valueStartByte The start byte offset of the value part
- * valueEndToken The end token offset of the value part, plus one
- * valueEndByte The end byte offset of the value part, plus one
- * nextArrayIndex The next numeric array index at this level
- * hasComma True if the array element ends with a comma
- * arrowByte The byte offset of the "=>", or false if there isn't one
- */
- var $pathStack;
-
- /**
- * The elements of the top of the pathStack for every path encountered, indexed
- * by slash-separated path.
- */
- var $pathInfo;
-
- /**
- * Next serial number for whitespace placeholder paths (\@extra-N)
- */
- var $serial;
-
- /**
- * Editor state. This consists of the internal copy/insert operations which
- * are applied to the source string to obtain the destination string.
- */
- var $edits;
-
- /**
- * Simple entry point for command-line testing
- *
- * @param $text string
- *
- * @return string
- */
- static function test( $text ) {
- try {
- $ce = new self( $text );
- $ce->parse();
- } catch ( ConfEditorParseError $e ) {
- return $e->getMessage() . "\n" . $e->highlight( $text );
- }
- return "OK";
- }
-
- /**
- * Construct a new parser
- */
- public function __construct( $text ) {
- $this->text = $text;
- }
-
- /**
- * Edit the text. Returns the edited text.
- * @param array $ops of operations.
- *
- * Operations are given as an associative array, with members:
- * type: One of delete, set, append or insert (required)
- * path: The path to operate on (required)
- * key: The array key to insert/append, with PHP quotes
- * value: The value, with PHP quotes
- *
- * delete
- * Deletes an array element or statement with the specified path.
- * e.g.
- * array('type' => 'delete', 'path' => '$foo/bar/baz' )
- * is equivalent to the runtime PHP code:
- * unset( $foo['bar']['baz'] );
- *
- * set
- * Sets the value of an array element. If the element doesn't exist, it
- * is appended to the array. If it does exist, the value is set, with
- * comments and indenting preserved.
- *
- * append
- * Appends a new element to the end of the array. Adds a trailing comma.
- * e.g.
- * array( 'type' => 'append', 'path' => '$foo/bar',
- * 'key' => 'baz', 'value' => "'x'" )
- * is like the PHP code:
- * $foo['bar']['baz'] = 'x';
- *
- * insert
- * Insert a new element at the start of the array.
- *
- * @throws MWException
- * @return string
- */
- public function edit( $ops ) {
- $this->parse();
-
- $this->edits = array(
- array( 'copy', 0, strlen( $this->text ) )
- );
- foreach ( $ops as $op ) {
- $type = $op['type'];
- $path = $op['path'];
- $value = isset( $op['value'] ) ? $op['value'] : null;
- $key = isset( $op['key'] ) ? $op['key'] : null;
-
- switch ( $type ) {
- case 'delete':
- list( $start, $end ) = $this->findDeletionRegion( $path );
- $this->replaceSourceRegion( $start, $end, false );
- break;
- case 'set':
- if ( isset( $this->pathInfo[$path] ) ) {
- list( $start, $end ) = $this->findValueRegion( $path );
- $encValue = $value; // $value is inserted verbatim; var_export( $value, true ) is deliberately not applied
- $this->replaceSourceRegion( $start, $end, $encValue );
- break;
- }
- // No existing path: split it into the parent array path and a quoted key, then fall through to append
- $slashPos = strrpos( $path, '/' );
- $key = var_export( substr( $path, $slashPos + 1 ), true );
- $path = substr( $path, 0, $slashPos );
- // Fall through
- case 'append':
- // Find the last array element (findLastArrayElement() returns false if there is none)
- $lastEltPath = $this->findLastArrayElement( $path );
- if ( $lastEltPath === false ) {
- throw new MWException( "Can't find any element of array \"$path\"" );
- }
- $lastEltInfo = $this->pathInfo[$lastEltPath];
-
- // Has it got a comma already? (paths containing "@extra" are never given one)
- if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) {
- // No comma, insert one after the value region
- list( , $end ) = $this->findValueRegion( $lastEltPath );
- $this->replaceSourceRegion( $end - 1, $end - 1, ',' );
- }
-
- // Make the text to insert, reusing the indent (and arrow alignment) found at the last element
- list( $start, $end ) = $this->findDeletionRegion( $lastEltPath );
-
- if ( $key === null ) {
- list( $indent, ) = $this->getIndent( $start );
- $textToInsert = "$indent$value,";
- } else {
- list( $indent, $arrowIndent ) =
- $this->getIndent( $start, $key, $lastEltInfo['arrowByte'] );
- $textToInsert = "$indent$key$arrowIndent=> $value,";
- }
- $textToInsert .= ( $indent === false ? ' ' : "\n" );
-
- // Insert the item at the end of the last element's deletion region
- $this->replaceSourceRegion( $end, $end, $textToInsert );
- break;
- case 'insert':
- // Find first array element (findFirstArrayElement() returns false if there is none)
- $firstEltPath = $this->findFirstArrayElement( $path );
- if ( $firstEltPath === false ) {
- throw new MWException( "Can't find array element of \"$path\"" );
- }
- list( $start, ) = $this->findDeletionRegion( $firstEltPath );
- $info = $this->pathInfo[$firstEltPath];
-
- // Make the text to insert, reusing the indent (and arrow alignment) found at the first element
- if ( $key === null ) {
- list( $indent, ) = $this->getIndent( $start );
- $textToInsert = "$indent$value,";
- } else {
- list( $indent, $arrowIndent ) =
- $this->getIndent( $start, $key, $info['arrowByte'] );
- $textToInsert = "$indent$key$arrowIndent=> $value,";
- }
- $textToInsert .= ( $indent === false ? ' ' : "\n" );
-
- // Insert the item at the start of the first element's deletion region
- $this->replaceSourceRegion( $start, $start, $textToInsert );
- break;
- default:
- throw new MWException( "Unrecognised operation: \"$type\"" );
- }
- }
-
- // Do the edits: build the output by walking the copy/insert list in order
- $out = '';
- foreach ( $this->edits as $edit ) {
- if ( $edit[0] == 'copy' ) {
- $out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
- } else { // if ( $edit[0] == 'insert' )
- $out .= $edit[1];
- }
- }
-
- // Do a second parse as a sanity check; $this->text is replaced by the edited text first
- $this->text = $out;
- try {
- $this->parse();
- } catch ( ConfEditorParseError $e ) {
- throw new MWException(
- "Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
- $e->getMessage() );
- }
- return $out;
- }
-
- /**
-  * Collect the variables assigned in the text as a nested array.
-  *
-  * The text is parsed again, then every recorded path that starts with "$"
-  * and whose element name is not an \@-prefixed placeholder has its value
-  * text run through parseScalar() and stored with setVar().
-  *
-  * @return array( varname => value )
-  */
- function getVars() {
-     $this->parse();
-     $result = array();
-     foreach ( $this->pathInfo as $fullPath => $info ) {
-         $elementName = $info['name'];
-         if ( $fullPath[0] != '$' || $elementName[0] == '@' ) {
-             continue;
-         }
-         if ( $elementName[0] == '$' ) {
-             $elementName = substr( $elementName, 1 );
-         }
-
-         // Strip the leading "$" and the trailing element name to get the parent path
-         $withoutSigil = substr( $fullPath, 1 );
-         $parent = substr( $withoutSigil, 0,
-             strlen( $withoutSigil ) - strlen( $elementName ) );
-         if ( substr( $parent, -1 ) == '/' ) {
-             $parent = substr( $parent, 0, -1 );
-         }
-
-         $length = $info['valueEndByte'] - $info['valueStartByte'];
-         $rawValue = substr( $this->text, $info['valueStartByte'], $length );
-         $this->setVar( $result, $parent, $elementName,
-             $this->parseScalar( $rawValue ) );
-     }
-     return $result;
- }
-
- /**
-  * Store a value at a slash-delimited path inside a nested array, creating
-  * intermediate arrays as needed. An existing value is never overwritten.
-  * For instance, setVar( $arr, 'foo/bar', 'baz', 3 ); results in
-  * $arr['foo']['bar']['baz'] = 3; unless that element is already set.
-  *
-  * @param $array array Array to modify in place
-  * @param string $path slash-delimited path of the parent, '' for the top level
-  * @param $key mixed Key
-  * @param $value mixed Value
-  */
- function setVar( &$array, $path, $key, $value ) {
-     $node =& $array;
-     if ( $path !== '' ) {
-         foreach ( explode( '/', $path ) as $segment ) {
-             if ( !isset( $node[$segment] ) ) {
-                 $node[$segment] = array();
-             }
-             $node =& $node[$segment];
-         }
-     }
-     if ( isset( $node[$key] ) ) {
-         // Already set: keep the first value
-         return;
-     }
-     $node[$key] = $value;
- }
-
- /**
-  * Convert the source text of a PHP scalar into a PHP value.
-  *
-  * Quoted strings are unquoted and unescaped, text starting with true,
-  * false or null becomes the matching constant, and anything else is
-  * returned unchanged as a string.
-  *
-  * @param string $str Source text of the scalar
-  * @return mixed Parsed value
-  */
- function parseScalar( $str ) {
-     $firstChar = ( $str !== '' ) ? $str[0] : null;
-
-     // @todo FIXME: the trim() calls below are due to a mystery bug where
-     // whitespace gets appended to the token; without them we ended up
-     // reading in the extra quote on the end!
-     if ( $firstChar == '\'' ) {
-         // Single-quoted string
-         $inner = substr( trim( $str ), 1, -1 );
-         return strtr( $inner, array( '\\\'' => '\'', '\\\\' => '\\' ) );
-     }
-     if ( $firstChar == '"' ) {
-         // Double-quoted string
-         return stripcslashes( substr( trim( $str ), 1, -1 ) );
-     }
-
-     $keywords = array( 'true' => true, 'false' => false, 'null' => null );
-     foreach ( $keywords as $word => $constant ) {
-         if ( substr( $str, 0, strlen( $word ) ) == $word ) {
-             return $constant;
-         }
-     }
-
-     // Must be some kind of numeric value, so let PHP's weak typing
-     // be useful for a change
-     return $str;
- }
-
- /**
- * Replace the byte offset region of the source with $newText.
- * Works by adding elements to the $this->edits array: each 'copy' edit that
- * contains the region is split into a copy of the part before $start and a
- * copy of the part from $end onwards, with an 'insert' edit placed between
- * them when $newText is not false.
- *
- * Copy edits for which $start >= copy end or $end <= copy start are kept
- * unchanged. Note that this test also matches a zero-length region
- * ($start == $end) lying exactly on the start or end of a copy edit, so
- * such a region is not applied to that copy edit.
- *
- * @param int $start Byte offset of the start of the region
- * @param int $end Byte offset just past the end of the region
- * @param string|bool $newText Text to insert, or false to only remove the region
- * @throws MWException If the region straddles the start or end of a copy edit
- */
- function replaceSourceRegion( $start, $end, $newText = false ) {
- // Split all copy operations with a source corresponding to the region
- // in question.
- $newEdits = array();
- foreach ( $this->edits as $edit ) {
- if ( $edit[0] !== 'copy' ) {
- $newEdits[] = $edit;
- continue;
- }
- $copyStart = $edit[1];
- $copyEnd = $edit[2];
- if ( $start >= $copyEnd || $end <= $copyStart ) {
- // Outside this region
- $newEdits[] = $edit;
- continue;
- }
- if ( ( $start < $copyStart && $end > $copyStart )
- || ( $start < $copyEnd && $end > $copyEnd )
- ) {
- throw new MWException( "Overlapping regions found, can't do the edit" );
- }
- // Split the copy
- $newEdits[] = array( 'copy', $copyStart, $start );
- if ( $newText !== false ) {
- $newEdits[] = array( 'insert', $newText );
- }
- $newEdits[] = array( 'copy', $end, $copyEnd );
- }
- $this->edits = $newEdits;
- }
-
- /**
- * Finds the source byte region which you would want to delete, if $pathName
- * was to be deleted. Includes the leading spaces and tabs, the trailing line
- * break, and any comments in between.
- *
- * The start is found by walking backwards over skippable tokens (as defined
- * by ConfEditorToken::isSkip()) up to the preceding line feed; if a
- * non-skippable token is met first, the start stays at the path's own start
- * byte. The end is found by walking forwards over skippable tokens up to and
- * including the first line feed.
- *
- * This resets the token position with firstToken() and advances it with
- * nextToken(), so the current parse position is changed by the call.
- *
- * @param string $pathName Key into $this->pathInfo
- * @throws MWException If $pathName is not present in $this->pathInfo
- * @return array ( start byte, past-the-end byte )
- */
- function findDeletionRegion( $pathName ) {
- if ( !isset( $this->pathInfo[$pathName] ) ) {
- throw new MWException( "Can't find path \"$pathName\"" );
- }
- $path = $this->pathInfo[$pathName];
- // Find the start
- $this->firstToken();
- while ( $this->pos != $path['startToken'] ) {
- $this->nextToken();
- }
- $regionStart = $path['startByte'];
- for ( $offset = -1; $offset >= -$this->pos; $offset-- ) {
- $token = $this->getTokenAhead( $offset );
- if ( !$token->isSkip() ) {
- // If there is other content on the same line, don't move the start point
- // back, because that will cause the regions to overlap.
- $regionStart = $path['startByte'];
- break;
- }
- $lfPos = strrpos( $token->text, "\n" );
- if ( $lfPos === false ) {
- $regionStart -= strlen( $token->text );
- } else {
- // The line start does not include the LF
- $regionStart -= strlen( $token->text ) - $lfPos - 1;
- break;
- }
- }
- // Find the end
- while ( $this->pos != $path['endToken'] ) {
- $this->nextToken();
- }
- $regionEnd = $path['endByte']; // past the end
- for ( $offset = 0; $offset < count( $this->tokens ) - $this->pos; $offset++ ) {
- $token = $this->getTokenAhead( $offset );
- if ( !$token->isSkip() ) {
- break;
- }
- $lfPos = strpos( $token->text, "\n" );
- if ( $lfPos === false ) {
- $regionEnd += strlen( $token->text );
- } else {
- // This should point past the LF
- $regionEnd += $lfPos + 1;
- break;
- }
- }
- return array( $regionStart, $regionEnd );
- }
-
- /**
-  * Look up the byte region occupied by the value part of a path.
-  * This includes the quotes, but does not include the trailing comma
-  * or semicolon.
-  *
-  * The end position is the past-the-end (end + 1) value as per convention.
-  *
-  * @param string $pathName Key into $this->pathInfo
-  * @throws MWException If the path is unknown or has no recorded value region
-  * @return array ( value start byte, value end byte )
-  */
- function findValueRegion( $pathName ) {
-     if ( !isset( $this->pathInfo[$pathName] ) ) {
-         throw new MWException( "Can't find path \"$pathName\"" );
-     }
-     $info = $this->pathInfo[$pathName];
-     $from = $info['valueStartByte'];
-     $to = $info['valueEndByte'];
-     if ( $from === false || $to === false ) {
-         throw new MWException( "Can't find value region for path \"$pathName\"" );
-     }
-     return array( $from, $to );
- }
-
- /**
-  * Find the path name of the last element in the array.
-  * If the array is empty, this will return the \@extra interstitial element.
-  * If the specified path is not found or is not an array, it will return false.
-  *
-  * @param string $path Path of the array
-  * @return bool|int|string
-  */
- function findLastArrayElement( $path ) {
-     $prefix = "$path/";
-     $prefixLength = strlen( $path ) + 1;
-
-     // First pass: real elements only, i.e. children whose name does not start with "@"
-     $found = false;
-     foreach ( $this->pathInfo as $candidate => $unused ) {
-         if ( substr( $candidate, $prefixLength, 1 ) == '@' ) {
-             continue;
-         }
-         if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
-             $found = $candidate;
-             continue;
-         }
-         if ( $found !== false ) {
-             // Left the run of matching paths
-             break;
-         }
-     }
-     if ( $found !== false ) {
-         return $found;
-     }
-
-     // Second pass: accept interstitial elements as well
-     foreach ( $this->pathInfo as $candidate => $unused ) {
-         if ( substr( $candidate, 0, $prefixLength ) == $prefix ) {
-             $found = $candidate;
-         } elseif ( $found !== false ) {
-             break;
-         }
-     }
-     return $found;
- }
-
- /**
-  * Find the path name of first element in the array.
-  * If the array is empty, this will return the \@extra interstitial element.
-  * If the specified path is not found or is not an array, it will return false.
-  *
-  * @param string $path Path of the array
-  * @return bool|int|string
-  */
- function findFirstArrayElement( $path ) {
-     $prefix = "$path/";
-     $prefixLength = strlen( $path ) + 1;
-
-     // Pass one looks for an ordinary element, pass two also accepts an
-     // interstitial (\@-prefixed) one.
-     foreach ( array( true, false ) as $ordinaryOnly ) {
-         foreach ( $this->pathInfo as $candidate => $unused ) {
-             if ( substr( $candidate, 0, $prefixLength ) != $prefix ) {
-                 continue;
-             }
-             if ( $ordinaryOnly && substr( $candidate, $prefixLength, 1 ) == '@' ) {
-                 continue;
-             }
-             return $candidate;
-         }
-     }
-     return false;
- }
-
- /**
-  * Get the indent string which sits after a given start position, plus the
-  * padding to put between a key and its "=>" so that the arrow lines up
-  * with the one at $arrowPos.
-  * The indent is false if the position is not at the start of the line.
-  *
-  * @param int $pos Byte offset into the text
-  * @param string|bool $key Source text of the key about to be inserted
-  * @param int|bool $arrowPos Byte offset of an existing arrow, or false
-  * @return array ( indent string or false, arrow padding string )
-  */
- function getIndent( $pos, $key = false, $arrowPos = false ) {
-     $atLineStart = ( $pos == 0 || $this->text[$pos - 1] == "\n" );
-     if ( !$atLineStart ) {
-         return array( false, ' ' );
-     }
-
-     $indentLength = strspn( $this->text, " \t", $pos );
-     $indent = substr( $this->text, $pos, $indentLength );
-
-     // A single space is used unless a wider gap is needed for alignment
-     $arrowIndent = ' ';
-     if ( $indent !== false && $arrowPos !== false ) {
-         $gap = $arrowPos - $pos - $indentLength - strlen( $key );
-         if ( $gap > 0 ) {
-             $arrowIndent = str_repeat( ' ', $gap );
-         }
-     }
-     return array( $indent, $arrowIndent );
- }
-
- /**
- * Run the parser on the text. Throws an exception if the string does not
- * match our defined subset of PHP syntax.
- *
- * The parser is a state machine driven by $this->stateStack: each loop
- * iteration pops one state, consumes the tokens that belong to it and
- * pushes the states that have to follow. The states are 'file', 'file 2',
- * 'statement', 'statement end', 'array assign end', 'expression', 'array',
- * 'array end', 'element', 'element end' and 'assoc-element'.
- *
- * Along the way the extents of statements and array elements are recorded
- * through pushPath(), nextPath(), popPath(), startPathValue(),
- * endPathValue(), markComma() and markArrow(). Paths named "\@extra-N"
- * are started at the beginning of the file, after each ";", at the start
- * of each array and after each "," inside an array.
- *
- * @throws ConfEditorParseError
- */
- public function parse() {
- $this->initParse();
- $this->pushState( 'file' );
- $this->pushPath( '@extra-' . ( $this->serial++ ) );
- $token = $this->firstToken();
-
- while ( !$token->isEnd() ) {
- $state = $this->popState();
- if ( !$state ) {
- $this->error( 'internal error: empty state stack' );
- }
-
- switch ( $state ) {
- case 'file':
- $this->expect( T_OPEN_TAG );
- $token = $this->skipSpace();
- if ( $token->isEnd() ) {
- break 2;
- }
- $this->pushState( 'statement', 'file 2' );
- break;
- case 'file 2':
- $token = $this->skipSpace();
- if ( $token->isEnd() ) {
- break 2;
- }
- $this->pushState( 'statement', 'file 2' );
- break;
- case 'statement':
- $token = $this->skipSpace();
- if ( !$this->validatePath( $token->text ) ) {
- $this->error( "Invalid variable name \"{$token->text}\"" );
- }
- $this->nextPath( $token->text );
- $this->expect( T_VARIABLE );
- $this->skipSpace();
- $arrayAssign = false;
- if ( $this->currentToken()->type == '[' ) {
- $this->nextToken();
- $token = $this->skipSpace();
- if ( !$token->isScalar() ) {
- $this->error( "expected a string or number for the array key" );
- }
- if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
- $text = $this->parseScalar( $token->text );
- } else {
- $text = $token->text;
- }
- if ( !$this->validatePath( $text ) ) {
- $this->error( "Invalid associative array name \"$text\"" );
- }
- $this->pushPath( $text );
- $this->nextToken();
- $this->skipSpace();
- $this->expect( ']' );
- $this->skipSpace();
- $arrayAssign = true;
- }
- $this->expect( '=' );
- $this->skipSpace();
- $this->startPathValue();
- if ( $arrayAssign ) {
- $this->pushState( 'expression', 'array assign end' );
- } else {
- $this->pushState( 'expression', 'statement end' );
- }
- break;
- case 'array assign end':
- case 'statement end':
- $this->endPathValue();
- if ( $state == 'array assign end' ) {
- $this->popPath();
- }
- $this->skipSpace();
- $this->expect( ';' );
- $this->nextPath( '@extra-' . ( $this->serial++ ) );
- break;
- case 'expression':
- $token = $this->skipSpace();
- if ( $token->type == T_ARRAY ) {
- $this->pushState( 'array' );
- } elseif ( $token->isScalar() ) {
- $this->nextToken();
- } elseif ( $token->type == T_VARIABLE ) {
- $this->nextToken();
- } else {
- $this->error( "expected simple expression" );
- }
- break;
- case 'array':
- $this->skipSpace();
- $this->expect( T_ARRAY );
- $this->skipSpace();
- $this->expect( '(' );
- $this->skipSpace();
- $this->pushPath( '@extra-' . ( $this->serial++ ) );
- if ( $this->isAhead( ')' ) ) {
- // Empty array
- $this->pushState( 'array end' );
- } else {
- $this->pushState( 'element', 'array end' );
- }
- break;
- case 'array end':
- $this->skipSpace();
- $this->popPath();
- $this->expect( ')' );
- break;
- case 'element':
- $token = $this->skipSpace();
- // Look ahead to find the double arrow
- if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) {
- // Found associative element
- $this->pushState( 'assoc-element', 'element end' );
- } else {
- // Not associative
- $this->nextPath( '@next' );
- $this->startPathValue();
- $this->pushState( 'expression', 'element end' );
- }
- break;
- case 'element end':
- $token = $this->skipSpace();
- if ( $token->type == ',' ) {
- $this->endPathValue();
- $this->markComma();
- $this->nextToken();
- $this->nextPath( '@extra-' . ( $this->serial++ ) );
- // Look ahead to find ending bracket
- if ( $this->isAhead( ")" ) ) {
- // Found ending bracket, no continuation
- $this->skipSpace();
- } else {
- // No ending bracket, continue to next element
- $this->pushState( 'element' );
- }
- } elseif ( $token->type == ')' ) {
- // End array
- $this->endPathValue();
- } else {
- $this->error( "expected the next array element or the end of the array" );
- }
- break;
- case 'assoc-element':
- $token = $this->skipSpace();
- if ( !$token->isScalar() ) {
- $this->error( "expected a string or number for the array key" );
- }
- if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
- $text = $this->parseScalar( $token->text );
- } else {
- $text = $token->text;
- }
- if ( !$this->validatePath( $text ) ) {
- $this->error( "Invalid associative array name \"$text\"" );
- }
- $this->nextPath( $text );
- $this->nextToken();
- $this->skipSpace();
- $this->markArrow();
- $this->expect( T_DOUBLE_ARROW );
- $this->skipSpace();
- $this->startPathValue();
- $this->pushState( 'expression' );
- break;
- }
- }
- if ( count( $this->stateStack ) ) {
- $this->error( 'unexpected end of file' );
- }
- $this->popPath();
- }
-
- /**
-  * Reset all parser state and tokenize $this->text, ready for a new parse.
-  */
- protected function initParse() {
-     // Bookkeeping filled in during the parse
-     $this->pathInfo = array();
-     $this->serial = 1;
-     $this->stateStack = array();
-     $this->pathStack = array();
-
-     // Tokenize, then rewind to the first token
-     $this->tokens = token_get_all( $this->text );
-     $this->firstToken();
- }
-
- /**
-  * Set the parse position and refresh the current token to match it; past
-  * the last token the current token becomes the END token.
-  * Do not call this except from firstToken() and nextToken(), there is
-  * more to update than just the position.
-  *
-  * @param int $pos Index into $this->tokens
-  * @return ConfEditorToken The new current token
-  */
- protected function setPos( $pos ) {
-     $this->pos = $pos;
-     $this->currentToken = ( $this->pos < count( $this->tokens ) )
-         ? $this->newTokenObj( $this->tokens[$this->pos] )
-         : ConfEditorToken::newEnd();
-     return $this->currentToken;
- }
-
- /**
-  * Wrap an element of the token_get_all() result in a ConfEditorToken.
-  * Array elements supply ( type, text ); any other element is used as both
-  * the type and the text.
-  *
-  * @param array|string $internalToken
-  * @return ConfEditorToken
-  */
- function newTokenObj( $internalToken ) {
-     if ( !is_array( $internalToken ) ) {
-         return new ConfEditorToken( $internalToken, $internalToken );
-     }
-     list( $type, $text ) = $internalToken;
-     return new ConfEditorToken( $type, $text );
- }
-
- /**
-  * Rewind the parse position to the first token and reset the line, column
-  * and byte counters.
-  *
-  * @return ConfEditorToken The first token
-  */
- function firstToken() {
-     $this->lineNum = 1;
-     $this->colNum = 1;
-     $this->byteNum = 0;
-     $this->prevToken = ConfEditorToken::newEnd();
-     return $this->setPos( 0 );
- }
-
- /**
-  * Accessor for the token at the current parse position.
-  *
-  * @return ConfEditorToken
-  */
- function currentToken() {
-     return $this->currentToken;
- }
-
- /**
-  * Step to the next token, first adding the text of the token being left
-  * to the line, column and byte counters.
-  *
-  * @return ConfEditorToken The new current token
-  */
- function nextToken() {
-     $leaving = $this->currentToken;
-     if ( $leaving ) {
-         $text = $leaving->text;
-         $length = strlen( $text );
-         $lastLf = strrpos( $text, "\n" );
-         if ( $lastLf === false ) {
-             $this->colNum += $length;
-         } else {
-             // The column restarts after the last line feed in the token
-             $this->lineNum += substr_count( $text, "\n" );
-             $this->colNum = $length - $lastLf;
-         }
-         $this->byteNum += $length;
-     }
-     $this->prevToken = $leaving;
-     return $this->setPos( $this->pos + 1 );
- }
-
- /**
-  * Peek at the token $offset steps away from the current position without
-  * moving. $offset may be negative, to get tokens behind the current
-  * position. Positions outside the token list give the END token.
-  *
-  * @param int $offset
-  * @return ConfEditorToken
-  */
- function getTokenAhead( $offset ) {
-     $index = $this->pos + $offset;
-     if ( $index >= 0 && $index < count( $this->tokens ) ) {
-         return $this->newTokenObj( $this->tokens[$index] );
-     }
-     return ConfEditorToken::newEnd();
- }
-
- /**
-  * Move forward until the current token is no longer a skippable one
-  * (whitespace or a comment).
-  *
-  * @return ConfEditorToken The current token after skipping
-  */
- function skipSpace() {
-     while ( true ) {
-         $token = $this->currentToken;
-         if ( !$token || !$token->isSkip() ) {
-             return $token;
-         }
-         $this->nextToken();
-     }
- }
-
- /**
-  * Require the current token to be of the given type and step past it.
-  * A token of any other type is reported through error().
-  *
-  * @param int|string $type Expected token type
-  * @return ConfEditorToken The token after the expected one
-  */
- function expect( $type ) {
-     $token = $this->currentToken;
-     if ( !$token || $token->type != $type ) {
-         $this->error( "expected " . $this->getTypeName( $type ) .
-             ", got " . $this->getTypeName( $this->currentToken->type ) );
-     } else {
-         return $this->nextToken();
-     }
- }
-
- /**
-  * Schedule one or two states on the state stack. $nextState ends up on top
-  * and is therefore popped first; $stateAfterThat, if given, sits below it.
-  *
-  * @param string $nextState
-  * @param string|null $stateAfterThat
-  */
- function pushState( $nextState, $stateAfterThat = null ) {
-     if ( $stateAfterThat !== null ) {
-         array_push( $this->stateStack, $stateAfterThat );
-     }
-     array_push( $this->stateStack, $nextState );
- }
-
- /**
-  * Remove the topmost state from the state stack and hand it back.
-  *
-  * @return mixed The popped state, or null when the stack is empty
-  */
- function popState() {
-     $state = array_pop( $this->stateStack );
-     return $state;
- }
-
- /**
-  * Check a user-supplied path component: it must not contain "/" and must
-  * not start with "@".
-  * This exists to allow "/" and "@" to be reserved for string path keys
-  *
-  * @param string $path
-  * @return bool
-  */
- function validatePath( $path ) {
-     if ( strpos( $path, '/' ) !== false ) {
-         return false;
-     }
-     return substr( $path, 0, 1 ) != '@';
- }
-
- /**
-  * Internal function to close the current path region: the innermost entry
-  * of the path stack is stamped with the current byte and token position
-  * and stored in $this->pathInfo under the "/"-joined names of the whole
-  * stack. Do not call except from popPath() or nextPath().
-  */
- function endPath() {
-     $fullPath = '';
-     foreach ( $this->pathStack as $entry ) {
-         $fullPath .= ( $fullPath === '' ? '' : '/' ) . $entry['name'];
-     }
-     // After the loop $entry is a copy of the innermost stack entry
-     $entry['endByte'] = $this->byteNum;
-     $entry['endToken'] = $this->pos;
-     $this->pathInfo[$fullPath] = $entry;
- }
-
- /**
-  * Open a new, deeper path level starting at the current position, for
-  * example at the start of an array.
-  *
-  * @param string $path Name of the first path on the new level
-  */
- function pushPath( $path ) {
-     $entry = array(
-         'name' => $path,
-         'level' => count( $this->pathStack ) + 1,
-         'startByte' => $this->byteNum,
-         'startToken' => $this->pos,
-         'valueStartToken' => false,
-         'valueStartByte' => false,
-         'valueEndToken' => false,
-         'valueEndByte' => false,
-         'nextArrayIndex' => 0,
-         'hasComma' => false,
-         'arrowByte' => false
-     );
-     $this->pathStack[] = $entry;
- }
-
- /**
-  * Close the current path and leave its level, for example at the end of
-  * an array.
-  */
- function popPath() {
-     // Record the path first, while it is still on the stack
-     $this->endPath();
-     array_pop( $this->pathStack );
- }
-
- /**
-  * Go to the next path on the same level. This ends the current path and
-  * starts a new one at the current position. If $path is \@next, the new
-  * path is named after the next numeric array index of this level.
-  *
-  * @param string $path Name of the new path, or '@next'
-  */
- function nextPath( $path ) {
-     $this->endPath();
-
-     $top = count( $this->pathStack ) - 1;
-     $entry = $this->pathStack[$top];
-     if ( $path == '@next' ) {
-         $entry['name'] = $entry['nextArrayIndex'];
-         $entry['nextArrayIndex']++;
-     } else {
-         $entry['name'] = $path;
-     }
-
-     // Fresh position fields take precedence; the remaining keys (name,
-     // level, nextArrayIndex) are carried over from the previous entry.
-     $fresh = array(
-         'startByte' => $this->byteNum,
-         'startToken' => $this->pos,
-         'valueStartToken' => false,
-         'valueStartByte' => false,
-         'valueEndToken' => false,
-         'valueEndByte' => false,
-         'hasComma' => false,
-         'arrowByte' => false,
-     );
-     $this->pathStack[$top] = $fresh + $entry;
- }
-
- /**
-  * Record the current token and byte position as the start of the value
-  * part of the innermost path.
-  */
- function startPathValue() {
-     $top = count( $this->pathStack ) - 1;
-     $this->pathStack[$top]['valueStartToken'] = $this->pos;
-     $this->pathStack[$top]['valueStartByte'] = $this->byteNum;
- }
-
- /**
-  * Record the current token and byte position as the end of the value
-  * part of the innermost path.
-  */
- function endPathValue() {
-     $top = count( $this->pathStack ) - 1;
-     $this->pathStack[$top]['valueEndToken'] = $this->pos;
-     $this->pathStack[$top]['valueEndByte'] = $this->byteNum;
- }
-
- /**
-  * Flag the innermost path as being followed by a comma separator.
-  */
- function markComma() {
-     $top = count( $this->pathStack ) - 1;
-     $this->pathStack[$top]['hasComma'] = true;
- }
-
- /**
-  * Record the current byte position as the location of the arrow separator
-  * of the innermost (associative) path.
-  */
- function markArrow() {
-     $top = count( $this->pathStack ) - 1;
-     $this->pathStack[$top]['arrowByte'] = $this->byteNum;
- }
-
- /**
-  * Abort parsing by throwing a parse error built from this editor and the
-  * given message.
-  *
-  * @param string $msg
-  * @throws ConfEditorParseError Always
-  */
- function error( $msg ) {
-     $exception = new ConfEditorParseError( $this, $msg );
-     throw $exception;
- }
-
- /**
-  * Describe a token type for error messages: integer types are translated
-  * with token_name(), anything else is wrapped in double quotes.
-  *
-  * @param int|string $type
-  * @return string
-  */
- function getTypeName( $type ) {
-     return is_int( $type ) ? token_name( $type ) : "\"$type\"";
- }
-
- /**
-  * Test whether the first non-skippable token found at or after the current
-  * position plus $offset has the given type. The parse position is not
-  * moved.
-  *
-  * @param int|string $type Token type to look for
-  * @param int $offset Where to start looking, relative to the current position
-  * @return bool
-  */
- function isAhead( $type, $offset = 0 ) {
-     for ( $ahead = $offset; ; $ahead++ ) {
-         $token = $this->getTokenAhead( $ahead );
-         if ( $token->isEnd() ) {
-             // Ran out of tokens
-             return false;
-         }
-         if ( !$token->isSkip() ) {
-             return $token->type == $type;
-         }
-     }
- }
-
- /**
-  * Accessor for the token that was current before the last nextToken() call.
-  *
-  * @return ConfEditorToken
-  */
- function prevToken() {
-     return $this->prevToken;
- }
-
- /**
-  * Debugging aid: echo the tokenizer array as an HTML <pre> block with one
-  * "type text" row per token, control characters in the text escaped.
-  */
- function dumpTokens() {
-     $rows = array();
-     foreach ( $this->tokens as $rawToken ) {
-         $token = $this->newTokenObj( $rawToken );
-         $rows[] = sprintf( "%-28s %s\n",
-             $this->getTypeName( $token->type ),
-             addcslashes( $token->text, "\0..\37" ) );
-     }
-     echo "<pre>" . htmlspecialchars( implode( '', $rows ) ) . "</pre>";
- }
-}
-
-/**
- * Exception thrown for parse errors; remembers the line and column the
- * editor was at when the error was raised.
- */
-class ConfEditorParseError extends MWException {
-    var $lineNum, $colNum;
-
-    /**
-     * @param ConfEditor $editor Editor whose lineNum and colNum are recorded
-     * @param string $msg Description of the problem
-     */
-    function __construct( $editor, $msg ) {
-        $line = $editor->lineNum;
-        $col = $editor->colNum;
-        $this->lineNum = $line;
-        $this->colNum = $col;
-        parent::__construct( "Parse error on line $line col $col: $msg" );
-    }
-
-    /**
-     * Return the offending line of $text followed by a line holding a caret
-     * under the error column, or '' if $text has no such line.
-     *
-     * @param string $text The text that was being parsed
-     * @return string
-     */
-    function highlight( $text ) {
-        $wanted = $this->lineNum - 1;
-        foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
-            if ( $index == $wanted ) {
-                $padding = str_repeat( ' ', $this->colNum - 1 );
-                return "$line\n" . $padding . "^\n";
-            }
-        }
-        return '';
-    }
-
-}
-
-/**
- * Thin wrapper around one tokenizer token: a type plus the source text,
- * with helpers to classify the type.
- */
-class ConfEditorToken {
-    var $type;
-    var $text;
-
-    /** Token types accepted as scalar values and array keys */
-    static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );
-    /** Token types skipped over as whitespace or comments */
-    static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );
-
-    /**
-     * Create the END token: type 'END' with empty text.
-     * @return ConfEditorToken
-     */
-    static function newEnd() {
-        return new self( 'END', '' );
-    }
-
-    /**
-     * @param int|string $type Token type
-     * @param string $text Source text of the token
-     */
-    function __construct( $type, $text ) {
-        $this->text = $text;
-        $this->type = $type;
-    }
-
-    /**
-     * @return bool Whether the type is one of $skipTypes
-     */
-    function isSkip() {
-        $skippable = in_array( $this->type, self::$skipTypes );
-        return $skippable;
-    }
-
-    /**
-     * @return bool Whether the type is one of $scalarTypes
-     */
-    function isScalar() {
-        $scalar = in_array( $this->type, self::$scalarTypes );
-        return $scalar;
-    }
-
-    /**
-     * @return bool Whether this is the END token
-     */
-    function isEnd() {
-        return 'END' == $this->type;
-    }
-}
+++ /dev/null
-<?php
-/**
- * Convenience class for weighted consistent hash rings.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @author Aaron Schulz
- */
-
-/**
- * Convenience class for weighted consistent hash rings.
- *
- * Locations are laid out on a ring of RING_SIZE spots in the order of the
- * SHA-1 hash of their names, each covering a share of the ring proportional
- * to its weight. Items are mapped to a spot using the first 28 bits of their
- * SHA-1 hash.
- *
- * @since 1.22
- */
-class HashRing {
-    /** @var Array (location => weight) */
-    protected $sourceMap = array();
-    /** @var Array (location => (start, end)) */
-    protected $ring = array();
-
-    const RING_SIZE = 268435456; // 2^28
-
-    /**
-     * @param array $map (location => weight)
-     * @throws MWException If the map is empty or has no positive weights
-     */
-    public function __construct( array $map ) {
-        $map = array_filter( $map, function( $w ) { return $w > 0; } );
-        if ( !count( $map ) ) {
-            throw new MWException( "Ring is empty or all weights are zero." );
-        }
-        $this->sourceMap = $map;
-        // Sort the locations based on the hash of their names
-        $hashes = array();
-        foreach ( $map as $location => $weight ) {
-            $hashes[$location] = sha1( $location );
-        }
-        uksort( $map, function ( $a, $b ) use ( $hashes ) {
-            return strcmp( $hashes[$a], $hashes[$b] );
-        } );
-        // Fit the map to weight-proportionate one with a space of size RING_SIZE
-        $sum = array_sum( $map );
-        $standardMap = array();
-        foreach ( $map as $location => $weight ) {
-            $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE );
-        }
-        // Build a ring of RING_SIZE spots, with each location at a spot in location hash order
-        $index = 0;
-        foreach ( $standardMap as $location => $weight ) {
-            // Location covers half-closed interval [$index,$index + $weight)
-            $this->ring[$location] = array( $index, $index + $weight );
-            $index += $weight;
-        }
-        // Make sure the last location covers what is left (floor() above may
-        // leave a few spots at the end of the ring unassigned)
-        end( $this->ring );
-        $this->ring[key( $this->ring )][1] = self::RING_SIZE;
-    }
-
-    /**
-     * Get the location of an item on the ring
-     *
-     * @param string $item
-     * @return string Location
-     */
-    public function getLocation( $item ) {
-        $locations = $this->getLocations( $item, 1 );
-        return $locations[0];
-    }
-
-    /**
-     * Get the location of an item on the ring, as well as the next clockwise locations
-     *
-     * @param string $item
-     * @param integer $limit Maximum number of locations to return
-     * @return array List of locations
-     */
-    public function getLocations( $item, $limit ) {
-        $locations = array();
-        $primaryLocation = null;
-        $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits
-        foreach ( $this->ring as $location => $range ) {
-            if ( count( $locations ) >= $limit ) {
-                break;
-            }
-            // The $primaryLocation is the location the item spot is in.
-            // After that is reached, keep appending the next locations.
-            if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) {
-                if ( $primaryLocation === null ) {
-                    $primaryLocation = $location;
-                }
-                $locations[] = $location;
-            }
-        }
-        // If more locations are requested, wrap-around and keep adding them.
-        // A plain foreach is used here because each() no longer exists in PHP 8.
-        foreach ( $this->ring as $location => $range ) {
-            if ( count( $locations ) >= $limit ) {
-                break;
-            }
-            if ( $location === $primaryLocation ) {
-                break; // don't go in circles
-            }
-            $locations[] = $location;
-        }
-        return $locations;
-    }
-
-    /**
-     * Get the map of locations to weight (ignores 0-weight items)
-     *
-     * @return array
-     */
-    public function getLocationWeights() {
-        return $this->sourceMap;
-    }
-
-    /**
-     * Get a new hash ring with a location removed from the ring
-     *
-     * @param string $location
-     * @return HashRing|bool Returns false if no non-zero weighted spots are left
-     */
-    public function newWithoutLocation( $location ) {
-        $map = $this->sourceMap;
-        unset( $map[$location] );
-        if ( count( $map ) ) {
-            return new self( $map );
-        }
-        return false;
-    }
-}
+++ /dev/null
-<?php
-/**
- * Functions and constants to play with IP addresses and ranges
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @author Antoine Musso "<hashar at free dot fr>", Aaron Schulz
- */
-
-// Some regex definitions to "play" with IP addresses and IP address blocks
-
-// An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255
-define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' );
-define( 'RE_IP_ADD', RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE );
-// An IPv4 block is an IP address and a prefix (the pattern accepts d0 to d32)
-define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' );
-define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX );
-
-// An IPv6 address is made up of 8 words (each x0000 to xFFFF).
-// However, the "::" abbreviation can be used on consecutive x0000 words.
-define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' );
-define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' );
-define( 'RE_IPV6_ADD',
- '(?:' . // starts with "::" (including "::")
- ':(?::|(?::' . RE_IPV6_WORD . '){1,7})' .
- '|' . // ends with "::" (except "::")
- RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,6}::' .
- '|' . // contains one "::" in the middle (the ^ makes the test fail if none found)
- RE_IPV6_WORD . '(?::((?(-1)|:))?' . RE_IPV6_WORD . '){1,6}(?(-2)|^)' .
- '|' . // contains no "::"
- RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){7}' .
- ')'
-);
-// An IPv6 block is an IP address and a prefix (the pattern accepts d0 to d128)
-define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX );
-// For IPv6 canonicalization (NOT for strict validation; these are quite lax!)
-define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' );
-define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' );
-
-// This might be useful for regexps used elsewhere, matches any IPv4 or IPv6 address or network
-define( 'IP_ADDRESS_STRING',
- '(?:' .
- RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?' . // IPv4
- '|' .
- RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?' . // IPv6
- ')'
-);
-
-/**
- * A collection of public static functions to play with IP address
- * and IP blocks.
- */
-class IP {
- /**
-  * Determine if a string is a valid IP address or network (CIDR prefix),
-  * in either IPv4 or IPv6 notation.
-  * SIIT IPv4-translated addresses are rejected.
-  * Note: canonicalize() tries to convert translated addresses to IPv4.
-  *
-  * @param string $ip possible IP address
-  * @return Boolean
-  */
- public static function isIPAddress( $ip ) {
-     $pattern = '/^' . IP_ADDRESS_STRING . '$/';
-     return preg_match( $pattern, $ip ) ? true : false;
- }
-
- /**
-  * Given a string, determine if it is a valid IPv6 address, with or without
-  * a CIDR prefix.
-  * Note: Unlike isValid(), this looks for networks too.
-  *
-  * @param string $ip possible IP address
-  * @return Boolean
-  */
- public static function isIPv6( $ip ) {
-     $pattern = '/^' . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?$/';
-     return preg_match( $pattern, $ip ) ? true : false;
- }
-
- /**
-  * Tell whether a string is a valid IPv4 address or IPv4 network.
-  * Note: Unlike isValid(), this accepts networks (CIDR suffix) too.
-  *
-  * @param string $ip Candidate IP address or network
-  * @return bool
-  */
- public static function isIPv4( $ip ) {
-     $pattern = '/^' . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?$/';
-     return preg_match( $pattern, $ip ) === 1;
- }
-
- /**
-  * Validate a single IP address (IPv4 or IPv6). Ranges are NOT valid here.
-  * SIIT IPv4-translated addresses are rejected.
-  * Note: canonicalize() tries to convert translated addresses to IPv4.
-  *
-  * @param string $ip Candidate address
-  * @return bool True if it is valid.
-  */
- public static function isValid( $ip ) {
-     if ( preg_match( '/^' . RE_IP_ADD . '$/', $ip ) ) {
-         return true;
-     }
-     return (bool)preg_match( '/^' . RE_IPV6_ADD . '$/', $ip );
- }
-
- /**
-  * Validate an IP block: a valid address followed by a valid CIDR prefix.
-  * SIIT IPv4-translated addresses are rejected.
-  * Note: canonicalize() tries to convert translated addresses to IPv4.
-  *
-  * @param string $ipblock Candidate block
-  * @return bool True if it is valid.
-  */
- public static function isValidBlock( $ipblock ) {
-     if ( preg_match( '/^' . RE_IPV6_BLOCK . '$/', $ipblock ) ) {
-         return true;
-     }
-     return (bool)preg_match( '/^' . RE_IP_BLOCK . '$/', $ipblock );
- }
-
- /**
-  * Convert an IP into a verbose, uppercase, normalized form.
-  * IPv6 addresses in octet notation are expanded to 8 words.
-  * IPv4 addresses are just trimmed.
-  *
-  * Strings that are neither IPv4 nor IPv6 (per isIPv4()/isIPv6()) are
-  * returned trimmed but otherwise untouched.
-  *
-  * @param string $ip IP address in quad or octet form (CIDR or not).
-  * @return string|null Normalized address, or null if $ip is empty after trimming
-  */
- public static function sanitizeIP( $ip ) {
-     $ip = trim( $ip );
-     if ( $ip === '' ) {
-         return null;
-     }
-     if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) {
-         return $ip; // nothing else to do for IPv4 addresses or invalid ones
-     }
-     // Convert to upper case
-     $ip = strtoupper( $ip );
-     // Expand zero abbreviations
-     $abbrevPos = strpos( $ip, '::' );
-     if ( $abbrevPos !== false ) {
-         // We know this is valid IPv6. Find the last index of the
-         // address before any CIDR number (e.g. "a:b:c::/24").
-         $CIDRStart = strpos( $ip, '/' );
-         $addressEnd = ( $CIDRStart !== false )
-             ? $CIDRStart - 1
-             : strlen( $ip ) - 1;
-         if ( $abbrevPos == 0 ) {
-             // The '::' is at the beginning...
-             $repeat = '0:';
-             // When the address part is exactly '::' (index of its last
-             // character is 1) the final word must be supplied too. This has
-             // to look at the address part only, so that '::/8' expands to
-             // eight words just like a bare '::' does.
-             $extra = ( $addressEnd == 1 ) ? '0' : '';
-             $pad = 9; // 7+2 (due to '::')
-         } elseif ( $abbrevPos == ( $addressEnd - 1 ) ) {
-             // The '::' is at the end...
-             $repeat = ':0';
-             $extra = '';
-             $pad = 9; // 7+2 (due to '::')
-         } else {
-             // The '::' is in the middle...
-             $repeat = ':0';
-             $extra = ':';
-             $pad = 8; // 6+2 (due to '::')
-         }
-         $ip = str_replace( '::',
-             str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
-             $ip
-         );
-     }
-     // Remove leading zeros from each word as needed
-     $ip = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip );
-     return $ip;
- }
-
- /**
- * Prettify an IP for display to end users.
- * This will make it more compact and lower-case.
- *
- * The value is first passed through sanitizeIP(). Only when the result is
- * IPv6 according to isIPv6() (so a CIDR suffix is allowed) is it rewritten:
- * the longest run of two or more consecutive "0" words is replaced by "::"
- * (the first such run wins a tie) and the string is lower-cased. Any other
- * value is returned exactly as sanitizeIP() produced it, which is null for
- * an empty string.
- *
- * @param $ip string
- * @return string
- */
- public static function prettifyIP( $ip ) {
- $ip = self::sanitizeIP( $ip ); // normalize (removes '::')
- if ( self::isIPv6( $ip ) ) {
- // Split IP into an address and a CIDR
- if ( strpos( $ip, '/' ) !== false ) {
- list( $ip, $cidr ) = explode( '/', $ip, 2 );
- } else {
- list( $ip, $cidr ) = array( $ip, '' );
- }
- // Get the largest slice of words with multiple zeros
- $offset = 0;
- $longest = $longestPos = false;
- while ( preg_match(
- '!(?:^|:)0(?::0)+(?:$|:)!', $ip, $m, PREG_OFFSET_CAPTURE, $offset
- ) ) {
- list( $match, $pos ) = $m[0]; // full match, including the surrounding colons
- if ( strlen( $match ) > strlen( $longest ) ) { // strict ">" keeps the earliest of equal-length runs
- $longest = $match;
- $longestPos = $pos;
- }
- $offset = ( $pos + strlen( $match ) ); // advance past this match
- }
- if ( $longest !== false ) {
- // Replace this portion of the string with the '::' abbreviation
- $ip = substr_replace( $ip, '::', $longestPos, strlen( $longest ) );
- }
- // Add any CIDR back on
- if ( $cidr !== '' ) {
- $ip = "{$ip}/{$cidr}";
- }
- // Convert to lower case to make it more readable
- $ip = strtolower( $ip );
- }
- return $ip;
- }
-
- /**
-  * Given a host/port string, like one might find in the host part of a URL
-  * per RFC 2732, split the hostname part and the port part and return an
-  * array with an element for each. If there is no port part, the array will
-  * have false in place of the port. If the string was invalid in some way,
-  * false is returned.
-  *
-  * This was easy with IPv4 and was generally done in an ad-hoc way, but
-  * with IPv6 it's somewhat more complicated due to the need to parse the
-  * square brackets and colons.
-  *
-  * A bare IPv6 address is accepted despite the lack of square brackets.
-  *
-  * @param string $both The string with the host and port
-  * @return array|bool array( host, port or false ), or false if $both is invalid
-  */
- public static function splitHostAndPort( $both ) {
-     if ( substr( $both, 0, 1 ) === '[' ) {
-         if ( !preg_match( '/^\[(' . RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/', $both, $m ) ) {
-             // Square bracket found but no IPv6
-             return false;
-         }
-         return array( $m[1], isset( $m['port'] ) ? intval( $m['port'] ) : false );
-     }
-     $numColons = substr_count( $both, ':' );
-     if ( $numColons >= 2 ) {
-         // Either a bare IPv6 address, or too many colons for anything else
-         return preg_match( '/^' . RE_IPV6_ADD . '$/', $both )
-             ? array( $both, false )
-             : false;
-     }
-     if ( $numColons >= 1 ) {
-         // Host:port?
-         $bits = explode( ':', $both );
-         // The port must consist of digits only; anchoring at the end rejects
-         // trailing garbage such as "80abc" instead of silently reading it as 80.
-         if ( !preg_match( '/^\d+$/', $bits[1] ) ) {
-             return false;
-         }
-         return array( $bits[0], intval( $bits[1] ) );
-     }
-     // Plain hostname
-     return array( $both, false );
- }
-
- /**
-  * Build a host/port string of the kind found in a URL from its two parts.
-  * A host containing a colon is wrapped in square brackets as in RFC 2732.
-  * When a default port is given and equals $port, the port is left off.
-  *
-  * @param string $host
-  * @param int $port
-  * @param bool|int $defaultPort Port to omit from the result, or false for none
-  * @return string
-  */
- public static function combineHostAndPort( $host, $port, $defaultPort = false ) {
-     $hostPart = ( strpos( $host, ':' ) !== false ) ? '[' . $host . ']' : $host;
-     $isDefaultPort = ( $defaultPort !== false && $port == $defaultPort );
-     return $isDefaultPort ? $hostPart : $hostPart . ':' . $port;
- }
-
- /**
-  * Turn an unsigned decimal integer into an IPv6 address in octet notation.
-  *
-  * @param string $ip_int Decimal representation of the address
-  * @return string
-  */
- public static function toOctet( $ip_int ) {
-     $hex = wfBaseConvert( $ip_int, 10, 16, 32, false );
-     return self::hexToOctet( $hex );
- }
-
- /**
-  * Turn the hexadecimal form of an IPv4 or IPv6 address back into its
-  * readable notation.
-  *
-  * @param string $hex Hex number, carrying a "v6-" prefix when it is IPv6
-  * @return string Quad-dotted (IPv4) or octet notation (IPv6)
-  */
- public static function formatHex( $hex ) {
-     $isIPv6 = ( substr( $hex, 0, 3 ) == 'v6-' );
-     if ( !$isIPv6 ) {
-         return self::hexToQuad( $hex );
-     }
-     // Drop the "v6-" marker before converting
-     return self::hexToOctet( substr( $hex, 3 ) );
- }
-
- /**
-  * Turn a hexadecimal number into an IPv6 address in octet notation.
-  *
-  * @param string $ip_hex Pure hex (no v6- prefix)
-  * @return string Address of the form a:b:c:d:e:f:g:h
-  */
- public static function hexToOctet( $ip_hex ) {
-     // Left-pad to 32 hex digits (128 bits)
-     $padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
-     // The first 32 digits form eight 4-digit words
-     $words = str_split( substr( $padded, 0, 32 ), 4 );
-     $octets = implode( ':', $words );
-     // Strip leading zeroes from every word
-     return preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $octets );
- }
-
- /**
-  * Turn a hexadecimal number into an IPv4 address in quad-dotted notation.
-  *
-  * @param string $ip_hex Pure hex
-  * @return string Address of the form a.b.c.d
-  */
- public static function hexToQuad( $ip_hex ) {
-     // Left-pad to 8 hex digits (32 bits)
-     $padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
-     // Each of the first four digit pairs is one decimal octet
-     $octets = array();
-     foreach ( str_split( substr( $padded, 0, 8 ), 2 ) as $pair ) {
-         $octets[] = base_convert( $pair, 16, 10 );
-     }
-     return implode( '.', $octets );
- }
-
- /**
-  * Tell whether a string really is an IP address and whether it is public,
-  * i.e. outside RFC 1918 and the other reserved ranges listed below.
-  * IPv6 input is delegated to isPublic6().
-  * Comes from ProxyTools.php
-  *
-  * @param string $ip
-  * @return bool
-  */
- public static function isPublic( $ip ) {
-     if ( self::isIPv6( $ip ) ) {
-         return self::isPublic6( $ip );
-     }
-
-     $n = self::toUnsigned( $ip );
-     // ip2long accepts incomplete addresses, as well as some addresses
-     // followed by garbage characters, so also require that converting the
-     // number back yields the very same string.
-     if ( !$n || $ip != long2ip( $n ) ) {
-         return false;
-     }
-
-     static $reserved = null;
-     if ( $reserved === null ) {
-         $reserved = array(
-             array( '10.0.0.0', '10.255.255.255' ), # RFC 1918 (private)
-             array( '172.16.0.0', '172.31.255.255' ), # RFC 1918 (private)
-             array( '192.168.0.0', '192.168.255.255' ), # RFC 1918 (private)
-             array( '0.0.0.0', '0.255.255.255' ), # this network
-             array( '127.0.0.0', '127.255.255.255' ), # loopback
-         );
-     }
-
-     foreach ( $reserved as $bounds ) {
-         list( $first, $last ) = $bounds;
-         $start = self::toUnsigned( $first );
-         $end = self::toUnsigned( $last );
-         if ( $n >= $start && $n <= $end ) {
-             return false;
-         }
-     }
-     return true;
- }
-
- /**
-  * Tell whether an IPv6 address is public, i.e. outside the RFC 4193
-  * local range and not the loopback address.
-  *
-  * @param string $ip
-  * @return bool
-  */
- private static function isPublic6( $ip ) {
-     static $reserved = null;
-     if ( $reserved === null ) {
-         $reserved = array(
-             array( 'fc00::', 'fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' ), # RFC 4193 (local)
-             array( '0:0:0:0:0:0:0:1', '0:0:0:0:0:0:0:1' ), # loopback
-         );
-     }
-     // All three values are compared in the hexadecimal form toHex() returns
-     $n = self::toHex( $ip );
-     foreach ( $reserved as $bounds ) {
-         list( $first, $last ) = $bounds;
-         $start = self::toHex( $first );
-         $end = self::toHex( $last );
-         if ( $n >= $start && $n <= $end ) {
-             return false;
-         }
-     }
-     return true;
- }
-
- /**
-  * Return a zero-padded upper case hexadecimal representation of an IP address.
-  *
-  * Hexadecimal addresses are used because they can easily be extended to
-  * IPv6 support. To separate the ranges, the return value from this
-  * function for an IPv6 address will be prefixed with "v6-", a non-
-  * hexadecimal string which sorts after the IPv4 addresses.
-  *
-  * @param string $ip Quad dotted/octet IP address.
-  * @return string|bool Hex string; false when toUnsigned() fails on a non-IPv6 input
-  */
- public static function toHex( $ip ) {
-     if ( self::isIPv6( $ip ) ) {
-         return 'v6-' . self::IPv6ToRawHex( $ip );
-     }
-     $n = self::toUnsigned( $ip );
-     if ( $n === false ) {
-         return $n;
-     }
-     return wfBaseConvert( $n, 10, 16, 8, false );
- }
-
- /**
-  * Turn an IPv6 address in octet notation into a pure hex string.
-  *
-  * @param string $ip Octet IPv6 address.
-  * @return string|null Pure hex (uppercase); null if sanitizeIP() returns an empty value
-  */
- private static function IPv6ToRawHex( $ip ) {
-     $expanded = self::sanitizeIP( $ip );
-     if ( !$expanded ) {
-         return null;
-     }
-     // Left-pad every colon-separated word to four digits and glue them together
-     $words = array_map( function ( $word ) {
-         return str_pad( $word, 4, '0', STR_PAD_LEFT );
-     }, explode( ':', $expanded ) );
-     return implode( '', $words );
- }
-
- /**
-  * Turn an IP address in dotted-quad/octet notation into an unsigned integer.
-  * Like ip2long() except that it actually works and has a consistent error return value.
-  * Comes from ProxyTools.php
-  *
-  * @param string $ip Quad dotted IP address (IPv6 is handed to toUnsigned6()).
-  * @return mixed string/int/false
-  */
- public static function toUnsigned( $ip ) {
-     if ( self::isIPv6( $ip ) ) {
-         return self::toUnsigned6( $ip );
-     }
-     $n = ip2long( $ip );
-     if ( $n >= 0 ) {
-         // Already non-negative (or false, which is passed through as the error value)
-         return $n;
-     }
-     $n += pow( 2, 32 );
-     # On 32-bit platforms (and on Windows), 2^32 does not fit into an int,
-     # so $n becomes a float. We convert it to string instead.
-     return is_float( $n ) ? (string)$n : $n;
- }
-
- /**
-  * Turn an IPv6 address into its decimal representation.
-  *
-  * @param string $ip
-  * @return string
-  */
- private static function toUnsigned6( $ip ) {
-     $rawHex = self::IPv6ToRawHex( $ip );
-     return wfBaseConvert( $rawHex, 16, 10 );
- }
-
- /**
-  * Turn a network specification in CIDR notation into an integer network
-  * and a number of bits. IPv6 input is handed to parseCIDR6().
-  *
-  * @param string $range IP with CIDR prefix
-  * @return array(int or string, int), or array( false, false ) on failure
-  */
- public static function parseCIDR( $range ) {
-     if ( self::isIPv6( $range ) ) {
-         return self::parseCIDR6( $range );
-     }
-     $parts = explode( '/', $range, 2 );
-     if ( count( $parts ) != 2 ) {
-         return array( false, false );
-     }
-     list( $network, $bits ) = $parts;
-     $network = ip2long( $network );
-     $usable = ( $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32 );
-     if ( !$usable ) {
-         return array( false, false );
-     }
-     if ( $bits == 0 ) {
-         $network = 0;
-     } else {
-         // Clear the host part, i.e. the low (32 - $bits) bits
-         $network &= ~( ( 1 << ( 32 - $bits ) ) - 1 );
-     }
-     # Convert to unsigned
-     if ( $network < 0 ) {
-         $network += pow( 2, 32 );
-     }
-     return array( $network, $bits );
- }
-
- /**
-  * Given a string range in a number of formats,
-  * return the start and end of the range in hexadecimal.
-  *
-  * Formats are:
-  *     1.2.3.4/24          CIDR
-  *     1.2.3.4 - 1.2.3.5   Explicit range
-  *     1.2.3.4             Single IP
-  *
-  *     2001:0db8:85a3::7344/96                       CIDR
-  *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
-  *     2001:0db8:85a3::7344                          Single IP
-  *
-  * @param string $range IP range
-  * @return array(string, string), or array( false, false ) on failure
-  */
- public static function parseRange( $range ) {
-     $failure = array( false, false );
-
-     // CIDR notation
-     if ( strpos( $range, '/' ) !== false ) {
-         if ( self::isIPv6( $range ) ) {
-             return self::parseRange6( $range );
-         }
-         list( $network, $bits ) = self::parseCIDR( $range );
-         if ( $network === false ) {
-             return $failure;
-         }
-         return array(
-             sprintf( '%08X', $network ),
-             sprintf( '%08X', $network + pow( 2, ( 32 - $bits ) ) - 1 )
-         );
-     }
-
-     // Explicit range
-     if ( strpos( $range, '-' ) !== false ) {
-         list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
-         if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) {
-             return self::parseRange6( $range );
-         }
-         if ( !self::isIPv4( $start ) || !self::isIPv4( $end ) ) {
-             return $failure;
-         }
-         $start = self::toUnsigned( $start );
-         $end = self::toUnsigned( $end );
-         if ( $start > $end ) {
-             return $failure;
-         }
-         return array( sprintf( '%08X', $start ), sprintf( '%08X', $end ) );
-     }
-
-     # Single IP
-     $hex = self::toHex( $range );
-     return ( $hex === false ) ? $failure : array( $hex, $hex );
- }
-
- /**
-  * Convert a network specification in IPv6 CIDR notation to an
-  * integer network and a number of bits
-  *
-  * @param string $range IPv6 address with CIDR prefix
-  *
-  * @return array(string, int) Decimal network and prefix length, or
-  *   array( false, false ) if $range cannot be parsed
-  */
- private static function parseCIDR6( $range ) {
-     # Explode into <expanded IP,range>
-     $parts = explode( '/', self::sanitizeIP( $range ), 2 );
-     if ( count( $parts ) != 2 ) {
-         return array( false, false );
-     }
-     list( $network, $bits ) = $parts;
-     $network = self::IPv6ToRawHex( $network );
-     // IPv6ToRawHex() reports failure with null, not false, so test for both.
-     // On failure both elements are false, matching parseCIDR() and the
-     // "$bits === false" test in sanitizeRange().
-     if ( $network === null || $network === false
-         || !is_numeric( $bits ) || $bits < 0 || $bits > 128
-     ) {
-         return array( false, false );
-     }
-     $bits = (int)$bits;
-     if ( $bits === 0 ) {
-         $network = "0";
-     } else {
-         # Native 32 bit functions WONT work here!!!
-         # Convert to a padded binary number
-         $network = wfBaseConvert( $network, 16, 2, 128 );
-         # Truncate the last (128-$bits) bits and replace them with zeros
-         $network = str_pad( substr( $network, 0, $bits ), 128, '0', STR_PAD_RIGHT );
-         # Convert back to an integer
-         $network = wfBaseConvert( $network, 2, 10 );
-     }
-     return array( $network, $bits );
- }
-
- /**
-  * Given a string range in a number of formats, return the
-  * start and end of the range in hexadecimal. For IPv6.
-  *
-  * Formats are:
-  *     2001:0db8:85a3::7344/96                       CIDR
-  *     2001:0db8:85a3::7344 - 2001:0db8:85a3::7344   Explicit range
-  *     2001:0db8:85a3::7344                          Single IP
-  *
-  * @param string $range
-  *
-  * @return array(string, string) "v6-" prefixed hex bounds (see toHex()),
-  *   or array( false, false ) if the range is invalid or reversed
-  */
- private static function parseRange6( $range ) {
-     # Expand any IPv6 IP
-     $range = self::sanitizeIP( $range );
-     if ( strpos( $range, '/' ) !== false ) {
-         // CIDR notation...
-         list( $network, $bits ) = self::parseCIDR6( $range );
-         if ( $network === false ) {
-             return array( false, false );
-         }
-         $start = wfBaseConvert( $network, 10, 16, 32, false );
-         # Turn network to binary (again)
-         $end = wfBaseConvert( $network, 10, 2, 128 );
-         # Truncate the last (128-$bits) bits and replace them with ones
-         $end = str_pad( substr( $end, 0, $bits ), 128, '1', STR_PAD_RIGHT );
-         # Convert to hex
-         $end = wfBaseConvert( $end, 2, 16, 32, false );
-     } elseif ( strpos( $range, '-' ) !== false ) {
-         // Explicit range notation...
-         list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
-         $start = wfBaseConvert( self::toUnsigned6( $start ), 10, 16, 32, false );
-         $end = wfBaseConvert( self::toUnsigned6( $end ), 10, 16, 32, false );
-         // Compare the fixed-width hex strings byte-wise. Using ">" on the
-         // decimal strings would compare them as floats, which cannot tell
-         // 128-bit values apart reliably.
-         if ( $start === false || $end === false || strcmp( $start, $end ) > 0 ) {
-             return array( false, false );
-         }
-     } else {
-         # Single IP
-         $hex = self::toHex( $range );
-         return ( $hex === false ) ? array( false, false ) : array( $hex, $hex );
-     }
-     if ( $start === false || $end === false ) {
-         return array( false, false );
-     }
-     # Only prefix once both bounds are known to be good; see toHex() comment
-     return array( "v6-$start", "v6-$end" );
- }
-
- /**
-  * Determine if a given IPv4/IPv6 address is in a given CIDR network
-  *
-  * An address or range that cannot be parsed never matches.
-  *
-  * @param string $addr the address to check against the given range.
-  * @param string $range the range to check the given address against.
-  * @return bool whether or not the given address is in the given range.
-  */
- public static function isInRange( $addr, $range ) {
-     $hexIP = self::toHex( $addr );
-     list( $start, $end ) = self::parseRange( $range );
-     // toHex() and parseRange() report failure with false. Without this
-     // guard strcmp() would treat false as an empty string, so an invalid
-     // address would be "inside" an invalid range.
-     if ( $hexIP === false || $start === false || $end === false ) {
-         return false;
-     }
-     return ( strcmp( $hexIP, $start ) >= 0 &&
-         strcmp( $hexIP, $end ) <= 0 );
- }
-
- /**
- * Convert some unusual representations of IPv4 addresses to their
- * canonical dotted quad representation.
- *
- * This currently only checks a few IPV4-to-IPv6 related cases. More
- * unusual representations may be added later.
- *
- * Any "%..." zone suffix is stripped first. An address that already passes
- * isValid() is returned unchanged, so the result can also be an IPv6
- * address, not only a dotted quad.
- * NOTE(review): the mapped-address branch checks the tail with isIPv4(),
- * which also accepts a CIDR suffix, so a value such as "1.2.3.4/24" could
- * be returned from there - confirm whether callers rely on that.
- *
- * @param string $addr something that might be an IP address
- * @return String: valid IP address (see above), or null if none could be derived
- */
- public static function canonicalize( $addr ) {
- // remove zone info (bug 35738)
- $addr = preg_replace( '/\%.*/', '', $addr );
-
- if ( self::isValid( $addr ) ) {
- return $addr;
- }
- // Turn mapped addresses from ::ce:ffff:1.2.3.4 to 1.2.3.4
- // by keeping only what follows the last colon
- if ( strpos( $addr, ':' ) !== false && strpos( $addr, '.' ) !== false ) {
- $addr = substr( $addr, strrpos( $addr, ':' ) + 1 );
- if ( self::isIPv4( $addr ) ) {
- return $addr;
- }
- }
- // IPv6 loopback address
- $m = array();
- if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $addr, $m ) ) {
- return '127.0.0.1';
- }
- // IPv4-mapped and IPv4-compatible IPv6 addresses
- if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i', $addr, $m ) ) {
- return $m[1];
- }
- // Same, but with the IPv4 part written as two trailing hex words
- if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD .
- ':' . RE_IPV6_WORD . '$/i', $addr, $m ) )
- {
- return long2ip( ( hexdec( $m[1] ) << 16 ) + hexdec( $m[2] ) );
- }
-
- return null; // give up
- }
-
- /**
-  * Drop the unneeded numbers from a quad-dotted/octet IP range string.
-  * For example, 127.111.113.151/24 -> 127.111.113.0/24
-  *
-  * @param string $range IP address to normalize
-  * @return string
-  */
- public static function sanitizeRange( $range ) {
-     $cidr = self::parseCIDR( $range );
-     $bounds = self::parseRange( $range );
-     $bits = $cidr[1];
-     $start = self::formatHex( $bounds[0] );
-     // Without a prefix length this wasn't actually a range
-     return ( $bits === false ) ? $start : "$start/$bits";
- }
-}
+++ /dev/null
-<?php
-/**
- * A cryptographic random generator class used for generating secret keys
- *
- * This is based in part on Drupal code as well as what we used in our own code
- * prior to introduction of this class.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @author Daniel Friesen
- * @file
- */
-
-class MWCryptRand {
-
- /**
- * Minimum number of iterations we want to make in our drift calculations.
- */
- const MIN_ITERATIONS = 1000;
-
- /**
- * Number of milliseconds we want to spend generating each separate byte
- * of the final generated bytes.
- * This is used in combination with the hash length to determine the duration
- * we should spend doing drift calculations.
- */
- const MSEC_PER_BYTE = 0.5;
-
- /**
- * Singleton instance for public use
- */
- protected static $singleton = null;
-
- /**
- * The hash algorithm being used
- */
- protected $algo = null;
-
- /**
- * The number of bytes outputted by the hash algorithm
- */
- protected $hashLength = null;
-
- /**
- * A boolean indicating whether the previous random generation was done using
- * cryptographically strong random number generator or not.
- */
- protected $strong = null;
-
- /**
-  * Initialize an initial random state based off of whatever we can find
-  *
-  * The state is a concatenation of request data, host name, PHP's weak
-  * random sources, stat() information on a few known files, the process id,
-  * memory usage, the wiki id and the configured secret or proxy key.
-  *
-  * @return string Raw, unhashed state
-  */
- protected function initialRandomState() {
-     // $_SERVER contains a variety of unstable user and system specific information
-     // It'll vary a little with each page, and vary even more with separate users
-     // It'll also vary slightly across different machines
-     $state = serialize( $_SERVER );
-
-     // To try vary the system information of the state a bit more
-     // by including the system's hostname into the state
-     $state .= wfHostname();
-
-     // Try to gather a little entropy from the different php rand sources
-     $state .= rand() . uniqid( mt_rand(), true );
-
-     // Include some information about the filesystem's current state in the random state
-     $files = array();
-
-     // We know this file is here so grab some info about ourselves
-     $files[] = __FILE__;
-
-     // We must also have a parent folder, and with the usual file structure, a grandparent
-     $files[] = __DIR__;
-     $files[] = dirname( __DIR__ );
-
-     // The config file is likely the most often edited file we know should be around
-     // so include its stat info into the state.
-     // The constant with its location will almost always be defined, as WebStart.php defines
-     // MW_CONFIG_FILE to $IP/LocalSettings.php unless being configured with MW_CONFIG_CALLBACK (eg. the installer)
-     if ( defined( 'MW_CONFIG_FILE' ) ) {
-         $files[] = MW_CONFIG_FILE;
-     }
-
-     foreach ( $files as $file ) {
-         wfSuppressWarnings();
-         $stat = stat( $file );
-         wfRestoreWarnings();
-         if ( $stat ) {
-             // stat() duplicates data into numeric and string keys so kill off all the numeric ones.
-             // The entry has to be removed from $stat itself; unsetting the
-             // loop variable would leave the array untouched.
-             foreach ( $stat as $k => $v ) {
-                 if ( is_numeric( $k ) ) {
-                     unset( $stat[$k] );
-                 }
-             }
-             // The absolute filename itself will differ from install to install so include it too
-             $path = realpath( $file );
-             $state .= ( $path !== false ) ? $path : $file;
-             $state .= implode( '', $stat );
-         } else {
-             // The fact that the file isn't there is worth at least a
-             // minuscule amount of entropy.
-             $state .= '0';
-         }
-     }
-
-     // Try and make this a little more unstable by including the varying process
-     // id of the php process we are running inside of if we are able to access it
-     if ( function_exists( 'getmypid' ) ) {
-         $state .= getmypid();
-     }
-
-     // If available try to increase the instability of the data by throwing in
-     // the precise amount of memory that we happen to be using at the moment.
-     if ( function_exists( 'memory_get_usage' ) ) {
-         $state .= memory_get_usage( true );
-     }
-
-     // It's mostly worthless but throw the wiki's id into the data for a little more variance
-     $state .= wfWikiID();
-
-     // If we have a secret key or proxy key set then throw it into the state as well
-     global $wgSecretKey, $wgProxyKey;
-     if ( $wgSecretKey ) {
-         $state .= $wgSecretKey;
-     } elseif ( $wgProxyKey ) {
-         $state .= $wgProxyKey;
-     }
-
-     return $state;
- }
-
- /**
- * Randomly hash data while mixing in clock drift data for randomness
- *
- * Runs a loop for at least MIN_ITERATIONS iterations and at least
- * MSEC_PER_BYTE milliseconds per byte of hash output. Every iteration
- * appends the microsecond delta since the previous iteration to the data;
- * every 100th iteration the data is folded with sha1(). The result is
- * passed through hash() once more before it is returned, and the timing
- * figures are written to the debug log.
- *
- * @param string $data The data to randomly hash.
- * @return String The hashed bytes
- * @author Tim Starling
- */
- protected function driftHash( $data ) {
- // Minimum number of iterations (to avoid slow operations causing the loop to gather little entropy)
- $minIterations = self::MIN_ITERATIONS;
- // Duration of time to spend doing calculations (in seconds)
- $duration = ( self::MSEC_PER_BYTE / 1000 ) * $this->hashLength();
- // Create a buffer to use to trigger memory operations
- $bufLength = 10000000;
- $buffer = str_repeat( ' ', $bufLength );
- $bufPos = 0;
-
- // Iterate for $duration seconds or at least $minIterations number of iterations
- $iterations = 0;
- $startTime = microtime( true );
- $currentTime = $startTime;
- while ( $iterations < $minIterations || $currentTime - $startTime < $duration ) {
- // Trigger some memory writing to trigger some bus activity
- // This may create variance in the time between iterations
- $bufPos = ( $bufPos + 13 ) % $bufLength;
- $buffer[$bufPos] = ' ';
- // Add the drift between this iteration and the last in as entropy
- $nextTime = microtime( true );
- $delta = (int)( ( $nextTime - $currentTime ) * 1000000 ); // whole microseconds
- $data .= $delta;
- // Every 100 iterations hash the data and entropy
- // (this also keeps $data from growing without bound)
- if ( $iterations % 100 === 0 ) {
- $data = sha1( $data );
- }
- $currentTime = $nextTime;
- $iterations++;
- }
- $timeTaken = $currentTime - $startTime;
- $data = $this->hash( $data );
-
- wfDebug( __METHOD__ . ": Clock drift calculation " .
- "(time-taken=" . ( $timeTaken * 1000 ) . "ms, " .
- "iterations=$iterations, " .
- "time-per-iteration=" . ( $timeTaken / $iterations * 1e6 ) . "us)\n" );
- return $data;
- }
-
- /**
-  * Return a rolling random state, initially built from unstable sources and
-  * advanced through driftHash() on every call.
-  *
-  * @return string A new weak random state
-  */
- protected function randomState() {
-     static $state = null;
-     if ( $state === null ) {
-         // Seed with whatever unstable data we can find. Hashing it straight
-         // away is important: it keeps the raw data from leaking into the
-         // output stream.
-         $seed = $this->initialRandomState();
-         $state = $this->hash( $seed );
-     }
-     // Derive the next state from the current one combined with clock drift
-     $state = $this->driftHash( $state );
-     return $state;
- }
-
- /**
-  * Pick the best acceptable hash algorithm that hash() offers on this
-  * system. The choice is remembered for later calls.
-  *
-  * @throws MWException If none of the preferred algorithms is available
-  * @return string A hash algorithm
-  */
- protected function hashAlgo() {
-     if ( $this->algo !== null ) {
-         return $this->algo;
-     }
-
-     $available = hash_algos();
-     // Listed from most to least preferred
-     foreach ( array( 'whirlpool', 'sha256', 'sha1', 'md5' ) as $candidate ) {
-         if ( !in_array( $candidate, $available ) ) {
-             continue;
-         }
-         $this->algo = $candidate;
-         wfDebug( __METHOD__ . ": Using the {$this->algo} hash algorithm.\n" );
-         return $this->algo;
-     }
-
-     // We only reach here if no acceptable hash is found in the list, this should
-     // be a technical impossibility since most of php's hash list is fixed and
-     // some of the ones we list are available as their own native functions
-     // But since we already require at least 5.2 and hash() was default in
-     // 5.1.2 we don't bother falling back to methods like sha1 and md5.
-     throw new MWException( "Could not find an acceptable hashing function in hash_algos()" );
- }
-
- /**
-  * Return the output length, in bytes, of the hash algorithm used by
-  * self::hash and self::hmac. The value is measured once and then cached.
-  *
-  * @return int Number of bytes the hash outputs
-  */
- protected function hashLength() {
-     if ( $this->hashLength !== null ) {
-         return $this->hashLength;
-     }
-     // Measure by hashing an empty string in raw (binary) mode
-     $this->hashLength = strlen( $this->hash( '' ) );
-     return $this->hashLength;
- }
-
- /**
-  * Produce a raw one-way hash of some text with the algorithm chosen by
-  * hashAlgo().
-  *
-  * @param string $data
-  * @return string A raw hash of the data
-  */
- protected function hash( $data ) {
-     $algorithm = $this->hashAlgo();
-     return hash( $algorithm, $data, true );
- }
-
- /**
-  * Produce a raw one-way HMAC of some text with the algorithm chosen by
-  * hashAlgo().
-  *
-  * @param string $data
-  * @param string $key
-  * @return string A raw hash of the data
-  */
- protected function hmac( $data, $key ) {
-     $algorithm = $this->hashAlgo();
-     return hash_hmac( $algorithm, $data, $key, true );
- }
-
- /**
-  * Report the strength flag recorded for this instance.
-  *
-  * @see self::wasStrong()
-  * @throws MWException If no strength has been recorded yet
-  */
- public function realWasStrong() {
-     $strength = $this->strong;
-     if ( $strength !== null ) {
-         return $strength;
-     }
-     throw new MWException( __METHOD__ . ' called before generation of random data' );
- }
-
- /**
- * @see self::generate()
- */
- public function realGenerate( $bytes, $forceStrong = false ) {
- wfProfileIn( __METHOD__ );
-
- wfDebug( __METHOD__ . ": Generating cryptographic random bytes for " . wfGetAllCallers( 5 ) . "\n" );
-
- $bytes = floor( $bytes );
- static $buffer = '';
- if ( is_null( $this->strong ) ) {
- // Set strength to false initially until we know what source data is coming from
- $this->strong = true;
- }
-
- if ( strlen( $buffer ) < $bytes ) {
- // If available make use of mcrypt_create_iv URANDOM source to generate randomness
- // On unix-like systems this reads from /dev/urandom but does it without any buffering
- // and bypasses openbasedir restrictions, so it's preferable to reading directly
- // On Windows starting in PHP 5.3.0 Windows' native CryptGenRandom is used to generate
- // entropy so this is also preferable to just trying to read urandom because it may work
- // on Windows systems as well.
- if ( function_exists( 'mcrypt_create_iv' ) ) {
- wfProfileIn( __METHOD__ . '-mcrypt' );
- $rem = $bytes - strlen( $buffer );
- $iv = mcrypt_create_iv( $rem, MCRYPT_DEV_URANDOM );
- if ( $iv === false ) {
- wfDebug( __METHOD__ . ": mcrypt_create_iv returned false.\n" );
- } else {
- $buffer .= $iv;
- wfDebug( __METHOD__ . ": mcrypt_create_iv generated " . strlen( $iv ) . " bytes of randomness.\n" );
- }
- wfProfileOut( __METHOD__ . '-mcrypt' );
- }
- }
-
- if ( strlen( $buffer ) < $bytes ) {
- // If available make use of openssl's random_pseudo_bytes method to attempt to generate randomness.
- // However don't do this on Windows with PHP < 5.3.4 due to a bug:
- // http://stackoverflow.com/questions/1940168/openssl-random-pseudo-bytes-is-slow-php
- // http://git.php.net/?p=php-src.git;a=commitdiff;h=cd62a70863c261b07f6dadedad9464f7e213cad5
- if ( function_exists( 'openssl_random_pseudo_bytes' )
- && ( !wfIsWindows() || version_compare( PHP_VERSION, '5.3.4', '>=' ) )
- ) {
- wfProfileIn( __METHOD__ . '-openssl' );
- $rem = $bytes - strlen( $buffer );
- $openssl_bytes = openssl_random_pseudo_bytes( $rem, $openssl_strong );
- if ( $openssl_bytes === false ) {
- wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes returned false.\n" );
- } else {
- $buffer .= $openssl_bytes;
- wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes generated " . strlen( $openssl_bytes ) . " bytes of " . ( $openssl_strong ? "strong" : "weak" ) . " randomness.\n" );
- }
- if ( strlen( $buffer ) >= $bytes ) {
- // openssl tells us if the random source was strong, if some of our data was generated
- // using it use it's say on whether the randomness is strong
- $this->strong = !!$openssl_strong;
- }
- wfProfileOut( __METHOD__ . '-openssl' );
- }
- }
-
- // Only read from urandom if we can control the buffer size or were passed forceStrong
- if ( strlen( $buffer ) < $bytes && ( function_exists( 'stream_set_read_buffer' ) || $forceStrong ) ) {
- wfProfileIn( __METHOD__ . '-fopen-urandom' );
- $rem = $bytes - strlen( $buffer );
- if ( !function_exists( 'stream_set_read_buffer' ) && $forceStrong ) {
- wfDebug( __METHOD__ . ": Was forced to read from /dev/urandom without control over the buffer size.\n" );
- }
- // /dev/urandom is generally considered the best possible commonly
- // available random source, and is available on most *nix systems.
- wfSuppressWarnings();
- $urandom = fopen( "/dev/urandom", "rb" );
- wfRestoreWarnings();
-
- // Attempt to read all our random data from urandom
- // php's fread always does buffered reads based on the stream's chunk_size
- // so in reality it will usually read more than the amount of data we're
- // asked for and not storing that risks depleting the system's random pool.
- // If stream_set_read_buffer is available set the chunk_size to the amount
- // of data we need. Otherwise read 8k, php's default chunk_size.
- if ( $urandom ) {
- // php's default chunk_size is 8k
- $chunk_size = 1024 * 8;
- if ( function_exists( 'stream_set_read_buffer' ) ) {
- // If possible set the chunk_size to the amount of data we need
- stream_set_read_buffer( $urandom, $rem );
- $chunk_size = $rem;
- }
- $random_bytes = fread( $urandom, max( $chunk_size, $rem ) );
- $buffer .= $random_bytes;
- fclose( $urandom );
- wfDebug( __METHOD__ . ": /dev/urandom generated " . strlen( $random_bytes ) . " bytes of randomness.\n" );
- if ( strlen( $buffer ) >= $bytes ) {
- // urandom is always strong, set to true if all our data was generated using it
- $this->strong = true;
- }
- } else {
- wfDebug( __METHOD__ . ": /dev/urandom could not be opened.\n" );
- }
- wfProfileOut( __METHOD__ . '-fopen-urandom' );
- }
-
- // If we cannot use or generate enough data from a secure source
- // use this loop to generate a good set of pseudo random data.
- // This works by initializing a random state using a pile of unstable data
- // and continually shoving it through a hash along with a variable salt.
- // We hash the random state with more salt to avoid the state from leaking
- // out and being used to predict the /randomness/ that follows.
- if ( strlen( $buffer ) < $bytes ) {
- wfDebug( __METHOD__ . ": Falling back to using a pseudo random state to generate randomness.\n" );
- }
- while ( strlen( $buffer ) < $bytes ) {
- wfProfileIn( __METHOD__ . '-fallback' );
- $buffer .= $this->hmac( $this->randomState(), mt_rand() );
- // This code is never really cryptographically strong, if we use it
- // at all, then set strong to false.
- $this->strong = false;
- wfProfileOut( __METHOD__ . '-fallback' );
- }
-
- // Once the buffer has been filled up with enough random data to fulfill
- // the request shift off enough data to handle the request and leave the
- // unused portion left inside the buffer for the next request for random data
- $generated = substr( $buffer, 0, $bytes );
- $buffer = substr( $buffer, $bytes );
-
- wfDebug( __METHOD__ . ": " . strlen( $buffer ) . " bytes of randomness leftover in the buffer.\n" );
-
- wfProfileOut( __METHOD__ );
- return $generated;
- }
-
- /**
-  * Instance-level implementation behind self::generateHex().
-  *
-  * @see self::generateHex()
-  * @param int $chars Number of hex characters to return
-  * @param bool $forceStrong Passed through unchanged to realGenerate()
-  * @return string Hexadecimal string, truncated to $chars characters
-  */
- public function realGenerateHex( $chars, $forceStrong = false ) {
-     // Two hex characters encode one byte. Round up so that an odd $chars
-     // still yields enough bytes, and cast so that realGenerate() receives
-     // an integer rather than the float returned by ceil().
-     $bytes = (int)ceil( $chars / 2 );
-     // Call the instance method directly. The static generate() facade would
-     // route through the singleton, so the random data (and the recorded
-     // strength flag) would not belong to the instance this was called on.
-     $hex = bin2hex( $this->realGenerate( $bytes, $forceStrong ) );
-     // An odd $chars produces one hex character too many. Callers may store
-     // the value in a fixed-width column that truncates silently, so never
-     // hand back more characters than were asked for.
-     return substr( $hex, 0, $chars );
- }
-
- /** Publicly exposed static methods **/
-
- /**
-  * Lazily create and return the shared MWCryptRand instance.
-  *
-  * @return MWCryptRand
-  */
- protected static function singleton() {
-     // Build the instance only on first use; later calls reuse it
-     if ( self::$singleton === null ) {
-         self::$singleton = new self;
-     }
-
-     return self::$singleton;
- }
-
- /**
-  * Report whether the source used by the most recent generate* call
-  * was cryptographically strong.
-  *
-  * @return bool True if the source was strong, false if not.
-  */
- public static function wasStrong() {
-     $instance = self::singleton();
-
-     return $instance->realWasStrong();
- }
-
- /**
-  * Produce a run of (ideally) cryptographically random data in raw binary form.
-  * Call MWCryptRand::wasStrong() afterwards to find out whether the source
-  * that was used is cryptographically strong.
-  *
-  * @param int $bytes The number of bytes of random data to generate
-  * @param bool $forceStrong Pass true to prefer cryptographically strong
-  *   sources of entropy even if reading from them may take more entropy
-  *   from the system than is optimal.
-  * @return string Raw binary random data
-  */
- public static function generate( $bytes, $forceStrong = false ) {
-     $instance = self::singleton();
-
-     return $instance->realGenerate( $bytes, $forceStrong );
- }
-
- /**
-  * Produce a run of (ideally) cryptographically random data as a hexadecimal string.
-  * Call MWCryptRand::wasStrong() afterwards to find out whether the source
-  * that was used is cryptographically strong.
-  *
-  * @param int $chars The number of hex characters of random data to generate
-  * @param bool $forceStrong Pass true to prefer cryptographically strong
-  *   sources of entropy even if reading from them may take more entropy
-  *   from the system than is optimal.
-  * @return string Hexadecimal random data
-  */
- public static function generateHex( $chars, $forceStrong = false ) {
-     $instance = self::singleton();
-
-     return $instance->realGenerateHex( $chars, $forceStrong );
- }
-
-}
+++ /dev/null
-<?php
-/**
- * Helper methods to call functions and instance objects.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-class MWFunction {
-
-    /**
-     * Invoke a callback, forwarding any additional arguments of this call to it.
-     *
-     * @deprecated since 1.22; use call_user_func()
-     * @param callable $callback
-     * @return mixed Whatever the callback returns
-     */
-    public static function call( $callback ) {
-        wfDeprecated( __METHOD__, '1.22' );
-        // func_get_args() starts with $callback itself, so the complete list
-        // can be handed to call_user_func() as-is.
-        return call_user_func_array( 'call_user_func', func_get_args() );
-    }
-
-    /**
-     * Invoke a callback with an array of arguments.
-     *
-     * @deprecated since 1.22; use call_user_func_array()
-     * @param callable $callback
-     * @param array $argsarams Arguments to pass to the callback
-     * @return mixed Whatever the callback returns
-     */
-    public static function callArray( $callback, $argsarams ) {
-        wfDeprecated( __METHOD__, '1.22' );
-
-        return call_user_func_array( $callback, $argsarams );
-    }
-
-    /**
-     * Instantiate a class by name, optionally with constructor arguments.
-     *
-     * @param string $class Name of the class to instantiate
-     * @param array $args Constructor arguments; may be empty
-     * @return object
-     */
-    public static function newObj( $class, $args = array() ) {
-        if ( count( $args ) ) {
-            // Reflection is only needed when there are arguments to spread
-            $reflection = new ReflectionClass( $class );
-
-            return $reflection->newInstanceArgs( $args );
-        }
-
-        return new $class;
-    }
-
-}
+++ /dev/null
-<?php
-/**
- * Convenience class for generating iterators from iterators.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @author Aaron Schulz
- */
-
-/**
- * Iterator wrapper that passes each value of a base iterator through a
- * callback, optionally filtering on the transformed value.
- *
- * @since 1.21
- */
-class MappedIterator extends FilterIterator {
-    /** @var callable Value transformation callback */
-    protected $vCallback;
-    /** @var callable|null Optional acceptance callback */
-    protected $aCallback;
-    /** @var array Holds the transformed value of the current element under 'current' */
-    protected $cache = array();
-
-    /** @var bool Whether rewind() has been called */
-    protected $rewound = false;
-
-    /**
-     * Wrap a base iterator so that current() returns the result of the "value"
-     * callback applied to the base iterator's current() value.
-     * The keys of the base iterator are reused verbatim.
-     *
-     * An "accept" callback may also be given. It receives each transformed
-     * value and returns true if that value should be part of the iteration;
-     * other values are filtered out.
-     *
-     * @param Iterator|array $iter
-     * @param callable $vCallback Value transformation callback
-     * @param array $options Options map (includes "accept") (since 1.22)
-     * @throws MWException If $iter is neither an array nor an Iterator
-     */
-    public function __construct( $iter, $vCallback, array $options = array() ) {
-        if ( $iter instanceof Iterator ) {
-            $inner = $iter;
-        } elseif ( is_array( $iter ) ) {
-            $inner = new ArrayIterator( $iter );
-        } else {
-            throw new MWException( "Invalid base iterator provided." );
-        }
-        parent::__construct( $inner );
-
-        $this->vCallback = $vCallback;
-        $this->aCallback = null;
-        if ( isset( $options['accept'] ) ) {
-            $this->aCallback = $options['accept'];
-        }
-    }
-
-    public function next() {
-        // The cached value belongs to the element being left behind
-        $this->cache = array();
-        parent::next();
-    }
-
-    public function rewind() {
-        $this->rewound = true;
-        $this->cache = array();
-        parent::rewind();
-    }
-
-    /**
-     * Transform the inner iterator's current value and decide whether to keep it.
-     * An accepted value is cached so that current() does not run the callback again.
-     *
-     * @return bool
-     */
-    public function accept() {
-        $mapped = call_user_func( $this->vCallback, $this->getInnerIterator()->current() );
-        if ( $this->aCallback ) {
-            $accepted = call_user_func( $this->aCallback, $mapped );
-        } else {
-            $accepted = true;
-        }
-        if ( $accepted ) {
-            $this->cache['current'] = $mapped;
-        }
-
-        return $accepted;
-    }
-
-    public function key() {
-        $this->init();
-
-        return parent::key();
-    }
-
-    public function valid() {
-        $this->init();
-
-        return parent::valid();
-    }
-
-    public function current() {
-        $this->init();
-
-        // null signals "out of range"
-        return parent::valid() ? $this->cache['current'] : null;
-    }
-
-    /**
-     * Obviate the usual need for rewind() before using a FilterIterator in a manual loop
-     */
-    protected function init() {
-        if ( $this->rewound ) {
-            return;
-        }
-        $this->rewind();
-    }
-}
+++ /dev/null
-<?php
-/**
- * This file deals with RAII style scoped callbacks.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Class for asserting that a callback happens when a dummy object leaves scope
- *
- * @since 1.21
- */
-class ScopedCallback {
-    /** @var callable|null Callback run on destruction; null once cancelled */
-    protected $callback;
-
-    /**
-     * @param callable $callback
-     * @throws MWException If $callback is not callable
-     */
-    public function __construct( $callback ) {
-        $valid = is_callable( $callback );
-        if ( !$valid ) {
-            throw new MWException( "Provided callback is not valid." );
-        }
-        $this->callback = $callback;
-    }
-
-    /**
-     * Trigger a scoped callback and destroy it.
-     * This is the same as just setting it to null.
-     *
-     * @param ScopedCallback $sc
-     */
-    public static function consume( ScopedCallback &$sc = null ) {
-        // Dropping the caller's reference lets __destruct() fire the callback
-        $sc = null;
-    }
-
-    /**
-     * Destroy a scoped callback without triggering it
-     *
-     * @param ScopedCallback $sc
-     */
-    public static function cancel( ScopedCallback &$sc = null ) {
-        if ( $sc !== null ) {
-            // Disarm first so that the destructor has nothing to call
-            $sc->callback = null;
-        }
-        $sc = null;
-    }
-
-    /**
-     * Trigger the callback when this leaves scope
-     */
-    function __destruct() {
-        if ( $this->callback === null ) {
-            return;
-        }
-        call_user_func( $this->callback );
-    }
-}
+++ /dev/null
-<?php
-/**
- * Expansion of the PHP execution time limit feature for a function call.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Class to expand PHP execution time for a function call.
- * Use this when performing changes that should not be interrupted.
- *
- * On construction, the max_execution_time setting is changed to $seconds and
- * ignore_user_abort( true ) is called, so PHP keeps running if the client
- * aborts the connection. When the object goes out of scope, the timer is
- * restarted with the original time limit minus the time the object existed.
- *
- * Nothing is changed when max_execution_time is 0 (no limit, as on the CLI)
- * or when one of the safety limits below has been reached.
- */
-class ScopedPHPTimeout {
-    /** @var float Construction time in seconds, from microtime( true ) */
-    protected $startTime;
-    /** @var mixed Previous max_execution_time, as returned by ini_set() */
-    protected $oldTimeout;
-    /** @var mixed Previous setting, as returned by ignore_user_abort() */
-    protected $oldIgnoreAbort;
-
-    /** @var int Number of instances currently holding an extended timeout */
-    protected static $stackDepth = 0;
-    /** @var int Number of times a timeout was successfully extended */
-    protected static $totalCalls = 0;
-    /** @var float Total seconds spent inside extended timeouts */
-    protected static $totalElapsed = 0;
-
-    /* Prevent callers in infinite loops from running forever */
-    const MAX_TOTAL_CALLS = 1000000;
-    const MAX_TOTAL_TIME = 300; // seconds
-
-    /**
-     * @param int $seconds New value for max_execution_time
-     */
-    public function __construct( $seconds ) {
-        if ( ini_get( 'max_execution_time' ) > 0 ) { // CLI uses 0
-            if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) {
-                trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." );
-            } elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) {
-                trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." );
-            } elseif ( self::$stackDepth > 0 ) { // recursion guard
-                trigger_error( "Recursive invocation of " . __CLASS__ . " attempted." );
-            } else {
-                $this->oldIgnoreAbort = ignore_user_abort( true );
-                $this->oldTimeout = ini_set( 'max_execution_time', $seconds );
-                $this->startTime = microtime( true );
-                ++self::$stackDepth;
-                ++self::$totalCalls; // proof against < 1us scopes
-            }
-        }
-    }
-
-    /**
-     * Restore the original timeout.
-     * This does not account for the timer value on __construct().
-     */
-    public function __destruct() {
-        // $oldTimeout is only set when the constructor actually changed the limit
-        if ( $this->oldTimeout ) {
-            $elapsed = microtime( true ) - $this->startTime;
-            // Note: a limit of 0 is treated as "forever"
-            set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) );
-            // If each scoped timeout is for less than one second, we end up
-            // restoring the original timeout without any decrease in value.
-            // Thus web scripts in an infinite loop can run forever unless we
-            // take some measures to prevent this. Track total time and calls.
-            self::$totalElapsed += $elapsed;
-            --self::$stackDepth;
-            ignore_user_abort( $this->oldIgnoreAbort );
-        }
-    }
-}
+++ /dev/null
-<?php
-/**
- * Methods to play with strings.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * A collection of static methods to play with strings.
- */
-class StringUtils {
-
- /**
- * Test whether a string is valid UTF-8.
- *
- * The function checks for invalid byte sequences and overlong encodings, but
- * not for different normalisations.
- *
- * This relies internally on the mbstring function mb_check_encoding()
- * hardcoded to check against UTF-8. Whenever the function is not available
- * we fall back to a pure PHP implementation. Setting $disableMbstring to
- * true will skip the use of mb_check_encoding; this is mostly intended for
- * unit testing our internal implementation.
- *
- * @since 1.21
- * @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation.
- * In particular, the pure PHP code path did not in fact check for overlong forms.
- * Beware of this when backporting code to that version of MediaWiki.
- *
- * @param string $value String to check (cast to string before checking)
- * @param bool $disableMbstring Whether to use the pure PHP
- * implementation instead of trying mb_check_encoding. Intended for unit
- * testing. Default: false
- *
- * @return bool Whether the given $value is a valid UTF-8 encoded string
- */
- static function isUtf8( $value, $disableMbstring = false ) {
- $value = (string)$value;
-
- // If the mbstring extension is loaded, use it. However, before PHP 5.4, values above
- // U+10FFFF are incorrectly allowed, so we have to check for them separately.
- if ( !$disableMbstring && function_exists( 'mb_check_encoding' ) ) {
- // Probed once and then cached in a static: true when mb_check_encoding()
- // itself rejects the encoding of U+110000, the first value above U+10FFFF.
- static $newPHP;
- if ( $newPHP === null ) {
- $newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
- }
-
- // When the probe showed that out-of-range values slip through, additionally
- // reject any lead byte (or lead byte + first continuation) above U+10FFFF.
- return mb_check_encoding( $value, 'UTF-8' ) &&
- ( $newPHP || preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0 );
- }
-
- if ( preg_match( "/[\x80-\xff]/S", $value ) === 0 ) {
- // String contains only ASCII characters, has to be valid
- return true;
- }
-
- // PCRE implements repetition using recursion; to avoid a stack overflow (and segfault)
- // for large input, we check for invalid sequences (<= 5 bytes) rather than valid
- // sequences, which can be as long as the input string is. Multiple short regexes are
- // used rather than a single long regex for performance.
- // The list is built on first use and kept in a static for later calls.
- static $regexes;
- if ( $regexes === null ) {
- // $cont matches one continuation byte; $after is a negative lookahead asserting
- // that no further continuation byte follows the sequence.
- $cont = "[\x80-\xbf]";
- $after = "(?!$cont)"; // "(?:[^\x80-\xbf]|$)" would work here
- $regexes = array(
- // Continuation byte at the start
- "/^$cont/",
-
- // ASCII byte followed by a continuation byte
- "/[\\x00-\x7f]$cont/S",
-
- // Illegal byte
- "/[\xc0\xc1\xf5-\xff]/S",
-
- // Invalid 2-byte sequence, or valid one then an extra continuation byte
- "/[\xc2-\xdf](?!$cont$after)/S",
-
- // Invalid 3-byte sequence, or valid one then an extra continuation byte
- "/\xe0(?![\xa0-\xbf]$cont$after)/",
- "/[\xe1-\xec\xee\xef](?!$cont{2}$after)/S",
- "/\xed(?![\x80-\x9f]$cont$after)/",
-
- // Invalid 4-byte sequence, or valid one then an extra continuation byte
- "/\xf0(?![\x90-\xbf]$cont{2}$after)/",
- "/[\xf1-\xf3](?!$cont{3}$after)/S",
- "/\xf4(?![\x80-\x8f]$cont{2}$after)/",
- );
- }
-
- // Each regex describes an invalid construct, so a single match is enough to reject
- foreach ( $regexes as $regex ) {
- if ( preg_match( $regex, $value ) !== 0 ) {
- return false;
- }
- }
- return true;
- }
-
- /**
-  * Perform an operation equivalent to
-  *
-  *     preg_replace( "!$startDelim(.*?)$endDelim!", $replace, $subject );
-  *
-  * except that it's worst-case O(N) instead of O(N^2)
-  *
-  * Compared to delimiterReplace(), this implementation is fast but memory-
-  * hungry and inflexible. The memory requirements are such that it is not
-  * recommended for anything but guaranteed small chunks of text.
-  *
-  * @param string $startDelim Literal start delimiter
-  * @param string $endDelim Literal end delimiter
-  * @param string $replace Text substituted for each delimited section
-  * @param string $subject Text to process
-  *
-  * @return string
-  */
- static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
-     $pieces = explode( $startDelim, $subject );
-     // Everything before the first start delimiter is copied through untouched
-     $result = array_shift( $pieces );
-     $endLength = strlen( $endDelim );
-
-     foreach ( $pieces as $piece ) {
-         $closeAt = strpos( $piece, $endDelim );
-         if ( $closeAt !== false ) {
-             // Delimited section found: emit the replacement plus the trailing text
-             $result .= $replace . substr( $piece, $closeAt + $endLength );
-         } else {
-             // Unterminated start delimiter: put it back as it was
-             $result .= $startDelim . $piece;
-         }
-     }
-
-     return $result;
- }
-
- /**
- * Perform an operation equivalent to
- *
- * preg_replace_callback( "!$startDelim(.*)$endDelim!s$flags", $callback, $subject )
- *
- * This implementation is slower than hungryDelimiterReplace but uses far less
- * memory. The delimiters are literal strings, not regular expressions.
- *
- * If the start delimiter ends with an initial substring of the end delimiter,
- * e.g. in the case of C-style comments, the behavior differs from the model
- * regex. In this implementation, the end must share no characters with the
- * start, so e.g. /*\/ is not considered to be both the start and end of a
- * comment. /*\/xy/*\/ is considered to be a single comment with contents /xy/.
- *
- * @param string $startDelim start delimiter
- * @param string $endDelim end delimiter
- * @param callable $callback Function to call on each match. It receives an array
- * whose element 0 is the whole delimited section (delimiters included) and
- * element 1 is the text between the delimiters; its return value is written
- * to the output in place of the section.
- * @param string $subject Text to search
- * @param string $flags regular expression flags
- * @throws MWException If a match is found that is neither a start nor an end delimiter
- * @return string
- */
- static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
- # $inputPos: where the next search starts
- # $outputPos: start of the part of $subject not yet written to $output
- $inputPos = 0;
- $outputPos = 0;
- $output = '';
- $foundStart = false;
- # The delimiters are literal text, so quote them for use in the pattern below
- $encStart = preg_quote( $startDelim, '!' );
- $encEnd = preg_quote( $endDelim, '!' );
- # Compare case-insensitively when the 'i' flag was given
- $strcmp = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
- $endLength = strlen( $endDelim );
- $m = array();
-
- # Find the next start or end delimiter at or after $inputPos
- while ( $inputPos < strlen( $subject ) &&
- preg_match( "!($encStart)|($encEnd)!S$flags", $subject, $m, PREG_OFFSET_CAPTURE, $inputPos ) )
- {
- $tokenOffset = $m[0][1];
- # Group 1 is the start delimiter, group 2 the end delimiter
- if ( $m[1][0] != '' ) {
- if ( $foundStart &&
- $strcmp( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0 )
- {
- # An end match is present at the same location
- $tokenType = 'end';
- $tokenLength = $endLength;
- } else {
- $tokenType = 'start';
- $tokenLength = strlen( $m[0][0] );
- }
- } elseif ( $m[2][0] != '' ) {
- $tokenType = 'end';
- $tokenLength = strlen( $m[0][0] );
- } else {
- throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
- }
-
- if ( $tokenType == 'start' ) {
- # Only move the start position if we haven't already found a start
- # This means that START START END matches outer pair
- if ( !$foundStart ) {
- # Found start
- $inputPos = $tokenOffset + $tokenLength;
- # Write out the non-matching section
- $output .= substr( $subject, $outputPos, $tokenOffset - $outputPos );
- $outputPos = $tokenOffset;
- $contentPos = $inputPos;
- $foundStart = true;
- } else {
- # Move the input position past the *first character* of START,
- # to protect against missing END when it overlaps with START
- $inputPos = $tokenOffset + 1;
- }
- } elseif ( $tokenType == 'end' ) {
- if ( $foundStart ) {
- # Found match
- # The callback gets the full section (from the start delimiter up to and
- # including the end delimiter) and the content between the delimiters
- $output .= call_user_func( $callback, array(
- substr( $subject, $outputPos, $tokenOffset + $tokenLength - $outputPos ),
- substr( $subject, $contentPos, $tokenOffset - $contentPos )
- ));
- $foundStart = false;
- } else {
- # Non-matching end, write it out
- # While no start is pending, $inputPos and $outputPos are only ever
- # advanced together, so they hold the same value here
- $output .= substr( $subject, $inputPos, $tokenOffset + $tokenLength - $outputPos );
- }
- $inputPos = $outputPos = $tokenOffset + $tokenLength;
- } else {
- throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
- }
- }
- # Copy whatever has not been written yet; after an unterminated start
- # delimiter this includes that delimiter and everything following it
- if ( $outputPos < strlen( $subject ) ) {
- $output .= substr( $subject, $outputPos );
- }
- return $output;
- }
-
- /**
-  * Perform an operation equivalent to
-  *
-  *     preg_replace( "!$startDelim(.*)$endDelim!$flags", $replace, $subject )
-  *
-  * The work is delegated to delimiterReplaceCallback(), which passes both
-  * delimiters through preg_quote(), i.e. they are matched as literal text.
-  *
-  * @param string $startDelim Start delimiter
-  * @param string $endDelim End delimiter
-  * @param string $replace Replacement string. May contain $1, which will be
-  *   replaced by the text between the delimiters
-  * @param string $subject String to search
-  * @param string $flags Regular expression flags
-  * @return string The string with the matches replaced
-  */
- static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
-     $regexlike = new RegexlikeReplacer( $replace );
-     $callback = $regexlike->cb();
-
-     return self::delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags );
- }
-
- /**
-  * More or less "markup-safe" explode()
-  * Ignores any instances of the separator inside <...>
-  *
-  * @param string $separator
-  * @param string $text
-  * @return array
-  */
- static function explodeMarkup( $separator, $text ) {
-     $marker = "\x00";
-
-     // Drop any marker bytes already present so they cannot be mistaken for ours
-     $text = str_replace( $marker, '', $text );
-
-     // Hide separators that occur inside HTML-like tags behind the marker
-     $hider = new DoubleReplacer( $separator, $marker );
-     $masked = StringUtils::delimiterReplaceCallback( '<', '>', $hider->cb(), $text );
-
-     // Split on the separators that are left, then restore the hidden ones
-     $pieces = explode( $separator, $masked );
-     foreach ( $pieces as $idx => $piece ) {
-         $pieces[$idx] = str_replace( $marker, $separator, $piece );
-     }
-
-     return $pieces;
- }
-
- /**
-  * Escape a string so that it can be used literally as the replacement
-  * parameter of preg_replace().
-  *
-  * @param string $string
-  * @return string
-  */
- static function escapeRegexReplacement( $string ) {
-     // The pairs are applied in order: backslashes are doubled first, so the
-     // backslash added in front of "$" afterwards is not doubled again.
-     return str_replace(
-         array( '\\', '$' ),
-         array( '\\\\', '\\$' ),
-         $string
-     );
- }
-
- /**
-  * Workalike for explode() with limited memory usage.
-  * Returns an Iterator: a lazy ExplodeIterator when the separator occurs
-  * more than 1000 times, otherwise an ArrayIterator over the explode() result.
-  *
-  * @param string $separator
-  * @param string $subject
-  * @return ArrayIterator|ExplodeIterator
-  */
- static function explode( $separator, $subject ) {
-     $manyPieces = substr_count( $subject, $separator ) > 1000;
-
-     return $manyPieces
-         ? new ExplodeIterator( $separator, $subject )
-         : new ArrayIterator( explode( $separator, $subject ) );
- }
-}
-
-/**
- * Base class for "replacers", objects used in preg_replace_callback() and
- * StringUtils::delimiterReplaceCallback()
- *
- * Subclasses provide a replace( $matches ) method; cb() wraps it in a callable.
- */
-class Replacer {
-
-    /**
-     * Get a callable that invokes this object's replace() method.
-     *
-     * @return array Callable of the form array( object, method name )
-     */
-    function cb() {
-        // Objects are passed by handle, so no reference to $this is needed
-        return array( $this, 'replace' );
-    }
-}
-
-/**
- * Class to replace regex matches with a string similar to that used in preg_replace()
- */
-class RegexlikeReplacer extends Replacer {
-    /** @var string Replacement template; may contain $0, $1, ... placeholders */
-    public $r;
-
-    /**
-     * @param string $r Replacement template
-     */
-    function __construct( $r ) {
-        $this->r = $r;
-    }
-
-    /**
-     * Fill the template: every "$<index>" placeholder is replaced by the
-     * element of $matches stored under that index.
-     *
-     * @param array $matches
-     * @return string
-     */
-    function replace( $matches ) {
-        $substitutions = array();
-        foreach ( $matches as $index => $text ) {
-            $substitutions['$' . $index] = $text;
-        }
-
-        return strtr( $this->r, $substitutions );
-    }
-
-}
-
-/**
- * Class to perform secondary replacement within each replacement string
- */
-class DoubleReplacer extends Replacer {
-    // Declared explicitly rather than created on the fly in the constructor:
-    // $from and $to are the str_replace() search and replacement values,
-    // $index selects the element of the matches array that is processed.
-    var $from, $to, $index;
-
-    /**
-     * @param string|array $from Text to search for within the selected match
-     * @param string|array $to Text to put in its place
-     * @param int $index Index into the matches array of the element to process
-     */
-    function __construct( $from, $to, $index = 0 ) {
-        $this->from = $from;
-        $this->to = $to;
-        $this->index = $index;
-    }
-
-    /**
-     * Return the selected match with every occurrence of $from replaced by $to.
-     *
-     * @param array $matches
-     * @return mixed
-     */
-    function replace( $matches ) {
-        return str_replace( $this->from, $this->to, $matches[$this->index] );
-    }
-}
-
-/**
- * Class to perform replacement based on a simple hashtable lookup
- */
-class HashtableReplacer extends Replacer {
-    /** @var array Lookup table mapping matched text to its replacement */
-    public $table;
-    /** @var int Index into the matches array of the element used as lookup key */
-    public $index;
-
-    /**
-     * @param array $table
-     * @param int $index
-     */
-    function __construct( $table, $index = 0 ) {
-        $this->index = $index;
-        $this->table = $table;
-    }
-
-    /**
-     * Look up the selected match in the table and return the stored value.
-     *
-     * @param array $matches
-     * @return mixed
-     */
-    function replace( $matches ) {
-        $key = $matches[$this->index];
-
-        return $this->table[$key];
-    }
-}
-
-/**
- * Replacement array for FSS with fallback to strtr()
- * Supports lazy initialisation of FSS resource
- */
-class ReplacementArray {
-    /** @var array|bool Replacement pairs in strtr() form (mostly private) */
-    public $data = false;
-    /** @var mixed Prepared FSS handle, or false if not yet built (mostly private) */
-    public $fss = false;
-
-    /**
-     * Create an object with the specified replacement array
-     * The array should have the same form as the replacement array for strtr()
-     * @param array $data
-     */
-    function __construct( $data = array() ) {
-        $this->data = $data;
-    }
-
-    /**
-     * Only the replacement pairs are serialized; the FSS handle is not.
-     * @return array
-     */
-    function __sleep() {
-        return array( 'data' );
-    }
-
-    /**
-     * Mark the FSS handle as not built after unserialization.
-     */
-    function __wakeup() {
-        $this->fss = false;
-    }
-
-    /**
-     * Set the whole replacement array at once
-     * @param array $data
-     */
-    function setArray( $data ) {
-        $this->fss = false;
-        $this->data = $data;
-    }
-
-    /**
-     * @return array|bool
-     */
-    function getArray() {
-        return $this->data;
-    }
-
-    /**
-     * Set an element of the replacement array
-     * @param string $from
-     * @param string $to
-     */
-    function setPair( $from, $to ) {
-        $this->fss = false;
-        $this->data[$from] = $to;
-    }
-
-    /**
-     * Merge further replacement pairs into the current ones with array_merge()
-     * @param array $data
-     */
-    function mergeArray( $data ) {
-        $this->fss = false;
-        $this->data = array_merge( $this->data, $data );
-    }
-
-    /**
-     * Merge the pairs of another ReplacementArray into this one
-     * @param ReplacementArray $other
-     */
-    function merge( $other ) {
-        $this->fss = false;
-        $this->data = array_merge( $this->data, $other->data );
-    }
-
-    /**
-     * Remove the pair with the given search string
-     * @param string $from
-     */
-    function removePair( $from ) {
-        $this->fss = false;
-        unset( $this->data[$from] );
-    }
-
-    /**
-     * Remove every pair whose search string is a key of $data
-     * @param array $data
-     */
-    function removeArray( $data ) {
-        foreach ( $data as $from => $unused ) {
-            $this->removePair( $from );
-        }
-        $this->fss = false;
-    }
-
-    /**
-     * Apply the replacement pairs to a string, using FSS when the
-     * fss_prep_replace() function exists and strtr() otherwise.
-     *
-     * @param string $subject
-     * @return string
-     */
-    function replace( $subject ) {
-        if ( !function_exists( 'fss_prep_replace' ) ) {
-            wfProfileIn( __METHOD__ . '-strtr' );
-            $result = strtr( $subject, $this->data );
-            wfProfileOut( __METHOD__ . '-strtr' );
-
-            return $result;
-        }
-
-        wfProfileIn( __METHOD__ . '-fss' );
-        // Build the FSS handle on first use; setters reset it to false
-        if ( $this->fss === false ) {
-            $this->fss = fss_prep_replace( $this->data );
-        }
-        $result = fss_exec_replace( $this->fss, $subject );
-        wfProfileOut( __METHOD__ . '-fss' );
-
-        return $result;
-    }
-}
-
-/**
- * An iterator which works exactly like:
- *
- *     foreach ( explode( $delim, $s ) as $element ) {
- *         ...
- *     }
- *
- * Except it doesn't use 193 byte per element
- */
-class ExplodeIterator implements Iterator {
-    /** @var string The subject string */
-    public $subject;
-    /** @var int Length of the subject string */
-    public $subjectLength;
-
-    /** @var string The delimiter */
-    public $delim;
-    /** @var int Length of the delimiter */
-    public $delimLength;
-
-    /** @var int|bool Start offset of the current token, false once exhausted */
-    public $curPos;
-
-    /** @var int|bool Offset of the next delimiter, false if there is none */
-    public $endPos;
-
-    /** @var string|bool The current token, false once exhausted */
-    public $current;
-
-    /**
-     * Construct an ExplodeIterator and position it on the first token
-     * @param string $delim
-     * @param string $subject
-     */
-    function __construct( $delim, $subject ) {
-        $this->subject = $subject;
-        $this->subjectLength = strlen( $subject );
-
-        $this->delim = $delim;
-        $this->delimLength = strlen( $delim );
-
-        $this->rewind();
-    }
-
-    function rewind() {
-        $this->curPos = 0;
-        $this->endPos = strpos( $this->subject, $this->delim );
-        $this->refreshCurrent();
-    }
-
-    /**
-     * Recompute the current token from curPos and endPos
-     */
-    function refreshCurrent() {
-        if ( $this->curPos === false ) {
-            // Iteration is over
-            $this->current = false;
-            return;
-        }
-        if ( $this->curPos >= $this->subjectLength ) {
-            // Empty token at the very end of the subject
-            $this->current = '';
-            return;
-        }
-        if ( $this->endPos === false ) {
-            // Last token: runs to the end of the subject
-            $this->current = substr( $this->subject, $this->curPos );
-            return;
-        }
-        $this->current = substr( $this->subject, $this->curPos, $this->endPos - $this->curPos );
-    }
-
-    function current() {
-        return $this->current;
-    }
-
-    /**
-     * @return int|bool Current position or boolean false if invalid
-     */
-    function key() {
-        return $this->curPos;
-    }
-
-    /**
-     * Advance to the next token
-     * @return string
-     */
-    function next() {
-        if ( $this->endPos !== false ) {
-            // Continue right behind the delimiter that ended the previous token
-            $this->curPos = $this->endPos + $this->delimLength;
-            $this->endPos = ( $this->curPos < $this->subjectLength )
-                ? strpos( $this->subject, $this->delim, $this->curPos )
-                : false;
-        } else {
-            // No delimiter was left, so the previous token was the last one
-            $this->curPos = false;
-        }
-        $this->refreshCurrent();
-
-        return $this->current;
-    }
-
-    /**
-     * @return bool
-     */
-    function valid() {
-        return $this->curPos !== false;
-    }
-}
+++ /dev/null
-<?php
-/**
- * This file deals with UID generation.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @author Aaron Schulz
- */
-
-/**
- * Class for getting statistically unique IDs
- *
- * @since 1.21
- */
-class UIDGenerator {
-    /** @var UIDGenerator Shared instance, created on first use by singleton() */
-    protected static $instance = null;
-
-    protected $nodeId32; // string; node ID in binary (32 bits)
-    protected $nodeId48; // string; node ID in binary (48 bits)
-
-    protected $lockFile88; // string; local file path
-    protected $lockFile128; // string; local file path
-
-    /** @var Array Open lock file handles, keyed by lock file property name */
-    protected $fileHandles = array();
-
-    const QUICK_RAND = 1; // get randomness from fast and insecure sources
-
-    /**
-     * Work out the node ID of this machine and the lock file locations.
-     *
-     * The node ID is taken from a cache file in the temp directory when that
-     * file holds exactly 12 hex digits. Otherwise it is derived from a MAC
-     * address reported by the OS tools, falling back to random hex digits
-     * with the multicast bit set, and is then written to the cache file.
-     */
-    protected function __construct() {
-        $idFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
-        $nodeId = is_file( $idFile ) ? file_get_contents( $idFile ) : '';
-        // Try to get some ID that uniquely identifies this machine (RFC 4122)...
-        if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
-            wfSuppressWarnings();
-            if ( wfIsWindows() ) {
-                // http://technet.microsoft.com/en-us/library/bb490913.aspx
-                $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
-                $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
-                $info = str_getcsv( $line );
-                $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
-            } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
-                // See http://linux.die.net/man/8/ifconfig
-                $m = array();
-                preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
-                    wfShellExec( '/sbin/ifconfig -a' ), $m );
-                $nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
-            }
-            wfRestoreWarnings();
-            if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
-                // No usable MAC address: fall back to a random node ID
-                $nodeId = MWCryptRand::generateHex( 12, true );
-                $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
-            }
-            file_put_contents( $idFile, $nodeId ); // cache
-        }
-        // The 32 bit ID is taken from the first 8 hex digits of the SHA-1 of the node ID
-        $this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
-        $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
-        // If different processes run as different users, they may have different temp dirs.
-        // This is dealt with by initializing the clock sequence number and counters randomly.
-        $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
-        $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
-    }
-
-    /**
-     * Get the shared generator, creating it on first use.
-     *
-     * @return UIDGenerator
-     */
-    protected static function singleton() {
-        if ( self::$instance === null ) {
-            self::$instance = new self();
-        }
-        return self::$instance;
-    }
-
-    /**
-     * Get a statistically unique 88-bit unsigned integer ID string.
-     * The bits of the UID are prefixed with the time (down to the millisecond).
-     *
-     * These IDs are suitable as values for the shard key of distributed data.
-     * If a column uses these as values, it should be declared UNIQUE to handle collisions.
-     * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
-     * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
-     *
-     * UID generation is serialized on each server (as the node ID is for the whole machine).
-     *
-     * @param $base integer Specifies a base other than 10
-     * @return string Number
-     * @throws MWException
-     */
-    public static function newTimestampedUID88( $base = 10 ) {
-        if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
-            throw new MWException( "Base must be an integer between 2 and 36" );
-        }
-        $gen = self::singleton();
-        // A clock sequence size of 1 means the sequence number is always 0
-        $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );
-        return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
-    }
-
-    /**
-     * Assemble the 88 bit ID from a timestamp, a counter and the node ID.
-     *
-     * @param array $info (UIDGenerator::millitime(), counter); further elements are ignored
-     * @return string 88 bits
-     * @throws MWException
-     */
-    protected function getTimestampedID88( array $info ) {
-        list( $time, $counter ) = $info;
-        // Take the 46 MSBs of "milliseconds since epoch"
-        $id_bin = $this->millisecondsSinceEpochBinary( $time );
-        // Add a 10 bit counter resulting in 56 bits total
-        $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
-        // Add the 32 bit node ID resulting in 88 bits total
-        $id_bin .= $this->nodeId32;
-        // Sanity check: any component wider than its slot makes the total length wrong
-        if ( strlen( $id_bin ) !== 88 ) {
-            throw new MWException( "Detected overflow for millisecond timestamp." );
-        }
-        return $id_bin;
-    }
-
-    /**
-     * Get a statistically unique 128-bit unsigned integer ID string.
-     * The bits of the UID are prefixed with the time (down to the millisecond).
-     *
-     * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
-     * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
-     * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
-     *
-     * UID generation is serialized on each server (as the node ID is for the whole machine).
-     *
-     * @param $base integer Specifies a base other than 10
-     * @return string Number
-     * @throws MWException
-     */
-    public static function newTimestampedUID128( $base = 10 ) {
-        if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
-            throw new MWException( "Base must be an integer between 2 and 36" );
-        }
-        $gen = self::singleton();
-        $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
-        return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
-    }
-
-    /**
-     * Assemble the 128 bit ID from a timestamp, a counter, a clock sequence
-     * number and the node ID.
-     *
-     * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
-     * @return string 128 bits
-     * @throws MWException
-     */
-    protected function getTimestampedID128( array $info ) {
-        list( $time, $counter, $clkSeq ) = $info;
-        // Take the 46 MSBs of "milliseconds since epoch"
-        $id_bin = $this->millisecondsSinceEpochBinary( $time );
-        // Add a 20 bit counter resulting in 66 bits total
-        $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
-        // Add a 14 bit clock sequence number resulting in 80 bits total
-        $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
-        // Add the 48 bit node ID resulting in 128 bits total
-        $id_bin .= $this->nodeId48;
-        // Sanity check: any component wider than its slot makes the total length wrong
-        if ( strlen( $id_bin ) !== 128 ) {
-            throw new MWException( "Detected overflow for millisecond timestamp." );
-        }
-        return $id_bin;
-    }
-
-    /**
-     * Return an RFC4122 compliant v4 UUID
-     *
-     * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
-     * @return string
-     * @throws MWException
-     */
-    public static function newUUIDv4( $flags = 0 ) {
-        // 31 random hex digits; the 32nd digit is the fixed version digit "4"
-        $hex = ( $flags & self::QUICK_RAND )
-            ? wfRandomString( 31 )
-            : MWCryptRand::generateHex( 31 );
-
-        return sprintf( '%s-%s-%s-%s-%s',
-            // "time_low" (32 bits)
-            substr( $hex, 0, 8 ),
-            // "time_mid" (16 bits)
-            substr( $hex, 8, 4 ),
-            // "time_hi_and_version" (16 bits)
-            '4' . substr( $hex, 12, 3 ),
-            // "clk_seq_hi_res (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
-            dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
-            // "node" (48 bits)
-            substr( $hex, 19, 12 )
-        );
-    }
-
-    /**
-     * Return an RFC4122 compliant v4 UUID
-     *
-     * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
-     * @return string 32 hex characters with no hyphens
-     * @throws MWException
-     */
-    public static function newRawUUIDv4( $flags = 0 ) {
-        return str_replace( '-', '', self::newUUIDv4( $flags ) );
-    }
-
-    /**
-     * Get a (time,counter,clock sequence) where (time,counter) is higher
-     * than any previous (time,counter) value for the given clock sequence.
-     * This is useful for making UIDs sequential on a per-node basis.
-     *
-     * The state is kept in the lock file as one line of the form
-     * "<clk seq> <sec> <msec> <counter> <offset>", read and rewritten while
-     * an exclusive lock on the file is held.
-     *
-     * @param string $lockFile Name of the property holding the local lock file path
-     * @param $clockSeqSize integer The number of possible clock sequence values
-     * @param $counterSize integer The number of possible counter values
-     * @return Array (result of UIDGenerator::millitime(), counter, clock sequence)
-     * @throws MWException
-     */
-    protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
-        // Get the UID lock file handle
-        if ( isset( $this->fileHandles[$lockFile] ) ) {
-            $handle = $this->fileHandles[$lockFile];
-        } else {
-            $handle = fopen( $this->$lockFile, 'cb+' );
-            if ( $handle === false ) {
-                throw new MWException( "Could not open '{$this->$lockFile}'." );
-            }
-            // Only successfully opened handles are cached, so that
-            // __destruct() never sees a non-resource entry.
-            $this->fileHandles[$lockFile] = $handle;
-        }
-        // Acquire the UID lock file
-        if ( !flock( $handle, LOCK_EX ) ) {
-            throw new MWException( "Could not acquire '{$this->$lockFile}'." );
-        }
-        // Get the current timestamp, clock sequence number, last time, and counter
-        rewind( $handle );
-        $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
-        $clockChanged = false; // clock set back significantly?
-        if ( count( $data ) == 5 ) { // last UID info already initialized
-            $clkSeq = (int)$data[0] % $clockSeqSize;
-            $prevTime = array( (int)$data[1], (int)$data[2] );
-            $offset = (int)$data[4] % $counterSize; // random counter offset
-            $counter = 0; // counter for UIDs with the same timestamp
-            // Delay until the clock reaches the time of the last ID.
-            // This detects any microtime() drift among processes.
-            $time = $this->timeWaitUntil( $prevTime );
-            if ( !$time ) { // too long to delay?
-                $clockChanged = true; // bump clock sequence number
-                $time = self::millitime();
-            } elseif ( $time == $prevTime ) {
-                // Bump the counter if there are timestamp collisions
-                $counter = (int)$data[3] % $counterSize;
-                if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
-                    flock( $handle, LOCK_UN ); // abort
-                    throw new MWException( "Counter overflow for timestamp value." );
-                }
-            }
-        } else { // last UID info not initialized
-            $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
-            $counter = 0;
-            $offset = mt_rand( 0, $counterSize - 1 );
-            $time = self::millitime();
-        }
-        // microtime() and gettimeofday() can drift from time() at least on Windows.
-        // The drift is immediate for processes running while the system clock changes.
-        // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
-        if ( abs( time() - $time[0] ) >= 2 ) {
-            // We don't want processes using too high or low timestamps to avoid duplicate
-            // UIDs and clock sequence number churn. This process should just be restarted.
-            flock( $handle, LOCK_UN ); // abort
-            throw new MWException( "Process clock is outdated or drifted." );
-        }
-        // If microtime() is synced and a clock change was detected, then the clock went back
-        if ( $clockChanged ) {
-            // Bump the clock sequence number and also randomize the counter offset,
-            // which is useful for UIDs that do not include the clock sequence number.
-            $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
-            $offset = mt_rand( 0, $counterSize - 1 );
-            trigger_error( "Clock was set back; sequence number incremented." );
-        }
-        // Update the (clock sequence number, timestamp, counter)
-        ftruncate( $handle, 0 );
-        rewind( $handle );
-        fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
-        fflush( $handle );
-        // Release the UID lock file
-        flock( $handle, LOCK_UN );
-
-        return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
-    }
-
-    /**
-     * Wait till the current timestamp reaches $time and return the current
-     * timestamp. This returns false if it would have to wait more than 10ms.
-     *
-     * @param array $time Result of UIDGenerator::millitime()
-     * @return Array|bool UIDGenerator::millitime() result or false
-     */
-    protected function timeWaitUntil( array $time ) {
-        do {
-            $ct = self::millitime();
-            if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
-                return $ct; // current timestamp is higher than $time
-            }
-            // Keep polling while the remaining wait is at most 10 milliseconds
-        } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
-
-        return false;
-    }
-
-    /**
-     * @param array $time Result of UIDGenerator::millitime()
-     * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
-     * @throws MWException
-     */
-    protected function millisecondsSinceEpochBinary( array $time ) {
-        list( $sec, $msec ) = $time;
-        $ts = 1000 * $sec + $msec;
-        if ( $ts > pow( 2, 52 ) ) {
-            throw new MWException( __METHOD__ .
-                ': sorry, this function doesn\'t work after the year 144680' );
-        }
-        // Keep only the last 46 binary digits of the converted value
-        return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 );
-    }
-
-    /**
-     * @return Array (current time in seconds, milliseconds since then)
-     */
-    protected static function millitime() {
-        // microtime() returns "<fraction of a second> <seconds>"
-        list( $msec, $sec ) = explode( ' ', microtime() );
-        return array( (int)$sec, (int)( $msec * 1000 ) );
-    }
-
-    /**
-     * Close every lock file handle that is still open.
-     */
-    function __destruct() {
-        foreach ( $this->fileHandles as $handle ) {
-            if ( is_resource( $handle ) ) {
-                fclose( $handle );
-            }
-        }
-    }
-}
+++ /dev/null
-<?php
-/**
- * XML syntax and type checker.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * Checks whether a file or string is well-formed XML, records the name of
- * its root element, and optionally runs a caller-supplied filter on every
- * opening element.
- */
-class XmlTypeCheck {
-    /**
-     * Will be set to true or false to indicate whether the file is
-     * well-formed XML. Note that this doesn't check schema validity.
-     */
-    public $wellFormed = false;
-
-    /**
-     * Will be set to true if the optional element filter returned
-     * a match at some point.
-     */
-    public $filterMatch = false;
-
-    /**
-     * Name of the document's root element, including any namespace
-     * as an expanded URL.
-     */
-    public $rootElement = '';
-
-    /**
-     * Optional filter called with ( $name, $attribs ) for each opening
-     * element. Declared explicitly instead of being created dynamically by
-     * the constructor; kept public so existing readers keep working.
-     */
-    public $filterCallback = null;
-
-    /**
-     * @param string $input a filename or string containing the XML element
-     * @param callable $filterCallback (optional)
-     *        Function to call to do additional custom validity checks from the
-     *        SAX element handler event. This gives you access to the element
-     *        namespace, name, and attributes, but not to text contents.
-     *        Filter should return 'true' to toggle on $this->filterMatch
-     * @param boolean $isFile (optional) indicates if the first parameter is a
-     *        filename (default, true) or if it is a string (false)
-     */
-    function __construct( $input, $filterCallback = null, $isFile = true ) {
-        $this->filterCallback = $filterCallback;
-        if ( $isFile ) {
-            $this->validateFromFile( $input );
-        } else {
-            $this->validateFromString( $input );
-        }
-    }
-
-    /**
-     * Alternative constructor: from filename
-     *
-     * @param string $fname the filename of an XML document
-     * @param callable $filterCallback (optional)
-     *        Function to call to do additional custom validity checks from the
-     *        SAX element handler event. This gives you access to the element
-     *        namespace, name, and attributes, but not to text contents.
-     *        Filter should return 'true' to toggle on $this->filterMatch
-     * @return XmlTypeCheck
-     */
-    public static function newFromFilename( $fname, $filterCallback = null ) {
-        return new self( $fname, $filterCallback, true );
-    }
-
-    /**
-     * Alternative constructor: from string
-     *
-     * @param string $string a string containing an XML element
-     * @param callable $filterCallback (optional)
-     *        Function to call to do additional custom validity checks from the
-     *        SAX element handler event. This gives you access to the element
-     *        namespace, name, and attributes, but not to text contents.
-     *        Filter should return 'true' to toggle on $this->filterMatch
-     * @return XmlTypeCheck
-     */
-    public static function newFromString( $string, $filterCallback = null ) {
-        return new self( $string, $filterCallback, false );
-    }
-
-    /**
-     * Get the root element. Simple accessor to $rootElement
-     *
-     * @return string
-     */
-    public function getRootElement() {
-        return $this->rootElement;
-    }
-
-    /**
-     * Get an XML parser with the root element handler.
-     * @see XmlTypeCheck::rootElementOpen()
-     * @return resource a resource handle for the XML parser
-     */
-    private function getParser() {
-        $parser = xml_parser_create_ns( 'UTF-8' );
-        // case folding violates XML standard, turn it off
-        xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
-        xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
-        return $parser;
-    }
-
-    /**
-     * Parse the given file in 32 KiB chunks and record the result in
-     * $this->wellFormed.
-     *
-     * A file that does not exist, cannot be opened or cannot be read is
-     * reported as not well-formed.
-     *
-     * @param string $fname the filename
-     */
-    private function validateFromFile( $fname ) {
-        $this->wellFormed = false;
-
-        if ( !file_exists( $fname ) ) {
-            return;
-        }
-        $file = fopen( $fname, "rb" );
-        if ( !$file ) {
-            return;
-        }
-
-        $parser = $this->getParser();
-        do {
-            $chunk = fread( $file, 32768 );
-            if ( $chunk === false ) {
-                // Read error: stop instead of feeding a non-string to the parser
-                $ret = 0;
-                break;
-            }
-            // The last argument tells the parser whether this is the final chunk
-            $ret = xml_parse( $parser, $chunk, feof( $file ) );
-        } while ( $ret != 0 && !feof( $file ) );
-
-        fclose( $file );
-        xml_parser_free( $parser );
-
-        $this->wellFormed = ( $ret != 0 );
-    }
-
-    /**
-     * Parse the given string in one go and record the result in
-     * $this->wellFormed.
-     *
-     * @param string $string the XML-input-string to be checked.
-     */
-    private function validateFromString( $string ) {
-        $parser = $this->getParser();
-        $ret = xml_parse( $parser, $string, true );
-        xml_parser_free( $parser );
-        $this->wellFormed = ( $ret != 0 );
-    }
-
-    /**
-     * Element handler for the first opening element: records the root
-     * element name, then either hands over to elementOpen() for all
-     * elements (when a filter is set) or removes the element handlers.
-     *
-     * @param $parser
-     * @param $name
-     * @param $attribs
-     */
-    private function rootElementOpen( $parser, $name, $attribs ) {
-        $this->rootElement = $name;
-
-        if ( is_callable( $this->filterCallback ) ) {
-            xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );
-            // The root element itself must pass through the filter too
-            $this->elementOpen( $parser, $name, $attribs );
-        } else {
-            // We only need the first open element
-            xml_set_element_handler( $parser, false, false );
-        }
-    }
-
-    /**
-     * Element handler used when a filter is set: runs the filter and
-     * remembers whether it ever returned a true value.
-     *
-     * @param $parser
-     * @param $name
-     * @param $attribs
-     */
-    private function elementOpen( $parser, $name, $attribs ) {
-        if ( call_user_func( $this->filterCallback, $name, $attribs ) ) {
-            // Filter hit!
-            $this->filterMatch = true;
-        }
-    }
-}
+++ /dev/null
-<?php
-/**
- * ZIP file directories reader, for the purposes of upload verification.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- */
-
-/**
- * A class for reading ZIP file directories, for the purposes of upload
- * verification.
- *
- * Only a functional interface is provided: ZipDirectoryReader::read(). No
- * access is given to object instances.
- *
- */
-class ZipDirectoryReader {
- /**
- * Read a ZIP file and call a function for each file discovered in it.
- *
- * Because this class is aimed at verification, an error is raised on
- * suspicious or ambiguous input, instead of emulating some standard
- * behavior.
- *
- * @param string $fileName The archive file name
- * @param array $callback The callback function. It will be called for each file
- * with a single associative array each time, with members:
- *
- * - name: The file name. Directories conventionally have a trailing
- * slash.
- *
- * - mtime: The file modification time, in MediaWiki 14-char format
- *
- * - size: The uncompressed file size
- *
- * @param array $options An associative array of read options, with the option
- * name in the key. This may currently contain:
- *
- * - zip64: If this is set to true, then we will emulate a
- * library with ZIP64 support, like OpenJDK 7. If it is set to
- * false, then we will emulate a library with no knowledge of
- * ZIP64.
- *
- * NOTE: The ZIP64 code is untested and probably doesn't work. It
- * turned out to be easier to just reject ZIP64 archive uploads,
- * since they are likely to be very rare. Confirming safety of a
- * ZIP64 file is fairly complex. What do you do with a file that is
- * ambiguous and broken when read with a non-ZIP64 reader, but valid
- * when read with a ZIP64 reader? This situation is normal for a
- * valid ZIP64 file, and working out what non-ZIP64 readers will make
- * of such a file is not trivial.
- *
- * @return Status object. The following fatal errors are defined:
- *
- * - zip-file-open-error: The file could not be opened.
- *
- * - zip-wrong-format: The file does not appear to be a ZIP file.
- *
- * - zip-bad: There was something wrong or ambiguous about the file
- * data.
- *
- * - zip-unsupported: The ZIP file uses features which
- * ZipDirectoryReader does not support.
- *
- * The default messages for those fatal errors are written in a way that
- * makes sense for upload verification.
- *
- * If a fatal error is returned, more information about the error will be
- * available in the debug log.
- *
- * Note that the callback function may be called any number of times before
- * a fatal error is returned. If this occurs, the data sent to the callback
- * function should be discarded.
- */
- public static function read( $fileName, $callback, $options = array() ) {
-     // Instances are single-use: build one, run it, hand back its Status.
-     $reader = new self( $fileName, $callback, $options );
-     $status = $reader->execute();
-     return $status;
- }
-
- /** The file name */
- var $fileName;
-
- /** The opened file resource */
- var $file;
-
- /** The cached length of the file, or null if it has not been loaded yet. */
- var $fileLength;
-
- /** A segmented cache of the file contents */
- var $buffer;
-
- /** The file data callback */
- var $callback;
-
- /** The ZIP64 mode */
- var $zip64 = false;
-
- /** Stored headers */
- var $eocdr, $eocdr64, $eocdr64Locator;
-
- var $data;
-
- /** The "extra field" ID for ZIP64 central directory entries */
- const ZIP64_EXTRA_HEADER = 0x0001;
-
- /** The segment size for the file contents cache */
- const SEGSIZE = 16384;
-
- /** The index of the "general field" bit for UTF-8 file names */
- const GENERAL_UTF8 = 11;
-
- /** The index of the "general field" bit for central directory encryption */
- const GENERAL_CD_ENCRYPTED = 13;
-
- /**
-  * Protected constructor: stores the file name, the per-file callback and
-  * the optional ZIP64 mode. Nothing is opened or read here.
-  *
-  * @param string $fileName The archive file name
-  * @param callable $callback Called once for each directory entry
-  * @param array $options Read options; only the 'zip64' key is looked at
-  */
- protected function __construct( $fileName, $callback, $options ) {
-     $this->callback = $callback;
-     $this->fileName = $fileName;
-
-     // Keep the default ZIP64 mode unless the caller supplied one.
-     if ( isset( $options['zip64'] ) ) {
-         $this->zip64 = $options['zip64'];
-     }
- }
-
- /**
-  * Read the directory according to settings in $this.
-  *
-  * Opens the archive, reads the end of central directory record, locates
-  * the central directory (ZIP64-aware or legacy, depending on $this->zip64)
-  * and reads it. A ZipDirectoryReaderError raised on the way is turned into
-  * a fatal Status; any other exception is passed on to the caller after
-  * the file handle has been closed.
-  *
-  * @return Status
-  */
- function execute() {
-     // Binary mode: the archive must be read byte for byte on every platform
-     $this->file = fopen( $this->fileName, 'rb' );
-     $this->data = array();
-     if ( !$this->file ) {
-         return Status::newFatal( 'zip-file-open-error' );
-     }
-
-     $status = Status::newGood();
-     try {
-         $this->readEndOfCentralDirectoryRecord();
-         if ( $this->zip64 ) {
-             list( $offset, $size ) = $this->findZip64CentralDirectory();
-             $this->readCentralDirectory( $offset, $size );
-         } else {
-             if ( $this->eocdr['CD size'] == 0xffffffff
-                 || $this->eocdr['CD offset'] == 0xffffffff
-                 || $this->eocdr['CD entries total'] == 0xffff )
-             {
-                 $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
-                     'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
-                     'opening vulnerabilities on clients using OpenJDK 7 or later.' );
-             }
-
-             list( $offset, $size ) = $this->findOldCentralDirectory();
-             $this->readCentralDirectory( $offset, $size );
-         }
-     } catch ( ZipDirectoryReaderError $e ) {
-         $status->fatal( $e->getErrorCode() );
-     } catch ( Exception $e ) {
-         // Not ours to handle (e.g. thrown by the callback), but do not
-         // leak the open file handle on the way out.
-         fclose( $this->file );
-         throw $e;
-     }
-
-     fclose( $this->file );
-     return $status;
- }
-
- /**
-  * Write a message to the debug log, then abort by throwing.
-  *
-  * @param string $code Error code handed to the ZipDirectoryReaderError
-  * @param string $debugMessage Detail that only goes to the debug log
-  * @throws ZipDirectoryReaderError always
-  */
- function error( $code, $debugMessage ) {
-     $logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
-     wfDebug( $logLine );
-     throw new ZipDirectoryReaderError( $code );
- }
-
- /**
-  * Locate and read the "end of central directory record" (EOCDR), the
-  * header which the ZIP spec places at the end of the central directory.
-  *
-  * The last 65536 bytes plus the fixed record size are searched backwards
-  * for the signature. The parsed fields are stored in $this->eocdr together
-  * with the computed 'EOCDR size', the 'file comment' and the absolute
-  * 'position' of the record in the file.
-  */
- function readEndOfCentralDirectoryRecord() {
-     $layout = array(
-         'signature' => 4,
-         'disk' => 2,
-         'CD start disk' => 2,
-         'CD entries this disk' => 2,
-         'CD entries total' => 2,
-         'CD size' => 4,
-         'CD offset' => 4,
-         'file comment length' => 2,
-     );
-     $fixedSize = $this->getStructSize( $layout );
-
-     // Start of the search window, clamped to the start of the file
-     $startPos = max( 0, $this->getFileLength() - 65536 - $fixedSize );
-
-     $tail = $this->getBlock( $startPos );
-     $sigPos = strrpos( $tail, "PK\x05\x06" );
-     if ( $sigPos === false ) {
-         $this->error( 'zip-wrong-format',
-             "zip file lacks EOCDR signature. It probably isn't a zip file." );
-     }
-
-     $this->eocdr = $this->unpack( substr( $tail, $sigPos ), $layout );
-     $commentLength = $this->eocdr['file comment length'];
-     $recordSize = $fixedSize + $commentLength;
-     $this->eocdr['EOCDR size'] = $recordSize;
-
-     // The record, including its comment, must end exactly at end of file
-     if ( strlen( $tail ) - $sigPos != $recordSize ) {
-         $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
-     }
-
-     $singleDisk = $this->eocdr['disk'] === 0 && $this->eocdr['CD start disk'] === 0;
-     if ( !$singleDisk ) {
-         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
-     }
-
-     $commentInfo = array( 'file comment' => array( 'string', $commentLength ) );
-     $this->eocdr += $this->unpack( $tail, $commentInfo, $sigPos + $fixedSize );
-     $this->eocdr['position'] = $startPos + $sigPos;
- }
-
- /**
-  * Read the header called the "ZIP64 end of central directory locator",
-  * expected directly in front of the end of central directory record, and
-  * store it in $this->eocdr64Locator. An error is raised if the signature
-  * does not match.
-  */
- function readZip64EndOfCentralDirectoryLocator() {
-     $layout = array(
-         'signature' => array( 'string', 4 ),
-         'eocdr64 start disk' => 4,
-         'eocdr64 offset' => 8,
-         'number of disks' => 4,
-     );
-     $locatorSize = $this->getStructSize( $layout );
-
-     // The locator sits immediately before the EOCDR at the end of the file
-     $locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;
-     $locator = $this->unpack( $this->getBlock( $locatorPos, $locatorSize ), $layout );
-     $this->eocdr64Locator = $locator;
-
-     if ( $locator['signature'] !== "PK\x06\x07" ) {
-         // Note: Java will allow this and continue to read the
-         // EOCDR64, so we have to reject the upload, we can't
-         // just use the EOCDR header instead.
-         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
-     }
- }
-
- /**
-  * Read the header called the "ZIP64 end of central directory record" from
-  * the offset named by the locator and store it in $this->eocdr64. It may
-  * replace the regular "end of central directory record" in ZIP64 files.
-  *
-  * NOTE(review): the locator's 'number of disks' field is required to be 0
-  * here; confirm against the ZIP specification which value a single-disk
-  * archive stores in that field.
-  */
- function readZip64EndOfCentralDirectoryRecord() {
-     $locator = $this->eocdr64Locator;
-     $locatorIsSingleDisk = $locator['eocdr64 start disk'] == 0
-         && $locator['number of disks'] == 0;
-     if ( !$locatorIsSingleDisk ) {
-         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
-     }
-
-     $layout = array(
-         'signature' => array( 'string', 4 ),
-         'EOCDR64 size' => 8,
-         'version made by' => 2,
-         'version needed' => 2,
-         'disk' => 4,
-         'CD start disk' => 4,
-         'CD entries this disk' => 8,
-         'CD entries total' => 8,
-         'CD size' => 8,
-         'CD offset' => 8
-     );
-     $recordSize = $this->getStructSize( $layout );
-     $raw = $this->getBlock( $this->eocdr64Locator['eocdr64 offset'], $recordSize );
-     $record = $this->unpack( $raw, $layout );
-     $this->eocdr64 = $record;
-
-     if ( $record['signature'] !== "PK\x06\x06" ) {
-         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
-     }
-     $recordIsSingleDisk = $record['disk'] === 0 && $record['CD start disk'] === 0;
-     if ( !$recordIsSingleDisk ) {
-         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
-     }
- }
-
- /**
-  * Find the location of the central directory, as would be seen by a
-  * non-ZIP64 reader, using the fields of the end of central directory
-  * record.
-  *
-  * @return array List containing offset and size.
-  */
- function findOldCentralDirectory() {
-     $record = $this->eocdr;
-     $offset = $record['CD offset'];
-     $size = $record['CD size'];
-
-     // Some readers use the EOCDR position instead of the offset field
-     // to find the directory, so to be safe, we check if they both agree.
-     $expectedEnd = $record['position'];
-     if ( $offset + $size != $expectedEnd ) {
-         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
-             'of central directory record' );
-     }
-     return array( $offset, $size );
- }
-
- /**
-  * Find the location of the central directory, as would be seen by a
-  * ZIP64-compliant reader. The ZIP64 headers are only consulted when one of
-  * the regular fields holds its all-ones marker value.
-  *
-  * @return array List containing offset and size.
-  */
- function findZip64CentralDirectory() {
-     // The spec is ambiguous about the exact rules of precedence between the
-     // ZIP64 headers and the original headers. Here we follow zip_util.c
-     // from OpenJDK 7.
-     $legacy = $this->eocdr;
-     $size = $legacy['CD size'];
-     $offset = $legacy['CD offset'];
-     $endPos = $legacy['position'];
-
-     $pointsToZip64 = $size == 0xffffffff
-         || $offset == 0xffffffff
-         || $legacy['CD entries total'] == 0xffff;
-     if ( $pointsToZip64 ) {
-         $this->readZip64EndOfCentralDirectoryLocator();
-
-         if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
-             $this->readZip64EndOfCentralDirectoryRecord();
-             if ( isset( $this->eocdr64['CD offset'] ) ) {
-                 // The ZIP64 record takes over from the regular fields
-                 $size = $this->eocdr64['CD size'];
-                 $offset = $this->eocdr64['CD offset'];
-                 $endPos = $this->eocdr64Locator['eocdr64 offset'];
-             }
-         }
-     }
-
-     // Some readers use the EOCDR position instead of the offset field
-     // to find the directory, so to be safe, we check if they both agree.
-     if ( $offset + $size != $endPos ) {
-         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
-             'of central directory record' );
-     }
-     return array( $offset, $size );
- }
-
- /**
-  * Read the central directory at the given location and invoke the
-  * callback once for each entry with its name, modification time and
-  * uncompressed size.
-  *
-  * @param int $offset Position of the central directory in the file
-  * @param int $size Length of the central directory in bytes
-  */
- function readCentralDirectory( $offset, $size ) {
-     $block = $this->getBlock( $offset, $size );
-
-     $fixedInfo = array(
-         'signature' => array( 'string', 4 ),
-         'version made by' => 2,
-         'version needed' => 2,
-         'general bits' => 2,
-         'compression method' => 2,
-         'mod time' => 2,
-         'mod date' => 2,
-         'crc-32' => 4,
-         'compressed size' => 4,
-         'uncompressed size' => 4,
-         'name length' => 2,
-         'extra field length' => 2,
-         'comment length' => 2,
-         'disk number start' => 2,
-         'internal attrs' => 2,
-         'external attrs' => 4,
-         'local header offset' => 4,
-     );
-     $fixedSize = $this->getStructSize( $fixedInfo );
-     $haveIconv = function_exists( 'iconv' );
-
-     for ( $pos = 0; $pos < $size; ) {
-         // Fixed-size part of the entry
-         $entry = $this->unpack( $block, $fixedInfo, $pos );
-         $pos += $fixedSize;
-
-         if ( $entry['signature'] !== "PK\x01\x02" ) {
-             $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
-         }
-
-         // Variable-size part, whose lengths come from the fixed part
-         $variableInfo = array(
-             'name' => array( 'string', $entry['name length'] ),
-             'extra field' => array( 'string', $entry['extra field length'] ),
-             'comment' => array( 'string', $entry['comment length'] ),
-         );
-         $entry += $this->unpack( $block, $variableInfo, $pos );
-         $pos += $this->getStructSize( $variableInfo );
-
-         if ( $this->zip64 && (
-             $entry['compressed size'] == 0xffffffff
-             || $entry['uncompressed size'] == 0xffffffff
-             || $entry['local header offset'] == 0xffffffff ) )
-         {
-             $zip64Data = $this->unpackZip64Extra( $entry['extra field'] );
-             if ( $zip64Data ) {
-                 // Values from the ZIP64 extra field win over the regular ones
-                 $entry = $zip64Data + $entry;
-             }
-         }
-
-         if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
-             $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
-         }
-
-         // Convert the timestamp into MediaWiki format
-         // For the format, please see the MS-DOS 2.0 Programmer's Reference,
-         // pages 3-5 and 3-6.
-         $dosDate = $entry['mod date'];
-         $dosTime = $entry['mod time'];
-         $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
-             1980 + ( $dosDate >> 9 ),
-             ( $dosDate >> 5 ) & 15,
-             $dosDate & 31,
-             ( $dosTime >> 11 ) & 31,
-             ( $dosTime >> 5 ) & 63,
-             ( $dosTime & 31 ) * 2 );
-
-         // Convert the character set in the file name, unless the entry is
-         // flagged as UTF-8 already or iconv is not available
-         if ( $haveIconv && !$this->testBit( $entry['general bits'], self::GENERAL_UTF8 ) ) {
-             $name = iconv( 'CP437', 'UTF-8', $entry['name'] );
-         } else {
-             $name = $entry['name'];
-         }
-
-         // Compile a data array for the user, with a sensible format
-         call_user_func( $this->callback, array(
-             'name' => $name,
-             'mtime' => $timestamp,
-             'size' => $entry['uncompressed size'],
-         ) );
-     }
- }
-
- /**
- * Interpret ZIP64 "extra field" data and return an associative array.
- * @return array|bool
- */
- function unpackZip64Extra( $extraField ) {
- $extraHeaderInfo = array(
- 'id' => 2,
- 'size' => 2,
- );
- $extraHeaderSize = $this->getStructSize( $extraHeaderInfo );
-
- $zip64ExtraInfo = array(
- 'uncompressed size' => 8,
- 'compressed size' => 8,
- 'local header offset' => 8,
- 'disk number start' => 4,
- );
-
- $extraPos = 0;
- while ( $extraPos < strlen( $extraField ) ) {
- $extra = $this->unpack( $extraField, $extraHeaderInfo, $extraPos );
- $extraPos += $extraHeaderSize;
- $extra += $this->unpack( $extraField,
- array( 'data' => array( 'string', $extra['size'] ) ),
- $extraPos );
- $extraPos += $extra['size'];
-
- if ( $extra['id'] == self::ZIP64_EXTRA_HEADER ) {
- return $this->unpack( $extra['data'], $zip64ExtraInfo );
- }
- }
-
- return false;
- }
-
- /**
- * Get the length of the file.
- */
- function getFileLength() {
- if ( $this->fileLength === null ) {
- $stat = fstat( $this->file );
- $this->fileLength = $stat['size'];
- }
- return $this->fileLength;
- }
-
- /**
- * Get the file contents from a given offset. If there are not enough bytes
- * in the file to satisfy the request, an exception will be thrown.
- *
- * @param int $start The byte offset of the start of the block.
- * @param int $length The number of bytes to return. If omitted, the remainder
- * of the file will be returned.
- *
- * @return string
- */
- function getBlock( $start, $length = null ) {
- $fileLength = $this->getFileLength();
- if ( $start >= $fileLength ) {
- $this->error( 'zip-bad', "getBlock() requested position $start, " .
- "file length is $fileLength" );
- }
- if ( $length === null ) {
- $length = $fileLength - $start;
- }
- $end = $start + $length;
- if ( $end > $fileLength ) {
- $this->error( 'zip-bad', "getBlock() requested end position $end, " .
- "file length is $fileLength" );
- }
- $startSeg = floor( $start / self::SEGSIZE );
- $endSeg = ceil( $end / self::SEGSIZE );
-
- $block = '';
- for ( $segIndex = $startSeg; $segIndex <= $endSeg; $segIndex++ ) {
- $block .= $this->getSegment( $segIndex );
- }
-
- $block = substr( $block,
- $start - $startSeg * self::SEGSIZE,
- $length );
-
- if ( strlen( $block ) < $length ) {
- $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
- }
-
- return $block;
- }
-
- /**
- * Get a section of the file starting at position $segIndex * self::SEGSIZE,
- * of length self::SEGSIZE. The result is cached. This is a helper function
- * for getBlock().
- *
- * If there are not enough bytes in the file to satisfy the request, the
- * return value will be truncated. If a request is made for a segment beyond
- * the end of the file, an empty string will be returned.
- * @return string
- */
- function getSegment( $segIndex ) {
- if ( !isset( $this->buffer[$segIndex] ) ) {
- $bytePos = $segIndex * self::SEGSIZE;
- if ( $bytePos >= $this->getFileLength() ) {
- $this->buffer[$segIndex] = '';
- return '';
- }
- if ( fseek( $this->file, $bytePos ) ) {
- $this->error( 'zip-bad', "seek to $bytePos failed" );
- }
- $seg = fread( $this->file, self::SEGSIZE );
- if ( $seg === false ) {
- $this->error( 'zip-bad', "read from $bytePos failed" );
- }
- $this->buffer[$segIndex] = $seg;
- }
- return $this->buffer[$segIndex];
- }
-
- /**
- * Get the size of a structure in bytes. See unpack() for the format of $struct.
- * @return int
- */
- function getStructSize( $struct ) {
- $size = 0;
- foreach ( $struct as $type ) {
- if ( is_array( $type ) ) {
- list( , $fieldSize ) = $type;
- $size += $fieldSize;
- } else {
- $size += $type;
- }
- }
- return $size;
- }
-
- /**
- * Unpack a binary structure. This is like the built-in unpack() function
- * except nicer.
- *
- * @param string $string The binary data input
- *
- * @param array $struct An associative array giving structure members and their
- * types. In the key is the field name. The value may be either an
- * integer, in which case the field is a little-endian unsigned integer
- * encoded in the given number of bytes, or an array, in which case the
- * first element of the array is the type name, and the subsequent
- * elements are type-dependent parameters. Only one such type is defined:
- * - "string": The second array element gives the length of string.
- * Not null terminated.
- *
- * @param int $offset The offset into the string at which to start unpacking.
- *
- * @throws MWException
- * @return array Unpacked associative array. Note that large integers in the input
- * may be represented as floating point numbers in the return value, so
- * the use of weak comparison is advised.
- */
- function unpack( $string, $struct, $offset = 0 ) {
- $size = $this->getStructSize( $struct );
- if ( $offset + $size > strlen( $string ) ) {
- $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
- }
-
- $data = array();
- $pos = $offset;
- foreach ( $struct as $key => $type ) {
- if ( is_array( $type ) ) {
- list( $typeName, $fieldSize ) = $type;
- switch ( $typeName ) {
- case 'string':
- $data[$key] = substr( $string, $pos, $fieldSize );
- $pos += $fieldSize;
- break;
- default:
- throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
- }
- } else {
- // Unsigned little-endian integer
- $length = intval( $type );
-
- // Calculate the value. Use an algorithm which automatically
- // upgrades the value to floating point if necessary.
- $value = 0;
- for ( $i = $length - 1; $i >= 0; $i-- ) {
- $value *= 256;
- $value += ord( $string[$pos + $i] );
- }
-
- // Throw an exception if there was loss of precision
- if ( $value > pow( 2, 52 ) ) {
- $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
- 'This could happen if we tried to unpack a 64-bit structure ' .
- 'at an invalid location.' );
- }
- $data[$key] = $value;
- $pos += $length;
- }
- }
-
- return $data;
- }
-
- /**
- * Returns a bit from a given position in an integer value, converted to
- * boolean.
- *
- * @param $value integer
- * @param int $bitIndex The index of the bit, where 0 is the LSB.
- * @return bool
- */
- function testBit( $value, $bitIndex ) {
- return (bool)( ( $value >> $bitIndex ) & 1 );
- }
-
- /**
- * Debugging helper function which dumps a string in hexdump -C format.
- */
- function hexDump( $s ) {
- $n = strlen( $s );
- for ( $i = 0; $i < $n; $i += 16 ) {
- printf( "%08X ", $i );
- for ( $j = 0; $j < 16; $j++ ) {
- print " ";
- if ( $j == 8 ) {
- print " ";
- }
- if ( $i + $j >= $n ) {
- print " ";
- } else {
- printf( "%02X", ord( $s[$i + $j] ) );
- }
- }
-
- print " |";
- for ( $j = 0; $j < 16; $j++ ) {
- if ( $i + $j >= $n ) {
- print " ";
- } elseif ( ctype_print( $s[$i + $j] ) ) {
- print $s[$i + $j];
- } else {
- print '.';
- }
- }
- print "|\n";
- }
- }
-}
-
-/**
- * Internal exception class. Will be caught by private code.
- */
-class ZipDirectoryReaderError extends Exception {
- var $errorCode;
-
- function __construct( $code ) {
- $this->errorCode = $code;
- parent::__construct( "ZipDirectoryReader error: $code" );
- }
-
- /**
- * @return mixed
- */
- function getErrorCode() {
- return $this->errorCode;
- }
-}
--- /dev/null
+<?php
+/**
+ * Expansion of the PHP execution time limit feature for a function call.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Class to expand PHP execution time for a function call.
+ * Use this when performing changes that should not be interrupted.
+ *
+ * On construction, max_execution_time is set to $seconds via ini_set().
+ * If the client aborts the connection, PHP will continue to run.
+ * When the object goes out of scope, the timer is restarted, with
+ * the original time limit minus the time the object existed.
+ */
+class ScopedPHPTimeout {
+	/** @var float Time at which the scoped limit was installed (microtime), in seconds */
+	protected $startTime;
+	/** @var string|bool|null Previous max_execution_time as returned by ini_set(); null when inert */
+	protected $oldTimeout;
+	/** @var int|null Previous ignore_user_abort() setting; null when inert */
+	protected $oldIgnoreAbort;
+
+	/** @var int Number of instances currently holding a scoped limit */
+	protected static $stackDepth = 0;
+	/** @var int Number of scoped limits installed so far */
+	protected static $totalCalls = 0;
+	/** @var float Seconds spent inside scoped limits so far */
+	protected static $totalElapsed = 0;
+
+	/* Prevent callers in infinite loops from running forever */
+	const MAX_TOTAL_CALLS = 1000000;
+	const MAX_TOTAL_TIME = 300; // seconds
+
+	/**
+	 * Install a temporary execution time limit and make PHP ignore client aborts.
+	 *
+	 * Nothing is done (apart from raising a notice in the first three cases) when:
+	 *  - the call budget (MAX_TOTAL_CALLS) is used up,
+	 *  - the time budget (MAX_TOTAL_TIME) is used up,
+	 *  - another instance is still alive (no nesting),
+	 *  - max_execution_time is not positive (e.g. CLI), or
+	 *  - the limit could not be changed.
+	 *
+	 * @param $seconds integer
+	 */
+	public function __construct( $seconds ) {
+		if ( ini_get( 'max_execution_time' ) > 0 ) { // CLI uses 0
+			if ( self::$totalCalls >= self::MAX_TOTAL_CALLS ) {
+				trigger_error( "Maximum invocations of " . __CLASS__ . " exceeded." );
+			} elseif ( self::$totalElapsed >= self::MAX_TOTAL_TIME ) {
+				trigger_error( "Time limit within invocations of " . __CLASS__ . " exceeded." );
+			} elseif ( self::$stackDepth > 0 ) { // recursion guard
+				trigger_error( "Recursive invocation of " . __CLASS__ . " attempted." );
+			} else {
+				$oldIgnoreAbort = ignore_user_abort( true );
+				$oldTimeout = ini_set( 'max_execution_time', $seconds );
+				if ( $oldTimeout === false ) {
+					// The limit could not be changed. The destructor only undoes
+					// state when $oldTimeout is set, so roll back here and stay
+					// inert; otherwise $stackDepth would never drop back to 0 and
+					// every later instance would be rejected as "recursive".
+					ignore_user_abort( $oldIgnoreAbort );
+					return;
+				}
+				$this->oldIgnoreAbort = $oldIgnoreAbort;
+				$this->oldTimeout = $oldTimeout;
+				$this->startTime = microtime( true );
+				++self::$stackDepth;
+				++self::$totalCalls; // proof against < 1us scopes
+			}
+		}
+	}
+
+	/**
+	 * Restore the original timeout.
+	 * This does not account for the timer value on __construct().
+	 */
+	public function __destruct() {
+		// Only set when the constructor actually installed a limit
+		if ( $this->oldTimeout ) {
+			$elapsed = microtime( true ) - $this->startTime;
+			// Note: a limit of 0 is treated as "forever"
+			set_time_limit( max( 1, $this->oldTimeout - (int)$elapsed ) );
+			// If each scoped timeout is for less than one second, we end up
+			// restoring the original timeout without any decrease in value.
+			// Thus web scripts in an infinite loop can run forever unless we
+			// take some measures to prevent this. Track total time and calls.
+			self::$totalElapsed += $elapsed;
+			--self::$stackDepth;
+			ignore_user_abort( $this->oldIgnoreAbort );
+		}
+	}
+}
--- /dev/null
+<?php
+/**
+ * XML syntax and type checker.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+class XmlTypeCheck {
+	/**
+	 * Will be set to true or false to indicate whether the file is
+	 * well-formed XML. Note that this doesn't check schema validity.
+	 */
+	public $wellFormed = false;
+
+	/**
+	 * Will be set to true if the optional element filter returned
+	 * a match at some point.
+	 */
+	public $filterMatch = false;
+
+	/**
+	 * Name of the document's root element, including any namespace
+	 * as an expanded URL.
+	 */
+	public $rootElement = '';
+
+	/**
+	 * Optional callback invoked with ( $name, $attribs ) for every opening
+	 * element; null when no filtering was requested.
+	 */
+	public $filterCallback = null;
+
+	/**
+	 * @param string $input a filename or string containing the XML element
+	 * @param callable $filterCallback (optional)
+	 *        Function to call to do additional custom validity checks from the
+	 *        SAX element handler event. This gives you access to the element
+	 *        namespace, name, and attributes, but not to text contents.
+	 *        Filter should return 'true' to toggle on $this->filterMatch
+	 * @param boolean $isFile (optional) indicates if the first parameter is a
+	 *        filename (default, true) or if it is a string (false)
+	 */
+	function __construct( $input, $filterCallback = null, $isFile = true ) {
+		$this->filterCallback = $filterCallback;
+		if ( $isFile ) {
+			$this->validateFromFile( $input );
+		} else {
+			$this->validateFromString( $input );
+		}
+	}
+
+	/**
+	 * Alternative constructor: from filename
+	 *
+	 * @param string $fname the filename of an XML document
+	 * @param callable $filterCallback (optional)
+	 *        Function to call to do additional custom validity checks from the
+	 *        SAX element handler event. This gives you access to the element
+	 *        namespace, name, and attributes, but not to text contents.
+	 *        Filter should return 'true' to toggle on $this->filterMatch
+	 * @return XmlTypeCheck
+	 */
+	public static function newFromFilename( $fname, $filterCallback = null ) {
+		return new self( $fname, $filterCallback, true );
+	}
+
+	/**
+	 * Alternative constructor: from string
+	 *
+	 * @param string $string a string containing an XML element
+	 * @param callable $filterCallback (optional)
+	 *        Function to call to do additional custom validity checks from the
+	 *        SAX element handler event. This gives you access to the element
+	 *        namespace, name, and attributes, but not to text contents.
+	 *        Filter should return 'true' to toggle on $this->filterMatch
+	 * @return XmlTypeCheck
+	 */
+	public static function newFromString( $string, $filterCallback = null ) {
+		return new self( $string, $filterCallback, false );
+	}
+
+	/**
+	 * Get the root element. Simple accessor to $rootElement
+	 *
+	 * @return string
+	 */
+	public function getRootElement() {
+		return $this->rootElement;
+	}
+
+	/**
+	 * Get an XML parser with the root element handler.
+	 * @see XmlTypeCheck::rootElementOpen()
+	 * @return resource a resource handle for the XML parser
+	 */
+	private function getParser() {
+		$parser = xml_parser_create_ns( 'UTF-8' );
+		// case folding violates XML standard, turn it off
+		xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
+		xml_set_element_handler( $parser, array( $this, 'rootElementOpen' ), false );
+		return $parser;
+	}
+
+	/**
+	 * Parse the given file in 32 KiB chunks and record the result in
+	 * $this->wellFormed. A file that does not exist, cannot be opened or
+	 * cannot be read is reported as not well-formed.
+	 *
+	 * @param string $fname the filename
+	 */
+	private function validateFromFile( $fname ) {
+		// Assume failure until the whole document has been parsed.
+		$this->wellFormed = false;
+
+		if ( !file_exists( $fname ) ) {
+			return;
+		}
+		$file = fopen( $fname, "rb" );
+		if ( !$file ) {
+			return;
+		}
+
+		$parser = $this->getParser();
+		$ok = true;
+		do {
+			$chunk = fread( $file, 32768 );
+			// The third argument tells the parser whether this is the last chunk
+			if ( $chunk === false || xml_parse( $parser, $chunk, feof( $file ) ) == 0 ) {
+				$ok = false;
+				break;
+			}
+		} while ( !feof( $file ) );
+
+		fclose( $file );
+		xml_parser_free( $parser );
+		$this->wellFormed = $ok;
+	}
+
+	/**
+	 * Parse the given string as one complete document and record the result
+	 * in $this->wellFormed.
+	 *
+	 * @param string $string the XML-input-string to be checked.
+	 */
+	private function validateFromString( $string ) {
+		$parser = $this->getParser();
+		$ret = xml_parse( $parser, $string, true );
+		xml_parser_free( $parser );
+		$this->wellFormed = ( $ret != 0 );
+	}
+
+	/**
+	 * Start-element handler used for the first element only: records the root
+	 * element name, then either switches to elementOpen() for the rest of the
+	 * document (when a filter callback is set) or removes the handler.
+	 *
+	 * @param $parser
+	 * @param $name
+	 * @param $attribs
+	 */
+	private function rootElementOpen( $parser, $name, $attribs ) {
+		$this->rootElement = $name;
+
+		if ( is_callable( $this->filterCallback ) ) {
+			xml_set_element_handler( $parser, array( $this, 'elementOpen' ), false );
+			// The root element itself must be passed through the filter too
+			$this->elementOpen( $parser, $name, $attribs );
+		} else {
+			// We only need the first open element
+			xml_set_element_handler( $parser, false, false );
+		}
+	}
+
+	/**
+	 * Start-element handler that feeds each element to the filter callback
+	 * and latches $this->filterMatch once the callback returns a true value.
+	 *
+	 * @param $parser
+	 * @param $name
+	 * @param $attribs
+	 */
+	private function elementOpen( $parser, $name, $attribs ) {
+		if ( call_user_func( $this->filterCallback, $name, $attribs ) ) {
+			// Filter hit!
+			$this->filterMatch = true;
+		}
+	}
+}
--- /dev/null
+<?php
+
+/**
+ * Static helpers for hash-ordered sorting and weighted random selection
+ * on arrays.
+ */
+class ArrayUtils {
+	/**
+	 * Reorder an array pseudo-randomly, where the order is a function of the
+	 * supplied key and the element values only. The usual application is
+	 * spreading load over servers that each keep a local cache.
+	 *
+	 * The array is sorted in place and its keys are kept.
+	 *
+	 * Note: Benchmarking on PHP 5.3 and 5.4 indicates that for small
+	 * strings, md5() is only 10% slower than hash('joaat',...) etc.,
+	 * since the function call overhead dominates. So there's not much
+	 * justification for breaking compatibility with installations
+	 * compiled with ./configure --disable-hash.
+	 *
+	 * @param array $array Array to sort
+	 * @param string $key
+	 * @param string $separator Placed between each element and the key before
+	 *   hashing. It can be chosen for backwards compatibility with consistent
+	 *   hash implementations that predate this function.
+	 */
+	public static function consistentHashSort( &$array, $key, $separator = "\000" ) {
+		// Hash every element once up front; the comparator only does lookups.
+		$suffix = $separator . $key;
+		$digestOf = array();
+		foreach ( $array as $member ) {
+			$digestOf[$member] = md5( $member . $suffix );
+		}
+
+		$byDigest = function ( $left, $right ) use ( $digestOf ) {
+			return strcmp( $digestOf[$left], $digestOf[$right] );
+		};
+		uasort( $array, $byDigest );
+	}
+
+	/**
+	 * Choose a key at random from an array of non-normalised weights, with
+	 * probability proportional to each weight.
+	 *
+	 * @param array $weights
+	 * @return bool|int|string The chosen key, or false if the input is not a
+	 *   non-empty array or the weights sum to zero
+	 */
+	public static function pickRandom( $weights ) {
+		if ( !is_array( $weights ) || count( $weights ) == 0 ) {
+			return false;
+		}
+
+		$total = array_sum( $weights );
+		if ( $total == 0 ) {
+			# No loads on any of them
+			# In previous versions, this triggered an unweighted random selection,
+			# but this feature has been removed as of April 2006 to allow for strict
+			# separation of query groups.
+			return false;
+		}
+
+		$ceiling = mt_getrandmax();
+		$target = mt_rand( 0, $ceiling ) / $ceiling * $total;
+
+		$running = 0;
+		foreach ( $weights as $candidate => $weight ) {
+			$running += $weight;
+			# Keys with 0 weight are never picked here.
+			# Note that the "all 0 weight" case is handled above
+			if ( $weight > 0 && $running >= $target ) {
+				return $candidate;
+			}
+		}
+		// Nothing crossed the target: return the last key visited
+		return $candidate;
+	}
+}
--- /dev/null
+<?php
+/**
+ * Native CDB file reader and writer.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Read from a CDB file.
+ * Native and pure PHP implementations are provided.
+ * http://cr.yp.to/cdb.html
+ */
+abstract class CdbReader {
+	/**
+	 * Factory: open a CDB file with the DBA-backed reader when the native
+	 * extension is usable, otherwise with the pure PHP reader.
+	 *
+	 * @param $fileName string
+	 *
+	 * @return CdbReader
+	 */
+	public static function open( $fileName ) {
+		if ( !self::haveExtension() ) {
+			wfDebug( "Warning: no dba extension found, using emulation.\n" );
+			return new CdbReader_PHP( $fileName );
+		}
+		return new CdbReader_DBA( $fileName );
+	}
+
+	/**
+	 * Tell whether the native extension is available, i.e. dba_handlers()
+	 * exists and lists both the 'cdb' and 'cdb_make' handlers.
+	 *
+	 * @return bool
+	 */
+	public static function haveExtension() {
+		if ( function_exists( 'dba_handlers' ) ) {
+			$available = dba_handlers();
+			return in_array( 'cdb', $available ) && in_array( 'cdb_make', $available );
+		}
+		return false;
+	}
+
+	/**
+	 * Construct the object and open the file
+	 */
+	abstract function __construct( $fileName );
+
+	/**
+	 * Close the file. Optional, you can just let the variable go out of scope.
+	 */
+	abstract function close();
+
+	/**
+	 * Look up the value stored under a key. Only string values are supported.
+	 *
+	 * @param $key string
+	 */
+	abstract public function get( $key );
+}
+
+/**
+ * Write to a CDB file.
+ * Native and pure PHP implementations are provided.
+ */
+abstract class CdbWriter {
+	/**
+	 * Factory: create a DBA-backed writer when the native extension is usable,
+	 * otherwise a pure PHP writer.
+	 * The user must have write access to the directory, for temporary file creation.
+	 *
+	 * @param $fileName string
+	 *
+	 * @return CdbWriter_DBA|CdbWriter_PHP
+	 */
+	public static function open( $fileName ) {
+		if ( !CdbReader::haveExtension() ) {
+			wfDebug( "Warning: no dba extension found, using emulation.\n" );
+			return new CdbWriter_PHP( $fileName );
+		}
+		return new CdbWriter_DBA( $fileName );
+	}
+
+	/**
+	 * Create the object and open the file
+	 *
+	 * @param $fileName string
+	 */
+	abstract function __construct( $fileName );
+
+	/**
+	 * Store a value under a key. The value will be converted to string.
+	 * @param $key string
+	 * @param $value string
+	 */
+	abstract public function set( $key, $value );
+
+	/**
+	 * Close the writer object. You should call this function before the object
+	 * goes out of scope, to write out the final hashtables.
+	 */
+	abstract public function close();
+}
+
+/**
+ * Reader class which uses the DBA extension
+ */
+class CdbReader_DBA extends CdbReader {
+	/** The DBA handle returned by dba_open(); unset once closed */
+	var $handle;
+
+	/**
+	 * Open the given CDB file read-only (without locking) through the DBA
+	 * 'cdb' handler.
+	 *
+	 * @param $fileName string
+	 * @throws MWException if the file cannot be opened
+	 */
+	function __construct( $fileName ) {
+		$this->handle = dba_open( $fileName, 'r-', 'cdb' );
+		if ( !$this->handle ) {
+			throw new MWException( 'Unable to open CDB file "' . $fileName . '"' );
+		}
+	}
+
+	/**
+	 * Close the DBA handle if it is still set, then drop it.
+	 */
+	function close() {
+		if ( isset( $this->handle ) ) {
+			dba_close( $this->handle );
+		}
+		unset( $this->handle );
+	}
+
+	/**
+	 * Fetch the value stored under the given key.
+	 *
+	 * @param $key string
+	 * @return mixed whatever dba_fetch() returns for the key
+	 */
+	public function get( $key ) {
+		return dba_fetch( $key, $this->handle );
+	}
+}
+
+/**
+ * Writer class which uses the DBA extension
+ */
+class CdbWriter_DBA extends CdbWriter {
+	/**
+	 * $handle: DBA handle for the temporary file; unset once closed.
+	 * $realFileName: final destination of the CDB file.
+	 * $tmpFileName: file that is written to until close().
+	 */
+	var $handle, $realFileName, $tmpFileName;
+
+	/**
+	 * Start a new CDB file. Data is written to a randomly suffixed temporary
+	 * file next to the target and only moved into place by close().
+	 *
+	 * @param $fileName string
+	 * @throws MWException if the temporary file cannot be opened
+	 */
+	function __construct( $fileName ) {
+		$this->realFileName = $fileName;
+		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
+		$this->handle = dba_open( $this->tmpFileName, 'n', 'cdb_make' );
+		if ( !$this->handle ) {
+			throw new MWException( 'Unable to open CDB file for write "' . $fileName . '"' );
+		}
+	}
+
+	/**
+	 * Insert a key/value pair.
+	 *
+	 * @param $key string
+	 * @param $value string
+	 * @return mixed whatever dba_insert() returns
+	 */
+	public function set( $key, $value ) {
+		return dba_insert( $key, $value, $this->handle );
+	}
+
+	/**
+	 * Close the temporary file and move it over the real file name.
+	 *
+	 * @throws MWException if the temporary file cannot be renamed
+	 */
+	public function close() {
+		if ( isset( $this->handle ) ) {
+			dba_close( $this->handle );
+		}
+		// On Windows the old file is removed first (presumably because rename()
+		// will not replace an existing file there). Only unlink when there is
+		// something to remove, so the first write of a file raises no warning.
+		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
+			unlink( $this->realFileName );
+		}
+		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
+			throw new MWException( 'Unable to move the new CDB file into place.' );
+		}
+		unset( $this->handle );
+	}
+
+	/**
+	 * Make sure the file is finalised if the caller never called close().
+	 */
+	function __destruct() {
+		if ( isset( $this->handle ) ) {
+			$this->close();
+		}
+	}
+}
--- /dev/null
+<?php
+/**
+ * This is a port of D.J. Bernstein's CDB to PHP. It's based on the copy that
+ * appears in PHP 5.3. Changes are:
+ * * Error returns replaced with exceptions
+ * * Exception thrown if sizes or offsets are between 2GB and 4GB
+ * * Some variables renamed
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Common functions for readers and writers
+ */
+class CdbFunctions {
+	/**
+	 * Compute $a modulo $b, treating $a as an unsigned 32-bit value even
+	 * when bit 31 is set.
+	 * $b must be less than 0x40000000 and greater than 0
+	 *
+	 * @param $a
+	 * @param $b
+	 *
+	 * @return int
+	 */
+	public static function unsignedMod( $a, $b ) {
+		if ( !( $a & 0x80000000 ) ) {
+			return $a % $b;
+		}
+		// Split off bit 31 and add its contribution (2 * 2^30) separately
+		$lowPart = ( $a & 0x7fffffff ) % $b;
+		$highPart = 2 * ( 0x40000000 % $b );
+		return ( $lowPart + $highPart ) % $b;
+	}
+
+	/**
+	 * Shift $a right by $b bits, treating $a as an unsigned 32-bit value
+	 * @param $a
+	 * @param $b
+	 * @return int
+	 */
+	public static function unsignedShiftRight( $a, $b ) {
+		if ( $b == 0 ) {
+			return $a;
+		}
+		if ( !( $a & 0x80000000 ) ) {
+			return $a >> $b;
+		}
+		// Shift the low 31 bits normally, then put bit 31 where it lands
+		$shiftedLow = ( $a & 0x7fffffff ) >> $b;
+		$shiftedTop = 0x40000000 >> ( $b - 1 );
+		return $shiftedLow | $shiftedTop;
+	}
+
+	/**
+	 * The CDB hash function: starting from 5381, for every byte compute
+	 * h = ( ( h << 5 ) + h ) ^ byte, truncated to 32 bits.
+	 *
+	 * @param $s string
+	 *
+	 * @return int
+	 */
+	public static function hash( $s ) {
+		$h = 5381;
+		$byteCount = strlen( $s );
+		for ( $idx = 0; $idx < $byteCount; $idx++ ) {
+			$shifted = ( $h << 5 ) & 0xffffffff;
+
+			// 32-bit addition of $h and $shifted done by hand: add the low
+			// 30 bits, then work out the top two bits from the carry and
+			// the top bits of both operands.
+			$lowSum = ( $h & 0x3fffffff ) + ( $shifted & 0x3fffffff );
+			$top = 0;
+			if ( $lowSum & 0x40000000 ) {
+				$top += 1;
+			}
+			if ( $h & 0x80000000 ) {
+				$top += 2;
+			}
+			if ( $h & 0x40000000 ) {
+				$top += 1;
+			}
+			if ( $shifted & 0x80000000 ) {
+				$top += 2;
+			}
+			if ( $shifted & 0x40000000 ) {
+				$top += 1;
+			}
+
+			$h = ( $top << 30 ) | ( $lowSum & 0x3fffffff );
+			$h ^= ord( $s[$idx] );
+			$h &= 0xffffffff;
+		}
+		return $h;
+	}
+}
+
+/**
+ * CDB reader class
+ */
+class CdbReader_PHP extends CdbReader {
+	/** The filename */
+	var $fileName;
+
+	/** The file handle */
+	var $handle;
+
+	/* number of hash slots searched under this key */
+	var $loop;
+
+	/* hash of the key being searched; initialized if loop is nonzero */
+	var $khash;
+
+	/* file position of the next hash slot to examine; initialized if loop is nonzero */
+	var $kpos;
+
+	/* file position of the hash table for this key; initialized if loop is nonzero */
+	var $hpos;
+
+	/* number of slots in that hash table; initialized if loop is nonzero */
+	var $hslots;
+
+	/* file position of the value; initialized if findNext() returns true */
+	var $dpos;
+
+	/* length of the value in bytes; initialized if findNext() returns true */
+	var $dlen;
+
+	/**
+	 * Open the file for binary reading and reset the search state.
+	 *
+	 * @param $fileName string
+	 * @throws MWException if the file cannot be opened
+	 */
+	function __construct( $fileName ) {
+		$this->fileName = $fileName;
+		$this->handle = fopen( $fileName, 'rb' );
+		if ( !$this->handle ) {
+			throw new MWException( 'Unable to open CDB file "' . $this->fileName . '".' );
+		}
+		$this->findStart();
+	}
+
+	/**
+	 * Close the file handle if it is still set, then drop it.
+	 */
+	function close() {
+		if ( isset( $this->handle ) ) {
+			fclose( $this->handle );
+		}
+		unset( $this->handle );
+	}
+
+	/**
+	 * Look up a key and return its value.
+	 *
+	 * @param $key
+	 * @return bool|string The stored value, or false if the key was not found
+	 */
+	public function get( $key ) {
+		// strval is required
+		if ( $this->find( strval( $key ) ) ) {
+			return $this->read( $this->dlen, $this->dpos );
+		} else {
+			return false;
+		}
+	}
+
+	/**
+	 * Check whether the bytes stored at $pos are exactly $key.
+	 *
+	 * @param $key
+	 * @param $pos
+	 * @return bool
+	 */
+	protected function match( $key, $pos ) {
+		$buf = $this->read( strlen( $key ), $pos );
+		return $buf === $key;
+	}
+
+	/**
+	 * Reset the search state so that the next findNext() starts a new lookup.
+	 */
+	protected function findStart() {
+		$this->loop = 0;
+	}
+
+	/**
+	 * Read exactly $length bytes starting at file position $pos.
+	 *
+	 * @throws MWException if the seek fails or fewer bytes than requested are read
+	 * @param $length
+	 * @param $pos
+	 * @return string
+	 */
+	protected function read( $length, $pos ) {
+		if ( fseek( $this->handle, $pos ) == -1 ) {
+			// This can easily happen if the internal pointers are incorrect
+			throw new MWException(
+				'Seek failed, file "' . $this->fileName . '" may be corrupted.' );
+		}
+
+		// Nothing to read; the seek above has still been performed
+		if ( $length == 0 ) {
+			return '';
+		}
+
+		$buf = fread( $this->handle, $length );
+		if ( $buf === false || strlen( $buf ) !== $length ) {
+			throw new MWException(
+				'Read from CDB file failed, file "' . $this->fileName . '" may be corrupted.' );
+		}
+		return $buf;
+	}
+
+	/**
+	 * Unpack an unsigned integer and throw an exception if it needs more than 31 bits
+	 * @param $s
+	 * @throws MWException
+	 * @return mixed
+	 */
+	protected function unpack31( $s ) {
+		// 'V' is an unsigned 32-bit little-endian integer
+		$data = unpack( 'V', $s );
+		if ( $data[1] > 0x7fffffff ) {
+			throw new MWException(
+				'Error in CDB file "' . $this->fileName . '", integer too big.' );
+		}
+		return $data[1];
+	}
+
+	/**
+	 * Unpack a 32-bit integer from two little-endian 16-bit halves,
+	 * combined as low | ( high << 16 ).
+	 * NOTE(review): presumably the result is only negative where PHP integers
+	 * are 32 bits wide; confirm against the supported platforms.
+	 * @param $s
+	 * @return int
+	 */
+	protected function unpackSigned( $s ) {
+		$data = unpack( 'va/vb', $s );
+		return $data['a'] | ( $data['b'] << 16 );
+	}
+
+	/**
+	 * Continue (or, when loop is zero, begin) the search for $key. On success
+	 * dpos and dlen describe where the value is stored.
+	 *
+	 * @param $key
+	 * @return bool True if a matching record was found
+	 */
+	protected function findNext( $key ) {
+		if ( !$this->loop ) {
+			// New lookup: read the 8-byte pointer entry selected by the hash
+			// from the first 2048 bytes of the file. Its first 4 bytes are the
+			// hash table position, the last 4 bytes the number of slots.
+			$u = CdbFunctions::hash( $key );
+			$buf = $this->read( 8, ( $u << 3 ) & 2047 );
+			$this->hslots = $this->unpack31( substr( $buf, 4 ) );
+			if ( !$this->hslots ) {
+				return false;
+			}
+			$this->hpos = $this->unpack31( substr( $buf, 0, 4 ) );
+			$this->khash = $u;
+			// Starting slot: ( hash >> 8 ) mod hslots, at 8 bytes per slot
+			$u = CdbFunctions::unsignedShiftRight( $u, 8 );
+			$u = CdbFunctions::unsignedMod( $u, $this->hslots );
+			$u <<= 3;
+			$this->kpos = $this->hpos + $u;
+		}
+
+		while ( $this->loop < $this->hslots ) {
+			// Each slot holds a 4-byte hash followed by a 4-byte record position
+			$buf = $this->read( 8, $this->kpos );
+			$pos = $this->unpack31( substr( $buf, 4 ) );
+			if ( !$pos ) {
+				// A zero position ends the search
+				return false;
+			}
+			$this->loop += 1;
+			$this->kpos += 8;
+			// Wrap around to the first slot after the last one
+			if ( $this->kpos == $this->hpos + ( $this->hslots << 3 ) ) {
+				$this->kpos = $this->hpos;
+			}
+			$u = $this->unpackSigned( substr( $buf, 0, 4 ) );
+			if ( $u === $this->khash ) {
+				// Record header: 4-byte key length, then 4-byte data length;
+				// the key bytes follow, then the data bytes
+				$buf = $this->read( 8, $pos );
+				$keyLen = $this->unpack31( substr( $buf, 0, 4 ) );
+				if ( $keyLen == strlen( $key ) && $this->match( $key, $pos + 8 ) ) {
+					// Found
+					$this->dlen = $this->unpack31( substr( $buf, 4 ) );
+					$this->dpos = $pos + 8 + $keyLen;
+					return true;
+				}
+			}
+		}
+		return false;
+	}
+
+	/**
+	 * Search for $key from scratch.
+	 *
+	 * @param $key
+	 * @return bool True if the key was found
+	 */
+	protected function find( $key ) {
+		$this->findStart();
+		return $this->findNext( $key );
+	}
+}
+
+/**
+ * CDB writer class
+ *
+ * Records are streamed to a temporary file next to the target. close()
+ * appends the 256 hash tables, writes the 2048-byte pointer array at the
+ * start of the file and renames the temporary file over the real one.
+ *
+ * $handle is null whenever there is no open stream (after close() or after
+ * a failure), so close() and __destruct() can be called safely at any time.
+ */
+class CdbWriter_PHP extends CdbWriter {
+	var $handle, $realFileName, $tmpFileName;
+
+	var $hplist;
+	var $numentries, $pos;
+
+	/**
+	 * Open a randomly named temporary file and seek past the space
+	 * reserved for the pointer array.
+	 *
+	 * @param $fileName string Name of the final CDB file
+	 * @throws MWException If the temporary file cannot be opened or seeked
+	 */
+	function __construct( $fileName ) {
+		$this->realFileName = $fileName;
+		$this->tmpFileName = $fileName . '.tmp.' . mt_rand( 0, 0x7fffffff );
+		$this->handle = fopen( $this->tmpFileName, 'wb' );
+		if ( !$this->handle ) {
+			// fopen() leaves false here; normalise so later checks see "no stream"
+			$this->handle = null;
+			$this->throwException(
+				'Unable to open CDB file "' . $this->tmpFileName . '" for write.' );
+		}
+		$this->hplist = array();
+		$this->numentries = 0;
+		$this->pos = 2048; // leaving space for the pointer array, 256 * 8
+		if ( fseek( $this->handle, $this->pos ) == -1 ) {
+			$this->throwException( 'fseek failed in file "' . $this->tmpFileName . '".' );
+		}
+	}
+
+	/**
+	 * Finalise the file if the caller forgot to. Does nothing when there is
+	 * no open stream (already closed, never opened, or torn down on error).
+	 */
+	function __destruct() {
+		if ( $this->handle ) {
+			$this->close();
+		}
+	}
+
+	/**
+	 * Append one key/value record to the file.
+	 *
+	 * @param $key string Record key; empty keys are silently ignored
+	 * @param $value string Record data
+	 * @return null
+	 */
+	public function set( $key, $value ) {
+		if ( strval( $key ) === '' ) {
+			// DBA cross-check hack
+			return;
+		}
+		$this->addbegin( strlen( $key ), strlen( $value ) );
+		$this->write( $key );
+		$this->write( $value );
+		$this->addend( strlen( $key ), strlen( $value ), CdbFunctions::hash( $key ) );
+	}
+
+	/**
+	 * Write the hash tables and header, close the stream and move the
+	 * temporary file into place. Calling this again afterwards is a no-op.
+	 *
+	 * @throws MWException
+	 */
+	public function close() {
+		if ( !$this->handle ) {
+			// Nothing open: already closed, or cleaned up by throwException()
+			return;
+		}
+		$this->finish();
+		fclose( $this->handle );
+		// Forget the stream before anything below can fail, so that neither
+		// throwException() nor the destructor touches the closed resource.
+		$this->handle = null;
+		if ( wfIsWindows() && file_exists( $this->realFileName ) ) {
+			unlink( $this->realFileName );
+		}
+		if ( !rename( $this->tmpFileName, $this->realFileName ) ) {
+			// throwException() only removes the temp file while the stream is
+			// open, so remove it here.
+			if ( file_exists( $this->tmpFileName ) ) {
+				unlink( $this->tmpFileName );
+			}
+			$this->throwException( 'Unable to move the new CDB file into place.' );
+		}
+	}
+
+	/**
+	 * Write a buffer at the current file position.
+	 *
+	 * @throws MWException If fewer bytes than requested were written
+	 * @param $buf string
+	 */
+	protected function write( $buf ) {
+		$len = fwrite( $this->handle, $buf );
+		if ( $len !== strlen( $buf ) ) {
+			$this->throwException( 'Error writing to CDB file "' . $this->tmpFileName . '".' );
+		}
+	}
+
+	/**
+	 * Advance the tracked file offset, refusing to go past 0x7fffffff.
+	 *
+	 * @throws MWException
+	 * @param $len int Number of bytes just written
+	 */
+	protected function posplus( $len ) {
+		$newpos = $this->pos + $len;
+		if ( $newpos > 0x7fffffff ) {
+			$this->throwException(
+				'A value in the CDB file "' . $this->tmpFileName . '" is too large.' );
+		}
+		$this->pos = $newpos;
+	}
+
+	/**
+	 * Register a record that has just been written: remember its hash and
+	 * start offset, then advance the offset past header, key and data.
+	 *
+	 * @param $keylen int
+	 * @param $datalen int
+	 * @param $h int Hash of the key
+	 */
+	protected function addend( $keylen, $datalen, $h ) {
+		$this->hplist[] = array(
+			'h' => $h,
+			'p' => $this->pos
+		);
+
+		$this->numentries++;
+		$this->posplus( 8 );
+		$this->posplus( $keylen );
+		$this->posplus( $datalen );
+	}
+
+	/**
+	 * Write the 8-byte record header (key length, data length) as two
+	 * little-endian 32-bit values.
+	 *
+	 * @throws MWException If either length exceeds 0x7fffffff
+	 * @param $keylen int
+	 * @param $datalen int
+	 */
+	protected function addbegin( $keylen, $datalen ) {
+		if ( $keylen > 0x7fffffff ) {
+			$this->throwException( 'Key length too long in file "' . $this->tmpFileName . '".' );
+		}
+		if ( $datalen > 0x7fffffff ) {
+			$this->throwException( 'Data length too long in file "' . $this->tmpFileName . '".' );
+		}
+		$buf = pack( 'VV', $keylen, $datalen );
+		$this->write( $buf );
+	}
+
+	/**
+	 * Write the 256 hash tables after the records, then rewind and write
+	 * the pointer array (table offset and slot count per table).
+	 *
+	 * @throws MWException
+	 */
+	protected function finish() {
+		// Hack for DBA cross-check
+		$this->hplist = array_reverse( $this->hplist );
+
+		// Calculate the number of items that will be in each hashtable
+		$counts = array_fill( 0, 256, 0 );
+		foreach ( $this->hplist as $item ) {
+			++ $counts[255 & $item['h']];
+		}
+
+		// Fill in $starts with the *end* indexes
+		$starts = array();
+		$pos = 0;
+		for ( $i = 0; $i < 256; ++$i ) {
+			$pos += $counts[$i];
+			$starts[$i] = $pos;
+		}
+
+		// Excessively clever and indulgent code to simultaneously fill $packedTables
+		// with the packed hashtables, and adjust the elements of $starts
+		// to actually point to the starts instead of the ends.
+		$packedTables = array_fill( 0, $this->numentries, false );
+		foreach ( $this->hplist as $item ) {
+			$packedTables[--$starts[255 & $item['h']]] = $item;
+		}
+
+		$final = '';
+		for ( $i = 0; $i < 256; ++$i ) {
+			$count = $counts[$i];
+
+			// The size of the hashtable will be double the item count.
+			// The rest of the slots will be empty.
+			$len = $count + $count;
+			$final .= pack( 'VV', $this->pos, $len );
+
+			$hashtable = array();
+			for ( $u = 0; $u < $len; ++$u ) {
+				$hashtable[$u] = array( 'h' => 0, 'p' => 0 );
+			}
+
+			// Fill the hashtable, using the next empty slot if the hashed slot
+			// is taken.
+			for ( $u = 0; $u < $count; ++$u ) {
+				$hp = $packedTables[$starts[$i] + $u];
+				$where = CdbFunctions::unsignedMod(
+					CdbFunctions::unsignedShiftRight( $hp['h'], 8 ), $len );
+				while ( $hashtable[$where]['p'] ) {
+					if ( ++$where == $len ) {
+						$where = 0;
+					}
+				}
+				$hashtable[$where] = $hp;
+			}
+
+			// Write the hashtable; the hash is split into two 16-bit halves
+			for ( $u = 0; $u < $len; ++$u ) {
+				$buf = pack( 'vvV',
+					$hashtable[$u]['h'] & 0xffff,
+					CdbFunctions::unsignedShiftRight( $hashtable[$u]['h'], 16 ),
+					$hashtable[$u]['p'] );
+				$this->write( $buf );
+				$this->posplus( 8 );
+			}
+		}
+
+		// Write the pointer array at the start of the file
+		rewind( $this->handle );
+		if ( ftell( $this->handle ) != 0 ) {
+			$this->throwException( 'Error rewinding to start of file "' . $this->tmpFileName . '".' );
+		}
+		$this->write( $final );
+	}
+
+	/**
+	 * Clean up the temp file and throw an exception
+	 *
+	 * If the stream is still open it is closed, forgotten and the temp file
+	 * is removed, so a later close() or __destruct() will not act on it.
+	 *
+	 * @param $msg string
+	 * @throws MWException
+	 */
+	protected function throwException( $msg ) {
+		if ( $this->handle ) {
+			fclose( $this->handle );
+			$this->handle = null;
+			unlink( $this->tmpFileName );
+		}
+		throw new MWException( $msg );
+	}
+}
--- /dev/null
+<?php
+/**
+ * Configuration file editor.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * This is a state machine style parser with two internal stacks:
+ * * A next state stack, which determines the state the machine will progress to next
+ * * A path stack, which keeps track of the logical location in the file.
+ *
+ * Reference grammar:
+ *
+ * file = T_OPEN_TAG *statement
+ * statement = T_VARIABLE "=" expression ";"
+ * expression = array / scalar / T_VARIABLE
+ * array = T_ARRAY "(" [ element *( "," element ) [ "," ] ] ")"
+ * element = assoc-element / expression
+ * assoc-element = scalar T_DOUBLE_ARROW expression
+ * scalar = T_LNUMBER / T_DNUMBER / T_STRING / T_CONSTANT_ENCAPSED_STRING
+ */
+class ConfEditor {
+ /** The text to parse */
+ var $text;
+
+ /** The token array from token_get_all() */
+ var $tokens;
+
+ /** The current position in the token array */
+ var $pos;
+
+ /** The current 1-based line number */
+ var $lineNum;
+
+ /** The current 1-based column number */
+ var $colNum;
+
+ /** The current 0-based byte number */
+ var $byteNum;
+
+ /** The current ConfEditorToken object */
+ var $currentToken;
+
+ /** The previous ConfEditorToken object */
+ var $prevToken;
+
+ /**
+ * The state machine stack. This is an array of strings where the topmost
+ * element will be popped off and become the next parser state.
+ */
+ var $stateStack;
+
+ /**
+ * The path stack is a stack of associative arrays with the following elements:
+ * name The name of top level of the path
+ * level The level (number of elements) of the path
+ * startByte The byte offset of the start of the path
+ * startToken The token offset of the start
+ * endByte The byte offset of the end, plus one
+ * endToken The token offset of the end, plus one
+ * valueStartToken The start token offset of the value part
+ * valueStartByte The start byte offset of the value part
+ * valueEndToken The end token offset of the value part, plus one
+ * valueEndByte The end byte offset of the value part, plus one
+ * nextArrayIndex The next numeric array index at this level
+ * hasComma True if the array element ends with a comma
+ * arrowByte The byte offset of the "=>", or false if there isn't one
+ */
+ var $pathStack;
+
+ /**
+ * The elements of the top of the pathStack for every path encountered, indexed
+ * by slash-separated path.
+ */
+ var $pathInfo;
+
+ /**
+ * Next serial number for whitespace placeholder paths (\@extra-N)
+ */
+ var $serial;
+
+ /**
+ * Editor state. This consists of the internal copy/insert operations which
+ * are applied to the source string to obtain the destination string.
+ */
+ var $edits;
+
+	/**
+	 * Command-line test helper: parse the given text and report the outcome.
+	 *
+	 * @param $text string Source text to parse
+	 *
+	 * @return string "OK" when parsing succeeds, otherwise the parse error
+	 *   message followed by a newline and the error's highlight() output
+	 */
+	static function test( $text ) {
+		$editor = new self( $text );
+		try {
+			$editor->parse();
+		} catch ( ConfEditorParseError $err ) {
+			return $err->getMessage() . "\n" . $err->highlight( $text );
+		}
+		return "OK";
+	}
+
+ /**
+ * Construct a new parser
+ *
+ * Only stores the text; tokenising is done later by initParse().
+ *
+ * @param $text string The source text to parse or edit
+ */
+ public function __construct( $text ) {
+ $this->text = $text;
+ }
+
+ /**
+ * Edit the text. Returns the edited text.
+ * @param array $ops Array of operations.
+ *
+ * Operations are given as an associative array, with members:
+ * type: One of delete, set, append or insert (required)
+ * path: The path to operate on (required)
+ * key: The array key to insert/append, with PHP quotes
+ * value: The value, with PHP quotes
+ *
+ * delete
+ * Deletes an array element or statement with the specified path.
+ * e.g.
+ * array('type' => 'delete', 'path' => '$foo/bar/baz' )
+ * is equivalent to the runtime PHP code:
+ * unset( $foo['bar']['baz'] );
+ *
+ * set
+ * Sets the value of an array element. If the element doesn't exist, it
+ * is appended to the array. If it does exist, the value is set, with
+ * comments and indenting preserved.
+ *
+ * append
+ * Appends a new element to the end of the array. Adds a trailing comma.
+ * e.g.
+ * array( 'type' => 'append', 'path' => '$foo/bar',
+ * 'key' => 'baz', 'value' => "'x'" )
+ * is like the PHP code:
+ * $foo['bar']['baz'] = 'x';
+ *
+ * insert
+ * Insert a new element at the start of the array.
+ *
+ * The text is parsed first, every operation is recorded as a change to
+ * the copy/insert list in $this->edits, the list is then replayed to build
+ * the output, and the output is parsed again as a sanity check. On return
+ * $this->text holds the edited text.
+ *
+ * @throws MWException
+ * @return string
+ */
+ public function edit( $ops ) {
+ $this->parse();
+
+ // Start with a single operation that copies the whole source unchanged
+ $this->edits = array(
+ array( 'copy', 0, strlen( $this->text ) )
+ );
+ foreach ( $ops as $op ) {
+ $type = $op['type'];
+ $path = $op['path'];
+ $value = isset( $op['value'] ) ? $op['value'] : null;
+ $key = isset( $op['key'] ) ? $op['key'] : null;
+
+ switch ( $type ) {
+ case 'delete':
+ list( $start, $end ) = $this->findDeletionRegion( $path );
+ $this->replaceSourceRegion( $start, $end, false );
+ break;
+ case 'set':
+ if ( isset( $this->pathInfo[$path] ) ) {
+ list( $start, $end ) = $this->findValueRegion( $path );
+ $encValue = $value; // var_export( $value, true );
+ $this->replaceSourceRegion( $start, $end, $encValue );
+ break;
+ }
+ // No existing path, fall through to append
+ // The last path component becomes the (quoted) key and the rest
+ // becomes the path of the array to append to.
+ $slashPos = strrpos( $path, '/' );
+ $key = var_export( substr( $path, $slashPos + 1 ), true );
+ $path = substr( $path, 0, $slashPos );
+ // Fall through
+ case 'append':
+ // Find the last array element
+ $lastEltPath = $this->findLastArrayElement( $path );
+ if ( $lastEltPath === false ) {
+ throw new MWException( "Can't find any element of array \"$path\"" );
+ }
+ $lastEltInfo = $this->pathInfo[$lastEltPath];
+
+ // Has it got a comma already?
+ // (@extra placeholder paths are not real elements, so they never need one)
+ if ( strpos( $lastEltPath, '@extra' ) === false && !$lastEltInfo['hasComma'] ) {
+ // No comma, insert one after the value region
+ list( , $end ) = $this->findValueRegion( $lastEltPath );
+ $this->replaceSourceRegion( $end - 1, $end - 1, ',' );
+ }
+
+ // Make the text to insert
+ list( $start, $end ) = $this->findDeletionRegion( $lastEltPath );
+
+ if ( $key === null ) {
+ list( $indent, ) = $this->getIndent( $start );
+ $textToInsert = "$indent$value,";
+ } else {
+ list( $indent, $arrowIndent ) =
+ $this->getIndent( $start, $key, $lastEltInfo['arrowByte'] );
+ $textToInsert = "$indent$key$arrowIndent=> $value,";
+ }
+ // getIndent() gives false when the region does not start a line; in
+ // that case the new element stays on the same line.
+ $textToInsert .= ( $indent === false ? ' ' : "\n" );
+
+ // Insert the item
+ $this->replaceSourceRegion( $end, $end, $textToInsert );
+ break;
+ case 'insert':
+ // Find first array element
+ $firstEltPath = $this->findFirstArrayElement( $path );
+ if ( $firstEltPath === false ) {
+ throw new MWException( "Can't find array element of \"$path\"" );
+ }
+ list( $start, ) = $this->findDeletionRegion( $firstEltPath );
+ $info = $this->pathInfo[$firstEltPath];
+
+ // Make the text to insert
+ if ( $key === null ) {
+ list( $indent, ) = $this->getIndent( $start );
+ $textToInsert = "$indent$value,";
+ } else {
+ list( $indent, $arrowIndent ) =
+ $this->getIndent( $start, $key, $info['arrowByte'] );
+ $textToInsert = "$indent$key$arrowIndent=> $value,";
+ }
+ $textToInsert .= ( $indent === false ? ' ' : "\n" );
+
+ // Insert the item
+ $this->replaceSourceRegion( $start, $start, $textToInsert );
+ break;
+ default:
+ throw new MWException( "Unrecognised operation: \"$type\"" );
+ }
+ }
+
+ // Do the edits
+ // 'copy' entries are array( 'copy', startByte, endByte ) into the source;
+ // anything else is array( 'insert', text ).
+ $out = '';
+ foreach ( $this->edits as $edit ) {
+ if ( $edit[0] == 'copy' ) {
+ $out .= substr( $this->text, $edit[1], $edit[2] - $edit[1] );
+ } else { // if ( $edit[0] == 'insert' )
+ $out .= $edit[1];
+ }
+ }
+
+ // Do a second parse as a sanity check
+ $this->text = $out;
+ try {
+ $this->parse();
+ } catch ( ConfEditorParseError $e ) {
+ throw new MWException(
+ "Sorry, ConfEditor broke the file during editing and it won't parse anymore: " .
+ $e->getMessage() );
+ }
+ return $out;
+ }
+
+	/**
+	 * Get the variables defined in the text
+	 *
+	 * Parses the text, then walks every recorded path that starts with "$".
+	 * Placeholder paths (names starting with "@") are skipped. The raw value
+	 * text of each remaining path is run through parseScalar() and stored
+	 * with setVar(), which never overwrites an entry that is already set.
+	 *
+	 * @return array( varname => value )
+	 */
+	function getVars() {
+		$vars = array();
+		$this->parse();
+		foreach ( $this->pathInfo as $path => $data ) {
+			// Array keys and numeric element names may be ints; work on strings
+			// so that character access below is well defined.
+			$path = (string)$path;
+			if ( substr( $path, 0, 1 ) != '$' ) {
+				continue;
+			}
+			$trimmedPath = substr( $path, 1 );
+			$name = (string)$data['name'];
+			$firstChar = substr( $name, 0, 1 );
+			if ( $firstChar == '@' ) {
+				continue;
+			}
+			if ( $firstChar == '$' ) {
+				$name = substr( $name, 1 );
+			}
+			// The parent path is the full path minus the trailing name and slash
+			$parentPath = substr( $trimmedPath, 0,
+				strlen( $trimmedPath ) - strlen( $name ) );
+			if ( substr( $parentPath, -1 ) == '/' ) {
+				$parentPath = substr( $parentPath, 0, -1 );
+			}
+
+			$value = substr( $this->text, $data['valueStartByte'],
+				$data['valueEndByte'] - $data['valueStartByte']
+			);
+			$this->setVar( $vars, $parentPath, $name,
+				$this->parseScalar( $value ) );
+		}
+		return $vars;
+	}
+
+	/**
+	 * Store a value at a nested position of an array without overwriting.
+	 * For example setVar( $arr, 'foo/bar', 'baz', 3 ) results in
+	 *     $arr['foo']['bar']['baz'] = 3;
+	 * unless that entry is already set. Missing intermediate levels are
+	 * created as empty arrays.
+	 *
+	 * @param $array array Array to modify (by reference)
+	 * @param string $path slash-delimited path; '' means the top level
+	 * @param $key mixed Key
+	 * @param $value mixed Value
+	 */
+	function setVar( &$array, $path, $key, $value ) {
+		$segments = explode( '/', $path );
+		if ( $path === '' ) {
+			$segments = array();
+		}
+		$node =& $array;
+		foreach ( $segments as $segment ) {
+			if ( !isset( $node[$segment] ) ) {
+				$node[$segment] = array();
+			}
+			$node =& $node[$segment];
+		}
+		if ( isset( $node[$key] ) ) {
+			return;
+		}
+		$node[$key] = $value;
+	}
+
+	/**
+	 * Parse a scalar value in PHP
+	 *
+	 * Handles single-quoted strings, double-quoted strings and the keywords
+	 * true, false and null. The keywords are matched case-insensitively, as
+	 * PHP itself does, and only as whole words, so an identifier such as
+	 * "trueish" is not mistaken for a boolean. Text may follow the keyword
+	 * (the value region can carry trailing whitespace). Anything else is
+	 * returned unchanged as a string.
+	 *
+	 * @param $str string Source text of the value
+	 * @return mixed Parsed value
+	 */
+	function parseScalar( $str ) {
+		if ( $str !== '' && $str[0] == '\'' ) {
+			// Single-quoted string
+			// @todo FIXME: trim() call is due to mystery bug where whitespace gets
+			// appended to the token; without it we ended up reading in the
+			// extra quote on the end!
+			return strtr( substr( trim( $str ), 1, -1 ),
+				array( '\\\'' => '\'', '\\\\' => '\\' ) );
+		}
+		if ( $str !== '' && $str[0] == '"' ) {
+			// Double-quoted string
+			// @todo FIXME: trim() call is due to mystery bug where whitespace gets
+			// appended to the token; without it we ended up reading in the
+			// extra quote on the end!
+			return stripcslashes( substr( trim( $str ), 1, -1 ) );
+		}
+		// Keyword at the start of the text, ending on a word boundary
+		if ( preg_match( '/^(true|false|null)\b/i', $str, $m ) ) {
+			switch ( strtolower( $m[1] ) ) {
+				case 'true':
+					return true;
+				case 'false':
+					return false;
+				default:
+					return null;
+			}
+		}
+		// Must be some kind of numeric value, so let PHP's weak typing
+		// be useful for a change
+		return $str;
+	}
+
+ /**
+ * Replace the byte offset region of the source with $newText.
+ * Works by adding elements to the $this->edits array.
+ *
+ * Every 'copy' entry that contains the region is split into the part
+ * before it, an optional 'insert' entry, and the part after it. Existing
+ * 'insert' entries and copies that do not touch the region are kept as is.
+ *
+ * NOTE(review): a zero-width region ($start == $end) that sits exactly on
+ * the boundary of a copy entry (including offset 0 and the end of the text)
+ * satisfies the "outside this region" test for every entry, so nothing is
+ * inserted and no error is raised. Confirm whether callers can hit this.
+ *
+ * @param $start int Start byte offset in the original source
+ * @param $end int End byte offset in the original source, past the end
+ * @param $newText string|bool Replacement text, or false to just delete
+ * @throws MWException If the region straddles the edge of a copy entry
+ */
+ function replaceSourceRegion( $start, $end, $newText = false ) {
+ // Split all copy operations with a source corresponding to the region
+ // in question.
+ $newEdits = array();
+ foreach ( $this->edits as $edit ) {
+ if ( $edit[0] !== 'copy' ) {
+ $newEdits[] = $edit;
+ continue;
+ }
+ $copyStart = $edit[1];
+ $copyEnd = $edit[2];
+ if ( $start >= $copyEnd || $end <= $copyStart ) {
+ // Outside this region
+ $newEdits[] = $edit;
+ continue;
+ }
+ // The region must lie entirely inside this copy; a partial overlap
+ // means it collides with an earlier edit.
+ if ( ( $start < $copyStart && $end > $copyStart )
+ || ( $start < $copyEnd && $end > $copyEnd )
+ ) {
+ throw new MWException( "Overlapping regions found, can't do the edit" );
+ }
+ // Split the copy
+ $newEdits[] = array( 'copy', $copyStart, $start );
+ if ( $newText !== false ) {
+ $newEdits[] = array( 'insert', $newText );
+ }
+ $newEdits[] = array( 'copy', $end, $copyEnd );
+ }
+ $this->edits = $newEdits;
+ }
+
+ /**
+ * Finds the source byte region which you would want to delete, if $pathName
+ * was to be deleted. Includes the leading spaces and tabs, the trailing line
+ * break, and any comments in between.
+ *
+ * This rewinds the token position with firstToken() and walks forward
+ * with nextToken(), so the parser's current position is changed.
+ *
+ * @param $pathName string Slash-separated path, a key of $this->pathInfo
+ * @throws MWException If the path is not known
+ * @return array array( start byte, end byte past the region )
+ */
+ function findDeletionRegion( $pathName ) {
+ if ( !isset( $this->pathInfo[$pathName] ) ) {
+ throw new MWException( "Can't find path \"$pathName\"" );
+ }
+ $path = $this->pathInfo[$pathName];
+ // Find the start
+ $this->firstToken();
+ while ( $this->pos != $path['startToken'] ) {
+ $this->nextToken();
+ }
+ $regionStart = $path['startByte'];
+ // Walk backwards over skippable tokens to pull the start back to the
+ // beginning of the line.
+ for ( $offset = -1; $offset >= -$this->pos; $offset-- ) {
+ $token = $this->getTokenAhead( $offset );
+ if ( !$token->isSkip() ) {
+ // If there is other content on the same line, don't move the start point
+ // back, because that will cause the regions to overlap.
+ $regionStart = $path['startByte'];
+ break;
+ }
+ $lfPos = strrpos( $token->text, "\n" );
+ if ( $lfPos === false ) {
+ $regionStart -= strlen( $token->text );
+ } else {
+ // The line start does not include the LF
+ $regionStart -= strlen( $token->text ) - $lfPos - 1;
+ break;
+ }
+ }
+ // Find the end
+ while ( $this->pos != $path['endToken'] ) {
+ $this->nextToken();
+ }
+ $regionEnd = $path['endByte']; // past the end
+ // Walk forwards over skippable tokens up to and including the first LF
+ for ( $offset = 0; $offset < count( $this->tokens ) - $this->pos; $offset++ ) {
+ $token = $this->getTokenAhead( $offset );
+ if ( !$token->isSkip() ) {
+ break;
+ }
+ $lfPos = strpos( $token->text, "\n" );
+ if ( $lfPos === false ) {
+ $regionEnd += strlen( $token->text );
+ } else {
+ // This should point past the LF
+ $regionEnd += $lfPos + 1;
+ break;
+ }
+ }
+ return array( $regionStart, $regionEnd );
+ }
+
+	/**
+	 * Look up the byte range of the value part of a path, as recorded by
+	 * the parser. The range covers the quotes of a string value but not the
+	 * comma or semicolon that follows it.
+	 *
+	 * The second offset is past-the-end (end + 1), as elsewhere in this class.
+	 *
+	 * @param $pathName string Slash-separated path, a key of $this->pathInfo
+	 * @throws MWException If the path is unknown or has no value region
+	 * @return array array( start byte, end byte )
+	 */
+	function findValueRegion( $pathName ) {
+		if ( !isset( $this->pathInfo[$pathName] ) ) {
+			throw new MWException( "Can't find path \"$pathName\"" );
+		}
+		$info = $this->pathInfo[$pathName];
+		$from = $info['valueStartByte'];
+		$to = $info['valueEndByte'];
+		if ( $from === false || $to === false ) {
+			throw new MWException( "Can't find value region for path \"$pathName\"" );
+		}
+		return array( $from, $to );
+	}
+
+	/**
+	 * Find the path name of the last element of the array at $path.
+	 *
+	 * Real elements are preferred. If there are none, the last \@extra
+	 * interstitial path below $path is returned instead. Returns false when
+	 * no recorded path lies below $path at all.
+	 *
+	 * @param $path string Path of the array
+	 * @return bool|int|string
+	 */
+	function findLastArrayElement( $path ) {
+		$prefix = "$path/";
+		$prefixLen = strlen( $path ) + 1;
+		$candidates = array_keys( $this->pathInfo );
+
+		// First pass: real elements only. Stop at the first other path seen
+		// after a match.
+		$lastReal = false;
+		foreach ( $candidates as $candidate ) {
+			if ( substr( $candidate, $prefixLen, 1 ) == '@' ) {
+				// Placeholder at this level, ignore
+				continue;
+			}
+			if ( substr( $candidate, 0, $prefixLen ) == $prefix ) {
+				$lastReal = $candidate;
+				continue;
+			}
+			if ( $lastReal !== false ) {
+				break;
+			}
+		}
+		if ( $lastReal !== false ) {
+			return $lastReal;
+		}
+
+		// Second pass: accept anything below $path, placeholders included
+		$lastAny = false;
+		foreach ( $candidates as $candidate ) {
+			if ( substr( $candidate, 0, $prefixLen ) == $prefix ) {
+				$lastAny = $candidate;
+			} elseif ( $lastAny !== false ) {
+				break;
+			}
+		}
+		return $lastAny;
+	}
+
+	/**
+	 * Find the path name of the first element of the array at $path.
+	 *
+	 * The first real element wins. If the array has none, the first \@extra
+	 * interstitial path below $path is returned. Returns false when no
+	 * recorded path lies below $path.
+	 *
+	 * @param $path string Path of the array
+	 * @return bool|int|string
+	 */
+	function findFirstArrayElement( $path ) {
+		$prefix = "$path/";
+		$prefixLen = strlen( $path ) + 1;
+		$firstPlaceholder = false;
+		foreach ( array_keys( $this->pathInfo ) as $candidate ) {
+			if ( substr( $candidate, 0, $prefixLen ) != $prefix ) {
+				continue;
+			}
+			if ( substr( $candidate, $prefixLen, 1 ) != '@' ) {
+				// Ordinary element: this is the answer
+				return $candidate;
+			}
+			// Interstitial element: remember the first one as a fallback
+			if ( $firstPlaceholder === false ) {
+				$firstPlaceholder = $candidate;
+			}
+		}
+		return $firstPlaceholder;
+	}
+
+	/**
+	 * Work out the indentation to use for a new element placed at $pos.
+	 *
+	 * The first returned item is the run of spaces and tabs starting at
+	 * $pos, or false if $pos is not at the start of a line. The second is
+	 * the padding to put between a key and its "=>": a single space, unless
+	 * $arrowPos is given and more padding is needed for a key of this length
+	 * to line up with it.
+	 *
+	 * @param $pos int Byte offset in the text
+	 * @param $key string|bool The new key, with quotes
+	 * @param $arrowPos int|bool Byte offset of an existing "=>" to align with
+	 * @return array array( indent, arrow indent )
+	 */
+	function getIndent( $pos, $key = false, $arrowPos = false ) {
+		$indent = false;
+		$atLineStart = ( $pos == 0 || $this->text[$pos - 1] == "\n" );
+		if ( $atLineStart ) {
+			$indentLength = strspn( $this->text, " \t", $pos );
+			$indent = substr( $this->text, $pos, $indentLength );
+		}
+
+		$arrowIndent = ' ';
+		if ( $indent !== false && $arrowPos !== false ) {
+			$padding = $arrowPos - $pos - $indentLength - strlen( $key );
+			if ( $padding > 0 ) {
+				$arrowIndent = str_repeat( ' ', $padding );
+			}
+		}
+		return array( $indent, $arrowIndent );
+	}
+
+ /**
+ * Run the parser on the text. Throws an exception if the string does not
+ * match our defined subset of PHP syntax.
+ *
+ * Each loop iteration pops one state name from the state stack and runs
+ * its handler; handlers consume tokens and push the states that follow.
+ * Path bookkeeping (pushPath/nextPath/popPath and the value markers)
+ * fills $this->pathInfo as a side effect.
+ */
+ public function parse() {
+ $this->initParse();
+ $this->pushState( 'file' );
+ $this->pushPath( '@extra-' . ( $this->serial++ ) );
+ $token = $this->firstToken();
+
+ while ( !$token->isEnd() ) {
+ $state = $this->popState();
+ if ( !$state ) {
+ $this->error( 'internal error: empty state stack' );
+ }
+
+ switch ( $state ) {
+ // Start of file: open tag, then statements until the end
+ case 'file':
+ $this->expect( T_OPEN_TAG );
+ $token = $this->skipSpace();
+ if ( $token->isEnd() ) {
+ break 2;
+ }
+ $this->pushState( 'statement', 'file 2' );
+ break;
+ // After each statement: stop at end of input, else another statement
+ case 'file 2':
+ $token = $this->skipSpace();
+ if ( $token->isEnd() ) {
+ break 2;
+ }
+ $this->pushState( 'statement', 'file 2' );
+ break;
+ // $var = expression; or $var[key] = expression;
+ case 'statement':
+ $token = $this->skipSpace();
+ if ( !$this->validatePath( $token->text ) ) {
+ $this->error( "Invalid variable name \"{$token->text}\"" );
+ }
+ $this->nextPath( $token->text );
+ $this->expect( T_VARIABLE );
+ $this->skipSpace();
+ $arrayAssign = false;
+ if ( $this->currentToken()->type == '[' ) {
+ $this->nextToken();
+ $token = $this->skipSpace();
+ if ( !$token->isScalar() ) {
+ $this->error( "expected a string or number for the array key" );
+ }
+ if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
+ $text = $this->parseScalar( $token->text );
+ } else {
+ $text = $token->text;
+ }
+ if ( !$this->validatePath( $text ) ) {
+ $this->error( "Invalid associative array name \"$text\"" );
+ }
+ $this->pushPath( $text );
+ $this->nextToken();
+ $this->skipSpace();
+ $this->expect( ']' );
+ $this->skipSpace();
+ $arrayAssign = true;
+ }
+ $this->expect( '=' );
+ $this->skipSpace();
+ $this->startPathValue();
+ if ( $arrayAssign ) {
+ $this->pushState( 'expression', 'array assign end' );
+ } else {
+ $this->pushState( 'expression', 'statement end' );
+ }
+ break;
+ // End of a statement; the array-assign form also pops the key path
+ case 'array assign end':
+ case 'statement end':
+ $this->endPathValue();
+ if ( $state == 'array assign end' ) {
+ $this->popPath();
+ }
+ $this->skipSpace();
+ $this->expect( ';' );
+ $this->nextPath( '@extra-' . ( $this->serial++ ) );
+ break;
+ // array(...), a scalar, or a variable
+ case 'expression':
+ $token = $this->skipSpace();
+ if ( $token->type == T_ARRAY ) {
+ $this->pushState( 'array' );
+ } elseif ( $token->isScalar() ) {
+ $this->nextToken();
+ } elseif ( $token->type == T_VARIABLE ) {
+ $this->nextToken();
+ } else {
+ $this->error( "expected simple expression" );
+ }
+ break;
+ // "array(" and a new path level for its contents
+ case 'array':
+ $this->skipSpace();
+ $this->expect( T_ARRAY );
+ $this->skipSpace();
+ $this->expect( '(' );
+ $this->skipSpace();
+ $this->pushPath( '@extra-' . ( $this->serial++ ) );
+ if ( $this->isAhead( ')' ) ) {
+ // Empty array
+ $this->pushState( 'array end' );
+ } else {
+ $this->pushState( 'element', 'array end' );
+ }
+ break;
+ case 'array end':
+ $this->skipSpace();
+ $this->popPath();
+ $this->expect( ')' );
+ break;
+ case 'element':
+ $token = $this->skipSpace();
+ // Look ahead to find the double arrow
+ if ( $token->isScalar() && $this->isAhead( T_DOUBLE_ARROW, 1 ) ) {
+ // Found associative element
+ $this->pushState( 'assoc-element', 'element end' );
+ } else {
+ // Not associative
+ $this->nextPath( '@next' );
+ $this->startPathValue();
+ $this->pushState( 'expression', 'element end' );
+ }
+ break;
+ case 'element end':
+ $token = $this->skipSpace();
+ if ( $token->type == ',' ) {
+ $this->endPathValue();
+ $this->markComma();
+ $this->nextToken();
+ $this->nextPath( '@extra-' . ( $this->serial++ ) );
+ // Look ahead to find ending bracket
+ if ( $this->isAhead( ")" ) ) {
+ // Found ending bracket, no continuation
+ $this->skipSpace();
+ } else {
+ // No ending bracket, continue to next element
+ $this->pushState( 'element' );
+ }
+ } elseif ( $token->type == ')' ) {
+ // End array
+ $this->endPathValue();
+ } else {
+ $this->error( "expected the next array element or the end of the array" );
+ }
+ break;
+ // key => expression
+ case 'assoc-element':
+ $token = $this->skipSpace();
+ if ( !$token->isScalar() ) {
+ $this->error( "expected a string or number for the array key" );
+ }
+ if ( $token->type == T_CONSTANT_ENCAPSED_STRING ) {
+ $text = $this->parseScalar( $token->text );
+ } else {
+ $text = $token->text;
+ }
+ if ( !$this->validatePath( $text ) ) {
+ $this->error( "Invalid associative array name \"$text\"" );
+ }
+ $this->nextPath( $text );
+ $this->nextToken();
+ $this->skipSpace();
+ $this->markArrow();
+ $this->expect( T_DOUBLE_ARROW );
+ $this->skipSpace();
+ $this->startPathValue();
+ $this->pushState( 'expression' );
+ break;
+ }
+ }
+ // States left over mean the input stopped in the middle of a construct
+ if ( count( $this->stateStack ) ) {
+ $this->error( 'unexpected end of file' );
+ }
+ $this->popPath();
+ }
+
+	/**
+	 * Reset all parser state for a fresh run: tokenise the text, empty both
+	 * stacks and the path table, restart the placeholder serial at 1 and
+	 * move to the first token.
+	 */
+	protected function initParse() {
+		$this->tokens = token_get_all( $this->text );
+
+		$this->pathInfo = array();
+		$this->pathStack = array();
+		$this->stateStack = array();
+		$this->serial = 1;
+
+		$this->firstToken();
+	}
+
+	/**
+	 * Move to the given token index and refresh the current token. A
+	 * position at or past the end of the token array yields the end token.
+	 *
+	 * Only firstToken() and nextToken() should call this; they also maintain
+	 * the line, column and byte counters, which this method does not touch.
+	 *
+	 * @param $pos int Token index
+	 * @return ConfEditorToken The new current token
+	 */
+	protected function setPos( $pos ) {
+		$this->pos = $pos;
+		$this->currentToken = ( $this->pos < count( $this->tokens ) )
+			? $this->newTokenObj( $this->tokens[$this->pos] )
+			: ConfEditorToken::newEnd();
+		return $this->currentToken;
+	}
+
+	/**
+	 * Wrap one element of the token_get_all() result in a ConfEditorToken.
+	 * Array elements supply type and text from indexes 0 and 1; any other
+	 * element is used as both type and text.
+	 *
+	 * @param $internalToken array|string
+	 * @return ConfEditorToken
+	 */
+	function newTokenObj( $internalToken ) {
+		if ( !is_array( $internalToken ) ) {
+			return new ConfEditorToken( $internalToken, $internalToken );
+		}
+		return new ConfEditorToken( $internalToken[0], $internalToken[1] );
+	}
+
+	/**
+	 * Rewind to the first token: line and column restart at 1, the byte
+	 * offset at 0, and the previous token becomes the end token.
+	 *
+	 * @return ConfEditorToken The first token
+	 */
+	function firstToken() {
+		$this->byteNum = 0;
+		$this->lineNum = 1;
+		$this->colNum = 1;
+		$this->prevToken = ConfEditorToken::newEnd();
+		return $this->setPos( 0 );
+	}
+
+ /**
+ * Get the current token
+ *
+ * @return ConfEditorToken The token at the current parse position, as
+ * last set by setPos()
+ */
+ function currentToken() {
+ return $this->currentToken;
+ }
+
+	/**
+	 * Step to the following token. The text of the token being left is
+	 * added to the line, column and byte counters first, and that token
+	 * becomes the previous token.
+	 *
+	 * @return ConfEditorToken The new current token
+	 */
+	function nextToken() {
+		$leaving = $this->currentToken;
+		if ( $leaving ) {
+			$consumed = $leaving->text;
+			$byteLen = strlen( $consumed );
+			$newlines = substr_count( $consumed, "\n" );
+			if ( $newlines ) {
+				// Column restarts after the last line feed in the token
+				$this->lineNum += $newlines;
+				$this->colNum = $byteLen - strrpos( $consumed, "\n" );
+			} else {
+				$this->colNum += $byteLen;
+			}
+			$this->byteNum += $byteLen;
+		}
+		$this->prevToken = $leaving;
+		$this->setPos( $this->pos + 1 );
+		return $this->currentToken;
+	}
+
+	/**
+	 * Peek at a token relative to the current position without moving.
+	 * A negative $offset looks backwards. Positions outside the token array
+	 * give the end token.
+	 *
+	 * @param $offset int Number of tokens ahead of the current position
+	 * @return ConfEditorToken
+	 */
+	function getTokenAhead( $offset ) {
+		$target = $this->pos + $offset;
+		$inRange = ( $target >= 0 && $target < count( $this->tokens ) );
+		if ( !$inRange ) {
+			return ConfEditorToken::newEnd();
+		}
+		return $this->newTokenObj( $this->tokens[$target] );
+	}
+
+	/**
+	 * Move forward until the current token is not skippable (isSkip() is
+	 * false), i.e. past any whitespace or comments.
+	 *
+	 * @return ConfEditorToken The resulting current token
+	 */
+	function skipSpace() {
+		while ( true ) {
+			$token = $this->currentToken;
+			if ( !$token || !$token->isSkip() ) {
+				break;
+			}
+			$this->nextToken();
+		}
+		return $this->currentToken;
+	}
+
+	/**
+	 * Require the current token to have the given type and step past it.
+	 * A mismatch is reported through error(), naming both the expected and
+	 * the actual token type.
+	 *
+	 * @param $type int|string Token type
+	 * @return ConfEditorToken The token after the expected one
+	 */
+	function expect( $type ) {
+		$matches = $this->currentToken && $this->currentToken->type == $type;
+		if ( !$matches ) {
+			$this->error( "expected " . $this->getTypeName( $type ) .
+				", got " . $this->getTypeName( $this->currentToken->type ) );
+			return;
+		}
+		return $this->nextToken();
+	}
+
+	/**
+	 * Schedule one or two states. $nextState ends up on top of the stack
+	 * and so runs first; $stateAfterThat, if given, runs after it.
+	 *
+	 * @param $nextState string
+	 * @param $stateAfterThat string|null
+	 */
+	function pushState( $nextState, $stateAfterThat = null ) {
+		$pending = ( $stateAfterThat === null )
+			? array( $nextState )
+			: array( $stateAfterThat, $nextState );
+		foreach ( $pending as $state ) {
+			$this->stateStack[] = $state;
+		}
+	}
+
+ /**
+ * Pop a state from the state stack.
+ * @return mixed The topmost state name, or null if the stack is empty
+ */
+ function popState() {
+ return array_pop( $this->stateStack );
+ }
+
+	/**
+	 * Check that a user-supplied name can be used as a path component:
+	 * it must not contain "/" and must not start with "@", because both
+	 * are reserved for the internal path keys.
+	 *
+	 * @param $path string
+	 * @return bool
+	 */
+	function validatePath( $path ) {
+		if ( strpos( $path, '/' ) !== false ) {
+			return false;
+		}
+		return substr( $path, 0, 1 ) != '@';
+	}
+
+	/**
+	 * Record the entry on top of the path stack in $this->pathInfo, keyed by
+	 * the slash-joined names of the whole stack, with its end byte and end
+	 * token set to the current position. The stack itself is not changed.
+	 *
+	 * Internal: only popPath() and nextPath() should call this.
+	 */
+	function endPath() {
+		$fullPath = '';
+		$top = null;
+		foreach ( $this->pathStack as $entry ) {
+			$separator = ( $fullPath !== '' ) ? '/' : '';
+			$fullPath .= $separator . $entry['name'];
+			$top = $entry;
+		}
+		$top['endByte'] = $this->byteNum;
+		$top['endToken'] = $this->pos;
+		$this->pathInfo[$fullPath] = $top;
+	}
+
+	/**
+	 * Open a new, deeper path level (for example at the start of an array)
+	 * starting at the current position. Value offsets, comma flag and arrow
+	 * offset start out unset, and numeric indexing starts at 0.
+	 *
+	 * @param $path string Name of the new path component
+	 */
+	function pushPath( $path ) {
+		$depth = count( $this->pathStack ) + 1;
+		$entry = array(
+			'name' => $path,
+			'level' => $depth,
+			'startByte' => $this->byteNum,
+			'startToken' => $this->pos,
+			'valueStartToken' => false,
+			'valueStartByte' => false,
+			'valueEndToken' => false,
+			'valueEndByte' => false,
+			'nextArrayIndex' => 0,
+			'hasComma' => false,
+			'arrowByte' => false
+		);
+		$this->pathStack[] = $entry;
+	}
+
+ /**
+ * Go down a path level, for example at the end of an array.
+ * The level being left is first recorded in $this->pathInfo by endPath().
+ */
+ function popPath() {
+ $this->endPath();
+ array_pop( $this->pathStack );
+ }
+
+ /**
+ * Go to the next path on the same level. This ends the current path and
+ * starts a new one. If $path is \@next, the new path is set to the next
+ * numeric array element.
+ *
+ * @param $path string Name of the new path, or "@next"
+ */
+ function nextPath( $path ) {
+ $this->endPath();
+ $i = count( $this->pathStack ) - 1;
+ if ( $path == '@next' ) {
+ // Use the running numeric index of this level as the name, then bump it
+ $nextArrayIndex =& $this->pathStack[$i]['nextArrayIndex'];
+ $this->pathStack[$i]['name'] = $nextArrayIndex;
+ $nextArrayIndex++;
+ } else {
+ $this->pathStack[$i]['name'] = $path;
+ }
+ // Array union keeps the left-hand value for duplicate keys, so the
+ // position and value fields are reset while name, level and
+ // nextArrayIndex carry over from the existing entry.
+ $this->pathStack[$i] =
+ array(
+ 'startByte' => $this->byteNum,
+ 'startToken' => $this->pos,
+ 'valueStartToken' => false,
+ 'valueStartByte' => false,
+ 'valueEndToken' => false,
+ 'valueEndByte' => false,
+ 'hasComma' => false,
+ 'arrowByte' => false,
+ ) + $this->pathStack[$i];
+ }
+
+	/**
+	 * Record the current token index and byte offset as the start of the
+	 * value of the path on top of the path stack.
+	 */
+	function startPathValue() {
+		$top = count( $this->pathStack ) - 1;
+		$this->pathStack[$top]['valueStartToken'] = $this->pos;
+		$this->pathStack[$top]['valueStartByte'] = $this->byteNum;
+	}
+
+	/**
+	 * Record the current token index and byte offset as the end of the
+	 * value of the path on top of the path stack.
+	 */
+	function endPathValue() {
+		$top = count( $this->pathStack ) - 1;
+		$this->pathStack[$top]['valueEndToken'] = $this->pos;
+		$this->pathStack[$top]['valueEndByte'] = $this->byteNum;
+	}
+
+	/**
+	 * Flag the path on top of the path stack as being followed by a comma.
+	 */
+	function markComma() {
+		$top = count( $this->pathStack ) - 1;
+		$this->pathStack[$top]['hasComma'] = true;
+	}
+
+ /**
+  * Record the current byte position as the location of the arrow
+  * separator of an associative array element, on the top stack entry.
+  */
+ function markArrow() {
+     $top = count( $this->pathStack ) - 1;
+     $this->pathStack[$top]['arrowByte'] = $this->byteNum;
+ }
+
+ /**
+ * Generate a parse error
+ *
+ * @param string $msg Message handed to the ConfEditorParseError constructor
+ * @throws ConfEditorParseError Always; this method never returns
+ */
+ function error( $msg ) {
+ throw new ConfEditorParseError( $this, $msg );
+ }
+
+ /**
+  * Get a readable name for the given token type.
+  *
+  * Integer types are resolved through token_name(); any other type is
+  * returned wrapped in double quotes.
+  *
+  * @param int|string $type
+  * @return string
+  */
+ function getTypeName( $type ) {
+     return is_int( $type ) ? token_name( $type ) : "\"$type\"";
+ }
+
+ /**
+  * Check whether the next non-skippable token, starting from the current
+  * position plus the given offset, has the given type. Tokens for which
+  * isSkip() is true are passed over.
+  *
+  * @param int|string $type Token type to look for
+  * @param int $offset Offset from the current position to start at
+  * @return bool False if the end token is reached first
+  */
+ function isAhead( $type, $offset = 0 ) {
+     for ( $ahead = $offset; ; $ahead++ ) {
+         $token = $this->getTokenAhead( $ahead );
+         if ( $token->isEnd() ) {
+             return false;
+         }
+         if ( $token->isSkip() ) {
+             continue;
+         }
+         // First significant token decides the answer
+         return $token->type == $type;
+     }
+ }
+
+ /**
+ * Get the previous token object
+ *
+ * @return mixed The current value of the prevToken property
+ */
+ function prevToken() {
+ return $this->prevToken;
+ }
+
+ /**
+  * Echo a reasonably readable representation of the tokenizer array.
+  *
+  * Each token becomes one line holding its type name (left-aligned in a
+  * 28 character column) and its text with control characters escaped.
+  * The result is HTML-escaped and wrapped in a <pre> element.
+  */
+ function dumpTokens() {
+     $rows = array();
+     foreach ( $this->tokens as $rawToken ) {
+         $tokenObj = $this->newTokenObj( $rawToken );
+         $typeName = $this->getTypeName( $tokenObj->type );
+         $escapedText = addcslashes( $tokenObj->text, "\0..\37" );
+         $rows[] = sprintf( "%-28s %s\n", $typeName, $escapedText );
+     }
+     echo "<pre>" . htmlspecialchars( implode( '', $rows ) ) . "</pre>";
+ }
+}
+
+/**
+ * Exception class for parse errors. It remembers the line and column the
+ * editor was at when the error was raised.
+ */
+class ConfEditorParseError extends MWException {
+    public $lineNum, $colNum;
+
+    /**
+     * @param object $editor Object exposing lineNum and colNum properties
+     * @param string $msg Description of the problem
+     */
+    function __construct( $editor, $msg ) {
+        $this->lineNum = $editor->lineNum;
+        $this->colNum = $editor->colNum;
+        $message = "Parse error on line {$editor->lineNum} " .
+            "col {$editor->colNum}: $msg";
+        parent::__construct( $message );
+    }
+
+    /**
+     * Return the offending line of $text followed by a caret line that
+     * points at the error column.
+     *
+     * @param string $text Text that was being parsed
+     * @return string Empty string if $text has no such line
+     */
+    function highlight( $text ) {
+        // Line numbers are 1-based, the exploded lines are 0-based
+        $target = $this->lineNum - 1;
+        foreach ( StringUtils::explode( "\n", $text ) as $index => $line ) {
+            if ( $index != $target ) {
+                continue;
+            }
+            return "$line\n" . str_repeat( ' ', $this->colNum - 1 ) . "^\n";
+        }
+        return '';
+    }
+
+}
+
+/**
+ * Small value class wrapping one token from the tokenizer: a type and
+ * the matching source text.
+ */
+class ConfEditorToken {
+    public $type, $text;
+
+    /** Token types treated as scalar values */
+    public static $scalarTypes = array( T_LNUMBER, T_DNUMBER, T_STRING, T_CONSTANT_ENCAPSED_STRING );
+    /** Token types that are passed over (whitespace and comments) */
+    public static $skipTypes = array( T_WHITESPACE, T_COMMENT, T_DOC_COMMENT );
+
+    /**
+     * Create the special end marker token (type 'END', empty text).
+     * @return ConfEditorToken
+     */
+    public static function newEnd() {
+        return new self( 'END', '' );
+    }
+
+    /**
+     * @param int|string $type Token type
+     * @param string $text Token text
+     */
+    public function __construct( $type, $text ) {
+        $this->text = $text;
+        $this->type = $type;
+    }
+
+    /**
+     * @return bool Whether the type is one of the skip types
+     */
+    public function isSkip() {
+        return in_array( $this->type, self::$skipTypes );
+    }
+
+    /**
+     * @return bool Whether the type is one of the scalar types
+     */
+    public function isScalar() {
+        return in_array( $this->type, self::$scalarTypes );
+    }
+
+    /**
+     * @return bool Whether this is the end marker token
+     */
+    public function isEnd() {
+        return $this->type == 'END';
+    }
+}
--- /dev/null
+<?php
+/**
+ * Convenience class for weighted consistent hash rings.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Convenience class for weighted consistent hash rings
+ *
+ * Locations are laid out on a ring of RING_SIZE spots in the order of the
+ * SHA-1 hash of their names; each location owns a contiguous range whose
+ * size is proportional to its weight.
+ *
+ * @since 1.22
+ */
+class HashRing {
+    /** @var Array (location => weight) */
+    protected $sourceMap = array();
+    /** @var Array (location => (start, end)) */
+    protected $ring = array();
+
+    const RING_SIZE = 268435456; // 2^28
+
+    /**
+     * @param array $map (location => weight)
+     * @throws MWException If no location has a weight above zero
+     */
+    public function __construct( array $map ) {
+        $map = array_filter( $map, function( $w ) { return $w > 0; } );
+        if ( !count( $map ) ) {
+            throw new MWException( "Ring is empty or all weights are zero." );
+        }
+        $this->sourceMap = $map;
+        // Sort the locations based on the hash of their names
+        $hashes = array();
+        foreach ( $map as $location => $weight ) {
+            $hashes[$location] = sha1( $location );
+        }
+        uksort( $map, function ( $a, $b ) use ( $hashes ) {
+            return strcmp( $hashes[$a], $hashes[$b] );
+        } );
+        // Fit the map to weight-proportionate one with a space of size RING_SIZE
+        $sum = array_sum( $map );
+        $standardMap = array();
+        foreach ( $map as $location => $weight ) {
+            $standardMap[$location] = (int)floor( $weight / $sum * self::RING_SIZE );
+        }
+        // Build a ring of RING_SIZE spots, with each location at a spot in location hash order
+        $index = 0;
+        foreach ( $standardMap as $location => $weight ) {
+            // Location covers half-closed interval [$index,$index + $weight)
+            $this->ring[$location] = array( $index, $index + $weight );
+            $index += $weight;
+        }
+        // Make sure the last location covers what is left (floor() above
+        // may leave a few spots at the end of the ring unassigned)
+        end( $this->ring );
+        $this->ring[key( $this->ring )][1] = self::RING_SIZE;
+    }
+
+    /**
+     * Get the location of an item on the ring
+     *
+     * @param string $item
+     * @return string Location
+     */
+    public function getLocation( $item ) {
+        $locations = $this->getLocations( $item, 1 );
+        return $locations[0];
+    }
+
+    /**
+     * Get the location of an item on the ring, as well as the next clockwise locations
+     *
+     * @param string $item
+     * @param integer $limit Maximum number of locations to return
+     * @return array List of locations
+     */
+    public function getLocations( $item, $limit ) {
+        $locations = array();
+        $primaryLocation = null;
+        $spot = hexdec( substr( sha1( $item ), 0, 7 ) ); // first 28 bits
+        foreach ( $this->ring as $location => $range ) {
+            if ( count( $locations ) >= $limit ) {
+                break;
+            }
+            // The $primaryLocation is the location the item spot is in.
+            // After that is reached, keep appending the next locations.
+            if ( ( $range[0] <= $spot && $spot < $range[1] ) || $primaryLocation !== null ) {
+                if ( $primaryLocation === null ) {
+                    $primaryLocation = $location;
+                }
+                $locations[] = $location;
+            }
+        }
+        // If more locations are requested, wrap-around and keep adding them.
+        // A plain foreach is used instead of each(), which was removed in
+        // PHP 8.0; it also guarantees that the loop ends with the ring.
+        foreach ( $this->ring as $location => $range ) {
+            if ( count( $locations ) >= $limit || $location === $primaryLocation ) {
+                break; // enough locations, or back at the start: don't go in circles
+            }
+            $locations[] = $location;
+        }
+        return $locations;
+    }
+
+    /**
+     * Get the map of locations to weight (ignores 0-weight items)
+     *
+     * @return array
+     */
+    public function getLocationWeights() {
+        return $this->sourceMap;
+    }
+
+    /**
+     * Get a new hash ring with a location removed from the ring
+     *
+     * @param string $location
+     * @return HashRing|bool Returns false if no non-zero weighted spots are left
+     */
+    public function newWithoutLocation( $location ) {
+        $map = $this->sourceMap;
+        unset( $map[$location] );
+        if ( count( $map ) ) {
+            return new self( $map );
+        }
+        return false;
+    }
+}
--- /dev/null
+<?php
+/**
+ * Functions and constants to play with IP addresses and ranges
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Antoine Musso "<hashar at free dot fr>", Aaron Schulz
+ */
+
+// Regex fragments for matching IP addresses and IP address blocks.
+// None of them carries delimiters or anchors; callers add those.
+
+// An IPv4 address is made of 4 bytes from x00 to xFF which is d0 to d255
+// (a leading zero is tolerated on one- and two-digit values)
+define( 'RE_IP_BYTE', '(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|0?[0-9]?[0-9])' );
+define( 'RE_IP_ADD', RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE . '\.' . RE_IP_BYTE );
+// An IPv4 block is an IP address and a prefix (d0 to d32)
+define( 'RE_IP_PREFIX', '(3[0-2]|[12]?\d)' );
+define( 'RE_IP_BLOCK', RE_IP_ADD . '\/' . RE_IP_PREFIX );
+
+// An IPv6 address is made up of 8 words (each x0000 to xFFFF).
+// However, the "::" abbreviation can be used on consecutive x0000 words.
+define( 'RE_IPV6_WORD', '([0-9A-Fa-f]{1,4})' );
+define( 'RE_IPV6_PREFIX', '(12[0-8]|1[01][0-9]|[1-9]?\d)' );
+define( 'RE_IPV6_ADD',
+ '(?:' . // starts with "::" (including "::")
+ ':(?::|(?::' . RE_IPV6_WORD . '){1,7})' .
+ '|' . // ends with "::" (except "::")
+ RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){0,6}::' .
+ '|' . // contains one "::" in the middle (the ^ makes the test fail if none found)
+ RE_IPV6_WORD . '(?::((?(-1)|:))?' . RE_IPV6_WORD . '){1,6}(?(-2)|^)' .
+ '|' . // contains no "::"
+ RE_IPV6_WORD . '(?::' . RE_IPV6_WORD . '){7}' .
+ ')'
+);
+// An IPv6 block is an IP address and a prefix (d0 to d128)
+define( 'RE_IPV6_BLOCK', RE_IPV6_ADD . '\/' . RE_IPV6_PREFIX );
+// For IPv6 canonicalization (NOT for strict validation; these are quite lax!)
+define( 'RE_IPV6_GAP', ':(?:0+:)*(?::(?:0+:)*)?' );
+define( 'RE_IPV6_V4_PREFIX', '0*' . RE_IPV6_GAP . '(?:ffff:)?' );
+
+// This might be useful for regexps used elsewhere, matches any IPv4 or IPv6 address or network
+define( 'IP_ADDRESS_STRING',
+ '(?:' .
+ RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?' . // IPv4
+ '|' .
+ RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?' . // IPv6
+ ')'
+);
+
+/**
+ * A collection of public static functions to play with IP address
+ * and IP blocks.
+ */
+class IP {
+ /**
+  * Determine if a string is a valid IP address or network (an address
+  * followed by a CIDR prefix), in either IPv4 or IPv6 form.
+  * SIIT IPv4-translated addresses are rejected.
+  * Note: canonicalize() tries to convert translated addresses to IPv4.
+  *
+  * @param string $ip possible IP address
+  * @return Boolean
+  */
+ public static function isIPAddress( $ip ) {
+     $pattern = '/^' . IP_ADDRESS_STRING . '$/';
+     return preg_match( $pattern, $ip ) === 1;
+ }
+
+ /**
+  * Given a string, determine if it is a valid IPv6 address or IPv6
+  * network (address plus CIDR prefix).
+  * Note: Unlike isValid(), this looks for networks too.
+  *
+  * @param string $ip possible IP address
+  * @return Boolean
+  */
+ public static function isIPv6( $ip ) {
+     $pattern = '/^' . RE_IPV6_ADD . '(?:\/' . RE_IPV6_PREFIX . ')?$/';
+     return preg_match( $pattern, $ip ) === 1;
+ }
+
+ /**
+  * Given a string, determine if it is a valid IPv4 address or IPv4
+  * network (address plus CIDR prefix).
+  * Note: Unlike isValid(), this looks for networks too.
+  *
+  * @param string $ip possible IP address
+  * @return Boolean
+  */
+ public static function isIPv4( $ip ) {
+     $pattern = '/^' . RE_IP_ADD . '(?:\/' . RE_IP_PREFIX . ')?$/';
+     return preg_match( $pattern, $ip ) === 1;
+ }
+
+ /**
+  * Validate a single IP address (IPv4 or IPv6). Ranges are NOT considered
+  * valid.
+  * SIIT IPv4-translated addresses are rejected.
+  * Note: canonicalize() tries to convert translated addresses to IPv4.
+  *
+  * @param $ip String
+  * @return Boolean: True if it is valid.
+  */
+ public static function isValid( $ip ) {
+     // IPv4 is tried first; IPv6 is only checked when that fails
+     if ( preg_match( '/^' . RE_IP_ADD . '$/', $ip ) ) {
+         return true;
+     }
+     return (bool)preg_match( '/^' . RE_IPV6_ADD . '$/', $ip );
+ }
+
+ /**
+  * Validate an IP block, i.e. a valid IPv4 or IPv6 address WITH a valid
+  * CIDR prefix.
+  * SIIT IPv4-translated addresses are rejected.
+  * Note: canonicalize() tries to convert translated addresses to IPv4.
+  *
+  * @param $ipblock String
+  * @return Boolean: True if it is valid.
+  */
+ public static function isValidBlock( $ipblock ) {
+     // IPv6 is tried first; IPv4 is only checked when that fails
+     if ( preg_match( '/^' . RE_IPV6_BLOCK . '$/', $ipblock ) ) {
+         return true;
+     }
+     return (bool)preg_match( '/^' . RE_IP_BLOCK . '$/', $ipblock );
+ }
+
+ /**
+ * Convert an IP into a verbose, uppercase, normalized form.
+ * IPv6 addresses in octet notation are expanded to 8 words.
+ * IPv4 addresses are just trimmed.
+ *
+ * Input that is neither valid IPv4 nor valid IPv6 is returned trimmed
+ * but otherwise unchanged.
+ *
+ * @param string $ip IP address in quad or octet form (CIDR or not).
+ * @return String|null Null if the trimmed input is an empty string
+ */
+ public static function sanitizeIP( $ip ) {
+ $ip = trim( $ip );
+ if ( $ip === '' ) {
+ return null;
+ }
+ if ( self::isIPv4( $ip ) || !self::isIPv6( $ip ) ) {
+ return $ip; // nothing else to do for IPv4 addresses or invalid ones
+ }
+ // Convert to upper case (surrounding whitespace was trimmed above)
+ $ip = strtoupper( $ip );
+ // Expand zero abbreviations
+ $abbrevPos = strpos( $ip, '::' );
+ if ( $abbrevPos !== false ) {
+ // We know this is valid IPv6. Find the last index of the
+ // address before any CIDR number (e.g. "a:b:c::/24").
+ $CIDRStart = strpos( $ip, "/" );
+ $addressEnd = ( $CIDRStart !== false )
+ ? $CIDRStart - 1
+ : strlen( $ip ) - 1;
+ // If the '::' is at the beginning...
+ if ( $abbrevPos == 0 ) {
+ $repeat = '0:';
+ $extra = ( $ip == '::' ) ? '0' : ''; // for the address '::'
+ $pad = 9; // 7+2 (due to '::')
+ // If the '::' is at the end...
+ } elseif ( $abbrevPos == ( $addressEnd - 1 ) ) {
+ $repeat = ':0';
+ $extra = '';
+ $pad = 9; // 7+2 (due to '::')
+ // If the '::' is in the middle...
+ } else {
+ $repeat = ':0';
+ $extra = ':';
+ $pad = 8; // 6+2 (due to '::')
+ }
+ // Replace '::' with $repeat repeated ($pad minus the number of
+ // colons already present) times, followed by $extra.
+ $ip = str_replace( '::',
+ str_repeat( $repeat, $pad - substr_count( $ip, ':' ) ) . $extra,
+ $ip
+ );
+ }
+ // Remove leading zeros from each bloc as needed
+ $ip = preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $ip );
+ return $ip;
+ }
+
+ /**
+ * Prettify an IP for display to end users.
+ * This will make it more compact and lower-case.
+ *
+ * Only IPv6 input is rewritten: the longest run of two or more zero
+ * words is collapsed to '::' and the result is lower-cased. Any other
+ * input is returned as produced by sanitizeIP().
+ *
+ * @param $ip string
+ * @return string
+ */
+ public static function prettifyIP( $ip ) {
+ $ip = self::sanitizeIP( $ip ); // normalize (removes '::')
+ if ( self::isIPv6( $ip ) ) {
+ // Split IP into an address and a CIDR
+ if ( strpos( $ip, '/' ) !== false ) {
+ list( $ip, $cidr ) = explode( '/', $ip, 2 );
+ } else {
+ list( $ip, $cidr ) = array( $ip, '' );
+ }
+ // Get the largest slice of words with multiple zeros
+ $offset = 0;
+ $longest = $longestPos = false;
+ while ( preg_match(
+ '!(?:^|:)0(?::0)+(?:$|:)!', $ip, $m, PREG_OFFSET_CAPTURE, $offset
+ ) ) {
+ list( $match, $pos ) = $m[0]; // full match
+ // Strictly longer only, so the first of several equally long runs wins
+ if ( strlen( $match ) > strlen( $longest ) ) {
+ $longest = $match;
+ $longestPos = $pos;
+ }
+ $offset = ( $pos + strlen( $match ) ); // advance
+ }
+ if ( $longest !== false ) {
+ // Replace this portion of the string with the '::' abbreviation
+ $ip = substr_replace( $ip, '::', $longestPos, strlen( $longest ) );
+ }
+ // Add any CIDR back on
+ if ( $cidr !== '' ) {
+ $ip = "{$ip}/{$cidr}";
+ }
+ // Convert to lower case to make it more readable
+ $ip = strtolower( $ip );
+ }
+ return $ip;
+ }
+
+ /**
+  * Given a host/port string, like one might find in the host part of a URL
+  * per RFC 2732, split the hostname part and the port part and return an
+  * array with an element for each. If there is no port part, the array will
+  * have false in place of the port. If the string was invalid in some way,
+  * false is returned.
+  *
+  * This was easy with IPv4 and was generally done in an ad-hoc way, but
+  * with IPv6 it's somewhat more complicated due to the need to parse the
+  * square brackets and colons.
+  *
+  * A bare IPv6 address is accepted despite the lack of square brackets.
+  *
+  * @param string $both The string with the host and port
+  * @return array|bool array( host, port or false ), or false if invalid
+  */
+ public static function splitHostAndPort( $both ) {
+     if ( substr( $both, 0, 1 ) === '[' ) {
+         if ( preg_match( '/^\[(' . RE_IPV6_ADD . ')\](?::(?P<port>\d+))?$/', $both, $m ) ) {
+             if ( isset( $m['port'] ) ) {
+                 return array( $m[1], intval( $m['port'] ) );
+             } else {
+                 return array( $m[1], false );
+             }
+         } else {
+             // Square bracket found but no IPv6
+             return false;
+         }
+     }
+     $numColons = substr_count( $both, ':' );
+     if ( $numColons >= 2 ) {
+         // Is it a bare IPv6 address?
+         if ( preg_match( '/^' . RE_IPV6_ADD . '$/', $both ) ) {
+             return array( $both, false );
+         } else {
+             // Not valid IPv6, but too many colons for anything else
+             return false;
+         }
+     }
+     if ( $numColons >= 1 ) {
+         // Host:port?
+         $bits = explode( ':', $both );
+         // The port must consist of digits only; anchoring the end of the
+         // pattern rejects trailing garbage such as "80abc".
+         if ( preg_match( '/^\d+\z/', $bits[1] ) ) {
+             return array( $bits[0], intval( $bits[1] ) );
+         } else {
+             // Not a valid port
+             return false;
+         }
+     }
+     // Plain hostname
+     return array( $both, false );
+ }
+
+ /**
+  * Build a host/port string like the one found in a URL from a host name
+  * and a port. A host containing a colon is wrapped in square brackets,
+  * as in RFC 2732. When a default port is given and the port equals it,
+  * the port is left out.
+  *
+  * @param $host string
+  * @param $port int
+  * @param $defaultPort bool|int
+  * @return string
+  */
+ public static function combineHostAndPort( $host, $port, $defaultPort = false ) {
+     $hasColon = strpos( $host, ':' ) !== false;
+     if ( $hasColon ) {
+         $host = "[$host]";
+     }
+     $isDefaultPort = ( $defaultPort !== false && $port == $defaultPort );
+     return $isDefaultPort ? $host : "$host:$port";
+ }
+
+ /**
+  * Given an unsigned integer, returns an IPv6 address in octet notation.
+  * The number is converted from base 10 to hexadecimal with
+  * wfBaseConvert() and then formatted by hexToOctet().
+  *
+  * @param $ip_int String: IP address.
+  * @return String
+  */
+ public static function toOctet( $ip_int ) {
+     $hex = wfBaseConvert( $ip_int, 10, 16, 32, false );
+     return self::hexToOctet( $hex );
+ }
+
+ /**
+  * Convert an IPv4 or IPv6 hexadecimal representation back to readable
+  * format. A leading "v6-" marks the value as IPv6.
+  *
+  * @param string $hex number, with "v6-" prefix if it is IPv6
+  * @return String: quad-dotted (IPv4) or octet notation (IPv6)
+  */
+ public static function formatHex( $hex ) {
+     if ( substr( $hex, 0, 3 ) != 'v6-' ) {
+         // No prefix: IPv4
+         return self::hexToQuad( $hex );
+     }
+     // IPv6: strip the prefix before formatting
+     return self::hexToOctet( substr( $hex, 3 ) );
+ }
+
+ /**
+  * Converts a hexadecimal number to an IPv6 address in octet notation.
+  * The input is upper-cased and left-padded with zeros to 32 digits,
+  * cut into eight 4-digit words, and leading zeros are then stripped
+  * from each word.
+  *
+  * @param $ip_hex String: pure hex (no v6- prefix)
+  * @return String (of format a:b:c:d:e:f:g:h)
+  */
+ public static function hexToOctet( $ip_hex ) {
+     // Pad hex to 32 chars (128 bits)
+     $padded = str_pad( strtoupper( $ip_hex ), 32, '0', STR_PAD_LEFT );
+     // Only the first 32 digits are used: 8 words of 4 digits each
+     $words = str_split( substr( $padded, 0, 32 ), 4 );
+     $octets = implode( ':', $words );
+     // NO leading zeroes
+     return preg_replace( '/(^|:)0+(' . RE_IPV6_WORD . ')/', '$1$2', $octets );
+ }
+
+ /**
+  * Converts a hexadecimal number to an IPv4 address in quad-dotted
+  * notation. The input is upper-cased and left-padded with zeros to
+  * 8 digits; each pair of digits becomes one decimal quad.
+  *
+  * @param $ip_hex String: pure hex
+  * @return String (of format a.b.c.d)
+  */
+ public static function hexToQuad( $ip_hex ) {
+     // Pad hex to 8 chars (32 bits)
+     $padded = str_pad( strtoupper( $ip_hex ), 8, '0', STR_PAD_LEFT );
+     // Separate into four quads
+     $quads = array();
+     for ( $i = 0; $i < 4; $i++ ) {
+         $quads[] = base_convert( substr( $padded, $i * 2, 2 ), 16, 10 );
+     }
+     return implode( '.', $quads );
+ }
+
+ /**
+  * Determine if an IP address really is an IP address, and if it is public,
+  * i.e. not RFC 1918 or similar
+  * Comes from ProxyTools.php
+  *
+  * IPv6 input is handed over to isPublic6().
+  *
+  * @param $ip String
+  * @return Boolean
+  */
+ public static function isPublic( $ip ) {
+     if ( self::isIPv6( $ip ) ) {
+         return self::isPublic6( $ip );
+     }
+     $n = self::toUnsigned( $ip );
+     // ip2long accepts incomplete addresses, as well as some addresses
+     // followed by garbage characters, so besides a usable number the
+     // round trip through long2ip() must give back the input.
+     if ( !$n || $ip != long2ip( $n ) ) {
+         return false;
+     }
+
+     // Built once and kept for later calls
+     static $reserved = false;
+     if ( !$reserved ) {
+         $reserved = array(
+             array( '10.0.0.0', '10.255.255.255' ), # RFC 1918 (private)
+             array( '172.16.0.0', '172.31.255.255' ), # RFC 1918 (private)
+             array( '192.168.0.0', '192.168.255.255' ), # RFC 1918 (private)
+             array( '0.0.0.0', '0.255.255.255' ), # this network
+             array( '127.0.0.0', '127.255.255.255' ), # loopback
+         );
+     }
+
+     foreach ( $reserved as $bounds ) {
+         $low = self::toUnsigned( $bounds[0] );
+         $high = self::toUnsigned( $bounds[1] );
+         if ( $n >= $low && $n <= $high ) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Determine if an IPv6 address is public, i.e. not RFC 4193 or similar.
+  * The address and the range bounds are compared in their toHex() form.
+  *
+  * @param $ip String
+  * @return Boolean
+  */
+ private static function isPublic6( $ip ) {
+     // Built once and kept for later calls
+     static $reserved = false;
+     if ( !$reserved ) {
+         $reserved = array(
+             array( 'fc00::', 'fdff:ffff:ffff:ffff:ffff:ffff:ffff:ffff' ), # RFC 4193 (local)
+             array( '0:0:0:0:0:0:0:1', '0:0:0:0:0:0:0:1' ), # loopback
+         );
+     }
+     $n = self::toHex( $ip );
+     foreach ( $reserved as $bounds ) {
+         $low = self::toHex( $bounds[0] );
+         $high = self::toHex( $bounds[1] );
+         if ( $n >= $low && $n <= $high ) {
+             return false;
+         }
+     }
+     return true;
+ }
+
+ /**
+  * Return a zero-padded upper case hexadecimal representation of an IP address.
+  *
+  * Hexadecimal addresses are used because they can easily be extended to
+  * IPv6 support. To separate the ranges, the return value from this
+  * function for an IPv6 address will be prefixed with "v6-", a non-
+  * hexadecimal string which sorts after the IPv4 addresses.
+  *
+  * @param string $ip quad dotted/octet IP address.
+  * @return String|bool False if toUnsigned() returned false for non-IPv6 input
+  */
+ public static function toHex( $ip ) {
+     if ( self::isIPv6( $ip ) ) {
+         return 'v6-' . self::IPv6ToRawHex( $ip );
+     }
+     $unsigned = self::toUnsigned( $ip );
+     if ( $unsigned === false ) {
+         return $unsigned;
+     }
+     // 8 hex digits, upper case
+     return wfBaseConvert( $unsigned, 10, 16, 8, false );
+ }
+
+ /**
+  * Given an IPv6 address in octet notation, returns a pure hex string.
+  * The address is run through sanitizeIP(), split on ':' and every part
+  * is left-padded with zeros to four characters.
+  *
+  * @param string $ip octet ipv6 IP address.
+  * @return String|null pure hex (uppercase); null if sanitizeIP() gave an empty result
+  */
+ private static function IPv6ToRawHex( $ip ) {
+     $expanded = self::sanitizeIP( $ip );
+     if ( !$expanded ) {
+         return null;
+     }
+     $words = array();
+     foreach ( explode( ':', $expanded ) as $word ) {
+         $words[] = str_pad( $word, 4, 0, STR_PAD_LEFT );
+     }
+     return implode( '', $words );
+ }
+
+ /**
+  * Given an IP address in dotted-quad/octet notation, returns an unsigned integer.
+  * Like ip2long() except that it actually works and has a consistent error return value.
+  * Comes from ProxyTools.php
+  *
+  * IPv6 input is handed over to toUnsigned6().
+  *
+  * @param string $ip quad dotted IP address.
+  * @return Mixed: string/int/false
+  */
+ public static function toUnsigned( $ip ) {
+     if ( self::isIPv6( $ip ) ) {
+         return self::toUnsigned6( $ip );
+     }
+     $n = ip2long( $ip );
+     if ( $n < 0 ) {
+         // Negative result: shift it into the unsigned range
+         $n += pow( 2, 32 );
+         # On 32-bit platforms (and on Windows), 2^32 does not fit into an int,
+         # so $n becomes a float. We convert it to string instead.
+         if ( is_float( $n ) ) {
+             $n = (string)$n;
+         }
+     }
+     return $n;
+ }
+
+ /**
+ * Convert an IPv6 address to a base 10 number string: the raw hex form
+ * from IPv6ToRawHex() is passed through wfBaseConvert() (base 16 to 10).
+ *
+ * @param $ip
+ * @return String
+ */
+ private static function toUnsigned6( $ip ) {
+ return wfBaseConvert( self::IPv6ToRawHex( $ip ), 16, 10 );
+ }
+
+ /**
+  * Convert a network specification in CIDR notation
+  * to an integer network and a number of bits.
+  *
+  * IPv6 input is handed over to parseCIDR6(). For IPv4 the host bits of
+  * the address are cleared. On invalid input both elements are false.
+  *
+  * @param string $range IP with CIDR prefix
+  * @return array(int or string, int)
+  */
+ public static function parseCIDR( $range ) {
+     if ( self::isIPv6( $range ) ) {
+         return self::parseCIDR6( $range );
+     }
+     $parts = explode( '/', $range, 2 );
+     if ( count( $parts ) != 2 ) {
+         return array( false, false );
+     }
+     list( $network, $bits ) = $parts;
+     $network = ip2long( $network );
+     $usable = $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 32;
+     if ( !$usable ) {
+         return array( false, false );
+     }
+     if ( $bits == 0 ) {
+         $network = 0;
+     } else {
+         // Clear the lowest (32 - $bits) bits
+         $network &= ~( ( 1 << ( 32 - $bits ) ) - 1 );
+     }
+     # Convert to unsigned
+     if ( $network < 0 ) {
+         $network += pow( 2, 32 );
+     }
+     return array( $network, $bits );
+ }
+
+ /**
+ * Given a string range in a number of formats,
+ * return the start and end of the range in hexadecimal.
+ *
+ * Formats are:
+ * 1.2.3.4/24 CIDR
+ * 1.2.3.4 - 1.2.3.5 Explicit range
+ * 1.2.3.4 Single IP
+ *
+ * 2001:0db8:85a3::7344/96 CIDR
+ * 2001:0db8:85a3::7344 - 2001:0db8:85a3::7344 Explicit range
+ * 2001:0db8:85a3::7344 Single IP
+ *
+ * IPv6 CIDR and IPv6 explicit ranges are handed over to parseRange6().
+ *
+ * @param string $range IP range
+ * @return array(string, string) array( false, false ) if either bound could not be determined
+ */
+ public static function parseRange( $range ) {
+ // CIDR notation
+ if ( strpos( $range, '/' ) !== false ) {
+ if ( self::isIPv6( $range ) ) {
+ return self::parseRange6( $range );
+ }
+ list( $network, $bits ) = self::parseCIDR( $range );
+ if ( $network === false ) {
+ $start = $end = false;
+ } else {
+ // The end is the network address plus the block size minus one
+ $start = sprintf( '%08X', $network );
+ $end = sprintf( '%08X', $network + pow( 2, ( 32 - $bits ) ) - 1 );
+ }
+ // Explicit range
+ } elseif ( strpos( $range, '-' ) !== false ) {
+ list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
+ if ( self::isIPv6( $start ) && self::isIPv6( $end ) ) {
+ return self::parseRange6( $range );
+ }
+ if ( self::isIPv4( $start ) && self::isIPv4( $end ) ) {
+ $start = self::toUnsigned( $start );
+ $end = self::toUnsigned( $end );
+ // A range whose start lies after its end is rejected
+ if ( $start > $end ) {
+ $start = $end = false;
+ } else {
+ $start = sprintf( '%08X', $start );
+ $end = sprintf( '%08X', $end );
+ }
+ } else {
+ // Mixed or invalid endpoints
+ $start = $end = false;
+ }
+ } else {
+ # Single IP
+ $start = $end = self::toHex( $range );
+ }
+ if ( $start === false || $end === false ) {
+ return array( false, false );
+ } else {
+ return array( $start, $end );
+ }
+ }
+
+ /**
+  * Convert a network specification in IPv6 CIDR notation to an
+  * integer network and a number of bits.
+  *
+  * The network is returned as a base 10 number string with the host
+  * bits cleared.
+  *
+  * @param $range
+  *
+  * @return array(string, int) array( false, false ) when there is no '/'
+  *   part; array( false, 0 ) when the network or the bits fail validation
+  */
+ private static function parseCIDR6( $range ) {
+     # Explode into <expanded IP,range>
+     $parts = explode( '/', IP::sanitizeIP( $range ), 2 );
+     if ( count( $parts ) != 2 ) {
+         return array( false, false );
+     }
+     list( $network, $bits ) = $parts;
+     $network = self::IPv6ToRawHex( $network );
+     $usable = $network !== false && is_numeric( $bits ) && $bits >= 0 && $bits <= 128;
+     if ( !$usable ) {
+         // The bit count is always cast to int, so a failed one becomes 0
+         return array( false, (int)false );
+     }
+     if ( $bits == 0 ) {
+         $network = "0";
+     } else {
+         # Native 32 bit functions WONT work here!!!
+         # Convert to a padded binary number
+         $binary = wfBaseConvert( $network, 16, 2, 128 );
+         # Truncate the last (128-$bits) bits and replace them with zeros
+         $binary = str_pad( substr( $binary, 0, $bits ), 128, 0, STR_PAD_RIGHT );
+         # Convert back to an integer
+         $network = wfBaseConvert( $binary, 2, 10 );
+     }
+     return array( $network, (int)$bits );
+ }
+
+ /**
+  * Given a string range in a number of formats, return the
+  * start and end of the range in hexadecimal. For IPv6.
+  *
+  * Formats are:
+  * 2001:0db8:85a3::7344/96 CIDR
+  * 2001:0db8:85a3::7344 - 2001:0db8:85a3::7344 Explicit range
+  * 2001:0db8:85a3::7344 Single IP
+  *
+  * Both bounds carry the "v6-" prefix described in toHex().
+  *
+  * @param $range
+  *
+  * @return array(string, string) array( false, false ) if the range is
+  *   invalid, including an explicit range whose start lies after its end
+  */
+ private static function parseRange6( $range ) {
+     # Expand any IPv6 IP
+     $range = IP::sanitizeIP( $range );
+     // CIDR notation...
+     if ( strpos( $range, '/' ) !== false ) {
+         list( $network, $bits ) = self::parseCIDR6( $range );
+         if ( $network === false ) {
+             $start = $end = false;
+         } else {
+             $start = wfBaseConvert( $network, 10, 16, 32, false );
+             # Turn network to binary (again)
+             $end = wfBaseConvert( $network, 10, 2, 128 );
+             # Truncate the last (128-$bits) bits and replace them with ones
+             $end = str_pad( substr( $end, 0, $bits ), 128, 1, STR_PAD_RIGHT );
+             # Convert to hex
+             $end = wfBaseConvert( $end, 2, 16, 32, false );
+             # see toHex() comment
+             $start = "v6-$start";
+             $end = "v6-$end";
+         }
+     // Explicit range notation...
+     } elseif ( strpos( $range, '-' ) !== false ) {
+         list( $start, $end ) = array_map( 'trim', explode( '-', $range, 2 ) );
+         $startHex = wfBaseConvert( self::toUnsigned6( $start ), 10, 16, 32, false );
+         $endHex = wfBaseConvert( self::toUnsigned6( $end ), 10, 16, 32, false );
+         // Both values are fixed-width (32 digit) upper case hex strings, so
+         // strcmp() orders them numerically. Comparing the base 10 strings
+         // with ">" would compare them as floats and lose precision.
+         if ( !is_string( $startHex ) || !is_string( $endHex )
+             || strcmp( $startHex, $endHex ) > 0
+         ) {
+             // The prefix is NOT added here, so the failure stays visible
+             // to the false check at the end of this method.
+             $start = $end = false;
+         } else {
+             # see toHex() comment
+             $start = "v6-$startHex";
+             $end = "v6-$endHex";
+         }
+     } else {
+         # Single IP
+         $start = $end = self::toHex( $range );
+     }
+     if ( $start === false || $end === false ) {
+         return array( false, false );
+     } else {
+         return array( $start, $end );
+     }
+ }
+
+ /**
+  * Determine if a given IPv4/IPv6 address is in a given CIDR network
+  *
+  * The address and both range bounds are compared in their toHex() form.
+  * If the address or the range cannot be converted, false is returned.
+  *
+  * @param string $addr the address to check against the given range.
+  * @param string $range the range to check the given address against.
+  * @return Boolean: whether or not the given address is in the given range.
+  */
+ public static function isInRange( $addr, $range ) {
+     $hexIP = self::toHex( $addr );
+     list( $start, $end ) = self::parseRange( $range );
+     // Without this guard an invalid address checked against an invalid
+     // range would compare false with false and be reported as a match.
+     if ( $hexIP === false || $start === false || $end === false ) {
+         return false;
+     }
+     return ( strcmp( $hexIP, $start ) >= 0 &&
+         strcmp( $hexIP, $end ) <= 0 );
+ }
+
+ /**
+ * Convert some unusual representations of IPv4 addresses to their
+ * canonical dotted quad representation.
+ *
+ * This currently only checks a few IPV4-to-IPv6 related cases. More
+ * unusual representations may be added later.
+ *
+ * An address that already passes isValid() (IPv4 or IPv6) is returned
+ * as it is, after any zone info has been removed.
+ *
+ * @param string $addr something that might be an IP address
+ * @return String: the unchanged valid address, a dotted quad IPv4
+ *   address, or null if no conversion applies
+ */
+ public static function canonicalize( $addr ) {
+ // remove zone info (bug 35738)
+ $addr = preg_replace( '/\%.*/', '', $addr );
+
+ if ( self::isValid( $addr ) ) {
+ return $addr;
+ }
+ // Turn mapped addresses from ::ce:ffff:1.2.3.4 to 1.2.3.4
+ if ( strpos( $addr, ':' ) !== false && strpos( $addr, '.' ) !== false ) {
+ // Keep only what follows the last colon; note that $addr stays
+ // shortened for the checks below if this is not IPv4
+ $addr = substr( $addr, strrpos( $addr, ':' ) + 1 );
+ if ( self::isIPv4( $addr ) ) {
+ return $addr;
+ }
+ }
+ // IPv6 loopback address
+ $m = array();
+ if ( preg_match( '/^0*' . RE_IPV6_GAP . '1$/', $addr, $m ) ) {
+ return '127.0.0.1';
+ }
+ // IPv4-mapped and IPv4-compatible IPv6 addresses
+ if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . '(' . RE_IP_ADD . ')$/i', $addr, $m ) ) {
+ return $m[1];
+ }
+ // Same, with the IPv4 part written as two hex words: the first word
+ // supplies the upper 16 bits and the second the lower 16 bits
+ if ( preg_match( '/^' . RE_IPV6_V4_PREFIX . RE_IPV6_WORD .
+ ':' . RE_IPV6_WORD . '$/i', $addr, $m ) )
+ {
+ return long2ip( ( hexdec( $m[1] ) << 16 ) + hexdec( $m[2] ) );
+ }
+
+ return null; // give up
+ }
+
+ /**
+  * Gets rid of unneeded numbers in quad-dotted/octet IP strings
+  * For example, 127.111.113.151/24 -> 127.111.113.0/24
+  *
+  * The start of the range is taken from parseRange() and formatted with
+  * formatHex(); the bit count comes from parseCIDR().
+  *
+  * @param string $range IP address to normalize
+  * @return string The start address alone if parseCIDR() reports no bit count
+  */
+ public static function sanitizeRange( $range ) {
+     $cidr = self::parseCIDR( $range );
+     $bounds = self::parseRange( $range );
+     $bits = $cidr[1];
+     $start = self::formatHex( $bounds[0] );
+     // A false bit count means this wasn't actually a range
+     return ( $bits === false ) ? $start : "$start/$bits";
+ }
+}
--- /dev/null
+<?php
+/**
+ * A cryptographic random generator class used for generating secret keys
+ *
+ * This is based in part on Drupal code as well as what we used in our own code
+ * prior to introduction of this class.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Daniel Friesen
+ * @file
+ */
+
+class MWCryptRand {
+
+ /**
+ * Minimum number of iterations we want to make in our drift calculations.
+ */
+ const MIN_ITERATIONS = 1000;
+
+ /**
+ * Number of milliseconds we want to spend generating each separate byte
+ * of the final generated bytes.
+ * This is used in combination with the hash length to determine the duration
+ * we should spend doing drift calculations.
+ */
+ const MSEC_PER_BYTE = 0.5;
+
+ /**
+ * Singleton instance for public use
+ */
+ protected static $singleton = null;
+
+ /**
+ * The hash algorithm being used
+ */
+ protected $algo = null;
+
+ /**
+ * The number of bytes outputted by the hash algorithm
+ */
+ protected $hashLength = null;
+
+ /**
+ * A boolean indicating whether the previous random generation was done using
+ * cryptographically strong random number generator or not.
+ */
+ protected $strong = null;
+
+ /**
+ * Initialize an initial random state based off of whatever we can find
+ */
+ protected function initialRandomState() {
+     // $_SERVER contains a variety of unstable user and system specific information.
+     // It'll vary a little with each page, and vary even more with separate users.
+     // It'll also vary slightly across different machines.
+     $state = serialize( $_SERVER );
+
+     // Vary the system information of the state a bit more
+     // by including the system's hostname into the state
+     $state .= wfHostname();
+
+     // Try to gather a little entropy from the different php rand sources
+     $state .= rand() . uniqid( mt_rand(), true );
+
+     // Include some information about the filesystem's current state in the random state
+     $files = array();
+
+     // We know this file is here so grab some info about ourselves
+     $files[] = __FILE__;
+
+     // We must also have a parent folder, and with the usual file structure, a grandparent
+     $files[] = __DIR__;
+     $files[] = dirname( __DIR__ );
+
+     // The config file is likely the most often edited file we know should be around
+     // so include its stat info into the state.
+     // The constant with its location will almost always be defined, as WebStart.php defines
+     // MW_CONFIG_FILE to $IP/LocalSettings.php unless being configured with MW_CONFIG_CALLBACK (eg. the installer)
+     if ( defined( 'MW_CONFIG_FILE' ) ) {
+         $files[] = MW_CONFIG_FILE;
+     }
+
+     foreach ( $files as $file ) {
+         wfSuppressWarnings();
+         $stat = stat( $file );
+         wfRestoreWarnings();
+         if ( $stat ) {
+             // stat() returns every field twice, once under a numeric key and once under
+             // a string key. Remove the numeric copies from the array itself (unsetting
+             // only the loop variable would leave $stat untouched) so that each field is
+             // mixed into the state a single time.
+             foreach ( array_keys( $stat ) as $k ) {
+                 if ( is_numeric( $k ) ) {
+                     unset( $stat[$k] );
+                 }
+             }
+             // The absolute filename itself will differ from install to install so don't leave it out
+             if ( ( $path = realpath( $file ) ) !== false ) {
+                 $state .= $path;
+             } else {
+                 $state .= $file;
+             }
+             $state .= implode( '', $stat );
+         } else {
+             // The fact that the file isn't there is worth at least a
+             // minuscule amount of entropy.
+             $state .= '0';
+         }
+     }
+
+     // Try and make this a little more unstable by including the varying process
+     // id of the php process we are running inside of if we are able to access it
+     if ( function_exists( 'getmypid' ) ) {
+         $state .= getmypid();
+     }
+
+     // If available try to increase the instability of the data by throwing in
+     // the precise amount of memory that we happen to be using at the moment.
+     if ( function_exists( 'memory_get_usage' ) ) {
+         $state .= memory_get_usage( true );
+     }
+
+     // It's mostly worthless but throw the wiki's id into the data for a little more variance
+     $state .= wfWikiID();
+
+     // If we have a secret key or proxy key set then throw it into the state as well.
+     // Only one of the two is used; the secret key takes precedence.
+     global $wgSecretKey, $wgProxyKey;
+     if ( $wgSecretKey ) {
+         $state .= $wgSecretKey;
+     } elseif ( $wgProxyKey ) {
+         $state .= $wgProxyKey;
+     }
+
+     return $state;
+ }
+
+ /**
+ * Randomly hash data while mixing in clock drift data for randomness
+ *
+ * @param string $data The data to randomly hash.
+ * @return String The hashed bytes
+ * @author Tim Starling
+ */
+ protected function driftHash( $data ) {
+     // Lower bound on loop rounds, so that slow operations cannot leave us with little entropy
+     $roundFloor = self::MIN_ITERATIONS;
+     // Time budget for the loop, in seconds
+     $timeBudget = ( self::MSEC_PER_BYTE / 1000 ) * $this->hashLength();
+     // Scratch buffer that is written to on every round to trigger memory operations
+     $scratchSize = 10000000;
+     $scratch = str_repeat( ' ', $scratchSize );
+     $cursor = 0;
+
+     // Keep going until both the round floor and the time budget have been reached
+     $startedAt = microtime( true );
+     $lastTick = $startedAt;
+     for ( $rounds = 0; $rounds < $roundFloor || $lastTick - $startedAt < $timeBudget; $rounds++ ) {
+         // Write into the scratch buffer to cause some bus activity;
+         // this may create variance in the time between rounds
+         $cursor = ( $cursor + 13 ) % $scratchSize;
+         $scratch[$cursor] = ' ';
+         // Mix in the microseconds elapsed since the previous round as entropy
+         $tick = microtime( true );
+         $data .= (int)( ( $tick - $lastTick ) * 1000000 );
+         // Fold the accumulated data down with sha1 on every 100th round
+         if ( $rounds % 100 === 0 ) {
+             $data = sha1( $data );
+         }
+         $lastTick = $tick;
+     }
+     $elapsed = $lastTick - $startedAt;
+     $data = $this->hash( $data );
+
+     wfDebug( __METHOD__ . ": Clock drift calculation " .
+         "(time-taken=" . ( $elapsed * 1000 ) . "ms, " .
+         "iterations=$rounds, " .
+         "time-per-iteration=" . ( $elapsed / $rounds * 1e6 ) . "us)\n" );
+     return $data;
+ }
+
+ /**
+ * Return a rolling random state initially built using data from unstable sources
+ * @return string A new weak random state
+ */
+ protected function randomState() {
+     // The state lives in a function-level static, so it rolls forward across calls
+     static $state = null;
+     if ( $state === null ) {
+         // Seed from whatever unstable data is available. The seed is hashed
+         // immediately so the raw data cannot leak into the output stream.
+         $seed = $this->initialRandomState();
+         $state = $this->hash( $seed );
+     }
+     // Advance the state by combining the seed or previous state with clock drift
+     return $state = $this->driftHash( $state );
+ }
+
+ /**
+ * Decide on the best acceptable hash algorithm we have available for hash()
+ * @throws MWException
+ * @return String A hash algorithm
+ */
+ protected function hashAlgo() {
+     // Reuse the choice made by an earlier call
+     if ( $this->algo !== null ) {
+         return $this->algo;
+     }
+
+     // Candidates in order of preference; array_intersect() keeps the order of its
+     // first argument, so the first surviving entry is the most preferred one.
+     $candidates = array( 'whirlpool', 'sha256', 'sha1', 'md5' );
+     $usable = array_intersect( $candidates, hash_algos() );
+
+     if ( !$usable ) {
+         // We only reach here if no acceptable hash is found in the list, this should
+         // be a technical impossibility since most of php's hash list is fixed and
+         // some of the ones we list are available as their own native functions
+         // But since we already require at least 5.2 and hash() was default in
+         // 5.1.2 we don't bother falling back to methods like sha1 and md5.
+         throw new MWException( "Could not find an acceptable hashing function in hash_algos()" );
+     }
+
+     $this->algo = reset( $usable );
+     wfDebug( __METHOD__ . ": Using the {$this->algo} hash algorithm.\n" );
+     return $this->algo;
+ }
+
+ /**
+ * Return the byte-length output of the hash algorithm we are
+ * using in self::hash and self::hmac.
+ *
+ * @return int Number of bytes the hash outputs
+ */
+ protected function hashLength() {
+     if ( $this->hashLength === null ) {
+         // Measure once by hashing an empty string, then remember the result
+         $emptyDigest = $this->hash( '' );
+         $this->hashLength = strlen( $emptyDigest );
+     }
+     return $this->hashLength;
+ }
+
+ /**
+ * Generate an acceptably unstable one-way-hash of some text
+ * making use of the best hash algorithm that we have available.
+ *
+ * @param $data string
+ * @return String A raw hash of the data
+ */
+ protected function hash( $data ) {
+     $algorithm = $this->hashAlgo();
+     // The third argument asks for raw binary output rather than hex
+     return hash( $algorithm, $data, true );
+ }
+
+ /**
+ * Generate an acceptably unstable one-way-hmac of some text
+ * making use of the best hash algorithm that we have available.
+ *
+ * @param $data string
+ * @param $key string
+ * @return String A raw hash of the data
+ */
+ protected function hmac( $data, $key ) {
+     $algorithm = $this->hashAlgo();
+     // The fourth argument asks for raw binary output rather than hex
+     return hash_hmac( $algorithm, $data, $key, true );
+ }
+
+ /**
+ * @see self::wasStrong()
+ */
+ public function realWasStrong() {
+     // The flag stays null until the first generation request has been handled
+     if ( $this->strong !== null ) {
+         return $this->strong;
+     }
+     throw new MWException( __METHOD__ . ' called before generation of random data' );
+ }
+
+ /**
+ * @see self::generate()
+ */
+ public function realGenerate( $bytes, $forceStrong = false ) {
+     wfProfileIn( __METHOD__ );
+
+     wfDebug( __METHOD__ . ": Generating cryptographic random bytes for " . wfGetAllCallers( 5 ) . "\n" );
+
+     // floor() returns a float; normalise the request to a non-negative integer so the
+     // length arithmetic and substr() calls below always work on a sane byte count.
+     // (A negative count would otherwise slice the shared buffer from its end.)
+     $bytes = max( 0, (int)floor( $bytes ) );
+     // Random bytes left over from earlier calls; shared by all calls in this process
+     static $buffer = '';
+     if ( is_null( $this->strong ) ) {
+         // Start out assuming a strong source; the openssl and fallback paths below
+         // downgrade this once they learn that weaker data was used.
+         $this->strong = true;
+     }
+
+     if ( strlen( $buffer ) < $bytes ) {
+         // If available make use of mcrypt_create_iv URANDOM source to generate randomness
+         // On unix-like systems this reads from /dev/urandom but does it without any buffering
+         // and bypasses openbasedir restrictions, so it's preferable to reading directly
+         // On Windows starting in PHP 5.3.0 Windows' native CryptGenRandom is used to generate
+         // entropy so this is also preferable to just trying to read urandom because it may work
+         // on Windows systems as well.
+         if ( function_exists( 'mcrypt_create_iv' ) ) {
+             wfProfileIn( __METHOD__ . '-mcrypt' );
+             $rem = $bytes - strlen( $buffer );
+             $iv = mcrypt_create_iv( $rem, MCRYPT_DEV_URANDOM );
+             if ( $iv === false ) {
+                 wfDebug( __METHOD__ . ": mcrypt_create_iv returned false.\n" );
+             } else {
+                 $buffer .= $iv;
+                 wfDebug( __METHOD__ . ": mcrypt_create_iv generated " . strlen( $iv ) . " bytes of randomness.\n" );
+             }
+             wfProfileOut( __METHOD__ . '-mcrypt' );
+         }
+     }
+
+     if ( strlen( $buffer ) < $bytes ) {
+         // If available make use of openssl's random_pseudo_bytes method to attempt to generate randomness.
+         // However don't do this on Windows with PHP < 5.3.4 due to a bug:
+         // http://stackoverflow.com/questions/1940168/openssl-random-pseudo-bytes-is-slow-php
+         // http://git.php.net/?p=php-src.git;a=commitdiff;h=cd62a70863c261b07f6dadedad9464f7e213cad5
+         if ( function_exists( 'openssl_random_pseudo_bytes' )
+             && ( !wfIsWindows() || version_compare( PHP_VERSION, '5.3.4', '>=' ) )
+         ) {
+             wfProfileIn( __METHOD__ . '-openssl' );
+             $rem = $bytes - strlen( $buffer );
+             $openssl_bytes = openssl_random_pseudo_bytes( $rem, $openssl_strong );
+             if ( $openssl_bytes === false ) {
+                 wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes returned false.\n" );
+             } else {
+                 $buffer .= $openssl_bytes;
+                 wfDebug( __METHOD__ . ": openssl_random_pseudo_bytes generated " . strlen( $openssl_bytes ) . " bytes of " . ( $openssl_strong ? "strong" : "weak" ) . " randomness.\n" );
+             }
+             if ( strlen( $buffer ) >= $bytes ) {
+                 // openssl tells us if the random source was strong; if it completed
+                 // the request, take its word on whether the randomness is strong
+                 $this->strong = !!$openssl_strong;
+             }
+             wfProfileOut( __METHOD__ . '-openssl' );
+         }
+     }
+
+     // Only read from urandom if we can control the buffer size or were passed forceStrong
+     if ( strlen( $buffer ) < $bytes && ( function_exists( 'stream_set_read_buffer' ) || $forceStrong ) ) {
+         wfProfileIn( __METHOD__ . '-fopen-urandom' );
+         $rem = $bytes - strlen( $buffer );
+         if ( !function_exists( 'stream_set_read_buffer' ) && $forceStrong ) {
+             wfDebug( __METHOD__ . ": Was forced to read from /dev/urandom without control over the buffer size.\n" );
+         }
+         // /dev/urandom is generally considered the best possible commonly
+         // available random source, and is available on most *nix systems.
+         wfSuppressWarnings();
+         $urandom = fopen( "/dev/urandom", "rb" );
+         wfRestoreWarnings();
+
+         // Attempt to read all our random data from urandom
+         // php's fread always does buffered reads based on the stream's chunk_size
+         // so in reality it will usually read more than the amount of data we're
+         // asked for and not storing that risks depleting the system's random pool.
+         // If stream_set_read_buffer is available set the chunk_size to the amount
+         // of data we need. Otherwise read 8k, php's default chunk_size.
+         if ( $urandom ) {
+             // php's default chunk_size is 8k
+             $chunk_size = 1024 * 8;
+             if ( function_exists( 'stream_set_read_buffer' ) ) {
+                 // If possible set the chunk_size to the amount of data we need
+                 stream_set_read_buffer( $urandom, $rem );
+                 $chunk_size = $rem;
+             }
+             $random_bytes = fread( $urandom, max( $chunk_size, $rem ) );
+             $buffer .= $random_bytes;
+             fclose( $urandom );
+             wfDebug( __METHOD__ . ": /dev/urandom generated " . strlen( $random_bytes ) . " bytes of randomness.\n" );
+             if ( strlen( $buffer ) >= $bytes ) {
+                 // urandom is treated as strong; mark the result strong once it has completed the request
+                 $this->strong = true;
+             }
+         } else {
+             wfDebug( __METHOD__ . ": /dev/urandom could not be opened.\n" );
+         }
+         wfProfileOut( __METHOD__ . '-fopen-urandom' );
+     }
+
+     // If we cannot use or generate enough data from a secure source
+     // use this loop to generate a good set of pseudo random data.
+     // This works by initializing a random state using a pile of unstable data
+     // and continually shoving it through a hash along with a variable salt.
+     // We hash the random state with more salt to avoid the state from leaking
+     // out and being used to predict the /randomness/ that follows.
+     if ( strlen( $buffer ) < $bytes ) {
+         wfDebug( __METHOD__ . ": Falling back to using a pseudo random state to generate randomness.\n" );
+     }
+     while ( strlen( $buffer ) < $bytes ) {
+         wfProfileIn( __METHOD__ . '-fallback' );
+         $buffer .= $this->hmac( $this->randomState(), mt_rand() );
+         // This code is never really cryptographically strong, if we use it
+         // at all, then set strong to false.
+         $this->strong = false;
+         wfProfileOut( __METHOD__ . '-fallback' );
+     }
+
+     // Once the buffer has been filled up with enough random data to fulfill
+     // the request shift off enough data to handle the request and leave the
+     // unused portion left inside the buffer for the next request for random data
+     $generated = substr( $buffer, 0, $bytes );
+     $buffer = substr( $buffer, $bytes );
+
+     wfDebug( __METHOD__ . ": " . strlen( $buffer ) . " bytes of randomness leftover in the buffer.\n" );
+
+     wfProfileOut( __METHOD__ );
+     return $generated;
+ }
+
+ /**
+ * @see self::generateHex()
+ */
+ public function realGenerateHex( $chars, $forceStrong = false ) {
+     // Two hex characters encode one raw byte. Rounding up makes sure an odd
+     // request still gets enough bytes to cover its final character.
+     $byteCount = ceil( $chars / 2 );
+     $raw = $this->generate( $byteCount, $forceStrong );
+     $hex = bin2hex( $raw );
+     // For odd requests the hex string is one character longer than asked for.
+     // Callers may store the value somewhere that truncates (e.g. a database
+     // column), which would leave the stored and live values out of sync, so
+     // hand back exactly the number of characters requested.
+     return substr( $hex, 0, $chars );
+ }
+
+ /** Publicly exposed static methods **/
+
+ /**
+ * Return a singleton instance of MWCryptRand
+ * @return MWCryptRand
+ */
+ protected static function singleton() {
+     // Create the shared instance on first use only
+     if ( self::$singleton === null ) {
+         self::$singleton = new self;
+     }
+     return self::$singleton;
+ }
+
+ /**
+ * Return a boolean indicating whether or not the source used for cryptographic
+ * random bytes generation in the previously run generate* call
+ * was cryptographically strong.
+ *
+ * @return bool Returns true if the source was strong, false if not.
+ */
+ public static function wasStrong() {
+     // Static facade over the shared instance
+     $instance = self::singleton();
+     return $instance->realWasStrong();
+ }
+
+ /**
+ * Generate a run of (ideally) cryptographically random data and return
+ * it in raw binary form.
+ * You can use MWCryptRand::wasStrong() if you wish to know if the source used
+ * was cryptographically strong.
+ *
+ * @param int $bytes the number of bytes of random data to generate
+ * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
+ * strong sources of entropy even if reading from them may steal
+ * more entropy from the system than optimal.
+ * @return String Raw binary random data
+ */
+ public static function generate( $bytes, $forceStrong = false ) {
+     // Static facade over the shared instance
+     $instance = self::singleton();
+     return $instance->realGenerate( $bytes, $forceStrong );
+ }
+
+ /**
+ * Generate a run of (ideally) cryptographically random data and return
+ * it in hexadecimal string format.
+ * You can use MWCryptRand::wasStrong() if you wish to know if the source used
+ * was cryptographically strong.
+ *
+ * @param int $chars the number of hex chars of random data to generate
+ * @param bool $forceStrong Pass true if you want generate to prefer cryptographically
+ * strong sources of entropy even if reading from them may steal
+ * more entropy from the system than optimal.
+ * @return String Hexadecimal random data
+ */
+ public static function generateHex( $chars, $forceStrong = false ) {
+     // Static facade over the shared instance
+     $instance = self::singleton();
+     return $instance->realGenerateHex( $chars, $forceStrong );
+ }
+
+}
--- /dev/null
+<?php
+/**
+ * Helper methods to call functions and instance objects.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+class MWFunction {
+
+    /**
+     * Invoke a callback with the remaining arguments of this call.
+     *
+     * @deprecated since 1.22; use call_user_func()
+     * @param $callback
+     * @return mixed Whatever the callback returns
+     */
+    public static function call( $callback ) {
+        wfDeprecated( __METHOD__, '1.22' );
+        // The full argument list (callback first, then its arguments) is exactly
+        // what call_user_func() expects, so hand it over unchanged.
+        $callbackAndArgs = func_get_args();
+        return call_user_func_array( 'call_user_func', $callbackAndArgs );
+    }
+
+    /**
+     * Invoke a callback with an array of arguments.
+     *
+     * @deprecated since 1.22; use call_user_func_array()
+     * @param $callback
+     * @param $argsarams Arguments to pass to the callback
+     * @return mixed Whatever the callback returns
+     */
+    public static function callArray( $callback, $argsarams ) {
+        wfDeprecated( __METHOD__, '1.22' );
+        $result = call_user_func_array( $callback, $argsarams );
+        return $result;
+    }
+
+    /**
+     * Instantiate a class, optionally passing constructor arguments.
+     *
+     * @param $class Name of the class to instantiate
+     * @param $args array Constructor arguments; when empty the class is created without any
+     * @return object
+     */
+    public static function newObj( $class, $args = array() ) {
+        if ( count( $args ) ) {
+            // Reflection is only needed to spread an argument array over the constructor
+            $reflector = new ReflectionClass( $class );
+            return $reflector->newInstanceArgs( $args );
+        }
+
+        return new $class;
+    }
+
+}
--- /dev/null
+<?php
+/**
+ * Convenience class for generating iterators from iterators.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Convenience class for generating iterators from iterators.
+ *
+ * @since 1.21
+ */
+class MappedIterator extends FilterIterator {
+    /** @var callable Produces the exposed value from each base iterator value */
+    protected $vCallback;
+    /** @var callable|null Optional predicate run on each produced value */
+    protected $aCallback;
+    /** @var array Holds the produced value for the current position under 'current' */
+    protected $cache = array();
+
+    /** @var bool Whether rewind() has been called */
+    protected $rewound = false;
+
+    /**
+     * Wrap a base iterator (or array) so that current() yields the result of the
+     * "value" callback applied to the base iterator's current() value.
+     * Keys of the base iterator are passed through unchanged.
+     *
+     * An "accept" callback may be supplied in $options. It receives each value
+     * after the value callback has been applied; positions for which it does not
+     * return a true value are skipped during iteration.
+     *
+     * @param Iterator|Array $iter
+     * @param callable $vCallback Value transformation callback
+     * @param array $options Options map (includes "accept") (since 1.22)
+     * @throws MWException
+     */
+    public function __construct( $iter, $vCallback, array $options = array() ) {
+        if ( $iter instanceof Iterator ) {
+            $inner = $iter;
+        } elseif ( is_array( $iter ) ) {
+            $inner = new ArrayIterator( $iter );
+        } else {
+            throw new MWException( "Invalid base iterator provided." );
+        }
+        parent::__construct( $inner );
+        $this->vCallback = $vCallback;
+        $this->aCallback = null;
+        if ( isset( $options['accept'] ) ) {
+            $this->aCallback = $options['accept'];
+        }
+    }
+
+    public function next() {
+        // The cached value belongs to the position being left
+        $this->cache = array();
+        parent::next();
+    }
+
+    public function rewind() {
+        $this->rewound = true;
+        $this->cache = array();
+        parent::rewind();
+    }
+
+    public function accept() {
+        $mapped = call_user_func( $this->vCallback, $this->getInnerIterator()->current() );
+        if ( $this->aCallback ) {
+            $accepted = call_user_func( $this->aCallback, $mapped );
+        } else {
+            $accepted = true;
+        }
+        if ( $accepted ) {
+            // Keep the produced value so current() does not have to run the callback again
+            $this->cache['current'] = $mapped;
+        }
+        return $accepted;
+    }
+
+    public function key() {
+        $this->init();
+        return parent::key();
+    }
+
+    public function valid() {
+        $this->init();
+        return parent::valid();
+    }
+
+    public function current() {
+        $this->init();
+        // null signals that the iterator is out of range
+        return parent::valid() ? $this->cache['current'] : null;
+    }
+
+    /**
+     * Rewind on first use, so that a manual loop does not have to call rewind() itself
+     */
+    protected function init() {
+        if ( $this->rewound ) {
+            return;
+        }
+        $this->rewind();
+    }
+}
--- /dev/null
+The classes in this directory are general utilities for use by any part of
+MediaWiki. They do not favour any particular user interface and are not
+constrained to serve any particular feature. This is similar to includes/libs,
+except that some dependency on the MediaWiki framework (such as the use of
+MWException, Status or wfDebug()) disqualifies them from use outside of
+MediaWiki without modification.
+
+Utilities should not use global configuration variables, rather they should rely
+on the caller to configure their behaviour.
--- /dev/null
+<?php
+/**
+ * This file deals with RAII style scoped callbacks.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * Class for asserting that a callback happens when a dummy object leaves scope
+ *
+ * @since 1.21
+ */
+class ScopedCallback {
+    /** @var callable|null Function run on destruction; null once cancelled */
+    protected $callback;
+
+    /**
+     * @param callable $callback Function to run when this object is destroyed
+     * @throws MWException If $callback is not callable
+     */
+    public function __construct( $callback ) {
+        $usable = is_callable( $callback );
+        if ( !$usable ) {
+            throw new MWException( "Provided callback is not valid." );
+        }
+        $this->callback = $callback;
+    }
+
+    /**
+     * Trigger a scoped callback and destroy it.
+     * This is the same as just setting it to null.
+     *
+     * @param ScopedCallback $sc
+     */
+    public static function consume( ScopedCallback &$sc = null ) {
+        // Clearing the caller's variable lets the destructor run the callback
+        $sc = null;
+    }
+
+    /**
+     * Destroy a scoped callback without triggering it
+     *
+     * @param ScopedCallback $sc
+     */
+    public static function cancel( ScopedCallback &$sc = null ) {
+        if ( $sc !== null ) {
+            // Remove the callback first so the destructor has nothing to call
+            $sc->callback = null;
+        }
+        $sc = null;
+    }
+
+    /**
+     * Trigger the callback when this leaves scope
+     */
+    function __destruct() {
+        if ( $this->callback === null ) {
+            return;
+        }
+        call_user_func( $this->callback );
+    }
+}
--- /dev/null
+<?php
+/**
+ * Methods to play with strings.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * A collection of static methods to play with strings.
+ */
+class StringUtils {
+
+ /**
+ * Test whether a string is valid UTF-8.
+ *
+ * The function checks for invalid byte sequences and overlong encodings, but
+ * not for different normalisations.
+ *
+ * This relies internally on the mbstring function mb_check_encoding()
+ * hardcoded to check against UTF-8. Whenever the function is not available
+ * we fallback to a pure PHP implementation. Setting $disableMbstring to
+ * true will skip the use of mb_check_encoding, this is mostly intended for
+ * unit testing our internal implementation.
+ *
+ * @since 1.21
+ * @note In MediaWiki 1.21, this function did not provide proper UTF-8 validation.
+ * In particular, the pure PHP code path did not in fact check for overlong forms.
+ * Beware of this when backporting code to that version of MediaWiki.
+ *
+ * @param string $value String to check
+ * @param boolean $disableMbstring Whether to use the pure PHP
+ * implementation instead of trying mb_check_encoding. Intended for unit
+ * testing. Default: false
+ *
+ * @return boolean Whether the given $value is a valid UTF-8 encoded string
+ */
+ static function isUtf8( $value, $disableMbstring = false ) {
+     $value = (string)$value;
+
+     // Prefer mbstring when it is loaded and the caller has not opted out of it
+     $useMbstring = !$disableMbstring && function_exists( 'mb_check_encoding' );
+     if ( $useMbstring ) {
+         // Before PHP 5.4, mbstring incorrectly allows values above U+10FFFF.
+         // Probe once with such a sequence to learn whether this build rejects them.
+         static $rejectsAboveUnicode;
+         if ( $rejectsAboveUnicode === null ) {
+             $rejectsAboveUnicode = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
+         }
+
+         if ( !mb_check_encoding( $value, 'UTF-8' ) ) {
+             return false;
+         }
+         if ( $rejectsAboveUnicode ) {
+             return true;
+         }
+         // Older builds need a separate check for lead bytes beyond U+10FFFF
+         return preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $value ) === 0;
+     }
+
+     // A string without any high bytes is pure ASCII and therefore valid
+     if ( preg_match( "/[\x80-\xff]/S", $value ) === 0 ) {
+         return true;
+     }
+
+     // PCRE implements repetition using recursion; to avoid a stack overflow (and segfault)
+     // for large input, we check for invalid sequences (<= 5 bytes) rather than valid
+     // sequences, which can be as long as the input string is. Multiple short regexes are
+     // used rather than a single long regex for performance.
+     static $invalidPatterns;
+     if ( $invalidPatterns === null ) {
+         $cont = "[\x80-\xbf]";
+         $after = "(?!$cont)"; // "(?:[^\x80-\xbf]|$)" would work here
+         $invalidPatterns = array(
+             // Continuation byte at the start
+             "/^$cont/",
+
+             // ASCII byte followed by a continuation byte
+             "/[\\x00-\x7f]$cont/S",
+
+             // Illegal byte
+             "/[\xc0\xc1\xf5-\xff]/S",
+
+             // Invalid 2-byte sequence, or valid one then an extra continuation byte
+             "/[\xc2-\xdf](?!$cont$after)/S",
+
+             // Invalid 3-byte sequence, or valid one then an extra continuation byte
+             "/\xe0(?![\xa0-\xbf]$cont$after)/",
+             "/[\xe1-\xec\xee\xef](?!$cont{2}$after)/S",
+             "/\xed(?![\x80-\x9f]$cont$after)/",
+
+             // Invalid 4-byte sequence, or valid one then an extra continuation byte
+             "/\xf0(?![\x90-\xbf]$cont{2}$after)/",
+             "/[\xf1-\xf3](?!$cont{3}$after)/S",
+             "/\xf4(?![\x80-\x8f]$cont{2}$after)/",
+         );
+     }
+
+     // The string is valid only if none of the "invalid sequence" patterns match
+     foreach ( $invalidPatterns as $pattern ) {
+         if ( preg_match( $pattern, $value ) === 0 ) {
+             continue;
+         }
+         return false;
+     }
+     return true;
+ }
+
+ /**
+ * Perform an operation equivalent to
+ *
+ * preg_replace( "!$startDelim(.*?)$endDelim!", $replace, $subject );
+ *
+ * except that it's worst-case O(N) instead of O(N^2)
+ *
+ * Compared to delimiterReplace(), this implementation is fast but memory-
+ * hungry and inflexible. The memory requirements are such that I don't
+ * recommend using it on anything but guaranteed small chunks of text.
+ *
+ * @param $startDelim
+ * @param $endDelim
+ * @param $replace
+ * @param $subject
+ *
+ * @return string
+ */
+ static function hungryDelimiterReplace( $startDelim, $endDelim, $replace, $subject ) {
+     // Everything before the first start delimiter is copied through untouched
+     $pieces = explode( $startDelim, $subject );
+     $result = array_shift( $pieces );
+     foreach ( $pieces as $piece ) {
+         // Each remaining piece was preceded by a start delimiter in the subject
+         $endAt = strpos( $piece, $endDelim );
+         if ( $endAt !== false ) {
+             // Delimited section found: swap it for the replacement and keep the tail
+             $result .= $replace . substr( $piece, $endAt + strlen( $endDelim ) );
+         } else {
+             // No closing delimiter: restore the start delimiter and keep the text as is
+             $result .= $startDelim . $piece;
+         }
+     }
+     return $result;
+ }
+
+ /**
+ * Perform an operation equivalent to
+ *
+ * preg_replace_callback( "!$startDelim(.*)$endDelim!s$flags", $callback, $subject )
+ *
+ * This implementation is slower than hungryDelimiterReplace but uses far less
+ * memory. The delimiters are literal strings, not regular expressions.
+ *
+ * If the start delimiter ends with an initial substring of the end delimiter,
+ * e.g. in the case of C-style comments, the behavior differs from the model
+ * regex. In this implementation, the end must share no characters with the
+ * start, so e.g. /*\/ is not considered to be both the start and end of a
+ * comment. /*\/xy/*\/ is considered to be a single comment with contents /xy/.
+ *
+ * @param string $startDelim start delimiter
+ * @param string $endDelim end delimiter
+ * @param $callback Callback: function to call on each match
+ * @param $subject String
+ * @param string $flags regular expression flags
+ * @throws MWException
+ * @return string
+ */
+ static function delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags = '' ) {
+     $searchFrom = 0; // where the next delimiter search begins
+     $copiedUpTo = 0; // everything before this offset is already reflected in $result
+     $result = '';
+     $insideMatch = false; // true while a start delimiter is waiting for its end
+     $pattern = '!(' . preg_quote( $startDelim, '!' ) . ')|(' . preg_quote( $endDelim, '!' ) . ')!S' . $flags;
+     $compare = strpos( $flags, 'i' ) === false ? 'strcmp' : 'strcasecmp';
+     $endLength = strlen( $endDelim );
+     $subjectLength = strlen( $subject );
+     $m = array();
+
+     while ( $searchFrom < $subjectLength &&
+         preg_match( $pattern, $subject, $m, PREG_OFFSET_CAPTURE, $searchFrom ) )
+     {
+         $tokenOffset = $m[0][1];
+         $matchedLength = strlen( $m[0][0] );
+
+         // Classify the token that was found
+         if ( $m[1][0] != '' ) {
+             # The start alternative matched; while inside a match, an end delimiter
+             # sitting at the same location takes precedence over it
+             $isEnd = $insideMatch &&
+                 $compare( $endDelim, substr( $subject, $tokenOffset, $endLength ) ) == 0;
+             $tokenLength = $isEnd ? $endLength : $matchedLength;
+         } elseif ( $m[2][0] != '' ) {
+             $isEnd = true;
+             $tokenLength = $matchedLength;
+         } else {
+             throw new MWException( 'Invalid delimiter given to ' . __METHOD__ );
+         }
+
+         if ( !$isEnd ) {
+             if ( $insideMatch ) {
+                 # A start is already pending, so START START END matches the outer pair.
+                 # Step past only the *first character* of this START, so that an END
+                 # overlapping with it is not missed.
+                 $searchFrom = $tokenOffset + 1;
+             } else {
+                 # First start delimiter: continue searching after it and flush the
+                 # non-matching text that precedes it
+                 $searchFrom = $tokenOffset + $tokenLength;
+                 $result .= substr( $subject, $copiedUpTo, $tokenOffset - $copiedUpTo );
+                 $copiedUpTo = $tokenOffset;
+                 $contentPos = $searchFrom;
+                 $insideMatch = true;
+             }
+             continue;
+         }
+
+         $tokenEnd = $tokenOffset + $tokenLength;
+         if ( $insideMatch ) {
+             # Complete match: the callback gets the whole delimited text and its contents
+             $result .= call_user_func( $callback, array(
+                 substr( $subject, $copiedUpTo, $tokenEnd - $copiedUpTo ),
+                 substr( $subject, $contentPos, $tokenOffset - $contentPos )
+             ) );
+             $insideMatch = false;
+         } else {
+             # End delimiter without a preceding start: copy it through verbatim
+             $result .= substr( $subject, $searchFrom, $tokenEnd - $copiedUpTo );
+         }
+         $searchFrom = $copiedUpTo = $tokenEnd;
+     }
+
+     # Append whatever is left, including an unterminated start delimiter
+     if ( $copiedUpTo < $subjectLength ) {
+         $result .= substr( $subject, $copiedUpTo );
+     }
+     return $result;
+ }
+
+ /**
+  * Replace every delimited section of $subject with a fixed replacement
+  * string. This is roughly what
+  *
+  *   preg_replace( "!$startDelim(.*)$endDelim!$flags", $replace, $subject )
+  *
+  * would do, with the matching itself delegated to delimiterReplaceCallback().
+  *
+  * @param string $startDelim start delimiter
+  * @param string $endDelim end delimiter
+  * @param string $replace replacement string. "$1" in it is replaced by the
+  *   text between the delimiters, "$0" by the whole match including delimiters
+  * @param string $subject string to search
+  * @param string $flags regular expression flags
+  * @return String: The string with the matches replaced
+  */
+ static function delimiterReplace( $startDelim, $endDelim, $replace, $subject, $flags = '' ) {
+     // Wrap the replacement template in an object whose callback substitutes $0/$1
+     $template = new RegexlikeReplacer( $replace );
+     $callback = $template->cb();
+
+     return self::delimiterReplaceCallback( $startDelim, $endDelim, $callback, $subject, $flags );
+ }
+
+ /**
+  * explode() variant that is more or less "markup-safe": occurrences of the
+  * separator inside <...> do not split the text.
+  *
+  * @param string $separator
+  * @param string $text
+  * @return array Pieces of $text, with separators inside <...> left intact
+  */
+ static function explodeMarkup( $separator, $text ) {
+     // NUL byte used to temporarily stand in for protected separators
+     $marker = "\x00";
+
+     // The marker must be unambiguous, so drop any NUL bytes already in the input
+     $text = str_replace( $marker, '', $text );
+
+     // Inside each <...> section, swap the separator for the marker
+     $masker = new DoubleReplacer( $separator, $marker );
+     $masked = StringUtils::delimiterReplaceCallback( '<', '>', $masker->cb(), $text );
+
+     // Split on the separators that remain, then restore the protected ones
+     $pieces = explode( $separator, $masked );
+     foreach ( $pieces as $idx => $piece ) {
+         $pieces[$idx] = str_replace( $marker, $separator, $piece );
+     }
+
+     return $pieces;
+ }
+
+ /**
+  * Escape a string so it can be used verbatim as the replacement argument
+  * of preg_replace(): backslashes are doubled and dollar signs are prefixed
+  * with a backslash.
+  *
+  * @param string $string
+  * @return string
+  */
+ static function escapeRegexReplacement( $string ) {
+     // str_replace() applies array searches left to right, so backslashes are
+     // doubled before the backslash-dollar pairs are introduced.
+     return str_replace(
+         array( '\\', '$' ),
+         array( '\\\\', '\\$' ),
+         $string
+     );
+ }
+
+ /**
+  * Workalike for explode() with limited memory usage.
+  *
+  * Subjects containing more than 1000 separators are walked lazily with an
+  * ExplodeIterator; anything smaller is exploded up front and wrapped in an
+  * ArrayIterator.
+  *
+  * @param string $separator
+  * @param string $subject
+  * @return ArrayIterator|ExplodeIterator
+  */
+ static function explode( $separator, $subject ) {
+     $separatorCount = substr_count( $subject, $separator );
+     if ( $separatorCount > 1000 ) {
+         return new ExplodeIterator( $separator, $subject );
+     }
+
+     return new ArrayIterator( explode( $separator, $subject ) );
+ }
+}
+
+/**
+ * Base class for "replacers", objects used in preg_replace_callback() and
+ * StringUtils::delimiterReplaceCallback().
+ *
+ * Subclasses supply a replace( $matches ) method; cb() hands that method out
+ * as a callable.
+ */
+class Replacer {
+
+    /**
+     * Get a callable that invokes this object's replace() method.
+     *
+     * @return array Callable of the form array( object, method name )
+     */
+    function cb() {
+        // Objects are passed by handle, so the PHP 4 style reference to $this
+        // is not needed to make the callback act on this instance.
+        return array( $this, 'replace' );
+    }
+}
+
+/**
+ * Replacer that expands a preg_replace()-style template: "$0", "$1", ... in
+ * the template are substituted with the corresponding match entries.
+ */
+class RegexlikeReplacer extends Replacer {
+    /** Replacement template, as passed to the constructor */
+    public $r;
+
+    /**
+     * @param string $r Replacement template
+     */
+    function __construct( $r ) {
+        $this->r = $r;
+    }
+
+    /**
+     * Expand the template for one match.
+     *
+     * @param array $matches Match array; key N supplies the value for "$N"
+     * @return string
+     */
+    function replace( $matches ) {
+        // Build a "$N" => text map and let strtr() substitute in a single pass
+        $substitutions = array();
+        foreach ( $matches as $group => $text ) {
+            $substitutions['$' . $group] = $text;
+        }
+
+        return strtr( $this->r, $substitutions );
+    }
+
+}
+
+/**
+ * Class to perform secondary replacement within each replacement string:
+ * the selected match entry is returned with every occurrence of one string
+ * replaced by another.
+ */
+class DoubleReplacer extends Replacer {
+    // Declared explicitly (like HashtableReplacer does) instead of being
+    // created on the fly as dynamic properties in the constructor.
+    var $from, $to, $index;
+
+    /**
+     * @param $from String (or array of strings) to search for, as for str_replace()
+     * @param $to Replacement, as for str_replace()
+     * @param int $index Key of the match entry to operate on (0 = whole match)
+     */
+    function __construct( $from, $to, $index = 0 ) {
+        $this->from = $from;
+        $this->to = $to;
+        $this->index = $index;
+    }
+
+    /**
+     * @param array $matches Match array supplied by the caller of the callback
+     * @return mixed Result of str_replace() on the selected match entry
+     */
+    function replace( $matches ) {
+        return str_replace( $this->from, $this->to, $matches[$this->index] );
+    }
+}
+
+/**
+ * Replacer that maps the matched text to a value through a lookup table.
+ */
+class HashtableReplacer extends Replacer {
+    /** Lookup table, keyed by matched text */
+    public $table;
+
+    /** Key of the match entry used as the lookup key */
+    public $index;
+
+    /**
+     * @param $table Lookup table (matched text => replacement)
+     * @param int $index Key of the match entry to look up (0 = whole match)
+     */
+    function __construct( $table, $index = 0 ) {
+        $this->index = $index;
+        $this->table = $table;
+    }
+
+    /**
+     * @param array $matches
+     * @return mixed The table entry for the selected match
+     */
+    function replace( $matches ) {
+        $lookupKey = $matches[$this->index];
+        return $this->table[$lookupKey];
+    }
+}
+
+/**
+ * A strtr()-style replacement table that uses the FSS extension
+ * (fss_prep_replace() / fss_exec_replace()) when it is loaded and falls back
+ * to strtr() otherwise. The FSS resource is built lazily on first use and
+ * discarded whenever the table changes.
+ */
+class ReplacementArray {
+    /** Replacement pairs (from => to); mostly private */
+    public $data = false;
+
+    /** Prepared FSS resource, or false when it has to be (re)built; mostly private */
+    public $fss = false;
+
+    /**
+     * Create an object with the specified replacement array.
+     * The array has the same form as the replacement array for strtr().
+     *
+     * @param array $data
+     */
+    function __construct( $data = array() ) {
+        $this->data = $data;
+    }
+
+    /**
+     * Serialize only the replacement pairs, never the FSS resource.
+     *
+     * @return array
+     */
+    function __sleep() {
+        return array( 'data' );
+    }
+
+    /**
+     * Mark the FSS resource as not built after unserialization.
+     */
+    function __wakeup() {
+        $this->fss = false;
+    }
+
+    /**
+     * Replace the whole table at once.
+     *
+     * @param array $data
+     */
+    function setArray( $data ) {
+        $this->fss = false;
+        $this->data = $data;
+    }
+
+    /**
+     * @return array|bool The current replacement pairs
+     */
+    function getArray() {
+        return $this->data;
+    }
+
+    /**
+     * Add or overwrite a single replacement pair.
+     *
+     * @param string $from
+     * @param string $to
+     */
+    function setPair( $from, $to ) {
+        $this->fss = false;
+        $this->data[$from] = $to;
+    }
+
+    /**
+     * Merge a plain array of pairs into the table (array_merge() semantics).
+     *
+     * @param array $data
+     */
+    function mergeArray( $data ) {
+        $merged = array_merge( $this->data, $data );
+        $this->data = $merged;
+        $this->fss = false;
+    }
+
+    /**
+     * Merge the pairs of another ReplacementArray into this one.
+     *
+     * @param ReplacementArray $other
+     */
+    function merge( $other ) {
+        $merged = array_merge( $this->data, $other->data );
+        $this->data = $merged;
+        $this->fss = false;
+    }
+
+    /**
+     * Remove the pair with the given source string, if present.
+     *
+     * @param string $from
+     */
+    function removePair( $from ) {
+        $this->fss = false;
+        unset( $this->data[$from] );
+    }
+
+    /**
+     * Remove every pair whose source string is a key of $data.
+     *
+     * @param array $data Only the keys are used
+     */
+    function removeArray( $data ) {
+        foreach ( $data as $from => $ignored ) {
+            $this->removePair( $from );
+        }
+        $this->fss = false;
+    }
+
+    /**
+     * Apply the replacement table to a string.
+     *
+     * @param string $subject
+     * @return string
+     */
+    function replace( $subject ) {
+        if ( !function_exists( 'fss_prep_replace' ) ) {
+            // No FSS extension: plain strtr()
+            wfProfileIn( __METHOD__ . '-strtr' );
+            $result = strtr( $subject, $this->data );
+            wfProfileOut( __METHOD__ . '-strtr' );
+            return $result;
+        }
+
+        wfProfileIn( __METHOD__ . '-fss' );
+        // Build the FSS resource on first use after any change to the table
+        if ( $this->fss === false ) {
+            $this->fss = fss_prep_replace( $this->data );
+        }
+        $result = fss_exec_replace( $this->fss, $subject );
+        wfProfileOut( __METHOD__ . '-fss' );
+        return $result;
+    }
+}
+
+/**
+ * An iterator which works exactly like:
+ *
+ * foreach ( explode( $delim, $s ) as $element ) {
+ * ...
+ * }
+ *
+ * Except it doesn't use 193 byte per element: pieces are cut out of the
+ * subject one at a time as the iteration advances. The iteration key is the
+ * byte offset at which the current piece starts.
+ */
+class ExplodeIterator implements Iterator {
+    /** The subject string */
+    public $subject;
+
+    /** Cached strlen() of the subject */
+    public $subjectLength;
+
+    /** The delimiter */
+    public $delim;
+
+    /** Cached strlen() of the delimiter */
+    public $delimLength;
+
+    /** Offset of the start of the current piece, or false once exhausted */
+    public $curPos;
+
+    /** Offset of the next delimiter at or after $curPos, or false if there is none */
+    public $endPos;
+
+    /** The current piece */
+    public $current;
+
+    /**
+     * Construct an ExplodeIterator
+     * @param string $delim
+     * @param string $subject
+     */
+    function __construct( $delim, $subject ) {
+        $this->subject = $subject;
+        $this->delim = $delim;
+
+        // Micro-optimisation (theoretical): measure both strings only once
+        $this->subjectLength = strlen( $subject );
+        $this->delimLength = strlen( $delim );
+
+        $this->rewind();
+    }
+
+    /**
+     * Restart at the first piece.
+     */
+    function rewind() {
+        $this->curPos = 0;
+        $this->endPos = strpos( $this->subject, $this->delim );
+        $this->refreshCurrent();
+    }
+
+    /**
+     * Recompute $current from $curPos and $endPos.
+     */
+    function refreshCurrent() {
+        $start = $this->curPos;
+
+        if ( $start === false ) {
+            // Iteration is over
+            $this->current = false;
+            return;
+        }
+        if ( $start >= $this->subjectLength ) {
+            // Empty last piece (empty subject, or subject ending in a delimiter)
+            $this->current = '';
+            return;
+        }
+        if ( $this->endPos === false ) {
+            // No further delimiter: the rest of the subject is the last piece
+            $this->current = substr( $this->subject, $start );
+            return;
+        }
+
+        $this->current = substr( $this->subject, $start, $this->endPos - $start );
+    }
+
+    /**
+     * @return string|bool The current piece, or false if the iterator is exhausted
+     */
+    function current() {
+        return $this->current;
+    }
+
+    /**
+     * @return int|bool Current position or boolean false if invalid
+     */
+    function key() {
+        return $this->curPos;
+    }
+
+    /**
+     * Advance to the next piece.
+     *
+     * @return string|bool The new current piece, or false if there is none
+     */
+    function next() {
+        if ( $this->endPos === false ) {
+            // The piece just visited was the last one
+            $this->curPos = false;
+        } else {
+            $nextStart = $this->endPos + $this->delimLength;
+            $this->curPos = $nextStart;
+            $this->endPos = ( $nextStart >= $this->subjectLength )
+                ? false
+                : strpos( $this->subject, $this->delim, $nextStart );
+        }
+        $this->refreshCurrent();
+
+        return $this->current;
+    }
+
+    /**
+     * @return bool
+     */
+    function valid() {
+        return $this->curPos !== false;
+    }
+}
--- /dev/null
+<?php
+/**
+ * This file deals with UID generation.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @author Aaron Schulz
+ */
+
+/**
+ * Class for getting statistically unique IDs
+ *
+ * @since 1.21
+ */
+class UIDGenerator {
+    /** @var UIDGenerator */
+    protected static $instance = null;
+
+    protected $nodeId32; // string; node ID in binary (32 bits)
+    protected $nodeId48; // string; node ID in binary (48 bits)
+
+    protected $lockFile88; // string; local file path
+    protected $lockFile128; // string; local file path
+
+    /** @var Array */
+    protected $fileHandles = array(); // cache file handles
+
+    const QUICK_RAND = 1; // get randomness from fast and insecure sources
+
+    /**
+     * Determine the node ID of this machine (cached in a file in the temp
+     * directory) and set up the lock file paths.
+     */
+    protected function __construct() {
+        $idFile = wfTempDir() . '/mw-' . __CLASS__ . '-UID-nodeid';
+        $nodeId = is_file( $idFile ) ? file_get_contents( $idFile ) : '';
+        // Try to get some ID that uniquely identifies this machine (RFC 4122)...
+        if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
+            wfSuppressWarnings();
+            if ( wfIsWindows() ) {
+                // http://technet.microsoft.com/en-us/library/bb490913.aspx
+                $csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
+                $line = substr( $csv, 0, strcspn( $csv, "\n" ) );
+                $info = str_getcsv( $line );
+                $nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
+            } elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
+                // See http://linux.die.net/man/8/ifconfig
+                $m = array();
+                // Match case-insensitively (the validation below accepts upper-case
+                // hex too) and normalise to lower case.
+                preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/i',
+                    wfShellExec( '/sbin/ifconfig -a' ), $m );
+                $nodeId = isset( $m[1] ) ? strtolower( str_replace( ':', '', $m[1] ) ) : '';
+            }
+            wfRestoreWarnings();
+            if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
+                // No usable hardware address: fall back to a random node ID
+                $nodeId = MWCryptRand::generateHex( 12, true );
+                $nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
+            }
+            file_put_contents( $idFile, $nodeId ); // cache
+        }
+        $this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
+        $this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
+        // If different processes run as different users, they may have different temp dirs.
+        // This is dealt with by initializing the clock sequence number and counters randomly.
+        $this->lockFile88 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-88';
+        $this->lockFile128 = wfTempDir() . '/mw-' . __CLASS__ . '-UID-128';
+    }
+
+    /**
+     * @return UIDGenerator
+     */
+    protected static function singleton() {
+        if ( self::$instance === null ) {
+            self::$instance = new self();
+        }
+        return self::$instance;
+    }
+
+    /**
+     * Get a statistically unique 88-bit unsigned integer ID string.
+     * The bits of the UID are prefixed with the time (down to the millisecond).
+     *
+     * These IDs are suitable as values for the shard key of distributed data.
+     * If a column uses these as values, it should be declared UNIQUE to handle collisions.
+     * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
+     * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
+     *
+     * UID generation is serialized on each server (as the node ID is for the whole machine).
+     *
+     * @param $base integer Specifies a base other than 10
+     * @return string Number
+     * @throws MWException
+     */
+    public static function newTimestampedUID88( $base = 10 ) {
+        if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
+            throw new MWException( "Base must be an integer between 2 and 36" );
+        }
+        $gen = self::singleton();
+        $time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );
+        return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
+    }
+
+    /**
+     * @param array $info Result of getTimestampAndDelay(); only the first two
+     *   entries (UIDGenerator::millitime() result, counter) are used
+     * @return string 88 bits
+     * @throws MWException
+     */
+    protected function getTimestampedID88( array $info ) {
+        list( $time, $counter ) = $info;
+        // Take the 46 MSBs of "milliseconds since epoch"
+        $id_bin = $this->millisecondsSinceEpochBinary( $time );
+        // Add a 10 bit counter resulting in 56 bits total
+        $id_bin .= str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT );
+        // Add the 32 bit node ID resulting in 88 bits total
+        $id_bin .= $this->nodeId32;
+        // Sanity check: any component wider than expected shows up as a wrong total length
+        if ( strlen( $id_bin ) !== 88 ) {
+            throw new MWException( "Detected overflow for millisecond timestamp." );
+        }
+        return $id_bin;
+    }
+
+    /**
+     * Get a statistically unique 128-bit unsigned integer ID string.
+     * The bits of the UID are prefixed with the time (down to the millisecond).
+     *
+     * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
+     * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
+     * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
+     *
+     * UID generation is serialized on each server (as the node ID is for the whole machine).
+     *
+     * @param $base integer Specifies a base other than 10
+     * @return string Number
+     * @throws MWException
+     */
+    public static function newTimestampedUID128( $base = 10 ) {
+        if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
+            throw new MWException( "Base must be an integer between 2 and 36" );
+        }
+        $gen = self::singleton();
+        $time = $gen->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
+        return wfBaseConvert( $gen->getTimestampedID128( $time ), 2, $base );
+    }
+
+    /**
+     * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
+     * @return string 128 bits
+     * @throws MWException
+     */
+    protected function getTimestampedID128( array $info ) {
+        list( $time, $counter, $clkSeq ) = $info;
+        // Take the 46 MSBs of "milliseconds since epoch"
+        $id_bin = $this->millisecondsSinceEpochBinary( $time );
+        // Add a 20 bit counter resulting in 66 bits total
+        $id_bin .= str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT );
+        // Add a 14 bit clock sequence number resulting in 80 bits total
+        $id_bin .= str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT );
+        // Add the 48 bit node ID resulting in 128 bits total
+        $id_bin .= $this->nodeId48;
+        // Sanity check: any component wider than expected shows up as a wrong total length
+        if ( strlen( $id_bin ) !== 128 ) {
+            throw new MWException( "Detected overflow for millisecond timestamp." );
+        }
+        return $id_bin;
+    }
+
+    /**
+     * Return an RFC4122 compliant v4 UUID
+     *
+     * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
+     * @return string
+     * @throws MWException
+     */
+    public static function newUUIDv4( $flags = 0 ) {
+        $hex = ( $flags & self::QUICK_RAND )
+            ? wfRandomString( 31 )
+            : MWCryptRand::generateHex( 31 );
+
+        return sprintf( '%s-%s-%s-%s-%s',
+            // "time_low" (32 bits)
+            substr( $hex, 0, 8 ),
+            // "time_mid" (16 bits)
+            substr( $hex, 8, 4 ),
+            // "time_hi_and_version" (16 bits)
+            '4' . substr( $hex, 12, 3 ),
+            // "clk_seq_hi_res (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
+            dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) ) . $hex[16] . substr( $hex, 17, 2 ),
+            // "node" (48 bits)
+            substr( $hex, 19, 12 )
+        );
+    }
+
+    /**
+     * Return an RFC4122 compliant v4 UUID
+     *
+     * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
+     * @return string 32 hex characters with no hyphens
+     * @throws MWException
+     */
+    public static function newRawUUIDv4( $flags = 0 ) {
+        return str_replace( '-', '', self::newUUIDv4( $flags ) );
+    }
+
+    /**
+     * Get a (time,counter,clock sequence) where (time,counter) is higher
+     * than any previous (time,counter) value for the given clock sequence.
+     * This is useful for making UIDs sequential on a per-node bases.
+     *
+     * @param string $lockFile Name of the property holding the lock file path
+     *   ('lockFile88' or 'lockFile128')
+     * @param $clockSeqSize integer The number of possible clock sequence values
+     * @param $counterSize integer The number of possible counter values
+     * @return Array (result of UIDGenerator::millitime(), counter, clock sequence)
+     * @throws MWException
+     */
+    protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
+        // Get the UID lock file handle
+        if ( isset( $this->fileHandles[$lockFile] ) ) {
+            $handle = $this->fileHandles[$lockFile];
+        } else {
+            $handle = fopen( $this->$lockFile, 'cb+' );
+            if ( $handle === false ) {
+                // Do not cache the failure; a later call may succeed
+                throw new MWException( "Could not open '{$this->$lockFile}'." );
+            }
+            $this->fileHandles[$lockFile] = $handle; // cache
+        }
+        // Acquire the UID lock file
+        if ( !flock( $handle, LOCK_EX ) ) {
+            throw new MWException( "Could not acquire '{$this->$lockFile}'." );
+        }
+        // Get the current timestamp, clock sequence number, last time, and counter
+        rewind( $handle );
+        $data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
+        $clockChanged = false; // clock set back significantly?
+        if ( count( $data ) == 5 ) { // last UID info already initialized
+            $clkSeq = (int)$data[0] % $clockSeqSize;
+            $prevTime = array( (int)$data[1], (int)$data[2] );
+            $offset = (int)$data[4] % $counterSize; // random counter offset
+            $counter = 0; // counter for UIDs with the same timestamp
+            // Delay until the clock reaches the time of the last ID.
+            // This detects any microtime() drift among processes.
+            $time = $this->timeWaitUntil( $prevTime );
+            if ( !$time ) { // too long to delay?
+                $clockChanged = true; // bump clock sequence number
+                $time = self::millitime();
+            } elseif ( $time == $prevTime ) {
+                // Bump the counter if there are timestamp collisions
+                $counter = (int)$data[3] % $counterSize;
+                if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
+                    flock( $handle, LOCK_UN ); // abort
+                    throw new MWException( "Counter overflow for timestamp value." );
+                }
+            }
+        } else { // last UID info not initialized
+            $clkSeq = mt_rand( 0, $clockSeqSize - 1 );
+            $counter = 0;
+            $offset = mt_rand( 0, $counterSize - 1 );
+            $time = self::millitime();
+        }
+        // microtime() and gettimeofday() can drift from time() at least on Windows.
+        // The drift is immediate for processes running while the system clock changes.
+        // time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
+        if ( abs( time() - $time[0] ) >= 2 ) {
+            // We don't want processes using too high or low timestamps to avoid duplicate
+            // UIDs and clock sequence number churn. This process should just be restarted.
+            flock( $handle, LOCK_UN ); // abort
+            throw new MWException( "Process clock is outdated or drifted." );
+        }
+        // If microtime() is synced and a clock change was detected, then the clock went back
+        if ( $clockChanged ) {
+            // Bump the clock sequence number and also randomize the counter offset,
+            // which is useful for UIDs that do not include the clock sequence number.
+            $clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
+            $offset = mt_rand( 0, $counterSize - 1 );
+            trigger_error( "Clock was set back; sequence number incremented." );
+        }
+        // Update the (clock sequence number, timestamp, counter)
+        ftruncate( $handle, 0 );
+        rewind( $handle );
+        fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
+        fflush( $handle );
+        // Release the UID lock file
+        flock( $handle, LOCK_UN );
+
+        return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
+    }
+
+    /**
+     * Wait till the current timestamp reaches $time and return the current
+     * timestamp. This returns false if it would have to wait more than 10ms.
+     *
+     * @param array $time Result of UIDGenerator::millitime()
+     * @return Array|bool UIDGenerator::millitime() result or false
+     */
+    protected function timeWaitUntil( array $time ) {
+        do {
+            $ct = self::millitime();
+            // Arrays of equal size are compared element by element, so this orders
+            // (seconds, milliseconds) pairs chronologically.
+            if ( $ct >= $time ) { // http://php.net/manual/en/language.operators.comparison.php
+                return $ct; // current timestamp is higher than $time
+            }
+        } while ( ( ( $time[0] - $ct[0] ) * 1000 + ( $time[1] - $ct[1] ) ) <= 10 );
+
+        return false;
+    }
+
+    /**
+     * @param array $time Result of UIDGenerator::millitime()
+     * @return string 46 MSBs of "milliseconds since epoch" in binary (rolls over in 4201)
+     * @throws MWException
+     */
+    protected function millisecondsSinceEpochBinary( array $time ) {
+        list( $sec, $msec ) = $time;
+        $ts = 1000 * $sec + $msec;
+        if ( $ts > pow( 2, 52 ) ) {
+            throw new MWException( __METHOD__ .
+                ': sorry, this function doesn\'t work after the year 144680' );
+        }
+        return substr( wfBaseConvert( $ts, 10, 2, 46 ), -46 );
+    }
+
+    /**
+     * @return Array (current time in seconds, milliseconds since then)
+     */
+    protected static function millitime() {
+        list( $msec, $sec ) = explode( ' ', microtime() );
+        return array( (int)$sec, (int)( $msec * 1000 ) );
+    }
+
+    /**
+     * Close any lock file handles that were opened.
+     */
+    function __destruct() {
+        foreach ( $this->fileHandles as $handle ) {
+            // Only close real, still-open stream resources
+            if ( is_resource( $handle ) ) {
+                fclose( $handle );
+            }
+        }
+    }
+}
--- /dev/null
+<?php
+/**
+ * ZIP file directories reader, for the purposes of upload verification.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * A class for reading ZIP file directories, for the purposes of upload
+ * verification.
+ *
+ * Only a functional interface is provided: ZipDirectoryReader::read(). No access is
+ * given to object instances.
+ *
+ */
+class ZipDirectoryReader {
+ /**
+ * Read a ZIP file and call a function for each file discovered in it.
+ *
+ * Because this class is aimed at verification, an error is raised on
+ * suspicious or ambiguous input, instead of emulating some standard
+ * behavior.
+ *
+ * @param string $fileName The archive file name
+ * @param array $callback The callback function. It will be called for each file
+ * with a single associative array each time, with members:
+ *
+ * - name: The file name. Directories conventionally have a trailing
+ * slash.
+ *
+ * - mtime: The file modification time, in MediaWiki 14-char format
+ *
+ * - size: The uncompressed file size
+ *
+ * @param array $options An associative array of read options, with the option
+ * name in the key. This may currently contain:
+ *
+ * - zip64: If this is set to true, then we will emulate a
+ * library with ZIP64 support, like OpenJDK 7. If it is set to
+ * false, then we will emulate a library with no knowledge of
+ * ZIP64.
+ *
+ * NOTE: The ZIP64 code is untested and probably doesn't work. It
+ * turned out to be easier to just reject ZIP64 archive uploads,
+ * since they are likely to be very rare. Confirming safety of a
+ * ZIP64 file is fairly complex. What do you do with a file that is
+ * ambiguous and broken when read with a non-ZIP64 reader, but valid
+ * when read with a ZIP64 reader? This situation is normal for a
+ * valid ZIP64 file, and working out what non-ZIP64 readers will make
+ * of such a file is not trivial.
+ *
+ * @return Status object. The following fatal errors are defined:
+ *
+ * - zip-file-open-error: The file could not be opened.
+ *
+ * - zip-wrong-format: The file does not appear to be a ZIP file.
+ *
+ * - zip-bad: There was something wrong or ambiguous about the file
+ * data.
+ *
+ * - zip-unsupported: The ZIP file uses features which
+ * ZipDirectoryReader does not support.
+ *
+ * The default messages for those fatal errors are written in a way that
+ * makes sense for upload verification.
+ *
+ * If a fatal error is returned, more information about the error will be
+ * available in the debug log.
+ *
+ * Note that the callback function may be called any number of times before
+ * a fatal error is returned. If this occurs, the data sent to the callback
+ * function should be discarded.
+ */
+ public static function read( $fileName, $callback, $options = array() ) {
+     // Instances are never handed out: build one, run it, return only its Status
+     $reader = new self( $fileName, $callback, $options );
+     $status = $reader->execute();
+
+     return $status;
+ }
+
+ /** The file name */
+ var $fileName;
+
+ /** The opened file resource */
+ var $file;
+
+ /** The cached length of the file, or null if it has not been loaded yet. */
+ var $fileLength;
+
+ /** A segmented cache of the file contents */
+ var $buffer;
+
+ /** The file data callback */
+ var $callback;
+
+ /** The ZIP64 mode */
+ var $zip64 = false;
+
+ /** Stored headers */
+ var $eocdr, $eocdr64, $eocdr64Locator;
+
+ var $data;
+
+ /** The "extra field" ID for ZIP64 central directory entries */
+ const ZIP64_EXTRA_HEADER = 0x0001;
+
+ /** The segment size for the file contents cache */
+ const SEGSIZE = 16384;
+
+ /** The index of the "general field" bit for UTF-8 file names */
+ const GENERAL_UTF8 = 11;
+
+ /** The index of the "general field" bit for central directory encryption */
+ const GENERAL_CD_ENCRYPTED = 13;
+
+ /**
+  * Non-public constructor; instances are created by read().
+  *
+  * @param string $fileName The archive file name
+  * @param array $callback The file data callback
+  * @param array $options Read options; only 'zip64' is consulted here
+  */
+ protected function __construct( $fileName, $callback, $options ) {
+     $this->callback = $callback;
+     $this->fileName = $fileName;
+
+     // Leave the default ZIP64 mode in place unless the caller set the option
+     if ( isset( $options['zip64'] ) ) {
+         $this->zip64 = $options['zip64'];
+     }
+ }
+
+ /**
+  * Open the file and read its central directory, in ZIP64 or legacy mode
+  * depending on $this->zip64.
+  *
+  * @return Status Fatal with 'zip-file-open-error' if the file cannot be
+  *   opened, or with the code of any ZipDirectoryReaderError raised while reading
+  */
+ function execute() {
+     $this->file = fopen( $this->fileName, 'r' );
+     $this->data = array();
+     if ( !$this->file ) {
+         return Status::newFatal( 'zip-file-open-error' );
+     }
+
+     $status = Status::newGood();
+     try {
+         $this->readEndOfCentralDirectoryRecord();
+         if ( !$this->zip64 ) {
+             // Legacy mode: placeholder values in the EOCDR mean the real
+             // values live in ZIP64 structures, which we refuse to interpret.
+             $hasZip64Markers = $this->eocdr['CD size'] == 0xffffffff
+                 || $this->eocdr['CD offset'] == 0xffffffff
+                 || $this->eocdr['CD entries total'] == 0xffff;
+             if ( $hasZip64Markers ) {
+                 $this->error( 'zip-unsupported', 'Central directory header indicates ZIP64, ' .
+                     'but we are in legacy mode. Rejecting this upload is necessary to avoid ' .
+                     'opening vulnerabilities on clients using OpenJDK 7 or later.' );
+             }
+
+             list( $offset, $size ) = $this->findOldCentralDirectory();
+         } else {
+             list( $offset, $size ) = $this->findZip64CentralDirectory();
+         }
+         $this->readCentralDirectory( $offset, $size );
+     } catch ( ZipDirectoryReaderError $e ) {
+         // error() has already logged the details; report only the code
+         $status->fatal( $e->getErrorCode() );
+     }
+
+     fclose( $this->file );
+     return $status;
+ }
+
+ /**
+  * Log a debug message, then abort reading by throwing a
+  * ZipDirectoryReaderError carrying the given error code.
+  *
+  * @param string $code Error code (e.g. 'zip-bad')
+  * @param string $debugMessage Details for the debug log
+  * @throws ZipDirectoryReaderError Always
+  */
+ function error( $code, $debugMessage ) {
+     $logLine = __CLASS__ . ": Fatal error: $debugMessage\n";
+     wfDebug( $logLine );
+
+     throw new ZipDirectoryReaderError( $code );
+ }
+
+ /**
+  * Locate and parse the "end of central directory record" (EOCDR), the
+  * header that follows the central directory, and store it in $this->eocdr.
+  *
+  * Besides the unpacked fields, $this->eocdr receives 'EOCDR size' (fixed
+  * part plus comment), 'file comment' and 'position' (offset of the record
+  * in the file).
+  */
+ function readEndOfCentralDirectoryRecord() {
+     // Field name => size in bytes, in on-disk order
+     $fields = array(
+         'signature' => 4,
+         'disk' => 2,
+         'CD start disk' => 2,
+         'CD entries this disk' => 2,
+         'CD entries total' => 2,
+         'CD size' => 4,
+         'CD offset' => 4,
+         'file comment length' => 2,
+     );
+     $fixedSize = $this->getStructSize( $fields );
+
+     // Only the tail of the file is searched: the fixed part of the record
+     // plus 65536 bytes of room for the trailing comment.
+     $searchStart = $this->getFileLength() - 65536 - $fixedSize;
+     if ( $searchStart < 0 ) {
+         $searchStart = 0;
+     }
+
+     $tail = $this->getBlock( $searchStart );
+     // Use the last signature in the tail
+     $sigOffset = strrpos( $tail, "PK\x05\x06" );
+     if ( $sigOffset === false ) {
+         $this->error( 'zip-wrong-format',
+             "zip file lacks EOCDR signature. It probably isn't a zip file." );
+     }
+
+     $this->eocdr = $this->unpack( substr( $tail, $sigOffset ), $fields );
+     $recordSize = $fixedSize + $this->eocdr['file comment length'];
+     $this->eocdr['EOCDR size'] = $recordSize;
+
+     // The record, including its comment, must extend exactly to the end of the block
+     if ( $recordSize != strlen( $tail ) - $sigOffset ) {
+         $this->error( 'zip-bad', 'trailing bytes after the end of the file comment' );
+     }
+     if ( $this->eocdr['disk'] !== 0
+         || $this->eocdr['CD start disk'] !== 0 )
+     {
+         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR)' );
+     }
+
+     // Append the variable-length comment that follows the fixed fields
+     $commentInfo = array(
+         'file comment' => array( 'string', $this->eocdr['file comment length'] )
+     );
+     $this->eocdr += $this->unpack( $tail, $commentInfo, $sigOffset + $fixedSize );
+     $this->eocdr['position'] = $searchStart + $sigOffset;
+ }
+
+ /**
+  * Read the "ZIP64 end of central directory locator", which is expected
+  * immediately before the EOCDR, into $this->eocdr64Locator. An error is
+  * raised if its signature is not found there.
+  */
+ function readZip64EndOfCentralDirectoryLocator() {
+     $fields = array(
+         'signature' => array( 'string', 4 ),
+         'eocdr64 start disk' => 4,
+         'eocdr64 offset' => 8,
+         'number of disks' => 4,
+     );
+     $locatorSize = $this->getStructSize( $fields );
+
+     // The locator sits directly in front of the EOCDR at the end of the file
+     $locatorPos = $this->getFileLength() - $this->eocdr['EOCDR size'] - $locatorSize;
+     $raw = $this->getBlock( $locatorPos, $locatorSize );
+
+     $locator = $this->unpack( $raw, $fields );
+     $this->eocdr64Locator = $locator;
+
+     if ( $locator['signature'] !== "PK\x06\x07" ) {
+         // Note: Java will allow this and continue to read the
+         // EOCDR64, so we have to reject the upload, we can't
+         // just use the EOCDR header instead.
+         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory locator' );
+     }
+ }
+
+ /**
+  * Read the "ZIP64 end of central directory record" into $this->eocdr64,
+  * using the offset given by the previously read locator. It may replace
+  * the regular "end of central directory record" in ZIP64 files.
+  */
+ function readZip64EndOfCentralDirectoryRecord() {
+     $locator = $this->eocdr64Locator;
+     // NOTE(review): this rejects any locator whose "number of disks" is not 0;
+     // confirm against the ZIP specification what single-disk archives store here.
+     if ( $locator['eocdr64 start disk'] != 0
+         || $locator['number of disks'] != 0 )
+     {
+         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64 locator)' );
+     }
+
+     $fields = array(
+         'signature' => array( 'string', 4 ),
+         'EOCDR64 size' => 8,
+         'version made by' => 2,
+         'version needed' => 2,
+         'disk' => 4,
+         'CD start disk' => 4,
+         'CD entries this disk' => 8,
+         'CD entries total' => 8,
+         'CD size' => 8,
+         'CD offset' => 8
+     );
+     $recordSize = $this->getStructSize( $fields );
+     $raw = $this->getBlock( $locator['eocdr64 offset'], $recordSize );
+
+     $record = $this->unpack( $raw, $fields );
+     $this->eocdr64 = $record;
+
+     if ( $record['signature'] !== "PK\x06\x06" ) {
+         $this->error( 'zip-bad', 'wrong signature on Zip64 end of central directory record' );
+     }
+     if ( $record['disk'] !== 0
+         || $record['CD start disk'] !== 0 )
+     {
+         $this->error( 'zip-unsupported', 'more than one disk (in EOCDR64)' );
+     }
+ }
+
+ /**
+  * Find the location of the central directory, as would be seen by a
+  * non-ZIP64 reader, i.e. from the EOCDR fields alone.
+  *
+  * @return array List containing offset and size
+  */
+ function findOldCentralDirectory() {
+     $eocdr = $this->eocdr;
+     $cdSize = $eocdr['CD size'];
+     $cdOffset = $eocdr['CD offset'];
+
+     // Some readers use the EOCDR position instead of the offset field
+     // to find the directory, so to be safe, we check if they both agree.
+     $expectedEnd = $eocdr['position'];
+     if ( $cdOffset + $cdSize != $expectedEnd ) {
+         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
+             'of central directory record' );
+     }
+
+     return array( $cdOffset, $cdSize );
+ }
+
+ /**
+  * Find the location of the central directory, as would be seen by a
+  * ZIP64-compliant reader.
+  *
+  * The values of the regular EOCDR are used unless one of them holds its
+  * all-ones sentinel, in which case the ZIP64 locator and record are read
+  * and, when present, take precedence.
+  *
+  * @return array Two-element list: the directory offset, then its size.
+  */
+ function findZip64CentralDirectory() {
+     // The spec is ambiguous about the exact rules of precedence between the
+     // ZIP64 headers and the original headers. Here we follow zip_util.c
+     // from OpenJDK 7.
+     $cdSize = $this->eocdr['CD size'];
+     $cdOffset = $this->eocdr['CD offset'];
+     $entryCount = $this->eocdr['CD entries total'];
+     $directoryEnd = $this->eocdr['position'];
+
+     $wantsZip64 = $cdSize == 0xffffffff
+         || $cdOffset == 0xffffffff
+         || $entryCount == 0xffff;
+     if ( $wantsZip64 ) {
+         $this->readZip64EndOfCentralDirectoryLocator();
+
+         if ( isset( $this->eocdr64Locator['eocdr64 offset'] ) ) {
+             $this->readZip64EndOfCentralDirectoryRecord();
+
+             if ( isset( $this->eocdr64['CD offset'] ) ) {
+                 // With ZIP64 the directory is expected to end where the
+                 // EOCDR64 starts.
+                 $cdSize = $this->eocdr64['CD size'];
+                 $cdOffset = $this->eocdr64['CD offset'];
+                 $directoryEnd = $this->eocdr64Locator['eocdr64 offset'];
+             }
+         }
+     }
+
+     // Readers disagree on whether to trust the offset field or the record
+     // position when locating the directory, so insist that both agree.
+     if ( $cdOffset + $cdSize != $directoryEnd ) {
+         $this->error( 'zip-bad', 'the central directory does not immediately precede the end ' .
+             'of central directory record' );
+     }
+
+     return array( $cdOffset, $cdSize );
+ }
+
+ /**
+  * Read the central directory at the given location and invoke
+  * $this->callback once per directory entry.
+  *
+  * The callback receives an associative array with the keys 'name' (file
+  * name, converted from CP437 to UTF-8 when iconv is available and the
+  * UTF-8 flag is not set), 'mtime' (14-digit YYYYMMDDHHMMSS string) and
+  * 'size' (uncompressed size).
+  *
+  * @param int $offset Byte offset of the central directory in the file
+  * @param int $size Length of the central directory in bytes
+  */
+ function readCentralDirectory( $offset, $size ) {
+     $block = $this->getBlock( $offset, $size );
+
+     // Fixed-length part of every central directory file header.
+     $headerLayout = array(
+         'signature' => array( 'string', 4 ),
+         'version made by' => 2,
+         'version needed' => 2,
+         'general bits' => 2,
+         'compression method' => 2,
+         'mod time' => 2,
+         'mod date' => 2,
+         'crc-32' => 4,
+         'compressed size' => 4,
+         'uncompressed size' => 4,
+         'name length' => 2,
+         'extra field length' => 2,
+         'comment length' => 2,
+         'disk number start' => 2,
+         'internal attrs' => 2,
+         'external attrs' => 4,
+         'local header offset' => 4,
+     );
+     $headerSize = $this->getStructSize( $headerLayout );
+
+     for ( $cursor = 0; $cursor < $size; ) {
+         $entry = $this->unpack( $block, $headerLayout, $cursor );
+         $cursor += $headerSize;
+
+         if ( $entry['signature'] !== "PK\x01\x02" ) {
+             $this->error( 'zip-bad', 'Invalid signature found in directory entry' );
+         }
+
+         // The variable-length tail follows; its lengths come from the header.
+         $tailLayout = array(
+             'name' => array( 'string', $entry['name length'] ),
+             'extra field' => array( 'string', $entry['extra field length'] ),
+             'comment' => array( 'string', $entry['comment length'] ),
+         );
+         $entry += $this->unpack( $block, $tailLayout, $cursor );
+         $cursor += $this->getStructSize( $tailLayout );
+
+         if ( $this->zip64 ) {
+             $hasSentinel = $entry['compressed size'] == 0xffffffff
+                 || $entry['uncompressed size'] == 0xffffffff
+                 || $entry['local header offset'] == 0xffffffff;
+             if ( $hasSentinel ) {
+                 $zip64Fields = $this->unpackZip64Extra( $entry['extra field'] );
+                 if ( $zip64Fields ) {
+                     // Array union: the ZIP64 values win over the 32-bit ones.
+                     $entry = $zip64Fields + $entry;
+                 }
+             }
+         }
+
+         if ( $this->testBit( $entry['general bits'], self::GENERAL_CD_ENCRYPTED ) ) {
+             $this->error( 'zip-unsupported', 'central directory encryption is not supported' );
+         }
+
+         // Convert the timestamp into MediaWiki format.
+         // For the packed layout, please see the MS-DOS 2.0 Programmer's
+         // Reference, pages 3-5 and 3-6.
+         $dosTime = $entry['mod time'];
+         $dosDate = $entry['mod date'];
+         $timestamp = sprintf( "%04d%02d%02d%02d%02d%02d",
+             1980 + ( $dosDate >> 9 ),
+             ( $dosDate >> 5 ) & 15,
+             $dosDate & 31,
+             ( $dosTime >> 11 ) & 31,
+             ( $dosTime >> 5 ) & 63,
+             ( $dosTime & 31 ) * 2 );
+
+         // Convert the character set in the file name
+         if ( function_exists( 'iconv' )
+             && !$this->testBit( $entry['general bits'], self::GENERAL_UTF8 ) )
+         {
+             $name = iconv( 'CP437', 'UTF-8', $entry['name'] );
+         } else {
+             $name = $entry['name'];
+         }
+
+         // Hand a compact, sensibly formatted record to the user callback
+         call_user_func( $this->callback, array(
+             'name' => $name,
+             'mtime' => $timestamp,
+             'size' => $entry['uncompressed size'],
+         ) );
+     }
+ }
+
+ /**
+  * Interpret ZIP64 "extra field" data and return an associative array.
+  *
+  * The extra field is walked as a sequence of (id, size, data) records; the
+  * first record whose id equals self::ZIP64_EXTRA_HEADER is unpacked and
+  * returned.
+  *
+  * NOTE(review): the ZIP64 record is always unpacked with the full 28-byte
+  * layout below, so a record with a shorter payload makes unpack() report
+  * an error - confirm whether partially populated records must be accepted.
+  *
+  * @param string $extraField Raw extra field bytes of a directory entry
+  * @return array|bool The unpacked ZIP64 fields, or false if the extra
+  *   field contains no ZIP64 record
+  */
+ function unpackZip64Extra( $extraField ) {
+     $recordHeaderLayout = array(
+         'id' => 2,
+         'size' => 2,
+     );
+     $recordHeaderSize = $this->getStructSize( $recordHeaderLayout );
+
+     $zip64Layout = array(
+         'uncompressed size' => 8,
+         'compressed size' => 8,
+         'local header offset' => 8,
+         'disk number start' => 4,
+     );
+
+     $totalLength = strlen( $extraField );
+     $cursor = 0;
+     while ( $cursor < $totalLength ) {
+         $header = $this->unpack( $extraField, $recordHeaderLayout, $cursor );
+         $payloadStart = $cursor + $recordHeaderSize;
+
+         $payloadLayout = array( 'data' => array( 'string', $header['size'] ) );
+         $payload = $this->unpack( $extraField, $payloadLayout, $payloadStart );
+
+         // Step over this record before deciding whether it is the one we want
+         $cursor = $payloadStart + $header['size'];
+
+         if ( $header['id'] == self::ZIP64_EXTRA_HEADER ) {
+             return $this->unpack( $payload['data'], $zip64Layout );
+         }
+     }
+
+     return false;
+ }
+
+ /**
+  * Get the length of the file in bytes.
+  *
+  * The value is taken from fstat() on $this->file the first time it is
+  * needed and cached in $this->fileLength afterwards. If fstat() fails, the
+  * failure is reported through $this->error() instead of silently caching
+  * a null length.
+  *
+  * @return int
+  */
+ function getFileLength() {
+     if ( $this->fileLength === null ) {
+         $stat = fstat( $this->file );
+         if ( $stat === false ) {
+             // Without this check the length would stay null and the problem
+             // would only surface later as a misleading getBlock() error.
+             $this->error( 'zip-bad', 'unable to determine the file length, fstat() failed' );
+         }
+         $this->fileLength = $stat['size'];
+     }
+     return $this->fileLength;
+ }
+
+ /**
+  * Get the file contents from a given offset. If there are not enough bytes
+  * in the file to satisfy the request, the problem is reported through
+  * $this->error().
+  *
+  * The data is assembled from cached segments of self::SEGSIZE bytes (see
+  * getSegment()). Only the segments that overlap the requested range are
+  * loaded.
+  *
+  * @param int $start The byte offset of the start of the block.
+  * @param int $length The number of bytes to return. If omitted, the remainder
+  *   of the file will be returned.
+  *
+  * @return string
+  */
+ function getBlock( $start, $length = null ) {
+     $fileLength = $this->getFileLength();
+     if ( $start >= $fileLength ) {
+         $this->error( 'zip-bad', "getBlock() requested position $start, " .
+             "file length is $fileLength" );
+     }
+     if ( $length === null ) {
+         $length = $fileLength - $start;
+     }
+     $end = $start + $length;
+     if ( $end > $fileLength ) {
+         $this->error( 'zip-bad', "getBlock() requested end position $end, " .
+             "file length is $fileLength" );
+     }
+     if ( $length == 0 ) {
+         // Nothing to read, so no segment needs to be loaded
+         return '';
+     }
+
+     // Segments covering [$start, $end): $endSeg is an exclusive bound,
+     // because the last requested byte ($end - 1) lives in segment
+     // ceil( $end / SEGSIZE ) - 1. Looping up to and including $endSeg would
+     // read and cache one segment more than needed.
+     $startSeg = (int)floor( $start / self::SEGSIZE );
+     $endSeg = (int)ceil( $end / self::SEGSIZE );
+
+     $block = '';
+     for ( $segIndex = $startSeg; $segIndex < $endSeg; $segIndex++ ) {
+         $block .= $this->getSegment( $segIndex );
+     }
+
+     // Trim the leading bytes of the first segment and anything past $end
+     $block = substr( $block,
+         $start - $startSeg * self::SEGSIZE,
+         $length );
+
+     if ( strlen( $block ) < $length ) {
+         $this->error( 'zip-bad', 'getBlock() returned an unexpectedly small amount of data' );
+     }
+
+     return $block;
+ }
+
+ /**
+  * Get a section of the file starting at position $segIndex * self::SEGSIZE,
+  * of length self::SEGSIZE. Results are cached in $this->buffer. This is a
+  * helper function for getBlock().
+  *
+  * If the file ends inside the segment, the return value is truncated. A
+  * segment that starts at or beyond the end of the file yields an empty
+  * string. Seek and read failures are reported through $this->error().
+  *
+  * @param int $segIndex Zero-based segment number
+  * @return string
+  */
+ function getSegment( $segIndex ) {
+     // Serve from the cache when this segment was requested before
+     if ( isset( $this->buffer[$segIndex] ) ) {
+         return $this->buffer[$segIndex];
+     }
+
+     $bytePos = $segIndex * self::SEGSIZE;
+     if ( $bytePos >= $this->getFileLength() ) {
+         $this->buffer[$segIndex] = '';
+         return '';
+     }
+
+     // fseek() returns 0 on success, so any truthy result is a failure
+     if ( fseek( $this->file, $bytePos ) ) {
+         $this->error( 'zip-bad', "seek to $bytePos failed" );
+     }
+     $seg = fread( $this->file, self::SEGSIZE );
+     if ( $seg === false ) {
+         $this->error( 'zip-bad', "read from $bytePos failed" );
+     }
+
+     $this->buffer[$segIndex] = $seg;
+     return $seg;
+ }
+
+ /**
+  * Get the size of a structure in bytes. See unpack() for the format of
+  * $struct: each member is either an integer byte width, or an array whose
+  * second element is the byte width.
+  *
+  * @param array $struct Structure definition
+  * @return int Sum of the byte widths of all members
+  */
+ function getStructSize( $struct ) {
+     $total = 0;
+     foreach ( $struct as $fieldType ) {
+         $total += is_array( $fieldType ) ? $fieldType[1] : $fieldType;
+     }
+     return $total;
+ }
+
+ /**
+  * Unpack a binary structure. This is like the built-in unpack() function
+  * except nicer.
+  *
+  * @param string $string The binary data input
+  *
+  * @param array $struct An associative array describing the structure. Each
+  *   key is a field name. An integer value means the field is a
+  *   little-endian unsigned integer of that many bytes. An array value
+  *   gives the type name as first element followed by type-dependent
+  *   parameters. Only one such type is defined:
+  *    - "string": The second array element gives the length of string.
+  *      Not null terminated.
+  *
+  * @param int $offset The offset into the string at which to start unpacking.
+  *
+  * @throws MWException If a member names an unknown type
+  * @return array Unpacked associative array. Note that large integers in the input
+  *   may be represented as floating point numbers in the return value, so
+  *   the use of weak comparison is advised.
+  */
+ function unpack( $string, $struct, $offset = 0 ) {
+     if ( $offset + $this->getStructSize( $struct ) > strlen( $string ) ) {
+         $this->error( 'zip-bad', 'unpack() would run past the end of the supplied string' );
+     }
+
+     $fields = array();
+     $cursor = $offset;
+     foreach ( $struct as $fieldName => $fieldType ) {
+         if ( !is_array( $fieldType ) ) {
+             // Unsigned little-endian integer of $numBytes bytes
+             $numBytes = intval( $fieldType );
+
+             // Accumulate from the most significant byte downwards. PHP
+             // promotes the running value to floating point by itself once
+             // it no longer fits an integer.
+             $number = 0;
+             for ( $byteAt = $cursor + $numBytes - 1; $byteAt >= $cursor; $byteAt-- ) {
+                 $number = $number * 256 + ord( $string[$byteAt] );
+             }
+
+             // Refuse values so large that precision may have been lost
+             if ( $number > pow( 2, 52 ) ) {
+                 $this->error( 'zip-unsupported', 'number too large to be stored in a double. ' .
+                     'This could happen if we tried to unpack a 64-bit structure ' .
+                     'at an invalid location.' );
+             }
+
+             $fields[$fieldName] = $number;
+             $cursor += $numBytes;
+             continue;
+         }
+
+         list( $typeName, $fieldSize ) = $fieldType;
+         // Loose comparison on purpose: it mirrors what a switch statement does
+         if ( $typeName == 'string' ) {
+             $fields[$fieldName] = substr( $string, $cursor, $fieldSize );
+             $cursor += $fieldSize;
+         } else {
+             throw new MWException( __METHOD__ . ": invalid type \"$typeName\"" );
+         }
+     }
+
+     return $fields;
+ }
+
+ /**
+  * Tell whether a single bit of an integer value is set.
+  *
+  * @param int $value The value to inspect
+  * @param int $bitIndex The index of the bit, where 0 is the LSB.
+  * @return bool True if the bit is 1, false if it is 0
+  */
+ function testBit( $value, $bitIndex ) {
+     $shifted = $value >> $bitIndex;
+     return ( $shifted & 1 ) === 1;
+ }
+
+ /**
+  * Debugging helper function which prints a string in hexdump -C format:
+  * an 8-digit hexadecimal offset, sixteen bytes as two-digit hex values
+  * (with an extra gap after the eighth), and the same bytes as printable
+  * characters between "|" marks, non-printable ones shown as ".".
+  *
+  * Positions beyond the end of the input are padded so that the character
+  * column of a short final row lines up with the full rows above it.
+  *
+  * @param string $s The bytes to dump
+  */
+ function hexDump( $s ) {
+     $n = strlen( $s );
+     for ( $rowStart = 0; $rowStart < $n; $rowStart += 16 ) {
+         $hexColumn = '';
+         $charColumn = '';
+         for ( $col = 0; $col < 16; $col++ ) {
+             // One separator before every byte, doubled in the middle
+             $hexColumn .= ' ';
+             if ( $col == 8 ) {
+                 $hexColumn .= ' ';
+             }
+
+             $index = $rowStart + $col;
+             if ( $index >= $n ) {
+                 // A real byte takes two hex digits, so an absent one must
+                 // take two blanks or the character column shifts left.
+                 $hexColumn .= str_repeat( ' ', 2 );
+                 $charColumn .= ' ';
+             } else {
+                 $char = $s[$index];
+                 $hexColumn .= sprintf( "%02X", ord( $char ) );
+                 $charColumn .= ctype_print( $char ) ? $char : '.';
+             }
+         }
+
+         printf( "%08X ", $rowStart );
+         print $hexColumn;
+         print " |";
+         print $charColumn;
+         print "|\n";
+     }
+ }
+}
+
+/**
+ * Internal exception class carrying an error code. Will be caught by
+ * private code.
+ */
+class ZipDirectoryReaderError extends Exception {
+    /** @var mixed The error code passed to the constructor */
+    public $errorCode;
+
+    /**
+     * @param mixed $code Error code; it is also embedded in the exception
+     *   message
+     */
+    public function __construct( $code ) {
+        parent::__construct( "ZipDirectoryReader error: $code" );
+        $this->errorCode = $code;
+    }
+
+    /**
+     * Get the error code this exception was created with.
+     *
+     * @return mixed
+     */
+    public function getErrorCode() {
+        return $this->errorCode;
+    }
+}