Add HKDF as a fast, random number source
authorcsteipp <csteipp@wikimedia.org>
Tue, 3 Jun 2014 18:15:36 +0000 (11:15 -0700)
committercsteipp <csteipp@wikimedia.org>
Mon, 9 Jun 2014 18:32:26 +0000 (11:32 -0700)
This adds an RFC 5869 compatible library for quickly generating
cryptographically secure random keys/numbers. An attacker should be
unable to predict the next number generated, as long as the secret
key (set to wgSecretKey if not explicitly defined for a wiki) remains
secret.

In my testing, this is generating random numbers about 7x faster than
MWCryptRand for 32 hex characters, and shouldn't drain openssl's
entropy source.

This will hopefully speed up maintenance tasks that require secure
random numbers, such as resetting login tokens and wrapping users'
passwords in a new format.

Change-Id: I437d9aec0122a0533b0d8723fe8a8624898af909

RELEASE-NOTES-1.24
includes/AutoLoader.php
includes/DefaultSettings.php
includes/utils/MWCryptHKDF.php [new file with mode: 0644]
tests/phpunit/includes/utils/MWCryptHKDFTest.php [new file with mode: 0644]

index 0389b7c..eb0d634 100644 (file)
@@ -56,6 +56,8 @@ production.
   than in a dedicated pane in the preferences panel.
 * (bug 44591) The dropdown actions menu (little triangle next to page tabs) in
   the Vector skin has gained a label that should make it more discoverable.
+* MWCryptHKDF added for fast, cryptographically secure random number generation
+  that won't deplete openssl's entropy pool.
 
 === Bug fixes in 1.24 ===
 * (bug 49116) Footer copyright notice is now always displayed in user language
index 69f0e2f..05d4c1f 100644 (file)
@@ -1113,6 +1113,7 @@ $wgAutoloadLocalClasses = array(
        'HashtableReplacer' => 'includes/utils/StringUtils.php',
        'IP' => 'includes/utils/IP.php',
        'MWCryptRand' => 'includes/utils/MWCryptRand.php',
+       'MWCryptHKDF' => 'includes/utils/MWCryptHKDF.php',
        'MWFunction' => 'includes/utils/MWFunction.php',
        'RegexlikeReplacer' => 'includes/utils/StringUtils.php',
        'ReplacementArray' => 'includes/utils/StringUtils.php',
index ab0ffeb..28ae757 100644 (file)
@@ -7067,6 +7067,16 @@ $wgPagePropsHaveSortkey = true;
  */
 $wgHttpsPort = 443;
 
+/**
+ * Secret and algorithm for hmac-based key derivation function (fast,
+ * cryptographically secure random numbers).
+ * This should be set in LocalSettings.php, otherwise wgSecretKey will
+ * be used.
+ * @since 1.24
+ */
+$wgHKDFSecret = false;
+$wgHKDFAlgorithm = 'sha256';
+
 /**
  * For really cool vim folding this needs to be at the end:
  * vim: foldmarker=@{,@} foldmethod=marker
diff --git a/includes/utils/MWCryptHKDF.php b/includes/utils/MWCryptHKDF.php
new file mode 100644 (file)
index 0000000..6b6655e
--- /dev/null
@@ -0,0 +1,331 @@
+<?php
+/**
+ * Extract-and-Expand Key Derivation Function (HKDF). A cryptographically
+ * secure key expansion function based on RFC 5869.
+ *
+ * This relies on the secrecy of $wgSecretKey (by default), or $wgHKDFSecret.
+ * By default, sha256 is used as the underlying hashing algorithm, but any other
+ * algorithm can be used. Finding the secret key from the output would require
+ * an attacker to discover the input key (the PRK) to the hmac that generated
+ * the output, and discover the particular data, hmac'ed with an evolving key
+ * (salt), to produce the PRK. Even with md5, no publicly known attacks make
+ * this currently feasible.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @author Chris Steipp
+ * @file
+ */
+
+class MWCryptHKDF {
+
+       /**
+        * Singleton instance for public use
+        */
+       protected static $singleton = null;
+
+       /**
+        * The persistent cache
+        */
+       protected $cache = null;
+
+       /**
+        * Cache key we'll use for our salt
+        */
+       protected $cacheKey = null;
+
+       /**
+        * The hash algorithm being used
+        */
+       protected $algorithm = null;
+
+       /**
+        * binary string, the salt for the HKDF
+        */
+       protected $salt;
+
+       /**
+        * The pseudorandom key
+        */
+       private $prk;
+
+       /**
+        * The secret key material. This must be kept secret to preserve
+        * the security properties of this RNG.
+        */
+       private $skm;
+
+       /**
+        * The last block (K(i)) of the most recent expanded key
+        */
+       protected $lastK;
+
+       /**
+        * a "context information" string CTXinfo (which may be null)
+        * See http://eprint.iacr.org/2010/264.pdf Section 4.1
+        */
+       protected $context = array();
+
+       /**
+        * Round count is computed based on the hash's output length,
+        * which neither php nor openssl seem to provide easily.
+        */
+       public static $hashLength = array(
+               'md5' => 16,
+               'sha1' => 20,
+               'sha224' => 28,
+               'sha256' => 32,
+               'sha384' => 48,
+               'sha512' => 64,
+               'ripemd128' => 16,
+               'ripemd160' => 20,
+               'ripemd256' => 32,
+               'ripemd320' => 40,
+               'whirlpool' => 64,
+       );
+
+
+       /**
+        * Set up an HKDF generator around a long-lived secret.
+        *
+        * The salt and the PRK start out as empty strings; they are filled in
+        * on first use by getSaltUsingCache() and realGenerate() respectively.
+        *
+        * @param string $secretKeyMaterial Secret key material, at least 16 bytes long
+        * @param string $algorithm Name of the hashing algorithm (e.g. sha256)
+        * @param BagOStuff $cache Cache the salt is read from and written back to
+        * @param string|array $context Value(s) to mix into the HKDF context
+        * @throws MWException If the secret is shorter than 16 bytes
+        */
+       public function __construct( $secretKeyMaterial, $algorithm, $cache, $context ) {
+               $secretLength = strlen( $secretKeyMaterial );
+               if ( $secretLength < 16 ) {
+                       throw new MWException( "MWCryptHKDF secret was too short." );
+               }
+
+               // Always store the context as a list of parts
+               if ( !is_array( $context ) ) {
+                       $context = array( $context );
+               }
+
+               // Pick one of several cache keys (slots 0..16) so that not every
+               // call hits the same memcache server. mt_rand only selects the
+               // slot; it does not affect the security of the process.
+               $keySlot = mt_rand( 0, 16 );
+               $this->cacheKey = wfMemcKey( 'HKDF', $keySlot );
+
+               $this->context = $context;
+               $this->cache = $cache;
+               $this->algorithm = $algorithm;
+               $this->skm = $secretKeyMaterial;
+
+               // Blank until first use, see getSaltUsingCache() and realGenerate()
+               $this->prk = '';
+               $this->salt = '';
+       }
+
+       /**
+        * Store the last block generated by this instance (if any) in the cache,
+        * where getSaltUsingCache() reads it back as the source of the salt. The
+        * next user therefore computes a different PRK from the same SKM, which
+        * should keep things unpredictable even if an attacker is able to
+        * influence CTXinfo.
+        */
+       public function __destruct() {
+               // Nothing was generated, so there is nothing new to persist
+               if ( !$this->lastK ) {
+                       return;
+               }
+
+               $this->cache->set( $this->cacheKey, $this->lastK );
+       }
+
+       /**
+        * Return the MW specific salt, deriving it on first call from the value
+        * cached by a previous run.
+        *
+        * @return string binary string
+        */
+       protected function getSaltUsingCache() {
+               // Already derived during this object's lifetime
+               if ( $this->salt != '' ) {
+                       return $this->salt;
+               }
+
+               $seed = $this->cache->get( $this->cacheKey );
+               if ( $seed === false ) {
+                       // No previous value to build on: take 16 bytes from
+                       // MWCryptRand, which will use a small amount of entropy from
+                       // our pool. Per http://eprint.iacr.org/2010/264.pdf the salt
+                       // may be a non-secret random value (or absent altogether);
+                       // a strongly random value is used here since we can.
+                       $seed = MWCryptRand::generate( 16 );
+               }
+
+               // Hash the seed to get a binary string that is hashLen long
+               $this->salt = hash( $this->algorithm, $seed, true );
+
+               return $this->salt;
+       }
+
+       /**
+        * Return the shared instance, building it from the global configuration
+        * on first use.
+        *
+        * @return MWCryptHKDF
+        * @throws MWException If neither $wgHKDFSecret nor $wgSecretKey is set
+        *      when the instance has to be created
+        */
+       protected static function singleton() {
+               global $wgHKDFAlgorithm, $wgHKDFSecret, $wgSecretKey;
+
+               // Everything below is only needed to construct the instance, so skip
+               // it (including the cache lookup) once the instance exists.
+               if ( !is_null( self::$singleton ) ) {
+                       return self::$singleton;
+               }
+
+               $secret = $wgHKDFSecret ?: $wgSecretKey;
+               if ( !$secret ) {
+                       throw new MWException( "Cannot use MWCryptHKDF without a secret." );
+               }
+
+               // In HKDF, the context can be known to the attacker, but this will
+               // keep simultaneous runs from producing the same output.
+               $context = array();
+               $context[] = microtime();
+               $context[] = getmypid();
+               $context[] = gethostname();
+
+               // Setup salt cache. Prefer the accelerator cache, and fall back to
+               // the main cache if ObjectCache::newAccelerator() throws.
+               try {
+                       $cache = ObjectCache::newAccelerator( array() );
+               } catch ( Exception $e ) {
+                       $cache = wfGetMainCache();
+               }
+
+               self::$singleton = new self( $secret, $wgHKDFAlgorithm, $cache, $context );
+
+               return self::$singleton;
+       }
+
+       /**
+        * Produce $bytes of secure random data. As a side-effect,
+        * $this->lastK is set to the last hashLen block of key material.
+        *
+        * @param int $bytes number of bytes of data
+        * @param string $context to mix into CTXinfo
+        * @return string binary string of length $bytes
+        */
+       protected function realGenerate( $bytes, $context = '' ) {
+               // The PRK is extracted once per object, on the first request
+               if ( $this->prk === '' ) {
+                       $this->prk = self::HKDFExtract(
+                               $this->algorithm,
+                               $this->getSaltUsingCache(),
+                               $this->skm
+                       );
+               }
+
+               // CTXinfo is the instance context followed by the per-call context
+               $parts = $this->context;
+               $parts[] = $context;
+               $CTXinfo = implode( ':', $parts );
+
+               // $this->lastK is passed by reference: it is both the starting
+               // block of this expansion and updated to its final block.
+               $keyMaterial = self::HKDFExpand(
+                       $this->algorithm,
+                       $this->prk,
+                       $CTXinfo,
+                       $bytes,
+                       $this->lastK
+               );
+
+               return $keyMaterial;
+       }
+
+
+       /**
+        * One-shot HKDF: run the extract step followed by the expand step.
+        *
+        * RFC 5869 defines HKDF in 2 steps, extraction and expansion. As
+        * implemented by HKDFExtract() and HKDFExpand() in this class:
+        *      PRK  = HMAC(salt, IKM)
+        *      K(0) = '' (empty string)
+        *      K(i) = HMAC(PRK, K(i-1) || info || i), 1 <= i <= t
+        *      OKM  = first L bytes of K(1) || K(2) || ... || K(t)
+        * where t = ceil(L / HashLen) and the counter i is a single byte.
+        *
+        * N.B. http://eprint.iacr.org/2010/264.pdf (which calls the salt XTS, the
+        * "extractor salt", and the input key SKM, the "secret keying material")
+        * specifies K(1) = HMAC(PRK, CTXinfo || 0). The test vectors from
+        * RFC 5869 only work if K(0) = '' and K(1) = HMAC(PRK, K(0) || CTXinfo || 1),
+        * so that is the form used here.
+        *
+        * @param string $hash the hashing function to use (e.g., sha256)
+        * @param string $ikm the input keying material
+        * @param string $salt the salt to add to the ikm, to get the prk
+        * @param string $info optional context (change the output without affecting
+        *      the randomness properties of the output)
+        * @param integer $L number of bytes to return
+        * @return string cryptographically secure pseudorandom binary string
+        */
+       public static function HKDF( $hash, $ikm, $salt, $info, $L ) {
+               return self::HKDFExpand(
+                       $hash,
+                       self::HKDFExtract( $hash, $salt, $ikm ),
+                       $info,
+                       $L
+               );
+       }
+
+       /**
+        * Extract the PRK, PRK = HMAC(XTS, SKM)
+        * Note that the hmac is keyed with XTS (the salt),
+        * and the SKM (source key material) is the "data".
+        *
+        * @param string $hash the hashing function to use (e.g., sha256)
+        * @param string $salt the salt, used as the hmac key
+        * @param string $ikm the input keying material, used as the hmac data
+        * @return string binary string (pseudorandom key) used as input to HKDFExpand
+        */
+       private static function HKDFExtract( $hash, $salt, $ikm ) {
+               // Ask for raw binary output rather than a hex string
+               $rawOutput = true;
+               $prk = hash_hmac( $hash, $ikm, $salt, $rawOutput );
+
+               return $prk;
+       }
+
+       /**
+        * Expand the key with the given context
+        *
+        * @param string $hash Hashing algorithm; must be a key of self::$hashLength
+        * @param string $prk a pseudorandom key of at least HashLen octets
+        *      (usually, the output from the extract step)
+        * @param string $info optional context and application specific information
+        *      (can be a zero-length string)
+        * @param int $bytes length of output keying material in bytes
+        *      (<= 255*HashLen)
+        * @param string &$lastK On entry, used as K(0), the block prepended to the
+        *      first hmac input (empty by default). Set by this function to the
+        *      last block of the expansion. In MediaWiki, this is used to seed
+        *      future Extractions.
+        * @return string cryptographically secure random string $bytes long
+        * @throws MWException If the algorithm's output length is unknown, or more
+        *      than 255*HashLen bytes are requested
+        */
+       private static function HKDFExpand( $hash, $prk, $info, $bytes, &$lastK = '' ) {
+               // Without a known output length the round count cannot be computed
+               // (the lookup would otherwise yield null and a division by zero).
+               if ( !isset( self::$hashLength[$hash] ) ) {
+                       throw new MWException( "Unknown hash algorithm passed to HKDFExpand" );
+               }
+               $hashLen = self::$hashLength[$hash];
+
+               // The block counter is a single byte, which caps the output length
+               if ( $bytes > 255 * $hashLen ) {
+                       throw new MWException( "Too many bytes requested from HKDFExpand" );
+               }
+
+               $rounds = ceil( $bytes / $hashLen );
+               $output = '';
+
+               // K(1) = HMAC(PRK, K(0) || CTXinfo || 1), where K(0) is $lastK on entry;
+               // K(i) = HMAC(PRK, K(i-1) || CTXinfo || i); 1 < i <= t;
+               for ( $counter = 1; $counter <= $rounds; ++$counter ) {
+                       $lastK = hash_hmac(
+                               $hash,
+                               $lastK . $info . chr( $counter ),
+                               $prk,
+                               true
+                       );
+                       $output .= $lastK;
+               }
+
+               // The final block may overshoot; trim to the requested length
+               return substr( $output, 0, $bytes );
+       }
+
+       /**
+        * Generate cryptographically random data and return it in raw binary form.
+        *
+        * @param int $bytes the number of bytes of random data to generate
+        * @param string $context string to mix into HMAC context; optional, and
+        *      defaults to an empty string like in generateHex()
+        * @return string binary string of length $bytes
+        */
+       public static function generate( $bytes, $context = '' ) {
+               return self::singleton()->realGenerate( $bytes, $context );
+       }
+
+       /**
+        * Generate cryptographically random data and return it in hexadecimal string format.
+        * See MWCryptRand::realGenerateHex for details of the char-to-byte conversion logic.
+        *
+        * @param int $chars the number of hex chars of random data to generate
+        * @param string $context string to mix into HMAC context
+        * @return string random hex characters, $chars long
+        */
+       public static function generateHex( $chars, $context = '' ) {
+               // Each byte yields two hex chars, so round up for an odd $chars
+               // and trim the surplus char off the end afterwards.
+               $generator = self::singleton();
+               $raw = $generator->realGenerate( ceil( $chars / 2 ), $context );
+
+               return substr( bin2hex( $raw ), 0, $chars );
+       }
+
+}
diff --git a/tests/phpunit/includes/utils/MWCryptHKDFTest.php b/tests/phpunit/includes/utils/MWCryptHKDFTest.php
new file mode 100644 (file)
index 0000000..7e37534
--- /dev/null
@@ -0,0 +1,89 @@
+<?php
+/**
+ *
+ * @group HKDF
+ */
+
+class MWCryptHKDFTest extends MediaWikiTestCase {
+
+       /**
+        * Test basic usage works: two consecutive calls each return the
+        * requested number of hex characters, and differ from each other.
+        */
+       public function testGenerate() {
+               $a = MWCryptHKDF::generateHex( 64 );
+               $b = MWCryptHKDF::generateHex( 64 );
+
+               // assertSame reports the actual length on failure
+               $this->assertSame( 64, strlen( $a ), "MWCryptHKDF produced fewer bytes than expected" );
+               $this->assertSame( 64, strlen( $b ), "MWCryptHKDF produced fewer bytes than expected" );
+               // Strict comparison, so that two numeric-looking hex strings
+               // are never compared as numbers.
+               $this->assertNotSame( $a, $b, "Two runs of MWCryptHKDF produced the same result." );
+       }
+
+       /**
+        * Check MWCryptHKDF::HKDF() against a known test vector. All binary
+        * inputs arrive hex encoded. $prk is the vector's intermediate
+        * pseudorandom key; it is part of the data set but is not asserted here.
+        *
+        * @dataProvider providerRfc5869
+        */
+       public function testRfc5869( $hash, $ikm, $salt, $info, $L, $prk, $okm ) {
+               $expected = pack( 'H*', $okm );
+               $actual = MWCryptHKDF::HKDF(
+                       $hash,
+                       pack( 'H*', $ikm ),
+                       pack( 'H*', $salt ),
+                       pack( 'H*', $info ),
+                       $L
+               );
+
+               $this->assertEquals( $expected, $actual );
+       }
+
+       /**
+        * Test vectors from Appendix A on http://tools.ietf.org/html/rfc5869
+        *
+        * Each row supplies the arguments of testRfc5869() in order: hash
+        * algorithm name, ikm, salt, context (info), number of output bytes,
+        * prk and okm. All values except the algorithm name and the byte count
+        * are hex encoded strings.
+        *
+        * @return array
+        */
+       public static function providerRfc5869() {
+
+               return array(
+                       // A.1
+                       array( 'sha256',
+                               '0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b', // ikm
+                               '000102030405060708090a0b0c', // salt
+                               'f0f1f2f3f4f5f6f7f8f9', // context
+                               42, // bytes
+                               '077709362c2e32df0ddc3f0dc47bba6390b6c73bb50f9c3122ec844ad7c2b3e5', // prk
+                               '3cb25f25faacd57a90434f64d0362f2a2d2d0a90cf1a5a4c5db02d56ecc4c5bf34007208d5b887185865' // okm
+                       ),
+                       // A.2
+                       array( 'sha256',
+                               '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f', // ikm
+                               '606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeaf', // salt
+                               'b0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff', // context
+                               82, // bytes
+                               '06a6b88c5853361a06104c9ceb35b45cef760014904671014a193f40c15fc244', // prk
+                               'b11e398dc80327a1c8e7f78c596a49344f012eda2d4efad8a050cc4c19afa97c59045a99cac7827271cb41c65e590e09da3275600c2f09b8367793a9aca3db71cc30c58179ec3e87c14c01d5c1f3434f1d87' // okm
+                       ),
+                       // A.3
+                       array( 'sha256',
+                               '0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b', // ikm
+                               '', // salt
+                               '', // context
+                               42, // bytes
+                               '19ef24a32c717b167f33a91d6f648bdf96596776afdb6377ac434c1c293ccb04', // prk
+                               '8da4e775a563c18f715f802a063c5a31b8a11f5c5ee1879ec3454e5f3c738d2d9d201395faa4b61a96c8' // okm
+                       ),
+                       // A.4
+                       array( 'sha1',
+                               '0b0b0b0b0b0b0b0b0b0b0b', // ikm
+                               '000102030405060708090a0b0c', // salt
+                               'f0f1f2f3f4f5f6f7f8f9', // context
+                               42, // bytes
+                               '9b6c18c432a7bf8f0e71c8eb88f4b30baa2ba243', // prk
+                               '085a01ea1b10f36933068b56efa5ad81a4f14b822f5b091568a9cdd4f155fda2c22e422478d305f3f896' // okm
+                       ),
+                       // A.5
+                       array( 'sha1',
+                               '000102030405060708090a0b0c0d0e0f101112131415161718191a1b1c1d1e1f202122232425262728292a2b2c2d2e2f303132333435363738393a3b3c3d3e3f404142434445464748494a4b4c4d4e4f', // ikm
+                               '606162636465666768696a6b6c6d6e6f707172737475767778797a7b7c7d7e7f808182838485868788898a8b8c8d8e8f909192939495969798999a9b9c9d9e9fa0a1a2a3a4a5a6a7a8a9aaabacadaeaf', // salt
+                               'b0b1b2b3b4b5b6b7b8b9babbbcbdbebfc0c1c2c3c4c5c6c7c8c9cacbcccdcecfd0d1d2d3d4d5d6d7d8d9dadbdcdddedfe0e1e2e3e4e5e6e7e8e9eaebecedeeeff0f1f2f3f4f5f6f7f8f9fafbfcfdfeff', // context
+                               82, // bytes
+                               '8adae09a2a307059478d309b26c4115a224cfaf6', // prk
+                               '0bd770a74d1160f7c9f12cd5912a06ebff6adcae899d92191fe4305673ba2ffe8fa3f1a4e5ad79f3f334b3b202b2173c486ea37ce3d397ed034c7f9dfeb15c5e927336d0441f4c4300e2cff0d0900b52d3b4' // okm
+                       ),
+               );
+
+       }
+
+
+}