mimoo/RDRAND.c

## RDRAND.c
// This is a quick analysis of Linux's use of RDRAND. All double-slash (//)
// style comments are my own, all slash-star (/*) style comments are from the
// original code. This was written by Taylor Hornby (@DefuseSec).

// This is part of the drivers/char/random.c file.
// I have re-ordered the procedures for clarity. Everything inside them (except
// comments) is exactly as you will find it in linux-3.11.tar.xz

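// This comment says it 'does not use' the hardware RNG. It actually does:
// get_random_bytes() calls extract_entropy(), which calls extract_buf(), and
// extract_buf() XORs arch_get_random_long() output into the block it returns
// whenever that call succeeds.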
/*
 * This function is the exported kernel interface.  It returns some
 * number of good random numbers, suitable for key generation, seeding
 * TCP sequence numbers, etc.  It does not use the hw random number
 * generator, if available; use get_random_bytes_arch() for that.
 */
// Fills 'buf' with 'nbytes' bytes by calling extract_entropy() on
// nonblocking_pool, passing 0 for both 'min' and 'reserved'. The byte count
// that extract_entropy() returns is discarded.
void get_random_bytes(void *buf, int nbytes)
{
	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
}
EXPORT_SYMBOL(get_random_bytes);


// This one is called by get_random_bytes above.
// Copies extract_buf() output into 'buf' one EXTRACT_SIZE-sized block at a
// time and returns the number of bytes copied.
//   r             - entropy store to extract from
//   buf           - destination buffer
//   nbytes        - number of bytes requested; overwritten with the value
//                   account() returns before the copy loop starts
//   min, reserved - handed to account() unchanged
static ssize_t extract_entropy(struct entropy_store *r, void *buf,
				 size_t nbytes, int min, int reserved)
{
	ssize_t ret = 0, i;
	__u8 tmp[EXTRACT_SIZE];
	unsigned long flags;

	/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
	if (fips_enabled) {
		spin_lock_irqsave(&r->lock, flags);
		if (!r->last_data_init) {
			// First pass with fips_enabled set for this store: pull one
			// block only to seed r->last_data, so the duplicate check in
			// the loop below has something to compare against. The lock
			// is released around the extraction because extract_buf()
			// acquires r->lock itself.
			r->last_data_init = true;
			spin_unlock_irqrestore(&r->lock, flags);
			trace_extract_entropy(r->name, EXTRACT_SIZE,
					      r->entropy_count, _RET_IP_);
			xfer_secondary_pool(r, EXTRACT_SIZE);
			extract_buf(r, tmp);
			spin_lock_irqsave(&r->lock, flags);
			memcpy(r->last_data, tmp, EXTRACT_SIZE);
		}
		spin_unlock_irqrestore(&r->lock, flags);
	}

	trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
	xfer_secondary_pool(r, nbytes);
	// From here on 'nbytes' is whatever account() decided, not necessarily
	// what the caller asked for.
	nbytes = account(r, nbytes, min, reserved);

	while (nbytes) {
        // Each iteration of this loop:
        // - Extracts 'EXTRACT_SIZE' bytes from extract_buf
        // - If fips_enabled is set, panics if the just-extracted bytes are
        //   the same as the previously-extracted bytes, then remembers the
        //   new bytes in r->last_data.
        // - Copy either EXTRACT_SIZE or nbytes into the output.
		extract_buf(r, tmp);

		if (fips_enabled) {
			spin_lock_irqsave(&r->lock, flags);
			if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
				panic("Hardware RNG duplicated output!\n");
			memcpy(r->last_data, tmp, EXTRACT_SIZE);
			spin_unlock_irqrestore(&r->lock, flags);
		}
		i = min_t(int, nbytes, EXTRACT_SIZE);
		memcpy(buf, tmp, i);
		nbytes -= i;
		// Arithmetic on a void pointer (a GNU C extension): advances the
		// destination by i bytes.
		buf += i;
		ret += i;
	}

	/* Wipe data just returned from memory */
	memset(tmp, 0, sizeof(tmp));

	return ret;
}

// This fills 'out' with EXTRACT_SIZE random bytes. It's what extract_entropy
// uses to fill its output buffer.
// Writes one EXTRACT_SIZE-byte block derived from pool 'r' into 'out'.
//   r   - entropy store whose pool is hashed (and updated by the feedback mix)
//   out - destination; receives exactly EXTRACT_SIZE bytes
static void extract_buf(struct entropy_store *r, __u8 *out)
{
    // Skip all this stuff because it doesn't matter for the point I want to
    // make...
	int i;
	// The union lets the same bytes be handled as 32-bit words (w) by the SHA
	// calls and as unsigned longs (l) by the RDRAND XOR loop further down.
	union {
		__u32 w[5];
		unsigned long l[LONGS(EXTRACT_SIZE)];
	} hash;
	__u32 workspace[SHA_WORKSPACE_WORDS];
	__u8 extract[64];
	unsigned long flags;

	/* Generate a hash across the pool, 16 words (512 bits) at a time */
	sha_init(hash.w);
	spin_lock_irqsave(&r->lock, flags);
	for (i = 0; i < r->poolinfo->poolwords; i += 16)
		sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);

	/*
	 * We mix the hash back into the pool to prevent backtracking
	 * attacks (where the attacker knows the state of the pool
	 * plus the current outputs, and attempts to find previous
	 * ouputs), unless the hash function can be inverted. By
	 * mixing at least a SHA1 worth of hash data back, we make
	 * brute-forcing the feedback as hard as brute-forcing the
	 * hash.
	 */
	__mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
	spin_unlock_irqrestore(&r->lock, flags);

	/*
	 * To avoid duplicates, we atomically extract a portion of the
	 * pool while mixing, and hash one final time.
	 */
	sha_transform(hash.w, extract, workspace);
	// The scratch buffers are zeroed as soon as the final transform is done.
	memset(extract, 0, sizeof(extract));
	memset(workspace, 0, sizeof(workspace));

	/*
	 * In case the hash function has some recognizable output
	 * pattern, we fold it in half. Thus, we always feed back
	 * twice as much data as we output.
	 */
	hash.w[0] ^= hash.w[3];
	hash.w[1] ^= hash.w[4];
	hash.w[2] ^= rol32(hash.w[2], 16);

    // Ah, here we are. Finally, we found RDRAND.
    // This XORs RDRAND *directly* into the output buffer, right before
    // returning.
	/*
	 * If we have a architectural hardware random number
	 * generator, mix that in, too.
	 */
	for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
		unsigned long v;
        // arch_get_random_long is RDRAND.
        // If it reports failure, the loop stops and the remaining longs of
        // the hash are left as they were.
		if (!arch_get_random_long(&v))
			break;
		hash.l[i] ^= v;
	}
    // SIMPLICIO: Why is that a problem? I thought if you XOR a non-random
    //            stream with a random one, you get a random one? Remember,
    //            one-time-pads and such?
    //
    // SALVIATI:  Right, that's true. Even if RDRAND returns all zeroes, or some
    //            completely predictable sequence, the output will be random as
    //            long as it was random before the XOR.
    //
    // SIMPLICIO: So what's the problem, then? It seems like having RDRAND here
    //            can only make things better...
    //
    // SALVIATI:  Ah, that's true if RDRAND might only be a weak source of
    //            entropy, but if it's *actively* malicious, it could seriously
    //            compromise the security of the output. For example, it could
    //            purposely return the inverse of the bits it's going to be
    //            XORed with, resulting in this function filling 'out' with zero
    //            bytes.
    //
    // SIMPLICIO: That's ridiculous! How could RDRAND know which bits it's going
    //            to be XORed with? There's no way one instruction could figure
    //            all of that out.
    //
    // SALVIATI:  Actually, it's quite possible. The processor wouldn't even have
    //            to be smart about it. Chances are, the bits it's going to be
    //            XORed with are in cache (which is inside the CPU), so if the
    //            CPU had RDRAND return the XOR of all longs in the cache, it
    //            would cancel out and information about the state of the cache
    //            would leak out through the RNG. There are plenty of other
    //            ways. This is the CPU, remember. It can pretty much do
    //            anything it wants.
    //
    // SIMPLICIO: Ok, I see how this use of RDRAND could *in theory* weaken the
    //            whole RNG. But wouldn't that be pretty easy to detect, and
    //            can't we trust Intel? If the NSA has their hand up Intel's
    //            ass, wouldn't there be easier ways of backdooring a system?
    //
    // SALVIATI:  The RDRAND backdoor could be made so that it only activates
    //            under certain, very specific, conditions. For example, it
    //            might only activate when RAX contains 0x632472F72B3FB507,
    //            which is extremely unlikely to happen during normal use, but
    //            could be made to happen by sending the system a TCP packet,
    //            web page, etc, containing that value. So, if it existed, it
    //            would be extremely difficult to detect. It's possible in
    //            principle to reverse engineer the CPU itself, but it's
    //            extremely expensive -- and destructive -- so you can't check
    //            all of the CPUs you actually use for backdoors. Sure, if the
    //            NSA controlled Intel, there would be easier ways to backdoor
    //            a system, but backdooring the RNG is nice, because it's
    //            passive: You don't have to *do* anything to a system in order
    //            to break it. You can just listen to the system's network
    //            traffic and use your RNG backdoor to decrypt it. Further, it's
    //            bad design to combine two RNGs by XORing them together. They
    //            may be correlated (by accident or on purpose) in subtle ways
    //            that cancel out security. To be honest, I'm not exactly sure
    //            what the best way would be, but it certainly isn't XOR.
    //

	// Only the first EXTRACT_SIZE bytes of the (possibly RDRAND-XORed) hash
	// are handed to the caller; the local copy is then zeroed.
	memcpy(out, &hash, EXTRACT_SIZE);
	memset(&hash, 0, sizeof(hash));
}
	// This is a quick analysis of Linux's use of RDRAND. All double-slash (//)
	// style comments are my own, all slash-star (/*) style comments are from the
	// original code. This was written by Taylor Hornby (@DefuseSec).

	// This is part of the drivers/char/random.c file.
	// I have re-ordered the procedures for clarity. Everything inside them (except
	// comments) is exactly as you will find it in linux-3.11.tar.xz

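	// This comment says it 'does not use' the hardware RNG. It actually does:
	// get_random_bytes() calls extract_entropy(), which calls extract_buf(), and
	// extract_buf() XORs arch_get_random_long() output into the block it returns
	// whenever that call succeeds.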
	/*
	* This function is the exported kernel interface. It returns some
	* number of good random numbers, suitable for key generation, seeding
	* TCP sequence numbers, etc. It does not use the hw random number
	* generator, if available; use get_random_bytes_arch() for that.
	*/
	// Fills 'buf' with 'nbytes' bytes by calling extract_entropy() on
	// nonblocking_pool, passing 0 for both 'min' and 'reserved'. The byte count
	// that extract_entropy() returns is discarded.
	void get_random_bytes(void *buf, int nbytes)
	{
	extract_entropy(&nonblocking_pool, buf, nbytes, 0, 0);
	}
	EXPORT_SYMBOL(get_random_bytes);


	// This one is called by get_random_bytes above.
	static ssize_t extract_entropy(struct entropy_store r, void buf,
	size_t nbytes, int min, int reserved)
	{
	ssize_t ret = 0, i;
	__u8 tmp[EXTRACT_SIZE];
	unsigned long flags;

	/* if last_data isn't primed, we need EXTRACT_SIZE extra bytes */
	if (fips_enabled) {
	spin_lock_irqsave(&r->lock, flags);
	if (!r->last_data_init) {
	r->last_data_init = true;
	spin_unlock_irqrestore(&r->lock, flags);
	trace_extract_entropy(r->name, EXTRACT_SIZE,
	r->entropy_count, _RET_IP_);
	xfer_secondary_pool(r, EXTRACT_SIZE);
	extract_buf(r, tmp);
	spin_lock_irqsave(&r->lock, flags);
	memcpy(r->last_data, tmp, EXTRACT_SIZE);
	}
	spin_unlock_irqrestore(&r->lock, flags);
	}

	trace_extract_entropy(r->name, nbytes, r->entropy_count, _RET_IP_);
	xfer_secondary_pool(r, nbytes);
	nbytes = account(r, nbytes, min, reserved);

	while (nbytes) {
	// Each iteration of this loop:
	// - Extracts 'EXTRACT_SIZE' bytes from extract_buf
	// - Panic if the just-extracted bytes are the same as the
	// previously-extracted bytes.
	// - Copy either EXTRACT_SIZE or nbytes into the output.
	extract_buf(r, tmp);

	if (fips_enabled) {
	spin_lock_irqsave(&r->lock, flags);
	if (!memcmp(tmp, r->last_data, EXTRACT_SIZE))
	panic("Hardware RNG duplicated output!\n");
	memcpy(r->last_data, tmp, EXTRACT_SIZE);
	spin_unlock_irqrestore(&r->lock, flags);
	}
	i = min_t(int, nbytes, EXTRACT_SIZE);
	memcpy(buf, tmp, i);
	nbytes -= i;
	buf += i;
	ret += i;
	}

	/* Wipe data just returned from memory */
	memset(tmp, 0, sizeof(tmp));

	return ret;
	}

	// This fills 'out' with EXTRACT_SIZE random bytes. It's what extract_entropy
	// uses to fill its output buffer.
	// Writes one EXTRACT_SIZE-byte block derived from pool 'r' into 'out'.
	//   r   - entropy store whose pool is hashed (and updated by the feedback mix)
	//   out - destination; receives exactly EXTRACT_SIZE bytes
	static void extract_buf(struct entropy_store *r, __u8 *out)
	{
		// Skip all this stuff because it doesn't matter for the point I want to
		// make...
		int i;
		// The union lets the same bytes be handled as 32-bit words (w) by the
		// SHA calls and as unsigned longs (l) by the RDRAND XOR loop further
		// down.
		union {
			__u32 w[5];
			unsigned long l[LONGS(EXTRACT_SIZE)];
		} hash;
		__u32 workspace[SHA_WORKSPACE_WORDS];
		__u8 extract[64];
		unsigned long flags;

		/* Generate a hash across the pool, 16 words (512 bits) at a time */
		sha_init(hash.w);
		spin_lock_irqsave(&r->lock, flags);
		for (i = 0; i < r->poolinfo->poolwords; i += 16)
			sha_transform(hash.w, (__u8 *)(r->pool + i), workspace);

		/*
		 * We mix the hash back into the pool to prevent backtracking
		 * attacks (where the attacker knows the state of the pool
		 * plus the current outputs, and attempts to find previous
		 * ouputs), unless the hash function can be inverted. By
		 * mixing at least a SHA1 worth of hash data back, we make
		 * brute-forcing the feedback as hard as brute-forcing the
		 * hash.
		 */
		__mix_pool_bytes(r, hash.w, sizeof(hash.w), extract);
		spin_unlock_irqrestore(&r->lock, flags);

		/*
		 * To avoid duplicates, we atomically extract a portion of the
		 * pool while mixing, and hash one final time.
		 */
		sha_transform(hash.w, extract, workspace);
		// The scratch buffers are zeroed as soon as the final transform is
		// done.
		memset(extract, 0, sizeof(extract));
		memset(workspace, 0, sizeof(workspace));

		/*
		 * In case the hash function has some recognizable output
		 * pattern, we fold it in half. Thus, we always feed back
		 * twice as much data as we output.
		 */
		hash.w[0] ^= hash.w[3];
		hash.w[1] ^= hash.w[4];
		hash.w[2] ^= rol32(hash.w[2], 16);

		// Ah, here we are. Finally, we found RDRAND.
		// This XORs RDRAND *directly* into the output buffer, right before
		// returning.
		/*
		 * If we have a architectural hardware random number
		 * generator, mix that in, too.
		 */
		for (i = 0; i < LONGS(EXTRACT_SIZE); i++) {
			unsigned long v;
			// arch_get_random_long is RDRAND.
			// If it reports failure, the loop stops and the remaining
			// longs of the hash are left as they were.
			if (!arch_get_random_long(&v))
				break;
			hash.l[i] ^= v;
		}
		// SIMPLICIO: Why is that a problem? I thought if you XOR a non-random
		//            stream with a random one, you get a random one? Remember,
		//            one-time-pads and such?
		//
		// SALVIATI:  Right, that's true. Even if RDRAND returns all zeroes, or some
		//            completely predictable sequence, the output will be random as
		//            long as it was random before the XOR.
		//
		// SIMPLICIO: So what's the problem, then? It seems like having RDRAND here
		//            can only make things better...
		//
		// SALVIATI:  Ah, that's true if RDRAND might only be a weak source of
		//            entropy, but if it's *actively* malicious, it could seriously
		//            compromise the security of the output. For example, it could
		//            purposely return the inverse of the bits it's going to be
		//            XORed with, resulting in this function filling 'out' with zero
		//            bytes.
		//
		// SIMPLICIO: That's ridiculous! How could RDRAND know which bits it's going
		//            to be XORed with? There's no way one instruction could figure
		//            all of that out.
		//
		// SALVIATI:  Actually, it's quite possible. The processor wouldn't even have
		//            to be smart about it. Chances are, the bits it's going to be
		//            XORed with are in cache (which is inside the CPU), so if the
		//            CPU had RDRAND return the XOR of all longs in the cache, it
		//            would cancel out and information about the state of the cache
		//            would leak out through the RNG. There are plenty of other
		//            ways. This is the CPU, remember. It can pretty much do
		//            anything it wants.
		//
		// SIMPLICIO: Ok, I see how this use of RDRAND could *in theory* weaken the
		//            whole RNG. But wouldn't that be pretty easy to detect, and
		//            can't we trust Intel? If the NSA has their hand up Intel's
		//            ass, wouldn't there be easier ways of backdooring a system?
		//
		// SALVIATI:  The RDRAND backdoor could be made so that it only activates
		//            under certain, very specific, conditions. For example, it
		//            might only activate when RAX contains 0x632472F72B3FB507,
		//            which is extremely unlikely to happen during normal use, but
		//            could be made to happen by sending the system a TCP packet,
		//            web page, etc, containing that value. So, if it existed, it
		//            would be extremely difficult to detect. It's possible in
		//            principle to reverse engineer the CPU itself, but it's
		//            extremely expensive -- and destructive -- so you can't check
		//            all of the CPUs you actually use for backdoors. Sure, if the
		//            NSA controlled Intel, there would be easier ways to backdoor
		//            a system, but backdooring the RNG is nice, because it's
		//            passive: You don't have to *do* anything to a system in order
		//            to break it. You can just listen to the system's network
		//            traffic and use your RNG backdoor to decrypt it. Further, it's
		//            bad design to combine two RNGs by XORing them together. They
		//            may be correlated (by accident or on purpose) in subtle ways
		//            that cancel out security. To be honest, I'm not exactly sure
		//            what the best way would be, but it certainly isn't XOR.
		//

		// Only the first EXTRACT_SIZE bytes of the (possibly RDRAND-XORed)
		// hash are handed to the caller; the local copy is then zeroed.
		memcpy(out, &hash, EXTRACT_SIZE);
		memset(&hash, 0, sizeof(hash));
	}