kkiningh/dataset.py

## dataset.py
import numpy as np
import scipy.stats
import scipy.signal

sbox = np.array([
        0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
        0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
        0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
        0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
        0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
        0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
        0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
        0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
        0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
        0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
        0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
        0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
        0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
        0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
        0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
        0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16])

hamming_weight = np.array([bin(x).count("1") for x in range(256)])

def load_dataset(dataset_name):
    # The power consumption. Row is trace number, column is time.
    pwrs = np.load(dataset_name + '_pwrs.npy')

    # Keys and messages used for the encryption.
    msgs = np.load(dataset_name + '_msgs.npy')
    keys = np.load(dataset_name + '_keys.npy')

    # Calculate the labels we're trying to predict.
    # We want to predict the hamming wight of the output of the initial round
    # plus the first SubBytes step.
    # See https://en.wikipedia.org/wiki/Advanced_Encryption_Standard under
    # "High-level description of the algorithm"
    hw = hamming_weight[sbox[msgs[:, 0] ^ keys[:, 0]]]

    return pwrs, hw

def simple_template_attack(pwrs, hw, n_poi=100):
    # Find the locations in the dataset that correlate most highly to the key
    # Note this takes ~30 seconds on my machine
    corr, _ = np.apply_along_axis(scipy.stats.pearsonr, 0, pwrs, hw)

    # Find the indexes of the peaks of the correlation
    peaks, = scipy.signal.argrelmax(corr, order=50)

    # Take the top n_poi peaks from the correlation
    poi = peaks[corr[peaks].argsort()[-n_poi:]]

    # Create the actual templates
    templates = []
    for i in range(9):
        # Extract just the power information for just the points of interest
        pwrs_for_poi = pwrs[hw == i][:, poi]

        # Calculate the sample mean and covariance
        mean = pwrs_for_poi.mean(0)
        cov  = np.cov(pwrs_for_poi.T)

        # Create the actual template as a multivariate gaussian
        t = scipy.stats.multivariate_normal(mean, cov, allow_singular=True)

        templates.append(t)

    # Calculate the probility of each template for every trace
    p = np.array([dist.logpdf(pwrs[:, poi]) for dist in templates])

    # Convert these probability estimates to rank (neg. prob to get rank 1 == most likely)
    # This tells us the order in which we'd guess the subkey
    r = np.apply_along_axis(scipy.stats.rankdata, 0, -p)

    # Pick the correct hw from the list of ranks.
    # This basically tells us the number of guesses we'd have to make in order
    # to guess the correct key.
    rank_of_correct_hw = np.choose(hw, r)

    # Print stats about how well we did
    for i in range(9):
        # True positives is the number of times we ranked hw i as 1 when it was correct
        tp = float(np.logical_and(r[i] == 1, hw == i).sum())

        # True negative is the number of times we ranked hw i as not 1 when it was incorrect
        tn = float(np.logical_and(r[i] != 1, hw != i).sum())

        # False positives is the number of times we ranked hw i as 1 when it was incorrect
        fp = float(np.logical_and(r[i] == 1, hw != i).sum())

        # False negative is the number of times we ranked hw i as not 1 when it was correct
        fn = float(np.logical_and(r[i] != 1, hw == i).sum())

        # Average number of guesses for this hamming weight
        avg_guesses = rank_of_correct_hw[hw == i].mean(0)

        # Precision = tp / (tp + fp)
        print 'Template %d' % i
        if tp + fn == 0:
            print '\tPrecision: Undefined (%d/%d)' % (tp, tp + fn)
        else:
            print '\tPrecision: %f (%d/%d)' % (float(tp / (tp + fn)), tp, tp + fn)

        # Recall = tp / (tp + fn)
        if tp + fp == 0:
            print '\tRecall:    Undefined (%d/%d)' % (tp, tp + fp)
        else:
            print '\tRecall:    %f (%d/%d)' % (float(tp / (tp + fp)), tp, tp + fp)

        # Average number of guesses for this hw
        print '\tAvg. guesses needed: %f' % (avg_guesses)

    return corr, poi

def main():
    import matplotlib.pyplot as plt

    # Load the dataset. 'hw' is the trace label (what we're trying to predict)
    pwrs, hw = load_dataset('0001')

    # Perform a simple template attack
    # A typical attack uses more poi, but it makes the graph too busy
    corrs, pois = simple_template_attack(pwrs, hw, n_poi=10)

    # Plot all the hw
    for i in range(9):
        plt.plot(pwrs[hw == i].mean(0))

    # Plot the points of interest
    for poi in pois:
        plt.axvline(poi, color='r')

    plt.show()

if __name__ == '__main__':
    main()
	import numpy as np
	import scipy.stats
	import scipy.signal

	sbox = np.array([
	0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76,
	0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0,
	0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15,
	0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75,
	0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84,
	0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf,
	0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8,
	0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2,
	0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73,
	0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb,
	0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79,
	0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08,
	0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a,
	0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e,
	0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf,
	0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16])

	hamming_weight = np.array([bin(x).count("1") for x in range(256)])

	def load_dataset(dataset_name):
	# The power consumption. Row is trace number, column is time.
	pwrs = np.load(dataset_name + '_pwrs.npy')

	# Keys and messages used for the encryption.
	msgs = np.load(dataset_name + '_msgs.npy')
	keys = np.load(dataset_name + '_keys.npy')

	# Calculate the labels we're trying to predict.
	# We want to predict the hamming wight of the output of the initial round
	# plus the first SubBytes step.
	# See https://en.wikipedia.org/wiki/Advanced_Encryption_Standard under
	# "High-level description of the algorithm"
	hw = hamming_weight[sbox[msgs[:, 0] ^ keys[:, 0]]]

	return pwrs, hw

	def simple_template_attack(pwrs, hw, n_poi=100):
	# Find the locations in the dataset that correlate most highly to the key
	# Note this takes ~30 seconds on my machine
	corr, _ = np.apply_along_axis(scipy.stats.pearsonr, 0, pwrs, hw)

	# Find the indexes of the peaks of the correlation
	peaks, = scipy.signal.argrelmax(corr, order=50)

	# Take the top n_poi peaks from the correlation
	poi = peaks[corr[peaks].argsort()[-n_poi:]]

	# Create the actual templates
	templates = []
	for i in range(9):
	# Extract just the power information for just the points of interest
	pwrs_for_poi = pwrs[hw == i][:, poi]

	# Calculate the sample mean and covariance
	mean = pwrs_for_poi.mean(0)
	cov = np.cov(pwrs_for_poi.T)

	# Create the actual template as a multivariate gaussian
	t = scipy.stats.multivariate_normal(mean, cov, allow_singular=True)

	templates.append(t)

	# Calculate the probility of each template for every trace
	p = np.array([dist.logpdf(pwrs[:, poi]) for dist in templates])

	# Convert these probability estimates to rank (neg. prob to get rank 1 == most likely)
	# This tells us the order in which we'd guess the subkey
	r = np.apply_along_axis(scipy.stats.rankdata, 0, -p)

	# Pick the correct hw from the list of ranks.
	# This basically tells us the number of guesses we'd have to make in order
	# to guess the correct key.
	rank_of_correct_hw = np.choose(hw, r)

	# Print stats about how well we did
	for i in range(9):
	# True positives is the number of times we ranked hw i as 1 when it was correct
	tp = float(np.logical_and(r[i] == 1, hw == i).sum())

	# True negative is the number of times we ranked hw i as not 1 when it was incorrect
	tn = float(np.logical_and(r[i] != 1, hw != i).sum())

	# False positives is the number of times we ranked hw i as 1 when it was incorrect
	fp = float(np.logical_and(r[i] == 1, hw != i).sum())

	# False negative is the number of times we ranked hw i as not 1 when it was correct
	fn = float(np.logical_and(r[i] != 1, hw == i).sum())

	# Average number of guesses for this hamming weight
	avg_guesses = rank_of_correct_hw[hw == i].mean(0)

	# Precision = tp / (tp + fp)
	print 'Template %d' % i
	if tp + fn == 0:
	print '\tPrecision: Undefined (%d/%d)' % (tp, tp + fn)
	else:
	print '\tPrecision: %f (%d/%d)' % (float(tp / (tp + fn)), tp, tp + fn)

	# Recall = tp / (tp + fn)
	if tp + fp == 0:
	print '\tRecall: Undefined (%d/%d)' % (tp, tp + fp)
	else:
	print '\tRecall: %f (%d/%d)' % (float(tp / (tp + fp)), tp, tp + fp)

	# Average number of guesses for this hw
	print '\tAvg. guesses needed: %f' % (avg_guesses)

	return corr, poi

	def main():
	import matplotlib.pyplot as plt

	# Load the dataset. 'hw' is the trace label (what we're trying to predict)
	pwrs, hw = load_dataset('0001')

	# Perform a simple template attack
	# A typical attack uses more poi, but it makes the graph too busy
	corrs, pois = simple_template_attack(pwrs, hw, n_poi=10)

	# Plot all the hw
	for i in range(9):
	plt.plot(pwrs[hw == i].mean(0))

	# Plot the points of interest
	for poi in pois:
	plt.axvline(poi, color='r')

	plt.show()

	if __name__ == '__main__':
	main()