"""Example of using the SCA dataset for CS229."""
import numpy as np
import scipy.stats
import scipy.signal
sbox = np.array([
# Lookup table: hamming_weight[b] is the number of set bits in the byte value b.
hamming_weight = np.array(
    [format(byte_value, "b").count("1") for byte_value in range(256)]
)
def load_dataset(dataset_name):
    """Load the power traces of a dataset and compute one label per trace.

    Three files are read with ``np.load``: ``<dataset_name>_pwrs.npy``,
    ``<dataset_name>_msgs.npy`` and ``<dataset_name>_keys.npy``.

    Returns a ``(pwrs, hw)`` tuple:

    * ``pwrs`` -- the power consumption; row is trace number, column is time.
    * ``hw`` -- the label to predict for each trace: the Hamming weight of
      ``sbox[msg ^ key]`` taken on column 0 of the messages and keys, i.e.
      the output of the initial round plus the first SubBytes step.
    """
    # Load in a fixed order: power traces first, then the messages and keys
    # that were used for the encryptions.
    pwrs, msgs, keys = (
        np.load(dataset_name + suffix)
        for suffix in ('_pwrs.npy', '_msgs.npy', '_keys.npy')
    )

    # Initial round: XOR column 0 of the message with column 0 of the key.
    initial_round_output = msgs[:, 0] ^ keys[:, 0]
    # First SubBytes step: substitute through the S-box.
    sub_bytes_output = sbox[initial_round_output]
    # The label is the Hamming weight of that intermediate value.
    hw = hamming_weight[sub_bytes_output]
    return pwrs, hw
def _print_ratio(label, numerator, denominator):
    """Print ``numerator / denominator`` under *label*, or 'Undefined' if the denominator is 0."""
    if denominator == 0:
        print('\t%s: Undefined (%d/%d)' % (label, numerator, denominator))
    else:
        print('\t%s: %f (%d/%d)' % (label, numerator / denominator,
                                    numerator, denominator))


def simple_template_attack(pwrs, hw, n_poi=100):
    """Build one Gaussian template per Hamming weight and print how well they classify.

    The attack is trained and evaluated on the same traces:

    1. Correlate every time sample with the labels and keep the ``n_poi``
       strongest local correlation peaks as points of interest (poi).
    2. For each Hamming weight 0..8, fit a multivariate normal to the traces
       with that label, restricted to the poi.
    3. Rank the nine templates for every trace by log-likelihood
       (rank 1 == most likely) and print precision, recall and the average
       rank of the correct template for each Hamming weight.

    Parameters:
        pwrs: 2-D array of power traces; row is trace number, column is time.
        hw: 1-D integer array with the Hamming weight (0..8) of each trace.
        n_poi: maximum number of correlation peaks used as points of interest.

    Returns:
        ``(corr, poi)`` -- the per-sample Pearson correlation with ``hw`` and
        the indexes of the selected points of interest.
    """
    n_classes = 9  # A byte has a Hamming weight between 0 and 8.

    # Find the locations in the dataset that correlate most highly to the key.
    # One Pearson correlation per time sample; the original author measured
    # roughly 30 seconds for this step on the full dataset.
    corr = np.array([scipy.stats.pearsonr(column, hw)[0] for column in pwrs.T])

    # Find the indexes of the peaks of the correlation
    peaks, = scipy.signal.argrelmax(corr, order=50)

    # Take the top n_poi peaks from the correlation
    poi = peaks[corr[peaks].argsort()[-n_poi:]]

    # Create the actual templates
    templates = []
    for i in range(n_classes):
        # Extract just the power information for just the points of interest
        pwrs_for_poi = pwrs[hw == i][:, poi]

        # Calculate the sample mean and covariance
        mean = pwrs_for_poi.mean(0)
        cov = np.cov(pwrs_for_poi.T)

        # Create the actual template as a multivariate gaussian and keep it;
        # without the append the list below would stay empty.
        templates.append(
            scipy.stats.multivariate_normal(mean, cov, allow_singular=True))

    # Calculate the log-probability of each template for every trace
    p = np.array([dist.logpdf(pwrs[:, poi]) for dist in templates])

    # Convert these probability estimates to rank (neg. prob to get
    # rank 1 == most likely). This tells us the order in which we'd guess
    # the subkey.
    r = np.apply_along_axis(scipy.stats.rankdata, 0, -p)

    # Pick the correct hw from the list of ranks.
    # This basically tells us the number of guesses we'd have to make in order
    # to guess the correct key.
    rank_of_correct_hw = np.choose(hw, r)

    # Print stats about how well we did
    for i in range(n_classes):
        ranked_first = r[i] == 1
        is_class = hw == i

        # True positives: ranked hw i as 1 when it was correct
        tp = float(np.logical_and(ranked_first, is_class).sum())
        # False positives: ranked hw i as 1 when it was incorrect
        fp = float(np.logical_and(ranked_first, ~is_class).sum())
        # False negatives: ranked hw i as not 1 when it was correct
        fn = float(np.logical_and(~ranked_first, is_class).sum())

        # Average number of guesses for this hamming weight
        avg_guesses = rank_of_correct_hw[is_class].mean(0)

        print('Template %d' % i)
        # Precision = tp / (tp + fp)
        _print_ratio('Precision', tp, tp + fp)
        # Recall = tp / (tp + fn)
        _print_ratio('Recall', tp, tp + fn)
        # Average number of guesses for this hw
        print('\tAvg. guesses needed: %f' % (avg_guesses))

    return corr, poi
def main():
    """Run the example attack on dataset '0001' and plot the result.

    Plots the mean power trace of every Hamming-weight class and marks the
    points of interest chosen by the attack with red vertical lines.
    """
    import matplotlib.pyplot as plt

    # Load the dataset. 'hw' is the trace label (what we're trying to predict)
    pwrs, hw = load_dataset('0001')

    # Perform a simple template attack
    # A typical attack uses more poi, but it makes the graph too busy
    _, pois = simple_template_attack(pwrs, hw, n_poi=10)

    # Plot the mean trace of each hw class
    for i in range(9):
        plt.plot(pwrs[hw == i].mean(0))

    # Plot the points of interest
    for poi in pois:
        plt.axvline(poi, color='r')

    # Display the figure; without this call the plots are built but never shown.
    plt.show()
if __name__ == '__main__':
