Last active
December 9, 2016 02:05
-
-
Save kkiningh/0f1d06ce461e8e264f52b4dce9fc11c9 to your computer and use it in GitHub Desktop.
Example of using the SCA dataset for CS229
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import scipy.stats | |
import scipy.signal | |
sbox = np.array([ | |
0x63,0x7c,0x77,0x7b,0xf2,0x6b,0x6f,0xc5,0x30,0x01,0x67,0x2b,0xfe,0xd7,0xab,0x76, | |
0xca,0x82,0xc9,0x7d,0xfa,0x59,0x47,0xf0,0xad,0xd4,0xa2,0xaf,0x9c,0xa4,0x72,0xc0, | |
0xb7,0xfd,0x93,0x26,0x36,0x3f,0xf7,0xcc,0x34,0xa5,0xe5,0xf1,0x71,0xd8,0x31,0x15, | |
0x04,0xc7,0x23,0xc3,0x18,0x96,0x05,0x9a,0x07,0x12,0x80,0xe2,0xeb,0x27,0xb2,0x75, | |
0x09,0x83,0x2c,0x1a,0x1b,0x6e,0x5a,0xa0,0x52,0x3b,0xd6,0xb3,0x29,0xe3,0x2f,0x84, | |
0x53,0xd1,0x00,0xed,0x20,0xfc,0xb1,0x5b,0x6a,0xcb,0xbe,0x39,0x4a,0x4c,0x58,0xcf, | |
0xd0,0xef,0xaa,0xfb,0x43,0x4d,0x33,0x85,0x45,0xf9,0x02,0x7f,0x50,0x3c,0x9f,0xa8, | |
0x51,0xa3,0x40,0x8f,0x92,0x9d,0x38,0xf5,0xbc,0xb6,0xda,0x21,0x10,0xff,0xf3,0xd2, | |
0xcd,0x0c,0x13,0xec,0x5f,0x97,0x44,0x17,0xc4,0xa7,0x7e,0x3d,0x64,0x5d,0x19,0x73, | |
0x60,0x81,0x4f,0xdc,0x22,0x2a,0x90,0x88,0x46,0xee,0xb8,0x14,0xde,0x5e,0x0b,0xdb, | |
0xe0,0x32,0x3a,0x0a,0x49,0x06,0x24,0x5c,0xc2,0xd3,0xac,0x62,0x91,0x95,0xe4,0x79, | |
0xe7,0xc8,0x37,0x6d,0x8d,0xd5,0x4e,0xa9,0x6c,0x56,0xf4,0xea,0x65,0x7a,0xae,0x08, | |
0xba,0x78,0x25,0x2e,0x1c,0xa6,0xb4,0xc6,0xe8,0xdd,0x74,0x1f,0x4b,0xbd,0x8b,0x8a, | |
0x70,0x3e,0xb5,0x66,0x48,0x03,0xf6,0x0e,0x61,0x35,0x57,0xb9,0x86,0xc1,0x1d,0x9e, | |
0xe1,0xf8,0x98,0x11,0x69,0xd9,0x8e,0x94,0x9b,0x1e,0x87,0xe9,0xce,0x55,0x28,0xdf, | |
0x8c,0xa1,0x89,0x0d,0xbf,0xe6,0x42,0x68,0x41,0x99,0x2d,0x0f,0xb0,0x54,0xbb,0x16]) | |
hamming_weight = np.array([bin(x).count("1") for x in range(256)]) | |
def load_dataset(dataset_name): | |
# The power consumption. Row is trace number, column is time. | |
pwrs = np.load(dataset_name + '_pwrs.npy') | |
# Keys and messages used for the encryption. | |
msgs = np.load(dataset_name + '_msgs.npy') | |
keys = np.load(dataset_name + '_keys.npy') | |
# Calculate the labels we're trying to predict. | |
# We want to predict the hamming wight of the output of the initial round | |
# plus the first SubBytes step. | |
# See https://en.wikipedia.org/wiki/Advanced_Encryption_Standard under | |
# "High-level description of the algorithm" | |
hw = hamming_weight[sbox[msgs[:, 0] ^ keys[:, 0]]] | |
return pwrs, hw | |
def simple_template_attack(pwrs, hw, n_poi=100): | |
# Find the locations in the dataset that correlate most highly to the key | |
# Note this takes ~30 seconds on my machine | |
corr, _ = np.apply_along_axis(scipy.stats.pearsonr, 0, pwrs, hw) | |
# Find the indexes of the peaks of the correlation | |
peaks, = scipy.signal.argrelmax(corr, order=50) | |
# Take the top n_poi peaks from the correlation | |
poi = peaks[corr[peaks].argsort()[-n_poi:]] | |
# Create the actual templates | |
templates = [] | |
for i in range(9): | |
# Extract just the power information for just the points of interest | |
pwrs_for_poi = pwrs[hw == i][:, poi] | |
# Calculate the sample mean and covariance | |
mean = pwrs_for_poi.mean(0) | |
cov = np.cov(pwrs_for_poi.T) | |
# Create the actual template as a multivariate gaussian | |
t = scipy.stats.multivariate_normal(mean, cov, allow_singular=True) | |
templates.append(t) | |
# Calculate the probility of each template for every trace | |
p = np.array([dist.logpdf(pwrs[:, poi]) for dist in templates]) | |
# Convert these probability estimates to rank (neg. prob to get rank 1 == most likely) | |
# This tells us the order in which we'd guess the subkey | |
r = np.apply_along_axis(scipy.stats.rankdata, 0, -p) | |
# Pick the correct hw from the list of ranks. | |
# This basically tells us the number of guesses we'd have to make in order | |
# to guess the correct key. | |
rank_of_correct_hw = np.choose(hw, r) | |
# Print stats about how well we did | |
for i in range(9): | |
# True positives is the number of times we ranked hw i as 1 when it was correct | |
tp = float(np.logical_and(r[i] == 1, hw == i).sum()) | |
# True negative is the number of times we ranked hw i as not 1 when it was incorrect | |
tn = float(np.logical_and(r[i] != 1, hw != i).sum()) | |
# False positives is the number of times we ranked hw i as 1 when it was incorrect | |
fp = float(np.logical_and(r[i] == 1, hw != i).sum()) | |
# False negative is the number of times we ranked hw i as not 1 when it was correct | |
fn = float(np.logical_and(r[i] != 1, hw == i).sum()) | |
# Average number of guesses for this hamming weight | |
avg_guesses = rank_of_correct_hw[hw == i].mean(0) | |
# Precision = tp / (tp + fp) | |
print 'Template %d' % i | |
if tp + fn == 0: | |
print '\tPrecision: Undefined (%d/%d)' % (tp, tp + fn) | |
else: | |
print '\tPrecision: %f (%d/%d)' % (float(tp / (tp + fn)), tp, tp + fn) | |
# Recall = tp / (tp + fn) | |
if tp + fp == 0: | |
print '\tRecall: Undefined (%d/%d)' % (tp, tp + fp) | |
else: | |
print '\tRecall: %f (%d/%d)' % (float(tp / (tp + fp)), tp, tp + fp) | |
# Average number of guesses for this hw | |
print '\tAvg. guesses needed: %f' % (avg_guesses) | |
return corr, poi | |
def main(): | |
import matplotlib.pyplot as plt | |
# Load the dataset. 'hw' is the trace label (what we're trying to predict) | |
pwrs, hw = load_dataset('0001') | |
# Perform a simple template attack | |
# A typical attack uses more poi, but it makes the graph too busy | |
corrs, pois = simple_template_attack(pwrs, hw, n_poi=10) | |
# Plot all the hw | |
for i in range(9): | |
plt.plot(pwrs[hw == i].mean(0)) | |
# Plot the points of interest | |
for poi in pois: | |
plt.axvline(poi, color='r') | |
plt.show() | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment