-
-
Save jcjones/a73789205b007a57123740776761c50b to your computer and use it in GitHub Desktop.
Analyze SHA1 Deprecation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8

# In[1]:

from moztelemetry import get_pings_properties, get_one_ping_per_client
from moztelemetry.dataset import Dataset

# Select the 2017 submissions stored under docType "OTHER".
# NOTE(review): `sc` is not defined in this file; presumably it is the
# SparkContext injected by the notebook environment -- confirm.
pings = (
    Dataset.from_source("telemetry")
    .where(submissionDate=lambda xx: xx.startswith("2017"))
    .where(docType="OTHER")
    .records(sc, sample=1)
)

# Narrow down to the pings whose own metadata names the SHA-1 rollout docType.
rolloutData = pings.filter(lambda xx: xx['meta']['docType'] == "disableSHA1rollout")

# In[2]:

# A handful of records (take(10)) used for the trial run further down.
subset = rolloutData.take(10)
# In[3]:

from collections import defaultdict, Counter

# SHA-256 fingerprints that isChainAsExpected() compares, position by position,
# against the 'sha256Fingerprint' of each entry in a ping's payload chain.
expected = ["197feaf3faa0f0ad637a89c97cb91336bfc114b6b3018203cbd9c3d10c7fa86c",
            "154c433c491929c5ef686e838e323664a00e6a0d822ccc958fb4dab03e49a08f",
            "4348a0e9444c78cb265e058d5e8944b4d84f9662bd26db257f8934a443c70161"]
# Not currently used
def isChainAsExpected(xx, expectedFingerprints=None):
    """Return a ping's certificate chain when it deviates from the expected one.

    Despite the name, this does not return a boolean: the result is ``None``
    when the chain matches (or when the ping carries no chain at all), and
    the offending chain itself otherwise.

    Args:
        xx: A ping; the chain is read from ``xx['payload']['chain']``, a
            sequence of dicts carrying a ``'sha256Fingerprint'`` entry.
        expectedFingerprints: Fingerprints to compare against, in chain
            order. Defaults to the module-level ``expected`` list.

    Returns:
        ``None`` if ``'payload'`` or ``'chain'`` is missing, or if every
        chain entry matches the fingerprint at the same position. Otherwise
        the chain: returned when its length differs from the number of
        expected fingerprints, or when any entry's fingerprint is different
        or absent.
    """
    if expectedFingerprints is None:
        expectedFingerprints = expected

    if 'payload' not in xx or 'chain' not in xx['payload']:
        return None

    chain = xx['payload']['chain']
    # Compare against the actual number of expected fingerprints rather than
    # a hard-coded 3, so the check stays in step with the list.
    if len(chain) != len(expectedFingerprints):
        return chain

    for entry, fingerprint in zip(chain, expectedFingerprints):
        # An entry without a fingerprint counts as a mismatch, not a crash.
        if entry.get('sha256Fingerprint') != fingerprint:
            return chain
    return None
# Cohort name -> accumulator key under which its enrollment timeline is kept.
_COHORT_TIMELINE_KEYS = {
    "notSafeToDisableSHA1": 'time-notSafe',
    "optedOut": 'time-optedOut',
    "optedIn": 'time-optedIn',
    "test": 'time-test',
    "control": 'time-control',
}


def analyzeSHA1shutdown(accum, xx):
    """Reduce step that folds rollout pings into per-category counters.

    Written to be handed to ``reduce``: either argument may be a raw ping or
    an accumulator produced by an earlier call. Accumulators are recognised
    by the ``'isAccum'`` marker key.

    Args:
        accum: An accumulator (``defaultdict(Counter)`` carrying
            ``'isAccum'``) or, on the first step of a reduce, a raw ping.
        xx: A raw ping to analyse, or another accumulator to merge in.

    Returns:
        The accumulator, updated in place. Categories written here are
        ``channels``, ``geoCities``, ``geoCountries``, ``total_errors``,
        ``total_roots``, ``rooterrors``, ``failureReasons``,
        ``disabledSHA1``, ``cohortNames`` and the ``time-*`` timelines.

    Note:
        A reduce over a single element never calls this function, so the
        result in that case is the untouched raw ping.
    """
    if 'isAccum' not in accum:
        # First step of a reduce: `accum` is still a raw ping. Start an empty
        # accumulator and feed that ping through the normal analysis path.
        bootstrap = defaultdict(Counter)
        bootstrap['isAccum'] = True
        accum = analyzeSHA1shutdown(bootstrap, accum)

    if 'isAccum' in xx:
        # Merging two intermediate results: add the counts category by
        # category, skipping the marker (its value is not a Counter).
        for category, counts in xx.items():
            if category == 'isAccum':
                continue
            accum[category].update(counts)
        return accum

    # Primary analysis. Pings lacking any of the required fields are ignored.
    if 'payload' not in xx:
        return accum
    payload = xx['payload']
    if 'errorCode' not in payload or 'chain' not in payload:
        return accum

    # Grab metadata
    meta = xx['meta']
    accum['channels'][meta['normalizedChannel']] += 1
    accum['geoCities'][meta['geoCity']] += 1
    accum['geoCountries'][meta['geoCountry']] += 1

    # Analyze error codes
    code = payload['errorCode']
    if code != 0:
        accum['total_errors'][code] += 1

        # NOTE(review): the original indentation was lost; this loop is
        # assumed to belong to the non-zero error branch -- confirm.
        for chainEntry in payload['chain']:
            # Explicit comparison kept so arbitrary truthy values don't count.
            if chainEntry['isBuiltInRoot'] == True:  # noqa: E712
                fp = chainEntry['sha256Fingerprint']
                accum['total_roots'][fp] += 1
                accum['rooterrors']["{} {}".format(fp, code)] += 1

    try:
        if 'didNotDisableSHA1Because' in payload:
            accum['failureReasons'][payload['didNotDisableSHA1Because']] += 1

        if 'disabledSHA1' in payload:
            accum['disabledSHA1'][payload['disabledSHA1']] += 1

        if 'cohortName' in payload:
            cohortName = payload['cohortName']
            accum['cohortNames'][cohortName] += 1

            # Build a timeline of cohort enrollment; unknown cohorts are
            # counted above but get no timeline.
            subDate = meta['submissionDate']
            timelineKey = _COHORT_TIMELINE_KEYS.get(cohortName)
            if timelineKey is not None:
                accum[timelineKey][subDate] += 1
    except Exception:
        # Best effort: report the odd ping and keep going instead of failing
        # the whole reduce. Unlike a bare `except:`, this lets
        # KeyboardInterrupt and SystemExit through.
        print("Missing data? {}".format(payload))

    return accum
# In[4]:

# Trial run on a subset
# The builtin `reduce` is gone in Python 3; functools.reduce is the same
# function and is also available on Python 2.6+.
from functools import reduce

reduce(analyzeSHA1shutdown, subset)

# In[5]:

# Run on full dataset
rolloutData.reduce(analyzeSHA1shutdown)

# In[ ]:
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment