Skip to content

Instantly share code, notes, and snippets.

@ablanathtanalba
Last active September 11, 2019 07:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ablanathtanalba/bada7a50943e28022b55a79ace40344f to your computer and use it in GitHub Desktop.
Save ablanathtanalba/bada7a50943e28022b55a79ace40344f to your computer and use it in GitHub Desktop.
import json
import math
# python version of estimate max entropy function
# manually tested, produces same results as the js version
def estimate_max_entropy(str):
    """Estimate an upper bound, in bits, on the entropy of a string.

    The estimate is ``log2(number of possible symbols) * len(str)``. The
    number of possible symbols is the size of the first common character
    class (binary, decimal, hex, alpha, alphanumeric, base64, URL-ish) that
    contains every character of the string, with the separators ``._-``
    allowed in any class. If no class fits, the spread between the highest
    and lowest character code is used instead.

    Args:
        str: The string to analyse. (The name shadows the builtin; it is
            kept so existing keyword callers keep working.)

    Returns:
        ``len(str)`` (an int) when the string is longer than 256 characters,
        ``0.0`` for an empty string, otherwise the estimate as a float.
    """
    MAX_LS_LEN_FOR_ENTROPY_EST = 256

    # Separator characters are accepted in every class but do not add to the
    # class's symbol count.
    SEPS = "._-"
    BIN = "01"
    DEC = "0123456789"
    HEX = "abcdef" + DEC
    ALPHA = "abcdefghijklmnopqrstuvwxyz"
    ALPHANUM = ALPHA + DEC
    B64 = ALPHANUM + ALPHA.upper() + "/+"
    URL = ALPHANUM + ALPHA.upper() + "~%"

    text = str

    # Long strings are not analysed; their length is returned as the estimate.
    if len(text) > MAX_LS_LEN_FOR_ENTROPY_EST:
        return len(text)

    # Nothing to measure; also avoids min()/max() on an empty sequence below.
    if not text:
        return 0.0

    # If the string is entirely one case, ignore case so it can match the
    # lower-case-only classes (HEX, ALPHA, ALPHANUM).
    if text.lower() == text or text.upper() == text:
        text = text.lower()

    max_symbols = None

    # this is the code block to comment out on the second run
    # Classes are ordered from smallest to largest, so the first match gives
    # the tightest estimate.
    for chr_class in (BIN, DEC, HEX, ALPHA, ALPHANUM, B64, URL):
        group = set(chr_class + SEPS)
        if all(char in group for char in text):
            max_symbols = len(chr_class)
            break

    # No obvious class: fall back to "max char code - min char code + 1".
    if max_symbols is None:
        char_codes = [ord(char) for char in text]
        max_symbols = max(char_codes) - min(char_codes) + 1

    # (bits per character) * (number of characters)
    return math.log2(max_symbols) * len(text)
#### now the tricky stuff, import the test results
#### filter them by type, find the pixel ones, save the substrings
#### run those substrings through the estimate max entropy function
#### save those results,
#### then rinse and repeat but with the block of code in question commented out
#### compare the results, see what's good
# Load the big JSON blob of test results from the current directory.
# JSON is read as UTF-8 explicitly so the result does not depend on the
# platform's default encoding.
with open('./results-10k-cookieshare.json', encoding="utf-8") as data_file:
    data = json.load(data_file)

# Substrings from the pixel-tracking matches.
# NOTE(review): the entries are read from data["snitch_map"][""] (empty-string
# key), exactly as before - confirm this matches the results file's layout.
pixel_substrings = (
    item["details"]["substring"]
    for item in data["snitch_map"][""]
    if item["type"] == "pixel"
)

# Dictionary of every pixel-tracking substring -> its estimated max entropy.
# A substring that occurs more than once keeps a single entry.
filtered_results = {
    match: estimate_max_entropy(match) for match in pixel_substrings
}

# TODO: probably rewrite this to save the results to a new file; if so,
# append the results from the run with the class-detection block commented
# out to the same file so the two runs can be compared.
print(filtered_results)
@ablanathtanalba
Copy link
Author

This is run on my local machine, so the directory paths assume as much. I downloaded the raw JSON results from Bennett's Jupyter notebook.

@ablanathtanalba
Copy link
Author

buggy, but it's late, coming back to this

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment