Skip to content

Instantly share code, notes, and snippets.

@nibrahim
Last active May 22, 2018 06:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nibrahim/e32e99c904317b326b2460048a3ea16b to your computer and use it in GitHub Desktop.
Save nibrahim/e32e99c904317b326b2460048a3ea16b to your computer and use it in GitHub Desktop.
import json
import math
import sys
#import pprint
def parse_file(filename):
    """Load and return the JSON document stored in ``filename``.

    The file is opened inside a ``with`` block so the handle is closed
    before returning, including when JSON decoding fails.

    Args:
        filename: Path of the JSON file to read.

    Returns:
        The decoded JSON value (whatever ``json.load`` produces).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(filename, "rt", encoding="utf-8") as f:
        return json.load(f)
## You don't necessarily need to do this upfront. You can just add
## events as you go through the original data structure
def set_events(data):
    """Return every distinct event named in ``data``, in first-seen order.

    Args:
        data: Iterable of entries, each a mapping whose ``"events"`` value
            is an iterable of hashable event names.

    Returns:
        A list containing each event exactly once, ordered by first
        appearance.

    Raises:
        KeyError: If an entry has no ``"events"`` key.
    """
    # dict.fromkeys de-duplicates like a set but keeps insertion order, so
    # the result no longer varies from run to run with hash randomisation.
    return list(
        dict.fromkeys(event for entry in data for event in entry["events"])
    )
def calc_metrics(data, distinct_events):
    """Count, per event, how often each (event, squirrel) combination occurs.

    For every entry and every event in ``distinct_events`` a two-character
    key is formed: the first character is ``"1"`` if the event is listed in
    the entry's ``"events"`` and ``"0"`` otherwise; the second character is
    ``int(entry["squirrel"])``. The count stored under that key is
    incremented.

    Args:
        data: Iterable of entries, each a mapping with an ``"events"``
            collection and a ``"squirrel"`` value convertible by ``int``.
        distinct_events: Iterable of hashable event names to tabulate.

    Returns:
        A dict mapping each event to a dict of counts keyed by ``"11"``,
        ``"10"``, ``"01"`` and ``"00"``. Only combinations that were
        actually observed appear as keys.

    Raises:
        KeyError: If an entry lacks ``"events"`` or ``"squirrel"``.
    """
    # Build the table from the events that were passed in rather than from
    # a hard-coded list, so new data does not require a code change.
    counts = {event: {} for event in distinct_events}
    for entry in data:
        events = entry["events"]
        squirrel = int(entry["squirrel"])
        for event in counts:
            key = "{}{}".format(1 if event in events else 0, squirrel)
            # Increment the per-event counter; the previous code read the
            # top-level table here, so every count was stuck at 1.
            counts[event][key] = counts[event].get(key, 0) + 1
    return counts
def calc_phi(dict):
    """Compute the phi coefficient for every event's 2x2 count table.

    Args:
        dict: Mapping of event name to a mapping of counts keyed by
            ``"11"``, ``"10"``, ``"01"`` and ``"00"``. Missing keys are
            treated as a count of 0. The argument is only read, never
            modified. (The parameter name shadows the builtin; it is kept
            so existing callers keep working.)

    Returns:
        A new dict mapping each event to its phi value as a float. When any
        marginal total is zero the denominator is zero and phi is
        undefined; 0.0 is returned for that event instead of raising
        ``ZeroDivisionError``.
    """
    phi = {}
    for event, table in dict.items():
        n11 = table.get("11", 0)
        n10 = table.get("10", 0)
        n01 = table.get("01", 0)
        n00 = table.get("00", 0)
        numerator = n11 * n00 - n10 * n01
        # Product of the four marginal totals of the 2x2 table.
        denominator = math.sqrt(
            (n11 + n10) * (n00 + n01) * (n11 + n01) * (n00 + n10)
        )
        phi[event] = numerator / denominator if denominator else 0.0
    return phi
def main(filename):
    """Print the phi value of every event in the journal file as JSON.

    Loads ``filename`` with ``parse_file``, collects the event names with
    ``set_events``, tabulates them with ``calc_metrics``, converts the
    tables with ``calc_phi`` and prints the result via ``json.dumps`` with
    an indent of 10.
    """
    journal = parse_file(filename)
    event_names = set_events(journal)
    tables = calc_metrics(journal, event_names)
    correlations = calc_phi(tables)
    print(json.dumps(correlations, indent=10))
if __name__ == "__main__":  # Import guard
    # Fail with a usage message instead of a bare IndexError when the
    # journal path is missing from the command line.
    if len(sys.argv) < 2:
        sys.exit("usage: {} <journal.json>".format(sys.argv[0]))
    main(sys.argv[1])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment