Skip to content

Instantly share code, notes, and snippets.

@blackfist
Last active December 27, 2015 09:39
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save blackfist/7305012 to your computer and use it in GitHub Desktop.
Save blackfist/7305012 to your computer and use it in GitHub Desktop.
Takes a random sample from the VCDB dataset for quality checking.
import json
import os
from collections import defaultdict
from random import sample
# Maps a group label -- the value of an incident's 'analyst' field, or the
# fallback labels 'no status' / 'no analyst' -- to the list of incident
# filenames placed in that group.
population = defaultdict(list)
# Upper bound on how many filenames are drawn from each group.
sample_size = 2
# Maps each group label to the filenames randomly drawn from that group.
final_sample = defaultdict(list)
# i = getIncident('blahblahblah.json')
def getIncident(inString):
    """Read one incident file and return its decoded JSON content.

    Args:
        inString: Path of the JSON file to load.

    Returns:
        The object produced by decoding the file's JSON text.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle even when decoding fails, so
    # scanning a large directory does not leak file descriptors.
    with open(inString) as handle:
        return json.load(handle)
# Scan the current directory and sort every JSON incident file into a group:
#   * 'no status'  - the incident has no 'analysis_status' under 'plus'
#   * <analyst>    - status is "First pass" and an analyst is recorded
#   * 'no analyst' - status is "First pass" but no usable analyst value
# Incidents with any other status are left out of the population.
for filename in os.listdir('.'):
    if not filename.endswith('.json'):
        continue
    incident = getIncident(filename)
    plus = incident['plus']
    if 'analysis_status' not in plus:
        population['no status'].append(filename)
        continue
    if plus['analysis_status'] != "First pass":
        continue
    try:
        bucket = population[plus['analyst']]
    except (KeyError, TypeError):
        # KeyError: the incident has no 'analyst' field.
        # TypeError: the analyst value is unhashable and cannot be a dict key.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        bucket = population['no analyst']
    bucket.append(filename)
# For each group, draw sample_size filenames at random; a group holding fewer
# than sample_size files contributes all of its files (in random order).
for group, candidates in population.items():
    draw_count = min(sample_size, len(candidates))
    final_sample[group] = sample(candidates, draw_count)
for key in final_sample:
print "Sample for",key
for each in final_sample[key]:
print "\t",each
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment