Last active
October 20, 2017 21:56
-
-
Save jdembowski/a115f4337e14918214a9aca575dd59aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# This will need the investigate module installed via 'pip install investigate' | |
import investigate, time, json, fileinput, codecs, sys, os | |
def slice(l, n):
    """Split ``l`` into consecutive chunks of at most ``n`` items each.

    Args:
        l: Any sequence that supports ``len()`` and slicing (list, str, ...).
        n: Desired chunk size. Values below 1 are treated as 1.

    Returns:
        A list of slices of ``l`` in their original order. Every chunk has
        ``n`` items except possibly the last, which holds the remainder.
        An empty ``l`` yields an empty list.

    Note:
        The name shadows the built-in ``slice`` type within this module; it
        is kept because the rest of the script calls it by this name.
    """
    # range() rejects a step of 0 and yields nothing for a negative step,
    # so clamp the chunk size to at least one item.
    n = max(1, n)
    return [l[i:i + n] for i in range(0, len(l), n)]
# Bulk-categorize the domains listed in the input file (one per line) through
# the Investigate API and write one CSV row per domain to stdout:
#     domain,content categories,security categories
# Multiple categories within a column are joined with '|'. No header row is
# emitted.

# Domains sent per categorization request.
# NOTE(review): 1000 is presumably the bulk endpoint's per-request limit -- confirm.
BATCH_SIZE = 1000
# Pause after each request so consecutive requests are spaced out.
PAUSE_SECONDS = 0.5

# Read the API key; the file holds it on a single line.
with open('api-key.txt', 'r') as k:
    api_key = k.read().rstrip()

inv = investigate.Investigate(api_key)

if len(sys.argv) == 2:
    filename = sys.argv[1]
else:
    # Report usage errors on stderr so they never mix with the CSV on stdout.
    sys.stderr.write('ERROR: please provide an input file name\n')
    sys.exit(1)

with open(filename) as f:
    # Skip blank lines so empty strings are not submitted as domain names.
    domains = [line.strip() for line in f.read().splitlines() if line.strip()]

# slice() already yields the final, shorter chunk, so no separate chunk-count
# arithmetic is needed.
slices = slice(domains, BATCH_SIZE)

for chunk in slices:
    # Call to the Investigate bulk categorization endpoint.
    results = inv.categorization(chunk, labels=True)
    for domain, value in results.items():
        # Some of the domains may come back as Python 2 unicode objects;
        # encode those so they can be written to a byte-oriented stdout.
        if not isinstance(domain, str):
            domain = domain.encode('utf-8')

        # Each result carries content_categories, security_categories and
        # status; the status is not used here. Look the two category lists up
        # by key so the column order does not depend on dict iteration order.
        content = '|'.join(str(p) for p in value.get('content_categories') or [])
        # An empty (or absent) security list is reported as 'Benign'.
        security = '|'.join(str(p) for p in value.get('security_categories') or [])
        if not security:
            security = 'Benign'

        # Terminate every record with a newline so each domain gets its own row.
        sys.stdout.write(domain + ',' + content + ',' + security + '\n')

    # Sleep for 0.5 second between chunks
    time.sleep(PAUSE_SECONDS)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment