Skip to content

Instantly share code, notes, and snippets.

@jdembowski
Last active October 20, 2017 21:56
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdembowski/a115f4337e14918214a9aca575dd59aa to your computer and use it in GitHub Desktop.
Save jdembowski/a115f4337e14918214a9aca575dd59aa to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# This will need the investigate module installed via 'pip install investigate'
import investigate, time, json, fileinput, codecs, sys, os
def slice(l, n):
    """Split sequence *l* into consecutive pieces of at most *n* items.

    Args:
        l: Any sliceable sequence with a length (list, tuple, string, ...).
        n: Maximum number of items per piece. Values below 1 are treated
            as 1 so the step is always positive.

    Returns:
        A list of slices of *l*, in order. The last piece may be shorter
        than *n*; an empty *l* yields an empty list.

    NOTE: the name shadows the builtin ``slice`` type. It is kept because
    callers refer to this helper by that name; ``a[i:j]`` syntax is not
    affected by the shadowing.
    """
    step = max(1, n)
    return [l[start:start + step] for start in range(0, len(l), step)]
# Number of domains sent per bulk categorization request.
CHUNK_SIZE = 1000
# Pause between consecutive bulk requests, in seconds.
PAUSE_SECONDS = 0.5


def read_api_key(path='api-key.txt'):
    """Return the API key stored on the single line of *path*."""
    with open(path, 'r') as key_file:
        return key_file.read().strip()


def read_domains(path):
    """Return the non-blank lines of *path*, one domain per line.

    Surrounding whitespace is stripped and empty lines are dropped so that
    blank entries are never submitted to the API.
    """
    with open(path) as domain_file:
        stripped = (line.strip() for line in domain_file)
        return [name for name in stripped if name]


def format_row(domain, value):
    """Build one CSV row: ``domain,content categories,security categories``.

    Args:
        domain: The domain name the result belongs to.
        value: Mapping returned for that domain. Only the keys
            'content_categories' and 'security_categories' are read; any
            other key (e.g. 'status') is ignored.

    Returns:
        The row as text without a trailing newline. Categories inside a
        column are joined with '|'. An empty or missing security list is
        reported as 'Benign'; an empty or missing content list gives an
        empty column.
    """
    # Read the two keys explicitly instead of iterating the mapping: dict
    # iteration order is not guaranteed, and the column order must be.
    content = value.get('content_categories') or []
    security = value.get('security_categories') or []
    # '%s' % x (rather than str(x)) keeps non-ASCII unicode labels intact
    # on Python 2 and still accepts non-string items.
    content_field = '|'.join('%s' % label for label in content)
    if security:
        security_field = '|'.join('%s' % label for label in security)
    else:
        security_field = 'Benign'
    return ','.join([domain, content_field, security_field])


def main(argv=None):
    """Categorize the domains listed in a file and print CSV to stdout.

    Args:
        argv: Argument vector; defaults to ``sys.argv``. Exactly one
            argument (the input file name) is expected after the program
            name.

    Returns:
        Process exit status: 0 on success, 1 when the file name is missing.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) != 2:
        # Report on stderr so the message never ends up in the CSV output.
        sys.stderr.write('ERROR: please provide an input file name\n')
        return 1

    domains = read_domains(argv[1])
    inv = investigate.Investigate(read_api_key())

    # slice() already yields the right number of batches, including the
    # final partial one, so no separate chunk count is needed.
    for index, batch in enumerate(slice(domains, CHUNK_SIZE)):
        if index:
            # Sleep between chunks only, not after the last one.
            time.sleep(PAUSE_SECONDS)
        # Call to Investigate bulk endpoint
        results = inv.categorization(batch, labels=True)
        for domain, value in results.items():
            line = format_row(domain, value) + '\n'
            if not isinstance(line, str):
                # Python 2: some domains may be unicode; encode explicitly
                # so piping the output does not fail on non-ASCII names.
                line = line.encode('utf-8')
            sys.stdout.write(line)
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment