Skip to content

Instantly share code, notes, and snippets.

@jdembowski
Last active September 27, 2017 20:04
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jdembowski/6607878a60fab62d639dd44888394e6e to your computer and use it in GitHub Desktop.
Save jdembowski/6607878a60fab62d639dd44888394e6e to your computer and use it in GitHub Desktop.
#!/usr/bin/python
import os, sys, json
from time import sleep
from urllib2 import Request, urlopen, URLError, HTTPError
# From https://stackoverflow.com/questions/3462784/check-if-a-string-matches-an-ip-address-pattern-in-python
def validate_ip(s):
    """Return True if *s* looks like a dotted-quad IPv4 address, else False.

    A valid address is exactly four '.'-separated fields, each a non-empty
    run of ASCII digits whose integer value is in the range 0-255. Leading
    zeros are accepted (e.g. '01.2.3.4').

    Args:
        s: The string to check.

    Returns:
        True when every field passes the checks above, False otherwise.
    """
    octets = s.split('.')
    if len(octets) != 4:
        return False
    for octet in octets:
        # Test against ASCII digits explicitly instead of str.isdigit():
        # isdigit() also accepts characters such as superscript digits that
        # int() cannot parse, which would raise ValueError instead of
        # rejecting the input.
        if not octet or any(ch not in '0123456789' for ch in octet):
            return False
        # A digits-only field can never be negative, so only the upper
        # bound needs checking.
        if int(octet) > 255:
            return False
    return True
# Base URLs of the Umbrella Investigate REST API and of its web console.
API_BASE = 'https://investigate.api.umbrella.com'
IP_VIEW_URL = 'https://investigate.umbrella.com/ip-view/'

# Retry policy for API calls: how many times a request is attempted and how
# long (in seconds) to wait between attempts.
MAX_ATTEMPTS = 5
RETRY_DELAY = 0.5


def _fetch_json(url, headers, attempts=MAX_ATTEMPTS, delay=RETRY_DELAY):
    """Fetch *url* and return its decoded JSON body, or None on failure.

    The request is attempted up to *attempts* times, sleeping *delay* seconds
    between attempts. An attempt counts as failed when the request raises,
    when the status code is not 200, or when the body is not valid JSON.

    Args:
        url: Full URL to request.
        headers: Dict of HTTP headers sent with the request.
        attempts: Maximum number of attempts.
        delay: Seconds to wait before each retry.

    Returns:
        The decoded JSON value, or None if no attempt succeeded.
    """
    for attempt in range(attempts):
        if attempt:
            # Wait before trying again
            sleep(delay)
        try:
            response = urlopen(Request(url, headers=headers))
            try:
                if response.code == 200:
                    return json.load(response)
            finally:
                response.close()
        except Exception:
            # Deliberately broad: this is a best-effort lookup and one bad
            # name must not abort the whole run. KeyboardInterrupt is not an
            # Exception subclass, so Ctrl-C still stops the script.
            continue
    sys.stderr.write('WARNING: no usable response from ' + url + '\n')
    return None


def _category_field(entry, key, empty):
    """Return the CSV field for the category list stored under *key*.

    Args:
        entry: The per-domain dict from a categorization response, or None
            when the lookup failed.
        key: Name of the list to read from *entry*.
        empty: Text to return when the list is present but empty.

    Returns:
        The labels joined with '|', *empty* for an empty list, or 'ERROR'
        when the list is missing or malformed.
    """
    try:
        labels = entry[key]
        if not labels:
            return empty
        # str() keeps the value a native string; a label that cannot be
        # converted is reported as ERROR rather than crashing the run.
        return str('|'.join(labels))
    except (KeyError, TypeError, UnicodeError):
        return 'ERROR'


def _lookup_domain(domain, headers):
    """Return the (security, content) CSV fields for *domain*."""
    url = API_BASE + '/domains/categorization/' + domain + '?showLabels'
    data = _fetch_json(url, headers)
    try:
        entry = data[domain]
    except (KeyError, TypeError):
        # Failed request or a response that does not mention the domain.
        entry = None
    return (_category_field(entry, 'security_categories', 'Benign'),
            _category_field(entry, 'content_categories', ''))


def _lookup_ip(ip, headers):
    """Return the two CSV fields describing whether *ip* has listed domains."""
    data = _fetch_json(API_BASE + '/ips/' + ip + '/latest_domains', headers)
    if data is None:
        return ('ERROR', 'ERROR')
    if data:
        # Any entry returned by latest_domains is reported as malicious.
        return ('Malicious domains found', 'Visit ' + IP_VIEW_URL + ip)
    return ('Benign', '')


def main(argv=None):
    """Categorize every domain or IP listed in the input file as CSV.

    Reads the API token from the INVESTIGATE_TOKEN environment variable and
    the input file name from the single command-line argument, then writes
    one CSV row per non-blank input line to standard output.

    Args:
        argv: Argument list including the program name; defaults to sys.argv.

    Returns:
        0 on success, 1 when the token or the file name argument is missing.
    """
    if argv is None:
        argv = sys.argv

    token = os.getenv('INVESTIGATE_TOKEN')
    if not token:
        print("ERROR: environment variable 'INVESTIGATE_TOKEN' not set. "
              "Invoke script with 'INVESTIGATE_TOKEN=%YourToken% python scripts.py'")
        return 1
    if len(argv) != 2:
        print('ERROR: please provide an input file name')
        return 1

    filename = argv[1]
    headers = {'Authorization': 'Bearer ' + token}

    print('Domain,Security Categorization,Content Categorization')
    with open(filename, 'r') as handle:
        for line in handle:
            # Get rid of whitespace or newline
            target = line.strip().lower()
            if not target:
                # Blank line: nothing to look up.
                continue
            # Emit the name before the (slow) lookup so progress is visible.
            sys.stdout.write(target + ',')
            if validate_ip(target):
                fields = _lookup_ip(target, headers)
            else:
                fields = _lookup_domain(target, headers)
            # Separate fields with a comma and end the row.
            sys.stdout.write(','.join(fields) + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment