Skip to content

Instantly share code, notes, and snippets.

@uglyrobot
Created April 5, 2023 17:26
Show Gist options
  • Save uglyrobot/df3c7c32ec53f20b373cae4e63e86f5d to your computer and use it in GitHub Desktop.
Save uglyrobot/df3c7c32ec53f20b373cae4e63e86f5d to your computer and use it in GitHub Desktop.
Take a list of domains and export a CSV of their IP info: country, isp, org, as, asname, proxy, hosting
import csv
import json
import os
import socket
import time

import requests
# Set up the URL for the IP-API.com API endpoint
api_url = "http://ip-api.com/batch"
# Set up the fields to request from the API
fields = "query,status,country,isp,org,as,asname,proxy,hosting"
# Set up the batch size and delay
batch_size = 100
delay = 1.5  # in seconds

# open() does not expand "~" by itself, so resolve the home directory explicitly
domains_path = os.path.expanduser('~/domains.csv')
results_path = os.path.expanduser('~/domains_results.csv')

# Header row of the results file
results_header = ['Userid', 'Domain', 'Status', 'Country', 'ISP', 'Org', 'AS', 'AS Name', 'Proxy', 'Hosting', 'IP']
# API response keys, in the order of the result columns that follow "Status"
detail_keys = ('country', 'isp', 'org', 'as', 'asname', 'proxy', 'hosting', 'query')


def extract_hostname(value):
    """Return only the hostname part of *value* (everything before the first '/')."""
    return value.split('/')[0]


def load_completed_domains(path):
    """Return the set of domains already present in the results CSV at *path*.

    The domain is read from the second column of every row after the header.
    A missing file yields an empty set so that the very first run works.
    """
    completed = set()
    try:
        with open(path, newline='') as csvfile:
            csv_reader = csv.reader(csvfile)
            # Skip the header row (the default avoids StopIteration on an empty file)
            next(csv_reader, None)
            for row in csv_reader:
                if len(row) > 1:
                    completed.add(extract_hostname(row[1]))
    except FileNotFoundError:
        # No results yet: nothing has been processed so far
        pass
    return completed


def build_row(entry, result):
    """Build one results row from a pending *entry* and its API *result*.

    *entry* is a dict with 'user_id' and 'domain'; *result* is one decoded
    element of the API response. Successful lookups get every detail column,
    anything else only gets the user id, domain and status.
    """
    status = result.get('status')
    row = [entry['user_id'], entry['domain'], status]
    if status == 'success':
        row.extend(result.get(key, '') for key in detail_keys)
    return row


def lookup_batch(queries):
    """POST *queries* (a list of {"query": ip} dicts) to the batch endpoint.

    Returns the decoded list of results, or None after printing a message
    when the request fails, the status is not 200 or the body is not a
    JSON list.
    """
    # The API expects the JSON list as the raw post body
    try:
        response = requests.post(api_url, params={'fields': fields}, data=json.dumps(queries), timeout=30)
    except requests.RequestException as err:
        # There is no response object when the request itself failed
        print("API Error: " + str(err))
        return None
    if response.status_code != 200:
        print("Error: " + response.text)
        return None
    try:
        data = json.loads(response.text)
    except ValueError:
        print("Error: " + response.text)
        return None
    if not isinstance(data, list):
        print("Error: " + response.text)
        return None
    return data


def write_batch(writer, entries, batch_number):
    """Look up every pending entry in one API call and write the result rows.

    *entries* is a list of dicts with 'user_id', 'domain' and 'ip'.
    Returns True when the batch was written, False when the API call failed
    (nothing is written then, so the domains are retried on the next run).
    """
    results = lookup_batch([{"query": entry['ip']} for entry in entries])
    if results is None:
        return False
    if len(results) != len(entries):
        print("Error: expected %d results, got %d" % (len(entries), len(results)))
    # Results are matched to entries by position; zip() also keeps a short
    # final batch (or a short response) from indexing out of range
    for entry, result in zip(entries, results):
        if result.get('status') == 'success':
            print("Batch %d - %s" % (batch_number, entry['domain']))
        writer.writerow(build_row(entry, result))
    return True


def main():
    """Resolve every unprocessed domain of domains.csv and export its IP info.

    Reads user id and domain from the first two columns of domains.csv, skips
    domains already listed in domains_results.csv, resolves each remaining
    domain to an IP and queries the API in batches of `batch_size`, appending
    one row per domain to domains_results.csv.
    """
    completed_domains = load_completed_domains(results_path)
    # Append when resuming a previous run, otherwise start a fresh file
    mode = 'a' if completed_domains else 'w'
    with open(domains_path, newline='') as csvfile, open(results_path, mode, newline='') as outfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header row
        next(csv_reader, None)
        new_csv = csv.writer(outfile)
        if mode == 'w':
            new_csv.writerow(results_header)
        # Initialize the batch counter and the list of entries for the current batch
        batch_count = 0
        pending = []
        # Loop through each row in the CSV file
        for row in csv_reader:
            if len(row) < 2:
                # Blank or malformed line: there is no domain to look up
                continue
            user_id = row[0]
            # parse only the hostname
            domain = extract_hostname(row[1])
            # check if domain has already been processed
            if domain in completed_domains:
                continue
            # lookup IP for domain
            try:
                ip = socket.gethostbyname(domain)
            except (OSError, UnicodeError):
                # gaierror is an OSError; invalid IDNA labels raise UnicodeError
                print("DNS Error: " + domain)
                new_csv.writerow([user_id, domain, "DNS Error"])
                continue
            # Add the domain to the list for the current batch
            pending.append({'user_id': user_id, 'domain': domain, 'ip': ip})
            # Check if we've reached the batch size
            if len(pending) == batch_size:
                if not write_batch(new_csv, pending, batch_count):
                    # Stop on API errors; unwritten domains are retried next run
                    return
                # Persist progress so an interrupted run can resume from here
                outfile.flush()
                # Increment the batch counter and clear the list for the next batch
                batch_count += 1
                pending = []
                # Wait for the specified delay before making the next request
                time.sleep(delay)
        # Check if there are any remaining domains to process
        if pending:
            write_batch(new_csv, pending, batch_count)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment