# uglyrobot/fetchdomaininfo.py

## fetchdomaininfo.py
import csv
import json
import os
import socket
import time

import requests

# Set up the URL for the IP-API.com API endpoint
api_url = "http://ip-api.com/batch"

# Set up the fields to request from the API
fields = "query,status,country,isp,org,as,asname,proxy,hosting"

# Set up the batch size and delay
batch_size = 100
delay = 1.5  # in seconds

# Give up on an API request that has not answered after this many seconds
request_timeout = 30

# Input and output files. open() does not expand "~", so resolve it here.
domains_file = os.path.expanduser('~/domains.csv')
results_file = os.path.expanduser('~/domains_results.csv')

# Header row written when the results file is created
result_header = ['Userid', 'Domain', 'Status', 'Country', 'ISP', 'Org', 'AS',
                 'AS Name', 'Proxy', 'Hosting', 'IP']

# API response keys written after the user id and domain, in column order
result_keys = ('status', 'country', 'isp', 'org', 'as', 'asname', 'proxy',
               'hosting', 'query')


def extract_hostname(domain):
    """Return the part of *domain* before the first '/'."""
    return domain.split('/')[0]


def load_completed_domains(results_path):
    """Return the set of hostnames already present in the results file.

    The first row is treated as a header and skipped. A missing file means
    nothing has been processed yet, so an empty set is returned instead of
    raising (the first run has no results file).
    """
    completed = set()
    try:
        with open(results_path, newline='') as csvfile:
            csv_reader = csv.reader(csvfile)
            next(csv_reader, None)  # skip the header row, tolerate empty file
            for row in csv_reader:
                # Blank or truncated rows carry no domain column
                if len(row) > 1:
                    completed.add(extract_hostname(row[1]))
    except FileNotFoundError:
        pass
    return completed


def build_result_row(entry, result):
    """Build one results-file row from a lookup entry and its API result.

    *entry* is a dict with 'user_id' and 'domain'; *result* is one element of
    the parsed API response. A successful result yields the full row; any
    other status yields only user id, domain and the status.
    """
    status = result.get('status')
    if status != 'success':
        return [entry['user_id'], entry['domain'], status]
    return [entry['user_id'], entry['domain']] + [result.get(key, '') for key in result_keys]


def build_result_rows(batch_info, data):
    """Pair each batch entry with its API result by position and build rows.

    Pairing with zip() means a batch shorter than batch_size (the final one)
    is handled without indexing past the end of either list.
    """
    return [build_result_row(entry, result) for entry, result in zip(batch_info, data)]


def fetch_batch(queries):
    """POST *queries* to the batch endpoint and return the parsed JSON reply.

    Returns None, after printing the reason, when the request itself fails
    or the API answers with a non-200 status.
    """
    # The batch is sent as a raw JSON string in the POST body
    try:
        response = requests.post(api_url, params={'fields': fields},
                                 data=json.dumps(queries), timeout=request_timeout)
    except requests.RequestException as exc:
        # No response object exists here, so report the exception itself
        print("API Error: " + str(exc))
        return None

    if response.status_code != 200:
        print("Error: " + response.text)
        return None

    return json.loads(response.text)


def write_batch(writer, queries, batch_info, batch_count):
    """Look up one batch and append its rows via *writer*.

    Returns True when the batch was written, False when the API call failed
    (in which case nothing is written for this batch).
    """
    data = fetch_batch(queries)
    if data is None:
        return False

    for entry, result in zip(batch_info, data):
        if result.get('status') == 'success':
            print("Batch %d - %s" % (batch_count, entry['domain']))
        writer.writerow(build_result_row(entry, result))
    return True


def main(domains_path=None, results_path=None):
    """Resolve every domain in the input CSV and record IP-API info for it.

    Reads rows of (user id, domain) from *domains_path*, skips domains that
    already appear in *results_path*, resolves the rest to IPs and queries
    the API in batches of batch_size, appending one row per domain to the
    results file. Paths default to domains_file / results_file.
    """
    if domains_path is None:
        domains_path = domains_file
    if results_path is None:
        results_path = results_file

    completed_domains = load_completed_domains(results_path)

    # Append when earlier results exist; otherwise start a fresh file
    mode = 'a' if completed_domains else 'w'

    # The input is opened first so a missing input never truncates results
    with open(domains_path, newline='') as csvfile, \
            open(results_path, mode, newline='') as outfile:
        csv_reader = csv.reader(csvfile)
        # Skip the header row
        next(csv_reader, None)

        new_csv = csv.writer(outfile)
        if mode == 'w':
            new_csv.writerow(result_header)

        batch_count = 0
        queries = []      # request payload for the current batch
        batch_info = []   # user id / domain for each query, same order

        for row in csv_reader:
            # Blank or truncated rows have no domain to look up
            if len(row) < 2:
                continue

            user_id = row[0]
            domain = extract_hostname(row[1])

            # Check if domain has already been processed
            if domain in completed_domains:
                continue

            # Look up IP for domain. UnicodeError covers hostnames the IDNA
            # encoder rejects (e.g. empty or over-long labels).
            try:
                ip = socket.gethostbyname(domain)
            except (OSError, UnicodeError):
                print("DNS Error: " + domain)
                new_csv.writerow([user_id, domain, "DNS Error"])
                continue

            queries.append({"query": ip})
            batch_info.append({'user_id': user_id, 'domain': domain})

            if len(queries) == batch_size:
                ok = write_batch(new_csv, queries, batch_info, batch_count)
                queries = []
                batch_info = []
                if not ok:
                    # Stop on API failure. Nothing was written for this
                    # batch, so its domains are picked up again next run.
                    break

                batch_count += 1
                # Wait for the specified delay before making the next request
                time.sleep(delay)

        # Flush the final, possibly partial, batch
        if queries:
            write_batch(new_csv, queries, batch_info, batch_count)


if __name__ == "__main__":
    main()