Skip to content

Instantly share code, notes, and snippets.

@Duartemartins
Created April 8, 2024 20:48
Show Gist options
  • Save Duartemartins/791df32eef8b04d2e96395dc8ee07bf7 to your computer and use it in GitHub Desktop.
Save Duartemartins/791df32eef8b04d2e96395dc8ee07bf7 to your computer and use it in GitHub Desktop.
# Import necessary libraries
from http.client import HTTPSConnection
from base64 import b64encode
from json import loads
from json import dumps
import os
from dotenv import load_dotenv
import json
import pandas as pd
# Location of the environment file: a file literally named "env" in the
# current working directory.
# NOTE(review): the name has no leading dot ("env", not ".env") - confirm
# this matches the file that is actually deployed.
env_path = os.path.join(os.curdir, "env")

# Load the file into the process environment. override=True is passed so
# that values from the file win over variables that are already set.
load_dotenv(env_path, override=True)

# Credentials and API keys read from the environment. Each name is None
# when the corresponding variable is not defined.
dataforseo_username = os.environ.get("DATAFORSEO_USERNAME")
dataforseo_password = os.environ.get("DATAFORSEO_PASSWORD")
openai_api_key = os.environ.get("OPENAI_API_KEY")
google_api_key = os.environ.get("GOOGLE_API_KEY")
google_project_id = os.environ.get("GOOGLE_PROJECT_ID")
# Define RestClient class for making HTTP requests
class RestClient:
    """Minimal HTTPS client for the DataForSEO API using HTTP Basic auth.

    Every call opens a fresh connection to ``domain``, sends a single
    request and returns the response body parsed as JSON.
    """

    # Host every request is sent to.
    domain = "api.dataforseo.com"

    def __init__(self, username, password):
        """Store the credentials used to build the Basic auth header."""
        self.username = username
        self.password = password

    def _auth_headers(self):
        """Return the headers sent with every request."""
        credentials = "%s:%s" % (self.username, self.password)
        token = b64encode(credentials.encode("ascii")).decode("ascii")
        # NOTE(review): the body is sent uncompressed even though this
        # header announces gzip; the header is kept as-is - confirm that
        # the API expects/tolerates it.
        return {
            "Authorization": "Basic %s" % token,
            "Content-Encoding": "gzip",
        }

    @staticmethod
    def _encode_body(data):
        """Return *data* as UTF-8 bytes when it is a string.

        http.client encodes ``str`` bodies as ISO-8859-1, which corrupts
        (or fails on) JSON text containing non-Latin-1 characters, so the
        text is encoded explicitly. Any other value (``None``, bytes) is
        passed through unchanged.
        """
        if isinstance(data, str):
            return data.encode("utf-8")
        return data

    # Define method for making HTTP requests
    def request(self, path, method, data=None):
        """Send one HTTP request and return the decoded JSON response.

        Args:
            path: Request path on ``domain``.
            method: HTTP method name, e.g. ``'GET'`` or ``'POST'``.
            data: Optional request body (string or bytes).

        Returns:
            The response body parsed with ``json.loads``.

        Raises:
            json.JSONDecodeError: If the response body is not valid JSON.
            OSError: On connection failures raised by ``http.client``.
        """
        connection = HTTPSConnection(self.domain)
        try:
            connection.request(
                method,
                path,
                headers=self._auth_headers(),
                body=self._encode_body(data),
            )
            response = connection.getresponse()
            return loads(response.read().decode("utf-8"))
        finally:
            # Always release the socket, even when the request fails.
            connection.close()

    # Define methods for GET and POST requests
    def get(self, path):
        """Issue a GET request for *path* and return the parsed JSON."""
        return self.request(path, 'GET')

    def post(self, path, data):
        """Issue a POST request and return the parsed JSON.

        *data* is sent verbatim when it is already a string; any other
        object is serialized with ``json.dumps`` first.
        """
        data_str = data if isinstance(data, str) else dumps(data)
        return self.request(path, 'POST', data_str)
# Shared API client, authenticated with the DataForSEO credentials that
# were read from the environment.
seodata_client = RestClient(
    username=dataforseo_username,
    password=dataforseo_password,
)
# Payload for the SERP competitors request: a mapping from task index to
# task parameters, holding a single task under index 0.
# NOTE(review): the second filter uses "in" with [1, 10]; whether that is
# meant as a range or as the two values 1 and 10 should be confirmed
# against the API's filter semantics.
post_data_serp = {
    0: {
        "keywords": [
            # List of keywords goes here
            # ...
        ],
        "location_name": "United States",
        "language_name": "English",
        "filters": [
            ["relevant_serp_items", ">", 0],
            "or",
            ["median_position", "in", [1, 10]],
        ],
    },
}
# Define directory and file path for saving response
dir_path = "./response"
filename_serp = "serp_competitors"
file_path = os.path.join(dir_path, f"{filename_serp}.json")
# Create the output directory if it is missing. exist_ok=True makes this a
# single call with no window between "check" and "create".
os.makedirs(dir_path, exist_ok=True)
# Make POST request and save response
print("Get data from DataForSEO")
response_serp = seodata_client.post(
    "/v3/dataforseo_labs/google/serp_competitors/live", post_data_serp
)
# If response is successful, save it to file. The file is only opened (and
# therefore created/truncated) here, so a failed run never leaves an empty,
# invalid JSON file behind or wipes a previously saved response.
if response_serp["status_code"] == 20000:
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(response_serp, f)
else:
    print(
        "error. Code: %d Message: %s"
        % (response_serp["status_code"], response_serp["status_message"])
    )
    # Stop here: the steps that follow need a successful response and
    # would otherwise fail on the missing result data.
    raise SystemExit(1)
# Extract results from response. Each level ("tasks", "result", "items")
# may be missing or null, e.g. when a task failed or matched nothing, so
# every step falls back to an empty list instead of raising.
serp_tasks = response_serp.get("tasks") or []
serp_task_results = (
    (serp_tasks[0].get("result") or []) if serp_tasks else []
)
results_serp = (
    (serp_task_results[0].get("items") or []) if serp_task_results else []
)
# Collect one list per metric, in the order the items were returned.
# Keys are indexed strictly, so an item lacking a field raises KeyError.
domain = [result['domain'] for result in results_serp]
avg_position = [result['avg_position'] for result in results_serp]
median_position = [result['median_position'] for result in results_serp]
rating = [result['rating'] for result in results_serp]
etv = [result['etv'] for result in results_serp]
keywords_count = [result['keywords_count'] for result in results_serp]
visibility = [result['visibility'] for result in results_serp]
# Create DataFrame from lists (one row per competitor domain)
df_serp = pd.DataFrame(
    {
        "domain": domain,
        "avg_position": avg_position,
        "median_position": median_position,
        "rating": rating,
        "etv": etv,
        "keywords_count": keywords_count,
        "visibility": visibility,
    }
)
# Print DataFrame
print(df_serp.to_string())
# Save DataFrame to CSV
df_serp.to_csv('competitors.csv', index=False)
# Build the list of target domains from the competitor domains collected
# above, dropping a leading 'www.' from each. Only the prefix is removed,
# so a 'www.' occurring later inside a host name is left intact.
domain_list = [
    host[len('www.'):] if host.startswith('www.') else host
    for host in domain
]
# Define data for POST request: a single task (index 0) whose targets are
# the cleaned domains.
post_data = {0: {"targets": domain_list}}
# Make POST request
response = seodata_client.post("/v3/backlinks/bulk_ranks/live", post_data)
# Items of the first task's first result; stays None when they cannot be
# read, so the name is always defined afterwards.
results_da = None
# If response is not None, process it
if response is not None:
    try:
        results_da = response['tasks'][0]['result'][0]['items']
    except (KeyError, IndexError, TypeError):
        # KeyError: a field is absent; IndexError: empty "tasks"/"result";
        # TypeError: "tasks" or "result" is null.
        print("Unexpected structure in the response")
    else:
        print(results_da)
    if response["status_code"] == 20000:
        print(response)
    else:
        print("error. Code: %d Message: %s" % (response["status_code"], response["status_message"]))
else:
    print("No response from the API")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment