Skip to content

Instantly share code, notes, and snippets.

@sureshkumargondi
Created October 18, 2019 12:36
Show Gist options
  • Save sureshkumargondi/076c2b61985fb407edb610bf3dd070bc to your computer and use it in GitHub Desktop.
Save sureshkumargondi/076c2b61985fb407edb610bf3dd070bc to your computer and use it in GitHub Desktop.
SEO script. Get the top 10-100 results for many keywords at once using DataForSEO API.
Display the source blob
Display the rendered blob
Raw
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"name": "Multi_Keyword_SERP.ipynb",
"provenance": [],
"collapsed_sections": []
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
}
},
"cells": [
{
"cell_type": "code",
"metadata": {
"id": "hm8Lw8bbMDO0",
"colab_type": "code",
"colab": {}
},
"source": [
"# Notebook settings -- fill these in before running the cells below.\n",
"#\n",
"# API access comes from https://dataforseo.com/ (the minimum purchase was\n",
"# $50 last time I checked, which buys a lot of data).\n",
"\n",
"# DataForSEO login and password.\n",
"# DO NOT SHARE THIS INFO !!!\n",
"USER = \"\"\n",
"PASS = \"\"\n",
"\n",
"# How many of the top 100 positions to keep for each keyword\n",
"SHOW_ONLY = 10\n",
"\n",
"# Name of the CSV file that is written and downloaded at the end\n",
"FILE_NAME = 'on-page-seo-tools-serp.csv'\n",
"\n",
"# Search engine, language and location sent with every API request\n",
"GOOGLE_URL = \"google.com\"\n",
"GOOGLE_LANG = \"English\"\n",
"GOOGLE_LOCATION = \"United States\""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "abb_FE9mICCt",
"colab_type": "code",
"colab": {}
},
"source": [
"# List of keywords, one per line\n",
"keywords_list = \"\"\"\n",
"on page seo tool\n",
"advanced seo tool\n",
"seo measurement tool\n",
"\"\"\""
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "9RoCiaCNLD5C",
"colab_type": "code",
"colab": {}
},
"source": [
"# @hidden_cell\n",
"# Boilerplate for API calls\n",
"\n",
"import json\n",
"from random import Random\n",
"from base64 import b64encode\n",
"from http.client import HTTPSConnection, HTTPConnection\n",
"\n",
"\n",
"class Data4SEO:\n",
"    \"\"\"Small HTTPS client for api.dataforseo.com using HTTP Basic auth.\"\"\"\n",
"\n",
"    domain = \"api.dataforseo.com\"\n",
"    # Socket timeout in seconds, so an unresponsive server cannot hang the notebook.\n",
"    timeout = 120\n",
"\n",
"    def __init__(self, username, password):\n",
"        \"\"\"Store the credentials that are sent with every request.\"\"\"\n",
"        self.username = username\n",
"        self.password = password\n",
"\n",
"    def request(self, path, method, data=None):\n",
"        \"\"\"Send one request to `domain` and return the JSON-decoded response body.\n",
"\n",
"        path: URL path of the endpoint, e.g. \"/v2/live/srp_tasks_post\".\n",
"        method: HTTP verb such as 'GET' or 'POST'.\n",
"        data: optional request body (str or bytes).\n",
"\n",
"        Raises ValueError (json.JSONDecodeError) when the response body is not JSON.\n",
"        \"\"\"\n",
"        # UTF-8 rather than ASCII so credentials containing non-ASCII characters\n",
"        # do not raise UnicodeEncodeError.\n",
"        credentials = (\"%s:%s\" % (self.username, self.password)).encode(\"utf-8\")\n",
"        headers = {\n",
"            'Authorization': 'Basic %s' % b64encode(credentials).decode(\"ascii\"),\n",
"            'Content-type': 'application/json',\n",
"        }\n",
"\n",
"        # http.client encodes str bodies as ISO-8859-1, which fails on characters\n",
"        # outside that charset; send explicit UTF-8 bytes instead.\n",
"        if isinstance(data, str):\n",
"            data = data.encode(\"utf-8\")\n",
"\n",
"        connection = HTTPSConnection(self.domain, timeout=self.timeout)\n",
"        try:\n",
"            connection.request(method, path, headers=headers, body=data)\n",
"            response = connection.getresponse()\n",
"            return json.loads(response.read().decode())\n",
"        finally:\n",
"            # Always release the socket, even when the request or decoding fails.\n",
"            connection.close()\n",
"\n",
"    def get(self, path):\n",
"        \"\"\"Issue a GET request and return the decoded JSON response.\"\"\"\n",
"        return self.request(path, 'GET')\n",
"\n",
"    def post(self, path, data):\n",
"        \"\"\"Issue a POST request; non-string `data` is serialized with json.dumps.\"\"\"\n",
"        data_str = data if isinstance(data, str) else json.dumps(data)\n",
"        return self.request(path, 'POST', data_str)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "Fg6SfJKbITSm",
"colab_type": "code",
"colab": {}
},
"source": [
"# Clean the keywords: trim whitespace, lowercase, and skip blank lines.\n",
"# The filter tests the stripped text so whitespace-only lines are dropped too\n",
"# instead of turning into empty keywords.\n",
"keywords = [w.strip().lower() for w in keywords_list.split(\"\\n\") if w.strip()]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "BLT0xnR5Im1o",
"colab_type": "code",
"outputId": "22fe0169-9792-49f8-f2a5-8862bd17049c",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"# Show top 10 keywords from the list\n",
"keywords[:10]"
],
"execution_count": 17,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['on page seo tool', 'advanced seo tool', 'seo measurement tool']"
]
},
"metadata": {
"tags": []
},
"execution_count": 17
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "vDQl0r4pMyWd",
"colab_type": "code",
"colab": {}
},
"source": [
"serp_results = []"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "qmWTI5OaJEW8",
"colab_type": "code",
"outputId": "f5ddc61d-0bce-44dc-fa6d-05673a18a158",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 119
}
},
"source": [
"# Query the live SERP endpoint once per keyword and collect the raw results\n",
"# (trimming to SHOW_ONLY positions happens when the CSV rows are built).\n",
"\n",
"# One client and one random generator are enough for the whole run\n",
"api = Data4SEO(USER, PASS)\n",
"rnd = Random()\n",
"\n",
"for keyword in keywords:\n",
"    print(\"Getting data for keyword: %s\" % keyword)\n",
"\n",
"    # Payload: a single task stored under a random integer key\n",
"    # (presumably an arbitrary task id -- confirm against the DataForSEO docs).\n",
"    task = dict(\n",
"        se_name=GOOGLE_URL,\n",
"        se_language=GOOGLE_LANG,\n",
"        loc_name_canonical=GOOGLE_LOCATION,\n",
"        key=keyword,\n",
"    )\n",
"    data = {rnd.randint(1, 30000000): task}\n",
"\n",
"    response = api.post(\"/v2/live/srp_tasks_post\", dict(data=data))\n",
"\n",
"    if response[\"status\"] == \"error\":\n",
"        error = response[\"error\"]\n",
"        print(\"Error on keyword: %s\" % keyword)\n",
"        # %s instead of %d: prints the same for integer codes and does not\n",
"        # raise TypeError if the code arrives as a non-numeric string.\n",
"        print(\"Code: %s Message: %s\" % (error[\"code\"], error[\"message\"]))\n",
"    else:\n",
"        print(\"Got Results for keyword: %s\" % keyword)\n",
"        serp_results.append(dict(\n",
"            keyword=keyword,\n",
"            results=response[\"results\"]\n",
"        ))"
],
"execution_count": 19,
"outputs": [
{
"output_type": "stream",
"text": [
"Getting data for keyword: on page seo tool\n",
"Got Results for keyword: on page seo tool\n",
"Getting data for keyword: advanced seo tool\n",
"Got Results for keyword: advanced seo tool\n",
"Getting data for keyword: seo measurement tool\n",
"Got Results for keyword: seo measurement tool\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "MGqqIlDlve3r",
"colab_type": "code",
"colab": {}
},
"source": [
"# Our first CSV row\n",
"csv_data = [[\"keyword\",\"position\",\"url\",\"title\"]]"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "QSHT4f5nPSZ1",
"colab_type": "code",
"outputId": "0d6a7ed8-341d-40b2-e0e4-45667a39c83e",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 54
}
},
"source": [
"# Add one row per organic result whose position is within SHOW_ONLY\n",
"for serp_result in serp_results:\n",
"    keyword = serp_result[\"keyword\"]\n",
"    for result in serp_result[\"results\"][\"organic\"]:\n",
"        position = result[\"result_position\"]\n",
"        if position <= SHOW_ONLY:\n",
"            row = [keyword, position, result[\"result_url\"], result[\"result_title\"]]\n",
"            csv_data.append(row)\n",
"\n",
"# Sanity check: print the header and the first data row\n",
"print(csv_data[:2])"
],
"execution_count": 21,
"outputs": [
{
"output_type": "stream",
"text": [
"[['keyword', 'position', 'url', 'title'], ['on page seo tool', 1, 'https://www.smartinsights.com/search-engine-optimisation-seo/on-page-optimisation/best-tools-page/', 'Best tools for on-page SEO and how to use them | Smart Insights']]\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "1cRs8TDCwGDv",
"colab_type": "code",
"outputId": "b337a4a1-4f5e-49d1-c5a5-7f1688d1cbbf",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 71
}
},
"source": [
"import io\n",
"import csv\n",
"\n",
"# Serialize every row into an in-memory CSV buffer; QUOTE_NONNUMERIC puts\n",
"# quotes around all non-numeric fields.\n",
"output = io.StringIO()\n",
"writer = csv.writer(output, delimiter=\",\", quoting=csv.QUOTE_NONNUMERIC)\n",
"writer.writerows(csv_data)\n",
"\n",
"# Preview the header and the first data row of the generated CSV text\n",
"output.getvalue().split(\"\\r\\n\")[:2]"
],
"execution_count": 22,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"['\"keyword\",\"position\",\"url\",\"title\"',\n",
" '\"on page seo tool\",1,\"https://www.smartinsights.com/search-engine-optimisation-seo/on-page-optimisation/best-tools-page/\",\"Best tools for on-page SEO and how to use them | Smart Insights\"']"
]
},
"metadata": {
"tags": []
},
"execution_count": 22
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "ZgpdESLvlu_b",
"colab_type": "code",
"outputId": "94b342e0-69d5-4a41-b4d6-d7168aa6c799",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 111
}
},
"source": [
"import pandas as pd\n",
"\n",
"output.seek(0)\n",
"df = pd.read_csv(output)\n",
"df[:2]"
],
"execution_count": 23,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>keyword</th>\n",
" <th>position</th>\n",
" <th>url</th>\n",
" <th>title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>on page seo tool</td>\n",
" <td>1</td>\n",
" <td>https://www.smartinsights.com/search-engine-op...</td>\n",
" <td>Best tools for on-page SEO and how to use them...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>on page seo tool</td>\n",
" <td>2</td>\n",
" <td>https://www.internetmarketingninjas.com/seo-to...</td>\n",
" <td>On-Page Optimization Tool - Internet Marketing...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" keyword ... title\n",
"0 on page seo tool ... Best tools for on-page SEO and how to use them...\n",
"1 on page seo tool ... On-Page Optimization Tool - Internet Marketing...\n",
"\n",
"[2 rows x 4 columns]"
]
},
"metadata": {
"tags": []
},
"execution_count": 23
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "nMy4QuAe2Tco",
"colab_type": "code",
"outputId": "bc1a8c78-9e16-4971-a865-68b07b3b10f7",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 34
}
},
"source": [
"from google.colab import drive\n",
"from google.colab import files\n",
"\n",
"drive.mount('/content/drive')"
],
"execution_count": 24,
"outputs": [
{
"output_type": "stream",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
],
"name": "stdout"
}
]
},
{
"cell_type": "code",
"metadata": {
"id": "-6rD4V1t2YLg",
"colab_type": "code",
"colab": {}
},
"source": [
"# index=False leaves out the DataFrame's row index, so the file contains only\n",
"# the keyword, position, url and title columns.\n",
"df.to_csv(FILE_NAME, index=False)"
],
"execution_count": 0,
"outputs": []
},
{
"cell_type": "code",
"metadata": {
"id": "D5gYDW_42Xhz",
"colab_type": "code",
"colab": {}
},
"source": [
"files.download(FILE_NAME)"
],
"execution_count": 0,
"outputs": []
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment