Created
October 18, 2019 12:36
-
-
Save sureshkumargondi/076c2b61985fb407edb610bf3dd070bc to your computer and use it in GitHub Desktop.
SEO script. Get the top 10-100 results for many keywords at once using the DataForSEO API.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "Multi_Keyword_SERP.ipynb", | |
"provenance": [], | |
"collapsed_sections": [] | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "hm8Lw8bbMDO0", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Get the API, $50 minimum last time I checked, lot of data with that\n", | |
"# https://dataforseo.com/\n", | |
"\n", | |
"# Data For SEO credentials\n", | |
"# DO NOT SHARE THIS INFO !!!\n", | |
"USER=\"\"\n", | |
"PASS=\"\"\n", | |
"\n", | |
"# The number of positions to grab from the top 100 results\n", | |
"SHOW_ONLY=10\n", | |
"\n", | |
"# The name of the file to download\n", | |
"FILE_NAME='on-page-seo-tools-serp.csv'\n", | |
"\n", | |
"# Google configuration\n", | |
"GOOGLE_URL=\"google.com\"\n", | |
"GOOGLE_LANG=\"English\"\n", | |
"GOOGLE_LOCATION=\"United States\"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "abb_FE9mICCt", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# List of keywords, one per line\n", | |
"keywords_list = \"\"\"\n", | |
"on page seo tool\n", | |
"advanced seo tool\n", | |
"seo measurement tool\n", | |
"\"\"\"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "9RoCiaCNLD5C", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# @hidden_cell\n", | |
"# Boilerplate for API calls\n", | |
"\n", | |
"import json\n", | |
"from random import Random\n", | |
"from base64 import b64encode\n", | |
"from http.client import HTTPSConnection, HTTPConnection\n", | |
"\n", | |
"class Data4SEO:\n", | |
" domain = \"api.dataforseo.com\"\n", | |
"\n", | |
" def __init__(self, username, password):\n", | |
" self.username = username\n", | |
" self.password = password\n", | |
"\n", | |
" def request(self, path, method, data=None):\n", | |
"\n", | |
" connection = HTTPSConnection(self.domain)\n", | |
"\n", | |
" try:\n", | |
"\n", | |
" base64_bytes = b64encode((\"%s:%s\" % (self.username, self.password)).encode(\"ascii\")).decode(\"ascii\")\n", | |
" headers = {\n", | |
" 'Authorization': 'Basic %s' % base64_bytes,\n", | |
" 'Content-type': 'application/json',\n", | |
" }\n", | |
" connection.request(method, path, headers=headers, body=data)\n", | |
" response = connection.getresponse()\n", | |
"\n", | |
" return json.loads(response.read().decode())\n", | |
"\n", | |
" finally:\n", | |
"\n", | |
" connection.close()\n", | |
"\n", | |
" def get(self, path):\n", | |
" return self.request(path, 'GET')\n", | |
"\n", | |
" def post(self, path, data):\n", | |
"\n", | |
" if isinstance(data, str):\n", | |
" data_str = data\n", | |
" else:\n", | |
" data_str = json.dumps(data)\n", | |
"\n", | |
" return self.request(path, 'POST', data_str)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Fg6SfJKbITSm", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Clean the keywords\n", | |
"keywords = [w.strip().lower() for w in keywords_list.split(\"\\n\") if w != \"\"]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "BLT0xnR5Im1o", | |
"colab_type": "code", | |
"outputId": "22fe0169-9792-49f8-f2a5-8862bd17049c", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"# Show top 10 keywords from the list\n", | |
"keywords[:10]" | |
], | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['on page seo tool', 'advanced seo tool', 'seo measurement tool']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 17 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "vDQl0r4pMyWd", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"serp_results = []" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "qmWTI5OaJEW8", | |
"colab_type": "code", | |
"outputId": "f5ddc61d-0bce-44dc-fa6d-05673a18a158", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 119 | |
} | |
}, | |
"source": [ | |
"# For each keyword call the API and get the top 100 results\n", | |
"for keyword in keywords:\n", | |
" print(\"Getting data for keyword: %s\" % keyword)\n", | |
"\n", | |
" rnd = Random()\n", | |
"\n", | |
" # Data for the API\n", | |
" data = dict()\n", | |
" data[rnd.randint(1, 30000000)] = dict(\n", | |
" se_name=GOOGLE_URL,\n", | |
" se_language=GOOGLE_LANG,\n", | |
" loc_name_canonical=GOOGLE_LOCATION,\n", | |
" key=keyword,\n", | |
" )\n", | |
"\n", | |
" api = Data4SEO(USER, PASS)\n", | |
" response = api.post(\"/v2/live/srp_tasks_post\", dict(data=data))\n", | |
"\n", | |
" if response[\"status\"] == \"error\":\n", | |
" print(\"Error on keyword: %s\" % keyword)\n", | |
" print(\"Code: %d Message: %s\" % (response[\"error\"][\"code\"], response[\"error\"][\"message\"]))\n", | |
" else:\n", | |
" print(\"Got Results for keyword: %s\" % keyword)\n", | |
" serp_results.append(dict(\n", | |
" keyword=keyword,\n", | |
" results=response[\"results\"]\n", | |
" ))" | |
], | |
"execution_count": 19, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Getting data for keyword: on page seo tool\n", | |
"Got Results for keyword: on page seo tool\n", | |
"Getting data for keyword: advanced seo tool\n", | |
"Got Results for keyword: advanced seo tool\n", | |
"Getting data for keyword: seo measurement tool\n", | |
"Got Results for keyword: seo measurement tool\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "MGqqIlDlve3r", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"# Our first CSV row\n", | |
"csv_data = [[\"keyword\",\"position\",\"url\",\"title\"]]" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "QSHT4f5nPSZ1", | |
"colab_type": "code", | |
"outputId": "0d6a7ed8-341d-40b2-e0e4-45667a39c83e", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 54 | |
} | |
}, | |
"source": [ | |
"# Create our CSV file\n", | |
"for serp_result in serp_results:\n", | |
" for result in serp_result[\"results\"][\"organic\"]:\n", | |
" if result[\"result_position\"] <= SHOW_ONLY:\n", | |
" csv_data.append([serp_result[\"keyword\"], result[\"result_position\"], result[\"result_url\"], result[\"result_title\"]])\n", | |
"\n", | |
"# Just checking our first 2 lines on the CSV are correct\n", | |
"print(csv_data[:2])" | |
], | |
"execution_count": 21, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"[['keyword', 'position', 'url', 'title'], ['on page seo tool', 1, 'https://www.smartinsights.com/search-engine-optimisation-seo/on-page-optimisation/best-tools-page/', 'Best tools for on-page SEO and how to use them | Smart Insights']]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "1cRs8TDCwGDv", | |
"colab_type": "code", | |
"outputId": "b337a4a1-4f5e-49d1-c5a5-7f1688d1cbbf", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 71 | |
} | |
}, | |
"source": [ | |
"import io\n", | |
"import csv\n", | |
"\n", | |
"output = io.StringIO()\n", | |
"writer = csv.writer(output, delimiter=\",\", quoting=csv.QUOTE_NONNUMERIC)\n", | |
"\n", | |
"for row in csv_data:\n", | |
" writer.writerow(row)\n", | |
"\n", | |
"output.getvalue().split(\"\\r\\n\")[:2]" | |
], | |
"execution_count": 22, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/plain": [ | |
"['\"keyword\",\"position\",\"url\",\"title\"',\n", | |
" '\"on page seo tool\",1,\"https://www.smartinsights.com/search-engine-optimisation-seo/on-page-optimisation/best-tools-page/\",\"Best tools for on-page SEO and how to use them | Smart Insights\"']" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 22 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "ZgpdESLvlu_b", | |
"colab_type": "code", | |
"outputId": "94b342e0-69d5-4a41-b4d6-d7168aa6c799", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 111 | |
} | |
}, | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"output.seek(0)\n", | |
"df = pd.read_csv(output)\n", | |
"df[:2]" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "execute_result", | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>keyword</th>\n", | |
" <th>position</th>\n", | |
" <th>url</th>\n", | |
" <th>title</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>on page seo tool</td>\n", | |
" <td>1</td>\n", | |
" <td>https://www.smartinsights.com/search-engine-op...</td>\n", | |
" <td>Best tools for on-page SEO and how to use them...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>on page seo tool</td>\n", | |
" <td>2</td>\n", | |
" <td>https://www.internetmarketingninjas.com/seo-to...</td>\n", | |
" <td>On-Page Optimization Tool - Internet Marketing...</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" keyword ... title\n", | |
"0 on page seo tool ... Best tools for on-page SEO and how to use them...\n", | |
"1 on page seo tool ... On-Page Optimization Tool - Internet Marketing...\n", | |
"\n", | |
"[2 rows x 4 columns]" | |
] | |
}, | |
"metadata": { | |
"tags": [] | |
}, | |
"execution_count": 23 | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "nMy4QuAe2Tco", | |
"colab_type": "code", | |
"outputId": "bc1a8c78-9e16-4971-a865-68b07b3b10f7", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 34 | |
} | |
}, | |
"source": [ | |
"from google.colab import drive\n", | |
"from google.colab import files\n", | |
"\n", | |
"drive.mount('/content/drive')" | |
], | |
"execution_count": 24, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "-6rD4V1t2YLg", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"df.to_csv(FILE_NAME)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "D5gYDW_42Xhz", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"files.download(FILE_NAME)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment