Created
December 6, 2016 18:06
-
-
Save soeffing/7033b2be846eb9985a3d073d3950cebc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv

import requests
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
# Connect to the MongoDB server on localhost:27017 and select the `serp_v2`
# database.  A single client is created; the previous extra `MongoClient()`
# call was overwritten immediately and left an unused client behind.
client = MongoClient('localhost', 27017)
db = client.serp_v2
# Read keywords: the first column of every non-empty CSV row is one keyword.
keywords = []
# 'rU' opens the file in universal-newlines mode (Python 2).
with open('inputs/small_keys.csv', 'rU') as csvfile:
    for row in csv.reader(csvfile):
        # csv.reader yields an empty list for a blank line; indexing it would
        # raise IndexError, so such rows are skipped.
        if not row:
            continue
        # Optional cleaning steps, currently disabled:
        #   row = row[0].replace('"', '')
        #   row = row.lower()
        #   row = row.replace(' ', '_')
        keywords.append(row[0])
print('Total keyword: %i' % len(keywords))

# Remove duplicates (going through set() does not preserve the input order).
keywords = list(set(keywords))
print('Unique keyword: %i' % len(keywords))
# Distinct keyword ids referenced by the saved url documents.  Only the
# `keyword_id` field is projected so whole url documents are not transferred
# just to read one field.  The result is turned back into a list because it
# is passed to `$in` below.
serp_keywords_ids = list({
    saved_url['keyword_id']
    for saved_url in db.urls.find({}, {'keyword_id': 1})
})
print('Saved SERPS of %i keywords' % len(serp_keywords_ids))

# Terms of the keywords that already have saved urls; these are excluded.
exclude_keywords = [
    key['term']
    for key in db.keywords.find({'_id': {'$in': serp_keywords_ids}},
                                {'term': 1})
]

# Keywords from the input file that have no saved urls yet.
pending_keys = list(set(keywords).difference(exclude_keywords))
# Handles to the two Mongo collections used by the request loop.
urls_col = db['urls']
keywords_col = db['keywords']

# Vertifire setup: the token is sent in the X-Vertifire-Token header.
API_TOKEN = 'TOKEN'
headers = {'X-Vertifire-Token': API_TOKEN}

print('Total pending keywords: %i' % (len(pending_keys),))
VERTIFIRE_SERP_URL = 'https://api.vertifire.com/v2/serp/top'
VERTIFIRE_RESPONSE_URL = 'https://api.vertifire.com/v1/response/'
CALLBACK_URL = 'http://54.152.153.222:8300/callback'
# Error code handled as "keyword was already requested" by the loop below.
ALREADY_REQUESTED_CODE = 1010
# Seconds to wait for Vertifire before giving up on a request; without a
# timeout a stalled connection would block the whole run.
REQUEST_TIMEOUT = 60


def _clean_text(value):
    """Return *value* stripped, with double quotes replaced by single quotes."""
    # The text is kept as-is (no manual UTF-8 encoding): pymongo encodes
    # strings itself when writing the document.
    return value.strip().replace('"', "'")


def _get_or_create_keyword_id(keyword):
    """Insert *keyword* into the keywords collection and return its `_id`.

    If the insert is rejected as a duplicate, the `_id` of the existing
    document with the same `term` is returned instead.
    NOTE(review): this presumably relies on a unique index on `term` --
    confirm; without one the insert never fails and duplicates are created.
    """
    try:
        new_key = {'web_id': 1, 'term': keyword}
        return keywords_col.insert_one(new_key).inserted_id
    except DuplicateKeyError:
        print('already exists')
        return keywords_col.find_one({'term': keyword})['_id']


def _build_serp_request(keyword):
    """Return the form fields of the SERP request for *keyword*."""
    return {
        'callback[method]': 'GET',
        'callback[url]': CALLBACK_URL,
        'callback[param]': keyword,
        'terms[0][term]': keyword,
        'terms[0][sep][search_engine]': '1',
        'terms[0][sep][country]': 'US',
        'terms[0][sep][country_only]': '1',
        'terms[0][sep][language]': 'en',
        'terms[0][sep][language_only]': '1',
    }


def _store_organic_results(response_j, keyword_id):
    """Save every organic result found in *response_j* to the urls collection.

    A result that cannot be written is reported and skipped; a response
    without the expected structure is reported and nothing more is stored.
    """
    try:
        for serp in response_j['response'][0]['results']['organic']:
            # Some serps do not have descriptions.
            raw_description = serp.get('description', '')
            try:
                new_url = {
                    'rank': serp['rank'],
                    'url': serp['url'],
                    'title': _clean_text(serp['title']),
                    'description': _clean_text(raw_description),
                    'keyword_id': keyword_id,
                }
                urls_col.insert_one(new_url)
            except Exception as err:
                # Best effort: report the offending result and carry on.
                # .get() is used so that a missing field cannot raise again
                # while the error is being reported.
                print('Error writing to db')
                print(repr(err))
                print(raw_description)
                print(serp.get('url'))
                print(serp.get('title'))
                print(keyword_id)
    except Exception as err:
        print('Error in response from vertifire')
        print(repr(err))


# Request a SERP from Vertifire for every pending keyword.
for keyword in pending_keys:
    print(keyword)
    keyword_id = _get_or_create_keyword_id(keyword)
    try:
        res = requests.post(VERTIFIRE_SERP_URL, headers=headers,
                            data=_build_serp_request(keyword),
                            timeout=REQUEST_TIMEOUT)
        print(res.text)
        request_j = res.json()
        # If the keyword was already processed by Vertifire, use the returned
        # key to retrieve the serps directly from the response endpoint.
        if ('error' in request_j
                and request_j['error']['code'] == ALREADY_REQUESTED_CODE):
            print('already requested')
            vertifire_key = request_j['response']['key']
            # Reuse the shared `headers` instead of a second hard-coded token.
            vertifire_res = requests.get(VERTIFIRE_RESPONSE_URL + vertifire_key,
                                         headers=headers,
                                         timeout=REQUEST_TIMEOUT)
            _store_organic_results(vertifire_res.json(), keyword_id)
        # NOTE(review): the original indentation was lost; the key is stored
        # here for every request that returned one -- confirm this is the
        # intended nesting.
        keywords_col.update_one(
            {'_id': keyword_id},
            {'$set': {'vertifire_key': request_j['response']['key']}})
        print('######')
    except Exception as err:
        # Best effort per keyword: report the failure and move on.  Catching
        # Exception (not a bare except) keeps Ctrl-C able to stop the run.
        print('Error')
        print(repr(err))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment