# Gist by @soeffing (7033b2be846eb9985a3d073d3950cebc), created December 6, 2016 18:06.
# GitHub page chrome captured with the source ("Skip to content", "Instantly share
# code, notes, and snippets.", "Show Gist options", "Save ... to your computer and
# use it in GitHub Desktop.") is not part of the script.
import csv

import requests
from pymongo import MongoClient
from pymongo.errors import DuplicateKeyError
# Connect to the MongoDB server on localhost:27017 and select the ``serp_v2``
# database. Only one client is created; a second, immediately-overwritten
# ``MongoClient()`` would just leave an unused client object behind.
client = MongoClient('localhost', 27017)
db = client.serp_v2
# Read keywords: the first column of every non-empty row of the input CSV.
# Values are used verbatim -- the quote stripping, lower-casing and
# space-to-underscore cleaning steps that once lived here are disabled.
#
# 'rU' requests universal-newline handling (Python 2 spelling).
with open('inputs/small_keys.csv', 'rU') as csvfile:
    # csv.reader yields an empty list for a blank line; indexing row[0] on it
    # would raise IndexError, so such rows are skipped.
    keywords = [row[0] for row in csv.reader(csvfile) if row]
print('Total keyword: %i' % len(keywords))

# Remove duplicates (the resulting order is arbitrary).
keywords = list(set(keywords))
print('Unique keyword: %i' % len(keywords))
# Distinct keyword ids referenced by the documents already stored in ``urls``.
serp_keywords_ids = list({saved_url['keyword_id'] for saved_url in db.urls.find()})
print('Saved SERPS of %i keywords' % len(serp_keywords_ids))

# Terms of those keywords: they already have stored SERPs and are excluded.
already_fetched = db.keywords.find({'_id': {'$in': serp_keywords_ids}})
exclude_keywords = [key['term'] for key in already_fetched]

# Everything from the input list that is not excluded is still pending.
pending_keys = list(set(keywords) - set(exclude_keywords))
# Collections used by the request loop: keyword documents and stored SERP urls.
keywords_col = db['keywords']
urls_col = db['urls']

# Vertifire setup: the API token travels in the ``X-Vertifire-Token`` header.
API_TOKEN = 'TOKEN'
headers = {'X-Vertifire-Token': API_TOKEN}

print('Total pending keywords: %i' % len(pending_keys))
def _store_organic_results(collection, keyword_id, organic):
    """Insert one document per organic SERP entry into *collection*.

    Each stored document carries the entry's rank, url, cleaned title and
    description, plus *keyword_id*. A failure on one entry is reported and
    does not stop the remaining entries from being processed.
    """
    for serp in organic:
        # Some serps do not have descriptions.
        serp.setdefault('description', '')
        try:
            # Encode to UTF-8, trim whitespace and swap double quotes for
            # single quotes. NOTE(review): this relies on Python 2 ``str``
            # semantics (bytes.replace with text arguments).
            description = serp['description'].encode('utf-8').strip().replace('"', "'")
            title = serp['title'].encode('utf-8').strip().replace('"', "'")
            collection.insert_one({
                'rank': serp['rank'],
                'url': serp['url'],
                'title': title,
                'description': description,
                'keyword_id': keyword_id,
            })
        except Exception as err:
            print('Error writing to db')
            print(repr(err))
            print(serp['description'])
            print(serp['url'])
            print(serp['title'])
            print(keyword_id)


# Ask Vertifire for the top SERPs of every pending keyword.
# NOTE(review): the nesting of this loop was reconstructed from a paste that
# had lost its indentation -- confirm against the original script.
for keyword in pending_keys:
    print(keyword)

    # Register the keyword; if it is already stored, reuse the existing id.
    # (A disabled check here used to skip keywords with >= 50 stored urls.)
    try:
        keyword_id = keywords_col.insert_one({'web_id': 1, 'term': keyword}).inserted_id
    except DuplicateKeyError:
        print('already exists')
        keyword_id = keywords_col.find_one({'term': keyword})['_id']

    data = {
        'callback[method]': 'GET',
        'callback[url]': 'http://54.152.153.222:8300/callback',
        'callback[param]': keyword,
        'terms[0][term]': keyword,
        'terms[0][sep][search_engine]': '1',
        'terms[0][sep][country]': 'US',
        'terms[0][sep][country_only]': '1',
        'terms[0][sep][language]': 'en',
        'terms[0][sep][language_only]': '1',
    }

    try:
        res = requests.post('https://api.vertifire.com/v2/serp/top', headers=headers, data=data)
        print(res.text)
        request_j = res.json()

        # Error code 1010: the keyword was already processed by Vertifire, so
        # take the returned key and retrieve the SERPs directly.
        if 'error' in request_j and request_j['error']['code'] == 1010:
            print('already requested')
            vertifire_key = request_j['response']['key']
            # Reuse the shared auth header instead of a second token literal.
            vertifire_res = requests.get(
                'https://api.vertifire.com/v1/response/' + vertifire_key,
                headers=headers
            )
            response_j = vertifire_res.json()
            try:
                _store_organic_results(
                    urls_col,
                    keyword_id,
                    response_j['response'][0]['results']['organic']
                )
            except Exception as err:
                print('Error in response from vertifire')
                print(repr(err))

        # Remember the Vertifire key on the keyword document.
        # NOTE(review): assumed to run for every successful request, not only
        # the "already requested" case -- confirm.
        keywords_col.update_one(
            {'_id': keyword_id},
            {'$set': {'vertifire_key': request_j['response']['key']}}
        )
        print('######')
    except Exception as err:
        # Best effort: report the failure and move on to the next keyword.
        # Unlike a bare ``except:``, this lets Ctrl-C / SystemExit through.
        print('Error')
        print(repr(err))
# (GitHub page footer, not part of the script: "Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment")