Last active
August 29, 2015 14:25
-
-
Save derickson/4990d679133b6082dd1a to your computer and use it in GitHub Desktop.
Experiment, pulling google spreadsheet data into ES
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## Dependencies in order for this to work | |
## 1) google spreadsheet lib: https://github.com/burnash/gspread | |
## pip install gspread | |
## 2) oauth2 lib: http://gspread.readthedocs.org/en/latest/oauth2.html | |
## pip install oauth2client | |
## 3) create a "service account" in your google api console following the instructions from the oauth2 lib above | |
## 4) the google spreadsheet has to be shared with the value of client_email from the .json keyfile you downloaded in step 3 | |
## <SOMEHASHFROMFILE>@developer.gserviceaccount.com | |
import json | |
import gspread | |
from oauth2client.client import SignedJwtAssertionCredentials | |
from datetime import datetime | |
from elasticsearch import Elasticsearch | |
es = Elasticsearch() | |
keyFilePath = "<BIGHASHFILENAME>.json" | |
spreadsheetKey = "<THE GOOGLE UUID OF THE SPREADSHEET FROM THE URL IN GOOGLE DOCS>" | |
worksheetName = "Record" | |
esIndexName = "videogames" | |
esTypeName = "record" | |
json_key = json.load(open(keyFilePath)) | |
scope = ['https://spreadsheets.google.com/feeds'] | |
credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'], scope) | |
gc = gspread.authorize(credentials) | |
sh = gc.open_by_key(spreadsheetKey) | |
ws = sh.worksheet(worksheetName) | |
list_of_lists = ws.get_all_values() | |
##print list_of_lists | |
## es.index( index="foo", doc_type="event", id='3', body={"name":"Bob", "@timestamp":datetime.today() }) | |
es.indices.delete(index=esIndexName, ignore=[400, 404]) | |
es.indices.create(index=esIndexName, body={ | |
"settings": { | |
"number_of_replicas": 0, | |
"number_of_shards": 1 | |
}, | |
"mappings": { | |
"record": { | |
"_all": { "enabled": True }, | |
"properties": { | |
"year": {"type": "integer"}, | |
"title": {"type": "string", "index" : "analyzed"}, | |
"purchaseMethod": {"type": "string", "index" : "not_analyzed"}, | |
"genre": {"type": "string", "index" : "not_analyzed"}, | |
"playStatus": {"type": "string", "index" : "not_analyzed"}, | |
"platform": {"type": "string", "index" : "not_analyzed"} | |
} | |
} | |
} | |
}) | |
counter = 0 | |
for row in list_of_lists: | |
if(counter == 0): | |
counter = counter + 1 | |
continue | |
doc = { | |
"year": int(row[0]), | |
"title": row[2], | |
"purchaseMethod": row[3], | |
"genre": row[4], | |
"playStatus": row[5], | |
"platform": row[6] | |
} | |
es.index( index=esIndexName, doc_type=esTypeName, body= doc) | |
counter = counter + 1 | |
print "Done, loaded: ", counter | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment