Experiment, pulling google spreadsheet data into ES
## Dependencies in order for this to work | |
## 1) google spreadsheet lib: https://github.com/burnash/gspread | |
## pip install gspread | |
## 2) oauth2 lib: http://gspread.readthedocs.org/en/latest/oauth2.html | |
## pip install oauth2client | |
## 3) create a "service account" in your google api console following the instructions from the oauth2 lib above | |
## 4) the google spreadsheet has to be shared with the value of client_email from the .json keyfile you downloaded in step 3 | |
## <SOMEHASHFROMFILE>@developer.gserviceaccount.com | |
import json | |
import gspread | |
from oauth2client.client import SignedJwtAssertionCredentials | |
from datetime import datetime | |
from elasticsearch import Elasticsearch | |
es = Elasticsearch() | |
keyFilePath = "<BIGHASHFILENAME>.json" | |
spreadsheetKey = "<THE GOOGLE UUID OF THE SPREADSHEET FROM THE URL IN GOOGLE DOCS>" | |
worksheetName = "Record" | |
esIndexName = "videogames" | |
esTypeName = "record" | |
json_key = json.load(open(keyFilePath)) | |
scope = ['https://spreadsheets.google.com/feeds'] | |
credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'], scope) | |
gc = gspread.authorize(credentials) | |
sh = gc.open_by_key(spreadsheetKey) | |
ws = sh.worksheet(worksheetName) | |
list_of_lists = ws.get_all_values() | |
##print list_of_lists | |
## es.index( index="foo", doc_type="event", id='3', body={"name":"Bob", "@timestamp":datetime.today() }) | |
es.indices.delete(index=esIndexName, ignore=[400, 404]) | |
es.indices.create(index=esIndexName, body={ | |
"settings": { | |
"number_of_replicas": 0, | |
"number_of_shards": 1 | |
}, | |
"mappings": { | |
"record": { | |
"_all": { "enabled": True }, | |
"properties": { | |
"year": {"type": "integer"}, | |
"title": {"type": "string", "index" : "analyzed"}, | |
"purchaseMethod": {"type": "string", "index" : "not_analyzed"}, | |
"genre": {"type": "string", "index" : "not_analyzed"}, | |
"playStatus": {"type": "string", "index" : "not_analyzed"}, | |
"platform": {"type": "string", "index" : "not_analyzed"} | |
} | |
} | |
} | |
}) | |
counter = 0 | |
for row in list_of_lists: | |
if(counter == 0): | |
counter = counter + 1 | |
continue | |
doc = { | |
"year": int(row[0]), | |
"title": row[2], | |
"purchaseMethod": row[3], | |
"genre": row[4], | |
"playStatus": row[5], | |
"platform": row[6] | |
} | |
es.index( index=esIndexName, doc_type=esTypeName, body= doc) | |
counter = counter + 1 | |
print "Done, loaded: ", counter | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment