Skip to content

Instantly share code, notes, and snippets.

@derickson
Last active August 29, 2015 14:25
Show Gist options
  • Save derickson/4990d679133b6082dd1a to your computer and use it in GitHub Desktop.
Save derickson/4990d679133b6082dd1a to your computer and use it in GitHub Desktop.
Experiment, pulling google spreadsheet data into ES
## Dependencies in order for this to work
## 1) google spreadsheet lib: https://github.com/burnash/gspread
## pip install gspread
## 2) oauth2 lib: http://gspread.readthedocs.org/en/latest/oauth2.html
## pip install oauth2client
## 3) create a "service account" in your google api console following the instructions from the oauth2 lib above
## 4) the google spreadsheet has to be shared with the value of client_email from the .json keyfile you downloaded in step 3
## <SOMEHASHFROMFILE>@developer.gserviceaccount.com
import json
import gspread
from oauth2client.client import SignedJwtAssertionCredentials
from datetime import datetime
from elasticsearch import Elasticsearch
es = Elasticsearch()
keyFilePath = "<BIGHASHFILENAME>.json"
spreadsheetKey = "<THE GOOGLE UUID OF THE SPREADSHEET FROM THE URL IN GOOGLE DOCS>"
worksheetName = "Record"
esIndexName = "videogames"
esTypeName = "record"
json_key = json.load(open(keyFilePath))
scope = ['https://spreadsheets.google.com/feeds']
credentials = SignedJwtAssertionCredentials(json_key['client_email'], json_key['private_key'], scope)
gc = gspread.authorize(credentials)
sh = gc.open_by_key(spreadsheetKey)
ws = sh.worksheet(worksheetName)
list_of_lists = ws.get_all_values()
##print list_of_lists
## es.index( index="foo", doc_type="event", id='3', body={"name":"Bob", "@timestamp":datetime.today() })
es.indices.delete(index=esIndexName, ignore=[400, 404])
es.indices.create(index=esIndexName, body={
"settings": {
"number_of_replicas": 0,
"number_of_shards": 1
},
"mappings": {
"record": {
"_all": { "enabled": True },
"properties": {
"year": {"type": "integer"},
"title": {"type": "string", "index" : "analyzed"},
"purchaseMethod": {"type": "string", "index" : "not_analyzed"},
"genre": {"type": "string", "index" : "not_analyzed"},
"playStatus": {"type": "string", "index" : "not_analyzed"},
"platform": {"type": "string", "index" : "not_analyzed"}
}
}
}
})
counter = 0
for row in list_of_lists:
if(counter == 0):
counter = counter + 1
continue
doc = {
"year": int(row[0]),
"title": row[2],
"purchaseMethod": row[3],
"genre": row[4],
"playStatus": row[5],
"platform": row[6]
}
es.index( index=esIndexName, doc_type=esTypeName, body= doc)
counter = counter + 1
print "Done, loaded: ", counter
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment