Skip to content

Instantly share code, notes, and snippets.

@zmcghee
Last active August 29, 2015 14:21
Show Gist options
  • Save zmcghee/b1c80406f8dc5c4cadec to your computer and use it in GitHub Desktop.
Save zmcghee/b1c80406f8dc5c4cadec to your computer and use it in GitHub Desktop.
Fetch the first sheet of a Google Sheet file and work with it in Python.
def get_spreadsheet_id(user_input):
"""Accepts any form of Google Sheet URL or just the raw ID"""
if user_input.__contains__("spreadsheets/d"):
parts = user_input.split("/", 7)
if parts[4] == 'd':
return parts[5]
raise Exception("Couldn't get spreadsheet ID")
return user_input
if __name__ == "__main__":
example_1 = "1BY3SXLld0ooJsxi8m7FqAUvxAUzIvIOCY7RErMYutZc"
example_2 = "https://docs.google.com/spreadsheets/d/1BY3SXLld0ooJsxi8m7FqAUvxAUzIvIOCY7RErMYutZc/pubhtml"
example_3 = "https://docs.google.com/spreadsheets/d/1BY3SXLld0ooJsxi8m7FqAUvxAUzIvIOCY7RErMYutZc/edit#gid=907233235"
try:
assert get_spreadsheet_id(example_1) == get_spreadsheet_id(example_2) == get_spreadsheet_id(example_3)
except:
raise
else:
print "It works!"
import json
import requests
class GoogleSheet(object):
"""
Class to import the first sheet of a Google Sheet and
convert it to a Python dictionary. Basic usage:
>>> sheet_id = '1PWhG8sLvoGM86OTBPIc7lrWYnqoAQPo9hzrEJRnKogE'
>>> sheet = GoogleSheet(sheet_id)
>>> sheet.items
"""
sheet_id = None
fields = []
def __init__(self, sheet_id, fields=[], fetch=True):
"""Class will fetch the remote data and introspect fields
from the data by default upon instantiation. You can pass
fetch=False to disable this behavior and call them
explicitly, such as:
>>> sheet = GoogleSheet(sheet_id, fetch=False)
>>> sheet.fetch()
>>> sheet.auto_fields()
>>> sheet.items
[(list of dicts)...]
You can also explicitly pass the fields you want returned,
either here or using the set_fields method. Examples:
>>> sheet = GoogleSheet(sheet_id, fields=['name', 'email'])
>>> sheet.items.keys()
['name', 'email']
>>> sheet = GoogleSheet(sheet_id, fetch=False)
>>> sheet.set_fields(['name', 'email'])
>>> sheet.items.keys()
['name', 'email']
"""
self.sheet_id = sheet_id
if fields: # before fetch in case fetch fails
self.set_fields(fields)
if fetch:
self.fetch()
if not fields and fields != False:
# after fetch so we can infer from data
self.auto_fields()
@property
def json_url(self):
"""Returns a simple URL to fetch the first sheet of
a Google Sheet in JSON format from Google"""
url = 'https://spreadsheets.google.com/feeds/list/'\
+ '%s/default/public/values?alt=json' % (self.sheet_id)
return url
def fetch(self):
"""Fetch json_url, convert the JSON to Python & store it"""
self._google = requests.get(self.json_url)
if self._google.status_code != 200:
raise Exception("Google Sheet returned non-200 response")
self.gsx = self._google.json()
def set_fields(self, fields):
"""Explicitly set fields you want from the sheet"""
self.fields = fields
def auto_fields(self):
"""Infer fields from keys in your sheet's first entry"""
try:
keys = self.gsx['feed']['entry'][0].keys()
except KeyError, IndexError:
pass
else:
fields = []
for key in keys:
if key.startswith('gsx$'):
fields.append(key)
self.set_fields(fields)
@property
def items(self):
"""Returns entries from your sheet"""
items = []
for entry in self.gsx['feed']['entry']:
item = {}
for field in self.fields:
if field.startswith('gsx$'):
gsx_key = field
else:
gsx_key = 'gsx$' + field
key = gsx_key[4:]
item[key] = entry[gsx_key]['$t']
if isinstance(item[key], basestring):
item[key] = item[key].strip()
items.append(item)
return items
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment