Skip to content

Instantly share code, notes, and snippets.

@mcsquaredjr
Created April 14, 2013 17:51
Show Gist options
  • Save mcsquaredjr/5383587 to your computer and use it in GitHub Desktop.
Save mcsquaredjr/5383587 to your computer and use it in GitHub Desktop.
gstest
username = 'your email'
passwd = 'your pass'
doc_name = 'PV data'
import sys
sys.path.append('../gdata')
import gdata.docs
import gdata.docs.service
import gdata.spreadsheet.service
import re, os
class GoogleSpreadsheet(object):
'''Wrap up google gdata methods to access a Google spreadsheet'''
def __init__(self, email, passwd):
# Connect to Google account
self.gd_client = gdata.spreadsheet.service.SpreadsheetsService()
self.gd_client.email = username
self.gd_client.password = passwd
self.gd_client.source = 'GoogleSpreadsheet wrapper class'
self.gd_client.ProgrammaticLogin()
def get_spreadsheet_id(self, spreadsheet_title):
'''Get spreadsheet id by spreadsheet title'''
doc_query = gdata.spreadsheet.service.DocumentQuery()
doc_query['title'] = spreadsheet_title
doc_query['title-exact'] = 'true'
ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query)
spreadsheet_id = ss_feed.entry[0].id.text.rsplit('/',1)[1]
return spreadsheet_id
def get_worksheet_id(self, spreadsheet_id, workbook_name):
'''Get worksheet id by spreadsheet id and workbook name'''
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_id)
d = self.worksheet_dict(spreadsheet_id)
try:
id = d[workbook_name]
except KeyError, e:
print e
id = None
return id
def worksheet_dict(self, spreadsheet_id):
'''Create dictionary containing worsheet's ids with keys equal to
their names if spreadsheet_id is given.
'''
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_id)
d = dict()
for i, entry in enumerate(ws_feed.entry):
d[entry.title.text] = entry.id.text.split('/')[-1]
return d
def _get_num_cols(self, range):
'''Compute the number of rows in a range'''
# It's ugly, but I did not find a method in the API that does it
pattern = re.compile('[\d]*$')
first, second = range.split(':')
m1 = re.search(pattern, first)
m2 = re.search(pattern, second)
ind1 = int(first[m1.start():])
ind2 = int(second[m2.start():])
num_cols = ind2 - ind1 + 1
return num_cols
def get_range(self, spreadsheet_id, worksheet_id, rng):
'''Return cell range as a list of tuples, so that each element of
the tuple represents a row of data in the spreadsheet. The range
should be provided following standard R1C1 notation. This method
always returns a rectangular array. Empty cells are returned as
None.
Parameters:
spreadhseet_id -- spreadhseet id
worksheet_id -- worksheet
rng -- range of cells, e.g. 'A2:R23'
'''
# If range is given
if rng is not None:
cell_query = gdata.spreadsheet.service.CellQuery()
cell_query['range'] = rng
cell_query['return-empty'] = 'true' # oh, my!
else:
cell_query = None
cell_feed = self.gd_client.GetCellsFeed(spreadsheet_id, worksheet_id, query=cell_query)
entry = cell_feed.entry
num_cols = self._get_num_cols(rng)
num_rows = len(entry) / num_cols
cells = []
for ii in range(len(entry)):
cells.append(entry[ii].content.text)
# Now reshape it to create num_rowsXnum_cols lists of tuples
cells = zip(*[iter(cells)]*num_rows)
return cells
def get_cols(self, spreadsheet_id, worksheet_id, query=None):
'''Reads entire workbook and returns dictionary, that contains
column names as keys and list of column elements as dictionary
values.
'''
if query is None:
rows = self.gd_client.GetListFeed(spreadsheet_id, worksheet_id)
else:
rows = self.gd_client.GetListFeed(spreadsheet_id, worksheet_id, query)
d = dict()
# Build dictionary
for row in rows.entry:
for key in row.custom:
if key in d.keys():
d[key].append(row.custom[key].text)
else:
d[key] = [row.custom[key].text]
return d
def get_list_query(self, query_str):
'''Retun an instance of ListQuery suitable to pass to the get_cols
method. You may set query parameters by setting up query keys.
Query keys:
sq: for general query such as 'name=john&last=smith'
orderby: for example: 'column:first'
reverse: values are 'true' and 'false', note these are string.
'''
list_query = gdata.spreadsheet.service.ListQuery()
return list_query
if __name__ == '__main__':
# Let's roll
gs = GoogleSpreadsheet(username, passwd)
sid = gs.get_spreadsheet_id(doc_name)
print sid
wid = gs.get_worksheet_id(sid, 'Sheet1')
print wid
cls = gs.get_range(sid, wid, 'A1:C5')
print cls
d = gs.get_cols(sid, wid)
print d
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment