Skip to content

Instantly share code, notes, and snippets.

@mcsquaredjr
Created August 6, 2014 15:53
Show Gist options
  • Save mcsquaredjr/5312d3f16eb3b29f6381 to your computer and use it in GitHub Desktop.
Save mcsquaredjr/5312d3f16eb3b29f6381 to your computer and use it in GitHub Desktop.
Simple wrapper around Google GData API. Caveat: columns cannot have spaces in their name. Make sure that gdata is installed before using it, to install go https://code.google.com/p/gdata-python-client/
USERNAME = 'your user name here'
PASSWD = 'your password here'
DOC_NAME = 'Document Name'
import sys
import gdata.docs
import gdata.docs.client
import gdata.docs.data
import gdata.docs.service
import gdata.spreadsheet.service
import re
import datetime
import time
############################################################
# CLASS GOOGLESPREADSHEET #
############################################################
class GoogleSpreadsheet(object):
'''Wrap up google gdata methods to access a Google spreadsheet'''
def __init__(self, email, passwd):
# Connect to Google account
self.gd_client = gdata.spreadsheet.service.SpreadsheetsService()
self.gd_client.email = email
self.gd_client.password = passwd
self.gd_client.source = 'GoogleSpreadsheet wrapper class'
self.gd_client.ProgrammaticLogin()
def create_spreadsheet(self, spreadsheet_title):
'''Create a new spreadsheet, given its title, return spreadsheet
key if successful.
'''
client = gdata.docs.client.DocsClient()
client.http_client.debug = False
client.client_login(self.gd_client.email,
self.gd_client.password,
'GoogleSpreadsheet wrapper class')
# Create spreadsheet
doc = gdata.docs.data.Resource(type='spreadsheet', title=spreadsheet_title)
document = client.create_resource(doc)
spreadsheet_key = document.GetId().split("%3A")[1]
return spreadsheet_key
def get_spreadsheet_key(self, spreadsheet_title):
'''Get spreadsheet id by spreadsheet title'''
doc_query = gdata.spreadsheet.service.DocumentQuery()
doc_query['title'] = spreadsheet_title
doc_query['title-exact'] = 'true'
ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query)
spreadsheet_key = ss_feed.entry[0].id.text.rsplit('/',1)[1]
return spreadsheet_key
def add_worksheet(self, spreadsheet_key, worksheet_title, row_count, col_count):
'''Add new worsheet to the spreadsheet identified by its key'''
ws = self.gd_client.AddWorksheet(worksheet_title,
row_count, col_count,
spreadsheet_key)
# Get worksheet as SpreadsheatsWorksheet and return its id
return ws.id.text.rsplit('/',1)[1]
def get_worksheet_id(self, spreadsheet_key, worksheet_name):
'''Get worksheet id by spreadsheet key and workbook name'''
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key)
d = self.worksheet_dict(spreadsheet_key)
try:
id = d[worksheet_name]
except KeyError, e:
print e
id = None
return id
def worksheet_dict(self, spreadsheet_key):
'''Create dictionary containing worsheet's ids with keys equal to
their names if spreadsheet_key is given.
'''
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key)
d = dict()
for i, entry in enumerate(ws_feed.entry):
d[entry.title.text] = entry.id.text.split('/')[-1]
return d
def _get_worksheet_url(self, spreadsheet_key, worksheet_id):
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key)
url = None
for i, entry in enumerate(ws_feed.entry):
if entry.id.text.split('/')[-1] == worksheet_id:
url = entry.link[-1].href
break
return url
def delete_worksheet(self, spreadsheet_key, worksheet_id):
url = self._get_worksheet_url(spreadsheet_key, worksheet_id)
self.gd_client.DeleteWorksheet(url=url)
def _get_num_cols(self, range):
'''Compute the number of rows in a range'''
# It's ugly, but I did not find a method in the API that does it
pattern = re.compile('[\d]*$')
first, second = range.split(':')
m1 = re.search(pattern, first)
m2 = re.search(pattern, second)
ind1 = int(first[m1.start():])
ind2 = int(second[m2.start():])
num_cols = ind2 - ind1 + 1
return num_cols
def get_range(self, spreadsheet_key, worksheet_id, rng):
'''Return cell range as a list of tuples, so that each element of
the tuple represents a row of data in the spreadsheet. The range
should be provided following standard R1C1 notation. This method
always returns a rectangular array. Empty cells are returned as
None.
Parameters:
spreadhseet_id -- spreadhseet id
worksheet_id -- worksheet
rng -- range of cells, e.g. 'A2:R23'
'''
# If range is given
if rng is not None:
cell_query = gdata.spreadsheet.service.CellQuery()
cell_query['range'] = rng
cell_query['return-empty'] = 'true' # oh, my!
else:
cell_query = None
cell_feed = self.gd_client.GetCellsFeed(spreadsheet_key,
worksheet_id,
query=cell_query)
entry = cell_feed.entry
num_cols = self._get_num_cols(rng)
num_rows = len(entry) / num_cols
cells = []
for ii in range(len(entry)):
cells.append(entry[ii].content.text)
# Now reshape it to create num_rowsXnum_cols lists of tuples
cells = zip(*[iter(cells)]*num_rows)
return cells
def update_cells(self, spreadsheet_key, worksheet_id, data, row=1, col=1):
'''Update data in a worksheets specified by keys and sheets id,
starting from position specified by row and col (default 1, 1)
Parameters:
data -- is a list of lists, so that each element represents a row,
data must be a rectangular array (all rows are of the same length)
Warning: this method is painfully slow, use insert_row whenever possible.
'''
num_rows = len(data)
num_cols = len(data[0])
for i in range(num_rows):
for j in range(num_cols):
cell = self.gd_client.UpdateCell(i+row,
j+col,
str(data[i][j]),
spreadsheet_key,
worksheet_id)
if isinstance(cell, gdata.spreadsheet.SpreadsheetsCell) == False:
print 'Error updating cell R{0}C{1}'.format(i+row, j+col)
def __insert_rows(self, spreadsheet_key, worksheet_id, data):
'''Insert rows in empty spreadsheet'''
hdr = [data[0]]
self.update_cells(spreadsheet_key, worksheet_id, hdr)
# Now we may use InsertRow
for i in range(1, len(data)):
row_dict = dict()
for j in range(len(data[0])):
row_dict[str(data[0][j]).lower()] = str(data[i][j])
entry = self.gd_client.InsertRow(row_dict, spreadsheet_key, worksheet_id)
if isinstance(entry, gdata.spreadsheet.SpreadsheetsList) == False:
print 'Error inserting row #{0}'.format(i)
def insert_rows(self, spreadsheet_key, worksheet_id, data):
'''Insert a row of data in the spreadsheet. Data should be a list of
lists, representing a rectangular array, i.e. each list has the same
number of elements.
'''
list_feed = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id)
if len(list_feed.entry) == 0:
self.__insert_rows(spreadsheet_key, worksheet_id, data)
else:
# Delete non-empty rows
for i in range(len(list_feed.entry)):
self.gd_client.DeleteRow(list_feed.entry[i])
self.__insert_rows(spreadsheet_key, worksheet_id, data)
def get_cols(self, spreadsheet_key, worksheet_id):
'''Reads entire workbook and returns dictionary, that contains
column names as keys and list of column elements as dictionary
values.
'''
rows = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id)
d = dict()
# Build dictionary
for row in rows.entry:
for key in row.custom:
if key in d.keys():
d[key].append(row.custom[key].text)
else:
d[key] = [row.custom[key].text]
return d
def get_list_query(self, query_str):
'''Retun an instance of ListQuery suitable to pass to the get_cols
method. You may set query parameters by setting up query keys.
Query keys:
sq: for general query such as 'name=john&last=smith'
orderby: for example: 'column:first'
reverse: values are 'true' and 'false', note these are string.
'''
list_query = gdata.spreadsheet.service.ListQuery()
return list_query
if __name__ == '__main__':
# Let's roll
gs = GoogleSpreadsheet(USERNAME, PASSWD)
sid = gs.get_spreadsheet_key('Test spreadsheet')
ws = gs.get_worksheet_id(sid, 'Test Worksheet')
print sid
print ws
data = [['Project', 'Start', 'End', 'NumberOfBugs'], ['Trinity', '2012/05/15', '2012/06/15', 45], ['Trinity', '2012/06/15', '2012/07/15', 29], ['Trinity', '2012/07/15', '2012/08/15', 13], ['Trinity', '2012/08/15', '2012/09/15', 11], ['Trinity', '2012/09/15', '2012/10/15', 15], ['Trinity', '2012/10/15', '2012/11/15', 13], ['Trinity', '2012/11/15', '2012/12/15', 12], ['Trinity', '2012/12/15', '2013/01/15', 4], ['Trinity', '2013/01/15', '2013/02/15', 33], ['Trinity', '2013/02/15', '2013/03/15', 51], ['Trinity', '2013/03/15', '2013/04/15', 19], ['Neo', '2012/05/15', '2012/06/15', 3], ['Neo', '2012/06/15', '2012/07/15', 2], ['Neo', '2012/07/15', '2012/08/15', 3], ['Neo', '2012/08/15', '2012/09/15', 3], ['Neo', '2012/09/15', '2012/10/15', 2], ['Neo', '2012/10/15', '2012/11/15', 1], ['Neo', '2012/11/15', '2012/12/15', 4], ['Neo', '2012/12/15', '2013/01/15', 1], ['Neo', '2013/01/15', '2013/02/15', 0], ['Neo', '2013/02/15', '2013/03/15', 2], ['Neo', '2013/03/15', '2013/04/15', 0], ['OSG', '2012/05/15', '2012/06/15', 16], ['OSG', '2012/06/15', '2012/07/15', 19], ['OSG', '2012/07/15', '2012/08/15', 4], ['OSG', '2012/08/15', '2012/09/15', 8], ['OSG', '2012/09/15', '2012/10/15', 1], ['OSG', '2012/10/15', '2012/11/15', 3], ['OSG', '2012/11/15', '2012/12/15', 15], ['OSG', '2012/12/15', '2013/01/15', 7], ['OSG', '2013/01/15', '2013/02/15', 14], ['OSG', '2013/02/15', '2013/03/15', 8], ['OSG', '2013/03/15', '2013/04/15', 7], ['Morpheus', '2012/05/15', '2012/06/15', 2], ['Morpheus', '2012/06/15', '2012/07/15', 5], ['Morpheus', '2012/07/15', '2012/08/15', 8], ['Morpheus', '2012/08/15', '2012/09/15', 6], ['Morpheus', '2012/09/15', '2012/10/15', 3], ['Morpheus', '2012/10/15', '2012/11/15', 4], ['Morpheus', '2012/11/15', '2012/12/15', 3], ['Morpheus', '2012/12/15', '2013/01/15', 1], ['Morpheus', '2013/01/15', '2013/02/15', 5], ['Morpheus', '2013/02/15', '2013/03/15', 7], ['Morpheus', '2013/03/15', '2013/04/15', 10]]
print data
gs.insert_rows(sid, ws, data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment