Created
August 6, 2014 15:53
-
-
Save mcsquaredjr/5312d3f16eb3b29f6381 to your computer and use it in GitHub Desktop.
Simple wrapper around Google GData API. Caveat: columns cannot have spaces in their name. Make sure that gdata is installed before using it, to install go https://code.google.com/p/gdata-python-client/
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
USERNAME = 'your user name here' | |
PASSWD = 'your password here' | |
DOC_NAME = 'Document Name' | |
import sys | |
import gdata.docs | |
import gdata.docs.client | |
import gdata.docs.data | |
import gdata.docs.service | |
import gdata.spreadsheet.service | |
import re | |
import datetime | |
import time | |
############################################################ | |
# CLASS GOOGLESPREADSHEET # | |
############################################################ | |
class GoogleSpreadsheet(object): | |
'''Wrap up google gdata methods to access a Google spreadsheet''' | |
def __init__(self, email, passwd): | |
# Connect to Google account | |
self.gd_client = gdata.spreadsheet.service.SpreadsheetsService() | |
self.gd_client.email = email | |
self.gd_client.password = passwd | |
self.gd_client.source = 'GoogleSpreadsheet wrapper class' | |
self.gd_client.ProgrammaticLogin() | |
def create_spreadsheet(self, spreadsheet_title): | |
'''Create a new spreadsheet, given its title, return spreadsheet | |
key if successful. | |
''' | |
client = gdata.docs.client.DocsClient() | |
client.http_client.debug = False | |
client.client_login(self.gd_client.email, | |
self.gd_client.password, | |
'GoogleSpreadsheet wrapper class') | |
# Create spreadsheet | |
doc = gdata.docs.data.Resource(type='spreadsheet', title=spreadsheet_title) | |
document = client.create_resource(doc) | |
spreadsheet_key = document.GetId().split("%3A")[1] | |
return spreadsheet_key | |
def get_spreadsheet_key(self, spreadsheet_title): | |
'''Get spreadsheet id by spreadsheet title''' | |
doc_query = gdata.spreadsheet.service.DocumentQuery() | |
doc_query['title'] = spreadsheet_title | |
doc_query['title-exact'] = 'true' | |
ss_feed = self.gd_client.GetSpreadsheetsFeed(query=doc_query) | |
spreadsheet_key = ss_feed.entry[0].id.text.rsplit('/',1)[1] | |
return spreadsheet_key | |
def add_worksheet(self, spreadsheet_key, worksheet_title, row_count, col_count): | |
'''Add new worsheet to the spreadsheet identified by its key''' | |
ws = self.gd_client.AddWorksheet(worksheet_title, | |
row_count, col_count, | |
spreadsheet_key) | |
# Get worksheet as SpreadsheatsWorksheet and return its id | |
return ws.id.text.rsplit('/',1)[1] | |
def get_worksheet_id(self, spreadsheet_key, worksheet_name): | |
'''Get worksheet id by spreadsheet key and workbook name''' | |
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) | |
d = self.worksheet_dict(spreadsheet_key) | |
try: | |
id = d[worksheet_name] | |
except KeyError, e: | |
print e | |
id = None | |
return id | |
def worksheet_dict(self, spreadsheet_key): | |
'''Create dictionary containing worsheet's ids with keys equal to | |
their names if spreadsheet_key is given. | |
''' | |
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) | |
d = dict() | |
for i, entry in enumerate(ws_feed.entry): | |
d[entry.title.text] = entry.id.text.split('/')[-1] | |
return d | |
def _get_worksheet_url(self, spreadsheet_key, worksheet_id): | |
ws_feed = self.gd_client.GetWorksheetsFeed(spreadsheet_key) | |
url = None | |
for i, entry in enumerate(ws_feed.entry): | |
if entry.id.text.split('/')[-1] == worksheet_id: | |
url = entry.link[-1].href | |
break | |
return url | |
def delete_worksheet(self, spreadsheet_key, worksheet_id): | |
url = self._get_worksheet_url(spreadsheet_key, worksheet_id) | |
self.gd_client.DeleteWorksheet(url=url) | |
def _get_num_cols(self, range): | |
'''Compute the number of rows in a range''' | |
# It's ugly, but I did not find a method in the API that does it | |
pattern = re.compile('[\d]*$') | |
first, second = range.split(':') | |
m1 = re.search(pattern, first) | |
m2 = re.search(pattern, second) | |
ind1 = int(first[m1.start():]) | |
ind2 = int(second[m2.start():]) | |
num_cols = ind2 - ind1 + 1 | |
return num_cols | |
def get_range(self, spreadsheet_key, worksheet_id, rng): | |
'''Return cell range as a list of tuples, so that each element of | |
the tuple represents a row of data in the spreadsheet. The range | |
should be provided following standard R1C1 notation. This method | |
always returns a rectangular array. Empty cells are returned as | |
None. | |
Parameters: | |
spreadhseet_id -- spreadhseet id | |
worksheet_id -- worksheet | |
rng -- range of cells, e.g. 'A2:R23' | |
''' | |
# If range is given | |
if rng is not None: | |
cell_query = gdata.spreadsheet.service.CellQuery() | |
cell_query['range'] = rng | |
cell_query['return-empty'] = 'true' # oh, my! | |
else: | |
cell_query = None | |
cell_feed = self.gd_client.GetCellsFeed(spreadsheet_key, | |
worksheet_id, | |
query=cell_query) | |
entry = cell_feed.entry | |
num_cols = self._get_num_cols(rng) | |
num_rows = len(entry) / num_cols | |
cells = [] | |
for ii in range(len(entry)): | |
cells.append(entry[ii].content.text) | |
# Now reshape it to create num_rowsXnum_cols lists of tuples | |
cells = zip(*[iter(cells)]*num_rows) | |
return cells | |
def update_cells(self, spreadsheet_key, worksheet_id, data, row=1, col=1): | |
'''Update data in a worksheets specified by keys and sheets id, | |
starting from position specified by row and col (default 1, 1) | |
Parameters: | |
data -- is a list of lists, so that each element represents a row, | |
data must be a rectangular array (all rows are of the same length) | |
Warning: this method is painfully slow, use insert_row whenever possible. | |
''' | |
num_rows = len(data) | |
num_cols = len(data[0]) | |
for i in range(num_rows): | |
for j in range(num_cols): | |
cell = self.gd_client.UpdateCell(i+row, | |
j+col, | |
str(data[i][j]), | |
spreadsheet_key, | |
worksheet_id) | |
if isinstance(cell, gdata.spreadsheet.SpreadsheetsCell) == False: | |
print 'Error updating cell R{0}C{1}'.format(i+row, j+col) | |
def __insert_rows(self, spreadsheet_key, worksheet_id, data): | |
'''Insert rows in empty spreadsheet''' | |
hdr = [data[0]] | |
self.update_cells(spreadsheet_key, worksheet_id, hdr) | |
# Now we may use InsertRow | |
for i in range(1, len(data)): | |
row_dict = dict() | |
for j in range(len(data[0])): | |
row_dict[str(data[0][j]).lower()] = str(data[i][j]) | |
entry = self.gd_client.InsertRow(row_dict, spreadsheet_key, worksheet_id) | |
if isinstance(entry, gdata.spreadsheet.SpreadsheetsList) == False: | |
print 'Error inserting row #{0}'.format(i) | |
def insert_rows(self, spreadsheet_key, worksheet_id, data): | |
'''Insert a row of data in the spreadsheet. Data should be a list of | |
lists, representing a rectangular array, i.e. each list has the same | |
number of elements. | |
''' | |
list_feed = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id) | |
if len(list_feed.entry) == 0: | |
self.__insert_rows(spreadsheet_key, worksheet_id, data) | |
else: | |
# Delete non-empty rows | |
for i in range(len(list_feed.entry)): | |
self.gd_client.DeleteRow(list_feed.entry[i]) | |
self.__insert_rows(spreadsheet_key, worksheet_id, data) | |
def get_cols(self, spreadsheet_key, worksheet_id): | |
'''Reads entire workbook and returns dictionary, that contains | |
column names as keys and list of column elements as dictionary | |
values. | |
''' | |
rows = self.gd_client.GetListFeed(spreadsheet_key, worksheet_id) | |
d = dict() | |
# Build dictionary | |
for row in rows.entry: | |
for key in row.custom: | |
if key in d.keys(): | |
d[key].append(row.custom[key].text) | |
else: | |
d[key] = [row.custom[key].text] | |
return d | |
def get_list_query(self, query_str): | |
'''Retun an instance of ListQuery suitable to pass to the get_cols | |
method. You may set query parameters by setting up query keys. | |
Query keys: | |
sq: for general query such as 'name=john&last=smith' | |
orderby: for example: 'column:first' | |
reverse: values are 'true' and 'false', note these are string. | |
''' | |
list_query = gdata.spreadsheet.service.ListQuery() | |
return list_query | |
if __name__ == '__main__': | |
# Let's roll | |
gs = GoogleSpreadsheet(USERNAME, PASSWD) | |
sid = gs.get_spreadsheet_key('Test spreadsheet') | |
ws = gs.get_worksheet_id(sid, 'Test Worksheet') | |
print sid | |
print ws | |
data = [['Project', 'Start', 'End', 'NumberOfBugs'], ['Trinity', '2012/05/15', '2012/06/15', 45], ['Trinity', '2012/06/15', '2012/07/15', 29], ['Trinity', '2012/07/15', '2012/08/15', 13], ['Trinity', '2012/08/15', '2012/09/15', 11], ['Trinity', '2012/09/15', '2012/10/15', 15], ['Trinity', '2012/10/15', '2012/11/15', 13], ['Trinity', '2012/11/15', '2012/12/15', 12], ['Trinity', '2012/12/15', '2013/01/15', 4], ['Trinity', '2013/01/15', '2013/02/15', 33], ['Trinity', '2013/02/15', '2013/03/15', 51], ['Trinity', '2013/03/15', '2013/04/15', 19], ['Neo', '2012/05/15', '2012/06/15', 3], ['Neo', '2012/06/15', '2012/07/15', 2], ['Neo', '2012/07/15', '2012/08/15', 3], ['Neo', '2012/08/15', '2012/09/15', 3], ['Neo', '2012/09/15', '2012/10/15', 2], ['Neo', '2012/10/15', '2012/11/15', 1], ['Neo', '2012/11/15', '2012/12/15', 4], ['Neo', '2012/12/15', '2013/01/15', 1], ['Neo', '2013/01/15', '2013/02/15', 0], ['Neo', '2013/02/15', '2013/03/15', 2], ['Neo', '2013/03/15', '2013/04/15', 0], ['OSG', '2012/05/15', '2012/06/15', 16], ['OSG', '2012/06/15', '2012/07/15', 19], ['OSG', '2012/07/15', '2012/08/15', 4], ['OSG', '2012/08/15', '2012/09/15', 8], ['OSG', '2012/09/15', '2012/10/15', 1], ['OSG', '2012/10/15', '2012/11/15', 3], ['OSG', '2012/11/15', '2012/12/15', 15], ['OSG', '2012/12/15', '2013/01/15', 7], ['OSG', '2013/01/15', '2013/02/15', 14], ['OSG', '2013/02/15', '2013/03/15', 8], ['OSG', '2013/03/15', '2013/04/15', 7], ['Morpheus', '2012/05/15', '2012/06/15', 2], ['Morpheus', '2012/06/15', '2012/07/15', 5], ['Morpheus', '2012/07/15', '2012/08/15', 8], ['Morpheus', '2012/08/15', '2012/09/15', 6], ['Morpheus', '2012/09/15', '2012/10/15', 3], ['Morpheus', '2012/10/15', '2012/11/15', 4], ['Morpheus', '2012/11/15', '2012/12/15', 3], ['Morpheus', '2012/12/15', '2013/01/15', 1], ['Morpheus', '2013/01/15', '2013/02/15', 5], ['Morpheus', '2013/02/15', '2013/03/15', 7], ['Morpheus', '2013/03/15', '2013/04/15', 10]] | |
print data | |
gs.insert_rows(sid, ws, data) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment