@greglinch
Forked from nickjevershed/google-sheets-json.py
Last active March 14, 2017 17:35
Python script (based on @nickjevershed's original) to convert a Google spreadsheet to a simple JSON file and save it locally and/or to S3. Assumes your data is on the left-most sheet (i.e. the default) and that you've already clicked the "Publish to the web" option in the "File" menu. The S3 upload requires environment variables.
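The output is a JSON array of row objects keyed by the header row; for a sheet with columns name and title it would look roughly like this (hypothetical values):

[
    {"name": "Ada Lovelace", "title": "Analyst"},
    {"name": "Grace Hopper", "title": "Programmer"}
]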
import os
import json
import argparse
import requests
import tinys3
'''
Modified version of nickjevershed's code
to-do's:
- abstract sheet_id option so it can be passed in
- Slack output option
'''
## arguments
parser = argparse.ArgumentParser()
parser.add_argument("--id", help="Google Sheet ID")
parser.add_argument("--quotes", action="store_true", help="Use &quot; instead of escaped quotes")
args = parser.parse_args()
id_arg = args.id
quotes_arg = args.quotes
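## example invocation (script name per the original gist; sheet ID is hypothetical):
##   python google-sheets-json.py --id 1a2b3c4d5e --quotes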
# TK TK
## spreadsheet id
sheet_id = id_arg if id_arg else 'SHEET_ID' # falls back to a hardcoded ID when --id isn't passed
## Google API request urls
url1 = 'https://spreadsheets.google.com/feeds/cells/' + sheet_id + '/od6/public/values?alt=json'
url2 = 'https://spreadsheets.google.com/feeds/list/' + sheet_id + '/od6/public/values?alt=json'
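## 'od6' is the gdata worksheet ID for the default (left-most) sheet.
## The cell feed (url1) returns one entry per cell, roughly
##   {'gs$cell': {'row': '1', 'col': '1', ...}, 'content': {'$t': 'name'}},
## while the list feed (url2) returns one entry per data row keyed 'gsx$<header>'.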
## get the json in cell format from google
ss_content1 = requests.get(url1).json()
## lists to store new keys and data
new_keys = []
new_data = []
## make a list of the entries in the first row for nice keys
for item in ss_content1['feed']['entry']:
    if item['gs$cell']['row'] == '1':
        new_keys.append(item['content']['$t'])
print(new_keys)
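## e.g. prints ['name', 'title'] for a sheet with those two header cells (hypothetical)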
## get json in list format
ss_content2 = requests.get(url2).json()
## remap entries from having gsx$-prefixed keys to having no prefix, i.e. our first row as keys
## note: the gsx$ keys are the header cells lowercased with spaces stripped, so this assumes
## your header row already contains simple lowercase words
for entry in ss_content2['feed']['entry']:
    row_data = []
    for key in new_keys:
        if quotes_arg:
            ## optionally replace quote marks with the HTML-encoded version
            row_data.append(entry['gsx$' + key]['$t'].replace('"', '&quot;'))
        else:
            ## default: keep quote marks as-is; json.dumps will escape them
            row_data.append(entry['gsx$' + key]['$t'])
    new_data.append(dict(zip(new_keys, row_data)))
print(new_data)
## serialize to a json string for writing to file or S3
new_json = json.dumps(new_data)
print(new_json)
## save file locally with sheet_id as file name
json_file = 'json/%s.json' % (sheet_id)
## make sure the output directory exists before writing
if not os.path.isdir('json'):
    os.makedirs('json')
with open(json_file, 'w') as file_out:
    file_out.write(new_json)
## save file on S3 with sheet_id as file name
## S3 access keys
S3_ACCESS_KEY = os.environ['S3_ACCESS_KEY']
S3_SECRET_KEY = os.environ['S3_SECRET_KEY']
S3_BUCKET = os.environ['S3_BUCKET']
S3_REGION = os.environ['S3_REGION']
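## note: S3_REGION is only needed if you build the domain manually (see the commented-out line below)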
## set up the s3 connection using tinys3
s3_connection = tinys3.Connection(
    S3_ACCESS_KEY,
    S3_SECRET_KEY,
    default_bucket=S3_BUCKET,
    tls=True
)
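## with default_bucket set, tinys3's upload() doesn't need a bucket name per call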
filename_s3 = 'data/' + json_file
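## note: the S3 key becomes 'data/json/<sheet_id>.json' since json_file already includes 'json/'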
domain = os.environ['S3_DOMAIN']
# domain = '%s.s3-%s.amazonaws.com' % (S3_BUCKET, S3_REGION)
# domain = os.environ['CDN_DOMAIN']
url = domain + '/' + filename_s3
with open(json_file, 'rb') as file_to_upload:
    s3_connection.upload(filename_s3, file_to_upload)
print("JSON uploaded to S3:\n\n%s" % url)