@greglinch
Forked from nickjevershed/google-sheets-json.py
Last active March 14, 2017 17:35
Python script (based on @nickjevershed's original) to convert a Google spreadsheet to a simple JSON file and save it locally and/or to S3. Assumes your data is on the left-most sheet (i.e. the default) and that you've already clicked the "Publish to the web" option in the "File" menu. The S3 upload requires environment variables.
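The output is a JSON array of row objects keyed by the header row; for a sheet with columns name and title it would look roughly like this (hypothetical values):

[
    {"name": "Ada Lovelace", "title": "Analyst"},
    {"name": "Grace Hopper", "title": "Programmer"}
]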
import os
import json
import argparse
import requests
import tinys3
'''
Modified version of nickjevershed's code
to-do's:
- abstract sheet_id option so it can be passed in
- Slack output option
'''
## arguments
parser = argparse.ArgumentParser()
parser.add_argument("--id", help="Google Sheet ID")
parser.add_argument("--quotes", action="store_true", help="Use &quot; instead of escaped quotes")
args = parser.parse_args()
id_arg = args.id
quotes_arg = args.quotes
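## example invocation (script name per the original gist; sheet ID is hypothetical):
##   python google-sheets-json.py --id 1a2b3c4d5e --quotes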
# TK TK
## spreadsheet id
sheet_id = id_arg if id_arg else 'SHEET_ID' # falls back to a hardcoded ID when --id isn't passed
## Google API request urls
url1 = 'https://spreadsheets.google.com/feeds/cells/' + sheet_id + '/od6/public/values?alt=json'
url2 = 'https://spreadsheets.google.com/feeds/list/' + sheet_id + '/od6/public/values?alt=json'
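## 'od6' is the gdata worksheet ID for the default (left-most) sheet.
## The cell feed (url1) returns one entry per cell, roughly
##   {'gs$cell': {'row': '1', 'col': '1', ...}, 'content': {'$t': 'name'}},
## while the list feed (url2) returns one entry per data row keyed 'gsx$<header>'.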
## get the json in cell format from google
ss_content1 = requests.get(url1).json()
## lists to store new keys and data
new_keys = []
new_data = []
## make a list of the entries in the first row for nice keys
for item in ss_content1['feed']['entry']:
    if item['gs$cell']['row'] == '1':
        new_keys.append(item['content']['$t'])
print(new_keys)
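## e.g. prints ['name', 'title'] for a sheet with those two header cells (hypothetical)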
## get json in list format
ss_content2 = requests.get(url2).json()
## remap entries from having gsx$-prefixed keys to having no prefix, i.e. our first row as keys
## note: the gsx$ keys are the header cells lowercased with spaces stripped, so this assumes
## your header row already contains simple lowercase words
for entry in ss_content2['feed']['entry']:
    row_data = []
    for key in new_keys:
        if quotes_arg:
            ## optionally replace quote marks with the HTML-encoded version
            row_data.append(entry['gsx$' + key]['$t'].replace('"', '&quot;'))
        else:
            ## default: keep quote marks as-is; json.dumps will escape them
            row_data.append(entry['gsx$' + key]['$t'])
    new_data.append(dict(zip(new_keys, row_data)))
print(new_data)
## serialize to a json string for writing to file or S3
new_json = json.dumps(new_data)
print(new_json)
## save file locally with sheet_id as file name
json_file = 'json/%s.json' % (sheet_id)
## make sure the output directory exists before writing
if not os.path.isdir('json'):
    os.makedirs('json')
with open(json_file, 'w') as file_out:
    file_out.write(new_json)
## save file on S3 with sheet_id as file name
## S3 access keys
S3_ACCESS_KEY = os.environ['S3_ACCESS_KEY']
S3_SECRET_KEY = os.environ['S3_SECRET_KEY']
S3_BUCKET = os.environ['S3_BUCKET']
S3_REGION = os.environ['S3_REGION']
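## note: S3_REGION is only needed if you build the domain manually (see the commented-out line below)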
## set up the s3 connection using tinys3
s3_connection = tinys3.Connection(
    S3_ACCESS_KEY,
    S3_SECRET_KEY,
    default_bucket=S3_BUCKET,
    tls=True
)
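## with default_bucket set, tinys3's upload() doesn't need a bucket name per call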
filename_s3 = 'data/' + json_file
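## note: the S3 key becomes 'data/json/<sheet_id>.json' since json_file already includes 'json/'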
domain = os.environ['S3_DOMAIN']
# domain = '%s.s3-%s.amazonaws.com' % (S3_BUCKET, S3_REGION)
# domain = os.environ['CDN_DOMAIN']
url = domain + '/' + filename_s3
with open(json_file, 'rb') as file_to_upload:
    s3_connection.upload(filename_s3, file_to_upload)
print("JSON uploaded to S3:\n\n%s" % url)