guyromm/sheet_sync.py

## sheet_sync.py
#!/usr/bin/env python

"""
synchronize data from a local file to a google spreadsheet, where rows are located according to keys.
achtung! lots of case-specific bits (marked with FIXME). use with caution.
tested on python 3.6.6
"""
import gspread,json,sys

import threading,time # for rate limiting
from functools import wraps
from random import shuffle

shid = sys.argv[2] # google sheet to write to

cfn='auth.json'

from oauth2client.service_account import ServiceAccountCredentials

scope = ['https://spreadsheets.google.com/feeds',
         'https://www.googleapis.com/auth/drive']

do_not_update = ['notes'] # FIXME: case-specific. the list of columns not to update on the destination sheet.

credentials = ServiceAccountCredentials.from_json_keyfile_name(cfn, scope)

gc = gspread.authorize(credentials)
sh = gc.open_by_key(shid)
worksheet = sh.sheet1
print('spreadsheet',sh.id)
makeheader=True
from fabfile import sslheader as sheetheader # FIXME: case-specific. this is a list of all columns
lastcol = chr(ord('A')+len(sheetheader)-1)

def rate_limited(max_per_second: int):
    """Rate-limits the decorated function locally, for one process."""
    lock = threading.Lock()
    min_interval = 1.0 / max_per_second

    def decorate(func):
        last_time_called = time.perf_counter()

        @wraps(func)
        def rate_limited_function(*args, **kwargs):
            lock.acquire()
            nonlocal last_time_called
            try:
                elapsed = time.perf_counter() - last_time_called
                left_to_wait = min_interval - elapsed
                if left_to_wait > 0:
                    time.sleep(left_to_wait)

                return func(*args, **kwargs)
            finally:
                last_time_called = time.perf_counter()
                lock.release()

        return rate_limited_function

    return decorate

if makeheader:
    # establish headers if absent
    header = worksheet.row_values(1)
    if header!=sheetheader:
        print(header,'!=',sheetheader)
        rng = 'A1:%s1'%lastcol
        print('rng',rng)
        cl = worksheet.range(rng)
        i=0
        for c in cl:
            print(i)
            c.value = sheetheader[i]
            print(c,'=',sheetheader[i])
            i+=1
        worksheet.update_cells(cl)
        print('headers established')
    else:
        print('headers ok')
else:
    header = sheetheader

# walk over each output row and look for its keys in existing worksheet
cnt=0
keys = ['repo','hostname'] # FIXME: case-specific . these are the keys by which row uniqueness is identified
colvals={}
# obtain key column values
for k in keys:
    kpos = header.index(k)
    vals = worksheet.col_values(kpos+1)[1:]
    colvals[k]=vals


rows = open(sys.argv[1],'r').read().split("\n")[1:]
shuffle(rows)

for row in rows:
    cnt+=1
    r = row.split("\t")
    if r==['']: continue
    #assert len(header)==len(r),Exception(header,r)
    try:
        rv = dict([(header[i],
                    (len(r)-1>i and r[i] or None)) \
                   for i in range(len(header))])
    except IndexError as e:
        print('could not expand',r,'by',header)
        raise

    # determine if we have row in spread for this

    def calcrowsfound():
        rowsfound={}
        for k in keys:
            rowsfound[k] = [i for i in range(len(colvals[k])) if colvals[k][i]==rv[k]]
        return rowsfound
    rowsfound = calcrowsfound()
    # find the intersection of all these sets

    isect = set.intersection(*[set(i[1]) for i in rowsfound.items()])
    if not len(isect):
        @rate_limited(1)
        def apnd():
            worksheet.resize(cols=len(header)) # otherwise append misbehaves..
            worksheet.append_row(r)
            print('appending',r)
        apnd()
    else:
        while len(isect)>1:
            todel = isect.pop()
            print('deleting row',todel,[rv[k] for k in keys])
            for k in keys:
                print('colval',k,':',colvals[k][todel])
                del colvals[k][todel]
            tdr = todel+1
            worksheet.delete_row(tdr)
            rowsfound = calcrowsfound()
            isect = set.intersection(*[set(i[1]) for i in rowsfound.items()])
            print('deleted row',tdr,worksheet.row_values(tdr))
            #isect,[rv[k] for k in keys])
        rownum = list(isect)[0]+2
        rng = 'A%s:%s%s'%(rownum,lastcol,rownum)
        #print('going to update',rng,'=>',r)
        @rate_limited(1)
        def rngf():
            cl = worksheet.range(rng)
            return cl
        cl = rngf()
        i=0
        changed=False
        for c in cl:
            if header[i] in ['valid_fr','valid_to']: # FIXME: case-specific
                cmpf = lambda x: x.replace(' 0',' ')
            else:
                cmpf = lambda x: x

            if (len(r)-1>i) and cmpf(c.value)!=cmpf(r[i]) and header[i] not in do_not_update:
                jkey=(isect,'::'.join([rv[k] for k in keys]))
                print(jkey,'.',c,'=',r[i],'(',i,header[i],')')
                assert c.value!=r[i], Exception('but why?',c.value==r[i],header[i],type(c.value),type(r[i]))
                c.value= r[i]
                changed=True
            i+=1
        if changed:
            @rate_limited(1)
            def updc():
                worksheet.update_cells(cl)
            updc()
    print(cnt,'/',len(rows))
	#!/usr/bin/env python

	"""
	synchronize data from a local file to a google spreadsheet, where rows are located according to keys.
	achtung! lots of case-specific bits (marked with FIXME). use with caution.
	tested on python 3.6.6
	"""
	import gspread,json,sys

	import threading,time # for rate limiting
	from functools import wraps
	from random import shuffle

	shid = sys.argv[2] # google sheet to write to

	cfn='auth.json'

	from oauth2client.service_account import ServiceAccountCredentials

	scope = ['https://spreadsheets.google.com/feeds',
	'https://www.googleapis.com/auth/drive']

	do_not_update = ['notes'] # FIXME: case-specific. the list of columns not to update on the destination sheet.

	credentials = ServiceAccountCredentials.from_json_keyfile_name(cfn, scope)

	gc = gspread.authorize(credentials)
	sh = gc.open_by_key(shid)
	worksheet = sh.sheet1
	print('spreadsheet',sh.id)
	makeheader=True
	from fabfile import sslheader as sheetheader # FIXME: case-specific. this is a list of all columns
	lastcol = chr(ord('A')+len(sheetheader)-1)

	def rate_limited(max_per_second: int):
	"""Rate-limits the decorated function locally, for one process."""
	lock = threading.Lock()
	min_interval = 1.0 / max_per_second

	def decorate(func):
	last_time_called = time.perf_counter()

	@wraps(func)
	def rate_limited_function(args, *kwargs):
	lock.acquire()
	nonlocal last_time_called
	try:
	elapsed = time.perf_counter() - last_time_called
	left_to_wait = min_interval - elapsed
	if left_to_wait > 0:
	time.sleep(left_to_wait)

	return func(args, *kwargs)
	finally:
	last_time_called = time.perf_counter()
	lock.release()

	return rate_limited_function

	return decorate

	if makeheader:
	# establish headers if absent
	header = worksheet.row_values(1)
	if header!=sheetheader:
	print(header,'!=',sheetheader)
	rng = 'A1:%s1'%lastcol
	print('rng',rng)
	cl = worksheet.range(rng)
	i=0
	for c in cl:
	print(i)
	c.value = sheetheader[i]
	print(c,'=',sheetheader[i])
	i+=1
	worksheet.update_cells(cl)
	print('headers established')
	else:
	print('headers ok')
	else:
	header = sheetheader

	# walk over each output row and look for its keys in existing worksheet
	cnt=0
	keys = ['repo','hostname'] # FIXME: case-specific . these are the keys by which row uniqueness is identified
	colvals={}
	# obtain key column values
	for k in keys:
	kpos = header.index(k)
	vals = worksheet.col_values(kpos+1)[1:]
	colvals[k]=vals



	rows = open(sys.argv[1],'r').read().split("\n")[1:]
	shuffle(rows)

	for row in rows:
	cnt+=1
	r = row.split("\t")
	if r==['']: continue
	#assert len(header)==len(r),Exception(header,r)
	try:
	rv = dict([(header[i],
	(len(r)-1>i and r[i] or None)) \
	for i in range(len(header))])
	except IndexError as e:
	print('could not expand',r,'by',header)
	raise

	# determine if we have row in spread for this

	def calcrowsfound():
	rowsfound={}
	for k in keys:
	rowsfound[k] = [i for i in range(len(colvals[k])) if colvals[k][i]==rv[k]]
	return rowsfound
	rowsfound = calcrowsfound()
	# find the intersection of all these sets

	isect = set.intersection(*[set(i[1]) for i in rowsfound.items()])
	if not len(isect):
	@rate_limited(1)
	def apnd():
	worksheet.resize(cols=len(header)) # otherwise append misbehaves..
	worksheet.append_row(r)
	print('appending',r)
	apnd()
	else:
	while len(isect)>1:
	todel = isect.pop()
	print('deleting row',todel,[rv[k] for k in keys])
	for k in keys:
	print('colval',k,':',colvals[k][todel])
	del colvals[k][todel]
	tdr = todel+1
	worksheet.delete_row(tdr)
	rowsfound = calcrowsfound()
	isect = set.intersection(*[set(i[1]) for i in rowsfound.items()])
	print('deleted row',tdr,worksheet.row_values(tdr))
	#isect,[rv[k] for k in keys])
	rownum = list(isect)[0]+2
	rng = 'A%s:%s%s'%(rownum,lastcol,rownum)
	#print('going to update',rng,'=>',r)
	@rate_limited(1)
	def rngf():
	cl = worksheet.range(rng)
	return cl
	cl = rngf()
	i=0
	changed=False
	for c in cl:
	if header[i] in ['valid_fr','valid_to']: # FIXME: case-specific
	cmpf = lambda x: x.replace(' 0',' ')
	else:
	cmpf = lambda x: x

	if (len(r)-1>i) and cmpf(c.value)!=cmpf(r[i]) and header[i] not in do_not_update:
	jkey=(isect,'::'.join([rv[k] for k in keys]))
	print(jkey,'.',c,'=',r[i],'(',i,header[i],')')
	assert c.value!=r[i], Exception('but why?',c.value==r[i],header[i],type(c.value),type(r[i]))
	c.value= r[i]
	changed=True
	i+=1
	if changed:
	@rate_limited(1)
	def updc():
	worksheet.update_cells(cl)
	updc()
	print(cnt,'/',len(rows))