thedod/README.md

## README.md

      
    Raw
  

              README.md
            
          
    To convert json to csv:
./timeline_json2csv.py < goodtimes.json > goodtimes.csv

Edit the spreadsheet (with Excel), then convert back:
./timeline_csv2json.py < goodtimes.csv > goodtimes.json

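Note: The CSV encoding is currently hard-coded. The original assumption was
UTF-16 (as Excel writes CSV files) rather than UTF-8 (as soffice would), but
timeline_csv2json.py now decodes its input as windows-1255.
Todo: make the encoding a command-line option.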

  
## timeline_csv2json.py
#!/usr/bin/env python3
import json
import csv
import sys
import io
import codecs

def flatput(d,keys,value):
    """Store *value* in the nested dict *d* at the path given by *keys*.

    keys is either a sequence of path components or a single string whose
    components are separated by a double underscore, so 'text__headline'
    addresses d['text']['headline'].  Intermediate dicts are created when
    they are missing and reused when they already exist.

    d is modified in place; nothing is returned.
    """
    # isinstance rather than an exact type comparison, so that str
    # subclasses are split into components too.
    if isinstance(keys, str):
        keys = keys.split('__')
    node = d
    # Walk (creating as needed) every level except the last one ...
    for key in keys[:-1]:
        node = node.setdefault(key, {})
    # ... and store the value under the final component.
    node[keys[-1]] = value

def row2slide(row,keys):
    """Turn one csv row into a nested slide dict.

    keys holds the flattened ('__'-separated) column names and row the
    matching cell values; they are paired positionally and each cell is
    stored at its column's path.
    """
    result = {}
    for column, cell in zip(keys, row):
        flatput(result, column, cell)
    return result

if __name__=='__main__':
    # Read csv from stdin and write the timeline json to stdout.
    #
    # The input is decoded as windows-1255, which *seems* to be the actual
    # encoding.  utf-16 (what excel was assumed to save) and utf-8 were the
    # other candidates tried.
    # todo: control encoding with a command line switch or something
    #
    # newline='' leaves line-ending handling to the csv module, as the csv
    # documentation requires, so line breaks embedded in quoted cells are
    # not rewritten by the text layer.
    sheet = csv.reader(io.TextIOWrapper(sys.stdin.buffer, encoding='windows-1255', newline=''))
    # The first row holds the flattened column names.  Empty input yields an
    # empty slide list instead of an unhandled StopIteration.
    keys = next(sheet, [])
    slides = [row2slide(row,keys) for row in sheet]
    json.dump({'timeline': {'slides': slides}},sys.stdout,indent=4)

## timeline_json2csv.py
#!/usr/bin/env python3
import json
import csv
import sys
from codecs import BOM_UTF8
# The UTF-8 byte order mark, decoded to text so it can be written to a
# text stream.
BOM = BOM_UTF8.decode('utf8')

# Column names of the csv, in output order.  A double underscore separates
# the levels of the nested slide dict ('text__headline' is slide['text']['headline']).
TIMELINE_FIELDS = [
    'type',
    # start date
    'start_date__date',
    'start_date__month',
    'start_date__year',
    # end date
    'end_date__date',
    'end_date__month',
    'end_date__year',
    # text
    'text__headline',
    'text__text',
    # media
    'media__caption',
    'media__credit',
    'media__url',
    'media__thumb',
]

def flatget(d,keys):
    """Return the value stored in the nested dict *d* at the path *keys*.

    keys is either a sequence of path components or a single string whose
    components are separated by a double underscore, so 'text__headline'
    reads d['text']['headline'].

    Returns None as soon as a component is missing (or maps to None).
    """
    # isinstance rather than an exact type comparison, so that str
    # subclasses are split into components too.
    if isinstance(keys, str):
        keys = keys.split('__')
    val = d
    for key in keys:
        val = val.get(key)
        if val is None:
            # Nothing to descend into: the path does not exist.
            return None
    return val

def slide2row(d):
    """Build the csv row for slide *d*.

    The row has one cell per TIMELINE_FIELDS entry, in that order; a field
    the slide does not have yields None.
    """
    row = []
    for field in TIMELINE_FIELDS:
        row.append(flatget(d, field))
    return row

if __name__=='__main__':
    # Read the timeline json from stdin and write it to stdout as csv.
    import io  # only this entry point needs it

    j = json.load(sys.stdin)
    # Write through an explicit UTF-8 text layer rather than sys.stdout
    # itself: the BOM announces UTF-8, so the rows must not be encoded with
    # whatever encoding stdout happens to have (which may not even be able to
    # represent the BOM).  newline='' keeps the text layer from translating
    # the row terminators emitted by the csv writer.
    out = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')
    out.write(BOM)  # windows UTF-8 magic
    sheet = csv.writer(out)
    sheet.writerow(TIMELINE_FIELDS)
    for slide in j['timeline']['slides']:
        sheet.writerow(slide2row(slide))
    # Make sure everything reaches stdout before the interpreter exits.
    out.flush()

### These were used to "research" a timeline and produce TIMELINE_FIELDS
def _flatkeys(d,prefix=None):
    """Return the key path of every leaf (non-dict value) in the nested dict *d*.

    Each path is a list of keys leading from the top of d to the leaf.
    prefix, if given, is a list of keys put in front of every returned path;
    it is not modified.
    """
    # None instead of a shared mutable [] default.
    prefix = [] if prefix is None else prefix
    keys = []
    for key, value in d.items():
        path = prefix + [key]
        if isinstance(value, dict):
            # The recursive call is handed the extended prefix and therefore
            # already returns complete paths; prefixing them a second time
            # would duplicate the leading components.
            keys += _flatkeys(value, path)
        else:
            keys.append(path)
    return keys

def flatkeys(d):
    """Return the key paths of the nested dict *d* as '__'-joined strings."""
    joined = []
    for path in _flatkeys(d):
        joined.append('__'.join(path))
    return joined

def flatunion(ds):
    """Return the sorted list of all distinct flattened keys found in the dicts of *ds*."""
    return sorted({name for d in ds for name in flatkeys(d)})
	#!/usr/bin/env python3
	import json
	import csv
	import sys
	import io
	import codecs

	def flatput(d,keys,value):
	    """Store *value* in the nested dict *d* at the path given by *keys*.

	    keys is either a sequence of path components or a single string whose
	    components are separated by a double underscore, so 'text__headline'
	    addresses d['text']['headline'].  Intermediate dicts are created when
	    they are missing.  d is modified in place; nothing is returned.
	    """
	    # isinstance rather than an exact type comparison, so that str
	    # subclasses are split into components too.
	    if isinstance(keys, str):
	        keys = keys.split('__')
	    head, rest = keys[0], keys[1:]
	    if not rest:
	        # Last component: this is where the value goes.
	        d[head] = value
	        return
	    # Reuse the existing sub-dict if there is one, otherwise start a new
	    # one, and store the remainder of the path inside it.
	    subdict = d.get(head,{})
	    flatput(subdict,rest,value)
	    d[head] = subdict

	def row2slide(row,keys):
	    """Turn one csv row into a nested slide dict.

	    keys holds the flattened ('__'-separated) column names and row the
	    matching cell values; they are paired positionally and each cell is
	    stored at its column's path.
	    """
	    slide = {}
	    for k,v in zip(keys,row):
	        flatput(slide,k,v)
	    return slide

	if __name__=='__main__':
	    # Read csv from stdin and write the timeline json to stdout.
	    #
	    # The input is decoded as windows-1255, which seems to be the actual
	    # encoding.  utf-16 (what excel was assumed to save) and utf-8 were
	    # the other candidates tried.
	    # todo: control encoding with a command line switch or something
	    #
	    # newline='' leaves line-ending handling to the csv module, as the csv
	    # documentation requires, so line breaks embedded in quoted cells are
	    # not rewritten by the text layer.
	    sheet = csv.reader(io.TextIOWrapper(sys.stdin.buffer, encoding='windows-1255', newline=''))
	    # The first row holds the flattened column names.  Empty input yields
	    # an empty slide list instead of an unhandled StopIteration.
	    keys = next(sheet, [])
	    slides = [row2slide(row,keys) for row in sheet]
	    json.dump({'timeline': {'slides': slides}},sys.stdout,indent=4)