Skip to content

Instantly share code, notes, and snippets.

@thedod
Last active August 29, 2015 14:23
Show Gist options
  • Save thedod/c335d094bb70ad22b938 to your computer and use it in GitHub Desktop.
Save thedod/c335d094bb70ad22b938 to your computer and use it in GitHub Desktop.
TimelineJS3 (not 2!) converter json<->csv [not an "official csv standard", but easy to edit]

To convert json to csv:

./timeline_json2csv.py < goodtimes.json > goodtimes.csv

Edit the spreadsheet (with excel), then convert back:

./timeline_csv2json.py < goodtimes.csv > goodtimes.json

Note: We assume CSV files are UTF-16 encoded (as Excel writes them), not UTF-8 (as soffice would write them). Todo: make this a command-line option.

#!/usr/bin/env python3
import json
import csv
import sys
import io
import codecs
def flatput(d, keys, value):
    """Store *value* in the nested dict *d* at the path given by *keys*.

    *keys* is either a ``'__'``-separated string such as
    ``'start_date__year'`` or an already-split sequence of key names.
    Missing intermediate dicts are created on demand, so
    ``flatput(d, 'a__b', 1)`` leaves ``d == {'a': {'b': 1}}``.

    *d* is modified in place; nothing is returned.
    """
    # isinstance (not an exact type comparison) so that str subclasses are
    # split as well instead of being sliced character by character.
    if isinstance(keys, str):
        keys = keys.split('__')
    node = d
    for key in keys[:-1]:
        # Reuse an existing sub-dict so sibling columns share one parent.
        node = node.setdefault(key, {})
    node[keys[-1]] = value
def row2slide(row, keys):
    """Build one nested slide dict from a CSV data row.

    Each cell of *row* is paired with the header name at the same position
    in *keys* and stored through ``flatput``, so ``'__'``-separated header
    names end up as nested dicts.  Pairing stops at the shorter of the two
    sequences.
    """
    nested = {}
    for column, cell in zip(keys, row):
        flatput(nested, column, cell)
    return nested
if __name__ == '__main__':
    # Usage: timeline_csv2json.py [ENCODING] < in.csv > out.json
    #
    # The input encoding may be given as the first command-line argument.
    # It defaults to windows-1255, which *seems* to be the encoding of the
    # sheets this script was written for; pass e.g. utf-16 or utf-8-sig for
    # CSV files saved in those encodings.
    encoding = sys.argv[1] if len(sys.argv) > 1 else 'windows-1255'
    # newline='' leaves line-ending handling to the csv module, as its
    # documentation requires (matters for quoted cells containing newlines).
    source = io.TextIOWrapper(sys.stdin.buffer, encoding=encoding, newline='')
    sheet = csv.reader(source)
    # The first row holds the column names; empty input yields no slides
    # instead of an uncaught StopIteration.
    keys = next(sheet, [])
    slides = [row2slide(row, keys) for row in sheet]
    json.dump({'timeline': {'slides': slides}}, sys.stdout, indent=4)
#!/usr/bin/env python3
import json
import csv
import sys
from codecs import BOM_UTF8
# The UTF-8 byte-order mark decoded to text (the single character U+FEFF).
BOM = BOM_UTF8.decode('utf8')

# Column order of the CSV sheet.  '__' separates the levels of a nested
# slide key: 'start_date__year' addresses slide['start_date']['year'].
TIMELINE_FIELDS = [
    'type',
    # start date
    'start_date__date',
    'start_date__month',
    'start_date__year',
    # end date
    'end_date__date',
    'end_date__month',
    'end_date__year',
    # text
    'text__headline',
    'text__text',
    # media
    'media__caption',
    'media__credit',
    'media__url',
    'media__thumb',
]
def flatget(d, keys):
    """Return the value stored in the nested dict *d* at the path *keys*.

    *keys* is either a ``'__'``-separated string such as
    ``'start_date__year'`` or an already-split sequence of key names.

    Returns ``None`` when any step of the path is missing, holds ``None``,
    or would have to descend into something that is not a dict (for
    example a column ``'text__headline'`` on a slide whose ``'text'`` is a
    plain string).  An empty key sequence returns *d* itself.
    """
    # isinstance (not an exact type comparison) so str subclasses are
    # split too.
    if isinstance(keys, str):
        keys = keys.split('__')
    node = d
    for key in keys:
        if not isinstance(node, dict):
            # The path goes deeper than the data does: treat as absent.
            return None
        node = node.get(key)
        if node is None:
            return None
    return node
def slide2row(d):
    """Flatten slide dict *d* into a list of cell values for one CSV row.

    The result has one entry per name in ``TIMELINE_FIELDS``, in that
    order, each looked up with ``flatget``.
    """
    cells = []
    for field in TIMELINE_FIELDS:
        cells.append(flatget(d, field))
    return cells
if __name__ == '__main__':
    # Usage: timeline_json2csv.py < in.json > out.csv
    import io  # local import: only this entry point needs it

    timeline = json.load(sys.stdin)
    # Write through an explicit UTF-8 wrapper instead of the text-mode
    # sys.stdout: the locale encoding may be unable to encode the BOM (or
    # the data), and newline translation would turn the csv module's
    # '\r\n' row endings into '\r\r\n' on Windows.  newline='' is what the
    # csv documentation asks for.
    out = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', newline='')
    out.write(BOM)  # windows UTF-8 magic (presumably lets Excel detect UTF-8)
    sheet = csv.writer(out)
    sheet.writerow(TIMELINE_FIELDS)
    for slide in timeline['timeline']['slides']:
        sheet.writerow(slide2row(slide))
    out.flush()
    # Detach so that discarding the wrapper does not close sys.stdout's buffer.
    out.detach()
### These were used to "research" a timeline and produce TIMELINE_FIELDS
def _flatkeys(d, prefix=None):
    """Return the key path (a list of keys) of every non-dict leaf in *d*.

    *prefix* is the path of *d* itself inside the outermost dict; every
    returned path starts with it.  Nested dicts are walked recursively and
    contribute only their leaves, so an empty nested dict yields no path.
    """
    # None instead of a shared mutable default list.
    prefix = list(prefix) if prefix else []
    paths = []
    for key, value in d.items():
        path = prefix + [key]
        if isinstance(value, dict):
            # The recursive call already returns full paths (it is handed
            # the complete prefix), so they must not be prefixed again.
            paths.extend(_flatkeys(value, path))
        else:
            paths.append(path)
    return paths


def flatkeys(d):
    """Return the ``'__'``-joined key path of every non-dict leaf in *d*."""
    return ['__'.join(path) for path in _flatkeys(d)]
def flatunion(ds):
    """Return the sorted union of the ``flatkeys`` names of all dicts in *ds*."""
    return sorted({name for d in ds for name in flatkeys(d)})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment