Created
April 9, 2015 11:55
-
-
Save pbabik/f62ca37f3ec7e4e14cd9 to your computer and use it in GitHub Desktop.
Script to convert standard comma-separated CSV to TabSON format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
import csv | |
import sys | |
import re | |
import json | |
# Command-line handling: argv[1] is the source CSV file and the optional
# argv[2] is the destination file. Without a destination the result is
# printed to stdout.
args = sys.argv

# When True, zero-length CSV cells are converted to None (JSON null)
# instead of being kept as empty strings.
ZEROLENGTH_AS_NULL = False

if len(args) == 1:
    # No arguments at all: show the usage line and stop.
    # print(...) with a single argument behaves the same on Python 2 and 3.
    print("Usage: python csv2tabson.py srcfile dstfile")
    # sys.exit() instead of exit(): the latter is injected by the `site`
    # module and is not guaranteed to exist.
    sys.exit()

srcfile = args[1]
# Any arguments beyond the second one are ignored.
dstfile = args[2] if len(args) > 2 else None
# Patterns for cells that consist solely of a plain decimal number.
# Raw strings keep the regex escapes (\. and \d) away from Python's own
# string-escape handling. \Z anchors at the true end of the string; `$`
# would also match just before a trailing newline, so a cell such as
# "12\n" would be converted to a number and lose its newline.
is_float = re.compile(r'^[-+]?[0-9]*\.[0-9]+\Z')
is_int = re.compile(r'^[-+]?\d+\Z')


def fix_data_type(cell):
    """Convert a CSV cell string to the most specific matching value.

    Args:
        cell: The cell text as produced by ``csv.reader``.

    Returns:
        ``None`` for an empty cell when ``ZEROLENGTH_AS_NULL`` is enabled,
        a ``float`` for text like ``"-1.5"`` or ``".5"``, an ``int`` for
        text like ``"+42"``, and otherwise the original string unchanged.
        Exponent notation (``"1e5"``) and a bare trailing dot (``"1."``)
        are not treated as numbers.
    """
    if not cell and ZEROLENGTH_AS_NULL:
        return None
    if is_float.match(cell):
        return float(cell)
    if is_int.match(cell):
        return int(cell)
    return cell
# Read the CSV into a TabSON structure: the first row becomes the header
# (kept as strings), and every later row becomes a data row whose cells
# are passed through fix_data_type().
jsondata = {'header': [], 'data': []}

if sys.version_info[0] >= 3:
    # On Python 3 the csv module needs a text-mode file opened with
    # newline=''. UTF-8 mirrors Python 2, where json decoded the raw byte
    # strings as UTF-8 by default.
    csvfile = open(srcfile, 'r', newline='', encoding='utf-8')
else:
    # On Python 2 the csv module expects a binary-mode file.
    csvfile = open(srcfile, 'rb')

with csvfile:
    reader = csv.reader(csvfile, delimiter=',', quotechar='"')
    # An empty input file leaves the header as an empty list.
    jsondata['header'] = next(reader, [])
    for row in reader:
        # Build a real list: on Python 3 map() returns an iterator, which
        # the json module cannot serialize.
        jsondata['data'].append([fix_data_type(cell) for cell in row])

# Compact separators keep the output free of optional whitespace.
if dstfile is not None:
    # The with-statement closes the file; no explicit close() is needed.
    with open(dstfile, 'w') as dist:
        json.dump(jsondata, dist, separators=(',', ':'))
else:
    print(json.dumps(jsondata, separators=(',', ':')))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment