-
-
Save mgottholsen/f1ad4208b86394889beead991717475b to your computer and use it in GitHub Desktop.
Given a CSV file with a header row, and at-least one row of data, output a SQL `CREATE TABLE` statement by guessing the data type of the columns.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# encoding: utf-8 | |
""" | |
make_table.py | |
Given a CSV file with a header row, and at-least one row of data, | |
output a SQL `CREATE TABLE` statement by guessing the data type of the columns. | |
Pradeep Gowda <pradeep@btbytes.com> | |
License: Public Domain | |
""" | |
import sys | |
import re | |
from random import randint | |
def slugify(strng): | |
'''create a slug from a free form string''' | |
if strng: | |
strng = strng.strip() | |
strng = re.sub('\.', '', strng) | |
strng = re.sub('\s+', '_', strng) | |
strng = re.sub('[^\w.-]', '', strng) | |
return strng.strip('_.- ').lower() | |
TEMPLATE = '''CREATE TABLE %(tbl_name)s (%(colnames)s);''' | |
def make_table(file_name, tbl_name=None): | |
'''return the CREATE TABLE statement''' | |
fil = open(file_name, 'r') | |
header = fil.readline() | |
colnames = [slugify(c) for c in header.rstrip().split(',')] | |
dataline = fil.readline() | |
fil.close() | |
parts = dataline.rstrip().split(',') | |
types = [] | |
i = 0 | |
for part in parts: | |
width = len(part) | |
typ = 'varchar(%s)' % (width, ) | |
try: | |
float(part) | |
typ = 'real' | |
except: | |
pass | |
types.append(' %s %s\n' % (slugify(colnames[i]), typ)) | |
i += 1 | |
colnames = ','.join(types) | |
tbl_name = slugify(tbl_name) | |
if not tbl_name: | |
try: | |
fpart = file_name.split('/')[-1].split('.')[0] | |
tbl_name = '%s_%s' % (fpart, randint(1, 100)) | |
except: | |
tbl_name = 'tbl_%s' % randint(1, 100) | |
return TEMPLATE % locals() | |
if __name__ == '__main__': | |
if len(sys.argv) < 2: | |
print "Usage: ./make_table.py csvfile.csv [sql_table_name]" | |
print 'If table name is not given, CSV file name will be used' | |
else: | |
tbl_name = None | |
try: | |
tbl_name = sys.argv[2] | |
except: | |
pass | |
file_name = sys.argv[1] | |
print make_table(file_name, tbl_name) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment