Skip to content

Instantly share code, notes, and snippets.

@btbytes
Created May 4, 2009 16:25
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 2 You must be signed in to fork a gist
  • Save btbytes/106532 to your computer and use it in GitHub Desktop.
Save btbytes/106532 to your computer and use it in GitHub Desktop.
Given a CSV file with a header row, and at-least one row of data, output a SQL `CREATE TABLE` statement by guessing the data type of the columns.
#!/usr/bin/env python
# encoding: utf-8
"""
make_table.py
Given a CSV file with a header row, and at-least one row of data,
output a SQL `CREATE TABLE` statement by guessing the data type of the columns.
Pradeep Gowda <pradeep@btbytes.com>
License: Public Domain
"""
import sys
import re
from random import randint
def slugify(strng):
'''create a slug from a free form string'''
if strng:
strng = strng.strip()
strng = re.sub('\.', '', strng)
strng = re.sub('\s+', '_', strng)
strng = re.sub('[^\w.-]', '', strng)
return strng.strip('_.- ').lower()
TEMPLATE = '''CREATE TABLE %(tbl_name)s (%(colnames)s);'''
def make_table(file_name, tbl_name=None):
'''return the CREATE TABLE statement'''
fil = open(file_name, 'r')
header = fil.readline()
colnames = [slugify(c) for c in header.rstrip().split(',')]
dataline = fil.readline()
fil.close()
parts = dataline.rstrip().split(',')
types = []
i = 0
for part in parts:
width = len(part)
typ = 'varchar(%s)' % (width, )
try:
float(part)
typ = 'real'
except:
pass
types.append(' %s %s\n' % (slugify(colnames[i]), typ))
i += 1
colnames = ','.join(types)
tbl_name = slugify(tbl_name)
if not tbl_name:
try:
fpart = file_name.split('/')[-1].split('.')[0]
tbl_name = '%s_%s' % (fpart, randint(1, 100))
except:
tbl_name = 'tbl_%s' % randint(1, 100)
return TEMPLATE % locals()
if __name__ == '__main__':
if len(sys.argv) < 2:
print "Usage: ./make_table.py csvfile.csv [sql_table_name]"
print 'If table name is not given, CSV file name will be used'
else:
tbl_name = None
try:
tbl_name = sys.argv[2]
except:
pass
file_name = sys.argv[1]
print make_table(file_name, tbl_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment