btbytes (owner)

Revisions

gist: 106532 Download_button fork
public
Public Clone URL: git://gist.github.com/106532.git
make_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#!/usr/bin/env python
# encoding: utf-8
"""
make_table.py
Given a CSV file with a header row, and at-least one row of data,
output a SQL `CREATE TABLE` statement by guessing the data type of the columns.
 
Pradeep Gowda <pradeep@btbytes.com>
License: Public Domain
"""
 
import sys
import re
from random import randint
 
def slugify(strng):
    '''create a slug from a free form string'''
    if strng:
        strng = strng.strip()
        strng = re.sub('\.', '', strng)
        strng = re.sub('\s+', '_', strng)
        strng = re.sub('[^\w.-]', '', strng)
        return strng.strip('_.- ').lower()
    
    
TEMPLATE = '''CREATE TABLE %(tbl_name)s (%(colnames)s);'''
 
def make_table(file_name, tbl_name=None):
    '''return the CREATE TABLE statement'''
    fil = open(file_name, 'r')
    header = fil.readline()
    colnames = [slugify(c) for c in header.rstrip().split(',')]
    dataline = fil.readline()
    fil.close()
    parts = dataline.rstrip().split(',')
    types = []
    i = 0
    for part in parts:
        width = len(part)
        typ = 'varchar(%s)' % (width, )
        try:
            float(part)
            typ = 'real'
        except:
            pass
        types.append(' %s %s\n' % (slugify(colnames[i]), typ))
        i += 1
        
    colnames = ','.join(types)
    tbl_name = slugify(tbl_name)
    
    if not tbl_name:
        try:
            fpart = file_name.split('/')[-1].split('.')[0]
            tbl_name = '%s_%s' % (fpart, randint(1, 100))
        except:
            tbl_name = 'tbl_%s' % randint(1, 100)
        
    return TEMPLATE % locals()
                       
            
 
 
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print "Usage: ./make_table.py csvfile.csv [sql_table_name]"
        print 'If table name is not given, CSV file name will be used'
    else:
        tbl_name = None
        try:
            tbl_name = sys.argv[2]
        except:
            pass
        
        file_name = sys.argv[1]
        print make_table(file_name, tbl_name)