Skip to content

Instantly share code, notes, and snippets.

@petrblahos petrblahos/textql.py
Created Feb 19, 2014

Embed
What would you like to do?
import csv
import sqlite3
import sys
db_con = sqlite3.connect(":memory:")
db_con.text_factory = str
def safe_column_name(name):
"""
Modifies the column name so that it only contains alphanumeric
characters. The rest is replaced by underscores.
"""
out = ""
for i in name:
if i.isalnum():
out+= i
else:
out+= "_"
if out[0].isdigit():
out = "_" + out
return out
def insert_row(table, columns, data):
"""
Insert a single row of data into the table.
#FIXME: make the cursor a parameter so that it does not have to be
recreated every time
"""
cur = db_con.cursor()
if len(data) == len(columns):
cur.execute(
"""insert into %s (%s) values (%s)""" % (
table,
", ".join(columns),
", ".join( [ "?" ] * len(columns)),
),
[ i.strip() for i in data ])
else:
raise Exception("Wrong number of columns in the row.")
cur.close()
def get_unused_column_name(name, used_names):
"""
If the name has not been used, returns it. Otherwise returns the
suffix _<number> - the number is the lowest number that makes the
new name unique.
Params:
name The proposed name.
used_names A set of used names.
Returns:
The unique name.
"""
if not name in used_names:
return name
suffix = 1
out = "%s_%s" % (name, suffix, )
while out in columns:
suffix += 1
out = "%s_%s" % (name, suffix, )
return out
def mk_table_from_header(row):
"""
Create a table named t1 with columns named by the values in row.
If any values in the row are duplicate, adds a _<number> suffix
to them.
Returns:
A tuple (tablename, column names), where table name is always "t1".
"""
columns = set()
column_names = []
for i in row:
name = get_unused_column_name(safe_column_name(i), columns)
columns.add(name)
column_names.append(name)
cur = db_con.cursor()
cur.execute("""create table t1(%s)""" % ", ".join(column_names))
cur.close()
return ("t1", column_names)
def mk_table_dummy_columns(row):
"""
Create a table named t1 with len(row) columns named col_1,
col_2, ...
Returns:
A tuple (tablename, column names), where table name is always "t1".
"""
column_names = [ "col_%d" % (i+1) for i in range(len(row)) ]
cur = db_con.cursor()
cur.execute("""create table t1(%s)""" % ", ".join(column_names))
cur.close()
return ("t1", column_names)
table = None
with open(sys.argv[1], 'rb') as csvfile:
sniff_data = csvfile.read(8192)
csvfile.seek(0)
sniffer = csv.Sniffer()
has_header = sniffer.has_header(sniff_data)
dialect = sniffer.sniff(sniff_data)
reader = csv.reader(csvfile, dialect)
for row in reader:
if table is None:
if has_header:
table = mk_table_from_header(row)
continue
else:
table = mk_table_dummy_columns(row)
insert_row(table[0], table[1], row)
cur = db_con.cursor()
cur.execute("""select * from %s limit 200""" % table[0])
for i in cur:
print i
cur.execute("""select count(*) from %s""" % table[0])
for i in cur:
print i
cur.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.
You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session.