petrblahos/textql.py

## textql.py
import csv
import sqlite3
import sys

db_con = sqlite3.connect(":memory:")
db_con.text_factory = str

def safe_column_name(name):
    """
        Modifies the column name so that it only contains alphanumeric
        characters. The rest is replaced by underscores.
    """
    out = ""
    for i in name:
        if i.isalnum():
            out+= i
        else:
            out+= "_"
    if out[0].isdigit():
        out = "_" + out
    return out

def insert_row(table, columns, data):
    """
        Insert a single row of data into the table.
    #FIXME: make the cursor a parameter so that it does not have to be
            recreated every time
    """
    cur = db_con.cursor()
    if len(data) == len(columns):
        cur.execute(
            """insert into %s (%s) values (%s)""" % (
                    table,
                    ", ".join(columns),
                    ", ".join( [ "?" ] * len(columns)),
                    ),
            [ i.strip() for i in data ])
    else:
        raise Exception("Wrong number of columns in the row.")
    cur.close()


def get_unused_column_name(name, used_names):
    """
        If the name has not been used, returns it. Otherwise returns the
        suffix _<number> - the number is the lowest number that makes the
        new name unique.
    Params:
        name        The proposed name.
        used_names  A set of used names.
    Returns:
        The unique name.
    """
    if not name in used_names:
        return name
    suffix = 1
    out = "%s_%s" % (name, suffix, )
    while out in columns:
        suffix += 1
        out = "%s_%s" % (name, suffix, )
    return out

def mk_table_from_header(row):
    """
        Create a table named t1 with columns named by the values in row.
        If any values in the row are duplicate, adds a _<number> suffix
        to them.
    Returns:
        A tuple (tablename, column names), where table name is always "t1".
    """
    columns = set()
    column_names = []
    for i in row:
        name = get_unused_column_name(safe_column_name(i), columns)
        columns.add(name)
        column_names.append(name)
    cur = db_con.cursor()
    cur.execute("""create table t1(%s)""" % ", ".join(column_names))
    cur.close()
    return ("t1", column_names)


def mk_table_dummy_columns(row):
    """
        Create a table named t1 with len(row) columns named col_1,
        col_2, ...
    Returns:
        A tuple (tablename, column names), where table name is always "t1".
    """
    column_names = [ "col_%d" % (i+1) for i in range(len(row)) ]
    cur = db_con.cursor()
    cur.execute("""create table t1(%s)""" % ", ".join(column_names))
    cur.close()
    return ("t1", column_names)

table = None

with open(sys.argv[1], 'rb') as csvfile:
    sniff_data = csvfile.read(8192)
    csvfile.seek(0)

    sniffer = csv.Sniffer()
    has_header = sniffer.has_header(sniff_data)
    dialect = sniffer.sniff(sniff_data)

    reader = csv.reader(csvfile, dialect)

    for row in reader:
        if table is None:
            if has_header:
                table = mk_table_from_header(row)
                continue
            else:
                table = mk_table_dummy_columns(row)
        insert_row(table[0], table[1], row)

cur = db_con.cursor()
cur.execute("""select * from %s limit 200""" % table[0])
for i in cur:
    print i

cur.execute("""select count(*) from %s""" % table[0])
for i in cur:
    print i

cur.close()
	import csv
	import sqlite3
	import sys

	db_con = sqlite3.connect(":memory:")
	db_con.text_factory = str

	def safe_column_name(name):
	"""
	Modifies the column name so that it only contains alphanumeric
	characters. The rest is replaced by underscores.
	"""
	out = ""
	for i in name:
	if i.isalnum():
	out+= i
	else:
	out+= "_"
	if out[0].isdigit():
	out = "_" + out
	return out

	def insert_row(table, columns, data):
	"""
	Insert a single row of data into the table.
	#FIXME: make the cursor a parameter so that it does not have to be
	recreated every time
	"""
	cur = db_con.cursor()
	if len(data) == len(columns):
	cur.execute(
	"""insert into %s (%s) values (%s)""" % (
	table,
	", ".join(columns),
	", ".join( [ "?" ] * len(columns)),
	),
	[ i.strip() for i in data ])
	else:
	raise Exception("Wrong number of columns in the row.")
	cur.close()


	def get_unused_column_name(name, used_names):
	"""
	If the name has not been used, returns it. Otherwise returns the
	suffix _<number> - the number is the lowest number that makes the
	new name unique.
	Params:
	name The proposed name.
	used_names A set of used names.
	Returns:
	The unique name.
	"""
	if not name in used_names:
	return name
	suffix = 1
	out = "%s_%s" % (name, suffix, )
	while out in columns:
	suffix += 1
	out = "%s_%s" % (name, suffix, )
	return out

	def mk_table_from_header(row):
	"""
	Create a table named t1 with columns named by the values in row.
	If any values in the row are duplicate, adds a _<number> suffix
	to them.
	Returns:
	A tuple (tablename, column names), where table name is always "t1".
	"""
	columns = set()
	column_names = []
	for i in row:
	name = get_unused_column_name(safe_column_name(i), columns)
	columns.add(name)
	column_names.append(name)
	cur = db_con.cursor()
	cur.execute("""create table t1(%s)""" % ", ".join(column_names))
	cur.close()
	return ("t1", column_names)


	def mk_table_dummy_columns(row):
	"""
	Create a table named t1 with len(row) columns named col_1,
	col_2, ...
	Returns:
	A tuple (tablename, column names), where table name is always "t1".
	"""
	column_names = [ "col_%d" % (i+1) for i in range(len(row)) ]
	cur = db_con.cursor()
	cur.execute("""create table t1(%s)""" % ", ".join(column_names))
	cur.close()
	return ("t1", column_names)

	table = None

	with open(sys.argv[1], 'rb') as csvfile:
	sniff_data = csvfile.read(8192)
	csvfile.seek(0)

	sniffer = csv.Sniffer()
	has_header = sniffer.has_header(sniff_data)
	dialect = sniffer.sniff(sniff_data)

	reader = csv.reader(csvfile, dialect)

	for row in reader:
	if table is None:
	if has_header:
	table = mk_table_from_header(row)
	continue
	else:
	table = mk_table_dummy_columns(row)
	insert_row(table[0], table[1], row)

	cur = db_con.cursor()
	cur.execute("""select * from %s limit 200""" % table[0])
	for i in cur:
	print i

	cur.execute("""select count(*) from %s""" % table[0])
	for i in cur:
	print i

	cur.close()