Skip to content

Instantly share code, notes, and snippets.

@hughdbrown
Created September 5, 2019 18:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save hughdbrown/a2423346b1d52f75873949edd2e4666e to your computer and use it in GitHub Desktop.
Save hughdbrown/a2423346b1d52f75873949edd2e4666e to your computer and use it in GitHub Desktop.
Make a postgresql CREATE TABLE statement by inferring types in a CSV file
import os.path
from csv import DictReader
datafile = os.path.normpath(os.path.expanduser("~/workspace/DataRobot/tests/testdata/10k_diabetes.csv"))
with open(datafile) as handle:
reader = DictReader(handle)
fields = reader.fieldnames
rows = list(reader)
def row_type(name, rows):
values = {row[name] for row in rows}
try:
for value in values:
x = int(value)
return {'type': 'INTEGER'}
except ValueError:
pass
try:
for value in values:
x = float(value)
return {'type': 'NUMERIC'}
except ValueError:
pass
return {'type': 'VARCHAR', 'length': len(max(values, key=len))}
table = {name: row_type(name, rows) for name in fields}
table_fmt = """CREATE TABLE {}(\n{}\n);"""
table_name = 'diabetes'
def format_item(name, item):
t = item['type']
name = '"{}"'.format(name)
if t == 'INTEGER':
return "{} INTEGER".format(name)
elif t == 'NUMERIC':
return "{} NUMERIC".format(name)
elif t == 'VARCHAR':
return "{} VARCHAR({})".format(name, item['length'])
else:
raise ValueError(name)
print(
table_fmt.format(
table_name,
",\n".join(format_item(name, table[name]) for name in fields)
)
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment