Created
June 20, 2011 13:22
-
-
Save hoffmanc/1035593 to your computer and use it in GitHub Desktop.
WGA-7
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Chris Hoffman | |
# 2011-6-17 | |
# Description: takes a | |
import pyodbc | |
import sys | |
import csv | |
import argparse | |
import glob | |
import os | |
import time | |
SERVER = "" | |
DB = "" | |
FILE_PTN = "*.asc" | |
CONN_STR = "DRIVER={SQL Server};SERVER=%s;DATABASE=%s;Integrated Security=true;Trusted Connection=true" % (SERVER,DB) | |
def get_args(): | |
parser = argparse.ArgumentParser(description='CSV => DB Table.') | |
parser.add_argument('directory', metavar='dir', type=str, | |
help="directory to search for files of the form %s" % FILE_PTN) | |
parser.add_argument('-t','--tname', type=str, | |
help="output table name (edit source for server/db)") | |
parser.add_argument('-d', metavar='delimiter', type=str, | |
help="field delimiter (default: '|')") | |
parser.add_argument('-m', '--maxcols', metavar='max_cols', type=int, default=None, | |
help="maximum columns in files (default: derived)") | |
parser.add_argument('-f', '--file_ptn', type=str, default=None, | |
help="file name patern (default: *.asc)") | |
args = parser.parse_args() | |
return args | |
def file_list(srchdir, fileptn): | |
files_found = glob.glob(os.path.join(srchdir,fileptn)) | |
if not files_found: | |
raise Exception("no files found matching pattern '%s' in %s" % (fileptn, srchdir)) | |
return files_found | |
def get_max_cols(files): | |
maxcols = 0 | |
for f in files: | |
fh = open(f, "rb") | |
line = fh.readline() | |
while line: | |
if maxcols < num_cols(line): | |
maxcols = num_cols(line) | |
print "Max Column Count Found: %s" % maxcols | |
line = fh.readline() | |
fh.close() | |
return maxcols | |
def num_cols(line): | |
return len(line.split('|')) + 1 | |
def create_table(cursor, tname, max_cols): | |
sql = """ | |
if object_id('%s') is null | |
begin | |
create table %s ( | |
rownum int not null, | |
fname nvarchar(max) not null, | |
%s) | |
end""" % ( | |
tname, | |
tname, | |
",".join( | |
map(lambda n: "v%s nvarchar(max)" % n, range(max_cols)) | |
) | |
) | |
cursor.execute(sql) | |
def get_total_lines(fname): | |
fh = open(fname, "rb") | |
lcount = 0 | |
while fh.readline(): | |
lcount+=1 | |
return lcount | |
def insert_rows(conn, cursor, tname, max_cols, fname): | |
total_lines = get_total_lines(fname) | |
all_cols = map(lambda n: "v%s" % n, (range(max_cols))) | |
sql_head = "insert into %s (rownum, fname, %s) " % ( | |
tname, | |
",".join(all_cols) | |
) | |
fh = open(fname, "rb") | |
line = fh.readline() | |
counter = 0 | |
start = time.time() | |
while line: | |
cols = line.split('|') | |
out_cols = [] | |
for i in range(len(all_cols)): | |
if i < len(cols) and cols[i] != None and cols[i] != "": | |
out_cols.append("'%s'" % cols[i]) | |
else: | |
out_cols.append('NULL') | |
sql = "%s values (%s, '%s', %s)" % ( | |
sql_head, | |
counter, | |
clean_name(fname), | |
",".join(out_cols) | |
) | |
cursor.execute(sql) | |
line = fh.readline() | |
if counter % (total_lines / 25) == 0: | |
print "%s/%s (%.2f sec)" % ( | |
counter / (total_lines / 25), | |
25, | |
time.time() - start | |
) | |
conn.commit() | |
start = time.time() | |
counter+=1 | |
def clean_name(fname): | |
return os.path.splitext(os.path.basename(fname))[0].replace(".","_") | |
def main(): | |
conn = pyodbc.connect(CONN_STR) | |
cursor = conn.cursor() | |
args = get_args() | |
files = file_list(args.directory, args.file_ptn or FILE_PTN) | |
max_cols = args.maxcols or get_max_cols(files) | |
if args.tname: | |
create_table(cursor, args.tname, max_cols) | |
conn.commit() | |
for f in files: | |
tname = args.tname or clean_name(f) | |
if not args.tname: | |
create_table(cursor, tname, max_cols) | |
conn.commit() | |
insert_rows(conn, cursor, tname, max_cols, f) | |
conn.commit() | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment