Skip to content

Instantly share code, notes, and snippets.

@justinnaldzin
Created February 20, 2017 17:45
Show Gist options
  • Save justinnaldzin/200cb2fd71085cec1995865794fe8fdf to your computer and use it in GitHub Desktop.
Save justinnaldzin/200cb2fd71085cec1995865794fe8fdf to your computer and use it in GitHub Desktop.
Create tables and insert data into SQL Server 2014 from all CSV files in a directory
#!/usr/bin/env python3
# Create tables and insert data into SQL Server 2014 from all CSV files in a directory. The process involves:
# - Read all CSV files in a directory
# - Ensure all headers are the same
# - Generate DTS (Data Transformation Service) files for all CSV files
# - Execute all .dtsx files to bulk insert the CSV
import os
import csv
import subprocess
data_dir = 'C:/some/folder/path/'
def validate_csv_headers():
# Read the header row for every CSV file in a directory, then write the header to a new CSV
results_csv = os.getcwd() + os.sep + 'headers.csv'
with open(results_csv, 'w') as csvfile:
writer = csv.writer(csvfile, delimiter=',', quotechar='"')
for filename in os.listdir(data_dir):
print(filename)
with open(data_dir + filename) as infile:
reader = csv.reader(infile)
header = next(reader)
header.append(filename)
writer.writerow(header)
def generate_dtsx_file():
# Read template file and create a new file by replacing contents
template_filepath = os.getcwd() + os.sep + 'template.dtsx'
for filename in os.listdir(data_dir):
basename = os.path.splitext(os.path.basename(filename))[0]
with open(basename + '.dtsx', 'w') as new_file:
with open(template_filepath) as template:
for line in template:
new_file.write(line.replace('__template__', basename))
def execute_dtsx_file():
# Execute a shell command on every file within a directory based on file extension
for filename in os.listdir(os.getcwd()):
basename, extension = os.path.splitext(filename)
if extension == '.dtsx' and filename != 'template.dtsx':
command = 'DTEXEC /FILE ' + filename
print("Executing: " + command)
subprocess.call(command)
if __name__ == '__main__':
validate_csv_headers()
generate_dtsx_file()
execute_dtsx_file()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment