Created
February 20, 2017 17:45
-
-
Save justinnaldzin/200cb2fd71085cec1995865794fe8fdf to your computer and use it in GitHub Desktop.
Create tables and insert data into SQL Server 2014 from all CSV files in a directory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
# Create tables and insert data into SQL Server 2014 from all CSV files in a directory. The process involves: | |
# - Read all CSV files in a directory | |
# - Ensure all headers are the same | |
# - Generate DTS (Data Transformation Service) files for all CSV files | |
# - Execute all .dtsx files to bulk insert the CSV | |
import os | |
import csv | |
import subprocess | |
data_dir = 'C:/some/folder/path/' | |
def validate_csv_headers(): | |
# Read the header row for every CSV file in a directory, then write the header to a new CSV | |
results_csv = os.getcwd() + os.sep + 'headers.csv' | |
with open(results_csv, 'w') as csvfile: | |
writer = csv.writer(csvfile, delimiter=',', quotechar='"') | |
for filename in os.listdir(data_dir): | |
print(filename) | |
with open(data_dir + filename) as infile: | |
reader = csv.reader(infile) | |
header = next(reader) | |
header.append(filename) | |
writer.writerow(header) | |
def generate_dtsx_file(): | |
# Read template file and create a new file by replacing contents | |
template_filepath = os.getcwd() + os.sep + 'template.dtsx' | |
for filename in os.listdir(data_dir): | |
basename = os.path.splitext(os.path.basename(filename))[0] | |
with open(basename + '.dtsx', 'w') as new_file: | |
with open(template_filepath) as template: | |
for line in template: | |
new_file.write(line.replace('__template__', basename)) | |
def execute_dtsx_file(): | |
# Execute a shell command on every file within a directory based on file extension | |
for filename in os.listdir(os.getcwd()): | |
basename, extension = os.path.splitext(filename) | |
if extension == '.dtsx' and filename != 'template.dtsx': | |
command = 'DTEXEC /FILE ' + filename | |
print("Executing: " + command) | |
subprocess.call(command) | |
if __name__ == '__main__': | |
validate_csv_headers() | |
generate_dtsx_file() | |
execute_dtsx_file() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment