Skip to content

Instantly share code, notes, and snippets.

@broschke
Created March 26, 2018 14:09
Show Gist options
  • Save broschke/6feb5ea7ba2623300ff24baf051cd622 to your computer and use it in GitHub Desktop.
Save broschke/6feb5ea7ba2623300ff24baf051cd622 to your computer and use it in GitHub Desktop.
Snippet to move Pandas dataframe to CSV then upload to Vertica
import pandas as pd
import numpy as np
import os
import pyodbc
import time
#this is a sample dataframe created for testing
df = pd.DataFrame(np.random.randint(0,100,size=(100, 4)), columns=list('ABCD'))
#create a list of the column names
col_list = list(df)
#give the dataframe a name, the name that you'd like your vertica table to be
df.name = 'random'
#write dataframe to csv file
df.to_csv('df.csv',index=False)
#grab the directory location of the file
data_location = os.path.realpath('df.csv')
#if you have a large dataframe, increase the sleep seconds so the program waits sufficient time for the csv to be written
time.sleep(10)
def create_table(cols):
'''This function writes the column names into the structure needed for a create table sql statement.
Note all columns are set to varchar(32). Casting will likely be needed in your sql statements later.'''
for i, val in enumerate(cols):
if (i+1) == len(cols):
yield val + " VARCHAR(32)"
else:
yield val + " VARCHAR(32),"
#call function on col_list
table_construct = [field for field in create_table(col_list)]
#create and close an empty txt file as the reject file for copy function
with open(os.path.join('reject.txt'), 'w'):
pass
#grab exceptions location
exceptions_location = os.path.realpath('reject.txt')
#build sql statement
vertica_table = 'dq.br_'+ df.name #adjust the schema as needed
begin_create = 'create table {} ('.format(vertica_table)
end_create = ');'
copy_statement = 'COPY {} FROM local \'{}\' EXCEPTIONS \'{}\' DELIMITER \',\';'.format(vertica_table,data_location,exceptions_location)
sql = begin_create + ' '.join(table_construct) + end_create + copy_statement
#connect to vertica and excecute sql
conn=pyodbc.connect(dsn='VerticaBR')
cur=conn.cursor()
cur.execute('drop table if exists {};'.format(vertica_table))
cur.execute(sql)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment