Skip to content

Instantly share code, notes, and snippets.

@jgarciabu
Created August 30, 2017 22:23
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jgarciabu/5fb25c7df242fe9604d33be16d70cfc0 to your computer and use it in GitHub Desktop.
Save jgarciabu/5fb25c7df242fe9604d33be16d70cfc0 to your computer and use it in GitHub Desktop.
This R script uses dplyr and RODBC to combine individual CSVs into one dataframe that is then uploaded into a Azure SQL database. This cuts down manual processing significantly. Sometimes I'll get 10 or 12 individual csvs and this script frees me up to focus on other processes and tasks.
library(plyr)
library(dplyr)
library(stringr)
library(RODBC)
#========================================
#Make sure working directory is new invoice folder in Dropbox
#Names of all files put in list to feed to merge code
filenames <- list.files()
#Add Column To Each Individual CSV and specify csv name in that column
read_csv_filename <- function(filename){
ret <- read.csv(filename, stringsAsFactors = FALSE)
ret$Source <- filename #EDIT
ret
}
import.list <- ldply(filenames, read_csv_filename)
#Merge code to put all data into one dataframe (no longer necessary because ldply above combines dataframes)
#savethis <- do.call("rbind", lapply(filenames, read.csv, header = TRUE))
#Remove unnecessary columns
savethis <- subset(import.list, select=-c(Carrier,Ship.to.First.Name,Ship.to.Last.Name,Pricing.Model,Shipping...Handling,
Liability.Coverage,Custom.Packaging,CoolPack.Packaging,Will.Call.Fee,Special.Order.Fee))
#Data noise and mess clean up
savethis$Tracking.Number <- gsub('=','',savethis$Tracking.Number)
savethis$Ship.to.ZIP <- gsub('=','',savethis$Ship.to.ZIP)
savethis[] <- lapply(savethis, str_replace_all, pattern = "'", replacement = "''")#escapes single quotes for sql insert later
savethis$Source <- gsub('.csv', '',savethis$Source)
#savethis[is.na(savethis)] <- 0 no longer needed because SQL import works
savethis$Ship.Date <- gsub('^$', '1/1/1900', savethis$Ship.Date) #more data noise cleanup
savethis <- subset(savethis, select = -X)
#setnames(savethis, old=c("WineDirect.Order.Number","Customer.Order.Number", First iteration of column renames
# "Ship.from.Warehouse","Tracking.Number","Ship.to.City",
# "Ship.to.State","Ship.to.Zip","Billing.Code","Weight..lbs",
# "Ship.Date","Ship.to.Zone","X3.Tier.Delivery.Services",
# "Fulfillment.Services","Fuel.Surcharge","Line.Haul.Services",
# "Line.Haul.Fuel.Surcharge","Foam.Packaging",
# "Additional.Insert.Fee","Other.Charges.Credits","Package.Total",
# "Source"), new=c("wdOrder", "OrderID","Whse","Tracking","City",
# "State","ZIP","BillingCode","Weight","ShipDate",
# "Zone","ThreeTier","Fulfillment","Fuel","Line",
# "LineFuel","Foam","Inserts","Other","Total",
# "InvoiceFile"))
#plyr::rename(savethis, WineDirect.Order.Number=wdOrder,Customer.Order.Number=OrderID, second iteration of column rename
# Ship.from.Warehouse=Whse,Tracking.Number=Tracking,Ship.to.City=City,
# Ship.to.State=State,Ship.to.ZIP=ZIP,Billing.Code=BillingCode,
# Weight..lbs.=Weight,Ship.Date=ShipDate,Ship.to.Zone=Zone,
# X3.Tier.Delivery.Services=ThreeTier,Fulfillment.Services=Fulfillment,
# Fuel.Surcharge=Fuel,Line.Haul.Services=Line,Line.Haul.Fuel.Surcharge=LineFuel,
# Foam.Packaging=Foam,Additional.Insert.Fee=Inserts,
# Other.Charges.Credits=Other,Package.Total=Total,Source=InvoiceFile)
savethis <- plyr::rename(savethis, c("WineDirect.Order.Number"="wdOrder","Customer.Order.Number"="OrderID",
"Ship.from.Warehouse"="Whse","Tracking.Number"="Tracking","Ship.to.City"="City",
"Ship.to.State"="State","Ship.to.ZIP"="ZIP","Billing.Code"="BillingCode",
"Weight..lbs."="Weight","Ship.Date"="ShipDate","Ship.to.Zone"="Zone",
"X3.Tier.Delivery.Services"="ThreeTier","Fulfillment.Services"="Fulfillment",
"Fuel.Surcharge"="Fuel","Line.Haul.Services"="Line","Line.Haul.Fuel.Surcharge"="LineFuel",
"Foam.Packaging"="Foam","Additional.Insert.Fee"="Inserts",
"Other.Charges.Credits"="Other","Package.Total"="Total","Source"="InvoiceFile"))
#Export Data to shipped_wdinvoices table on Azure SQL Database
conn <- odbcConnect("SQL Azure", uid = 'xxxxx', pwd = 'xxxxxx') #ODBC driver set up on Windows ODBC Admin (SQL Azure is the name of the driver)
try(sqlDrop(conn,"temp_invoiceimport", errors = FALSE), silent = TRUE)
sqlSave(conn,savethis, tablename = "temp_invoiceimport",rownames = FALSE,append = TRUE, fast = TRUE)
sqlQuery(conn,paste("insert into shipped_wdinvoices (wdorder,orderid,whse,tracking,city,state,zip,billingcode,weight,shipdate,zone,threetier,fulfillment,fuel,line,linefuel,foam,inserts,other,total,invoicefile) select * from temp_invoiceimport"))
odbcClose(conn)
#Save to CSV (deprecated since sql transfer works)
#now <- Sys.time()
#file_name <- paste(format(now,"%Y%m%d"),"combinedinvoices.csv",sep="_")
#write.csv(savethis, file_name)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment