Created
October 10, 2018 19:49
-
-
Save jgarciabu/81512476cd038d6fa4116c2ce37ab6b2 to your computer and use it in GitHub Desktop.
Code lib created to house all functions necessary to process data for this use case.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
""" | |
Created on Wed Apr 18 11:56:27 2018 | |
@author: jeffrey.garcia | |
""" | |
import pandas as pd | |
import numpy as np | |
import csv | |
import glob as gb | |
def ml_validation(renamed_columns_data):
    """Validate and clean a renamed member-list DataFrame for import.

    Steps: reorder columns to the import layout, drop duplicate rows, drop
    rows with a blank 'Loc. Name', scrub carriage returns and newlines from
    address/zip fields, set aside rows with invalid state abbreviations,
    restore the leading zero on 4-digit northeastern zip codes, set aside
    rows with out-of-range zip lengths, and set aside rows with over-long
    address fields.

    Rows failing a validation are written to manual-review CSVs in the
    working directory: 'incorrect_state_abbreviations.csv',
    'incorrect_zip_code_lengths.csv', 'long_address_lengths.csv'.

    :param renamed_columns_data: DataFrame whose columns have already been
        renamed to the expected import names.
    :return: the cleaned, validated DataFrame.
    """
    # Reorder columns to the order expected by the import.
    reordered_columns_data = renamed_columns_data[
        ["Rec. Type", "Member ID", "Loc. Name", "Loc. Address", "Loc. Address 2",
         "Loc. City", "Loc. State", "Loc. Zip", "Operator ID", "Distributor",
         "DC", "PLG", "Dist Acct Num.", "Sector", "Org. Level 2", "Org. Level 3",
         "Org. Level 4", "Segment", "Food Service ?", "GLN", "Loc. Country",
         "Member Status", "Start Date", "End Date", "Contract Num.",
         "Contract Name", "Contact Name", "Contact Title", "Contact Phone",
         "Contact Fax", "Contact Email"]]
    # Remove all duplicate rows except for the first occurrence.
    deduped_data = reordered_columns_data.drop_duplicates()
    # Drop rows with an empty 'Loc. Name'. Copy so the column writes below
    # hit a real frame, not a view (avoids SettingWithCopyWarning and
    # silently-lost assignments in modern pandas).
    data = (deduped_data.replace({'Loc. Name': {'': np.nan}})
            .dropna(subset=['Loc. Name'])
            .copy())
    # Strip carriage returns AND newlines from fields that have caused
    # ingestion problems. (The original only replaced '\n' despite the
    # comment promising carriage-return removal.)
    for field in ('Loc. Address', 'Loc. Address 2', 'Loc. Zip'):
        data[field] = data[field].str.replace('\r', ' ').str.replace('\n', ' ')
    # Proper state/district abbreviations for validation.
    states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
    state_upper = data["Loc. State"].str.upper()
    # Rows with unknown state abbreviations go to a manual-review CSV.
    incorrect_state_abbr_rows = data[~state_upper.isin(states)]
    if len(incorrect_state_abbr_rows) > 0:
        incorrect_state_abbr_rows.to_csv('incorrect_state_abbreviations.csv', index=False)
    data = data[state_upper.isin(states)].copy()
    # Restore the leading zero that spreadsheets strip from zip codes of
    # states whose codes start with 0 (CT, MA, ME, NH, NJ, RI, VT).
    data['Loc. Zip'] = data['Loc. Zip'].str.strip()
    needs_leading_zero = (data['Loc. Zip'].str.len() == 4) & \
        data['Loc. State'].str.upper().isin(['CT', 'MA', 'ME', 'NH', 'NJ', 'RI', 'VT'])
    data.loc[needs_leading_zero, 'Loc. Zip'] = '0' + data.loc[needs_leading_zero, 'Loc. Zip'].astype(str)
    # Zip codes must be 5-10 characters; out-of-range rows go to a
    # manual-review CSV. (The original only rejected lengths > 10 even
    # though its comments documented a 5-10 valid range.)
    zip_len = data["Loc. Zip"].str.len()
    valid_zip = (zip_len >= 5) & (zip_len <= 10)
    bad_zip_lengths = data.loc[~valid_zip, :]
    if len(bad_zip_lengths) > 0:
        bad_zip_lengths.to_csv('incorrect_zip_code_lengths.csv', index=False)
    data = data.loc[valid_zip, :].copy()
    # Address fields longer than 50 characters go to a manual-review CSV.
    data['Loc. Address'] = data['Loc. Address'].str.strip()
    data['Loc. Address 2'] = data['Loc. Address 2'].str.strip()
    too_long = (data["Loc. Address"].str.len() > 50) | (data["Loc. Address 2"].str.len() > 50)
    long_address_data = data.loc[too_long, :]
    if len(long_address_data) > 0:
        long_address_data.to_csv('long_address_lengths.csv', index=False)
    return data.loc[~too_long, :]
def ml_validation_sector(renamed_columns_data):
    """Validate and clean a renamed member-list DataFrame, with sector filtering.

    Same pipeline as ml_validation — reorder columns, dedupe, drop blank
    'Loc. Name' rows, scrub carriage returns/newlines, validate states,
    pad northeastern zip codes, validate zip lengths and address lengths —
    plus an initial step that removes rows whose 'Sector' is on an
    exclusion list (healthcare/retail sectors not imported).

    Rows failing a validation are written to manual-review CSVs in the
    working directory: 'incorrect_state_abbreviations.csv',
    'incorrect_zip_code_lengths.csv', 'long_address_lengths.csv'.

    :param renamed_columns_data: DataFrame whose columns have already been
        renamed to the expected import names.
    :return: the cleaned, validated DataFrame.
    """
    # Reorder columns to the order expected by the import.
    reordered_columns_data = renamed_columns_data[
        ["Rec. Type", "Member ID", "Loc. Name", "Loc. Address", "Loc. Address 2",
         "Loc. City", "Loc. State", "Loc. Zip", "Operator ID", "Distributor",
         "DC", "PLG", "Dist Acct Num.", "Sector", "Org. Level 2", "Org. Level 3",
         "Org. Level 4", "Segment", "Food Service ?", "GLN", "Loc. Country",
         "Member Status", "Start Date", "End Date", "Contract Num.",
         "Contract Name", "Contact Name", "Contact Title", "Contact Phone",
         "Contact Fax", "Contact Email"]]
    # Remove all duplicate rows except for the first occurrence.
    deduped_data = reordered_columns_data.drop_duplicates()
    # Drop rows with an empty 'Loc. Name'. Copy so the column writes below
    # hit a real frame, not a view (avoids SettingWithCopyWarning and
    # silently-lost assignments in modern pandas).
    data = (deduped_data.replace({'Loc. Name': {'': np.nan}})
            .dropna(subset=['Loc. Name'])
            .copy())
    # Strip carriage returns AND newlines from the dataset. (The original
    # only replaced '\n' despite the comment promising CR removal.)
    for field in ('Loc. Address', 'Loc. Address 2', 'Loc. Zip'):
        data[field] = data[field].str.replace('\r', ' ').str.replace('\n', ' ')
    # Excluded sectors. All entries upper-cased: the comparison below
    # upper-cases the data, so the original mixed-case entry
    # "NON-OWN USE (Retail/Pharmacy)" could never match.
    sector_filter_list = ["CLINIC", "EMERGENCY SERVICES", "HME/DME",
                          "HOME HEALTH CARE", "INTEGRATED DELIVERY NETWORK",
                          "LABORATORY", "NON-OWN USE (RETAIL/PHARMACY)",
                          "PHYSICIAN PRACTICE", "SPECIALTY PHARMACY",
                          "VETERINARY"]
    # Drop rows whose sector is on the exclusion list.
    data = data[~data["Sector"].str.upper().isin(sector_filter_list)]
    # Proper state/district abbreviations for validation.
    states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
    state_upper = data["Loc. State"].str.upper()
    # Rows with unknown state abbreviations go to a manual-review CSV.
    incorrect_state_abbr_rows = data[~state_upper.isin(states)]
    if len(incorrect_state_abbr_rows) > 0:
        incorrect_state_abbr_rows.to_csv('incorrect_state_abbreviations.csv', index=False)
    data = data[state_upper.isin(states)].copy()
    # Restore the leading zero that spreadsheets strip from zip codes of
    # states whose codes start with 0 (CT, MA, ME, NH, NJ, RI, VT).
    data['Loc. Zip'] = data['Loc. Zip'].str.strip()
    needs_leading_zero = (data['Loc. Zip'].str.len() == 4) & \
        data['Loc. State'].str.upper().isin(['CT', 'MA', 'ME', 'NH', 'NJ', 'RI', 'VT'])
    data.loc[needs_leading_zero, 'Loc. Zip'] = '0' + data.loc[needs_leading_zero, 'Loc. Zip'].astype(str)
    # Zip codes must be 5-10 characters; out-of-range rows go to a
    # manual-review CSV. (The original only rejected lengths > 10 even
    # though its comments documented a 5-10 valid range.)
    zip_len = data["Loc. Zip"].str.len()
    valid_zip = (zip_len >= 5) & (zip_len <= 10)
    bad_zip_lengths = data.loc[~valid_zip, :]
    if len(bad_zip_lengths) > 0:
        bad_zip_lengths.to_csv('incorrect_zip_code_lengths.csv', index=False)
    data = data.loc[valid_zip, :].copy()
    # Address fields longer than 50 characters go to a manual-review CSV.
    data['Loc. Address'] = data['Loc. Address'].str.strip()
    data['Loc. Address 2'] = data['Loc. Address 2'].str.strip()
    too_long = (data["Loc. Address"].str.len() > 50) | (data["Loc. Address 2"].str.len() > 50)
    long_address_data = data.loc[too_long, :]
    if len(long_address_data) > 0:
        long_address_data.to_csv('long_address_lengths.csv', index=False)
    return data.loc[~too_long, :]
def lw_hilton():
    """Load Hilton property workbooks into the member-list import layout.

    Reads every 'hiltonproperties*.xlsx' file in the working directory,
    concatenates them into one DataFrame, adds the constant/blank columns
    the member-list import requires, drops source columns that are not
    imported, and renames the remainder to the expected import names.

    :return: the prepared DataFrame, or None if any step fails (the
        exception is appended to 'LWHiltonPreErrorLog.csv').
    """
    try:
        # Search for any files that follow the established naming convention.
        files = gb.glob("hiltonproperties*.xlsx")
        # Combine all matching workbooks into one DataFrame.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0,
        # and avoids the quadratic cost of repeated appends.
        frames = [pd.read_excel(file) for file in files]
        raw_data = pd.concat(frames) if frames else pd.DataFrame()
        # Constant columns required by the member-list import.
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Hilton"
        raw_data["DC"] = "Hilton"
        raw_data["PLG"] = "Hilton Member List"
        # Placeholder columns the import expects but the source lacks
        # (added in the original's column order).
        for blank_column in ("Operator ID", "Food Service ?", "Member Status",
                             "Start Date", "End Date", "Contract Num.",
                             "Contract Name", "Contact Title", "Dist Acct Num.",
                             "Org. Level 4", "GLN"):
            raw_data[blank_column] = ""
        # Drop source columns the import does not use. Raises KeyError
        # (caught below) if one is missing, same as the original `del`s.
        raw_data = raw_data.drop(columns=["Facilityid", "Addressline3",
                                          "Foreigncity2", "Province2", "Rooms",
                                          "Ownercompany Name", "ManagementCat",
                                          "Mgmtcompany Name", "Rponame"])
        # Rename remaining columns to the expected values for import.
        renamed_columns_data = raw_data.rename(columns={"Inncode": "Member ID", "Full Hotel Name": "Loc. Name",
                                                        "Addressline1": "Loc. Address",
                                                        "Addressline2": "Loc. Address 2", "City2": "Loc. City",
                                                        "State": "Loc. State", "ZipCode": "Loc. Zip",
                                                        "Managementcategory": "Sector",
                                                        "Subbrand": "Org. Level 2", "Brandname": "Org. Level 3",
                                                        "Facilitytypedescription": "Segment", "Country": "Loc. Country",
                                                        "Gm Name": "Contact Name",
                                                        "PhoneNumber": "Contact Phone", "FaxNumber": "Contact Fax",
                                                        "Propertyemail": "Contact Email"})
        return renamed_columns_data
    except Exception as e:
        # Best-effort error log; the function then implicitly returns None.
        with open('LWHiltonPreErrorLog.csv', 'a') as errorlog:
            csv.writer(errorlog).writerow([e])
def lw_inta():
    """Load Intalere (Amerinet) workbooks into the member-list import layout.

    Reads every 'intalereproperties*.xlsx' file in the working directory
    (data starts on the second row, hence header=1), concatenates them,
    adds the constant/blank columns the member-list import requires, drops
    source columns that are not imported, and renames the remainder to the
    expected import names.

    :return: the prepared DataFrame, or None if any step fails (the
        exception is appended to 'LWIntalerePreErrorLog.csv').
    """
    try:
        # Search for any files that follow the established naming convention.
        files = gb.glob("intalereproperties*.xlsx")
        # Combine all matching workbooks into one DataFrame.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        frames = [pd.read_excel(file, header=1) for file in files]
        raw_data = pd.concat(frames) if frames else pd.DataFrame()
        # Constant columns required by the member-list import.
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Amerinet"
        raw_data["DC"] = "Amerinet"
        raw_data["PLG"] = "Amerinet MemberList"
        # Placeholder columns the import expects but the source lacks
        # (added in the original's column order).
        for blank_column in ("Operator ID", "Food Service ?", "Loc. Country",
                             "Member Status", "Start Date", "End Date",
                             "Contract Num.", "Contract Name", "Contact Name",
                             "Contact Title", "Contact Phone", "Contact Fax",
                             "Contact Email", "Dist Acct Num.", "Org. Level 4"):
            raw_data[blank_column] = ""
        # Drop source columns the import does not use. Raises KeyError
        # (caught below) if one is missing, same as the original `del`s.
        raw_data = raw_data.drop(columns=[
            "Shareholder Name",
            "Membership blank=No Update A=Add C=Change D=Delete",
            "Update Effective Date", "Previous Facility Name",
            "Facility Name Extension", "Area Code", "Phone #",
            "Profit Status Y/N", "Member Since Date", "DEA #", "HIN",
            "Mail Code", "# of Beds", "IDN", "Sole Source Mbr Y/N",
            "Pharmacy Member Y/N", "Pharmacy Start Date",
            "Retail Pharmacy Member Y/N", "Retail Pharmacy Start Date",
            "Pharmacy Associate", "Pharmacy Associate Start Date",
            "Parent Number", "Family Parent Facility Number", "Alliance"])
        # Rename remaining columns to the expected values for import.
        renamed_columns_data = raw_data.rename(columns={"Facility Number": "Member ID", "Facility Name": "Loc. Name",
                                                        "Address 1": "Loc. Address", "Address 2": "Loc. Address 2",
                                                        "City": "Loc. City",
                                                        "State": "Loc. State", "Zip Code": "Loc. Zip",
                                                        "Facility Type": "Sector",
                                                        "Parent Facility": "Org. Level 2",
                                                        "Family Parent Facility Name": "Org. Level 3",
                                                        "Sub-Class": "Segment", "GLN Number": "GLN"})
        return renamed_columns_data
    except Exception as e:
        # Best-effort error log; the function then implicitly returns None.
        with open('LWIntalerePreErrorLog.csv', 'a') as errorlog:
            csv.writer(errorlog).writerow([e])
def rp_fb():
    """Load Rich's Foodbuy subscribed-units CSVs into the import layout.

    Reads every 'Subscribed Units List Rich*.csv' file in the working
    directory, concatenates them, adds the constant/blank columns the
    member-list import requires, drops source columns that are not
    imported, and renames the remainder to the expected import names.

    :return: the prepared DataFrame, or None if any step fails (the
        exception is appended to 'RPFoodbuyPreErrorLog.csv').
    """
    try:
        # Search for any files that follow the established naming convention.
        files = gb.glob("Subscribed Units List Rich*.csv")
        # Combine all matching CSVs into one DataFrame.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        frames = [pd.read_csv(file) for file in files]
        raw_data = pd.concat(frames) if frames else pd.DataFrame()
        # Constant columns required by the member-list import.
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Foodbuy"
        raw_data["DC"] = "Foodbuy"
        raw_data["PLG"] = "Foodbuy MemberList"
        # Placeholder columns the import expects but the source lacks
        # (added in the original's column order).
        for blank_column in ("Operator ID", "Food Service ?", "Loc. Country",
                             "Member Status", "End Date", "Contract Num.",
                             "Contract Name", "Contact Name", "Contact Title",
                             "Contact Phone", "Contact Fax", "Contact Email",
                             "Dist Acct Num.", "GLN"):
            raw_data[blank_column] = ""
        # Drop source columns the import does not use. Raises KeyError
        # (caught below) if one is missing, same as the original `del`s.
        raw_data = raw_data.drop(columns=["owner_id", "PERIOD", "ORG_GROUP_ID",
                                          "ORG_LEVEL2_ID", "ORG_LEVEL3_ID",
                                          "ORG_LEVEL4_ID", "UNIT_STATUS",
                                          "Primary_Distributor",
                                          "Primary_Dist_Org_Name",
                                          "DISTRIBUTOR_CUSTOMER_ID",
                                          "DISTRIBUTOR_CUSTOMER_NAME",
                                          "LAST_TRANSACTION_DATE"])
        # Rename remaining columns to the expected values for import.
        renamed_columns_data = raw_data.rename(columns={"CUSTOMER_ID": "Member ID", "CUSTOMER_NAME": "Loc. Name",
                                                        "MAIN_ADDRESS_LINE_1": "Loc. Address",
                                                        "MAIN_ADDRESS_LINE_2": "Loc. Address 2", "CITY": "Loc. City",
                                                        "STATE": "Loc. State", "ZIP": "Loc. Zip",
                                                        "ORG_GROUP_NAME": "Sector",
                                                        "ORG_LEVEL2_NAME": "Org. Level 2",
                                                        "ORG_LEVEL3_NAME": "Org. Level 3",
                                                        "ORG_LEVEL4_NAME": "Org. Level 4",
                                                        "CHANNEL": "Segment", "CREATE_DATE": "Start Date"})
        return renamed_columns_data
    except Exception as e:
        # Best-effort error log; the function then implicitly returns None.
        with open('RPFoodbuyPreErrorLog.csv', 'a') as errorlog:
            csv.writer(errorlog).writerow([e])
def con_fb():
    """Load ConAgra Foodbuy subscribed-units CSVs into the import layout.

    Reads every 'Subscribed Units List ConAgra*.csv' file in the working
    directory, concatenates them, adds the constant/blank columns the
    member-list import requires, drops source columns that are not
    imported, and renames the remainder to the expected import names.

    :return: the prepared DataFrame, or None if any step fails (the
        exception is appended to 'ConFoodbuyPreErrorLog.csv').
    """
    try:
        # Search for any files that follow the established naming convention.
        files = gb.glob("Subscribed Units List ConAgra*.csv")
        # Combine all matching CSVs into one DataFrame.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        frames = [pd.read_csv(file) for file in files]
        raw_data = pd.concat(frames) if frames else pd.DataFrame()
        # Constant columns required by the member-list import.
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Foodbuy"
        raw_data["DC"] = "Foodbuy"
        raw_data["PLG"] = "Foodbuy MemberList"
        # Placeholder columns the import expects but the source lacks
        # (added in the original's column order).
        for blank_column in ("Operator ID", "Food Service ?", "Loc. Country",
                             "Member Status", "End Date", "Contract Num.",
                             "Contract Name", "Contact Name", "Contact Title",
                             "Contact Phone", "Contact Fax", "Contact Email",
                             "Dist Acct Num.", "GLN"):
            raw_data[blank_column] = ""
        # Drop source columns the import does not use. Raises KeyError
        # (caught below) if one is missing, same as the original `del`s.
        raw_data = raw_data.drop(columns=["owner_id", "PERIOD", "ORG_GROUP_ID",
                                          "ORG_LEVEL2_ID", "ORG_LEVEL3_ID",
                                          "ORG_LEVEL4_ID", "UNIT_STATUS",
                                          "Primary_Distributor",
                                          "Primary_Dist_Org_Name",
                                          "DISTRIBUTOR_CUSTOMER_ID",
                                          "DISTRIBUTOR_CUSTOMER_NAME",
                                          "LAST_TRANSACTION_DATE"])
        # Rename remaining columns to the expected values for import.
        renamed_columns_data = raw_data.rename(columns={"CUSTOMER_ID": "Member ID", "CUSTOMER_NAME": "Loc. Name",
                                                        "MAIN_ADDRESS_LINE_1": "Loc. Address",
                                                        "MAIN_ADDRESS_LINE_2": "Loc. Address 2", "CITY": "Loc. City",
                                                        "STATE": "Loc. State", "ZIP": "Loc. Zip",
                                                        "ORG_GROUP_NAME": "Sector",
                                                        "ORG_LEVEL2_NAME": "Org. Level 2",
                                                        "ORG_LEVEL3_NAME": "Org. Level 3",
                                                        "ORG_LEVEL4_NAME": "Org. Level 4",
                                                        "CHANNEL": "Segment", "CREATE_DATE": "Start Date"})
        return renamed_columns_data
    except Exception as e:
        # Best-effort error log; the function then implicitly returns None.
        with open('ConFoodbuyPreErrorLog.csv', 'a') as errorlog:
            csv.writer(errorlog).writerow([e])
def kel_com():
    """Load Kellogg Compass-Foodbuy workbooks into the import layout.

    Reads every 'Compass-Foodbuy*Kellogg.xlsx' file in the working
    directory (data starts on the fourth row, hence header=3),
    concatenates them, adds the constant/blank columns the member-list
    import requires, drops source columns that are not imported, and
    renames the remainder to the expected import names.

    :return: the prepared DataFrame, or None if any step fails (the
        exception is appended to 'KelCompassPreErrorLog.csv').
    """
    try:
        # Search for any files that follow the established naming convention.
        files = gb.glob("Compass-Foodbuy*Kellogg.xlsx")
        # Combine all matching workbooks into one DataFrame.
        # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
        frames = [pd.read_excel(file, header=3) for file in files]
        raw_data = pd.concat(frames) if frames else pd.DataFrame()
        # Constant columns required by the member-list import.
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Foodbuy"
        raw_data["DC"] = "Foodbuy"
        raw_data["PLG"] = "Compass MemberList"
        # Placeholder columns the import expects but the source lacks
        # (added in the original's column order).
        for blank_column in ("Food Service ?", "Start Date", "End Date",
                             "Contract Num.", "Contract Name", "Contact Name",
                             "Contact Title", "Contact Phone", "Contact Fax",
                             "Contact Email", "Dist Acct Num.", "GLN",
                             "Loc. Address 2", "Sector", "Member Status"):
            raw_data[blank_column] = ""
        # Drop source columns the import does not use. Raises KeyError
        # (caught below) if one is missing, same as the original `del`s.
        raw_data = raw_data.drop(columns=["Company", "Oper Org Level 2 ID",
                                          "Oper Org Level 3 ID",
                                          "Oper Org Level 4 ID",
                                          "Oper Org Level 4 Name",
                                          "Oper Org Level 5 ID",
                                          "Oper Org Level 5 Name"])
        # Rename remaining columns to the expected values for import.
        renamed_columns_data = raw_data.rename(columns={"Oper Org Group ID": "Member ID", "Operator Name": "Loc. Name",
                                                        "Main Contact Address": "Loc. Address", "Main Contact City": "Loc. City",
                                                        "Main Contact State": "Loc. State", "Main Contact Zip": "Loc. Zip",
                                                        "Oper Org Group Name": "Org. Level 2",
                                                        "Oper Org Level 2 Name": "Org. Level 3",
                                                        "Oper Org Level 3 Name": "Org. Level 4",
                                                        "CHANNEL": "Segment", "MAIN COUNTRY NAME": "Loc. Country"})
        return renamed_columns_data
    except Exception as e:
        # Best-effort error log; the function then implicitly returns None.
        # Fixed copy-paste bug: the original wrote Kellogg/Compass errors to
        # 'ConFoodbuyPreErrorLog.csv', the ConAgra log.
        with open('KelCompassPreErrorLog.csv', 'a') as errorlog:
            csv.writer(errorlog).writerow([e])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment