Code lib created to house all the functions needed to process member list data for this use case. A short usage sketch follows the code.
# -*- coding: utf-8 -*-
"""
Created on Wed Apr 18 11:56:27 2018
@author: jeffrey.garcia
"""
import pandas as pd
import numpy as np
import csv
import glob as gb


def ml_validation(renamed_columns_data):
    # Reorder columns to the expected order for import
    reordered_columns_data = renamed_columns_data[
        ["Rec. Type", "Member ID", "Loc. Name", "Loc. Address", "Loc. Address 2", "Loc. City", "Loc. State",
         "Loc. Zip", "Operator ID", "Distributor", "DC", "PLG", "Dist Acct Num.", "Sector", "Org. Level 2",
         "Org. Level 3", "Org. Level 4", "Segment", "Food Service ?", "GLN", "Loc. Country", "Member Status",
         "Start Date", "End Date", "Contract Num.", "Contract Name", "Contact Name", "Contact Title",
         "Contact Phone", "Contact Fax", "Contact Email"]]
    # First test output
    # reordered_columns_data.to_csv('testoutput.csv', index=False)
    # Remove all duplicate rows except for the first occurrence
    deduped_data = reordered_columns_data.drop_duplicates()
    # Dedupe test output
    # deduped_data.to_csv('dedupetestoutput.csv', index=False)
    # Drop rows with an empty 'Loc. Name' field
    no_empty_locations_data = deduped_data.replace({'Loc. Name': {'': np.nan}}).dropna(subset=['Loc. Name'])
    # Empty 'Loc. Name' drop test
    # no_empty_locations_data.to_csv('blanklocname.csv', index=False)
    # Strip carriage returns and newlines from fields where embedded line breaks
    # have caused problems during ingestion
    no_empty_locations_data['Loc. Address'] = no_empty_locations_data['Loc. Address'].str.replace(r'[\r\n]', ' ', regex=True)
    no_empty_locations_data['Loc. Address 2'] = no_empty_locations_data['Loc. Address 2'].str.replace(r'[\r\n]', ' ', regex=True)
    no_empty_locations_data['Loc. Zip'] = no_empty_locations_data['Loc. Zip'].str.replace(r'[\r\n]', ' ', regex=True)
    # Proper state abbreviations for validation later
    states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
    # Rows with incorrect state abbreviations are set aside in a separate CSV for manual review
    incorrect_state_abbr_rows = no_empty_locations_data[
        ~no_empty_locations_data["Loc. State"].str.upper().isin(states)]
    if len(incorrect_state_abbr_rows) > 0:
        incorrect_state_abbr_rows.to_csv('incorrect_state_abbreviations.csv', index=False)
    # Keep only rows with valid state abbreviations
    state_abbr_filtered_data = no_empty_locations_data[
        no_empty_locations_data["Loc. State"].str.upper().isin(states)].copy()
    # Add a leading zero to four-digit zip codes for states in (CT, MA, ME, NH, NJ, RI, VT)
    state_abbr_filtered_data['Loc. Zip'] = state_abbr_filtered_data['Loc. Zip'].str.strip()
    state_abbr_filtered_data.loc[
        (state_abbr_filtered_data['Loc. Zip'].str.len() == 4) &
        (state_abbr_filtered_data['Loc. State'].str.upper().isin(['CT', 'MA', 'ME', 'NH', 'NJ', 'RI', 'VT'])),
        'Loc. Zip'] = '0' + state_abbr_filtered_data['Loc. Zip'].astype(str)
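    # Example of the padding above: a Connecticut row whose zip was read as
    # "6103" (leading zero lost in Excel) becomes "06103"; five-digit and
    # ZIP+4 values, and rows outside the listed states, are left untouched.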
    # Rows whose 'Loc. Zip' is shorter than 5 or longer than 10 characters are
    # exported to CSV for manual review
    bad_zip_lengths = state_abbr_filtered_data.loc[
        ~state_abbr_filtered_data["Loc. Zip"].str.len().between(5, 10), :]
    if len(bad_zip_lengths) > 0:
        bad_zip_lengths.to_csv('incorrect_zip_code_lengths.csv', index=False)
    # Keep only rows whose 'Loc. Zip' is 5 to 10 characters long
    zip_length_filtered_data = state_abbr_filtered_data.loc[
        state_abbr_filtered_data["Loc. Zip"].str.len().between(5, 10), :]
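    # Example: "1234" is set aside for review, while "06103" and
    # "06103-1234" (10 characters) both pass through.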
    # Rows whose 'Loc. Address' or 'Loc. Address 2' is longer than 50 characters
    # are exported to CSV for manual review
    zip_length_filtered_data = zip_length_filtered_data.copy()
    zip_length_filtered_data['Loc. Address'] = zip_length_filtered_data['Loc. Address'].str.strip()
    zip_length_filtered_data['Loc. Address 2'] = zip_length_filtered_data['Loc. Address 2'].str.strip()
    long_address_data = zip_length_filtered_data.loc[
        (zip_length_filtered_data["Loc. Address"].str.len() > 50) |
        (zip_length_filtered_data["Loc. Address 2"].str.len() > 50), :]
    if len(long_address_data) > 0:
        long_address_data.to_csv('long_address_lengths.csv', index=False)
    # Keep only rows whose address fields are 50 characters or shorter
    long_address_filtered_data = zip_length_filtered_data.loc[
        ~((zip_length_filtered_data["Loc. Address"].str.len() > 50) |
          (zip_length_filtered_data["Loc. Address 2"].str.len() > 50)), :]
    return long_address_filtered_data


def ml_validation_sector(renamed_columns_data):
    # Reorder columns to the expected order for import
    reordered_columns_data = renamed_columns_data[
        ["Rec. Type", "Member ID", "Loc. Name", "Loc. Address", "Loc. Address 2", "Loc. City", "Loc. State",
         "Loc. Zip", "Operator ID", "Distributor", "DC", "PLG", "Dist Acct Num.", "Sector", "Org. Level 2",
         "Org. Level 3", "Org. Level 4", "Segment", "Food Service ?", "GLN", "Loc. Country", "Member Status",
         "Start Date", "End Date", "Contract Num.", "Contract Name", "Contact Name", "Contact Title",
         "Contact Phone", "Contact Fax", "Contact Email"]]
    # First test output
    # reordered_columns_data.to_csv('testoutput.csv', index=False)
    # Remove all duplicate rows except for the first occurrence
    deduped_data = reordered_columns_data.drop_duplicates()
    # Dedupe test output
    # deduped_data.to_csv('dedupetestoutput.csv', index=False)
    # Drop rows with an empty 'Loc. Name' field
    no_empty_locations_data = deduped_data.replace({'Loc. Name': {'': np.nan}}).dropna(subset=['Loc. Name'])
    # Empty 'Loc. Name' drop test
    # no_empty_locations_data.to_csv('blanklocname.csv', index=False)
    # Strip carriage returns and newlines from the dataset
    no_empty_locations_data['Loc. Address'] = no_empty_locations_data['Loc. Address'].str.replace(r'[\r\n]', ' ', regex=True)
    no_empty_locations_data['Loc. Address 2'] = no_empty_locations_data['Loc. Address 2'].str.replace(r'[\r\n]', ' ', regex=True)
    no_empty_locations_data['Loc. Zip'] = no_empty_locations_data['Loc. Zip'].str.replace(r'[\r\n]', ' ', regex=True)
    # Excluded-sector list for validation later; entries are fully uppercase
    # because they are matched against an uppercased 'Sector' column
    sector_filter_list = ["CLINIC", "EMERGENCY SERVICES", "HME/DME", "HOME HEALTH CARE",
                          "INTEGRATED DELIVERY NETWORK", "LABORATORY",
                          "NON-OWN USE (RETAIL/PHARMACY)", "PHYSICIAN PRACTICE",
                          "SPECIALTY PHARMACY", "VETERINARY"]
    # Proper state abbreviations for validation later
    states = ["AL", "AK", "AZ", "AR", "CA", "CO", "CT", "DC", "DE", "FL", "GA",
              "HI", "ID", "IL", "IN", "IA", "KS", "KY", "LA", "ME", "MD",
              "MA", "MI", "MN", "MS", "MO", "MT", "NE", "NV", "NH", "NJ",
              "NM", "NY", "NC", "ND", "OH", "OK", "OR", "PA", "RI", "SC",
              "SD", "TN", "TX", "UT", "VT", "VA", "WA", "WV", "WI", "WY"]
    # Filter out rows whose sector is on the excluded list
    sector_filtered_data = no_empty_locations_data[
        ~no_empty_locations_data["Sector"].str.upper().isin(sector_filter_list)]
    # Rows with incorrect state abbreviations are set aside in a separate CSV for manual review
    incorrect_state_abbr_rows = sector_filtered_data[
        ~sector_filtered_data["Loc. State"].str.upper().isin(states)]
    if len(incorrect_state_abbr_rows) > 0:
        incorrect_state_abbr_rows.to_csv('incorrect_state_abbreviations.csv', index=False)
    # Keep only rows with valid state abbreviations
    state_abbr_filtered_data = sector_filtered_data[
        sector_filtered_data["Loc. State"].str.upper().isin(states)].copy()
    # Add a leading zero to four-digit zip codes for states in (CT, MA, ME, NH, NJ, RI, VT)
    state_abbr_filtered_data['Loc. Zip'] = state_abbr_filtered_data['Loc. Zip'].str.strip()
    state_abbr_filtered_data.loc[
        (state_abbr_filtered_data['Loc. Zip'].str.len() == 4) &
        (state_abbr_filtered_data['Loc. State'].str.upper().isin(['CT', 'MA', 'ME', 'NH', 'NJ', 'RI', 'VT'])),
        'Loc. Zip'] = '0' + state_abbr_filtered_data['Loc. Zip'].astype(str)
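    # (See the worked padding example in ml_validation above; the same rule applies here.)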
    # Rows whose 'Loc. Zip' is shorter than 5 or longer than 10 characters are
    # exported to CSV for manual review
    bad_zip_lengths = state_abbr_filtered_data.loc[
        ~state_abbr_filtered_data["Loc. Zip"].str.len().between(5, 10), :]
    if len(bad_zip_lengths) > 0:
        bad_zip_lengths.to_csv('incorrect_zip_code_lengths.csv', index=False)
    # Keep only rows whose 'Loc. Zip' is 5 to 10 characters long
    zip_length_filtered_data = state_abbr_filtered_data.loc[
        state_abbr_filtered_data["Loc. Zip"].str.len().between(5, 10), :]
    # Rows whose 'Loc. Address' or 'Loc. Address 2' is longer than 50 characters
    # are exported to CSV for manual review
    zip_length_filtered_data = zip_length_filtered_data.copy()
    zip_length_filtered_data['Loc. Address'] = zip_length_filtered_data['Loc. Address'].str.strip()
    zip_length_filtered_data['Loc. Address 2'] = zip_length_filtered_data['Loc. Address 2'].str.strip()
    long_address_data = zip_length_filtered_data.loc[
        (zip_length_filtered_data["Loc. Address"].str.len() > 50) |
        (zip_length_filtered_data["Loc. Address 2"].str.len() > 50), :]
    if len(long_address_data) > 0:
        long_address_data.to_csv('long_address_lengths.csv', index=False)
    # Keep only rows whose address fields are 50 characters or shorter
    long_address_filtered_data = zip_length_filtered_data.loc[
        ~((zip_length_filtered_data["Loc. Address"].str.len() > 50) |
          (zip_length_filtered_data["Loc. Address 2"].str.len() > 50)), :]
    return long_address_filtered_data


def lw_hilton():
    try:
        # Search for any files that follow the naming convention established below
        files = gb.glob("hiltonproperties*.xlsx")
        # Create an empty dataframe
        raw_data = pd.DataFrame()
        # Read every matching file and append it, building one dataframe from one or more files;
        # ignore_index avoids duplicate index labels when several files are combined
        for file in files:
            stage_data = pd.read_excel(file)
            raw_data = pd.concat([raw_data, stage_data], ignore_index=True)
        # Add missing columns necessary for the memberlist import
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Hilton"
        raw_data["DC"] = "Hilton"
        raw_data["PLG"] = "Hilton Member List"
        raw_data["Operator ID"] = ""
        raw_data["Food Service ?"] = ""
        raw_data["Member Status"] = ""
        raw_data["Start Date"] = ""
        raw_data["End Date"] = ""
        raw_data["Contract Num."] = ""
        raw_data["Contract Name"] = ""
        raw_data["Contact Title"] = ""
        raw_data["Dist Acct Num."] = ""
        raw_data["Org. Level 4"] = ""
        raw_data["GLN"] = ""
        # Delete unnecessary columns
        del raw_data["Facilityid"]
        del raw_data["Addressline3"]
        del raw_data["Foreigncity2"]
        del raw_data["Province2"]
        del raw_data["Rooms"]
        del raw_data["Ownercompany Name"]
        del raw_data["ManagementCat"]
        del raw_data["Mgmtcompany Name"]
        del raw_data["Rponame"]
        # Rename remaining columns to the expected values for import
        renamed_columns_data = raw_data.rename(
            columns={"Inncode": "Member ID", "Full Hotel Name": "Loc. Name",
                     "Addressline1": "Loc. Address", "Addressline2": "Loc. Address 2",
                     "City2": "Loc. City", "State": "Loc. State", "ZipCode": "Loc. Zip",
                     "Managementcategory": "Sector", "Subbrand": "Org. Level 2",
                     "Brandname": "Org. Level 3", "Facilitytypedescription": "Segment",
                     "Country": "Loc. Country", "Gm Name": "Contact Name",
                     "PhoneNumber": "Contact Phone", "FaxNumber": "Contact Fax",
                     "Propertyemail": "Contact Email"})
        return renamed_columns_data
    except Exception as e:
        # Log the exception to a CSV error log for later review
        errorinfo = [repr(e)]
        error_log_filename = 'LWHiltonPreErrorLog.csv'
        with open(error_log_filename, 'a', newline='') as errorlog:
            mywriter = csv.writer(errorlog)
            mywriter.writerow(errorinfo)


def lw_inta():
    try:
        # Search for any files that follow the naming convention established below
        files = gb.glob("intalereproperties*.xlsx")
        # Create an empty dataframe
        raw_data = pd.DataFrame()
        # Read every matching file and append it, building one dataframe from one or more files;
        # ignore_index avoids duplicate index labels when several files are combined
        for file in files:
            stage_data = pd.read_excel(file, header=1)
            raw_data = pd.concat([raw_data, stage_data], ignore_index=True)
        # Add missing columns necessary for the memberlist import
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Amerinet"
        raw_data["DC"] = "Amerinet"
        raw_data["PLG"] = "Amerinet MemberList"
        raw_data["Operator ID"] = ""
        raw_data["Food Service ?"] = ""
        raw_data["Loc. Country"] = ""
        raw_data["Member Status"] = ""
        raw_data["Start Date"] = ""
        raw_data["End Date"] = ""
        raw_data["Contract Num."] = ""
        raw_data["Contract Name"] = ""
        raw_data["Contact Name"] = ""
        raw_data["Contact Title"] = ""
        raw_data["Contact Phone"] = ""
        raw_data["Contact Fax"] = ""
        raw_data["Contact Email"] = ""
        raw_data["Dist Acct Num."] = ""
        raw_data["Org. Level 4"] = ""
        # Delete unnecessary columns
        del raw_data["Shareholder Name"]
        del raw_data["Membership blank=No Update A=Add C=Change D=Delete"]
        del raw_data["Update Effective Date"]
        del raw_data["Previous Facility Name"]
        del raw_data["Facility Name Extension"]
        del raw_data["Area Code"]
        del raw_data["Phone #"]
        del raw_data["Profit Status Y/N"]
        del raw_data["Member Since Date"]
        del raw_data["DEA #"]
        del raw_data["HIN"]
        del raw_data["Mail Code"]
        del raw_data["# of Beds"]
        del raw_data["IDN"]
        del raw_data["Sole Source Mbr Y/N"]
        del raw_data["Pharmacy Member Y/N"]
        del raw_data["Pharmacy Start Date"]
        del raw_data["Retail Pharmacy Member Y/N"]
        del raw_data["Retail Pharmacy Start Date"]
        del raw_data["Pharmacy Associate"]
        del raw_data["Pharmacy Associate Start Date"]
        del raw_data["Parent Number"]
        del raw_data["Family Parent Facility Number"]
        del raw_data["Alliance"]
        # Rename remaining columns to the expected values for import
        renamed_columns_data = raw_data.rename(
            columns={"Facility Number": "Member ID", "Facility Name": "Loc. Name",
                     "Address 1": "Loc. Address", "Address 2": "Loc. Address 2",
                     "City": "Loc. City", "State": "Loc. State", "Zip Code": "Loc. Zip",
                     "Facility Type": "Sector", "Parent Facility": "Org. Level 2",
                     "Family Parent Facility Name": "Org. Level 3",
                     "Sub-Class": "Segment", "GLN Number": "GLN"})
        return renamed_columns_data
    except Exception as e:
        # Log the exception to a CSV error log for later review
        errorinfo = [repr(e)]
        error_log_filename = 'LWIntalerePreErrorLog.csv'
        with open(error_log_filename, 'a', newline='') as errorlog:
            mywriter = csv.writer(errorlog)
            mywriter.writerow(errorinfo)


def rp_fb():
    try:
        # Search for any files that follow the naming convention established below
        files = gb.glob("Subscribed Units List Rich*.csv")
        # Create an empty dataframe
        raw_data = pd.DataFrame()
        # Read every matching file and append it, building one dataframe from one or more files;
        # ignore_index avoids duplicate index labels when several files are combined
        for file in files:
            stage_data = pd.read_csv(file)
            raw_data = pd.concat([raw_data, stage_data], ignore_index=True)
        # Add missing columns necessary for the memberlist import
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Foodbuy"
        raw_data["DC"] = "Foodbuy"
        raw_data["PLG"] = "Foodbuy MemberList"
        raw_data["Operator ID"] = ""
        raw_data["Food Service ?"] = ""
        raw_data["Loc. Country"] = ""
        raw_data["Member Status"] = ""
        raw_data["End Date"] = ""
        raw_data["Contract Num."] = ""
        raw_data["Contract Name"] = ""
        raw_data["Contact Name"] = ""
        raw_data["Contact Title"] = ""
        raw_data["Contact Phone"] = ""
        raw_data["Contact Fax"] = ""
        raw_data["Contact Email"] = ""
        raw_data["Dist Acct Num."] = ""
        raw_data["GLN"] = ""
        # Delete unnecessary columns
        del raw_data["owner_id"]
        del raw_data["PERIOD"]
        del raw_data["ORG_GROUP_ID"]
        del raw_data["ORG_LEVEL2_ID"]
        del raw_data["ORG_LEVEL3_ID"]
        del raw_data["ORG_LEVEL4_ID"]
        del raw_data["UNIT_STATUS"]
        del raw_data["Primary_Distributor"]
        del raw_data["Primary_Dist_Org_Name"]
        del raw_data["DISTRIBUTOR_CUSTOMER_ID"]
        del raw_data["DISTRIBUTOR_CUSTOMER_NAME"]
        del raw_data["LAST_TRANSACTION_DATE"]
        # Rename remaining columns to the expected values for import
        renamed_columns_data = raw_data.rename(
            columns={"CUSTOMER_ID": "Member ID", "CUSTOMER_NAME": "Loc. Name",
                     "MAIN_ADDRESS_LINE_1": "Loc. Address", "MAIN_ADDRESS_LINE_2": "Loc. Address 2",
                     "CITY": "Loc. City", "STATE": "Loc. State", "ZIP": "Loc. Zip",
                     "ORG_GROUP_NAME": "Sector", "ORG_LEVEL2_NAME": "Org. Level 2",
                     "ORG_LEVEL3_NAME": "Org. Level 3", "ORG_LEVEL4_NAME": "Org. Level 4",
                     "CHANNEL": "Segment", "CREATE_DATE": "Start Date"})
        return renamed_columns_data
    except Exception as e:
        # Log the exception to a CSV error log for later review
        errorinfo = [repr(e)]
        error_log_filename = 'RPFoodbuyPreErrorLog.csv'
        with open(error_log_filename, 'a', newline='') as errorlog:
            mywriter = csv.writer(errorlog)
            mywriter.writerow(errorinfo)


def con_fb():
    try:
        # Search for any files that follow the naming convention established below
        files = gb.glob("Subscribed Units List ConAgra*.csv")
        # Create an empty dataframe
        raw_data = pd.DataFrame()
        # Read every matching file and append it, building one dataframe from one or more files;
        # ignore_index avoids duplicate index labels when several files are combined
        for file in files:
            stage_data = pd.read_csv(file)
            raw_data = pd.concat([raw_data, stage_data], ignore_index=True)
        # Add missing columns necessary for the memberlist import
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Foodbuy"
        raw_data["DC"] = "Foodbuy"
        raw_data["PLG"] = "Foodbuy MemberList"
        raw_data["Operator ID"] = ""
        raw_data["Food Service ?"] = ""
        raw_data["Loc. Country"] = ""
        raw_data["Member Status"] = ""
        raw_data["End Date"] = ""
        raw_data["Contract Num."] = ""
        raw_data["Contract Name"] = ""
        raw_data["Contact Name"] = ""
        raw_data["Contact Title"] = ""
        raw_data["Contact Phone"] = ""
        raw_data["Contact Fax"] = ""
        raw_data["Contact Email"] = ""
        raw_data["Dist Acct Num."] = ""
        raw_data["GLN"] = ""
        # Delete unnecessary columns
        del raw_data["owner_id"]
        del raw_data["PERIOD"]
        del raw_data["ORG_GROUP_ID"]
        del raw_data["ORG_LEVEL2_ID"]
        del raw_data["ORG_LEVEL3_ID"]
        del raw_data["ORG_LEVEL4_ID"]
        del raw_data["UNIT_STATUS"]
        del raw_data["Primary_Distributor"]
        del raw_data["Primary_Dist_Org_Name"]
        del raw_data["DISTRIBUTOR_CUSTOMER_ID"]
        del raw_data["DISTRIBUTOR_CUSTOMER_NAME"]
        del raw_data["LAST_TRANSACTION_DATE"]
        # Rename remaining columns to the expected values for import
        renamed_columns_data = raw_data.rename(
            columns={"CUSTOMER_ID": "Member ID", "CUSTOMER_NAME": "Loc. Name",
                     "MAIN_ADDRESS_LINE_1": "Loc. Address", "MAIN_ADDRESS_LINE_2": "Loc. Address 2",
                     "CITY": "Loc. City", "STATE": "Loc. State", "ZIP": "Loc. Zip",
                     "ORG_GROUP_NAME": "Sector", "ORG_LEVEL2_NAME": "Org. Level 2",
                     "ORG_LEVEL3_NAME": "Org. Level 3", "ORG_LEVEL4_NAME": "Org. Level 4",
                     "CHANNEL": "Segment", "CREATE_DATE": "Start Date"})
        return renamed_columns_data
    except Exception as e:
        # Log the exception to a CSV error log for later review
        errorinfo = [repr(e)]
        error_log_filename = 'ConFoodbuyPreErrorLog.csv'
        with open(error_log_filename, 'a', newline='') as errorlog:
            mywriter = csv.writer(errorlog)
            mywriter.writerow(errorinfo)


def kel_com():
    try:
        # Search for any files that follow the naming convention established below
        files = gb.glob("Compass-Foodbuy*Kellogg.xlsx")
        # Create an empty dataframe
        raw_data = pd.DataFrame()
        # Read every matching file and append it, building one dataframe from one or more files;
        # ignore_index avoids duplicate index labels when several files are combined
        for file in files:
            stage_data = pd.read_excel(file, header=3)
            raw_data = pd.concat([raw_data, stage_data], ignore_index=True)
        # Add missing columns necessary for the memberlist import
        raw_data["Rec. Type"] = "600"
        raw_data["Distributor"] = "Foodbuy"
        raw_data["DC"] = "Foodbuy"
        raw_data["PLG"] = "Compass MemberList"
        raw_data["Food Service ?"] = ""
        raw_data["Start Date"] = ""
        raw_data["End Date"] = ""
        raw_data["Contract Num."] = ""
        raw_data["Contract Name"] = ""
        raw_data["Contact Name"] = ""
        raw_data["Contact Title"] = ""
        raw_data["Contact Phone"] = ""
        raw_data["Contact Fax"] = ""
        raw_data["Contact Email"] = ""
        raw_data["Dist Acct Num."] = ""
        raw_data["GLN"] = ""
        raw_data["Loc. Address 2"] = ""
        raw_data["Sector"] = ""
        raw_data["Member Status"] = ""
        # Delete unnecessary columns
        del raw_data["Company"]
        del raw_data["Oper Org Level 2 ID"]
        del raw_data["Oper Org Level 3 ID"]
        del raw_data["Oper Org Level 4 ID"]
        del raw_data["Oper Org Level 4 Name"]
        del raw_data["Oper Org Level 5 ID"]
        del raw_data["Oper Org Level 5 Name"]
        # Rename remaining columns to the expected values for import
        renamed_columns_data = raw_data.rename(
            columns={"Oper Org Group ID": "Member ID", "Operator Name": "Loc. Name",
                     "Main Contact Address": "Loc. Address", "Main Contact City": "Loc. City",
                     "Main Contact State": "Loc. State", "Main Contact Zip": "Loc. Zip",
                     "Oper Org Group Name": "Org. Level 2", "Oper Org Level 2 Name": "Org. Level 3",
                     "Oper Org Level 3 Name": "Org. Level 4", "CHANNEL": "Segment",
                     "MAIN COUNTRY NAME": "Loc. Country"})
        return renamed_columns_data
    except Exception as e:
        # Log the exception to a CSV error log for later review
        errorinfo = [repr(e)]
        error_log_filename = 'KelCompassPreErrorLog.csv'
        with open(error_log_filename, 'a', newline='') as errorlog:
            mywriter = csv.writer(errorlog)
            mywriter.writerow(errorinfo)
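
A minimal usage sketch, assuming the script is run from the directory containing the source spreadsheets. The __main__ guard and the output filename are illustrative additions, not part of the original library:

if __name__ == '__main__':
    # Build and rename the raw Hilton dataframe, then validate it for import
    renamed = lw_hilton()
    if renamed is not None:  # loaders return None after logging an error
        validated = ml_validation(renamed)
        # 'hilton_memberlist_import.csv' is a hypothetical output name
        validated.to_csv('hilton_memberlist_import.csv', index=False)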