Last active
October 26, 2020 09:27
-
-
Save songokjesse/a2aae27d2b36f981d8b451436be7ece9 to your computer and use it in GitHub Desktop.
Removing Duplicates from Excel Columns
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def _dedupe_pipe_separated(value):
    """Return *value* with repeated pipe-separated tokens removed, order kept."""
    # str() so a cell holding a non-string value (e.g. a number) does not
    # break .split(); string cells pass through unchanged.
    tokens = str(value).split('|')
    # OrderedDict.fromkeys keeps the first occurrence of each token, in order.
    return '|'.join(OrderedDict.fromkeys(tokens))


def removeDuplicatedData():
    """Load the sheet into a DataFrame and de-duplicate its 'coursecode' column.

    Every record returned by ``sheet_instance.get_all_records()`` is loaded
    into a pandas DataFrame. Each 'coursecode' cell is treated as a
    pipe-separated list: repeated entries are dropped (the first occurrence
    wins, order is preserved) and the remaining entries are re-joined
    with '|'.

    Returns:
        The DataFrame with the cleaned 'coursecode' column re-inserted at
        column index 2, or as the last column when fewer than two other
        columns exist. When the sheet yields no records, the empty DataFrame
        is returned as-is.

    Raises:
        KeyError: if the sheet has records but no 'coursecode' column.
    """
    excelData = sheet_instance.get_all_records()
    # Add the sheet records to a DataFrame
    mydata = pd.DataFrame.from_dict(excelData)

    # No records means no columns at all, so there is nothing to clean.
    if mydata.empty:
        return mydata

    # Clean every cell of the column
    data = [_dedupe_pipe_separated(item) for item in mydata['coursecode']]

    # Drop the old coursecode column
    mydata.drop('coursecode', inplace=True, axis=1)

    # Re-insert the cleaned values; DataFrame.insert requires
    # 0 <= loc <= len(columns), so clamp the position for narrow frames.
    position = min(2, len(mydata.columns))
    mydata.insert(position, "coursecode", data, allow_duplicates=True)
    return mydata
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment