# songokjesse/RemoveDuplicates.py

## RemoveDuplicates.py
def removeDuplicatedData():
    """Return the sheet's records with repeated course codes removed.

    Fetches every record from the module-level ``sheet_instance``, loads the
    records into a DataFrame, and rewrites the ``coursecode`` column so that
    each pipe-separated value keeps only the first occurrence of every code,
    in its original order (``"A|B|A"`` becomes ``"A|B"``).

    Relies on ``sheet_instance``, ``pd`` and ``OrderedDict`` being available
    at module level; none of them is defined inside this function.

    Returns:
        pandas.DataFrame: the sheet data with the cleaned ``coursecode``
        column re-inserted at column position 2, or appended as the last
        column when fewer than two other columns exist.

    Raises:
        KeyError: if the records contain no ``coursecode`` column (this
            includes a sheet that yields no records at all).
    """
    separator = '|'

    # Load the sheet records into a DataFrame.
    records = sheet_instance.get_all_records()
    mydata = pd.DataFrame.from_dict(records)

    # Splitting on the pipe drops it from the pieces; OrderedDict.fromkeys
    # de-duplicates while keeping first-seen order; joining restores the pipe.
    # str() guards against non-string cells, which have no .split().
    # NOTE(review): sheet_instance is presumably a gspread worksheet, whose
    # get_all_records() converts numeric-looking cells to int/float -- confirm.
    cleaned_codes = [
        separator.join(OrderedDict.fromkeys(str(cell).split(separator)))
        for cell in mydata['coursecode']
    ]

    # Swap the old column for the cleaned one.
    mydata.drop('coursecode', inplace=True, axis=1)
    # DataFrame.insert rejects a position beyond the current column count, so
    # clamp the historical position (2) for frames with fewer columns.
    position = min(2, len(mydata.columns))
    mydata.insert(position, "coursecode", cleaned_codes, allow_duplicates=True)
    return mydata