Skip to content

Instantly share code, notes, and snippets.

@aylmerbritto
Created August 15, 2020 11:34
Show Gist options
  • Save aylmerbritto/657b31bb4c3390cbd110770b78fa101d to your computer and use it in GitHub Desktop.
Save aylmerbritto/657b31bb4c3390cbd110770b78fa101d to your computer and use it in GitHub Desktop.
So in yknot we took this initiative of distributing T-shirts to our college peeps. We collected information of people who were interested in the beginning to choose their affordable price range, colour and sizes. And in the second information we collected their payment details. At the end of the day we had to spend a lot of time to manually pic…
import pandas as pd
def findDuplicates(df,key):
duplicates = df[df.duplicated([key], keep=False)]
print(duplicates)
return duplicates
def notPaidList(wholeSheet,paidSheet, Key):
#print("=================================")
whSheetMerged= wholeSheet.merge(paidSheet, on=Key, how="left", indicator=True)
final = whSheetMerged[whSheetMerged["_merge"] == "left_only"].drop(columns=["_merge"])
#print("=================================")
print("%d people yet to pay \n\n \n" %(len(final)-1))
#print("=================================")
return final
def findCulprits(wholeSheet,paidSheet):
#print("=================================")
whSheetMerged= paidSheet.merge(wholeSheet, on=['Mobile Number', 'Email Address'], how="left", indicator=True)
final = whSheetMerged[whSheetMerged["_merge"] == "both"].drop(columns=["_merge"])
print("same phone number and mail address")
print("=================================")
print("%d people have paid" %(len(final)-1))
notPaidList(wholeSheet,paidSheet,['Mobile Number', 'Email Address'])
final = final.drop(columns=["Name_y"])
final = final.rename(columns = {'Name_x':'Name'})
#print("=================================")
whSheetMerged= paidSheet.merge(wholeSheet, on=['Email Address'], how="left", indicator=True)
correctMail = whSheetMerged[whSheetMerged["_merge"] == "both"].drop(columns=["_merge"])
#print(final)
print("same mail address and different phone number")
print("=============================================")
print("%d people have paid with same mail address and different phone number" %(len(correctMail)-1))
notPaidList(wholeSheet,paidSheet,['Email Address'])
tempMail = correctMail.loc[correctMail["Mobile Number_x"] != correctMail["Mobile Number_y"]]
tempMail = tempMail.drop(columns=["Mobile Number_y","Name_y"])
tempMail = tempMail.rename(columns = {'Name_x':'Name','Mobile Number_x':'Mobile Number'})
#print("=================================")
whSheetMerged= paidSheet.merge(wholeSheet, on=['Mobile Number'], how="left", indicator=True)
correctPhone = whSheetMerged[whSheetMerged["_merge"] == "both"].drop(columns=["_merge"])
#print(final)
print("with same phone number and different mail address")
print("==================================================")
print("%d people have paid with same phone number and different mail address" %(len(correctPhone)-1))
notPaidList(wholeSheet,paidSheet,['Mobile Number'])
tempPhone = correctPhone.loc[correctPhone["Email Address_x"] != correctPhone["Email Address_y"]]
tempPhone = tempPhone.drop(columns=["Email Address_y","Name_y"])
tempPhone = tempPhone.rename(columns = {'Name_x':'Name','Email Address_x':'Email Address'})
final = final.append(tempMail)
final = final.append(tempPhone)
mergePaid = paidSheet.merge(final, how="left", indicator=True)
dumbestCulprits = mergePaid.loc[mergePaid["_merge"] == "left_only"].drop(columns=["_merge"])
#final = final.append(dumbestCulprits)
print(dumbestCulprits)
final['Mobile Number'] = [str(int(x))[-10:] for x in final['Mobile Number']]
final.to_csv('paidAndShirtSize.csv')
print(len(final),len(paidSheet))
def findSize():
final = pd.read_csv('paidAndShirtSize.csv')
sizeData = final.pivot_table(index=['Shirt size'], aggfunc='size')
print(sizeData)
print("=================================")
sheet1 = 'sheets.csv'
sheet2 = 'paid.csv'
responses = pd.read_csv(sheet1)
responses = responses.dropna()
paidList = pd.read_csv(sheet2)
paidList = paidList.dropna()
#notPaid=notPaidList(responses,paidList,'Mobile Number')
#Removes Country codes
#notPaid['Mobile Number'] = [str(int(x))[-10:] for x in notPaid['Mobile Number']]
#notPaid.to_csv('notPaid.csv')
#findCulprits(responses,paidList)
findSize()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment