-
-
Save Koyel-Chakraborty/c8a0c3c03725aae0b2f498d50c36a86e to your computer and use it in GitHub Desktop.
pro3
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
import pandas as pd | |
import sklearn as sk | |
from sklearn.preprocessing import LabelEncoder | |
# Load the raw export. 'raw_data.csv' is simply the name the downloaded file
# was saved under locally -- change it to match the file name in your case.
unused_columns = [
    'identifierHash',
    'type',
    'country',
    'language',
    'civilityTitle',
    'civilityGenderId',
    'seniorityAsMonths',
    'seniorityAsYears',
    'countryCode',
    'hasAndroidApp',
    'hasIosApp',
]
data = pd.read_csv('raw_data.csv')
# These columns are not used by any later step of this script.
data.drop(columns=unused_columns, inplace=True)
def pure_round(num):
    """Round *num* to an integer, sending a fractional part of .5 or more up.

    The integer part is obtained with ``int()``, which truncates toward zero,
    so a negative input never has a fractional part >= 0.5 and is simply
    truncated (e.g. ``-1.5`` gives ``-1``).
    """
    whole = int(num)
    remainder = num - float(whole)
    return whole + 1 if remainder >= 0.5 else whole
# Turn the pass-rate figure into absolute passed / failed product counts.
#
# Rows with no sold products carry no pass/fail information, so remove them
# first with a single mask rather than dropping them one at a time inside a
# row-by-row loop (and before spending any work computing counts for them).
no_sales_mask = (data['productsSold'] == 0).to_numpy()
data.drop(index=data.index[no_sales_mask], inplace=True)

sold_counts = data['productsSold']
# productsPassRate is treated as a percentage here (hence the division by
# 100); pure_round sends a fractional part of .5 or more up.
data['productsPassed'] = (
    (sold_counts * data['productsPassRate']) / 100
).map(pure_round)
data['productsFailed'] = sold_counts - data['productsPassed']

# The two source columns are fully represented by the derived counts.
data.drop(['productsPassRate', 'productsSold'], axis=1, inplace=True)
# Replace each categorical column with an integer-coded twin whose name is the
# original name prefixed by '_'. Encodedict keeps, per new column, the array
# of original classes so the codes can be mapped back to labels later.
Encodedict = {}
categorical_columns = ['gender', 'hasAnyApp', 'hasProfilePicture']
for column in categorical_columns:
    encoded_name = '_' + column
    encoder = LabelEncoder()
    data[encoded_name] = encoder.fit_transform(list(data[column]))
    Encodedict[encoded_name] = encoder.classes_
data.drop(columns=categorical_columns, inplace=True)
# Expand the per-seller counts into one row per product: every passed product
# becomes a row labelled Fraud=0 and every failed product a row labelled
# Fraud=1, with the seller's remaining columns copied onto each row.
#
# NOTE: this section used to read from `data3`, which is never defined in this
# script, and stored each seller's expansion under the stale loop variable `i`
# instead of the current row, so every seller overwrote the same entry. It now
# works on `data` directly and keeps every seller's rows.
passed_counts = data['productsPassed'].astype(int).to_numpy()
failed_counts = data['productsFailed'].astype(int).to_numpy()

# A fresh 0..n-1 index identifies each source row, so the repeated rows can be
# put back into source order after concatenation.
features = data.drop(
    ['productsPassed', 'productsFailed'], axis=1
).reset_index(drop=True)
genuine_rows = features.loc[features.index.repeat(passed_counts)].assign(Fraud=0)
fraud_rows = features.loc[features.index.repeat(failed_counts)].assign(Fraud=1)

# A stable sort on the source-row index groups each seller's rows together
# while keeping that seller's Fraud=0 rows ahead of its Fraud=1 rows.
whole_df = (
    pd.concat([genuine_rows, fraud_rows])
    .sort_index(kind='mergesort')
    .reset_index(drop=True)
)
whole_df.to_csv('modified_data.csv')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment