Skip to content

Instantly share code, notes, and snippets.

@Koyel-Chakraborty
Created June 3, 2021 08:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save Koyel-Chakraborty/c8a0c3c03725aae0b2f498d50c36a86e to your computer and use it in GitHub Desktop.
Save Koyel-Chakraborty/c8a0c3c03725aae0b2f498d50c36a86e to your computer and use it in GitHub Desktop.
pro3
import numpy as np
import pandas as pd
import sklearn as sk
from sklearn.preprocessing import LabelEncoder
# Load the raw dataset. 'raw_data.csv' is the name the author gave the
# downloaded file locally; change the path if your copy is named differently.
DROPPED_COLUMNS = [
    'identifierHash',
    'type',
    'country',
    'language',
    'civilityTitle',
    'civilityGenderId',
    'seniorityAsMonths',
    'seniorityAsYears',
    'countryCode',
    'hasAndroidApp',
    'hasIosApp',
]
# These columns are not used by any later step of the script.
data = pd.read_csv('raw_data.csv').drop(columns=DROPPED_COLUMNS)
def pure_round(num):
    """Round ``num`` to the nearest integer, with halves rounded up.

    Unlike the built-in ``round`` (which rounds halves to the nearest even
    integer), a fractional part of exactly 0.5 always moves to the next
    higher integer: 2.5 -> 3 and -2.5 -> -2.

    Args:
        num: A finite real number (int, float, or NumPy scalar).

    Returns:
        The rounded value as an ``int``.

    Raises:
        ValueError: If ``num`` is NaN.
        OverflowError: If ``num`` is infinite.
    """
    import math  # function-scope import keeps this helper self-contained

    # Floor rather than truncate: truncation leaves a negative "fraction"
    # for negative inputs, so they would never be rounded away from zero.
    integer = math.floor(num)
    if num - integer >= 0.5:
        integer += 1
    return integer
# Convert the pass-rate percentage into absolute passed/failed product counts
# and discard every row whose 'productsSold' is 0.
# The rows are filtered once up front: dropping them one by one inside a loop
# copies the whole frame on every drop and mutates it while it is being walked.
data = data[data['productsSold'] != 0].copy()

# productsPassRate is a percentage, hence the division by 100.
estimated_passed = data['productsSold'] * data['productsPassRate'] / 100
data['productsPassed'] = [pure_round(value) for value in estimated_passed]
data['productsFailed'] = data['productsSold'] - data['productsPassed']

# The two source columns are fully represented by the derived counts.
data.drop(columns=['productsPassRate', 'productsSold'], inplace=True)
# Integer-encode the categorical columns. Each encoded column is stored under
# the original name prefixed with '_', and Encodedict maps that new name to
# the encoder's classes so the integer codes can be decoded later.
CATEGORICAL_COLUMNS = ['gender', 'hasAnyApp', 'hasProfilePicture']
Encodedict = {}
for column in CATEGORICAL_COLUMNS:
    encoder = LabelEncoder()
    encoded_name = '_{}'.format(column)
    data[encoded_name] = encoder.fit_transform(list(data[column]))
    Encodedict[encoded_name] = encoder.classes_

# The raw categorical columns are replaced by their encoded counterparts.
data.drop(columns=CATEGORICAL_COLUMNS, inplace=True)
# Expand every row of `data` into one output record per product:
# 'productsPassed' copies labelled Fraud = 0 followed by 'productsFailed'
# copies labelled Fraud = 1. Records stay grouped by their source row, in the
# source row order, and the result is written to 'modified_data.csv'.
passed_counts = data['productsPassed'].astype(int).to_numpy()
failed_counts = data['productsFailed'].astype(int).to_numpy()
row_positions = np.arange(len(data))

# Positional index of the source row behind every output record.
passed_rows = np.repeat(row_positions, passed_counts)
failed_rows = np.repeat(row_positions, failed_counts)
source_rows = np.concatenate([passed_rows, failed_rows])
fraud_labels = np.concatenate([
    np.zeros(len(passed_rows), dtype=int),
    np.ones(len(failed_rows), dtype=int),
])

# A stable sort regroups the records by source row while keeping each row's
# passed copies ahead of its failed copies.
order = np.argsort(source_rows, kind='stable')

whole_df = (
    data.drop(columns=['productsPassed', 'productsFailed'])
    .iloc[source_rows[order]]
    .reset_index(drop=True)
)
whole_df['Fraud'] = fraud_labels[order]
whole_df.to_csv('modified_data.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment