Skip to content

Instantly share code, notes, and snippets.

# Attach to every row the mean "Purchase" of the "User_ID" group that row
# belongs to; transform() keeps the original row index, so the result can be
# assigned directly as a new column.
df["User_Mean"] = df.groupby(by="User_ID")["Purchase"].transform("mean")
#importing libraries
import pandas as pd
import random
# Synthetic frame used by the timing snippets: one million rows with a
# categorical key column 'C' (one of 'a'/'b'/'c') and two integer columns
# 'A' and 'B' drawn uniformly from 1..10 (inclusive).
data = pd.DataFrame(
    {
        'C': [random.choice(('a', 'b', 'c')) for _ in range(10 ** 6)],
        'A': [random.randint(1, 10) for _ in range(10 ** 6)],
        'B': [random.randint(1, 10) for _ in range(10 ** 6)],
    }
)
%%timeit
# `%%timeit` is an IPython cell magic: it must stay the first line of its
# notebook cell and times all the statements that follow it in that cell.
# Approach 1: aggregate per group, then merge the group means back onto rows.
# NOTE(review): the result of this first aggregation is discarded and the same
# aggregation is recomputed on the next line, so it only adds to the measured
# time - confirm whether that is intentional.
data.groupby('C')["A"].mean()
# Mean of 'A' per value of 'C', renamed to "N" and turned into a two-column
# frame (C, N) by reset_index().
mean =data.groupby('C')["A"].mean().rename("N").reset_index()
# No `on=` is passed, so merge joins on the columns the two frames share
# (presumably just 'C' here - verify against the columns of `data`).
df_1 = data.merge(mean)
%%timeit
# Approach 2 (timed by the cell magic above): transform('mean') returns the
# per-group mean of 'A' aligned to the original rows, so it is assigned
# straight to a new column without a separate merge step.
data['N3'] = data.groupby(['C'])['A'].transform('mean')
# Sum columns a, b and c into a new column d.
# Column-wise addition replaces the former row-by-row
# df.apply(lambda row: ..., axis=1): it yields the same per-row values but runs
# as whole-column operations instead of calling a Python lambda once per row,
# and it still produces a (empty) column when df has no rows.
df['d'] = df['a'] + df['b'] + df['c']
# Function to calculate VIF
def calculate_vif(data):
    """Compute the variance inflation factor (VIF) of every column of ``data``.

    Each column in turn is regressed on all the remaining columns with
    ``sm.OLS`` and its VIF is ``1 / (1 - R^2)``, rounded to two decimals.

    Args:
        data: pandas DataFrame whose columns are the variables to assess.
            No intercept column is added here; the regressions use the
            columns exactly as given.

    Returns:
        A DataFrame with columns ``'Var'`` (column name) and ``'Vif'``, one
        row per column of ``data``, in column order with index 0..n-1.
        Previously the table was built but never returned, so every call
        evaluated to ``None``.

    Note:
        ``sm`` is not imported inside this function; presumably it is
        ``statsmodels.api`` imported as ``sm`` elsewhere - confirm.
    """
    rows = []
    for name in data.columns:
        target = data[name]
        predictors = data[data.columns.drop([name])]
        r_squared = sm.OLS(target, predictors).fit().rsquared
        if r_squared == 1:
            # Perfectly explained by the other columns: the VIF is unbounded.
            vif = float('inf')
        else:
            vif = round(1 / (1 - r_squared), 2)
        rows.append([name, vif])
    # Build the table once instead of growing it row by row with .loc[i].
    return pd.DataFrame(rows, columns=['Var', 'Vif'])
# Drop the first column (selected by position) before computing the VIFs.
df = df.drop(df.columns[[0]], axis=1)
calculate_vif(df)
# DataFrame.info() prints its summary itself. The former `print df.info` was a
# Python 2 print statement (a syntax error on Python 3) and, even there, only
# printed the bound method because info was never called.
df.info()
#importing the libraries
import pandas as pd
import numpy as np
# Read "Salary.csv" (path resolved against the current working directory)
# into the DataFrame `df`.
df = pd.read_csv(filepath_or_buffer="Salary.csv")