Skip to content

Instantly share code, notes, and snippets.

# Load the position/salary table from the working directory.
dataset = pd.read_csv('Position_Salaries.csv')

# Column 1 is taken with a 1:2 slice so the result stays a one-column frame
# and X comes out two-dimensional.
feature_frame = dataset.iloc[:, 1:2]
X = feature_frame.values

# Column 2 is taken with a scalar index, so y comes out one-dimensional.
target_series = dataset.iloc[:, 2]
y = target_series.values
#include<bits/stdc++.h> // GCC-specific convenience header that pulls in the whole C++ standard library
using namespace std; // makes names from namespace std (abs, sort, cout, ...) usable without the std:: prefix
/*
 * Comparator that orders values by magnitude, smallest absolute value
 * (e.g. smallest absolute error) first.
 *
 * Returns true when |a| is strictly less than |b|, false otherwise
 * (including when the two magnitudes are equal).
 */
bool custom_sort(double a, double b)
{
    // std::fabs is always the floating-point version. An unqualified abs()
    // only works on doubles when the std overloads happen to be in scope;
    // otherwise it binds to the C int abs(int) and truncates the operands.
    return std::fabs(a) < std::fabs(b);
}
int main()
# Read the loan-prediction CSV into a DataFrame.
file_loc = "loan_prediction.csv"
df = pd.read_csv(filepath_or_buffer=file_loc)

# Preview the first five rows; the result is only displayed in a
# notebook/REPL and is discarded when run as a script.
df.head(n=5)
# Print the distinct values of each column of interest, in a fixed order.
for column in (
    'Gender',
    'City_Category',
    'Age',
    'Stay_In_Current_City_Years',
    'Product_ID',
):
    print(train[column].unique())

# Bare expression: only shown in a notebook/REPL, discarded in a script.
train['Gender'].unique()

# Column dtypes, non-null counts and memory usage summary.
train.info()
# Group-statistic features.
#
# For each (key, statistic) pair the statistic is computed on `train` only:
#   * `train` receives it via groupby(...).transform(...), aligned row by row;
#   * the same statistic is stored as a {key: value} dict and looked up for
#     every row of `test`, falling back to 0 for keys not present in `train`.
# Every feature added to `train` is also added to `test` so both frames end
# up with the same feature columns.

# Product_Category_1 -> maximum Purchase
train["Product_Cat1_MaxPrice"] = train.groupby(['Product_Category_1'])['Purchase'].transform('max')
pc1_max_dict = train.groupby(['Product_Category_1'])['Purchase'].max().to_dict()
test['Product_Cat1_MaxPrice'] = test['Product_Category_1'].apply(lambda x:pc1_max_dict.get(x,0))

# Product_Category_1 -> mean Purchase
train["Product_Cat1_MeanPrice"] = train.groupby(['Product_Category_1'])['Purchase'].transform('mean')
pc1_mean_dict = train.groupby(['Product_Category_1'])['Purchase'].mean().to_dict()
test['Product_Cat1_MeanPrice'] = test['Product_Category_1'].apply(lambda x:pc1_mean_dict.get(x,0))

# Age -> number of training rows with that Age value
train["Age_Count"] = train.groupby(['Age'])['Age'].transform('count')
age_count_dict = train.groupby(['Age']).size().to_dict()
# Previously the dict was built but never applied, leaving `test` without
# the Age_Count column that `train` has.
test['Age_Count'] = test['Age'].apply(lambda x:age_count_dict.get(x,0))

# User_ID -> minimum Purchase
train["User_ID_MinPrice"] = train.groupby(['User_ID'])['Purchase'].transform('min')
userID_min_dict = train.groupby(['User_ID'])['Purchase'].min().to_dict()
test['User_ID_MinPrice'] = test['User_ID'].apply(lambda x:userID_min_dict.get(x,0))

# User_ID -> maximum Purchase
train["User_ID_MaxPrice"] = train.groupby(['User_ID'])['Purchase'].transform('max')
userID_max_dict = train.groupby(['User_ID'])['Purchase'].max().to_dict()
test['User_ID_MaxPrice'] = test['User_ID'].apply(lambda x:userID_max_dict.get(x,0))

# Product_ID -> minimum Purchase
train["Product_ID_MinPrice"] = train.groupby(['Product_ID'])['Purchase'].transform('min')
productID_min_dict = train.groupby(['Product_ID'])['Purchase'].min().to_dict()
# Previously the dict was built but never applied, leaving `test` without
# the Product_ID_MinPrice column that `train` has.
test['Product_ID_MinPrice'] = test['Product_ID'].apply(lambda x:productID_min_dict.get(x,0))

# User_ID -> mean Purchase
train["User_ID_MeanPrice"] = train.groupby(['User_ID'])['Purchase'].transform('mean')
userID_mean_dict = train.groupby(['User_ID'])['Purchase'].mean().to_dict()
test['User_ID_MeanPrice'] = test['User_ID'].apply(lambda x:userID_mean_dict.get(x,0))

# Product_ID -> mean Purchase
train["Product_ID_MeanPrice"] = train.groupby(['Product_ID'])['Purchase'].transform('mean')
productID_mean_dict = train.groupby(['Product_ID'])['Purchase'].mean().to_dict()
test['Product_ID_MeanPrice'] = test['Product_ID'].apply(lambda x:productID_mean_dict.get(x,0))
# Fit a decision-tree regressor with default hyperparameters; fit() returns
# the estimator itself, so construction and training are chained.
dtr = DecisionTreeRegressor().fit(X_train, Y_train)

# Predict on X_test (scored against Y_test below) and on the `test` frame
# (written to the submission file).
y_pred = dtr.predict(X_test)
y_pred_dt = dtr.predict(test)

# Store the `test` predictions in the submission frame and write it out
# without the index column.
submission['Purchase'] = y_pred_dt
submission.to_csv('dtr_model3.csv', index=False)

# Report root-mean-squared error and R^2 of the X_test predictions.
mse = mean_squared_error(Y_test, y_pred)
print("RMSE Error:", np.sqrt(mse))

r2 = r2_score(Y_test, y_pred)
print("R2 Score:", r2)