View dataaspirant-bag-of-words-implementation.py
## dependencies | |
import pandas as pd | |
import nltk | |
import numpy as np | |
from nltk.corpus import stopwords | |
from nltk.tokenize import sent_tokenize as st | |
from nltk.stem import WordNetLemmatizer as wordnet | |
import re |
View dataaspirant-bag-of-words-requried-pacakges.py
pip install nltk | |
pip install pandas | |
pip install numpy | |
## After installing the packages, run the code below. | |
nltk.download() |
View dataaspirant-regularization-ridge-regression-prediction.py
## Ridge Regression Predictions | |
final_model = Ridge(alpha=0.25) | |
final_model.fit(X_train, y_train) | |
print(final_model.score(X_test, y_test)) | |
## Output: 0.6973569341182368 |
View dataaspirant-regularization-ridge-regression.py
## Ridge regression | |
## two lists to hold alpha values and cross-validation scores | |
alpha = [] | |
ridge_scores = [] | |
## loop over different alpha values | |
for i in range(1,10): | |
ridge_model = Ridge(alpha=0.25*i) |
View dataaspirant-regularization-lasoo-regression.py
# Lasso regression | |
## two lists to hold alpha values and cross-validation scores | |
alpha = [] | |
lasso_scores = [] | |
## We'll loop over different alpha values to find the one that gives the best score. | |
## loop over different alpha values | |
for i in range(1,10): |
View dataaspirant-regularization-linear-regression-model.py
## Linear regression model | |
linearModel = LinearRegression() | |
linearModel.fit(X_train, y_train) | |
## Evaluating the model | |
print(linearModel.score(X_test, y_test)) | |
""" |
View dataaspirant-regularization-load-house-price-data.py
## import the required libraries | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
from statistics import mean | |
from sklearn.linear_model import LinearRegression, Ridge, Lasso | |
from sklearn.model_selection import train_test_split, cross_val_score | |
## load the dataset | |
df = pd.read_csv("kc_house_data.csv") |
View dataaspirant-pca-visualization.py
## PCA Visualization
# Project the scaled data (`transformed`) through the PCA object's transform().
a_std = pca.transform(transformed)
plt.figure()
plt.xlabel("Principal Component 1")
plt.ylabel("Principal Component 2")
# Set the title once. The original called plt.title twice (fontsize=30, then
# fontsize=40); only the later call took effect, so that one is kept.
plt.title(label="PCA Visualization", fontsize=40, color="blue")
# Plot the first two projected columns against each other.
# NOTE(review): `b` is presumably the per-sample label/target used for
# colouring -- it is defined outside this snippet; confirm.
plt.scatter(a_std[:, 0], a_std[:, 1], c=b)
View dataaspirant-pca-create-two-pca.py
## Scale the features before PCA
# Use the named scaler instance instead of a second throwaway StandardScaler(),
# so `std` is fitted and can be reused (e.g. std.transform on new data).
std = StandardScaler()
transformed = std.fit_transform(a)
## Two PCA components
pca = convers_pca(no_of_components=2)
pca.fit(transformed)
# Inspect what the fit produced.
print(pca.eigen_vectors)
print(pca.eigen_values)
print(pca.sorted_components)
View dataaspirant-pca-create-pca.py
class convers_pca(): | |
def __init__(self, no_of_components): | |
self.no_of_components = no_of_components | |
self.eigen_values = None | |
self.eigen_vectors = None | |
def transform(self, a): | |
return np.dot(a - self.mean, self.projection_matrix.T) |
NewerOlder