We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 6 columns, instead of 5 in line 5.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
S.No., Group Name, Group Link, Number of Members in K*, Social Media, Average Rating | |
3, Data Science - R & Python, https://www.facebook.com/groups/AnalyticsEdge/, 217, Facebook, 88.7 | |
5, Data Science, https://www.facebook.com/groups/DataScienceGroup/, 137, Facebook , 88.6 | |
15, Data Science with Python, https://www.facebook.com/groups/1006538092836222/, 56.5, Facebook, 84.5 | |
6, Machine Learning and Data Science, https://www.linkedin.com/groups/4298680/, 128, Linkedin, 68.34 | |
13, Python Data Science and Machine Learning, https://www.linkedin.com/groups/4388870/, 63, L |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Cross Validation | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from sklearn.model_selection import RepeatedKFold | |
from sklearn.model_selection import cross_val_score | |
import seaborn as sns | |
from sklearn.linear_model import LinearRegression, Lasso,ElasticNet, Ridge, MultiTaskLasso, LassoLars, OrthogonalMatchingPursuit | |
from sklearn.model_selection import train_test_split |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Regularization | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from sklearn.linear_model import LinearRegression, Lasso,ElasticNet, Ridge, MultiTaskLasso, LassoLars, OrthogonalMatchingPursuit | |
from sklearn.model_selection import train_test_split | |
from sklearn import metrics |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Test Train Split | |
import numpy as np | |
import pandas as pd | |
from sklearn.linear_model import LinearRegression | |
from sklearn.model_selection import train_test_split | |
from sklearn import metrics | |
class predit: | |
def bestFitLine(self): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Log transformation to remove skewness.
# The target is always transformed; a feature column is transformed only
# when the absolute value of its skewness exceeds 0.3.
y = np.log1p(y)
for col in x.columns:
    col_skewness = x[col].skew()
    if np.abs(col_skewness) > 0.3:
        # log1p computes log(1 + value).
        x[col] = np.log1p(x[col])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Min-max scale the selected feature columns.
# Only the columns listed in header_new are carried into x; the original note
# names RAD, CHAS and ZN as the attributes left out of the scaling.
from sklearn import preprocessing

header_new = ['CRIM', 'INDUS', 'NOX', 'RM', 'AGE','DIS','TAX', 'PTRATIO','B','LSTAT']
min_max_scaler = preprocessing.MinMaxScaler()

# Target column, taken unscaled.
y = data['MEDV']

# Fit the scaler on the selected columns and rebuild a labelled frame.
x = data.loc[:, header_new]
scaled_values = min_max_scaler.fit_transform(x)
# NOTE(review): x is rebuilt without passing an index while y keeps data's
# index -- confirm nothing downstream aligns x and y by label.
x = pd.DataFrame(data=scaled_values, columns=header_new)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find the percentage of outliers in every column.
# A value counts as an outlier when it lies strictly outside the fences
# q1 - 1.5 * IQR and q3 + 1.5 * IQR. The comparison must be strict: with a
# non-strict test, a column whose IQR is 0 (q1 == q3) would have every value
# equal to the quartiles reported as an outlier.
n_rows = len(data)
for k, v in data.items():
    q1 = v.quantile(0.25)
    q3 = v.quantile(0.75)
    IQR = q3 - q1
    v_col = v[(v < q1 - 1.5 * IQR) | (v > q3 + 1.5 * IQR)]
    # Guard against an empty frame so the report prints 0.00% instead of
    # raising ZeroDivisionError.
    percentage = len(v_col) * 100.0 / n_rows if n_rows else 0.0
    print("Column %s outliers = %.2f%%" % (k, percentage))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Heatmap to find the correlated features.
# Absolute values are plotted, so the sign of each correlation is not shown.
plt.figure(figsize=(25, 10))
abs_corr = data.corr().abs()
sns.heatmap(abs_corr, annot=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualize the scale of the data: one horizontal box per column on a shared
# logarithmic x-axis.
import seaborn as sns
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(15, 7))
ax.set_xscale("log")
# A single call draws every column. The previous per-column loop never used
# its loop variables and redrew this same plot once per column.
# NOTE(review): the earlier code passed `df` here; `data` is used instead,
# matching the loop it sat in -- confirm `df` was not a distinct frame.
sns.boxplot(data=data, orient='h', ax=ax)
# Turn the grid on explicitly rather than relying on plt.grid() with no
# arguments.
ax.grid(True)
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualize box plots for outliers: one panel per column on a 3 x 5 grid.
fig, axs = plt.subplots(ncols=5, nrows=3, figsize=(20, 10))
axs = axs.flatten()
for index, k in enumerate(data.columns):
    sns.boxplot(x=k, data=data, ax=axs[index], color="orange")
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=5.0)

# Drop the rows whose MEDV is 50.0 or more, then report the remaining shape.
# The mask is written as a negation so rows with a missing MEDV are kept.
medv_at_or_above_50 = data['MEDV'] >= 50.0
data = data[~medv_at_or_above_50]
print(data.shape)
NewerOlder