View Survey.csv
We can make this file beautiful and searchable if this error is corrected: It looks like row 6 should actually have 6 columns, instead of 5. in line 5.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
S.No., Group Name, Group Link, Number of Members in K*, Social Media, Average Rating | |
3, Data Science - R & Python, https://www.facebook.com/groups/AnalyticsEdge/, 217, Facebook, 88.7 | |
5, Data Science, https://www.facebook.com/groups/DataScienceGroup/, 137, Facebook , 88.6 | |
15, Data Science with Python, https://www.facebook.com/groups/1006538092836222/, 56.5, Facebook, 84.5 | |
6, Machine Learning and Data Science, https://www.linkedin.com/groups/4298680/, 128, Linkedin, 68.34 | |
13, Python Data Science and Machine Learning, https://www.linkedin.com/groups/4388870/, 63, L |
View CrossValidation.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Cross Validation | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from sklearn.model_selection import RepeatedKFold | |
from sklearn.model_selection import cross_val_score | |
import seaborn as sns | |
from sklearn.linear_model import LinearRegression, Lasso,ElasticNet, Ridge, MultiTaskLasso, LassoLars, OrthogonalMatchingPursuit | |
from sklearn.model_selection import train_test_split |
View Regularization.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Regularization | |
import numpy as np | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from sklearn.linear_model import LinearRegression, Lasso,ElasticNet, Ridge, MultiTaskLasso, LassoLars, OrthogonalMatchingPursuit | |
from sklearn.model_selection import train_test_split | |
from sklearn import metrics |
View TestTrain.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Test Train Split | |
import numpy as np | |
import pandas as pd | |
from sklearn.linear_model import LinearRegression | |
from sklearn.model_selection import train_test_split | |
from sklearn import metrics | |
class predit: | |
def bestFitLine(self): |
View LogTransformation.Py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Log transformation to remove skewness.
# `y` is always replaced by log1p(y); a column of `x` is replaced by its
# log1p only when the absolute value of its skewness is above the threshold.
SKEW_THRESHOLD = 0.3

y = np.log1p(y)
for feature in x.columns:
    skewness = x[feature].skew()
    # Keep the "> threshold" form: a NaN skewness compares False and the
    # column is left untouched.
    if np.abs(skewness) > SKEW_THRESHOLD:
        x[feature] = np.log1p(x[feature])
View Scalecolumns.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scale all the attributes except RAD, CHAS, ZN with min-max scaling.
from sklearn import preprocessing

min_max_scaler = preprocessing.MinMaxScaler()
header_new = [
    'CRIM', 'INDUS', 'NOX', 'RM', 'AGE',
    'DIS', 'TAX', 'PTRATIO', 'B', 'LSTAT',
]
x = data.loc[:, header_new]
y = data['MEDV']
# The scaled values come back without row labels, so re-attach the labels of
# the selected rows. Without `index=` the new frame gets a fresh 0..n-1 index
# and no longer lines up with `y` whenever rows were dropped from `data`
# before this point.
x = pd.DataFrame(
    data=min_max_scaler.fit_transform(x),
    columns=header_new,
    index=x.index,
)
View OutliersPercentage.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Find the percentage of outliers in every column.
# A value is an outlier when it lies strictly more than 1.5 * IQR below the
# first quartile or strictly more than 1.5 * IQR above the third quartile.
total_rows = len(data)
for k, v in data.items():
    q1 = v.quantile(0.25)
    q3 = v.quantile(0.75)
    IQR = q3 - q1
    lower_fence = q1 - 1.5 * IQR
    upper_fence = q3 + 1.5 * IQR
    # Strict comparisons on purpose: with inclusive bounds a column whose
    # IQR is 0 (q1 == q3) would count every value equal to the quartile as
    # an outlier.
    v_col = v[(v < lower_fence) | (v > upper_fence)]
    percentage = len(v_col) * 100.0 / total_rows
    print("Column %s outliers = %.2f%%" % (k, percentage))
View HeatMap.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Heat map of the absolute pairwise correlations, to find correlated features.
plt.figure(figsize=(25, 10))
abs_corr = data.corr().abs()
sns.heatmap(abs_corr, annot=True)
View ScaleOfData.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualize the scale of the data: horizontal box plots of the columns of
# `data` drawn on one logarithmic x-axis.
import seaborn as sns
import matplotlib.pyplot as plt

fig, ax = plt.subplots(figsize=(15, 7))
ax.set_xscale("log")
# One call with the whole frame is enough; it is drawn on `ax` explicitly so
# the log-scaled axes created above are the ones used.
# NOTE(review): assumes `data` is the frame meant to be plotted - confirm.
sns.boxplot(data=data, orient='h', ax=ax)
plt.grid()
plt.tight_layout(pad=0.5, w_pad=0.7, h_pad=5.0)
View VisualizeBoxPlots.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Visualize box plots for outliers: one panel per column of `data`, laid out
# on a grid of 3 rows by 5 columns.
fig, axs = plt.subplots(ncols=5, nrows=3, figsize=(20, 10))
axs = axs.flatten()
for index, k in enumerate(data.columns):
    sns.boxplot(x=k, data=data, ax=axs[index], color="orange")
plt.tight_layout(pad=0.4, w_pad=0.5, h_pad=5.0)

# Drop the rows whose MEDV is >= 50.0. The negated mask is kept as-is so rows
# with a missing MEDV are retained.
data = data[~(data['MEDV'] >= 50.0)]
print(data.shape)
NewerOlder