Skip to content

Instantly share code, notes, and snippets.

View skillcate's full-sized avatar

Gopal Sharma skillcate

View GitHub Profile
@skillcate
skillcate / fakenews_setup_env.py
Created October 2, 2022 10:29
fakenews_setup_env
# Install specific libraries
! pip install transformers
! pip install pycaret
import numpy as np
import pandas as pd
import pycaret
import transformers
from transformers import AutoModel, BertTokenizerFast
import matplotlib.pyplot as plt
from sklearn.metrics import plot_confusion_matrix
@skillcate
skillcate / bert_walkthrough.py
Created October 1, 2022 09:14
bert_walkthrough
# Set up environment: install the Hugging Face `transformers` package.
# The leading `!` is notebook shell-escape syntax, so this line is meant to be
# run in a Jupyter/Colab cell rather than as a plain Python script.
!pip install transformers
from transformers import pipeline
# Build a 'fill-mask' pipeline backed by the 'bert-base-uncased' checkpoint.
unmasker = pipeline('fill-mask', model='bert-base-uncased')
# To freak you out :)
# Ask the pipeline for candidates to put in place of the [MASK] placeholder.
unmasker("Artificial Intelligence [MASK] take over the world.")
# Understanding context..
# Here the surrounding sentence is the only clue to what the masked word should be.
unmasker("My wife is so obsessed with cleanliness, that [MASK] will throw me out of the house one day.")
@skillcate
skillcate / SentimentPipeline_app.py
Created September 26, 2022 08:14
SentimentPipeline_app
# Library imports
import pandas as pd
import numpy as np
import spacy
from sklearn.svm import LinearSVC
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
import joblib
import string
from spacy.lang.en.stop_words import STOP_WORDS
@skillcate
skillcate / FlightFare_Model.py
Created August 28, 2022 07:46
FlightFare_Model
# Split data into 80:20 Training Test
# X (features) and y (target) are expected to be defined before this snippet runs.
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model training
# The forest is seeded with the same fixed value as the split: without a
# random_state the bootstrap samples and feature sub-sampling change on every
# run, so the fitted model (and any score computed from it) is not reproducible
# even though the train/test partition is.
rf_reg = RandomForestRegressor(random_state=42)
rf_reg.fit(X_train, y_train)
# Model R2 on Training & Test
@skillcate
skillcate / FlightFare_FeatureSelection.py
Created August 28, 2022 07:42
FlightFare_FeatureSelection
# Rebuild the feature matrix without the 'Source_Delhi' column.
# The list order is the column order of X, so it is kept exactly as before.
selected_features = [
    # Stops, journey date, departure/arrival clock times and duration
    'Total_Stops', 'journey_day', 'journey_month',
    'dep_hour', 'dep_min',
    'arrival_hour', 'arrival_min',
    'Duration_hours', 'Duration_mins',
    # Airline_* columns
    'Airline_Air India', 'Airline_GoAir', 'Airline_IndiGo',
    'Airline_Jet Airways', 'Airline_Multiple carriers', 'Airline_Other',
    'Airline_SpiceJet', 'Airline_Vistara',
    # Source_* columns ('Source_Delhi' deliberately left out)
    'Source_Chennai', 'Source_Kolkata', 'Source_Mumbai',
    # Destination_* columns
    'Destination_Cochin', 'Destination_Delhi', 'Destination_Hyderabad',
    'Destination_Kolkata',
]
X = data_train.loc[:, selected_features]
X.head()
@skillcate
skillcate / FlightFare_VIF.py
Created August 28, 2022 07:35
FlightFare_VIF
# Checking for Multicollinearity
from statsmodels.stats.outliers_influence import variance_inflation_factor
def calc_vif(z):
    """Return a table with the Variance Inflation Factor (VIF) of each column of ``z``.

    Parameters
    ----------
    z : object exposing ``.columns``, ``.values`` and ``.shape`` (used here as
        a pandas DataFrame of candidate features).

    Returns
    -------
    pandas.DataFrame
        Two columns: ``"variables"`` (the column labels of ``z``) and
        ``"VIF"`` (the value of ``variance_inflation_factor`` for the column
        at the same position).
    """
    # NOTE(review): the matrix is passed to statsmodels exactly as given (no
    # intercept column is added here) -- confirm that is what is intended.
    column_count = z.shape[1]
    scores = [
        variance_inflation_factor(z.values, position)
        for position in range(column_count)
    ]

    report = pd.DataFrame()
    report["variables"] = z.columns
    report["VIF"] = scores
    return report
# Compute VIF on X
@skillcate
skillcate / FlightFare_FeatureImp.py
Last active August 28, 2022 07:33
FlightFare_FeatureImp
# Predictor columns, in the exact order they should appear in X.
predictor_columns = [
    # Stops, journey date, departure/arrival clock times and duration
    'Total_Stops', 'journey_day', 'journey_month',
    'dep_hour', 'dep_min',
    'arrival_hour', 'arrival_min',
    'Duration_hours', 'Duration_mins',
    # Airline_* columns
    'Airline_Air India', 'Airline_GoAir', 'Airline_IndiGo',
    'Airline_Jet Airways', 'Airline_Multiple carriers', 'Airline_Other',
    'Airline_SpiceJet', 'Airline_Vistara',
    # Source_* columns
    'Source_Chennai', 'Source_Delhi', 'Source_Kolkata', 'Source_Mumbai',
    # Destination_* columns
    'Destination_Cochin', 'Destination_Delhi', 'Destination_Hyderabad',
    'Destination_Kolkata',
]
X = data_train.loc[:, predictor_columns]

# The target is picked by position: the second column (index 1) of data_train.
# NOTE(review): presumably this is the fare/price column -- confirm against the
# column order of data_train, since a positional lookup breaks if it changes.
y = data_train.iloc[:, 1]

# Quick sanity check on the dimensions of the features and the target.
print(X.shape, y.shape)
@skillcate
skillcate / FlightFare_EDA8.py
Created August 28, 2022 07:28
FlightFare_EDA8
# Build data_train in one pass:
#   1. place dataset, Airline, Source and Destination side by side (column-wise),
#   2. discard the columns labelled "Airline", "Source" and "Destination".
data_train = (
    pd.concat([dataset, Airline, Source, Destination], axis="columns")
    .drop(columns=["Airline", "Source", "Destination"])
)
data_train.head()
@skillcate
skillcate / FlightFare_EDA7.py
Created August 28, 2022 07:27
FlightFare_EDA7
# Additional_Info contains almost 80% no_info
# Route and Total_Stops are related to each other
dataset.drop(columns=["Route", "Additional_Info"], inplace=True)

# Feature engineering on: Total_Stops
print(dataset["Total_Stops"].value_counts())

# Total_Stops is an ordinal categorical feature, so each label is mapped by
# hand to the number of stops it stands for (this is a manual ordinal mapping,
# not sklearn's LabelEncoder).
# The mapping is applied to the Total_Stops column only: a frame-wide
# dataset.replace(...) would also rewrite any matching string found in the
# other columns. Labels missing from the mapping are left untouched.
dataset["Total_Stops"] = dataset["Total_Stops"].replace(
    {"non-stop": 0, "1 stop": 1, "2 stops": 2, "3 stops": 3, "4 stops": 4}
)
dataset.head()
@skillcate
skillcate / FlightFare_EDA6.py
Created August 28, 2022 07:23
FlightFare_EDA6
# Feature engineering on: Destination
print(dataset["Destination"].value_counts())
# Renaming destination 'New Delhi' to 'Delhi' - to match with Source
Destination = dataset[["Destination"]]
Current_Destination_List = Destination['Destination']
New_Destination_List = []
for value in Current_Destination_List:
if value in ['New Delhi']:
New_Destination_List.append('Delhi')
else: