Aniruddha Bhandari (aniruddha27)

sql_window_functions_01.sql
/* Sample data */
insert into emp (EMPID, NAME, JOB, SALARY)
values
(201, 'ANIRUDDHA', 'ANALYST', 2100),
(212, 'LAKSHAY', 'DATA ENGINEER', 2700),
(209, 'SIDDHARTH', 'DATA ENGINEER', 3000),
(232, 'ABHIRAJ', 'DATA SCIENTIST', 2500),
(205, 'RAM', 'ANALYST', 2500),
(222, 'PRANAV', 'MANAGER', 4500),
(202, 'SUNIL', 'MANAGER', 4800),
GroupBy_1.py
import pandas as pd
import numpy as np
# load the Big Mart sales training data
df = pd.read_csv(r'C:\Users\Dell\Desktop\train_big_mart.csv')
df.head()
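The preview stops after loading the data; below is a small groupby sketch of the kind this gist presumably builds toward. The column names 'Outlet_Type' and 'Item_Outlet_Sales' are assumptions based on the Big Mart dataset, not taken from the preview.
# average and total sales per outlet type (hypothetical column names)
sales_by_outlet = df.groupby('Outlet_Type')['Item_Outlet_Sales'].agg(['mean', 'sum'])
print(sales_by_outlet)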
nlp_ie_12.py
import visualise_spacy_tree
from IPython.display import Image, display
# 'nlp' is the spaCy pipeline loaded elsewhere in these gists (see nlp_ie_1.py)
doc = nlp(' Last year, I spoke about the Ujjwala programme , through which, I am happy to report, 50 million free liquid-gas connections have been provided so far')
# render the dependency parse as a tree image
png = visualise_spacy_tree.create_png(doc)
display(Image(png))
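Alongside the tree image, the parse can also be inspected programmatically; a minimal sketch using the same doc object (my addition, not part of the preview):
# per-token dependency label, head and part of speech
for token in doc:
    print(token.text, token.dep_, token.head.text, token.pos_)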
Multicollinearity_import.py
import pandas as pd
# load the salary dataset
df = pd.read_csv(r'C:/Users/Dell/Desktop/salary.csv')
df.head()
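Since the gist is about multicollinearity, the natural follow-up is a variance inflation factor (VIF) check; here is a minimal sketch using statsmodels. The target column name 'Salary' is an assumption, not taken from the preview.
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pandas as pd

# keep only numeric predictors (assumption: the target column is named 'Salary')
X = df.select_dtypes(include='number').drop(columns=['Salary'], errors='ignore')
vif = pd.DataFrame({
    'feature': X.columns,
    'VIF': [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
})
print(vif)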
text detection.py
# text detection
import cv2

def contours_text(orig, img, contours):
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # draw a rectangle around each contour on the copied image
        rect = cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 255), 2)
        cv2.imshow('cnt', rect)
        cv2.waitKey()
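Detection is usually followed by recognition on each bounding box; a sketch using pytesseract under that assumption (the preview above only shows the drawing step):
import cv2
import pytesseract

def recognise_text(img, contours):
    # run OCR on each detected region (pytesseract is an assumption here)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cropped = img[y:y + h, x:x + w]
        text = pytesseract.image_to_string(cropped)
        print(text)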
tweepy_streamlistener.py
import time
import tweepy

class MyStreamListener(tweepy.StreamListener):
    def __init__(self, time_limit=300):
        self.start_time = time.time()
        self.limit = time_limit
        super(MyStreamListener, self).__init__()

    def on_connect(self):
        print("Connected to Twitter API.")
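The preview stops at on_connect; given the time_limit field, the listener presumably also overrides on_status to stop after the limit, roughly like the sketch below. The stream wiring and auth setup are assumptions, not shown in the gist.
    # inside MyStreamListener: stop the stream once the time limit is reached
    def on_status(self, status):
        if (time.time() - self.start_time) < self.limit:
            print(status.text)
            return True
        return False

# hypothetical wiring; 'auth' must come from tweepy.OAuthHandler credentials
# stream = tweepy.Stream(auth=auth, listener=MyStreamListener(time_limit=300))
# stream.filter(track=['analytics'], languages=['en'])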
Confusion matrix.py
# confusion matrix in sklearn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# actual values
actual = [1,0,0,1,0,0,1,0,0,1]
# predicted values
predicted = [1,0,0,1,0,0,0,1,0,0]
# confusion matrix
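The preview cuts off at the final comment; given the imports above, the computation presumably continues along these lines:
matrix = confusion_matrix(actual, predicted)
print(matrix)
# per-class precision, recall and F1
print(classification_report(actual, predicted))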
nlp_ie_1.py
# import spaCy
import spacy
# load the English language model, disabling components that aren't needed
nlp = spacy.load('en_core_web_sm', disable=['ner', 'textcat'])
text = "This is a sample sentence."
# create a spaCy Doc object
doc = nlp(text)
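A typical next step in these information-extraction gists is to inspect the parsed document; a small sketch using the same doc (my addition, not part of the preview):
# noun phrases found by the parser
for chunk in doc.noun_chunks:
    print(chunk.text, chunk.root.dep_)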
AUC-ROC6.py
# multi-class classification
from sklearn.datasets import make_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
# generate a 3-class dataset
X, y = make_classification(n_samples=1000, n_classes=3, n_features=20, n_informative=3, random_state=42)
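The preview ends at the dataset; a possible continuation that fits the imports above (my sketch, not shown in the gist preview):
from sklearn.preprocessing import label_binarize

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
clf = OneVsRestClassifier(LogisticRegression(max_iter=1000)).fit(X_train, y_train)
y_score = clf.predict_proba(X_test)
# binarize the labels so per-class ROC/AUC can be computed
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
print(roc_auc_score(y_test_bin, y_score, average='macro'))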
NormalizationVsStandarization_3.py
# data standardization with sklearn
from sklearn.preprocessing import StandardScaler
# copy of datasets
X_train_stand = X_train.copy()
X_test_stand = X_test.copy()
# numerical features
num_cols = ['Item_Weight','Item_Visibility','Item_MRP','Outlet_Establishment_Year']
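Presumably the preview continues by fitting the scaler on the training split only and reusing it on the test split; a minimal sketch under that assumption:
scaler = StandardScaler()
# fit on train, then apply the same transformation to test
X_train_stand[num_cols] = scaler.fit_transform(X_train_stand[num_cols])
X_test_stand[num_cols] = scaler.transform(X_test_stand[num_cols])
X_train_stand.head()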