Aniruddha Bhandari (aniruddha27)

sql_window_functions_01.sql
/* Sample data */
insert into emp (EMPID, NAME, JOB, SALARY)
values
(201, 'ANIRUDDHA', 'ANALYST', 2100),
(212, 'LAKSHAY', 'DATA ENGINEER', 2700),
(209, 'SIDDHARTH', 'DATA ENGINEER', 3000),
(232, 'ABHIRAJ', 'DATA SCIENTIST', 2500),
(205, 'RAM', 'ANALYST', 2500),
(222, 'PRANAV', 'MANAGER', 4500),
(202, 'SUNIL', 'MANAGER', 4800),
GroupBy_1.py
import pandas as pd
import numpy as np
# load the Big Mart sales training data
df = pd.read_csv(r'C:\Users\Dell\Desktop\train_big_mart.csv')
df.head()
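The preview stops after loading the data; below is a small groupby sketch of the kind this gist presumably builds toward. The column names 'Outlet_Type' and 'Item_Outlet_Sales' are assumptions based on the Big Mart dataset, not taken from the preview.
# average and total sales per outlet type (hypothetical column names)
sales_by_outlet = df.groupby('Outlet_Type')['Item_Outlet_Sales'].agg(['mean', 'sum'])
print(sales_by_outlet)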
nlp_ie_12.py
import visualise_spacy_tree
from IPython.display import Image, display
# 'nlp' is the spaCy pipeline loaded elsewhere in these gists (see nlp_ie_1.py)
doc = nlp(' Last year, I spoke about the Ujjwala programme , through which, I am happy to report, 50 million free liquid-gas connections have been provided so far')
# render the dependency parse as a tree image
png = visualise_spacy_tree.create_png(doc)
display(Image(png))
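Alongside the tree image, the parse can also be inspected programmatically; a minimal sketch using the same doc object (my addition, not part of the preview):
# per-token dependency label, head and part of speech
for token in doc:
    print(token.text, token.dep_, token.head.text, token.pos_)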
Multicollinearity_import.py
import pandas as pd
# load the salary dataset
df = pd.read_csv(r'C:/Users/Dell/Desktop/salary.csv')
df.head()
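Since the gist is about multicollinearity, the natural follow-up is a variance inflation factor (VIF) check; here is a minimal sketch using statsmodels. The target column name 'Salary' is an assumption, not taken from the preview.
from statsmodels.stats.outliers_influence import variance_inflation_factor
import pandas as pd

# keep only numeric predictors (assumption: the target column is named 'Salary')
X = df.select_dtypes(include='number').drop(columns=['Salary'], errors='ignore')
vif = pd.DataFrame({
    'feature': X.columns,
    'VIF': [variance_inflation_factor(X.values, i) for i in range(X.shape[1])]
})
print(vif)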
text detection.py
# text detection
import cv2

def contours_text(orig, img, contours):
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        # draw a rectangle around each contour on the copied image
        rect = cv2.rectangle(orig, (x, y), (x + w, y + h), (0, 255, 255), 2)
        cv2.imshow('cnt', rect)
        cv2.waitKey()
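Detection is usually followed by recognition on each bounding box; a sketch using pytesseract under that assumption (the preview above only shows the drawing step):
import cv2
import pytesseract

def recognise_text(img, contours):
    # run OCR on each detected region (pytesseract is an assumption here)
    for cnt in contours:
        x, y, w, h = cv2.boundingRect(cnt)
        cropped = img[y:y + h, x:x + w]
        text = pytesseract.image_to_string(cropped)
        print(text)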
tweepy_streamlistener.py
import time
import tweepy

class MyStreamListener(tweepy.StreamListener):
    def __init__(self, time_limit=300):
        self.start_time = time.time()
        self.limit = time_limit
        super(MyStreamListener, self).__init__()

    def on_connect(self):
        print("Connected to Twitter API.")
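The preview stops at on_connect; given the time_limit field, the listener presumably also overrides on_status to stop after the limit, roughly like the sketch below. The stream wiring and auth setup are assumptions, not shown in the gist.
    # inside MyStreamListener: stop the stream once the time limit is reached
    def on_status(self, status):
        if (time.time() - self.start_time) < self.limit:
            print(status.text)
            return True
        return False

# hypothetical wiring; 'auth' must come from tweepy.OAuthHandler credentials
# stream = tweepy.Stream(auth=auth, listener=MyStreamListener(time_limit=300))
# stream.filter(track=['analytics'], languages=['en'])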
Confusion matrix.py
# confusion matrix in sklearn
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
# actual values
actual = [1,0,0,1,0,0,1,0,0,1]
# predicted values
predicted = [1,0,0,1,0,0,0,1,0,0]
# confusion matrix
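The preview cuts off at the final comment; given the imports above, the computation presumably continues along these lines:
matrix = confusion_matrix(actual, predicted)
print(matrix)
# per-class precision, recall and F1
print(classification_report(actual, predicted))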
nlp_ie_1.py
# import spaCy
import spacy
# load the English language model, disabling components that aren't needed
nlp = spacy.load('en_core_web_sm', disable=['ner', 'textcat'])
text = "This is a sample sentence."
# create a spaCy Doc object
doc = nlp(text)
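A typical next step in these information-extraction gists is to inspect the parsed document; a small sketch using the same doc (my addition, not part of the preview):
# noun phrases found by the parser
for chunk in doc.noun_chunks:
    print(chunk.text, chunk.root.dep_)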
AUC-ROC6.py
# multi-class classification
from sklearn.datasets import make_classification
from sklearn.multiclass import OneVsRestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
# generate a 3-class dataset
X, y = make_classification(n_samples=1000, n_classes=3, n_features=20, n_informative=3, random_state=42)
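The preview ends at the dataset; a possible continuation that fits the imports above (my sketch, not shown in the gist preview):
from sklearn.preprocessing import label_binarize

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
clf = OneVsRestClassifier(LogisticRegression(max_iter=1000)).fit(X_train, y_train)
y_score = clf.predict_proba(X_test)
# binarize the labels so per-class ROC/AUC can be computed
y_test_bin = label_binarize(y_test, classes=[0, 1, 2])
print(roc_auc_score(y_test_bin, y_score, average='macro'))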
NormalizationVsStandarization_3.py
# data standardization with sklearn
from sklearn.preprocessing import StandardScaler
# copy of datasets
X_train_stand = X_train.copy()
X_test_stand = X_test.copy()
# numerical features
num_cols = ['Item_Weight','Item_Visibility','Item_MRP','Outlet_Establishment_Year']
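Presumably the preview continues by fitting the scaler on the training split only and reusing it on the test split; a minimal sketch under that assumption:
scaler = StandardScaler()
# fit on train, then apply the same transformation to test
X_train_stand[num_cols] = scaler.fit_transform(X_train_stand[num_cols])
X_test_stand[num_cols] = scaler.transform(X_test_stand[num_cols])
X_train_stand.head()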