Skip to content

Instantly share code, notes, and snippets.

View satkr7's full-sized avatar

Satyam Kumar satkr7

  • kolkata, india
View GitHub Profile
# -*- coding: utf-8 -*-
"""
@author: satyam.kumar
"""
'''
Import necessary packages
'''
import numpy as np
import pandas as pd
import os
import gc
import re
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from bs4 import BeautifulSoup
def preprocess(x):
# Prerequisite: the libraries imported below must be installed beforehand.
import pandas as pd
from pandas_profiling import ProfileReport

# Exploratory data analysis with pandas-profiling: the CSV is read and
# handed straight to ProfileReport with the `explorative` flag switched on.
profile = ProfileReport(
    pd.read_csv('titanic.csv'),
    explorative=True,
)

# Persist the generated report as an HTML file.
profile.to_file("output.html")
import pandas as pd
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from category_encoders import BinaryEncoder, TargetEncoder

# --- Label encoding ---
# NOTE(review): `df` is not created anywhere in this snippet; it is presumably
# an already-loaded DataFrame containing a 'columnName' column -- confirm.
le = LabelEncoder()
# Fit the encoder on the column and overwrite the column with the result.
df['columnName'] = le.fit_transform(
    df['columnName']
)
import pandas as pd

# Height/weight dataset (downloaded from Kaggle), expected next to the script.
data = pd.read_csv("weight-height.csv")

# Equal-width binning of the "Height" column into 10 bins.
data["ewb"] = pd.cut(x=data["Height"], bins=10)

# Equal-frequency (quantile-based) binning of "Height" into 10 bins.
data["efb"] = pd.qcut(x=data["Height"], q=10)
# Prerequisite -- run this in a shell (or as `!pip install datawig` in a
# notebook cell), not as a Python statement; as bare Python it is a SyntaxError:
#     pip install datawig
import pandas as pd
import datawig

# Load the training data from the working directory.
data = pd.read_csv("train.csv")

# Split the loaded frame into a train part and a test part using datawig's
# random_split helper.
df_train, df_test = datawig.utils.random_split(data)
#Initialize a SimpleImputer model
imputer = datawig.SimpleImputer(