Skip to content

Instantly share code, notes, and snippets.

View sksoumik's full-sized avatar

Sadman Kabir Soumik sksoumik

View GitHub Profile
@sksoumik
sksoumik / import_basic_libraries.py
Created July 5, 2020 00:58
import basic common libraries for ml tasks
# Import the libraries shared by the ML snippets: pandas, matplotlib, numpy.
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
# Install a global filter that ignores every warning.
# NOTE(review): this also hides deprecation warnings — confirm that is intended.
warnings.filterwarnings("ignore")
@sksoumik
sksoumik / drive_mount.py
Created July 1, 2020 23:43
mounting drive on colab
# Mount Drive at /content/drive; the google.colab module is only importable
# inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')
@sksoumik
sksoumik / split_a_data.py
Created July 1, 2020 22:55
Splitting a dataframe into train and test sets
# Randomly split `data` into a train (~70%) and a test (~30%) subset.
# Every row is assigned independently, so the ratio is only approximate;
# call np.random.seed(...) beforehand if the split must be reproducible.
# The boolean mask alone drives the split, so no helper column is written
# to `data` and the input frame is left unmodified.
msk = np.random.rand(len(data)) <= 0.7
# Keep only the text and label columns in both subsets.
keep_columns = ['comment', 'class_id']
train = data[msk].filter(items=keep_columns)
test = data[~msk].filter(items=keep_columns)
@sksoumik
sksoumik / tf_version.py
Created July 1, 2020 18:18
Force colab to use a specific version of tensorflow
# make sure colab runs tf 2.0
try:
%tensorflow_version 2.x
except Exception:
pass
import tensorflow as tf
@sksoumik
sksoumik / confusion_matrix_data_distribution.py
Created June 30, 2020 21:19
printing confusion matrix data
def confusion_matrix_data(true_label: int, predicted_label: int, df=None):
    """Return the comments that fall in one cell of the confusion matrix.

    Args:
        true_label: Value to match in the ``class_id`` column.
        predicted_label: Value to match in the ``Pred`` column.
        df: DataFrame with ``comment``, ``class_id`` and ``Pred`` columns.
            Defaults to the module-level ``label_df`` so existing
            two-argument calls keep working.

    Returns:
        The ``comment`` column of the rows where both labels match, with
        the original index and row order preserved.
    """
    if df is None:
        df = label_df
    # One combined boolean mask instead of two chained .loc filters.
    in_cell = (df['Pred'] == predicted_label) & (df['class_id'] == true_label)
    return df.loc[in_cell, 'comment']
# Comments whose true label is 0 but whose predicted label is 1.
false_positive = confusion_matrix_data(0, 1)
# Show only the first three matches.
print(false_positive[:3])
@sksoumik
sksoumik / label_encoding.py
Created June 25, 2020 08:10
label encoding and train-test split
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder,OneHotEncoder
'''
convert target column texts into numeric values
'''
# convert text sentiments into numeric values
sentiment_to_id = {
"empty": 0,
@sksoumik
sksoumik / text_data_cleaning.py
Last active July 5, 2020 14:24
basic text data cleaning methods
# A few basic methods to clean text data
"""
Install the following libraries
!pip install tweet-preprocessor
!pip install emoji
"""
"""
@sksoumik
sksoumik / remove_urls _mentions.py
Created June 24, 2020 09:02
remove URLs and username mentions from Strings
import preprocessor as p

# Select the MENTION and URL options for cleaning.
p.set_options(p.OPT.MENTION, p.OPT.URL)

# Quick check on a sample string.
p.clean("hello guys @alx #sport🔥 1245 https://github.com/s/preprocessor")

# Clean a specific column and store the result in a new column.
data['clean_content'] = data['content'].apply(p.clean)
@sksoumik
sksoumik / pandas_basics.py
Last active June 22, 2020 04:02
pandas dataframe basics to read data
import warnings
import pandas as pd
# Ignore warnings.
warnings.filterwarnings("ignore")
# Force pandas to display the full contents of each column instead of
# truncating them. None means "no limit"; the older -1 sentinel has been
# deprecated since pandas 1.0 and may be rejected by newer releases.
pd.set_option('display.max_colwidth', None)
# Load the CSV into a DataFrame. 'PATH' looks like a placeholder for the
# real file path — replace it before running.
df = pd.read_csv('PATH')
@sksoumik
sksoumik / print_dictionary.py
Created June 20, 2020 19:04
print a specific number of dictionary items
# sample_dict is a dict.
# Build a new dict holding only the first N key/value pairs of sample_dict.
N = 3
dict(list(sample_dict.items())[:N])