This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# import necessary libraries | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import warnings | |
# Install a blanket "ignore" filter so no warning of any category is shown.
warnings.filterwarnings(action="ignore")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from google.colab import drive

# Mount Google Drive inside the Colab runtime at /content/drive.
_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Random train/test split of `data`, keeping only the text and label columns.
row_count = data.shape[0]

# Store one standard-normal draw per row in a 'split' column.
# NOTE(review): this column is not read by the split below - confirm whether
# anything else relies on it.
data['split'] = np.random.randn(row_count, 1)

# Rows whose uniform draw is <= 0.7 go to train, the rest go to test.
msk = np.random.rand(row_count) <= 0.7

kept_columns = ['comment', 'class_id']
train = data[msk].filter(items=kept_columns)
test = data[~msk].filter(items=kept_columns)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Make sure Colab runs TensorFlow 2.x.
# `%tensorflow_version 2.x` is an IPython line magic and therefore a
# SyntaxError in plain Python. Calling the magic through the shell object is
# the form IPython itself rewrites it to, and it keeps this cell valid when
# the notebook is exported as a script.
try:
    get_ipython().run_line_magic('tensorflow_version', '2.x')
except Exception:
    # Deliberate best effort: outside Colab the magic (or the IPython shell
    # itself) does not exist, so fall through and use the installed version.
    pass
import tensorflow as tf |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def confusion_matrix_data(true_label: int, predicted_label: int,
                          labels: "pd.DataFrame | None" = None) -> "pd.Series":
    """Return the comments that fall into one cell of the confusion matrix.

    Args:
        true_label: Value the 'class_id' column must equal.
        predicted_label: Value the 'Pred' column must equal.
        labels: Frame with 'comment', 'class_id' and 'Pred' columns. When
            omitted, the module-level ``label_df`` is used, which is what
            the function always read before this parameter existed.

    Returns:
        The 'comment' column restricted to the rows matching both labels,
        with the original index preserved (empty when nothing matches).
    """
    frame = label_df if labels is None else labels
    # One combined boolean mask instead of two chained .loc look-ups.
    in_cell = (frame['Pred'] == predicted_label) & (frame['class_id'] == true_label)
    return frame.loc[in_cell, 'comment']
# Comments whose true class is 0 but that were predicted as class 1.
false_positive = confusion_matrix_data(true_label=0, predicted_label=1)
# Show the first three of them.
print(false_positive[:3])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from sklearn.model_selection import train_test_split | |
from sklearn.preprocessing import LabelEncoder,OneHotEncoder | |
''' | |
convert target column texts into numeric values | |
''' | |
# convert text sentiments into numeric values | |
sentiment_to_id = { | |
"empty": 0, |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# A few basic methods to clean text data
""" | |
Install the following libraries | |
!pip install tweet-preprocessor | |
!pip install emoji | |
""" | |
""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import preprocessor as p | |
# Restrict the cleaner to the MENTION and URL options.
p.set_options(p.OPT.MENTION, p.OPT.URL)
# Try the cleaner on a sample tweet.
p.clean("hello guys @alx #sport🔥 1245 https://github.com/s/preprocessor")
# Clean every value of the `content` column and store the result in a new
# `clean_content` column.
data['clean_content'] = data['content'].apply(p.clean)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import warnings | |
import pandas as pd | |
# Ignore all warnings.
warnings.filterwarnings("ignore")
# Force pandas to display the full content of each cell instead of
# truncating long text. `None` is the supported "no limit" value
# (pandas >= 1.0); the former `-1` sentinel was deprecated in pandas 1.0 and
# is rejected with a ValueError by newer releases.
pd.set_option('display.max_colwidth', None)
# Load the CSV into a DataFrame.
# NOTE(review): 'PATH' looks like a placeholder for the real file location.
df = pd.read_csv(filepath_or_buffer='PATH')
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itertools import islice

# sample_dict is a dict.
# Show its first N key/value pairs (in insertion order). islice stops after
# N items, so the full items view is never copied into a list.
N = 3
dict(islice(sample_dict.items(), N))