Skip to content

Instantly share code, notes, and snippets.

# Keep only the condition classes that occur at least 20 times in the training data.
# The callable passed to .loc receives the value-count Series, so the counts are
# computed once and filtered in the same expression.
index_counts = df_train["condition"].value_counts().loc[lambda counts: counts >= 20].index
# Restrict the training rows to those whose condition survived the threshold.
df_train = df_train.loc[df_train["condition"].isin(index_counts)]
number_of_classes(df_train)
def plot_bar_chart(df):
# Purpose: tally how often each value of df's `condition` column occurs and
# begin building a vertical Plotly Express bar chart from those tallies.
# NOTE(review): this excerpt has lost its indentation and the px.bar(...) call
# below is cut off before its closing parenthesis, so the remainder of the
# function (remaining arguments, show/return) is not visible here.
# analyze the condition labels
# Number of rows per distinct condition value.
counts_series = df.condition.value_counts()
# Wrap the counts in a DataFrame so they can be handed to px.bar.
counts_df = pd.DataFrame(counts_series)
# Move the index (the condition labels) into a regular column, in place.
counts_df.reset_index(level=0, inplace=True)
# Helper defined outside this excerpt; it is called with the unfiltered input frame.
number_of_classes(df)
# NOTE(review): the column names "index" (labels) and "condition" (counts) match
# what value_counts()/reset_index() produce on pandas < 2.0; on pandas >= 2.0 the
# columns would presumably be "condition" and "count" instead. Confirm against
# the pandas version in use.
fig = px.bar(counts_df, x="index", y="condition", orientation='v',
height=400,
# Load the raw Drugs.com review splits from the mounted Google Drive folder.
# The training and test frames must come from different files; reading the
# test file for both would make the two frames identical and invalidate any
# evaluation done on df_test.
# train data
df_train = pd.read_csv("/content/drive/My Drive/data/drugsComTrain_raw.csv")
# test data
df_test = pd.read_csv("/content/drive/My Drive/data/drugsComTest_raw.csv")
# Removing nan values: drop every row that has a missing value in any column
df_train = df_train.dropna()
df_test = df_test.dropna()
from sklearn.model_selection import train_test_split, cross_val_score, cross_val_predict, StratifiedKFold
import plotly.express as px
import numpy as np
import pandas as pd
from mlxtend.plotting import plot_learning_curves
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer, TfidfTransformer