Charlie O'Neill charlieoneill11

## from_pretrained.py
from transformers import AutoTokenizer

# Checkpoint identifier handed to the tokenizer loader on the next line.
model_ckpt = "distilbert-base-uncased"
# Build the tokenizer from that checkpoint identifier.
tokenizer = AutoTokenizer.from_pretrained(model_ckpt)

## word_per_tweet.py
# Split every tweet on whitespace and store the token count per row.
tokens_per_tweet = df["text"].str.split()
df["Words Per Tweet"] = tokens_per_tweet.apply(len)

# Box plot of the counts grouped by class name; outliers and grid are hidden.
df.boxplot(
    column="Words Per Tweet",
    by="label_name",
    grid=False,
    showfliers=False,
    color="black",
)

# Blank out the figure-level title and the x-axis label before rendering.
plt.suptitle("")
plt.xlabel("")
plt.show()

## imbalanced.py
import matplotlib.pyplot as plt

# Tally rows per class name in ascending order, then draw the tallies
# as a horizontal bar chart.
class_counts = df["label_name"].value_counts(ascending=True)
class_counts.plot.barh()
plt.title("Frequency of Classes")
plt.show()

## int2str.py
def label_int2str(row):
    """Translate an integer class id into its label name.

    Looks up the ``label`` feature on the ``train`` split of the
    module-level ``offensive`` dataset and delegates to that feature's
    ``int2str`` method.

    Args:
        row: The value to convert; passed straight through to ``int2str``.

    Returns:
        Whatever ``int2str`` returns for ``row``.
    """
    label_feature = offensive["train"].features["label"]
    return label_feature.int2str(row)

# Derive a "label_name" column by passing every value of "label"
# through label_int2str.
df["label_name"] = df["label"].apply(label_int2str)
# Notebook-style preview of the resulting frame.
df.head()

## set_format.py
import pandas as pd

# Ask the dataset to hand back pandas objects, then take a full slice of
# the train split and keep it as `df` for the analysis snippets.
offensive.set_format(type="pandas")
df = offensive["train"][:]
# Notebook-style preview of the resulting frame.
df.head()

## index_train.py
train_ds[0]

> {'text': '@user Bono... who cares. Soon people will understand that they gain nothing from following a phony celebrity. Become a Leader of your people instead or help and support your fellow countrymen.',
 'label': 0}

len(train_ds)

> 11916

## gist:e97ae9238555862db1c7d2fdc78ca406
train_ds[0]

> {'text': '@user Bono... who cares. Soon people will understand that they gain nothing from following a phony celebrity. Become a Leader of your people instead or help and support your fellow countrymen.',
 'label': 0}

len(train_ds)

> 11916

## train_ds.py
# Keep a handle on the train split; the bare name on the next line
# echoes its repr in a notebook (output shown below).
train_ds = offensive["train"]
train_ds

> Dataset({
    features: ['text', 'label'],
    num_rows: 11916
})

## offensive.py
offensive

> DatasetDict({
    train: Dataset({
        features: ['text', 'label'],
        num_rows: 11916
    })
    test: Dataset({
        features: ['text', 'label'],
        num_rows: 860
    })
    validation: Dataset({
        features: ['text', 'label'],
        num_rows: 1324
    })
})

## load_dataset.py
from datasets import load_dataset

# Load "tweet_eval" with "offensive" as the second argument; the result
# is indexed by split name (e.g. offensive["train"]) in the other snippets.
offensive = load_dataset("tweet_eval", "offensive")
	from transformers import AutoTokenizer

	model_ckpt = "distilbert-base-uncased"
	tokenizer = AutoTokenizer.from_pretrained(model_ckpt)
	df["Words Per Tweet"] = df["text"].str.split().apply(len)
	df.boxplot("Words Per Tweet", by="label_name", grid=False,
	showfliers=False, color="black")
	plt.suptitle("")
	plt.xlabel("")
	plt.show()
	import matplotlib.pyplot as plt

	df["label_name"].value_counts(ascending=True).plot.barh()
	plt.title("Frequency of Classes")
	plt.show()
	def label_int2str(row):
	    """Translate an integer class id into its label name.

	    Delegates to the ``int2str`` method of the ``label`` feature on the
	    ``train`` split of the ``offensive`` dataset.

	    Args:
	        row: The value to convert; passed straight through to ``int2str``.

	    Returns:
	        Whatever ``int2str`` returns for ``row``.
	    """
	    # The body must sit one level deeper than ``def``; with both lines at
	    # the same indent the function had no body and failed to parse.
	    return offensive["train"].features["label"].int2str(row)

	df["label_name"] = df["label"].apply(label_int2str)
	df.head()
	import pandas as pd

	offensive.set_format(type="pandas")
	df = offensive["train"][:]
	df.head()
	train_ds[0]

	> {'text': '@user Bono... who cares. Soon people will understand that they gain nothing from following a phony celebrity. Become a Leader of your people instead or help and support your fellow countrymen.',
	'label': 0}

	len(train_ds)

	> 11916
	train_ds = offensive["train"]
	train_ds

	> Dataset({
	features: ['text', 'label'],
	num_rows: 11916
	})
	offensive

	> DatasetDict({
	train: Dataset({
	features: ['text', 'label'],
	num_rows: 11916
	})
	test: Dataset({
	features: ['text', 'label'],
	num_rows: 860
	from datasets import load_dataset

	offensive = load_dataset("tweet_eval", "offensive")