RayWilliam46 / train_2.py
Created February 2, 2021 08:01
Unfreeze DistilBERT embedding layer and train all weights
FT_EPOCHS = 4
BATCH_SIZE = 64
NUM_STEPS = len(X_train.index)
# Unfreeze distilBERT layers and make available for training
for layer in distilBERT.layers:
    layer.trainable = True
# Recompile model after unfreezing
model.compile(optimizer=tf.keras.optimizers.Adam(lr=2e-5),
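The preview cuts off inside model.compile. A plausible completion of the fine-tuning step, assuming binary cross-entropy loss and a second fit call mirroring train_1.py; the loss, metrics, and validation tensor names are assumptions, not shown in the preview:
              loss='binary_crossentropy',   # assumed loss for the binary classification head
              metrics=['accuracy'])         # assumed metric
# Fine-tune all weights at the lower learning rate
train_history2 = model.fit(
    x = [X_train_ids, X_train_attention],
    y = y_train.to_numpy(),
    epochs = FT_EPOCHS,
    batch_size = BATCH_SIZE,
    validation_data = ([X_valid_ids, X_valid_attention], y_valid.to_numpy()),  # assumed names
    verbose = 1)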
RayWilliam46 / train_1.py
Created February 2, 2021 03:40
Train weights of classification head with DistilBERT layers frozen
EPOCHS = 6
BATCH_SIZE = 64
NUM_STEPS = len(X_train.index) // BATCH_SIZE
# Train the model
train_history1 = model.fit(
    x = [X_train_ids, X_train_attention],
    y = y_train.to_numpy(),
    epochs = EPOCHS,
    batch_size = BATCH_SIZE,
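The preview stops mid-call; a plausible remainder of this fit call, with validation tensors named by analogy to the training ones (steps_per_epoch and the validation names are assumptions):
    steps_per_epoch = NUM_STEPS,
    validation_data = ([X_valid_ids, X_valid_attention], y_valid.to_numpy()),  # assumed names
    verbose = 1)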
RayWilliam46 / build_model.py
Last active February 1, 2021 00:30
Template for building a model off of the BERT or DistilBERT architecture
MAX_LENGTH = 128
LAYER_DROPOUT = 0.2
LEARNING_RATE = 5e-5
RANDOM_STATE = 42
def build_model(transformer, max_length=MAX_LENGTH):
"""""""""
Template for building a model off of the BERT or DistilBERT architecture
for a binary classification task.
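The preview ends inside the docstring. A minimal sketch of how such a builder typically continues, closing the truncated docstring first and assuming tf is TensorFlow, the [CLS] token's final hidden state feeds a single sigmoid unit, and the model is compiled with binary cross-entropy (the head layout and loss are assumptions):
    """
    # Define input layers for token IDs and attention masks
    input_ids = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='input_ids')
    attention_mask = tf.keras.layers.Input(shape=(max_length,), dtype='int32', name='attention_mask')

    # Run inputs through the transformer and keep the [CLS] token's final hidden state
    last_hidden_state = transformer([input_ids, attention_mask])[0]
    cls_token = last_hidden_state[:, 0, :]

    # Classification head: dropout + a single sigmoid output for the binary label
    x = tf.keras.layers.Dropout(LAYER_DROPOUT)(cls_token)
    output = tf.keras.layers.Dense(1, activation='sigmoid')(x)

    # Assemble and compile the model
    model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=output)
    model.compile(optimizer=tf.keras.optimizers.Adam(LEARNING_RATE),
                  loss='binary_crossentropy',   # assumed loss for the binary task
                  metrics=['accuracy'])
    return model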
RayWilliam46 / import_distilbert.py
Last active February 5, 2021 17:02
Imports the base DistilBERT architecture from the Hugging Face library
from transformers import TFDistilBertModel, DistilBertConfig
DISTILBERT_DROPOUT = 0.2
DISTILBERT_ATT_DROPOUT = 0.2
# Configure DistilBERT's initialization
config = DistilBertConfig(dropout=DISTILBERT_DROPOUT,
                          attention_dropout=DISTILBERT_ATT_DROPOUT,
                          output_hidden_states=True)
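The preview ends after the config. The natural next step, as the description suggests, is loading the pretrained weights with that config; the variable name distilBERT is chosen to match the one used in train_2.py:
# Load the pretrained DistilBERT weights with the custom dropout configuration
distilBERT = TFDistilBertModel.from_pretrained('distilbert-base-uncased', config=config)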
RayWilliam46 / batch_encode.py
Last active September 3, 2023 14:02
Batch encodes text data using a Hugging Face tokenizer
# Define the maximum number of tokens per text (DistilBERT accepts sequences of up to 512 tokens)
MAX_LENGTH = 128
# Define function to encode text data in batches
def batch_encode(tokenizer, texts, batch_size=256, max_length=MAX_LENGTH):
"""""""""
A function that encodes a batch of texts and returns the texts'
corresponding encodings and attention masks that are ready to be fed
into a pre-trained transformer model.
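The preview ends inside the docstring. A minimal sketch of the loop such a helper typically runs, closing the docstring and assuming the Hugging Face tokenizer's __call__ API with padding to max_length and tf as TensorFlow (the exact arguments and return types are assumptions):
    """
    input_ids = []
    attention_mask = []

    # Tokenize the texts in chunks of batch_size to keep memory bounded
    for i in range(0, len(texts), batch_size):
        batch = texts[i:i + batch_size]
        inputs = tokenizer(batch,
                           max_length=max_length,
                           padding='max_length',          # pad every sequence to max_length
                           truncation=True,               # cut off anything longer
                           return_attention_mask=True,
                           return_token_type_ids=False)
        input_ids.extend(inputs['input_ids'])
        attention_mask.extend(inputs['attention_mask'])

    # Return TensorFlow tensors ready to feed into the transformer
    return tf.convert_to_tensor(input_ids), tf.convert_to_tensor(attention_mask)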
RayWilliam46 / tokenizer.py
Created January 27, 2021 06:38
Instantiate DistilBERT tokenizer
from transformers import DistilBertTokenizerFast
# Instantiate DistilBERT tokenizer...we use the Fast version to optimize runtime
tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
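A quick usage check (the sample sentence is only an illustration):
# Encode a sample sentence and inspect the resulting token IDs and WordPiece tokens
sample = tokenizer('DistilBERT is a distilled version of BERT.')
print(sample['input_ids'])
print(tokenizer.convert_ids_to_tokens(sample['input_ids']))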
RayWilliam46 / augment.py
Last active February 5, 2021 21:53
Text Augmentation: Word Replacement Using BERT Contextual Embeddings
!pip install numpy requests nlpaug
import pandas as pd
import numpy as np
import nlpaug.augmenter.word as nlpaw
from tqdm import tqdm
def augment_sentence(sentence, aug, num_threads):
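The preview stops at the function signature. A minimal sketch of the body and of wiring up a BERT-based contextual augmenter with nlpaug (the model name, action, and device are assumptions):
    """Return an augmented copy of `sentence` produced by the given nlpaug augmenter."""
    return aug.augment(sentence, num_thread=num_threads)

# Example: contextual word substitution driven by BERT embeddings
aug = nlpaw.ContextualWordEmbsAug(model_path='bert-base-uncased',   # assumed model choice
                                  action='substitute',              # replace words in place
                                  device='cpu')
# Newer nlpaug versions return a list of augmented strings rather than a single string
print(augment_sentence("The quick brown fox jumps over the lazy dog.", aug, num_threads=1))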