Skip to content

Instantly share code, notes, and snippets.

View pythonlessons's full-sized avatar

Rokas Liuberskis pythonlessons

View GitHub Profile
@pythonlessons
pythonlessons / transformers_training_4.py
Created September 4, 2023 15:04
transformers_training
#train.py
# Training entry point: pulls the model factory from model.py and the
# hyperparameter container from configs.py (both defined in sibling gists).
from model import Transformer
from configs import ModelConfigs
# Instantiate the configuration object; its fields drive model construction
# and training below (rest of the script is outside this preview).
configs = ModelConfigs()
@pythonlessons
pythonlessons / transformers_training_3.py
Created September 4, 2023 15:04
transformers_training
#configs.py
import os
from datetime import datetime
from mltu.configs import BaseModelConfigs
class ModelConfigs(BaseModelConfigs):
def __init__(self):
super().__init__()
@pythonlessons
pythonlessons / transformers_training_2.py
Created September 4, 2023 15:04
transformers_training
#model.py
import tensorflow as tf
from mltu.tensorflow.transformer.layers import Encoder, Decoder
def Transformer(
input_vocab_size: int,
target_vocab_size: int,
encoder_input_size: int = None,
decoder_input_size: int = None,
@pythonlessons
pythonlessons / transformers_training_1.py
Created September 4, 2023 15:04
transformers_training
import numpy as np
import tensorflow as tf
# Enable on-demand GPU memory growth so TensorFlow does not pre-allocate all
# VRAM at startup. Best-effort: if there is no GPU or the runtime does not
# support the setting, continue on CPU / defaults.
try:
    for gpu in tf.config.experimental.list_physical_devices("GPU"):
        tf.config.experimental.set_memory_growth(gpu, True)
except Exception:
    # Deliberately non-fatal: memory growth is an optimization, not a requirement.
    pass
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from mltu.tensorflow.callbacks import Model2onnx, WarmupCosineDecay
from mltu.tensorflow.dataProvider import DataProvider
@pythonlessons
pythonlessons / transformers_training_0.py
Created September 4, 2023 15:04
transformers_training
import os
import requests
from tqdm import tqdm
from bs4 import BeautifulSoup
# Language pair of the OPUS-100 corpus to download, and where to put it.
language = "en-es"
# Remote directory on statmt.org that lists the files for this pair.
url = "https://data.statmt.org/opus-100-corpus/v1.0/supervised/" + language + "/"
# Local destination directory, mirrored per language pair.
save_directory = "./Datasets/" + language
@pythonlessons
pythonlessons / transformers_nlp_data_10.css
Created August 24, 2023 10:12
transformers_nlp_data
['fueron los asbestos aquí. ¡eso es lo que ocurrió!', 'me voy de aquí.', 'una vez, juro que cagué una barra de tiza.', 'y prefiero mudarme, ¿entiendes?']
["<start>it was the asbestos in here, that's what did it!", "<start>i'm out of here.", '<start>one time, i swear i pooped out a stick of chalk.', '<start>and i will move, do you understand me?']
["it was the asbestos in here, that's what did it!<eos>", "i'm out of here.<eos>", 'one time, i swear i pooped out a stick of chalk.<eos>', 'and i will move, do you understand me?<eos>']
@pythonlessons
pythonlessons / transformers_nlp_data_9.css
Created August 24, 2023 10:12
transformers_nlp_data
[33, 51, 48, 55, 55, 58, 3, 66, 58, 61, 55, 47, 15, 3, 51, 58, 66, 3, 44, 61, 48, 3, 68, 58, 64, 36, 32]
['<start>hello world, how are you?<eos>']
['hello world, how are you?']
@pythonlessons
pythonlessons / transformers_nlp_data_8.css
Created August 24, 2023 10:12
transformers_nlp_data
Fitting tokenizer: 100%|██████████| 995249/995249 [00:10<00:00, 95719.57it/s]
Fitting tokenizer: 100%|██████████| 995249/995249 [00:07<00:00, 134446.71it/s]
@pythonlessons
pythonlessons / transformers_nlp_data_7.css
Created August 24, 2023 10:12
transformers_nlp_data
995249
1990
('Fueron los asbestos aquí. ¡Eso es lo que ocurrió!', 'Me voy de aquí.', 'Una vez, juro que cagué una barra de tiza.')
("It was the asbestos in here, that's what did it!", "I'm out of here.", 'One time, I swear I pooped out a stick of chalk.')
@pythonlessons
pythonlessons / transformers_nlp_data_6.py
Created August 24, 2023 10:12
transformers_nlp_data
# Walk the training data provider and print each batch decoded back to text,
# so the (encoder input, shifted decoder input, decoder target) triple can be
# inspected by eye. NOTE(review): indentation was lost in the paste; the body
# below assumes all statements belong inside the loop — confirm against the
# original gist.
for batch in train_dataProvider:
    (encoder_inputs, decoder_inputs), decoder_outputs = batch
    # Token ids -> strings; start/end markers are kept on the decoder side.
    source_text = tokenizer.detokenize(encoder_inputs)
    shifted_target_text = detokenizer.detokenize(decoder_inputs, remove_start_end=False)
    target_text = detokenizer.detokenize(decoder_outputs, remove_start_end=False)
    print(source_text)
    print(shifted_target_text)
    print(target_text)