This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# CUDA 10.0 + cuDNN 7 development image on Ubuntu 18.04.
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04

# Relative paths in later instructions resolve against /srv.
WORKDIR /srv

# Make /tmp writable by every user. Mode 1777 (rather than 777) keeps the sticky
# bit, so users cannot delete or rename each other's files.
RUN chmod 1777 /tmp

# Install the compiler toolchain and Python 3.7. The apt package lists are removed
# in the same layer so they do not add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        python3-virtualenv \
        python3.7 \
        python3.7-dev \
    && rm -rf /var/lib/apt/lists/*
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set up paths
# ENV values persist into the environment of the running container.
ENV RESOURCES_PATH="/resources" \
    DATA_PATH="/data" \
    LOG_PATH="/logs"

# Create the resource and data directories, then open the data directory
# (recursively) to all users. Expansions are quoted so that a path containing
# whitespace is still passed as a single argument.
# NOTE(review): LOG_PATH is declared above but not created here - confirm it is
# created elsewhere.
RUN mkdir -p "$RESOURCES_PATH" "$DATA_PATH" && \
    chmod -R a+rwx "$DATA_PATH"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Install training requirements
COPY docker-res/requirements.txt "/tmp/"
# Use the absolute path: a relative "tmp/requirements.txt" is resolved against the
# current WORKDIR, not the /tmp directory the file was copied into.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -r "/tmp/requirements.txt"

# Install app requirements
RUN pip install --no-cache-dir --upgrade uvicorn fastapi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3 | |
import logging, sys, os | |
# Send log records to stdout at INFO level and above, formatted as
# "<timestamp> : <level> : <message>".
logging.basicConfig(
    stream=sys.stdout,
    format='%(asctime)s : %(levelname)s : %(message)s',
    level=logging.INFO,
)
import torch | |
import torch.nn.functional as F | |
from pytorch_transformers import BertTokenizer | |
from utils import TransformerWithClfHead | |
from types import SimpleNamespace |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Fit the model on `train_dl` for the configured number of epochs.
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)

# Save the model weights; make sure the target directory exists first so that
# `torch.save` does not fail on a missing path.
os.makedirs(finetuning_config.log_dir, exist_ok=True)
torch.save(model.state_dict(), os.path.join(finetuning_config.log_dir, "model_weights.pth"))

# Evaluate the model on `test_dl` and print the accuracy metric scaled by 100.
evaluator.run(test_dl)
print(f"Test accuracy: {100*evaluator.state.metrics['accuracy']:.3f}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from concurrent.futures import ProcessPoolExecutor | |
from multiprocessing import cpu_count | |
from itertools import repeat | |
# CPU count as returned by `multiprocessing.cpu_count()`.
num_cores = cpu_count()
def process_row(processor, row):
    """Pass one row's ``(label, text)`` pair to ``processor.process_example``.

    Args:
        processor: Object exposing a ``process_example`` method.
        row: Indexable whose element ``1`` maps ``LABEL_COL`` and ``TEXT_COL``
            to the label and the text (presumably an ``(index, Series)`` pair
            from ``DataFrame.iterrows()`` - confirm against the caller).

    Returns:
        Whatever ``processor.process_example`` returns for the pair.
    """
    record = row[1]
    return processor.process_example((record[LABEL_COL], record[TEXT_COL]))
def create_dataloader(df: pd.DataFrame, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ignite.engine import Engine, Events | |
from ignite.metrics import RunningAverage, Accuracy | |
from ignite.handlers import ModelCheckpoint | |
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar | |
import torch.nn.functional as F | |
from pytorch_transformers.optimization import AdamW | |
# Bert optimizer: AdamW over all model parameters, with the learning rate taken
# from the fine-tuning config and `correct_bias` switched off.
optimizer = AdamW(
    model.parameters(),
    lr=finetuning_config.lr,
    correct_bias=False,
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
from torch.utils.data import TensorDataset, random_split, DataLoader | |
import numpy as np | |
import warnings | |
from tqdm import tqdm_notebook as tqdm | |
from typing import Tuple | |
# NOTE(review): presumably the maximum sequence length in tokens - confirm at the use site.
NUM_MAX_POSITIONS = 256
# NOTE(review): presumably the number of examples per DataLoader batch - confirm at the use site.
BATCH_SIZE = 32
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
import re | |
# Text and label column names.
TEXT_COL = "text"
LABEL_COL = "label"
def clean_html(text: str): | |
"remove html tags and whitespaces" | |
cleanr = re.compile('<.*?>') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import requests | |
import tarfile | |
from tqdm import tqdm | |
# Path to data: absolute form of ./data, resolved against the working directory
# at the time this module is executed.
DATA_DIR = os.path.abspath('./data')
# Path to IMDB: the "imdb5k" subdirectory of DATA_DIR.
IMDB_DIR = os.path.join(DATA_DIR, "imdb5k")
NewerOlder