Skip to content

Instantly share code, notes, and snippets.

View ben0it8's full-sized avatar

ben_oght_ah_eight ben0it8

  • Aignostics
  • Berlin
View GitHub Profile
@ben0it8
ben0it8 / Dockerfile
Created October 31, 2019 12:20
Ubuntu 18.04 + Python3.7 + tf1.5 + dali-nightly
# CUDA 10.0 + cuDNN 7 development image on Ubuntu 18.04
FROM nvidia/cuda:10.0-cudnn7-devel-ubuntu18.04
WORKDIR /srv
# FIX: /tmp must be world-writable WITH the sticky bit (mode 1777) so
# unprivileged processes can create temp files without being able to delete
# each other's; the original `chmod 777` silently dropped the sticky bit.
RUN chmod 1777 /tmp
# Install the Python 3.7 toolchain; remove the apt package lists in the same
# layer so the cache does not get baked into the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    python3.7 python3.7-dev python3-virtualenv \
    && rm -rf /var/lib/apt/lists/*
@ben0it8
ben0it8 / Dockerfile
Created July 30, 2019 11:49
dockerfile variables
# Set up container paths used by the training and serving code
ENV RESOURCES_PATH="/resources" \
    DATA_PATH="/data" \
    LOG_PATH="/logs"
# Create all declared directories up front (the original never created
# LOG_PATH even though it is exported above); DATA_PATH is made
# world-writable so a non-root training process can write datasets into it.
RUN \
    mkdir -p $RESOURCES_PATH && \
    mkdir -p $DATA_PATH && \
    mkdir -p $LOG_PATH && \
    chmod -R a+rwx $DATA_PATH
# Install training requirements
COPY docker-res/requirements.txt "/tmp/"
# BUG FIX: the file was copied to the absolute path /tmp/requirements.txt;
# the original relative "tmp/requirements.txt" resolves against WORKDIR and
# does not exist, so the build would fail here.
RUN pip install -r "/tmp/requirements.txt"
# Install app requirements
RUN pip install --upgrade uvicorn fastapi
@ben0it8
ben0it8 / app.py
Last active July 29, 2019 10:12
FastAPI app for IMDB Transformer
#!/usr/local/bin/python3
# FastAPI app bootstrap for the IMDB Transformer service.
# Logging is configured to stdout BEFORE the heavy imports below so that any
# import-time messages from torch / pytorch_transformers are captured.
import logging, sys, os
logging.basicConfig(stream=sys.stdout,
format='%(asctime)s : %(levelname)s : %(message)s',
level=logging.INFO)
import torch
import torch.nn.functional as F
from pytorch_transformers import BertTokenizer
# TransformerWithClfHead is a project-local model wrapper (defined in utils.py
# of the same gist) — not shown in this view.
from utils import TransformerWithClfHead
from types import SimpleNamespace
@ben0it8
ben0it8 / finetune_and_eval.py
Last active July 25, 2019 15:45
fine-tune and evaluate model
# Fit the model on `train_dl`. `trainer`, `model`, `evaluator`, `test_dl`
# and `finetuning_config` are all set up earlier in the gist (not shown here).
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)
# Persist the fine-tuned weights next to the training logs.
torch.save(model.state_dict(), os.path.join(finetuning_config.log_dir, "model_weights.pth"))
# Evaluate on the held-out `test_dl`; the 'accuracy' metric is presumably
# attached to the evaluator where it is constructed — not visible here.
evaluator.run(test_dl)
print(f"Test accuracy: {100*evaluator.state.metrics['accuracy']:.3f}")
@ben0it8
ben0it8 / create_dataloders.py
Last active July 25, 2019 11:37
Create dataloaders
# Parallel preprocessing setup: rows are processed in a process pool, one
# worker per available CPU core.
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from itertools import repeat
# Number of worker processes for the ProcessPoolExecutor below.
num_cores = cpu_count()
def process_row(processor, row):
    """Convert one DataFrame row (as yielded by ``iterrows``) into a
    processed example via the given text processor.

    ``row`` is an ``(index, Series)`` pair, so the payload lives in
    ``row[1]``; ``LABEL_COL`` and ``TEXT_COL`` are module-level constants.
    """
    series = row[1]
    example = (series[LABEL_COL], series[TEXT_COL])
    return processor.process_example(example)
def create_dataloader(df: pd.DataFrame,
@ben0it8
ben0it8 / prepare_training_eval_loops.py
Last active July 18, 2019 14:05
prepare training and eval loops
# Training/eval loop plumbing built on pytorch-ignite.
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy
from ignite.handlers import ModelCheckpoint
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar
import torch.nn.functional as F
from pytorch_transformers.optimization import AdamW
# Bert optimizer: AdamW over all model parameters. `model` and
# `finetuning_config` are defined earlier in the gist (not shown here);
# correct_bias=False skips Adam's bias correction — presumably to match the
# original BERT training setup.
optimizer = AdamW(model.parameters(), lr=finetuning_config.lr, correct_bias=False)
@ben0it8
ben0it8 / bert_textprocessor.py
Last active July 18, 2019 14:04
Create bert textprocessor
import torch
from torch.utils.data import TensorDataset, random_split, DataLoader
import numpy as np
import warnings
# Notebook-flavoured progress bar, aliased so the rest of the code can just
# call `tqdm(...)`.
from tqdm import tqdm_notebook as tqdm
from typing import Tuple
# Maximum number of token positions fed to the model (sequence length cap).
NUM_MAX_POSITIONS = 256
# Batch size for the DataLoaders built from these datasets.
BATCH_SIZE = 32
@ben0it8
ben0it8 / read_clean_imdb_data.py
Last active July 18, 2019 13:56
read and clean imdb data
import pandas as pd
import re
# Column names used for the IMDB DataFrame throughout the gist: raw review
# text and its sentiment label.
TEXT_COL = "text"
LABEL_COL = "label"
def clean_html(text: str):
"remove html tags and whitespaces"
cleanr = re.compile('<.*?>')
@ben0it8
ben0it8 / download_imdb.py
Last active July 18, 2019 13:55
download imdb data
import os
import requests
import tarfile
from tqdm import tqdm
# Root directory for all downloaded data, resolved to an absolute path so it
# is independent of later working-directory changes.
DATA_DIR = os.path.abspath('./data')
# Destination directory for the extracted IMDB dataset (5k-sample variant,
# judging by the name — the download itself is not visible in this chunk).
IMDB_DIR = os.path.join(DATA_DIR, "imdb5k")