ben_oght_ah_eight (ben0it8)

  • Aignostics
  • Berlin
ben0it8 / cfg-init
Last active March 3, 2019 22:01
initialize dotfiles
# install oh-my-zsh
sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"
# configure zsh ($ZSH and $HOME are escaped so they are expanded by .zshrc, not by this script)
echo "ZSH=\$HOME/.oh-my-zsh" >> ~/.zshrc
echo "source \$ZSH/oh-my-zsh.sh" >> ~/.zshrc
echo "ZSH_THEME='robbyrussell'" >> ~/.zshrc
echo "plugins=(git python osx web-search vi-mode dotenv)" >> ~/.zshrc
echo "alias config='/usr/bin/git --git-dir=\$HOME/.cfg/ --work-tree=\$HOME'" >> ~/.zshrc
source ~/.zshrc
# clone the bare dotfiles repo into ~/.cfg and check it out into $HOME
echo ".cfg" >> ~/.gitignore
git clone --bare https://github.com/ben0it8/dotfiles.git $HOME/.cfg/
config checkout
ben0it8 / transformer_models.py
Last active July 12, 2019 13:27
Transformer models
import torch
import torch.nn as nn

class Transformer(nn.Module):
    "Adapted from https://github.com/huggingface/naacl_transfer_learning_tutorial"
    def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions,
                 num_heads, num_layers, dropout, causal):
        super().__init__()
        self.causal = causal
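The preview stops after the first lines of the constructor. Below is a minimal sketch of how the rest of the module might look, following the HuggingFace NAACL transfer-learning tutorial the docstring cites (token and position embeddings followed by pre-norm self-attention blocks); the exact layer layout and argument names are assumptions, not the gist's verbatim code.

import torch
import torch.nn as nn

class Transformer(nn.Module):
    "Sketch of the full module, assuming the NAACL tutorial architecture"
    def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions,
                 num_heads, num_layers, dropout, causal):
        super().__init__()
        self.causal = causal
        self.tokens_embeddings = nn.Embedding(num_embeddings, embed_dim)
        self.position_embeddings = nn.Embedding(num_max_positions, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.attentions, self.feed_forwards = nn.ModuleList(), nn.ModuleList()
        self.layer_norms_1, self.layer_norms_2 = nn.ModuleList(), nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(nn.Sequential(nn.Linear(embed_dim, hidden_dim),
                                                    nn.ReLU(),
                                                    nn.Linear(hidden_dim, embed_dim)))
            self.layer_norms_1.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.layer_norms_2.append(nn.LayerNorm(embed_dim, eps=1e-12))

    def forward(self, x, padding_mask=None):
        "x: [seq length, batch] token ids; returns hidden states [seq length, batch, embed dim]"
        positions = torch.arange(len(x), device=x.device).unsqueeze(-1)
        h = self.tokens_embeddings(x)
        h = h + self.position_embeddings(positions).expand_as(h)
        h = self.dropout(h)
        attn_mask = None
        if self.causal:  # upper-triangular mask so each position only attends to the past
            attn_mask = torch.full((len(x), len(x)), float('-inf'), device=h.device, dtype=h.dtype)
            attn_mask = torch.triu(attn_mask, diagonal=1)
        for ln_1, attn, ln_2, ff in zip(self.layer_norms_1, self.attentions,
                                        self.layer_norms_2, self.feed_forwards):
            x = ln_1(h)
            x, _ = attn(x, x, x, attn_mask=attn_mask, need_weights=False,
                        key_padding_mask=padding_mask)
            h = h + self.dropout(x)
            h = h + self.dropout(ff(ln_2(h)))
        return h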
ben0it8 / load_pretrained_transformer.py
Last active July 17, 2019 09:00
load pretrained NAACL Transformer
import torch
from pytorch_transformers import cached_path

# download pre-trained model weights and training config
state_dict = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                    "naacl-2019-tutorial/model_checkpoint.pth"), map_location='cpu')
config = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                "naacl-2019-tutorial/model_training_args.bin"))
# init model: Transformer base + classifier head
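The preview stops at the model-initialization comment. A minimal sketch of what that step might look like, assuming a TransformerWithClfHead wrapper (the Transformer base from the previous gist plus a linear classifier head, as in the NAACL tutorial) and the device and finetuning_config values from the fine-tuning config gist below; the class name and its signature are assumptions.

# assumed wrapper: Transformer base + linear classifier head (name and signature assumed)
model = TransformerWithClfHead(config=config, fine_tuning_config=finetuning_config).to(device)
# strict=False because the freshly added classifier head has no pretrained weights
model.load_state_dict(state_dict, strict=False)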
ben0it8 / finetuning_config.py
Last active July 17, 2019 09:06
Fine-tuning config
from collections import namedtuple
import torch

LOG_DIR = "./logs/"
CACHE_DIR = "./cache/"
device = "cuda" if torch.cuda.is_available() else "cpu"

FineTuningConfig = namedtuple('FineTuningConfig',
                              field_names="num_classes, dropout, init_range, batch_size, lr, max_norm,"
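The namedtuple definition is cut off mid string in the preview. A minimal sketch of how the full config might be built and instantiated; the remaining field names and every hyper-parameter value below are assumptions, shown only to make the shape of the config concrete.

FineTuningConfig = namedtuple('FineTuningConfig',
                              field_names="num_classes, dropout, init_range, batch_size, lr, max_norm,"
                                          " n_epochs, n_warmup, valid_pct, gradient_acc_steps, device, log_dir")

# values are placeholders, not the gist's actual settings
finetuning_config = FineTuningConfig(num_classes=2, dropout=0.1, init_range=0.02,
                                     batch_size=32, lr=6.5e-5, max_norm=1.0,
                                     n_epochs=2, n_warmup=10, valid_pct=0.1,
                                     gradient_acc_steps=2, device=device, log_dir=LOG_DIR)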
ben0it8 / download_imdb.py
Last active July 18, 2019 13:55
read imdb
import os
import requests
import tarfile
from tqdm import tqdm
# path to data
DATA_DIR = os.path.abspath('./data')
# path to IMDB
IMDB_DIR = os.path.join(DATA_DIR, "imdb5k")
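Only the imports and path constants survive the preview. A minimal sketch of the download-and-extract step the gist title describes; the helper name is an assumption and the URL is a placeholder, since the actual source of the imdb5k archive is not visible in the preview.

IMDB_URL = "<url-to-imdb5k-tarball>"  # placeholder: the real URL is not shown in the preview

def download_imdb(url: str = IMDB_URL, data_dir: str = DATA_DIR) -> None:
    "stream the IMDB tarball to disk with a progress bar, then extract it"
    os.makedirs(data_dir, exist_ok=True)
    archive = os.path.join(data_dir, "imdb5k.tgz")
    response = requests.get(url, stream=True)
    total = int(response.headers.get("content-length", 0))
    with open(archive, "wb") as f, tqdm(total=total, unit="B", unit_scale=True) as bar:
        for chunk in response.iter_content(chunk_size=1 << 20):
            f.write(chunk)
            bar.update(len(chunk))
    with tarfile.open(archive) as tar:
        tar.extractall(data_dir)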
ben0it8 / read_clean_imdb_data.py
Last active July 18, 2019 13:56
read and clean imdb data
import pandas as pd
import re

# text and label column names
TEXT_COL = "text"
LABEL_COL = "label"

def clean_html(text: str):
    "remove html tags and whitespaces"
    cleanr = re.compile('<.*?>')
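clean_html is cut off right after compiling the tag regex. A minimal sketch of how the cleaning and reading step might continue; the completion of clean_html and the read_imdb helper (and its assumed CSV layout with text and label columns) are assumptions.

def clean_html(text: str) -> str:
    "remove html tags and collapse whitespace"
    cleanr = re.compile('<.*?>')
    text = re.sub(cleanr, ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

def read_imdb(path: str) -> pd.DataFrame:
    "read one split (e.g. a train or test CSV) and clean its text column"
    df = pd.read_csv(path)
    df[TEXT_COL] = df[TEXT_COL].apply(clean_html)
    return df[[TEXT_COL, LABEL_COL]]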
ben0it8 / bert_textprocessor.py
Last active July 18, 2019 14:04
Create bert textprocessor
import torch
from torch.utils.data import TensorDataset, random_split, DataLoader
import numpy as np
import warnings
from tqdm import tqdm_notebook as tqdm
from typing import Tuple
NUM_MAX_POSITIONS = 256
BATCH_SIZE = 32
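The preview shows only imports and constants. A minimal sketch of the processor the gist title points to; the class name, the tokenizer interface (a BERT-style tokenizer exposing a vocab dict) and the id layout (a classification token appended to each clipped, padded sequence, as in the NAACL tutorial) are all assumptions.

class TextProcessor:
    "converts (label, text) pairs into fixed-length id sequences for the Transformer"
    def __init__(self, tokenizer, label2id: dict, num_max_positions: int = NUM_MAX_POSITIONS):
        self.tokenizer = tokenizer            # assumed: a BERT-style tokenizer exposing .vocab
        self.label2id = label2id
        self.num_max_positions = num_max_positions

    def process_example(self, example: Tuple[str, str]):
        "clip, append the classification token, pad, and map the label to an id"
        label, text = example
        tokens = self.tokenizer.tokenize(text)[:self.num_max_positions - 1]
        ids = self.tokenizer.convert_tokens_to_ids(tokens) + [self.tokenizer.vocab['[CLS]']]
        pad = [self.tokenizer.vocab['[PAD]']] * (self.num_max_positions - len(ids))
        return np.array(ids + pad, dtype='int64'), self.label2id[label]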
ben0it8 / prepare_training_eval_loops.py
Last active July 18, 2019 14:05
prepare training and eval loops
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy
from ignite.handlers import ModelCheckpoint
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar
import torch.nn.functional as F
from pytorch_transformers.optimization import AdamW
# Bert optimizer
optimizer = AdamW(model.parameters(), lr=finetuning_config.lr, correct_bias=False)
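The imports and the optimizer are all the preview shows. A minimal sketch of how the ignite engines might be wired together; the update and inference functions assume the model, clf_token, pad_token and finetuning_config names from the earlier gists, and the model's (logits, loss) interface is an assumption.

def update(engine, batch):
    "one optimization step: forward, backward with gradient clipping, optimizer step"
    model.train()
    inputs, labels = (t.to(finetuning_config.device) for t in batch)
    inputs = inputs.transpose(0, 1).contiguous()          # [seq length, batch]
    _, loss = model(inputs, clf_tokens_mask=(inputs == clf_token), clf_labels=labels)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), finetuning_config.max_norm)
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()

def inference(engine, batch):
    "forward pass only, returning logits and labels for the Accuracy metric"
    model.eval()
    with torch.no_grad():
        inputs, labels = (t.to(finetuning_config.device) for t in batch)
        inputs = inputs.transpose(0, 1).contiguous()
        logits = model(inputs, clf_tokens_mask=(inputs == clf_token),
                       padding_mask=(inputs == pad_token))
    return logits, labels

trainer, evaluator = Engine(update), Engine(inference)
RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
Accuracy().attach(evaluator, "accuracy")
ProgressBar(persist=True).attach(trainer, metric_names=['loss'])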
ben0it8 / create_dataloders.py
Last active July 25, 2019 11:37
Create dataloaders
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from itertools import repeat

num_cores = cpu_count()

def process_row(processor, row):
    return processor.process_example((row[1][LABEL_COL], row[1][TEXT_COL]))

def create_dataloader(df: pd.DataFrame,
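The preview cuts off at the create_dataloader signature. A minimal sketch of how the function might continue, reusing torch, tqdm, TensorDataset and DataLoader from the text-processor gist above; the parameter list and the exact tensor layout are assumptions.

def create_dataloader(df: pd.DataFrame,
                      processor,
                      batch_size: int = 32,
                      shuffle: bool = False) -> DataLoader:
    "tokenize a dataframe in parallel and wrap the result in a DataLoader"
    with ProcessPoolExecutor(max_workers=num_cores) as executor:
        rows = list(tqdm(executor.map(process_row, repeat(processor), df.iterrows()),
                         total=len(df), desc="Processing"))
    features = torch.tensor([ids for ids, _ in rows], dtype=torch.long)
    labels = torch.tensor([label for _, label in rows], dtype=torch.long)
    return DataLoader(TensorDataset(features, labels),
                      batch_size=batch_size, shuffle=shuffle,
                      pin_memory=torch.cuda.is_available())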
ben0it8 / finetune_and_eval.py
Last active July 25, 2019 15:45
fine-tune and evaluate model
# fit the model on `train_dl`
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)

# save model weights
torch.save(model.state_dict(), os.path.join(finetuning_config.log_dir, "model_weights.pth"))

# evaluate the model on `test_dl`
evaluator.run(test_dl)
print(f"Test accuracy: {100*evaluator.state.metrics['accuracy']:.3f}")