ben_oght_ah_eight (ben0it8)

  • Aignostics
  • Berlin
ben0it8 / cfg-init
Last active March 3, 2019 22:01
initialize dotfiles
# install oh-my-zsh
sh -c "$(curl -fsSL https://raw.githubusercontent.com/robbyrussell/oh-my-zsh/master/tools/install.sh)"
# configure zsh ($ZSH and $HOME are escaped so they are expanded by .zshrc, not by this script)
echo "ZSH=\$HOME/.oh-my-zsh" >> ~/.zshrc
echo "source \$ZSH/oh-my-zsh.sh" >> ~/.zshrc
echo "ZSH_THEME='robbyrussell'" >> ~/.zshrc
echo "plugins=(git python osx web-search vi-mode dotenv)" >> ~/.zshrc
echo "alias config='/usr/bin/git --git-dir=\$HOME/.cfg/ --work-tree=\$HOME'" >> ~/.zshrc
source ~/.zshrc
# clone the bare dotfiles repo into ~/.cfg and check it out into $HOME
echo ".cfg" >> ~/.gitignore
git clone --bare https://github.com/ben0it8/dotfiles.git $HOME/.cfg/
config checkout
ben0it8 / transformer_models.py
Last active July 12, 2019 13:27
Transformer models
import torch
import torch.nn as nn

class Transformer(nn.Module):
    "Adapted from https://github.com/huggingface/naacl_transfer_learning_tutorial"
    def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions,
                 num_heads, num_layers, dropout, causal):
        super().__init__()
        self.causal = causal
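The preview stops after the first lines of the constructor. Below is a minimal sketch of how the rest of the module might look, following the HuggingFace NAACL transfer-learning tutorial the docstring cites (token and position embeddings followed by pre-norm self-attention blocks); the exact layer layout and argument names are assumptions, not the gist's verbatim code.

import torch
import torch.nn as nn

class Transformer(nn.Module):
    "Sketch of the full module, assuming the NAACL tutorial architecture"
    def __init__(self, embed_dim, hidden_dim, num_embeddings, num_max_positions,
                 num_heads, num_layers, dropout, causal):
        super().__init__()
        self.causal = causal
        self.tokens_embeddings = nn.Embedding(num_embeddings, embed_dim)
        self.position_embeddings = nn.Embedding(num_max_positions, embed_dim)
        self.dropout = nn.Dropout(dropout)
        self.attentions, self.feed_forwards = nn.ModuleList(), nn.ModuleList()
        self.layer_norms_1, self.layer_norms_2 = nn.ModuleList(), nn.ModuleList()
        for _ in range(num_layers):
            self.attentions.append(nn.MultiheadAttention(embed_dim, num_heads, dropout=dropout))
            self.feed_forwards.append(nn.Sequential(nn.Linear(embed_dim, hidden_dim),
                                                    nn.ReLU(),
                                                    nn.Linear(hidden_dim, embed_dim)))
            self.layer_norms_1.append(nn.LayerNorm(embed_dim, eps=1e-12))
            self.layer_norms_2.append(nn.LayerNorm(embed_dim, eps=1e-12))

    def forward(self, x, padding_mask=None):
        "x: [seq length, batch] token ids; returns hidden states [seq length, batch, embed dim]"
        positions = torch.arange(len(x), device=x.device).unsqueeze(-1)
        h = self.tokens_embeddings(x)
        h = h + self.position_embeddings(positions).expand_as(h)
        h = self.dropout(h)
        attn_mask = None
        if self.causal:  # upper-triangular mask so each position only attends to the past
            attn_mask = torch.full((len(x), len(x)), float('-inf'), device=h.device, dtype=h.dtype)
            attn_mask = torch.triu(attn_mask, diagonal=1)
        for ln_1, attn, ln_2, ff in zip(self.layer_norms_1, self.attentions,
                                        self.layer_norms_2, self.feed_forwards):
            x = ln_1(h)
            x, _ = attn(x, x, x, attn_mask=attn_mask, need_weights=False,
                        key_padding_mask=padding_mask)
            h = h + self.dropout(x)
            h = h + self.dropout(ff(ln_2(h)))
        return h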
ben0it8 / load_pretrained_transformer.py
Last active July 17, 2019 09:00
load pretrained NAACL Transformer
import torch
from pytorch_transformers import cached_path

# download pre-trained model weights and training config
state_dict = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                    "naacl-2019-tutorial/model_checkpoint.pth"), map_location='cpu')
config = torch.load(cached_path("https://s3.amazonaws.com/models.huggingface.co/"
                                "naacl-2019-tutorial/model_training_args.bin"))
# init model: Transformer base + classifier head
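The preview stops at the model-initialization comment. A minimal sketch of what that step might look like, assuming a TransformerWithClfHead wrapper (the Transformer base from the previous gist plus a linear classifier head, as in the NAACL tutorial) and the device and finetuning_config values from the fine-tuning config gist below; the class name and its signature are assumptions.

# assumed wrapper: Transformer base + linear classifier head (name and signature assumed)
model = TransformerWithClfHead(config=config, fine_tuning_config=finetuning_config).to(device)
# strict=False because the freshly added classifier head has no pretrained weights
model.load_state_dict(state_dict, strict=False)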
ben0it8 / finetuning_config.py
Last active July 17, 2019 09:06
Fine-tuning config
from collections import namedtuple
import torch

LOG_DIR = "./logs/"
CACHE_DIR = "./cache/"
device = "cuda" if torch.cuda.is_available() else "cpu"

FineTuningConfig = namedtuple('FineTuningConfig',
                              field_names="num_classes, dropout, init_range, batch_size, lr, max_norm,"
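The namedtuple definition is cut off mid string in the preview. A minimal sketch of how the full config might be built and instantiated; the remaining field names and every hyper-parameter value below are assumptions, shown only to make the shape of the config concrete.

FineTuningConfig = namedtuple('FineTuningConfig',
                              field_names="num_classes, dropout, init_range, batch_size, lr, max_norm,"
                                          " n_epochs, n_warmup, valid_pct, gradient_acc_steps, device, log_dir")

# values are placeholders, not the gist's actual settings
finetuning_config = FineTuningConfig(num_classes=2, dropout=0.1, init_range=0.02,
                                     batch_size=32, lr=6.5e-5, max_norm=1.0,
                                     n_epochs=2, n_warmup=10, valid_pct=0.1,
                                     gradient_acc_steps=2, device=device, log_dir=LOG_DIR)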
ben0it8 / download_imdb.py
Last active July 18, 2019 13:55
read imdb
import os
import requests
import tarfile
from tqdm import tqdm
# path to data
DATA_DIR = os.path.abspath('./data')
# path to IMDB
IMDB_DIR = os.path.join(DATA_DIR, "imdb5k")
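Only the imports and path constants survive the preview. A minimal sketch of the download-and-extract step the gist title describes; the helper name is an assumption and the URL is a placeholder, since the actual source of the imdb5k archive is not visible in the preview.

IMDB_URL = "<url-to-imdb5k-tarball>"  # placeholder: the real URL is not shown in the preview

def download_imdb(url: str = IMDB_URL, data_dir: str = DATA_DIR) -> None:
    "stream the IMDB tarball to disk with a progress bar, then extract it"
    os.makedirs(data_dir, exist_ok=True)
    archive = os.path.join(data_dir, "imdb5k.tgz")
    response = requests.get(url, stream=True)
    total = int(response.headers.get("content-length", 0))
    with open(archive, "wb") as f, tqdm(total=total, unit="B", unit_scale=True) as bar:
        for chunk in response.iter_content(chunk_size=1 << 20):
            f.write(chunk)
            bar.update(len(chunk))
    with tarfile.open(archive) as tar:
        tar.extractall(data_dir)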
ben0it8 / read_clean_imdb_data.py
Last active July 18, 2019 13:56
read and clean imdb data
import pandas as pd
import re

# text and label column names
TEXT_COL = "text"
LABEL_COL = "label"

def clean_html(text: str):
    "remove html tags and whitespaces"
    cleanr = re.compile('<.*?>')
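clean_html is cut off right after compiling the tag regex. A minimal sketch of how the cleaning and reading step might continue; the completion of clean_html and the read_imdb helper (and its assumed CSV layout with text and label columns) are assumptions.

def clean_html(text: str) -> str:
    "remove html tags and collapse whitespace"
    cleanr = re.compile('<.*?>')
    text = re.sub(cleanr, ' ', text)
    return re.sub(r'\s+', ' ', text).strip()

def read_imdb(path: str) -> pd.DataFrame:
    "read one split (e.g. a train or test CSV) and clean its text column"
    df = pd.read_csv(path)
    df[TEXT_COL] = df[TEXT_COL].apply(clean_html)
    return df[[TEXT_COL, LABEL_COL]]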
ben0it8 / bert_textprocessor.py
Last active July 18, 2019 14:04
Create bert textprocessor
import torch
from torch.utils.data import TensorDataset, random_split, DataLoader
import numpy as np
import warnings
from tqdm import tqdm_notebook as tqdm
from typing import Tuple
NUM_MAX_POSITIONS = 256
BATCH_SIZE = 32
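The preview shows only imports and constants. A minimal sketch of the processor the gist title points to; the class name, the tokenizer interface (a BERT-style tokenizer exposing a vocab dict) and the id layout (a classification token appended to each clipped, padded sequence, as in the NAACL tutorial) are all assumptions.

class TextProcessor:
    "converts (label, text) pairs into fixed-length id sequences for the Transformer"
    def __init__(self, tokenizer, label2id: dict, num_max_positions: int = NUM_MAX_POSITIONS):
        self.tokenizer = tokenizer            # assumed: a BERT-style tokenizer exposing .vocab
        self.label2id = label2id
        self.num_max_positions = num_max_positions

    def process_example(self, example: Tuple[str, str]):
        "clip, append the classification token, pad, and map the label to an id"
        label, text = example
        tokens = self.tokenizer.tokenize(text)[:self.num_max_positions - 1]
        ids = self.tokenizer.convert_tokens_to_ids(tokens) + [self.tokenizer.vocab['[CLS]']]
        pad = [self.tokenizer.vocab['[PAD]']] * (self.num_max_positions - len(ids))
        return np.array(ids + pad, dtype='int64'), self.label2id[label]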
ben0it8 / prepare_training_eval_loops.py
Last active July 18, 2019 14:05
prepare training and eval loops
from ignite.engine import Engine, Events
from ignite.metrics import RunningAverage, Accuracy
from ignite.handlers import ModelCheckpoint
from ignite.contrib.handlers import CosineAnnealingScheduler, PiecewiseLinear, create_lr_scheduler_with_warmup, ProgressBar
import torch.nn.functional as F
from pytorch_transformers.optimization import AdamW
# Bert optimizer
optimizer = AdamW(model.parameters(), lr=finetuning_config.lr, correct_bias=False)
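The imports and the optimizer are all the preview shows. A minimal sketch of how the ignite engines might be wired together; the update and inference functions assume the model, clf_token, pad_token and finetuning_config names from the earlier gists, and the model's (logits, loss) interface is an assumption.

def update(engine, batch):
    "one optimization step: forward, backward with gradient clipping, optimizer step"
    model.train()
    inputs, labels = (t.to(finetuning_config.device) for t in batch)
    inputs = inputs.transpose(0, 1).contiguous()          # [seq length, batch]
    _, loss = model(inputs, clf_tokens_mask=(inputs == clf_token), clf_labels=labels)
    loss.backward()
    torch.nn.utils.clip_grad_norm_(model.parameters(), finetuning_config.max_norm)
    optimizer.step()
    optimizer.zero_grad()
    return loss.item()

def inference(engine, batch):
    "forward pass only, returning logits and labels for the Accuracy metric"
    model.eval()
    with torch.no_grad():
        inputs, labels = (t.to(finetuning_config.device) for t in batch)
        inputs = inputs.transpose(0, 1).contiguous()
        logits = model(inputs, clf_tokens_mask=(inputs == clf_token),
                       padding_mask=(inputs == pad_token))
    return logits, labels

trainer, evaluator = Engine(update), Engine(inference)
RunningAverage(output_transform=lambda x: x).attach(trainer, "loss")
Accuracy().attach(evaluator, "accuracy")
ProgressBar(persist=True).attach(trainer, metric_names=['loss'])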
ben0it8 / create_dataloders.py
Last active July 25, 2019 11:37
Create dataloaders
from concurrent.futures import ProcessPoolExecutor
from multiprocessing import cpu_count
from itertools import repeat

num_cores = cpu_count()

def process_row(processor, row):
    return processor.process_example((row[1][LABEL_COL], row[1][TEXT_COL]))

def create_dataloader(df: pd.DataFrame,
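The preview cuts off at the create_dataloader signature. A minimal sketch of how the function might continue, reusing torch, tqdm, TensorDataset and DataLoader from the text-processor gist above; the parameter list and the exact tensor layout are assumptions.

def create_dataloader(df: pd.DataFrame,
                      processor,
                      batch_size: int = 32,
                      shuffle: bool = False) -> DataLoader:
    "tokenize a dataframe in parallel and wrap the result in a DataLoader"
    with ProcessPoolExecutor(max_workers=num_cores) as executor:
        rows = list(tqdm(executor.map(process_row, repeat(processor), df.iterrows()),
                         total=len(df), desc="Processing"))
    features = torch.tensor([ids for ids, _ in rows], dtype=torch.long)
    labels = torch.tensor([label for _, label in rows], dtype=torch.long)
    return DataLoader(TensorDataset(features, labels),
                      batch_size=batch_size, shuffle=shuffle,
                      pin_memory=torch.cuda.is_available())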
ben0it8 / finetune_and_eval.py
Last active July 25, 2019 15:45
fine-tune and evaluate model
# fit the model on `train_dl`
trainer.run(train_dl, max_epochs=finetuning_config.n_epochs)

# save model weights
torch.save(model.state_dict(), os.path.join(finetuning_config.log_dir, "model_weights.pth"))

# evaluate the model on `test_dl`
evaluator.run(test_dl)
print(f"Test accuracy: {100*evaluator.state.metrics['accuracy']:.3f}")