This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[loggers] | |
keys=root | |
[logger_root] | |
level=INFO | |
handlers=screen,file | |
[formatters] | |
keys=simple |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
import json | |
import logging | |
import ntpath | |
import os | |
def create_folder(directory): | |
try: | |
if not os.path.exists(directory): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
from sklearn.metrics import (accuracy_score, classification_report, | |
confusion_matrix, f1_score, fbeta_score) | |
def get_metrics(y, y_pred, beta=2, average_method='macro', y_encoder=None): | |
if y_encoder: | |
y = y_encoder.inverse_transform(y) | |
y_pred = y_encoder.inverse_transform(y_pred) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
def run_command(cmd):
    """Execute ``cmd`` via ``os.system`` and hand back its result.

    Args:
        cmd: Command line string, passed to ``os.system`` unchanged.

    Returns:
        Whatever ``os.system`` returns for the command (0 conventionally
        signals success).
    """
    # NOTE: the string is forwarded verbatim, so callers are responsible
    # for only passing trusted command text.
    status = os.system(cmd)
    return status
def shutdown(seconds=0, os='linux'): | |
"""Shutdown system after seconds given. Useful for shutting EC2 to save costs.""" | |
if os == 'linux': | |
run_command('sudo shutdown -h -t sec %s' % seconds) | |
elif os == 'windows': |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
from functools import wraps | |
def timing(f): | |
"""Decorator for timing functions | |
Usage: | |
@timing | |
def function(a): | |
pass |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
from abc import ABCMeta, abstractmethod | |
class DataProcessor(metaclass=ABCMeta): | |
"""Base processor to be used for all preparation.""" | |
def __init__(self, input_directory, output_directory): | |
self.input_directory = input_directory | |
self.output_directory = output_directory |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import tqdm | |
import time | |
tqdm.pandas() | |
df['col'] = df['col'].progress_apply(lambda x: x**2) | |
text = "" | |
for char in tqdm(["a", "b", "c", "d"]): | |
time.sleep(0.25) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from fastprogress.fastprogress import master_bar, progress_bar | |
from time import sleep | |
# Nested progress bars: `mb` drives the outer loop, and every inner
# `progress_bar` is attached to it through `parent=mb`.
mb = master_bar(range(10))
for i in mb:
    for j in progress_bar(range(100), parent=mb):
        sleep(0.01)
        # Comment shown next to the inner (child) bar.
        mb.child.comment = 'second bar stat'
    # Comment shown next to the outer bar, refreshed once per outer step.
    mb.first_bar.comment = 'first bar stat'
    # Emit a line of text once the inner loop for this step has finished.
    mb.write(f'Finished loop {i}.')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def set_seed(args):
    """Seed Python's, NumPy's and PyTorch's random number generators.

    Args:
        args: Object exposing ``seed`` (the value handed to every seeding
            call) and ``n_gpu`` (CUDA generators are seeded only when this
            is greater than zero).
    """
    seed = args.seed
    # Same order as before: stdlib random, then NumPy, then torch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if args.n_gpu > 0:
        torch.cuda.manual_seed_all(seed)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from pathlib import Path | |
from tokenizers import BertWordPieceTokenizer | |
def add_vocab_to_model(df, model, tokenizer, old_vocab, vocab_size=30000): | |
"""Adds new vocab to tokenizer and randomly initialises rows for new vocab in the model""" | |
PATH = Path('/tmp/lm_data') | |
PATH.mkdir(exist_ok=True) |