Skip to content

Instantly share code, notes, and snippets.

View bhavsarpratik's full-sized avatar

Pratik Bhavsar bhavsarpratik

View GitHub Profile
[loggers]
keys=root
[logger_root]
level=INFO
handlers=screen,file
[formatters]
keys=simple
import datetime
import json
import logging
import ntpath
import os
def create_folder(directory):
    """Create *directory* if it does not already exist.

    Best-effort: failures (permissions, races) are reported to stdout
    rather than raised, so callers can treat this as optional setup.
    """
    # NOTE(review): the pasted snippet was truncated after the exists-check;
    # body reconstructed with the conventional makedirs + OSError handling.
    try:
        if not os.path.exists(directory):
            os.makedirs(directory)
    except OSError:
        # Report but do not raise — keep the caller running.
        print('Error: Creating directory. ' + directory)
import json
import os
from sklearn.metrics import (accuracy_score, classification_report,
confusion_matrix, f1_score, fbeta_score)
def get_metrics(y, y_pred, beta=2, average_method='macro', y_encoder=None):
    """Decode encoded labels (when *y_encoder* is given) before metric reporting.

    NOTE(review): this snippet appears truncated — only the label-decoding
    step survives; the metric computation implied by the parameters
    (beta, average_method) is missing from the paste.
    """
    if y_encoder:
        # Map encoded labels back to their original values for both arrays.
        y, y_pred = (y_encoder.inverse_transform(v) for v in (y, y_pred))
import os
def run_command(cmd):
    """Execute *cmd* through the system shell and return its exit status.

    NOTE(review): os.system hands the string to a shell — never pass it
    untrusted input (shell injection risk).
    """
    exit_status = os.system(cmd)
    return exit_status
def shutdown(seconds=0, os='linux'):
    """Shutdown system after seconds given. Useful for shutting EC2 to save costs.

    Parameters
    ----------
    seconds : int
        Delay handed straight to the platform shutdown command.
    os : str
        'linux' or 'windows'; any other value is a silent no-op.
    """
    # NOTE: the parameter `os` shadows the os module inside this function;
    # kept as-is because callers may pass it by keyword.
    if os == 'linux':
        run_command('sudo shutdown -h -t sec %s' % seconds)
    elif os == 'windows':
        # NOTE(review): this branch's body was missing from the paste;
        # reconstructed with the standard Windows command — confirm before use.
        run_command('shutdown -s -t %s' % seconds)
import time
from functools import wraps
def timing(f):
    """Decorator for timing functions
    Usage:
    @timing
    def function(a):
        pass
    """
    # NOTE(review): the wrapper body was missing from the paste; reconstructed
    # as the conventional wall-clock timing wrapper.
    @wraps(f)
    def wrapper(*args, **kwargs):
        start = time.time()
        result = f(*args, **kwargs)
        elapsed = time.time() - start
        # %r on __name__ matches the usual "function:'name'" report format.
        print('function:%r took: %2.4f sec' % (f.__name__, elapsed))
        return result
    return wrapper
import os
from abc import ABCMeta, abstractmethod
class DataProcessor(metaclass=ABCMeta):
    """Abstract base class for data-preparation processors.

    Holds the source directory to read from and the destination directory
    to write prepared output to; subclasses implement the actual steps.
    """

    def __init__(self, input_directory, output_directory):
        # Remember where data comes from and where results should go.
        self.input_directory, self.output_directory = (
            input_directory,
            output_directory,
        )
# --- tqdm progress-bar demo (gist fragment) ---
from tqdm import tqdm
import time

# Register the pandas integration so Series/DataFrame get .progress_apply().
tqdm.pandas()
# NOTE(review): `df` is not defined in this snippet — the demo assumes an
# existing DataFrame with a numeric 'col' column.
df['col'] = df['col'].progress_apply(lambda x: x**2)

text = ""
# Wrapping any iterable in tqdm() renders a progress bar while looping.
# NOTE(review): the loop body looks truncated — `text` is never appended to
# in the visible code.
for char in tqdm(["a", "b", "c", "d"]):
    time.sleep(0.25)
# --- fastprogress nested progress-bar demo (gist fragment) ---
from fastprogress.fastprogress import master_bar, progress_bar
from time import sleep

# Outer (master) bar over 10 iterations; inner bars attach via parent=.
mb = master_bar(range(10))
for i in mb:
    for j in progress_bar(range(100), parent=mb):
        sleep(0.01)
        # Live status text shown next to the inner bar.
        mb.child.comment = f'second bar stat'
    # Status text for the outer bar, plus a persistent line per loop.
    mb.first_bar.comment = f'first bar stat'
    mb.write(f'Finished loop {i}.')
def set_seed(args):
    """Seed python, numpy and torch RNGs from ``args.seed`` for reproducibility.

    When ``args.n_gpu > 0`` every CUDA device is seeded as well.
    """
    seed = args.seed
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if args.n_gpu > 0:
        # Seed all GPUs, not just the current one.
        torch.cuda.manual_seed_all(seed)
import pandas as pd
from pathlib import Path
from tokenizers import BertWordPieceTokenizer
def add_vocab_to_model(df, model, tokenizer, old_vocab, vocab_size=30000):
"""Adds new vocab to tokenizer and randomly initialises rows for new vocab in the model"""
PATH = Path('/tmp/lm_data')
PATH.mkdir(exist_ok=True)