Skip to content

Instantly share code, notes, and snippets.

import spacy
import neuralcoref
import re
nlp = spacy.load('en')
neuralcoref.add_to_pipe(nlp, conv_dict={'Jack Porter': ['man', 'CEO'], 'Cognizer':['company', 'organization']})
history = ""
while True:
text = input("Enter your text.\n")
if text != "exit":
@Deepayan137
Deepayan137 / dummyData_ver2.py
Created September 16, 2020 14:57
a toy dataset for seq2seq implementation
import torch
from torch.utils.data import Dataset
import numpy as np
import pdb
class DummyDataset(Dataset):
def __init__(self, prob, vocab_size=None,
nSamples=None, max_len=None):
self.prob = prob
if not vocab_size: vocab_size = 10
@Deepayan137
Deepayan137 / dummyData.py
Last active September 16, 2020 06:36
sample code
import torch
from torch.utils.data import Dataset
import numpy as np
class DummyDataset(Dataset):
def __init__(self, **kwargs):
self.prob = kwargs['prob']
self.vocab_size = kwargs['vocab_size']
self.nSamples = kwargs['nSamples']
self.src_data = np.random.choice(self.vocab_size,
'STLR scheduler from https://arxiv.org/abs/1801.06146'
class STLR(_LRScheduler):
def __init__(self, optimizer, T_max, last_epoch=-1, ratio=32):
    """Set up the scheduler state and hand off to the base scheduler.

    Args:
        optimizer: Forwarded unchanged to the base class constructor.
        T_max: Value from which the cut point is derived; stored as
            ``self.T_max``.
        last_epoch: Forwarded unchanged to the base class constructor.
        ratio: Stored as ``self.ratio``.
    """
    self.ratio = ratio
    self.T_max = T_max
    # The cut point is 10% of T_max, rounded down (np.floor keeps it a float).
    # NOTE(review): presumably the step where the schedule switches from
    # increasing to decreasing, per the referenced paper -- confirm in get_lr.
    self.cut = np.floor(0.1 * T_max)
    # Attributes are assigned before the base constructor is invoked.
    super(STLR, self).__init__(optimizer, last_epoch)
def get_lr(self):
@Deepayan137
Deepayan137 / align.py
Created September 16, 2019 06:56
word prediction and ground truth alignment
from collections import Counter, defaultdict
from textdistance import levenshtein as lev
import numpy as np
import pdb
from tqdm import *
def CharMajVoting(words):
def most_frequent(list_):
    """Return the element that occurs most often in ``list_``.

    When several elements share the highest count, the one encountered
    first wins. An empty input raises ``IndexError``.
    """
    ranked = Counter(list_).most_common()
    top_item, _count = ranked[0]
    return top_item
@Deepayan137
Deepayan137 / cmv.py
Created January 16, 2019 11:03
character majority voting
def CharMajVoting(words):
def most_frequent(list_):
    """Return the most common element of ``list_``.

    Ties go to the element seen first. An empty ``list_`` raises
    ``IndexError`` because there is no first entry to pick.
    """
    tallies = Counter(list_)
    winner, _occurrences = tallies.most_common()[0]
    return winner
dict_ = defaultdict(list)
lengths = [len(word) for word in words]
common_length = most_frequent(lengths)
for word in words:
for i in range(len(word)):
@Deepayan137
Deepayan137 / evaluate.py
Created January 3, 2019 06:07
CA and WA
import re
import sys
import os
import tempfile
import subprocess
import pdb
import pandas as pd
import numpy as np
from collections import defaultdict
from ocr.baselines.base_config import *
@Deepayan137
Deepayan137 / char_acc.py
Created January 1, 2019 16:25
Character accuracy
import Levenshtein as lev
def cer(prediction, target):
    """Return the character accuracy of ``prediction`` against ``target``, in percent.

    Despite the name, the result is an accuracy, not an error rate:
    ``100 - 100 * edit_distance / ground_truth_length``, where the edit
    distance comes from ``lev.distance``.

    Args:
        prediction: Predicted text.
        target: Ground-truth text.

    Returns:
        The accuracy as a float. The value is negative when the edit
        distance exceeds the ground-truth length. With an empty ground
        truth the score is 100.0 for an empty prediction and 0.0 otherwise.
    """
    gt_length = sum(map(len, target))
    if gt_length == 0:
        # An empty ground truth would otherwise raise ZeroDivisionError;
        # only an empty prediction counts as a perfect match.
        return 100.0 if not prediction else 0.0
    edit_distance = lev.distance(prediction, target)
    return 100.0 - (edit_distance / gt_length) * 100
import pandas as pd
from collections import defaultdict
import pdb
import numpy as np
def row(dict_ques):
def dosomething(marks):
    """Add up ``marks`` and cap the result at 304.

    ``marks`` may be any iterable; it is materialised into a list before
    being summed with ``np.sum``.
    """
    values = list(marks)
    # NOTE(review): 304 is presumably the maximum attainable total -- confirm.
    return min(np.sum(values), 304)
import cv2
import os
import sys
from tqdm import *
def convert(**kwargs):
dirname = kwargs['dir_']
savedir = kwargs['save']
image_paths = list(map(lambda f: dirname +'/'+f , os.listdir(dirname)))
for path in tqdm(image_paths):
image = cv2.imread(path)