Skip to content

Instantly share code, notes, and snippets.

# Number of label classes; used below to size each per-annotation label array.
NUM_CLASSES = 228

# Load the training annotations once, at import time.
# The encoding is given explicitly so the read does not depend on the
# platform's default locale encoding.
with open("data/train.json", encoding="utf-8") as train:
    train_json = json.load(train)
def generate_label_array(json_obj):
result = []
for data in json_obj['annotations']:
temp_array = [0] * NUM_CLASSES
for elem in data['labelId']:
from keras.utils.data_utils import Sequence
import requests
import io
class BatchSequence(Sequence):
    """Keras ``Sequence`` subclass holding paired inputs/targets and batch settings.

    NOTE(review): only the constructor is visible in this excerpt; how the
    stored values are turned into batches is defined elsewhere -- confirm
    against the rest of the class.
    """

    def __init__(self, x_set, y_set, batch_size=32, image_size=228):
        """Store the data set and batching configuration on the instance.

        Args:
            x_set: Input samples, stored as ``self.x``.
            y_set: Targets for ``x_set``, stored as ``self.y``.
            batch_size: Stored as ``self.batch_size`` (default 32).
            image_size: Stored as ``self.image_size`` (default 228);
                presumably the edge length images are resized to --
                TODO confirm against the batch-building code.
        """
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.image_size = image_size
@fantods
fantods / eggcorns.py
Created February 25, 2020 17:24
Fixing eggcorns using Entity Resolution
import csv
from nltk import tokenize
from er_core.er import ER
from er_core.preprocessors.normalizer import Normalizer
normalizer = Normalizer("default")
bad_string = "I hole-hardedly agree, but allow me to play doubles advocate here for a moment. For all intensive purposes I think you are wrong. In an age where false morals are a diamond dozen, true virtues are a blessing in the skies. We often put our false morality on a petal stool like a bunch of pre-Madonnas, but you all seem to be taking something very valuable for granite. So I ask of you to mustard up all the strength you can because it is a doggy dog world out there. Although there is some merit to what you are saying it seems like you have a huge ship on your shoulder. In your argument you seem to throw everything in but the kids Nsync, and even though you are having a feel day with this I am here to bring you back into reality. I have a sick sense when it comes to these types of things. It is almost spooky, because I cannot turn a blonde eye to
# Regex for one embedded "Numéro inscription" header, split into its parts:
#   (", ")?                          optional '", "' separator before the header
#   Numéro inscription\s:\s          the literal label followed by " : "
#   (.*?):\s                         shortest run of text up to the next ": "
#   \d{4}-\d{2}-\d{2}\s\d{2}:\d{2}   a "YYYY-MM-DD HH:MM" timestamp
#   (\s\d{1,2})?                     optional trailing one- or two-digit number
_HEADER_PATTERN = (
    r'(", ")?'
    r'Numéro inscription\s:\s'
    r'(.*?):\s'
    r'\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}'
    r'(\s\d{1,2})?'
)


def strip_header(row: str) -> str:
    """Return *row* with every "Numéro inscription ..." header removed.

    Each non-overlapping match of the header pattern is replaced by the
    empty string; text around the header is left untouched, and a row
    without a header is returned unchanged.

    Args:
        row: The text to clean.

    Returns:
        The text with all matching headers deleted.
    """
    return re.sub(_HEADER_PATTERN, "", row)
# Lower-case abbreviations mapped to their spelled-out form.
# Presumably consulted by the sentence splitter so that the period in an
# abbreviation is not treated as a sentence end -- TODO confirm with caller.
abbreviations = {
    'dr.': 'doctor',
    'mr.': 'mister',
    'bro.': 'brother',
    'bro': 'brother',
    'mrs.': 'mistress',
    'ms.': 'miss',
    'jr.': 'junior',
    'sr.': 'senior',
    # "i.e." is Latin "id est" ("that is"); it was previously expanded to
    # "for example", which is the meaning of "e.g.".
    'i.e.': 'that is',
    'e.g.': 'for example',
    'vs.': 'versus',
}

# Punctuation characters listed as sentence terminators.
terminators = ['.', '!', '?', ';']

# Closing quote/bracket characters (they may follow a terminator, e.g. `."`
# or `.)` -- assumption, verify against find_sentences).
wrappers = ['"', "'", ')', ']', '}']
def find_sentences(paragraph):
end = True