This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Length of the per-annotation label vector (see `[0] * NUM_CLASSES` below).
NUM_CLASSES = 228

# Parse the training annotations once, when the module is executed.
# JSON interchange files are UTF-8, so decode explicitly instead of relying
# on the platform's locale encoding.
with open("data/train.json", encoding="utf-8") as train:
    train_json = json.load(train)
def generate_label_array(json_obj): | |
result = [] | |
for data in json_obj['annotations']: | |
temp_array = [0] * NUM_CLASSES | |
for elem in data['labelId']: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from keras.utils.data_utils import Sequence | |
import requests | |
import io | |
class BatchSequence(Sequence):
    """Keras ``Sequence`` that keeps a dataset together with batching settings.

    NOTE(review): only the constructor is visible in this excerpt. Keras
    expects ``Sequence`` subclasses to also provide ``__len__`` and
    ``__getitem__`` -- confirm they exist in the full file.
    """

    def __init__(self, x_set, y_set, batch_size=32, image_size=228):
        """Store the dataset and batching parameters on the instance.

        Args:
            x_set: Input samples; kept as ``self.x``.
                (presumably image locations, given the ``requests``/``io``
                imports -- verify against the caller)
            y_set: Targets matching ``x_set``; kept as ``self.y``.
            batch_size: Number of samples per batch (default 32).
            image_size: Image size setting (default 228).
                (units/shape are not shown here -- TODO confirm)
        """
        self.x, self.y = x_set, y_set
        self.batch_size = batch_size
        self.image_size = image_size
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
from nltk import tokenize | |
from er_core.er import ER | |
from er_core.preprocessors.normalizer import Normalizer | |
normalizer = Normalizer("default") | |
bad_string = "I hole-hardedly agree, but allow me to play doubles advocate here for a moment. For all intensive purposes I think you are wrong. In an age where false morals are a diamond dozen, true virtues are a blessing in the skies. We often put our false morality on a petal stool like a bunch of pre-Madonnas, but you all seem to be taking something very valuable for granite. So I ask of you to mustard up all the strength you can because it is a doggy dog world out there. Although there is some merit to what you are saying it seems like you have a huge ship on your shoulder. In your argument you seem to throw everything in but the kids Nsync, and even though you are having a feel day with this I am here to bring you back into reality. I have a sick sense when it comes to these types of things. It is almost spooky, because I cannot turn a blonde eye to |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def strip_header(row):
    """Return *row* with every "Numéro inscription" header fragment removed.

    A fragment is matched as:

    * an optional leading ``", "`` separator,
    * the literal ``Numéro inscription`` followed by `` : ``,
    * any (shortest possible) text up to a ``:``,
    * a ``YYYY-MM-DD HH:MM`` timestamp,
    * an optional trailing whitespace-separated 1-2 digit number.

    Args:
        row: The text to clean.

    Returns:
        The text with all matching fragments replaced by the empty string;
        the input is returned unchanged when nothing matches.
    """
    return re.sub(
        r'(", ")?Numéro inscription\s:\s(.*?):\s\d{4}-\d{2}-\d{2}\s\d{2}:\d{2}(\s\d{1,2})?',
        "",
        row,
    )
# Lower-case abbreviation -> spoken-out expansion.
# 'i.e.' (id est) means "that is"; only 'e.g.' means "for example".
abbreviations = {
    'dr.': 'doctor',
    'mr.': 'mister',
    'bro.': 'brother',
    'bro': 'brother',
    'mrs.': 'mistress',
    'ms.': 'miss',
    'jr.': 'junior',
    'sr.': 'senior',
    'i.e.': 'that is',
    'e.g.': 'for example',
    'vs.': 'versus',
}

# Punctuation marks listed as sentence terminators.
# (presumably consumed by find_sentences -- its body is not visible here)
terminators = ['.', '!', '?', ';']

# Closing quote/bracket characters listed as wrappers.
# (presumably allowed to trail a terminator -- verify in find_sentences)
wrappers = ['"', "'", ')', ']', '}']
def find_sentences(paragraph): | |
end = True |