This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 | |
import math | |
import random | |
import sys | |
from argparse import ArgumentParser | |
from collections import defaultdict | |
from util.functions import trace | |
def parse_args(): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from numpy.random import choice as random_choice, randint as random_randint, rand | |
MAX_INPUT_LEN = 40 | |
AMOUNT_OF_NOISE = 0.2 / MAX_INPUT_LEN | |
CHARS = list("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ .") | |
def add_noise_to_string(a_string, amount_of_noise): | |
"""Add some artificial spelling mistakes to the string""" | |
if rand() < amount_of_noise * len(a_string): | |
# Replace a character with a random character | |
random_char_position = random_randint(len(a_string)) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# Ander Martinez Sanchez | |
from __future__ import division, print_function | |
from math import exp, log | |
from collections import Counter | |
def ngram_count(words, n): | |
if n <= len(words): |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Source: | |
# https://github.com/farizrahman4u/seq2seq/blob/master/seq2seq/layers/state_transfer_lstm.py | |
from keras import backend as K | |
from keras.layers.recurrent import LSTM | |
class StateTransferLSTM(LSTM): | |
"""LSTM with the ability to transfer its hidden state. | |
This layer behaves just like an LSTM, except that it can transfer (or |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def smart_procrustes_align_gensim(base_embed, other_embed, words=None): | |
"""Procrustes align two gensim word2vec models (to allow for comparison between same word across models). | |
Code ported from HistWords <https://github.com/williamleif/histwords> by William Hamilton <wleif@stanford.edu>. | |
(With help from William. Thank you!) | |
First, intersect the vocabularies (see `intersection_align_gensim` documentation). | |
Then do the alignment on the other_embed model. | |
Replace the other_embed model's syn0 and syn0norm numpy matrices with the aligned version. | |
Return other_embed. |
OlderNewer