Skip to content

Instantly share code, notes, and snippets.

@VXU1230
Last active March 19, 2019 17:55
Show Gist options
  • Save VXU1230/06dcee61923af3239c3c3b1d9854f82a to your computer and use it in GitHub Desktop.
Save VXU1230/06dcee61923af3239c3c3b1d9854f82a to your computer and use it in GitHub Desktop.
# Passed to skipgrams() as `window_size` in process_sent.
WINDOW_SIZE = 5
# Passed to skipgrams() as `negative_samples` in process_sent.
NEGATIVE_SAMPLES = 10
# Per-process cache of sampling tables, keyed by vocabulary size.
_SAMPLING_TABLE_CACHE = {}


def _get_sampling_table(vocab_size):
    """Return the sampling table for ``vocab_size``, building it only once per process."""
    table = _SAMPLING_TABLE_CACHE.get(vocab_size)
    if table is None:
        table = keras.preprocessing.sequence.make_sampling_table(vocab_size)
        _SAMPLING_TABLE_CACHE[vocab_size] = table
    return table


def process_sent(sent):
    """Generate skip-gram training pairs for a single sentence.

    Args:
        sent: The sentence, passed to ``skipgrams`` as ``sequence``
            (presumably a list of integer word ids below ``VOCAB_SIZE``).

    Returns:
        ``[target, context, labels]`` where ``target`` and ``context`` are
        lists holding the first and second element of every generated couple
        and ``labels`` is the label list returned by ``skipgrams``; or
        ``None`` when no couples were generated for the sentence, so callers
        can filter the sentence out.
    """
    # The table depends only on VOCAB_SIZE, so reuse it across sentences
    # instead of rebuilding it on every call.
    sampling = _get_sampling_table(VOCAB_SIZE)
    couples, labels = keras.preprocessing.sequence.skipgrams(
        sequence=sent,
        vocabulary_size=VOCAB_SIZE,
        window_size=WINDOW_SIZE,
        negative_samples=NEGATIVE_SAMPLES,
        shuffle=True,
        sampling_table=sampling,
    )
    # Short or heavily subsampled sentences can yield no couples at all;
    # zip(*[]) cannot be unpacked into two names, so bail out explicitly.
    if not couples:
        return None
    target, context = zip(*couples)
    return [list(target), list(context), labels]
def process_batch(data):
    """Run ``process_sent`` over every sentence in ``data`` using a process pool.

    Args:
        data: Iterable of sentences; each item is handed to ``process_sent``.

    Returns:
        A NumPy array built from the non-``None`` per-sentence results. When
        the per-sentence results have differing lengths the array has
        ``dtype=object``.
    """
    with Pool(processes=NUM_WORKERS) as pool:
        results = pool.map(process_sent, data, chunksize=200)
    # Drop sentences for which process_sent produced nothing.
    results = [x for x in results if x is not None]
    try:
        return np.asarray(results)
    except ValueError:
        # Sentences normally yield different numbers of couples, so the
        # nested lists are ragged; NumPy >= 1.24 refuses to build such an
        # array unless dtype=object is requested explicitly.
        return np.asarray(results, dtype=object)
# Build the skip-gram inputs for the train and test splits.
# NOTE(review): process_batch creates a multiprocessing Pool, and these calls
# run at module level; on spawn-based platforms this usually needs an
# `if __name__ == "__main__":` guard — confirm how this script is executed.
train_input = process_batch(train_text_ids)
test_input = process_batch(test_text_ids)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment