Skip to content

Instantly share code, notes, and snippets.

@jcrousse
jcrousse / async_streams.py
Created March 8, 2022 20:06
Using CUDA streams with CuPy
import cupy as cp
import time
import asyncio
async def predict(N, power):
compute_stream = cp.cuda.stream.Stream(non_blocking=True)
compute_stream.use()
d_mat = cp.random.randn(N * N, dtype=cp.float64).reshape(N, N)
d_ret = d_mat
from prefixspan import PrefixSpan
from data_sources.data_generator import ExamplesGenerator, get_multiple_patterns
# Experiment configuration. NOTE(review): the code that consumes these values is not visible
# in this excerpt, so the per-constant descriptions below are assumptions to confirm.
VOCAB_SIZE = 1000  # presumably the number of distinct tokens — TODO confirm
SEQ_LEN = 250  # presumably the length of each generated sequence — TODO confirm
multiple_patterns = get_multiple_patterns(10)  # project helper; the meaning of the argument 10 is not visible here
NUM_EXAMPLES = 200  # presumably how many examples to generate — TODO confirm
MIN_FREQ = 25  # presumably a minimum frequency threshold for pattern mining — TODO confirm
@jcrousse
jcrousse / keras_models.py
Created July 21, 2020 09:40
SOS Keras model 2.1
def get_learned_scores(**kwargs):
"""
scores each sentence, then multiply by score before next sequence layer.
:Keyword Arguments:
* sent_len (int) Sentence length
* embedding_size (int) word embedding length
* seq_len (int) length of overall sequence, equal to number of sentences x number of words per sentence
* pre_embedded (bool) True if input is already vectors of word embeddings, false if tokens to be embedded
:param : (int)
"""
@jcrousse
jcrousse / keras_models.py
Created June 7, 2020 10:30
sequence of sequence model
def get_learned_scores(**kwargs):
"""
scores each sentence, then multiply by score before next sequence layer.
:Keyword Arguments:
* sent_len (int) Sentence length
* embedding_size (int) word embedding length
* seq_len (int) length of overall sequence, equal to number of sentences x number of words per sentence
* pre_embedded (bool) True if input is already vectors of word embeddings, false if tokens to be embedded
* concat_outputs (bool) True for a model with two similar outputs (2 level sequence model), False for
a single output attention model (weighted average of sentences)
def write_to_html(sentences, highlight_vals, filename, low_val=(255, 255, 255), high_val=(77, 145, 255),
                  out_dir=OUT_FOLDER):
    """
    Write sentences to an HTML file, one ``<span>`` per sentence, with a background colour
    that encodes the sentence's highlight value.

    Highlight values are rescaled by dividing by the largest value, then each RGB channel is
    linearly interpolated between ``low_val`` (scaled score 0) and ``high_val`` (scaled score 1).

    :param sentences: iterable of sentences; each is converted with ``str`` and HTML-escaped
    :param highlight_vals: sequence of numeric scores, paired with ``sentences`` by position
        (``zip`` stops at the shorter of the two)
    :param filename: name of the file to create inside ``out_dir``
    :param low_val: RGB triple used for a scaled score of 0
    :param high_val: RGB triple used for a scaled score of 1
    :param out_dir: directory the file is written to
    """
    import html  # local import so the block does not depend on a file-level import

    # An empty score list or an all-zero maximum used to raise (ValueError / ZeroDivisionError);
    # in both cases fall back to a scale of 0 so every sentence gets the ``low_val`` colour.
    peak = max(highlight_vals, default=0)
    scale = 1 / peak if peak else 0
    scaled_hl = [e * scale for e in highlight_vals]

    with open(Path(out_dir) / filename, 'w') as f:
        for sent, score in zip(sentences, scaled_hl):
            color_vals = [int(low * (1 - score) + high * score) for low, high in zip(low_val, high_val)]
            # Escape the text so characters such as '<' or '&' cannot break the generated markup.
            safe_sent = html.escape(str(sent), quote=False)
            f.write(f"<span style=\"background-color: rgb({color_vals[0]},{color_vals[1]},{color_vals[2]})\">"
                    f"{safe_sent}</span>\n")
def get_learned_scores(**kwargs):
"""
scores each sentence, then multiply by score before next sequence layer
"""
sent_len = kwargs.get('sent_len')
embed_size = kwargs.get('embedding_size')
seq_len = kwargs.get("seq_len")
pre_embedded = kwargs.get("pre_embedded", False)
assert seq_len % sent_len == 0, "sequence length must be a multiple of sentence length"
sent_per_obs = seq_len // sent_len