This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2016 Google Inc. All Rights Reserved. | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import multiprocessing | |
from joblib import Parallel, delayed | |
from tqdm import tqdm_notebook | |
from skimage import io | |
from PIL import Image | |
import os | |
import gc | |
import numpy as np | |
import warnings |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import tensorflow.keras.backend as K | |
def get_activations(model, model_inputs): | |
print('----- activations -----') | |
activations = [] | |
inp = model.input | |
model_multi_inputs_cond = True | |
if not isinstance(inp, list): | |
# only one input! let's wrap it in a list. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
features = { | |
'image/encoded': tf.FixedLenFeature([], tf.string), | |
'image/height': tf.FixedLenFeature([], tf.int64), | |
'image/width': tf.FixedLenFeature([], tf.int64) | |
} | |
def parse(record, image_size=256): | |
# Parse data | |
parsed = tf.parse_single_example(record, features) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import time | |
import trimap | |
import umap | |
import pacmap | |
from sklearn.manifold import TSNE | |
# Read data | |
df = pd.read_csv('your_dataset.csv') | |
# Algorithms to test |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deploy to Azure | |
terraform { | |
required_providers { | |
azurerm = { | |
source = "hashicorp/azurerm" | |
version = "=2.46.0" | |
} | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Keras MNIST example from: https://keras.io/examples/vision/mnist_convnet/ | |
Adapted to add mlflow logging | |
""" | |
import mlflow | |
import mlflow.keras | |
import numpy as np | |
from tensorflow import keras | |
from tensorflow.keras import layers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from transformers import AutoTokenizer, AutoModel | |
def mean_pooling(model_output, attention_mask): | |
""" | |
Mean pooling to get sentence embeddings. See: | |
https://huggingface.co/sentence-transformers/paraphrase-distilroberta-base-v1 | |
""" | |
token_embeddings = model_output[0] | |
input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() | |
sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1) # Sum columns |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.preprocessing import normalize | |
# Use the first question as the query | |
QUERY_ID = 0 | |
# Normalize the data | |
norm_data = normalize(sentence_embeddings, norm='l2') | |
# Calculate scores as dot product between all embeddings & query |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# For each sentence, store a list of token embeddings; i.e. a 1024-dimensional vector for each token | |
for i, sentence in enumerate(valid_sentences): | |
tokens = tokenizer.convert_ids_to_tokens(encoded_input['input_ids'][i]) | |
embeddings = model_output[0][i] | |
token_embeddings.append( | |
[{"token": token, "embedding": embedding.detach().numpy()} for token, embedding in zip(tokens, embeddings)] | |
) | |
def get_token_embeddings(embeddings_word): | |
"""Returns a list of tokens and list of embeddings""" |
OlderNewer