Skip to content

Instantly share code, notes, and snippets.

View TheodoreGalanos's full-sized avatar

Theodore Galanos TheodoreGalanos

View GitHub Profile
@TheodoreGalanos
TheodoreGalanos / gist:51b40206e2feca6a6c4aab61ff392a28
Created February 26, 2024 23:33
Hacking AzureOpenAI for lancedb
@register("azure_openai")
class AzureOpenAIEmbeddings(TextEmbeddingFunction):
"""
An embedding function that uses the Azure OpenAI API
"""
name: str = "text-embedding-ada-002"
azure_api_key: str
azure_endpoint: str
azure_deployment: str
import os
import openai
from jinja2 import Template, meta, Environment
from dotenv import load_dotenv
# Load the variables listed below from a local .env file.
load_dotenv()

# Copy each environment variable onto the matching attribute of the openai
# module. The names follow an Azure setup; change them accordingly for
# normal OpenAI. A variable that is not set yields None.
for _attr, _env_var in (
    ("api_key", "OPENAI_API_KEY"),
    ("api_type", "OPENAI_API_TYPE"),
    ("api_version", "OPENAI_API_VERSION"),
    ("api_base", "OPENAI_API_BASE"),
):
    setattr(openai, _attr, os.getenv(_env_var))
del _attr, _env_var  # keep the loop helpers out of the module namespace
"""
the prompts.json file should contain (adjust for your prompt and use case):
{
"prompts": {
{
"example": {
"input_variables": ["variables", "here"],
"template": ["Prompt details and {variables} {here} according to your use case.\nRationale:"]
}
@TheodoreGalanos
TheodoreGalanos / shuffle_tfrecords.py
Created September 5, 2021 14:02 — forked from kingoflolz/shuffle_tfrecords.py
A quick script for shuffling tfrecord datasets
import tensorflow as tf
from tqdm import tqdm
# Read the list of TFRecord file paths, one path per line. The context
# manager closes the index file as soon as it has been read, instead of
# leaving the handle open until garbage collection.
with open("data/openwebtext2_new_inputs.train.index") as index_file:
    index = index_file.read().splitlines()

# Build a dataset of file paths, then map each path to a TFRecordDataset and
# interleave their records, drawing from up to 128 files per cycle.
dataset = tf.data.Dataset.from_tensor_slices(index)
dataset = dataset.interleave(
    tf.data.TFRecordDataset,
    cycle_length=128,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

# Shuffle through a 10000-element buffer and prefetch 100 elements.
d = dataset.shuffle(10000).prefetch(100)
def AttentionMask(encoder_len, state_len, decoder_len, offset=0, near_decay=0, far_decay=0, device='cpu'):
m = -offset*np.tri(decoder_len, encoder_len+decoder_len+state_len, encoder_len)
for i in range(encoder_len+decoder_len-1):
m += np.tri(decoder_len, encoder_len+decoder_len+state_len, encoder_len-i-1)
if state_len:
ms = np.zeros((state_len, encoder_len+decoder_len+state_len))
m = np.concatenate([m, ms], axis=0)
m = torch.tensor(m, dtype=torch.float32, device=device)
mx = 1-np.tri(decoder_len, encoder_len+decoder_len, encoder_len)
mx = np.concatenate([mx, np.zeros((decoder_len, state_len))], axis=1)
import torch
import torch.nn as nn
class FIR(nn.Module):
def __init__(self, in_dim, out_dim=None, hidden_dim=None, segment_sizes=[1,2,4,8], activation=nn.functional.gelu, device='cpu'):
super().__init__()
if not out_dim: out_dim = in_dim
if not hidden_dim: hidden_dim = in_dim
cursor = 1
nodes = [cursor]
# Modified StyleGAN2 Projector with CLIP, addl. losses, kmeans, etc.
# by Peter Baylies, 2021 -- @pbaylies on Twitter
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.