Theodore Galanos TheodoreGalanos

## gist:51b40206e2feca6a6c4aab61ff392a28
@register("azure_openai")
class AzureOpenAIEmbeddings(TextEmbeddingFunction):
    """
    A text embedding function that uses the Azure OpenAI API.

    The class is passed through ``register`` with the key ``"azure_openai"``
    (presumably so it can be looked up by that name; confirm against the
    registry implementation, which is not shown here).
    """

    # Model name; defaults to "text-embedding-ada-002".
    name: str = "text-embedding-ada-002"
    # Azure connection settings. All three are declared without defaults.
    azure_api_key: str
    azure_endpoint: str
    azure_deployment: str

## domain_knowledge_generation_gpt4.py
import os
import openai
from jinja2 import Template, meta, Environment
from dotenv import load_dotenv
# Load settings from a local .env file. That file should define the four
# variables read below: OPENAI_API_KEY, OPENAI_API_TYPE, OPENAI_API_VERSION
# and OPENAI_API_BASE.
load_dotenv()
# This setup targets Azure OpenAI; adjust accordingly for the standard OpenAI API.
# os.getenv returns None for any variable that is not set.
openai.api_key = os.getenv("OPENAI_API_KEY")
openai.api_type = os.getenv("OPENAI_API_TYPE")
openai.api_version = os.getenv("OPENAI_API_VERSION")
openai.api_base = os.getenv("OPENAI_API_BASE")

## simple_rci.py
"""
the prompts.json file should contain (adjust for your prompt and use case):

{
  "prompts": {
      {
      "example": {
        "input_variables": ["variables", "here"],
        "template": ["Prompt details and {variables} {here} according to your use case.\nRationale:"]
      }

## shuffle_tfrecords.py
import tensorflow as tf
from tqdm import tqdm

# Read the index file, which lists one TFRecord path per line. The context
# manager closes the file handle as soon as the paths have been loaded
# (the previous bare open(...).read() left it open until garbage collection).
with open("data/openwebtext2_new_inputs.train.index") as index_file:
    index = index_file.read().splitlines()

# Turn the list of paths into a dataset, then read records from up to 128
# files at a time so that consecutive records come from different files.
dataset = tf.data.Dataset.from_tensor_slices(index)
dataset = dataset.interleave(
    tf.data.TFRecordDataset,
    cycle_length=128,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)

# Shuffle with a 10,000-element buffer and prefetch 100 elements.
d = dataset.shuffle(10000).prefetch(100)

## AttentionMask.py
def AttentionMask(encoder_len, state_len, decoder_len, offset=0, near_decay=0, far_decay=0, device='cpu'):
	m = -offset*np.tri(decoder_len, encoder_len+decoder_len+state_len, encoder_len)
	for i in range(encoder_len+decoder_len-1):
		m += np.tri(decoder_len, encoder_len+decoder_len+state_len, encoder_len-i-1)
	if state_len:
		ms = np.zeros((state_len, encoder_len+decoder_len+state_len))
		m = np.concatenate([m, ms], axis=0)
	m = torch.tensor(m, dtype=torch.float32, device=device)
	mx = 1-np.tri(decoder_len, encoder_len+decoder_len, encoder_len)
	mx = np.concatenate([mx, np.zeros((decoder_len, state_len))], axis=1)

## FIR.py
import torch
import torch.nn as nn

class FIR(nn.Module):
	def __init__(self, in_dim, out_dim=None, hidden_dim=None, segment_sizes=[1,2,4,8], activation=nn.functional.gelu, device='cpu'):
		super().__init__()
		if not out_dim: out_dim = in_dim
		if not hidden_dim: hidden_dim = in_dim
		cursor = 1
		nodes = [cursor]

## projector_clip.py
# Modified StyleGAN2 Projector with CLIP, addl. losses, kmeans, etc.
# by Peter Baylies, 2021 -- @pbaylies on Twitter

# Copyright (c) 2021, NVIDIA CORPORATION.  All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto.  Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
	@register("azure_openai")
	class AzureOpenAIEmbeddings(TextEmbeddingFunction):
	"""
	An embedding function that uses the Azure OpenAI API
	"""

	name: str = "text-embedding-ada-002"
	azure_api_key: str
	azure_endpoint: str
	azure_deployment: str
	import os
	import openai
	from jinja2 import Template, meta, Environment
	from dotenv import load_dotenv
	load_dotenv() # add a .env file with the following
	# setup is for azure, change accordingly for normal openai
	openai.api_key = os.getenv("OPENAI_API_KEY")
	openai.api_type = os.getenv("OPENAI_API_TYPE")
	openai.api_version = os.getenv("OPENAI_API_VERSION")
	openai.api_base = os.getenv("OPENAI_API_BASE")
	"""
	the prompts.json file should contain (adjust for your prompt and use case):

	{
	"prompts": {
	{
	"example": {
	"input_variables": ["variables", "here"],
	"template": ["Prompt details and {variables} {here} according to your use case.\nRationale:"]
	}
	import tensorflow as tf
	from tqdm import tqdm

	index = open("data/openwebtext2_new_inputs.train.index").read().splitlines()

	dataset = tf.data.Dataset.from_tensor_slices(index)
	dataset = dataset.interleave(tf.data.TFRecordDataset, cycle_length=128, num_parallel_calls=tf.data.experimental.AUTOTUNE)

	d = dataset.shuffle(10000).prefetch(100)
	def AttentionMask(encoder_len, state_len, decoder_len, offset=0, near_decay=0, far_decay=0, device='cpu'):
	m = -offset*np.tri(decoder_len, encoder_len+decoder_len+state_len, encoder_len)
	for i in range(encoder_len+decoder_len-1):
	m += np.tri(decoder_len, encoder_len+decoder_len+state_len, encoder_len-i-1)
	if state_len:
	ms = np.zeros((state_len, encoder_len+decoder_len+state_len))
	m = np.concatenate([m, ms], axis=0)
	m = torch.tensor(m, dtype=torch.float32, device=device)
	mx = 1-np.tri(decoder_len, encoder_len+decoder_len, encoder_len)
	mx = np.concatenate([mx, np.zeros((decoder_len, state_len))], axis=1)
	import torch
	import torch.nn as nn

	class FIR(nn.Module):
	def __init__(self, in_dim, out_dim=None, hidden_dim=None, segment_sizes=[1,2,4,8], activation=nn.functional.gelu, device='cpu'):
	super().__init__()
	if not out_dim: out_dim = in_dim
	if not hidden_dim: hidden_dim = in_dim
	cursor = 1
	nodes = [cursor]
	# Modified StyleGAN2 Projector with CLIP, addl. losses, kmeans, etc.
	# by Peter Baylies, 2021 -- @pbaylies on Twitter

	# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
	#
	# NVIDIA CORPORATION and its licensors retain all intellectual property
	# and proprietary rights in and to this software, related documentation
	# and any modifications thereto. Any use, reproduction, disclosure or
	# distribution of this software and related documentation without an express
	# license agreement from NVIDIA CORPORATION is strictly prohibited.