Theodore Galanos (TheodoreGalanos)
@kalomaze
kalomaze / modeling_mixtral.py
Created May 5, 2024 03:38
Fixed Mixtral training code for HF Transformers
# coding=utf-8
# Copyright 2023 Mixtral AI and the HuggingFace Inc. team. All rights reserved.
#
# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
# and OPT implementations in this library. It has been modified from its
# original forms to accommodate minor architectural differences compared
# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@csiebler
csiebler / openai_prompts.md
Last active November 23, 2023 14:28
Azure OpenAI Service prompt examples

GPT-3 examples (using mostly text-davinci-003)

Information extraction from claim phone conversations

  • Engine: text-davinci-003 (also works with text-davinci-002, but might require more instructions to get correct JSON back)
  • Temperature: 0.7
You must extract the following information from the phone conversation below:
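A minimal sketch of wiring a prompt like this to the service, assuming the legacy openai Python SDK (pre-1.0) with Azure settings; the endpoint, key, deployment name, and conversation are placeholders, and the field list is elided because the prompt above is truncated in this preview:

import openai

# Hedged sketch, not from the gist: legacy (<1.0) openai SDK with Azure settings.
openai.api_type = "azure"
openai.api_base = "https://YOUR-RESOURCE.openai.azure.com/"  # placeholder endpoint
openai.api_version = "2023-05-15"
openai.api_key = "YOUR-KEY"  # placeholder

transcript = "Agent: Thanks for calling... Caller: I want to report a claim..."  # placeholder
prompt = (
    "You must extract the following information from the phone conversation below:\n"
    "...\n\n"  # the gist's field list is truncated in this preview
    + transcript
)

response = openai.Completion.create(
    engine="text-davinci-003",  # the Azure deployment name for the model
    prompt=prompt,
    temperature=0.7,
    max_tokens=500,
)
print(response["choices"][0]["text"])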
import numpy as np
import torch

def AttentionMask(encoder_len, state_len, decoder_len, offset=0, near_decay=0, far_decay=0, device='cpu'):
    # Additive mask over the concatenated [encoder | decoder | state] axis.
    # Summing shifted lower-triangular matrices yields a bias that falls off
    # linearly with distance. near_decay/far_decay are unused in this fragment.
    m = -offset * np.tri(decoder_len, encoder_len + decoder_len + state_len, encoder_len)
    for i in range(encoder_len + decoder_len - 1):
        m += np.tri(decoder_len, encoder_len + decoder_len + state_len, encoder_len - i - 1)
    if state_len:
        ms = np.zeros((state_len, encoder_len + decoder_len + state_len))
        m = np.concatenate([m, ms], axis=0)
    m = torch.tensor(m, dtype=torch.float32, device=device)
    # Complementary upper-triangular (no-peek) mask, padded for the state slots.
    mx = 1 - np.tri(decoder_len, encoder_len + decoder_len, encoder_len)
    mx = np.concatenate([mx, np.zeros((decoder_len, state_len))], axis=1)
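For intuition on the np.tri calls above, a standalone sketch with made-up sizes (encoder_len=2, decoder_len=3, state_len=0): np.tri(rows, cols, k) is 1 on and below the k-th diagonal, so the base causal band is

import numpy as np

print(np.tri(3, 5, 2))
# [[1. 1. 1. 0. 0.]
#  [1. 1. 1. 1. 0.]
#  [1. 1. 1. 1. 1.]]
# Each decoder row can see both encoder positions plus itself and earlier
# decoder positions; shifting k down and summing stacks these bands into a
# linear falloff with distance.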
import torch
import torch.nn as nn

class FIR(nn.Module):
    def __init__(self, in_dim, out_dim=None, hidden_dim=None, segment_sizes=[1, 2, 4, 8], activation=nn.functional.gelu, device='cpu'):
        super().__init__()
        if not out_dim: out_dim = in_dim
        if not hidden_dim: hidden_dim = in_dim
        cursor = 1
        nodes = [cursor]
import numpy as np
import jax.numpy as jnp

def apply_reshard(pytree_params_in, pytree_params_out, shards_in, shards_out):
    def override_dtype(x):
        # Checkpoints store bfloat16 as raw 2-byte void ('V2'); relabel in place.
        if x.dtype == np.dtype('V2'):
            x.dtype = jnp.bfloat16
        return x

    def is_leaf(x):
        return type(x) == np.ndarray
import torch.nn as nn

class CrossAttentionModConv2d(nn.Module):
    def __init__(self, state, ch, d_context, ch_q=None, d_v=None, n_head=1):
        super().__init__()
        assert ch % n_head == 0
        self.state = state
        self.n_head = n_head
        self.ch = ch
        self.d_context = d_context
        self.ch_q = ch_q or self.ch
        self.d_v = d_v or self.d_context
@kinoc
kinoc / j6b_train_hf_ds.py
Last active August 9, 2023 03:05
So now you want to finetune that GPT-J-6B on a 3090/TITAN GPU ... okay, using HF and DeepSpeed too
# So now you want to finetune that GPT-J-6B on a 3090/TITAN GPU ... okay
# More exploratory coding. It uses the HuggingFace model port and DeepSpeed, and reads all text/md files from a target directory.
# It is a fragment of a larger system with remote editing, but that's another story.
# This is the raw training tester. Items to look out for:
# - uses DeepSpeed and has a DS config
# - to save space uses SGD instead of ADAM
# - uses gradient checkpointing
# - freezes 25% of the layers to fit
# Assumes you can already run https://gist.github.com/kinoc/2d636a68876cd3de7b6e9c9452b61089
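A hedged sketch (not the gist's code; the model name and the 25% split come from the notes above) of the memory-saving tricks it lists:

import torch
from transformers import GPTJForCausalLM

model = GPTJForCausalLM.from_pretrained("EleutherAI/gpt-j-6B")
model.gradient_checkpointing_enable()  # recompute activations instead of storing them

# Freeze the first 25% of the transformer blocks to shrink gradient/optimizer state.
n_frozen = model.config.n_layer // 4  # 28 layers -> 7 frozen
for block in model.transformer.h[:n_frozen]:
    block.requires_grad_(False)

# Plain SGD keeps no per-parameter moment buffers, unlike Adam.
optimizer = torch.optim.SGD((p for p in model.parameters() if p.requires_grad), lr=1e-4)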
@CurtisASmith
CurtisASmith / gpt-j-6b-inference.ipynb
Created July 5, 2021 13:44
GPT-J-6B Inference.ipynb
@kinoc
kinoc / jserv_hf_fast.py
Created June 21, 2021 10:54
Run HuggingFace converted GPT-J-6B checkpoint using FastAPI and Ngrok on local GPU (3090 or Titan)
# So you want to run GPT-J-6B using HuggingFace+FastAPI on a local rig (3090 or TITAN) ... tricky.
# special help from the Kolob Colab server https://colab.research.google.com/drive/1VFh5DOkCJjWIrQ6eB82lxGKKPgXmsO5D?usp=sharing#scrollTo=iCHgJvfL4alW
# Conversion to HF format (12.6GB tar image) found at https://drive.google.com/u/0/uc?id=1NXP75l1Xa5s9K18yf3qLoZcR6p4Wced1&export=download
# Uses GDOWN to get the image
# You will need 26 GB of space, 12+GB for the tar and 12+GB expanded (you can nuke the tar after expansion)
# Near Simplest Language model API, with room to expand!
# runs GPT-J-6B on 3090 and TITAN and serves it using FastAPI
# change "seq" (which is the context size) to adjust footprint
@crowsonkb
crowsonkb / complex_optim.py
Created June 4, 2021 16:56
Complex momentum SGD and Adam. See https://arxiv.org/abs/2102.08431.
"""Complex momentum SGD and Adam. See https://arxiv.org/abs/2102.08431."""
import math
import torch
from torch import optim
class ComplexSGD(optim.Optimizer):
def __init__(self, params, lr=1e-2, momentum=0.9, angle=math.pi / 8, weight_decay=0.):
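The preview cuts off here. For context, a hypothetical usage sketch, assuming the complete ComplexSGD class from the gist; it is meant as a drop-in replacement for torch.optim.SGD, with angle setting the phase of the complex momentum coefficient:

# Hypothetical usage, assuming the full class body from the gist is present.
model = torch.nn.Linear(16, 1)
opt = ComplexSGD(model.parameters(), lr=1e-2, momentum=0.9, angle=math.pi / 8)

loss = model(torch.randn(4, 16)).pow(2).mean()
opt.zero_grad()
loss.backward()
opt.step()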