jphillips fearnworks

import gradio as gr
import transformers
from torch import bfloat16
# from dotenv import load_dotenv # if you wanted to adapt this for a repo that uses auth
from threading import Thread
from gradio.themes.utils.colors import Color
# HF_AUTH = os.getenv('HF_AUTH')
# model_id = "stabilityai/StableBeluga2" # 70B param model based on Llama 2 70B
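The preview cuts off before the model setup, but the imports (gradio, transformers, Thread, bfloat16) point at the standard streaming-chat pattern. A minimal sketch of that pattern, assuming a TextIteratorStreamer fed by a background generation thread; the model ID and generation parameters here are illustrative, not necessarily the gist's:

import torch
import transformers
from threading import Thread

model_id = "stabilityai/StableBeluga2"  # illustrative; any causal LM works
tokenizer = transformers.AutoTokenizer.from_pretrained(model_id)
model = transformers.AutoModelForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="auto"
)

def stream_reply(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    streamer = transformers.TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # Run generate() on a background thread so tokens can be yielded as they arrive
    thread = Thread(
        target=model.generate,
        kwargs=dict(**inputs, streamer=streamer, max_new_tokens=256),
    )
    thread.start()
    partial = ""
    for token_text in streamer:
        partial += token_text
        yield partial  # a Gradio chat component re-renders on each yield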
@fearnworks
fearnworks / codellama_auth_gen.py
Created August 31, 2023 00:26
Generated by Code Llama
from passlib.context import CryptContext
from jose import jwt
import datetime
SECRET_KEY = "YOUR-SECRET-KEY" # Replace this with your secret key
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")
class User:
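The preview stops at the User class. A minimal sketch of how the constants above are typically wired into password hashing and token issuance with passlib and python-jose; the function names are illustrative, not necessarily the gist's:

from passlib.context import CryptContext
from jose import jwt
import datetime

SECRET_KEY = "YOUR-SECRET-KEY"  # Replace this with your secret key
ALGORITHM = "HS256"
ACCESS_TOKEN_EXPIRE_MINUTES = 30
pwd_context = CryptContext(schemes=["bcrypt"], deprecated="auto")

def hash_password(plain: str) -> str:
    return pwd_context.hash(plain)

def verify_password(plain: str, hashed: str) -> bool:
    return pwd_context.verify(plain, hashed)

def create_access_token(username: str) -> str:
    expire = datetime.datetime.utcnow() + datetime.timedelta(
        minutes=ACCESS_TOKEN_EXPIRE_MINUTES
    )
    # "sub" and "exp" are standard JWT claims; jwt.encode comes from python-jose
    return jwt.encode({"sub": username, "exp": expire}, SECRET_KEY, algorithm=ALGORITHM)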
@fearnworks
fearnworks / arxiv_dowloader_open_ai_func.py
Created June 15, 2023 00:58
This file performs a semantic search using NLP and the new OpenAI function-calling API to find answers to user queries
import arxiv
import ast
import concurrent
from csv import writer
import openai
import os
import pandas as pd
from PyPDF2 import PdfReader
import requests
from scipy import spatial
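A minimal sketch of the embedding-ranking step the scipy.spatial import implies, assuming the pre-1.0 openai client and cosine distance; the embedding model name and helper functions are assumptions, not confirmed from the gist:

import openai
from scipy import spatial

def embed(text: str) -> list[float]:
    # text-embedding-ada-002 was the usual embedding model at the time (June 2023)
    resp = openai.Embedding.create(model="text-embedding-ada-002", input=text)
    return resp["data"][0]["embedding"]

def rank_by_relatedness(query: str, abstracts: list[str], top_n: int = 5):
    q = embed(query)
    # Score each abstract by cosine similarity to the query embedding
    scored = [
        (abstract, 1 - spatial.distance.cosine(q, embed(abstract)))
        for abstract in abstracts
    ]
    scored.sort(key=lambda pair: pair[1], reverse=True)
    return scored[:top_n]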
OUTPUT:
0.0:
Summary:
{
  "subject": "a man",
  "characters": ["a man", "a silhouetted figure", "an old friend"],
  "locations": ["a dark alley in the heart of the city"],
@fearnworks
fearnworks / falcoln_7b_qlora_axolotl.yml
Created June 2, 2023 20:57
Working Falcon 7B QLoRA config for Axolotl
base_model: tiiuae/falcon-7b
base_model_config: tiiuae/falcon-7b
trust_remote_code: true
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
load_in_8bit: false
load_in_4bit: true
gptq: false
strict: false
push_dataset_to_hub:
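For reference, the load_in_4bit / trust_remote_code pair in this config corresponds roughly to the following transformers call; this is a sketch of the equivalent loading step, not Axolotl's internal code:

import torch
import transformers

# Mirrors load_in_4bit: true and trust_remote_code: true from the config above
model = transformers.AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",
    load_in_4bit=True,       # requires bitsandbytes
    trust_remote_code=True,  # Falcon shipped custom modeling code at the time
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
tokenizer = transformers.AutoTokenizer.from_pretrained(
    "tiiuae/falcon-7b", trust_remote_code=True
)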
@fearnworks
fearnworks / bug.md
Last active June 2, 2023 17:54
Falcon QLoRA 7B bug

workspace/llm-playground/notebooks/axolotl/runpod/axolotl-falcon-7b-qlora-gsm8k.ipynb

Steps to reproduce:

1) Copy the config from #4 run-16: 40*2 + xformer into examples/falcon/qlora.yml

2) Run cells 1 & 2

3) Run !accelerate launch scripts/finetune.py examples/falcon/qlora.yml

#!/bin/bash
#
# Container source: https://github.com/OpenAccess-AI-Collective/axolotl/blob/main/docker/Dockerfile-runpod
#
# To run this in RunPod with `winglian/axolotl-runpod:main-cu118-2.0.0`, set:
#   Expose HTTP Ports (Max 10): 7860,8888
#   docker command: `bash -c "curl -H 'Cache-Control: no-cache' https://raw.githubusercontent.com/utensil/llm-playground/main/scripts/entry/prepare_ax.sh -sSf | bash"`
#   JUPYTER_PASSWORD: change to your secret
#   HUGGINGFACE_TOKEN: change to your token from https://huggingface.co/settings/tokens
@fearnworks
fearnworks / config.yml
Created May 29, 2023 09:22
QLoRA slow config
base_model: openlm-research/open_llama_3b_600bt_preview
base_model_config: openlm-research/open_llama_3b_600bt_preview
model_type: LlamaForCausalLM
tokenizer_type: LlamaTokenizer
load_in_8bit: false
load_in_4bit: true
strict: false
push_dataset_to_hub:
datasets:
- path: teknium/GPT4-LLM-Cleaned
@fearnworks
fearnworks / config.yml
Created May 28, 2023 18:42
This config produces an `Error invalid device ordinal at line 359 in file /mnt/d/training_area/bitsandbytes/csrc/pythonInterface.c` error on a single-GPU A6000 run with Axolotl
# Base model checkpoint used for training
base_model: huggyllama/llama-7b
# Repo providing the configuration for the base model
base_model_config: huggyllama/llama-7b
# Model class used to load the checkpoint
model_type: LlamaForCausalLM
# Tokenizer class used to tokenize the text data
tokenizer_type: LlamaTokenizer
# Whether to load the model in 8-bit precision
load_in_8bit: false
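The "invalid device ordinal" error from bitsandbytes generally means CUDA code addressed a GPU index that does not exist on the machine. On a single-GPU box, a common workaround (an assumption about this particular run, not a confirmed fix) is to pin visibility to device 0 before anything CUDA-related is imported:

import os
# Must be set before torch/bitsandbytes initialize CUDA
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch
assert torch.cuda.device_count() == 1  # sanity check: only one ordinal is now visible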