BigsnarfDude bigsnarfdude

## r_and_b.py
(harness) vincent@virus:~/Downloads$ cat bleu_text.py
# Minimal BLEU demo: score one tokenized candidate against several references.
from nltk.translate.bleu_score import sentence_bleu

# Each reference is a whitespace-tokenized sentence. str.split() keeps
# punctuation attached to the word, so the last reference contains 'dog,'.
reference = [
    'this is a dog'.split(),
    'it is dog'.split(),
    'dog it is'.split(),
    'a dog, it is'.split(),
]
candidate = 'it is dog'.split()

# The candidate has only 3 tokens, so it contains no 4-grams. With the default
# uniform 1- to 4-gram weights that empty 4-gram precision drives the score to
# (almost) zero even though the candidate is identical to a reference.
# auto_reweigh=True re-normalizes the weights over the n-gram orders the
# candidate can actually provide.
score = sentence_bleu(reference, candidate, auto_reweigh=True)
print('BLEU score -> {}'.format(score))

## kaist_orpo.py
# requires A100 40GB - 30gb VRAM

from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the checkpoint and its tokenizer, then turn a single-turn chat into
# prompt tensors placed on the GPU.
MODEL_ID = "kaist-ai/mistral-orpo-capybara-7k"
device = "cuda"

model = AutoModelForCausalLM.from_pretrained(MODEL_ID)
model = model.to(device)
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# One user message, in the role/content format the chat template expects.
query = [
    {
        'role': 'user',
        'content': 'Tell me how AI is like the Industrial Revolution',
    },
]

# Render the conversation to a plain string (tokenize=False) with the
# generation prompt appended, then tokenize that string into PyTorch tensors.
prompt = tokenizer.apply_chat_template(
    query,
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors='pt').to(device)

## finetune_gpt2.py
import os
import time
import datetime

import pandas as pd
import seaborn as sns
import numpy as np
import random

import matplotlib.pyplot as plt

## sft.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                bigsnarfdude
                / sft.ipynb
            
            
              Created
              April 28, 2024 03:33
            
              
                sft.ipynb
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## reference.md

      
              1 file
            
          
              0 forks
            
          
              1 comment
            
          
              0 stars
            
          
                bigsnarfdude
                / reference.md
            
            
              Last active
              April 28, 2024 00:42
            
              
                reference.md
              
          
    The AI Revolution:
Echoes of the Industrial Age
The rise of Artificial Intelligence (AI) is often compared to the Industrial Revolution, and for good reason. Both represent periods of significant technological advancement that have fundamentally reshaped societies and economies. While separated by centuries, striking parallels emerge in their impact on labor, production, and the overall fabric of human life.
Transformation of Labor:
Industrial Revolution:

  
## every_frame.sh
#!/bin/zsh

# Require the video filename as the first positional argument.
if [ $# -eq 0 ]; then
    # Usage errors go to stderr so they never mix with captured stdout.
    echo "Please provide the video filename as an argument." >&2
    exit 1
fi

# Quote the expansion so the filename is kept intact (spaces, glob chars)
# regardless of shell options.
video_filename="$1"

## gpt2-ppo-training.ipynb

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                bigsnarfdude
                / gpt2-ppo-training.ipynb
            
            
              Last active
              April 24, 2024 18:59
            
              
                gpt2-ppo-training.ipynb
              
          
      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## prompts.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                bigsnarfdude
                / prompts.md
            
            
              Created
              April 24, 2024 15:15
            
          
    Instruction-following prompts for ChatGPT, GPT-3.5, GPT-4

This project consists of prompts for ChatGPT and GPT-3.5 models, designed to assist with writing, analysis, and comprehension tasks. There are numerous prompts below that you can use to generate content for your projects, debug your code, find solutions to problems, or simply learn more about what these models can do. By using the appropriate instructional verbs, you can guide the models to solve any language-related tasks.
InstructGPT (more recent versions are referred to as GPT-3.5) is a series of language models that have been trained using instructions and human feedback to better understand and align with a user's intent, producing more accurate and appropriate outputs.
ChatGPT also uses the InstructGPT method, but in a dialogue form, to understand user instructions and generate outputs based on them.
GPT-4 is more powerful than any GPT-3.5 model.

  
## gist:960c2d3d150ac1334839791df573506c
Wed Apr 24 00:45:56 2024
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.129.03             Driver Version: 535.129.03   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|=========================================+======================+======================|
|   0  NVIDIA A100-SXM4-40GB          On  | 00000000:07:00.0 Off |                    0 |
| N/A   56C    P0             367W / 400W |  20643MiB / 40960MiB |    100%      Default |

## convert_alpaca.py
# pretraining -> supervised instruction-finetuning -> RLHF

import json
import tiktoken

# open file
def extract_text_from_jsonl(file_path):
    prompts = []
    completions = []
    with open(file_path, 'r') as file:
	(harness) vincent@virus:~/Downloads$ cat bleu_text.py
	from nltk.translate.bleu_score import sentence_bleu
	reference = [
	'this is a dog'.split(),
	'it is dog'.split(),
	'dog it is'.split(),
	'a dog, it is'.split()
	]
	candidate = 'it is dog'.split()
	print('BLEU score -> {}'.format(sentence_bleu(reference, candidate )))
	# requires A100 40GB - 30gb VRAM

	from transformers import AutoModelForCausalLM, AutoTokenizer

	device = "cuda"
	model = AutoModelForCausalLM.from_pretrained("kaist-ai/mistral-orpo-capybara-7k").to(device)
	tokenizer = AutoTokenizer.from_pretrained("kaist-ai/mistral-orpo-capybara-7k")
	query = [{'role': 'user', 'content': 'Tell me how AI is like the Industrial Revolution'}]
	prompt = tokenizer.apply_chat_template(query, tokenize=False, add_generation_prompt=True)
	inputs = tokenizer (prompt, return_tensors='pt').to(device)
	import os
	import time
	import datetime

	import pandas as pd
	import seaborn as sns
	import numpy as np
	import random

	import matplotlib.pyplot as plt
	#!/bin/zsh

	# Check if the video filename is provided as an argument
	if [ $# -eq 0 ]; then
	echo "Please provide the video filename as an argument."
	exit 1
	fi

	video_filename=$1
	Wed Apr 24 00:45:56 2024
	+---------------------------------------------------------------------------------------+
	\| NVIDIA-SMI 535.129.03 Driver Version: 535.129.03 CUDA Version: 12.2 \|
	\|-----------------------------------------+----------------------+----------------------+
	\| GPU Name Persistence-M \| Bus-Id Disp.A \| Volatile Uncorr. ECC \|
	\| Fan Temp Perf Pwr:Usage/Cap \| Memory-Usage \| GPU-Util Compute M. \|
	\| \| \| MIG M. \|
	\|=========================================+======================+======================\|
	\| 0 NVIDIA A100-SXM4-40GB On \| 00000000:07:00.0 Off \| 0 \|
	\| N/A 56C P0 367W / 400W \| 20643MiB / 40960MiB \| 100% Default \|
	# pretraining -> supervised instruction-finetuning -> RLHF

	import json
	import tiktoken

	# open file
	def extract_text_from_jsonl(file_path):
	prompts = []
	completions = []
	with open(file_path, 'r') as file: