jon-tow / accelerate_config.yaml
Last active February 19, 2023 02:41
trlx: rewrite-gather-for-metrics test
command_file: null
commands: null
compute_environment: LOCAL_MACHINE
deepspeed_config:
  gradient_accumulation_steps: 1
  gradient_clipping: 1.0
  offload_optimizer_device: none
  offload_param_device: none
  zero3_init_flag: true
  zero_stage: 2
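For reference, the nested structure a YAML parser recovers from the `deepspeed_config` block above can be sketched as a plain Python dict (a hypothetical illustration, not accelerate's internal representation):

```python
# Hypothetical dict form of the config above, as a YAML parser would nest it.
accelerate_config = {
    "compute_environment": "LOCAL_MACHINE",
    "deepspeed_config": {
        "gradient_accumulation_steps": 1,
        "gradient_clipping": 1.0,
        "offload_optimizer_device": "none",
        "offload_param_device": "none",
        "zero3_init_flag": True,
        "zero_stage": 2,
    },
}

# ZeRO stage 2 with no parameter/optimizer offloading.
print(accelerate_config["deepspeed_config"]["zero_stage"])
```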
jon-tow / traceback.md
Last active February 17, 2023 00:23
trlx: HH GPU hosting error

Command:

accelerate launch --num_processes 7 --config_file ../../configs/accelerate/zero2-bf16.yaml ppo_hh.py

Traceback:

╭─────────────────────────────── Traceback (most recent call last) ────────────────────────────────╮
│ trlx/examples/hh/ppo_hh.py:129 in <module>                                        │
│                                                                                                  │
jon-tow / test_ilql_generate.py
Last active January 26, 2023 02:28
trlx: Generating with ILQL Trainer
import argparse
import yaml
import trlx
from trlx.data.configs import TRLConfig
parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default="configs/ilql_config.yml")
parser.add_argument("--checkpoint", type=str, default="ckpts")
args = parser.parse_args()
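A quick way to sanity-check the defaults above is to parse an empty argument list, which exercises the parser without touching `sys.argv` (illustrative only):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--config", type=str, default="configs/ilql_config.yml")
parser.add_argument("--checkpoint", type=str, default="ckpts")

# Parsing an empty list falls back to the declared defaults.
args = parser.parse_args([])
print(args.config)      # configs/ilql_config.yml
print(args.checkpoint)  # ckpts
```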
"""This scripts shows how to convert a `accelerate` checkpoint to a `hf` model
that can be used for inference.
NOTE: You may need to call this script with `accelerate launch` (or the proper distributed launcher)
to run it on multiple GPUs and load the model properly (e.g. when the model was trained with `deepspeed`).
"""
import argparse
import yaml
import trlx
from trlx.data.configs import TRLConfig
jon-tow / ppo_sentiments.py
Created January 10, 2023 20:30
PPO sentiments example from before t5 commit
# Generates positive movie reviews by tuning a pretrained model on IMDB dataset
# with a sentiment reward function
import os
from typing import List
import torch
import yaml
from datasets import load_dataset
from transformers import pipeline
import trlx
from trlx.data.configs import TRLConfig
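The sentiment reward function maps pipeline outputs to scalar rewards. A minimal sketch, assuming the `transformers` sentiment pipeline's usual `{"label", "score"}` output format; the function name and the sign convention here are illustrative, not the gist's exact code:

```python
from typing import Dict, List


def sentiment_reward(outputs: List[Dict]) -> List[float]:
    """Turn sentiment-pipeline outputs into scalar rewards:
    the positive-class score, negated for NEGATIVE labels."""
    rewards = []
    for out in outputs:
        score = out["score"]
        rewards.append(score if out["label"] == "POSITIVE" else -score)
    return rewards


# Stubbed pipeline outputs, to avoid downloading a model here.
fake_outputs = [
    {"label": "POSITIVE", "score": 0.98},
    {"label": "NEGATIVE", "score": 0.75},
]
print(sentiment_reward(fake_outputs))  # [0.98, -0.75]
```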
jon-tow / CW_MULTI_NODE_DDP_README.md
Last active December 12, 2022 17:03
trlx: Multi-Node DDP on CW Cluster
  1. Create your Python env with conda or micromamba and install mpi / mpi4py (mpi is not available on the CW cluster):
micromamba install -c conda-forge mpi
micromamba install -c conda-forge mpi4py
  2. Copy both slurm_<_>.sh scripts into your trlx clone.
  3. Update your accelerate config with the provided yaml.
  4. sbatch slurm_ddp.sh
jon-tow / README.md
Last active December 5, 2022 22:47
trlx: CausalLM (IMDB) Sentiment Benchmark

This gist contains config setups for PPO and ILQL methods. NOTE: Hyper-parameters were optimized for opt/gpt-j models at the 6B parameter scale.

jon-tow / pile_raw_decompress.sh
Last active July 24, 2022 01:11
Quick scripts for downloading `The Pile` from "the-eye"
#!/bin/bash
# Decompress the raw Pile from `the-eye`.
#
# Takes an input dir of the form:
#
# train/
#   {00-29}.jsonl.zst
# val.jsonl.zst
# test.jsonl.zst
# SHA256SUMS.txt
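The `SHA256SUMS.txt` in the layout above can be used to verify the downloads before decompressing. A minimal sketch of the hashing side (the helper name is mine; for multi-GB `.zst` shards you would feed the hash object in chunks rather than reading whole files):

```python
import hashlib


def sha256sum(data: bytes) -> str:
    """Hex digest of a byte string, matching the format in SHA256SUMS.txt."""
    return hashlib.sha256(data).hexdigest()


# Known digest of the empty byte string, as a self-contained check.
print(sha256sum(b""))
# e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
```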
jon-tow / baselines.yml
Last active June 5, 2022 03:32
Random and Human Baselines for common NLP tasks/benchmarks.
# Random and Human Baselines for common NLP tasks/benchmarks.
# TODO: Maybe add metadata containing citations, repos, etc.
arc_challenge:
  # https://leaderboard.allenai.org/arc/submissions/public
  # NOTE: For random scores, the 0.02% score difference from an ideal 25% is due
  # to some questions having fewer or more than four choices.
  acc:
    random: 0.2502
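That 0.2502 figure follows from averaging per-question chance: uniform guessing gives 1/n on a question with n choices, so a few 3- or 5-choice questions nudge the mean off 25%. A minimal sketch of the computation (the choice counts below are made up for illustration, not ARC's actual distribution):

```python
def random_baseline(choice_counts):
    """Expected accuracy of uniform random guessing over questions
    with possibly varying numbers of answer choices."""
    return sum(1.0 / n for n in choice_counts) / len(choice_counts)


# All four-choice questions give exactly 25%...
print(random_baseline([4, 4, 4, 4]))  # 0.25
# ...while mixing in 3- and 5-choice questions shifts the baseline slightly.
print(round(random_baseline([4] * 98 + [3, 5]), 4))
```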