@epwalsh
Created February 1, 2022 21:41
Training GPT-J 6B with tango
# Step 1: Create and activate a new virtual environment (need Python 3.7 or newer)
virtualenv .venv
. .venv/bin/activate
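# (Optional sanity check, not in the original steps) Confirm the activated
# interpreter meets the Python 3.7+ requirement before installing anything:
python -c 'import sys; assert sys.version_info >= (3, 7), sys.version'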
# Step 2: Install latest PyTorch
# This assumes your drivers are compatible with CUDA 11.x. If not, see
# https://pytorch.org/ for alternate install instructions.
pip install torch==1.10.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
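# (Optional check, not in the original steps) Verify that this torch build
# can see your GPUs: prints the installed version, then True if CUDA is
# usable. False usually means a driver/CUDA version mismatch.
python -c 'import torch; print(torch.__version__, torch.cuda.is_available())'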
# Step 3: Clone and install the "tango" repo which has the GPT-J example.
git clone https://github.com/allenai/tango.git && cd tango
git checkout deepspeed-3 # the example lives on this branch (despite the name, it uses FairScale, not DeepSpeed)
pip install -e '.[all]'
# Step 4: Prepare the training config.
cd examples/train_lm
cp config.jsonnet my-config.jsonnet
# Now open "my-config.jsonnet" with a text editor and change the constants for your use case.
# For example:
# - change "pretrained_model" to "EleutherAI/gpt-j-6B"
# - change "devices" to however many GPUs you have
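# As a rough sketch (assumed layout -- the real structure of
# "config.jsonnet" may differ), the edited constants could look like:
#
#   local pretrained_model = "EleutherAI/gpt-j-6B";
#   local devices = 4;  // set to the number of GPUs on your machine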
# Step 5: Run the example.
WORKSPACE_DIR=/tmp/train # change this to whatever you want.
tango --log-level info run my-config.jsonnet -i components.py -d $WORKSPACE_DIR