Aaron a0n

## j6b_train_hf_ds.py
#  So now you want to fine-tune GPT-J-6B on a 3090/TITAN GPU ... okay
#  More exploratory coding. It uses the Hugging Face model port and DeepSpeed, and reads all text/md files from a target directory.
#  It is a fragment of a larger system with remote editing, but that's another story.
#  This is the raw training tester. Items to look out for:
#  - uses DeepSpeed and has a DS config
#  - uses SGD instead of Adam to save space
#  - uses gradient checkpointing
#  - freezes 25% of the layers to fit

# Assumes you can already run https://gist.github.com/kinoc/2d636a68876cd3de7b6e9c9452b61089

## upgrade_raspbian_to_buster.sh
#!/bin/bash
sudo bash -c "sync \
&& apt-get update \
&& apt-get -y upgrade \
&& apt-get -y dist-upgrade \
&& sed -i 's/stretch/buster/g' /etc/apt/sources.list \
&& sed -i 's/stretch/buster/g' /etc/apt/sources.list.d/raspi.list \
&& apt-get update \
&& apt-get -y upgrade \
&& apt-get -y dist-upgrade \
	# So now you want to finetune that GPT-J-6B on a 3090/TITAN GPU ... okay
	# More exploratory coding. It uses the Huggingface model port, deepspeed and reads all text/md files from a target directory
	# It is a fragment of a larger system with remote editing, but that's another story
	# This is the raw, training tester. Items to look out for:
	# - uses DeepSpeed and has a DS config
	# - to save space uses SGD instead of ADAM
	# - uses gradient checkpointing
	# - freezes 25% of the layers to fit

	# Assumes you can already run https://gist.github.com/kinoc/2d636a68876cd3de7b6e9c9452b61089
	#!/bin/bash
	sudo bash -c "sync \
	&& apt-get update \
	&& apt-get -y upgrade \
	&& apt-get -y dist-upgrade \
	&& sed -i 's/stretch/buster/g' /etc/apt/sources.list \
	&& sed -i 's/stretch/buster/g' /etc/apt/sources.list.d/raspi.list \
	&& apt-get update \
	&& apt-get -y upgrade \
	&& apt-get -y dist-upgrade \