@mrseanryan
mrseanryan / designer.html
Last active January 4, 2016 13:06
designer
<link rel="import" href="../google-map/google-map.html">
<link rel="import" href="../cool-clock/cool-clock.html">
<link rel="import" href="../smoothie-chart/smoothie-chart.html">
<link rel="import" href="../paper-button/paper-button.html">
<link rel="import" href="../topeka-elements/theme.html">
<link rel="import" href="../topeka-elements/topeka-resources.html">
<link rel="import" href="../topeka-elements/topeka-app.html">
<link rel="import" href="../topeka-elements/topeka-datasource.html">
<link rel="import" href="../paper-calculator/paper-calculator.html">
<link rel="import" href="../topeka-elements/category-icons.html">
@mrseanryan
mrseanryan / calc-gpu-layers.py
Created October 22, 2023 16:06
Rough PoC using binary search to find the optimal number of model layers to offload to the GPU, for this LLM and this hardware.
"""
Rough PoC using binary search to find the optimal number of model layers to offload to the GPU, for this LLM and this hardware.
"""
import time
def call_llm(prompt, gpu_layers):
    # TODO fill in the actual call to LLM here
    # dummy GPU memory limit
    test_best_layers = 60
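For context, the approach the description outlines is a standard binary search over the layer count. A minimal sketch of that idea (not the gist's actual code): find_max_gpu_layers and try_layers are hypothetical names, where try_layers(n) stands in for loading the model with n layers offloaded and reporting whether it fit in GPU memory.
def find_max_gpu_layers(try_layers, max_layers=100):
    # Binary search for the largest layer count that still loads successfully.
    low, high = 0, max_layers
    best = 0
    while low <= high:
        mid = (low + high) // 2
        if try_layers(mid):
            best = mid      # mid layers fit - try offloading more
            low = mid + 1
        else:
            high = mid - 1  # mid layers failed - try fewer
    return best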
@mrseanryan
mrseanryan / gist:54a411dd1849578724b0157fb0e83b40
Last active January 21, 2024 10:46
[blocks instance booting!] AWS EC2 User Data script to launch text-generation-webui at instance launch
#cloud-boothook
#!/bin/bash
cd /home/ubuntu/text-generation-webui
GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=TRUE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --public-api --extensions openai --api-port 5000 --auto-devices --gpu-memory 24
# --share
@mrseanryan
mrseanryan / install_text_gen.sh
Last active January 22, 2024 17:59
Basic INTERACTIVE install of text-generation-webui to host an LLM on Ubuntu - launches for a 24GB GPU such as an AWS EC2 G4 or G5 box
#!/bin/bash
GPU_GB=${1:-24}
#sudo apt update
#sudo apt install python3-pip
git clone https://github.com/oobabooga/text-generation-webui.git
pushd text-generation-webui
@mrseanryan
mrseanryan / gist:1f6ee66fde867ac37e9c57da2d7a3f09
Last active January 22, 2024 16:39
Install text-generation-webui with GPU support - via llama-cpp-python
sudo apt update
sudo apt install python3-pip
conda create -n textgen python=3.10.9
conda activate textgen
conda init bash
pip install torch torchvision torchaudio
git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui
pip install -r requirements.txt
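The GPU-specific step falls below the preview cut-off. As a hedged example only (the exact flags vary by llama-cpp-python version and are not shown in the gist), the CUDA-enabled build was commonly forced along these lines:
# Assumed step, not from the gist preview: rebuild llama-cpp-python with cuBLAS/CUDA support
CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install --force-reinstall --no-cache-dir llama-cpp-python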
@mrseanryan
mrseanryan / gist:dd1e945316c1b0c4a763aa65eccd3fb8
Created January 16, 2024 16:28
Launch text-generation-webui as an Ubuntu service
#/etc/systemd/system/llmtextgen.service
[Unit]
After=network.target
[Service]
Type=simple
ExecStart=/bin/bash /home/ubuntu/start-llm.sh
User=ubuntu
Group=ubuntu
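Once the unit file is in place, the usual systemd workflow applies; this usage note is an assumption about intent, not part of the gist preview:
# Standard systemd commands (assumed usage, not shown in the gist)
sudo systemctl daemon-reload
sudo systemctl enable llmtextgen.service
sudo systemctl start llmtextgen.service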
@mrseanryan
mrseanryan / gist:6bf26b78c6af43fe38d322533c8e10b5
Last active January 22, 2024 16:38
AWS EC2 script to install text-generation-webui as an Ubuntu service
#!/bin/bash -e
GPU_GB=${1:-24}
OUT_FILE=/etc/systemd/system/llmtextgen.service
if [ -e $OUT_FILE ]
then
echo LLM service 'llmtextgen.service' already installed - exiting.
exit 0;
fi
echo Downloading script to /home/ubuntu/start-llm.sh
@mrseanryan
mrseanryan / gist:2dcdb8182c05d1ea116e6bd0b772ba3d
Last active January 22, 2024 12:14
Create script to start existing install of text-generation-webui
#!/bin/bash -e
GPU_GB=${1:-24}
pushd /home/ubuntu/text-generation-webui
GPU_CHOICE=A USE_CUDA118=FALSE LAUNCH_AFTER_INSTALL=TRUE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --public-api --extensions openai --api-port 5000 --auto-devices --gpu-memory $GPU_GB
# --share
@mrseanryan
mrseanryan / gist:a7d8ed4bfda2f3f6e495bfe719e8d35c
Created January 19, 2024 15:25
Bash script to lint Python code via flake8 - shows how to disable rules.
# so future errors halt the script.
set -e
echo Linting ...
# Disable less useful errors
# - ref https://www.flake8rules.com/
#
# E302 Expected 2 blank lines
# E305 Expected 2 blank lines after end of function or class
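The actual flake8 invocation falls below the preview cut-off; a hedged sketch of the kind of command the comments describe, using the rule codes listed above (the gist's real rule list may be longer):
# Assumed invocation, not from the gist preview: lint, ignoring the rules noted above
flake8 --extend-ignore=E302,E305 .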
@mrseanryan
mrseanryan / describe_instance_types.sh
Last active January 22, 2024 10:49
AWS: describe available instance types with GPU memory
aws ec2 describe-instance-types --instance-types g4dn.xlarge g5.xlarge --query "InstanceTypes[].GpuInfo" --no-cli-pager