Skip to content

Instantly share code, notes, and snippets.

@mcapodici
Last active July 27, 2023 06:51
Show Gist options
  • Save mcapodici/eaa39861affe75be13badefbe9e05079 to your computer and use it in GitHub Desktop.
Save mcapodici/eaa39861affe75be13badefbe9e05079 to your computer and use it in GitHub Desktop.
modal local code runner script
import sys
import modal
import os
# Script to run your local code
#
# * Requires the modal-client Python package to be installed, and a token to have been obtained, e.g.:
# ```
# pip install modal-client
# modal token new
# ```
# * You need to set up the parameters before running the script
#
# Example usage:
#
# ```
# modal run lob.py --command prepare
# modal run lob.py --command train
# modal run lob.py --command sample
# ```
#
# Note: There is a persistent volume at /volume for downloading large models, saving model checkpoints etc. to.
#
# How to use on nanoGPT:
# ```
# git clone https://github.com/karpathy/nanoGPT
# cd nanoGPT
# mkdir src
# mv *.py data config src
# ```
#
import os
import modal
# Parameters for run
# =========================================================================================
# GPU type for the remote container.
# Choose one of: "t4", "a10g", "inf2", "a100-20g", "a100" or None (CPU only)
gpu = "t4"

# Named command sets; select one with `modal run <script>.py --command <key>`.
commands = {
    'prepare': ['python data/shakespeare_char/prepare.py'],
    'train': ['python train.py config/train_shakespeare_char.py'],
    'sample': ['python sample.py --out_dir=out-shakespeare-char'],
}

# Print each file considered for upload by the mount condition.
verbose = True

# Prefix for the persistent volume name; the cloud provider is appended below.
volume_name_prefix = "2023-07-27-10-45"

# Hard cap on remote execution time.
timeout_mins = 60

# Local path prefixes that are never uploaded to the container.
exclude_paths_starting_with = ["./.git", "./.github", "./bin", "./lib", "./share"]

# Container image: Debian slim, plus rsync and the Python ML stack.
image = (
    modal.Image
    .debian_slim()
    .apt_install("rsync")
    .pip_install("torch numpy transformers datasets tiktoken wandb tqdm".split(" "))
)
# =========================================================================================
# End parameters for run

# a100-class GPUs are requested on GCP; everything else runs on AWS.
cloud = "gcp" if gpu and gpu.startswith("a100") else "aws"
volume_name = f"{volume_name_prefix}-{cloud}"
print(f"💾 using volume name: {volume_name}")
volume = modal.NetworkFileSystem.new().persisted(volume_name)
stub = modal.Stub("lob-run", image=image)
def file_condition(path: str) -> bool:
    """Mount filter: return True if *path* should be uploaded to the container.

    Paths starting with any prefix in the module-level
    ``exclude_paths_starting_with`` list are skipped. When ``verbose`` is set,
    each accepted file is printed, prefixed with its size when >= 1 MiB.
    """
    if any(path.startswith(prefix) for prefix in exclude_paths_starting_with):
        return False
    if verbose:
        # Only stat the file when we are actually going to print its size;
        # the original stat'ed every candidate file even with verbose off.
        megabytes = round(os.stat(path).st_size / (1024 * 1024), 2)
        sizemsg = f"({megabytes}Mb) " if megabytes >= 1 else ""
        print(f"{sizemsg} {path}")
    return True
@stub.local_entrypoint()
def run(command):
    """Local entrypoint: validate *command* and dispatch it to the remote function.

    Args:
        command: key into the module-level ``commands`` dict
            (supplied via ``modal run <script>.py --command <key>``).

    Exits with status 1 (message on stderr) if the command is not recognised.
    """
    if command not in commands:
        possible_commands = ", ".join(commands)
        print(f"Command not recognised. Possible commands: {possible_commands}", file=sys.stderr)
        # sys.exit is the explicit form; the bare `exit` builtin is a
        # site-module convenience not guaranteed to exist everywhere.
        sys.exit(1)
    command_text = commands[command]
    print(f'Command {command} was chosen.')
    print(f'This will run: {command_text}')
    # Run the chosen list of shell commands inside the remote container.
    copy.call(command_text)
@stub.function(
    cloud=cloud,
    gpu=gpu,
    timeout=timeout_mins*60,
    mounts=[modal.Mount.from_local_dir(".", remote_path="/source/code", condition=file_condition)],
    network_file_systems={"/root/code": volume})
def copy(commands: "list[str]"):
    """Remote function: rsync the mounted source tree onto the persistent
    volume, then execute each shell command in order from that directory.

    Args:
        commands: shell command strings run sequentially via ``os.system``.
            A non-zero exit status is reported on stderr but does not stop
            the remaining commands (best-effort, matching prior behavior).
    """
    source = "/source/code/"
    dest = "/root/code/"
    print("📁 Running rsync to copy files up to container:")
    # -r, --recursive recurse into directories
    # -u, --update skip files that are newer on the receiver
    # -l, --links copy symlinks as symlinks
    # --copy-unsafe-links only "unsafe" symlinks are transformed
    # -p, --perms preserve permissions
    # -t, --times preserve modification times
    # --progress show progress during transfer
    os.system(f"rsync -r -u -l --copy-unsafe-links -p -t --progress {source} {dest}")
    print("🐍 Using remote python version:")
    os.system("python --version")
    # NOTE(review): relative path — presumably the working directory is /root
    # inside the container, so this lands in /root/code; confirm.
    os.chdir("code")
    for command in commands:
        print(f"🏃🏽Executing command: {command}")
        status = os.system(command)
        if status != 0:
            # Surface failures instead of silently moving on to the next step.
            print(f"⚠️ Command exited with status {status}: {command}", file=sys.stderr)
# ---- Older version of this script, kept commented out below for reference ----
# # Parameters for run
# # =========================================================================================
# # Choose one of: "t4", "a10g", "inf2", "a100-20g", "a100" or None
# gpu=None
# volume_name_prefix = "temp20230719"
# timeout_minutes = 10
# # Set up your image here:
# image = modal.Image \
# .debian_slim() \
# .pip_install("torch numpy transformers datasets tiktoken wandb tqdm".split(" "))
# # =========================================================================================
# # End parameters for run
# # To avoid slowness/egress, make a separate volume for aws instances vs. gcp
# cloud="gcp" if gpu and gpu.startswith("a100") else "aws"
# volume_name = f"{volume_name_prefix}-{cloud}"
# print(f"💾 using volume name: {volume_name}")
# volume = modal.NetworkFileSystem.new().persisted(volume_name)
# stub = modal.Stub("lob-run",
# image=image
# )
# @stub.local_entrypoint()
# def main(command: str):
# run_command.call(command)
# @stub.function(
# cloud=cloud,
# gpu=gpu,
# timeout=timeout_minutes * 60,
# mounts=[modal.Mount.from_local_dir("./src", remote_path="/src")],
# network_file_systems={"/volume": volume})
# def run_command(command: str):
# os.chdir("/src")
# os.system(command)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment