Skip to content

Instantly share code, notes, and snippets.

View Zasder3's full-sized avatar
🔥
mor gpu

Cade Gordon Zasder3

🔥
mor gpu
View GitHub Profile
# Launch open_clip training with torchrun across 12 nodes x 8 processes each.
# WORLD_SIZE matches that product (12 * 8 = 96).
export WORLD_SIZE=96
export MASTER_ADDR=172.31.207.212
export MASTER_PORT=13820

# Abort if the checkout is missing: everything below uses paths relative to it.
cd open_clip || exit 1

# Append src/ to PYTHONPATH. The ${VAR:+...} form avoids a leading ':' when
# PYTHONPATH is unset/empty, which Python would treat as "current directory".
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}${PWD}/src"

# NOTE(review): MASTER_PORT is exported above but not passed to
# --rdzv_endpoint, which receives the address only. Confirm whether the
# endpoint should be "$MASTER_ADDR:$MASTER_PORT".
#
# The {000000..041455} range in --train-data sits inside double quotes, so the
# shell does not brace-expand it; the string is passed to main.py verbatim.
torchrun --nnodes=12 --nproc_per_node=8 --rdzv_id=42 --rdzv_backend=c10d --rdzv_endpoint="$MASTER_ADDR" \
  src/training/main.py \
  --save-frequency 1 \
  --report-to wandb \
  --train-data="pipe:aws s3 cp --quiet s3://laion-us-east-1/laion-data/laion2B-data/{000000..041455}.tar -" \
  --dataset-type="webdataset" \
  --model=ViT-B/32 \
  --batch-size=256 \
  --warmup=2000 \
  --workers=8 \
  --local-loss \
  --gather-with-grad \
  --dist-url="env://"
@Zasder3
Zasder3 / start.bash
Last active February 27, 2022 22:14
# Node start-up: load the shell profile, clear stale Python processes, enter
# the work directory, activate the open_clip conda env, then prompt for a
# single-key menu selection.

# Source .bashrc from the current directory.
. ./.bashrc

# Force-kill every process named "python" (SIGKILL, no graceful shutdown).
pkill -9 python

# The conda path below is relative to this directory, so stop if it is missing.
cd ~/cade || exit 1

# `conda shell.bash hook` only PRINTS the shell integration code; it must be
# eval'ed for `conda activate` to be defined in this shell.
eval "$(./miniconda3/bin/conda shell.bash hook)"
conda activate open_clip

# Read exactly one character into mainmenuinput; -r keeps a typed backslash
# literal instead of treating it as an escape.
read -r -n 1 -p "Input Selection:" mainmenuinput
import os
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import torch.distributed as dist
import torch.distributed.nn
import torch.multiprocessing as mp
from torch.cuda.amp import GradScaler
+ '[' -z '' ']'
+ case "$-" in
+ __lmod_vx=x
+ '[' -n x ']'
+ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for this output (/p/software/juwelsbooster/lmod/8.4.1/init/bash)
Shell debugging restarted
+ unset __lmod_vx
+ ml purge
++ /p/software/juwelsbooster/lmod/8.4.1/libexec/ml_cmd purge
+ '[' -z '' ']'
+ case "$-" in
+ __lmod_vx=x
+ '[' -n x ']'
+ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for this output (/p/software/juwelsbooster/lmod/8.4.1/init/bash)
Shell debugging restarted
+ unset __lmod_vx
+ ml purge
++ /p/software/juwelsbooster/lmod/8.4.1/libexec/ml_cmd purge
+ '[' -z '' ']'
+ case "$-" in
+ __lmod_vx=x
+ '[' -n x ']'
+ set +x
Shell debugging temporarily silenced: export LMOD_SH_DBG_ON=1 for this output (/p/software/juwelsbooster/lmod/8.4.1/init/bash)
Shell debugging restarted
+ unset __lmod_vx
+ ml purge
++ /p/software/juwelsbooster/lmod/8.4.1/libexec/ml_cmd purge
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
import numpy as np
import math
import yaml
import copy
from torch.utils.checkpoint import checkpoint
from .model import CLIP
# -*- coding: utf-8 -*-
"""The BoringModel.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1HvWVVTK8j2Nj52qU4Q4YCyzOm0_aLQF3
# The Boring Model
Replicate a bug you experience, using this model.
@Zasder3
Zasder3 / boring.py
Last active June 12, 2021 07:04
Boring Model DeepSpeed Issue
import os
import torch
from torch import nn
from torch.nn import functional as F
from torch.utils.data import DataLoader, random_split, Dataset
from torchvision.datasets import MNIST
from torchvision import transforms
import pytorch_lightning as pl
from pytorch_lightning.metrics.functional import accuracy