Skip to content

Instantly share code, notes, and snippets.

View cli99's full-sized avatar
🐼

Cheng Li cli99

🐼
View GitHub Profile
@cli99
cli99 / fsdp_actckpt.py
Last active January 30, 2024 05:31
TE + FSDP with act ckpt
# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.
import os
import argparse
from functools import partial
import contextlib
import torch
import torch.distributed as dist
import torch
import torch.nn as nn
from typing import Optional, Dict, Union, List
import math
from llmfoundry.models.layers.attention import GroupedQueryAttention
from contextlib import nullcontext
from omegaconf import OmegaConf as om
from einops import rearrange
import transformer_engine.pytorch as te
from transformer_engine.common import recipe
@cli99
cli99 / fp8_memory_mpt_test.py
Last active July 15, 2024 07:24
fp8_memory_mpt_test
import datetime
import gc
import pathlib
import torch
from composer.utils import get_device
from omegaconf import OmegaConf as om
from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM
from composer.core import Precision
from composer import Trainer
@cli99
cli99 / memory_stats.py
Last active October 26, 2023 23:02
torch.cuda.memory_stats
import torch
import torch.nn as nn
from typing import Optional,Dict,Union,List
from contextlib import nullcontext
_MEMORY_KEYS = {
'allocated_bytes.all.current': 'current_allocated_mem',
'active_bytes.all.current': 'current_active_mem',
'inactive_split_bytes.all.current': 'current_inactive_mem',
'reserved_bytes.all.current': 'current_reserved_mem',
@cli99
cli99 / fsdp_mnist.py
Created September 26, 2023 18:08
fsdp mnist
# Based on: https://github.com/pytorch/examples/blob/master/mnist/main.py
import os
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer
from transformers import AutoTokenizer
@cli99
cli99 / nsight.sh
Created February 27, 2023 22:43 — forked from mcarilli/nsight.sh
Favorite nsight systems profiling commands for Pytorch scripts
# This isn't supposed to run as a bash script; I named it ".sh" for syntax highlighting.
# https://developer.nvidia.com/nsight-systems
# https://docs.nvidia.com/nsight-systems/profiling/index.html
# My preferred nsys (command line executable used to create profiles) commands
#
# In your script, write
# torch.cuda.nvtx.range_push("region name")
# ...
@cli99
cli99 / ipc_demo.py
Created September 30, 2022 18:41 — forked from lebedov/ipc_demo.py
Demonstrate how to pass IPC handles to GPU data between processes in Python
#!/usr/bin/env python
"""
Demonstrate how to pass IPC handles to GPU data between processes in Python.
"""
import ctypes
import numpy as np
import multiprocessing as mp
import zmq
from csv import QUOTE_NONE
from platform import libc_ver
from transformers import pipeline
import transformers
import deepspeed
import torch
import os
from transformers import BertLayer
from deepspeed.module_inject.replace_policy import HFBertLayerPolicy
from transformers import pipeline, AutoModel, AutoTokenizer, AutoConfig
import transformers
import deepspeed
import torch
import os
# Distributed-launch configuration from the environment: this process's rank
# on its node and the total number of processes, with safe single-process
# defaults ('0' and '1') when the launcher did not set them.
local_rank = int(os.environ.get('LOCAL_RANK', '0'))
world_size = int(os.environ.get('WORLD_SIZE', '1'))
def see_memory_usage():
@cli99
cli99 / gist:c95f9ae4a79adab64ebb9d8aedebd52b
Last active September 20, 2022 21:01
ds gpt2 test ar prior
import deepspeed
import transformers
import torch
import datetime
import os
import time
from transformers import pipeline, AutoModel, AutoTokenizer, AutoConfig
import sys
def run_model(name="gpt2", enable_cuda_graph=False, batch=2, seq=65):