This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
| # | |
| # See LICENSE for license information. | |
| import os | |
| import argparse | |
| from functools import partial | |
| import contextlib | |
| import torch | |
| import torch.distributed as dist | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import torch | |
| import torch.nn as nn | |
| from typing import Optional, Dict, Union, List | |
| import math | |
| from llmfoundry.models.layers.attention import GroupedQueryAttention | |
| from contextlib import nullcontext | |
| from omegaconf import OmegaConf as om | |
| from einops import rearrange | |
| import transformer_engine.pytorch as te | |
| from transformer_engine.common import recipe | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import datetime | |
| import gc | |
| import pathlib | |
| import torch | |
| from composer.utils import get_device | |
| from omegaconf import OmegaConf as om | |
| from llmfoundry.models.mpt.modeling_mpt import ComposerMPTCausalLM | |
| from composer.core import Precision | |
| from composer import Trainer | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import torch | |
| import torch.nn as nn | |
| from typing import Optional,Dict,Union,List | |
| from contextlib import nullcontext | |
| _MEMORY_KEYS = { | |
| 'allocated_bytes.all.current': 'current_allocated_mem', | |
| 'active_bytes.all.current': 'current_active_mem', | |
| 'inactive_split_bytes.all.current': 'current_inactive_mem', | |
| 'reserved_bytes.all.current': 'current_reserved_mem', | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # Based on: https://github.com/pytorch/examples/blob/master/mnist/main.py | |
| import os | |
| import argparse | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| import torch.optim as optim | |
| from torchvision import datasets, transforms | |
| from transformers import AutoModelForSeq2SeqLM, DataCollatorForSeq2Seq, Seq2SeqTrainingArguments, Seq2SeqTrainer | |
| from transformers import AutoTokenizer | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | # This isn't supposed to run as a bash script, i named it with ".sh" for syntax highlighting. | |
| # https://developer.nvidia.com/nsight-systems | |
| # https://docs.nvidia.com/nsight-systems/profiling/index.html | |
| # My preferred nsys (command line executable used to create profiles) commands | |
| # | |
| # In your script, write | |
| # torch.cuda.nvtx.range_push("region name") | |
| # ... | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | #!/usr/bin/env python | |
| """ | |
| Demonstrate how to pass IPC handles to GPU data between processes in Python. | |
| """ | |
| import ctypes | |
| import numpy as np | |
| import multiprocessing as mp | |
| import zmq | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from csv import QUOTE_NONE | |
| from platform import libc_ver | |
| from transformers import pipeline | |
| import transformers | |
| import deepspeed | |
| import torch | |
| import os | |
| from transformers import BertLayer | |
| from deepspeed.module_inject.replace_policy import HFBertLayerPolicy | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from transformers import pipeline, AutoModel, AutoTokenizer, AutoConfig | |
| import transformers | |
| import deepspeed | |
| import torch | |
| import os | |
| local_rank = int(os.getenv('LOCAL_RANK', '0')) | |
| world_size = int(os.getenv('WORLD_SIZE', '1')) | |
| def see_memory_usage(): | 
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | import deepspeed | |
| import transformers | |
| import torch | |
| import datetime | |
| import os | |
| import time | |
| from transformers import pipeline, AutoModel, AutoTokenizer, AutoConfig | |
| import sys | |
| def run_model(name="gpt2", enable_cuda_graph=False, batch=2, seq=65): |