Aryan (a-r-r-o-w)
:octocat: wandering on a rock
@a-r-r-o-w
a-r-r-o-w / pipeline_parallel.py
Created October 2, 2024 10:50 — forked from 3outeille/pipeline_parallel.py
Self-contained example of how pipeline parallelism works (AFAB and 1F1B) in 200 LOC
#VERBOSE=0 torchrun --nproc_per_node 3 self_contained_pp_LOC.py
import os, random, numpy as np, torch, torch.nn as nn, torch.distributed as dist, torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, DistributedSampler
from datasets import load_dataset
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
STEP, local_rank, world_size, verbose = 0, int(os.environ["LOCAL_RANK"]), int(os.environ["WORLD_SIZE"]), os.environ.get("VERBOSE", "0") == "1"
def set_all_seed(seed):
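The preview above cuts off early. As a rough sketch of the core idea (not taken from the gist): in pipeline parallelism each rank owns a contiguous slice of the model's layers and exchanges activations with its neighbours via point-to-point sends/receives; AFAB runs all microbatch forwards before all backwards, while 1F1B interleaves one forward with one backward to reduce activation memory. The class below is illustrative only, assuming a pre-allocated receive buffer of the right shape.

import torch, torch.nn as nn, torch.distributed as dist

class PipelineStage(nn.Module):
    # Each rank owns a contiguous slice of the model's layers (illustrative, not the gist's class).
    def __init__(self, layers, rank, world_size):
        super().__init__()
        self.layers = nn.Sequential(*layers)
        self.rank, self.world_size = rank, world_size

    def forward(self, x):
        if self.rank > 0:
            # Non-first stages receive activations from the previous rank into the
            # pre-allocated buffer `x` (assumed to already have the expected shape).
            dist.recv(x, src=self.rank - 1)
        x = self.layers(x)
        if self.rank < self.world_size - 1:
            # All but the last stage forward their activations to the next rank.
            dist.send(x, dst=self.rank + 1)
        return x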
@a-r-r-o-w
a-r-r-o-w / cogvideox_essential.md
Last active October 21, 2024 10:39
All the essentials to play with CogVideoX!
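The gist body is not shown in the listing; for reference, a minimal CogVideoX text-to-video run with Diffusers looks roughly like this (model ID and sampling parameters follow the public CogVideoX docs, not the gist):

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # keep VRAM usage manageable
pipe.vae.enable_tiling()         # decode the latent video in tiles to save memory

video = pipe(
    prompt="A panda playing a guitar in a bamboo forest",
    num_frames=49,
    num_inference_steps=50,
    guidance_scale=6.0,
).frames[0]
export_to_video(video, "output.mp4", fps=8)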
@a-r-r-o-w
a-r-r-o-w / convert_cogvideox_t2v_to_bnb.py
Created September 15, 2024 19:44
Conversion of CogVideoX to bitsandbytes. Based on [this](https://github.com/huggingface/diffusers/pull/9213) Diffusers PR
import gc
import torch
from accelerate.utils import compute_module_sizes
from diffusers import BitsAndBytesConfig, CogVideoXPipeline, CogVideoXTransformer3DModel
from diffusers.utils import export_to_video
from transformers import T5EncoderModel
def reset_memory():
    gc.collect()
    torch.cuda.empty_cache()
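A hedged sketch of what a bitsandbytes 4-bit load looks like with the API added by the referenced Diffusers PR; the model ID and config values are assumptions, not copied from the gist:

import torch
from diffusers import BitsAndBytesConfig, CogVideoXPipeline, CogVideoXTransformer3DModel

# Load the transformer directly in NF4 (4-bit) via bitsandbytes.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX-5b",
    subfolder="transformer",
    quantization_config=quant_config,
    torch_dtype=torch.bfloat16,
)

# Reuse the quantized transformer inside the full pipeline.
pipe = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-5b", transformer=transformer, torch_dtype=torch.bfloat16
)
pipe.enable_model_cpu_offload()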
@a-r-r-o-w
a-r-r-o-w / llama-flux-cog.py
Created September 12, 2024 22:46
Simple script that uses Llama 3.1 for caption generation, Flux-dev as an image generator, and CogVideoX-I2V for image-to-video generation
#!/usr/bin/env python3
import argparse
import gc
import json
import os
import pathlib
import random
from typing import Any, Dict
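A compressed sketch of the three stages described above, using transformers and Diffusers APIs; model IDs, prompts, and generation parameters are illustrative assumptions rather than the gist's values:

import torch
from diffusers import FluxPipeline, CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video
from transformers import pipeline

# 1. Expand a short idea into a detailed caption with Llama 3.1.
llm = pipeline(
    "text-generation",
    model="meta-llama/Llama-3.1-8B-Instruct",
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
messages = [{"role": "user", "content": "Write a vivid one-sentence prompt about a lighthouse in a storm."}]
caption = llm(messages, max_new_tokens=128)[0]["generated_text"][-1]["content"]

# 2. Generate a first frame with Flux-dev.
flux = FluxPipeline.from_pretrained("black-forest-labs/FLUX.1-dev", torch_dtype=torch.bfloat16)
flux.enable_model_cpu_offload()
image = flux(caption, height=480, width=720, num_inference_steps=28).images[0]

# 3. Animate the frame with CogVideoX image-to-video.
i2v = CogVideoXImageToVideoPipeline.from_pretrained("THUDM/CogVideoX-5b-I2V", torch_dtype=torch.bfloat16)
i2v.enable_model_cpu_offload()
video = i2v(prompt=caption, image=image, num_frames=49, num_inference_steps=50).frames[0]
export_to_video(video, "output.mp4", fps=8)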
@a-r-r-o-w
a-r-r-o-w / naive_video_captioning_cogvideox.py
Created September 6, 2024 15:19
Extremely simple and naive video captioning with MiniCPM-V-2_6 and Llama-3.1-8B-Instruct
import argparse
import os
import random
from typing import Any, Dict
import numpy as np
import pandas as pd
import torch
import transformers
from decord import VideoReader, cpu
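The captioning-model call is specific to the gist, but the frame-sampling step with decord generally looks like the sketch below; the helper name and frame count are illustrative:

import numpy as np
from decord import VideoReader, cpu
from PIL import Image

def sample_frames(path, num_frames=8):
    # Uniformly sample frames from a video and return them as PIL images.
    vr = VideoReader(path, ctx=cpu(0))
    indices = np.linspace(0, len(vr) - 1, num_frames).astype(int)
    frames = vr.get_batch(indices).asnumpy()  # (num_frames, H, W, 3) uint8
    return [Image.fromarray(frame) for frame in frames]

frames = sample_frames("example.mp4")
# The sampled frames would then be passed to MiniCPM-V-2_6 for captioning, and the raw
# caption optionally refined with Llama-3.1-8B-Instruct.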
@a-r-r-o-w
a-r-r-o-w / cogvideox-low-mem.py
Created August 24, 2024 19:48
Demonstrates how to run 49-frame inference with CogVideoX in 8 GB of VRAM
# Install torchao from source and PyTorch nightly.
# Other environments have not yet been tested.
import gc
import torch
from diffusers import AutoencoderKLCogVideoX, CogVideoXTransformer3DModel, CogVideoXPipeline
from diffusers.utils import export_to_video
from transformers import T5EncoderModel
from torchao.quantization import (
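A minimal low-memory recipe along the same lines: int8 weight-only quantization with torchao plus sequential CPU offload and VAE tiling. The model ID, prompt, and exact settings are assumptions, not the gist's values:

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
from torchao.quantization import quantize_, int8_weight_only

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
quantize_(pipe.transformer, int8_weight_only())   # weight-only int8 on the transformer
quantize_(pipe.text_encoder, int8_weight_only())  # and on the T5 text encoder
pipe.enable_sequential_cpu_offload()               # stream modules to the GPU one at a time
pipe.vae.enable_tiling()                           # decode the latent video in tiles
pipe.vae.enable_slicing()

video = pipe("A ship sailing through a storm", num_frames=49, num_inference_steps=50).frames[0]
export_to_video(video, "output.mp4", fps=8)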
@a-r-r-o-w
a-r-r-o-w / benchmark.sh
Last active November 11, 2024 04:01
Demonstrates how to use CogVideoX 2B/5B with Diffusers and TorchAO
#!/bin/bash
compile_flags=("" "--compile")
fuse_qkv_flags=("" "--fuse_qkv")
# quantizations=("fp16" "bf16" "fp8" "fp8_e4m3" "fp8_e5m2" "fp6" "int8wo" "int8dq" "int4dq" "int4wo" "autoquant" "sparsify")
quantizations=("fp16" "bf16" "fp6" "int8wo" "int8dq" "int4dq" "int4wo" "autoquant" "sparsify")
device="cuda"
# Check if completed.txt exists and read it into an array
if [ -f completed.txt ]; then
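The shell loop presumably drives a Python benchmarking script over every flag combination. An illustrative Python equivalent of that sweep is below; benchmark.py and its flags are assumed, not taken from the gist:

import itertools, subprocess

compile_flags = ["", "--compile"]
fuse_qkv_flags = ["", "--fuse_qkv"]
quantizations = ["fp16", "bf16", "fp6", "int8wo", "int8dq", "int4dq", "int4wo", "autoquant", "sparsify"]

for compile_flag, fuse_flag, quant in itertools.product(compile_flags, fuse_qkv_flags, quantizations):
    # benchmark.py is a hypothetical driver script; substitute the gist's actual entry point.
    cmd = ["python", "benchmark.py", "--quantization", quant]
    cmd += [flag for flag in (compile_flag, fuse_flag) if flag]
    subprocess.run(cmd, check=True)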
@a-r-r-o-w
a-r-r-o-w / cogvideox-quanto-inference.py
Last active November 26, 2024 02:20
Demonstrates how to use CogVideoX 2B/5B with Diffusers and Optimum-Quanto
import gc
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
from optimum.quanto import freeze, quantize, qfloat8, qfloat8_e4m3fn, qfloat8_e5m2, qint8, qint4, qint2
def reset_memory(device):
    gc.collect()
    torch.cuda.empty_cache()
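For reference, a minimal optimum-quanto flow with CogVideoX looks roughly like this; the model ID, prompt, and choice of modules to quantize are assumptions, not the gist's exact code:

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
from optimum.quanto import freeze, quantize, qint8

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16)
quantize(pipe.transformer, weights=qint8)   # quantize transformer weights to int8
freeze(pipe.transformer)                     # materialize the quantized weights
quantize(pipe.text_encoder, weights=qint8)
freeze(pipe.text_encoder)
pipe.to("cuda")

video = pipe("A koala reading a book under a tree", num_frames=49, num_inference_steps=50).frames[0]
export_to_video(video, "output.mp4", fps=8)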
@a-r-r-o-w
a-r-r-o-w / cogvideo-torchao-compile.py
Created August 20, 2024 11:43
Demonstrates CogVideoX weight-only (WO) quantized inference with torchao
import argparse
import gc
import os
import time
os.environ["TORCH_LOGS"] = "dynamo"
import torch
from diffusers import AutoencoderKLCogVideoX, CogVideoXPipeline, CogVideoXTransformer3DModel, CogVideoXDDIMScheduler
from diffusers.utils import export_to_video
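A hedged sketch of weight-only quantization combined with torch.compile on the CogVideoX transformer; the scheduler swap and compile flags are assumptions based on common Diffusers/torchao usage, not the gist itself:

import torch
from diffusers import CogVideoXPipeline, CogVideoXDDIMScheduler
from diffusers.utils import export_to_video
from torchao.quantization import quantize_, int8_weight_only

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to("cuda")
pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config)

quantize_(pipe.transformer, int8_weight_only())
pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)

# The first call triggers compilation and is slow; subsequent calls reuse the compiled graph.
video = pipe("A dragon flying over snowy mountains", num_frames=49, num_inference_steps=50).frames[0]
export_to_video(video, "output.mp4", fps=8)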