Simo Ryu cloneofsimo

## a.cu
#define DELEGATE_FAST_GELU_GRADIENT_CUDA_KERNEL(T, FMAFunc, TanhFunc)    \
  template <>                                                            \
  __global__ void FastGeluGradientCUDAKernel<T>(                         \
      const int N, const T* dY, const T* X, T* dX) {                     \
    constexpr T kAlpha = M_2_SQRTPI * M_SQRT1_2;                         \
    constexpr T kBeta = kAlpha * gelu_utils::kFastCoeff * T(3);          \
    const int index = blockIdx.x * CAFFE_CUDA_NUM_THREADS + threadIdx.x; \
    if (index < N) {                                                     \
      const T y = TanhFunc(                                              \
          kAlpha *                                                       \

## flash.py
import pytest
import torch

import triton
import triton.language as tl


@triton.jit
def _fwd_kernel(
    Q, K, V, sm_scale,

## prompt_rewight.py
from diffusers import DiffusionPipeline
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import torch
import re

MODEL_CACHE = "./cache"


def split_by_emphasis(text, tokenizer, normalize = True):


## SDXL_trainer.py
# Bootstrapped from Huggingface diffuser's code.

import gc
import math
from pathlib import Path
import torch
import torch.nn as nn

import torch.nn.functional as F

## Loading From Replicate SDXL fine-tuned model.py

import torch
from diffusers import DiffusionPipeline

from safetensors import safe_open
from dataset_and_utils import TokenEmbeddingsHandler


MODEL_CACHE = "./cache"
pipe = DiffusionPipeline.from_pretrained(

## Runner.py
 from rg2.gym import Rg2UEnv, WalkerEnvConfig

 from gym.wrappers import TimeLimit
 from stable_baselines3 import PPO, SAC
 from stable_baselines3.common.monitor import Monitor
 from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv, SubprocVecEnv
 from stable_baselines3.common.callbacks import CheckpointCallback


 if __name__ == "__main__":

## preprocess-videos-latents.py
import os
import csv
import torch
import cv2
import logging
from typing import Tuple, Any, List
from torch.utils.data import DataLoader, Dataset
from multiprocessing import Pool
from streaming import MDSWriter
import ImageReward as RM

## unfold_images.py
import torch
import torch.nn.functional as F
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn

# Load an example image
image_path = "/home/simo/just_dl_stuff/vid2data/vender_1.png"
img = Image.open(image_path).convert("RGB")

## common.md

      
              1 file
            
          
              0 forks
            
          
              2 comments
            
          
              0 stars
            
          
                cloneofsimo
                / common.md
            
            
              Last active
              June 18, 2024 10:48
            
              
                Very common stuff that I forget all the time and I really shouldn't
              
          
    clean clean

black .
autoflake --remove-all-unused-imports -i ./*
isort .
Check basic stuff


## dockersetup.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                cloneofsimo
                / dockersetup.md
            
            
              Last active
              May 1, 2024 09:16
            
              
                dockersetup.md
              
          
    Setting up a Docker environment with PyTorch on your Linux server so that your friends can access it involves several steps: installing Docker, pulling or building a PyTorch Docker image, and configuring Docker so that it is securely accessible from other machines. Here’s a step-by-step guide to get you started:
Step 1: Install Docker


Update your system: Ensure your package lists and installed packages are up to date.
sudo apt update && sudo apt upgrade -y


Install Docker: Install Docker using Docker's official convenience script, which works on many Linux distributions.
	#define DELEGATE_FAST_GELU_GRADIENT_CUDA_KERNEL(T, FMAFunc, TanhFunc) \
	template <> \
	__global__ void FastGeluGradientCUDAKernel<T>( \
	const int N, const T* dY, const T* X, T* dX) { \
	constexpr T kAlpha = M_2_SQRTPI * M_SQRT1_2; \
	constexpr T kBeta = kAlpha * gelu_utils::kFastCoeff * T(3); \
	const int index = blockIdx.x * CAFFE_CUDA_NUM_THREADS + threadIdx.x; \
	if (index < N) { \
	const T y = TanhFunc( \
	kAlpha * \
	import pytest
	import torch

	import triton
	import triton.language as tl


	@triton.jit
	def _fwd_kernel(
	Q, K, V, sm_scale,
	from diffusers import DiffusionPipeline
	from typing import Any, Callable, Dict, List, Optional, Tuple, Union
	import torch
	import re

	MODEL_CACHE = "./cache"


	def split_by_emphasis(text, tokenizer, normalize = True):
	# Bootstrapped from Huggingface diffuser's code.

	import gc
	import math
	from pathlib import Path
	import torch
	import torch.nn as nn

	import torch.nn.functional as F

	import torch
	from diffusers import DiffusionPipeline

	from safetensors import safe_open
	from dataset_and_utils import TokenEmbeddingsHandler


	MODEL_CACHE = "./cache"
	pipe = DiffusionPipeline.from_pretrained(
	from rg2.gym import Rg2UEnv, WalkerEnvConfig

	from gym.wrappers import TimeLimit
	from stable_baselines3 import PPO, SAC
	from stable_baselines3.common.monitor import Monitor
	from stable_baselines3.common.vec_env import VecNormalize, DummyVecEnv, SubprocVecEnv
	from stable_baselines3.common.callbacks import CheckpointCallback


	if __name__ == "__main__":
	import os
	import csv
	import torch
	import cv2
	import logging
	from typing import Tuple, Any, List
	from torch.utils.data import DataLoader, Dataset
	from multiprocessing import Pool
	from streaming import MDSWriter
	import ImageReward as RM
	import torch
	import torch.nn.functional as F
	from PIL import Image
	import matplotlib.pyplot as plt
	import numpy as np
	import torch.nn as nn

	# Load an example image
	image_path = "/home/simo/just_dl_stuff/vid2data/vender_1.png"
	img = Image.open(image_path).convert("RGB")