Skip to content

Instantly share code, notes, and snippets.

View tiandiao123's full-sized avatar

Cuiqing Li (李崔卿) tiandiao123

  • Shanghai, China
View GitHub Profile
import inspect
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
import PIL.Image
import torch
from transformers import CLIPTextModel, CLIPTextModelWithProjection, CLIPTokenizer
from diffusers import StableDiffusionXLImg2ImgPipeline
from diffusers.image_processor import PipelineImageInput, VaeImageProcessor
from diffusers.loaders import FromSingleFileMixin, StableDiffusionXLLoraLoaderMixin, TextualInversionLoaderMixin
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include <torch/types.h>
#include <c10/util/Half.h>
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include <torch/types.h>
#include <c10/util/Half.h>
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include <torch/types.h>
#include <c10/util/Half.h>
#include <cublas_v2.h>
#include <cstdint>
#include <cuda.h>
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <iostream>
#include <torch/torch.h>
#include "cutlass/cutlass.h"
#include "cutlass/gemm/device/gemm_splitk_parallel.h"
#include <torch/extension.h>
#include <cutlass/gemm/gemm.h>
#include <cutlass/epilogue/thread/linear_combination.h>
torch::Tensor bmm_fp16_fp16_f32(torch::Tensor A, torch::Tensor B, float alpha) {
int batch_size = A.size(0);
int M = A.size(1);
int N = B.size(1);
int K = A.size(2);
#include <iostream>
#include "cutlass/cutlass.h"
#include "cutlass/gemm/device/gemm.h"
#include "cutlass/util/host_tensor.h"
#include "cutlass/util/reference/host/tensor_compare.h"
#include "cutlass/util/reference/host/tensor_copy.h"
#include "cutlass/util/reference/host/tensor_fill.h"
#include "cutlass/util/tensor_view_io.h"
#include "helper.h"
import torch
from inference import CaiInferenceConfig, convert_to_ds_model, recover_from_ds_model
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from argparse import ArgumentParser
import time
import torch
from torch.profiler import profile, record_function, ProfilerActivity
# Command-line options for the benchmark/inference script.
parser = ArgumentParser()
parser.add_argument(
    "--name",
    type=str,
    default="bigscience/bloom-560m",
    help="model_name",
)
import torch
import time
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer
from argparse import ArgumentParser
from transformers import LlamaForCausalLM, LlamaTokenizer
from inference import CaiInferenceConfig, convert_to_ds_model, recover_from_ds_model
from torch.profiler import profile, record_function, ProfilerActivity
from types import MethodType
from typing import Optional, Sequence, Tuple, Union
import torch
import pytest
import torch
import triton
import triton.language as tl
# Triton device function: element-wise maximum of two values.
# NOTE(review): comments (not a docstring) are used here on purpose —
# triton.jit re-parses the function source, so the body is kept minimal.
# Presumably used as the combine operator of a reduction (e.g. a row-max
# in an attention kernel) — confirm against the calling kernel.
@triton.jit
def max_fn(x, y):
    return tl.math.max(x, y)