Skip to content

Instantly share code, notes, and snippets.

@chengscott
Created January 31, 2024 05:54
Show Gist options
  • Save chengscott/e68352b85b2c56bf369d4df20f9b794e to your computer and use it in GitHub Desktop.
diff --git a/python/aitemplate/backend/cuda/conv2d/common.py b/python/aitemplate/backend/cuda/conv2d/common.py
index 8cf7fb2..ca13a72 100644
--- a/python/aitemplate/backend/cuda/conv2d/common.py
+++ b/python/aitemplate/backend/cuda/conv2d/common.py
@@ -501,6 +501,7 @@ def emit_instance(op):
emiter = cutlass_lib.conv2d_operation.EmitConv2dWithBroadcastInstance()
else:
emiter = cutlass_lib.conv2d_operation.EmitConv2dInstance()
+ op.tile_description.stages = 2
op_def = emiter.emit(op)
return op_def
diff --git a/python/aitemplate/backend/cuda/target_def.py b/python/aitemplate/backend/cuda/target_def.py
index 81d883e..9134500 100644
--- a/python/aitemplate/backend/cuda/target_def.py
+++ b/python/aitemplate/backend/cuda/target_def.py
@@ -153,6 +153,7 @@ class CUDA(Target):
options = [
"-t=0",
"-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1",
+ "-DCUTLASS_NVCC_ARCHS=70",
"-w",
f"-gencode=arch=compute_{self._arch},code=[{','.join(code)}]",
environ.get_compiler_opt_level(),
@@ -429,6 +430,7 @@ class FBCUDA(CUDA):
"-Xcompiler -fPIC",
"-Xcompiler -fvisibility=hidden",
"-DCUTLASS_ENABLE_TENSOR_CORE_MMA=1",
+ "-DCUTLASS_NVCC_ARCHS=70",
"-w",
"--expt-relaxed-constexpr",
f"-gencode=arch=compute_{nvcc_arch},code=[sm_{nvcc_arch},compute_{nvcc_arch}]",
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment