This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| // clang-format off | |
| /* | |
| * SPDX-FileCopyrightText: Copyright (c) 2025-present NVIDIA CORPORATION & AFFILIATES. | |
| * All rights reserved. | |
| * SPDX-License-Identifier: BSD-3-Clause | |
| */ | |
| // clang-format on | |
| #include <benchmark/benchmark.h> | |
| #include <cuda_runtime.h> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| cmake_minimum_required(VERSION 3.18 FATAL_ERROR) | |
| project(CUDASymmetricMemoryExample LANGUAGES CXX CUDA) | |
| # Set C++ standard | |
| set(CMAKE_CXX_STANDARD 17) | |
| set(CMAKE_CXX_STANDARD_REQUIRED ON) | |
| set(CMAKE_CUDA_STANDARD 17) | |
| set(CMAKE_CUDA_STANDARD_REQUIRED ON) | |
| # Find required packages |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| namespace { | |
| // Helper function to find the position of the stream-parallelized axis in a | |
| // domain. Returns -1 if no stream-parallelized axis is found. Throws if | |
| // multiple stream-parallelized axes are found (only one is allowed). | |
| int64_t getStreamAxisPos(const std::vector<IterDomain*>& domain) { | |
| int64_t ret = -1; | |
| for (int64_t i = 0; i < (int64_t)domain.size(); i++) { | |
| if (domain[i]->getParallelType() == ParallelType::Stream) { | |
| NVF_CHECK( |