Created
May 24, 2024 22:39
-
-
Save makslevental/76999afb85cdce543538080a2405bcf2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//#include "transactionFWDump.h"
#include <cassert>
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <vector>
#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_kernel.h"
// Byte size requested for the debug buffer object in dumpRegistersDPU().
constexpr uint32_t SIZE_4K_HERE = 4U << 10;
// Byte offset into that buffer from which dumpRegistersDPU() starts reading.
constexpr uint32_t OFFSET_3K_HERE = 3U << 10;
// Bit positions of the row and column fields used by getTileAddrHere().
#define XAIE_ROW_SHIFT 20
#define XAIE_COL_SHIFT 25
uint64_t get_bo_flags(uint32_t flags, uint32_t ext_flags) { | |
xcl_bo_flags f = {}; | |
f.flags = flags; | |
f.extension = ext_flags; | |
return f.all; | |
} | |
static inline uint64_t getTileAddrHere(uint8_t c, uint8_t r) { | |
return (((uint64_t)r & 0xFFU) << XAIE_ROW_SHIFT) | | |
(((uint64_t)c & 0xFFU) << XAIE_COL_SHIFT); | |
} | |
// Fixed 17-word preamble placed in front of every instruction stream this
// file submits (copied or inserted at the beginning by both test functions).
// NOTE(review): words 7..14, read as little-endian ASCII, spell
// "_ZN11__cling_N5911instr_wordsE" — presumably an embedded symbol name;
// confirm before editing any of these values.
const std::vector<uint32_t> prolog{
    // words 0-6
    0x00000011, 0x01000405, 0x01000100, 0x0B590100,
    0x000055FF, 0x00000001, 0x00000010,
    // words 7-14
    0x314E5A5F, 0x635F5F31, 0x676E696C, 0x39354E5F,
    0x6E693131, 0x5F727473, 0x64726F77, 0x00004573,
    // words 15-16
    0x07BD9630, 0x000055FF,
};
void testAdd256UsingDmaOpNoDoubleBuffering() { | |
unsigned int deviceIndex = 0; | |
std::string xclbinFile( | |
"/home/mlevental/dev_projects/mlir-aie/example/final.xclbin"); | |
auto device = xrt::device(deviceIndex); | |
auto xclbin = xrt::xclbin(xclbinFile); | |
device.register_xclbin(xclbin); | |
xrt::hw_context context(device, xclbin.get_uuid()); | |
xrt::kernel kernel(context, "MLIR_AIE"); | |
std::vector<uint32_t> ipuInsts(prolog); | |
std::vector<uint32_t> shimInsts{ | |
100663552, 0, 128, 0, 0, 0, | |
2147483648, 0, 0, 33554432, 33554432, 119316, | |
0, 100663585, 0, 128, 0, 0, | |
0, 2147483648, 0, 0, 33554432, 33554432, | |
119300, 2147483649, 50331648, 65792, | |
}; | |
ipuInsts.reserve(ipuInsts.size() + shimInsts.size()); | |
ipuInsts.insert(ipuInsts.end(), shimInsts.begin(), shimInsts.end()); | |
xrt::bo npuInstructions(device, ipuInsts.size() * sizeof(uint32_t), | |
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); | |
npuInstructions.write(ipuInsts.data()); | |
npuInstructions.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
// group_id matches kernels.json | |
const int LEN = 128; /* in bytes */ | |
auto in = xrt::bo(device, LEN * sizeof(uint32_t), XRT_BO_FLAGS_HOST_ONLY, | |
kernel.group_id(2)); | |
auto tmp = xrt::bo(device, LEN * sizeof(uint32_t), XRT_BO_FLAGS_HOST_ONLY, | |
kernel.group_id(3)); | |
auto out = xrt::bo(device, LEN * sizeof(uint32_t), XRT_BO_FLAGS_HOST_ONLY, | |
kernel.group_id(4)); | |
auto *inPtr = in.map<uint32_t *>(); | |
auto *tmpPtr = tmp.map<uint32_t *>(); | |
for (int i = 0; i < LEN; ++i) { | |
inPtr[i] = 1; | |
tmpPtr[i] = 0; | |
} | |
in.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
tmp.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
out.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
xrt::run run_(kernel); | |
run_.set_arg(0, npuInstructions); | |
run_.set_arg(1, npuInstructions.size()); | |
run_.set_arg(2, in); | |
run_.set_arg(3, tmp); | |
run_.set_arg(4, out); | |
run_.start(); | |
run_.wait2(); | |
in.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | |
tmp.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | |
out.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | |
auto *outPtr = out.map<uint32_t *>(); | |
for (int i = 0; i < LEN; ++i) | |
std::cout << outPtr[i] << ", "; | |
std::cout << "\n"; | |
} | |
// Opcode placed in the top byte of the header word that dumpRegistersDPU()
// emits ahead of the register address list.
constexpr std::uint32_t DUMP_REGISTERS_OPCODE{18U};
void dumpRegistersDPU() { | |
unsigned int deviceIndex = 0; | |
auto device = xrt::device(deviceIndex); | |
std::string xclbinFile( | |
"/home/mlevental/dev_projects/mlir-aie/example/final.xclbin"); | |
auto xclbin = xrt::xclbin(xclbinFile); | |
auto xkernel = xclbin.get_kernel("MLIR_AIE"); | |
auto kernelName = xkernel.get_name(); | |
device.register_xclbin(xclbin); | |
xrt::hw_context context(device, xclbin.get_uuid()); | |
auto kernel = xrt::kernel(context, kernelName); | |
std::vector<uint64_t> regOffsets{ | |
// corestatus | |
0x00032004, | |
// Stream_Switch_Master_Config_AIE_Core0 | |
0x0003F000, | |
// module clock control | |
0x00060000, | |
// Event_Group_Stream_Switch_Enable | |
0x00034518, | |
// core le | |
0x00031150, | |
// Core_CR | |
0x00031170, | |
0x00030C00, | |
0x00030C10, | |
}; | |
std::vector<uint32_t> instructionSequence{ | |
(DUMP_REGISTERS_OPCODE << 24) | static_cast<uint32_t>(regOffsets.size())}; | |
// std::vector<uint32_t> instructionSequence; | |
int col = 0, row = 2; | |
for (const auto ® : regOffsets) { | |
uint64_t absAddr = reg + getTileAddrHere(col, row); | |
instructionSequence.push_back(absAddr & 0xFFFFFFFF); | |
instructionSequence.push_back((absAddr >> 32) & 0xFFFFFFFF); | |
} | |
instructionSequence.insert(instructionSequence.begin(), prolog.begin(), | |
prolog.end()); | |
xrt::bo npuInstructions(device, instructionSequence.size() * sizeof(uint32_t), | |
XCL_BO_FLAGS_CACHEABLE, kernel.group_id(0)); | |
npuInstructions.write(instructionSequence.data()); | |
npuInstructions.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
// group_id matches kernels.json | |
const int LEN = 128; /* in bytes */ | |
auto in = xrt::bo(device, LEN * sizeof(uint32_t), XRT_BO_FLAGS_HOST_ONLY, | |
kernel.group_id(2)); | |
auto tmp = xrt::bo(device, LEN * sizeof(uint32_t), XRT_BO_FLAGS_HOST_ONLY, | |
kernel.group_id(3)); | |
auto out = xrt::bo(device, LEN * sizeof(uint32_t), XRT_BO_FLAGS_HOST_ONLY, | |
kernel.group_id(4)); | |
in.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
tmp.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
out.sync(XCL_BO_SYNC_BO_TO_DEVICE); | |
xrt::run run_(kernel); | |
run_.set_arg(0, npuInstructions); | |
run_.set_arg(1, npuInstructions.size()); | |
run_.set_arg(2, in); | |
run_.set_arg(3, tmp); | |
run_.set_arg(4, out); | |
run_.start(); | |
run_.wait2(); | |
xrt::bo resultBo = | |
xrt::bo(context.get_device(), SIZE_4K_HERE, | |
get_bo_flags(XRT_BO_FLAGS_CACHEABLE, XRT_BO_USE_DEBUG << 4), | |
kernel.group_id(0)); | |
if (!resultBo) | |
throw std::runtime_error("couldn't get resultBo"); | |
resultBo.sync(XCL_BO_SYNC_BO_FROM_DEVICE); | |
uint8_t *resultBoMap = resultBo.map<uint8_t *>(); | |
uint32_t *output = reinterpret_cast<uint32_t *>(resultBoMap + OFFSET_3K_HERE); | |
for (int i = 0; i < 1024; ++i) | |
std::cout << output[i] << ", "; | |
std::cout << "\n"; | |
} | |
/// Entry point: runs the kernel smoke test, then the register dump.
int main() {
  // Disabled experiments kept for reference:
  // validate();
  // dumpRegistersTransaction();

  testAdd256UsingDmaOpNoDoubleBuffering();
  dumpRegistersDPU();
  return 0;
}
// vim: ts=2 sw=2 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment