Skip to content

Instantly share code, notes, and snippets.

@schelleg

schelleg/dpu.cpp

Last active Nov 18, 2020
Embed
What would you like to do?
VSCode_on_ZU+
/*
# Copyright (C) 2020 Xilinx, Inc
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
#include <dirent.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <unistd.h>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <queue>
#include <string>
#include <utility>
#include <vector>
#include <opencv2/opencv.hpp>
#include <dnndk/dnndk.h>
#include <opencv2/opencv.hpp>
#include <dnndk/n2cube.h>
using namespace std;
using namespace cv;
#define N2CUBE_SUCCESS 0
#define USE_NEON_OPT
#define RESNET50_WORKLOAD (7.71f)
#define KERNEL_RESNET50 "resnet50_0"
#define INPUT_NODE "conv1"
#define OUTPUT_NODE "fc1000"
// Converts an OpenCV image into the int8 input tensor of a DPU task node.
//
// Each pixel channel is transformed as (pixel - mean[c]) * scaleFix, where
// scaleFix is the tensor scale reported by dpuGetInputTensorScale multiplied
// by the caller-supplied `scale`, and the result is stored in the buffer
// returned by dpuGetInputTensorAddress.
//
// Parameters:
//   task_v   - opaque task handle; cast to DPUTask* before use.
//   nodeName - input node name forwarded to every dpuGetInputTensor* query.
//   image    - source image; resized when its rows/cols differ from the
//              tensor height/width.
//   mean     - per-channel values subtracted from each pixel, indexed by channel.
//   scale    - extra factor multiplied into the tensor's own scale.
//   idx      - index forwarded unchanged to every dpuGetInputTensor* query.
//
// Returns N2CUBE_SUCCESS on every path; no failure is reported from here.
int dpuSetInputImageWithScale(void* task_v, const char* nodeName,
const cv::Mat &image, float *mean,
float scale, int idx){
int value;
int8_t *inputAddr;
unsigned char *resized_data;
cv::Mat newImage;
float scaleFix;
int height, width, channel;
DPUTask* task = (DPUTask*)task_v;
// Query the geometry the input node expects.
height = dpuGetInputTensorHeight(task, nodeName, idx);
width = dpuGetInputTensorWidth(task, nodeName, idx);
channel = dpuGetInputTensorChannel(task, nodeName, idx);
// Use the image as-is when it already matches the tensor geometry,
// otherwise resize it to height x width with bilinear interpolation.
if (height == image.rows && width == image.cols) {
newImage = image;
} else {
// NOTE(review): the destination header is declared CV_8SC3 over the tensor
// buffer, while cv::resize presumably produces the source's element type and
// reallocates newImage when the types differ - confirm that the tensor
// buffer is not actually written by the resize.
newImage = cv::Mat (height, width, CV_8SC3,
(void*)dpuGetInputTensorAddress(task, nodeName, idx));
cv::resize(image, newImage, newImage.size(), 0, 0, cv::INTER_LINEAR);
}
resized_data = newImage.data;
inputAddr = dpuGetInputTensorAddress(task, nodeName, idx);
// Fold the caller's scale into the tensor's own scale once, up front.
scaleFix = dpuGetInputTensorScale(task, nodeName, idx);
scaleFix = scaleFix*scale;
if (newImage.channels() == 1) {
// Single-channel image: scalar loop. The source is indexed as a tightly
// packed height x width x channel buffer (no row stride is applied).
for (int idx_h=0; idx_h<height; idx_h++) {
for (int idx_w=0; idx_w<width; idx_w++) {
for (int idx_c=0; idx_c<channel; idx_c++) {
value = *(resized_data+idx_h*width*channel+\
idx_w*channel+idx_c);
value = (int)((value - *(mean+idx_c)) * scaleFix);
// The destination index does not include idx_c, so every channel
// iteration writes the same element.
inputAddr[idx_h*newImage.cols+idx_w] = (char)value;
}
}
}
} else {
// USE_NEON_OPT is defined near the top of this file, so the library routine
// below is the path that is compiled.
// NOTE(review): dpuProcessNormalizion presumably performs the same
// (pixel - mean) * scaleFix conversion as the fallback loop - confirm
// against the DNNDK documentation.
#ifdef USE_NEON_OPT
dpuProcessNormalizion(inputAddr, newImage.data, newImage.rows,
newImage.cols, mean, scaleFix,
newImage.step1());
#else
// Portable fallback: reads pixels as Vec3b and writes three interleaved
// values per pixel.
for (int idx_h=0; idx_h<newImage.rows; idx_h++) {
for (int idx_w=0; idx_w<newImage.cols; idx_w++) {
for (int idx_c=0; idx_c<3; idx_c++) {
value = (int)((newImage.at<Vec3b>(idx_h, idx_w)[idx_c] -\
mean[idx_c]) * scaleFix);
inputAddr[idx_h*newImage.cols*3+idx_w*3+idx_c] = \
(char)value;
}
}
}
#endif
}
return N2CUBE_SUCCESS;
}
// Loads `image` into the input node `nodeName` of the given task, using the
// mean values reported by dpuGetKernelMean and a scale factor of 1.0.
//
// task_v   - opaque task handle (a DPUTask* passed as void*).
// nodeName - input node name.
// image    - source image; its channel count is the number of mean values
//            requested from dpuGetKernelMean.
// idx      - tensor index forwarded to dpuSetInputImageWithScale.
//
// Returns whatever dpuSetInputImageWithScale returns.
int dpuSetInputImage2(void* task_v, const char* nodeName,
const cv::Mat &image, int idx){
    DPUTask* const dpuTask = static_cast<DPUTask*>(task_v);

    // NOTE(review): the buffer holds three values while the requested count
    // is image.channels() - confirm callers never pass more than 3 channels.
    float kernelMean[3];
    const int meanCount = image.channels();
    dpuGetKernelMean(dpuTask, kernelMean, meanCount);

    const float unitScale = 1.0f;
    return dpuSetInputImageWithScale(task_v, nodeName, image,
                                     kernelMean, unitScale, idx);
}
/**
 * Collects the names of the image files found directly inside a directory.
 *
 * @param path   Directory to scan.
 * @param images Output list; cleared first, then filled with the matching
 *               file names (names only, not full paths) in ascending order.
 *
 * An entry is accepted when it is a regular file (or its type is not reported
 * by the filesystem) and its extension is one of JPEG, jpeg, JPG, jpg, PNG,
 * png. Entries without any '.' in their name are skipped.
 *
 * Prints an error to stderr and exits with status 1 when `path` cannot be
 * inspected, is not a directory, or cannot be opened.
 */
void ListImages(const std::string &path, std::vector<std::string> &images) {
    images.clear();

    // Check the stat() result before reading the buffer: on failure its
    // contents are unspecified. stat() (not lstat()) is used so that a symlink
    // pointing at a directory is accepted, matching what opendir() can open.
    struct stat s;
    if (stat(path.c_str(), &s) != 0 || !S_ISDIR(s.st_mode)) {
        fprintf(stderr, "Error: %s is not a valid directory!\n",
                path.c_str());
        exit(1);
    }

    DIR *dir = opendir(path.c_str());
    if (dir == nullptr) {
        fprintf(stderr, "Error: Open %s path failed.\n", path.c_str());
        exit(1);
    }

    static const char *const kExtensions[] = {"JPEG", "jpeg", "JPG",
                                              "jpg",  "PNG",  "png"};

    struct dirent *entry;
    while ((entry = readdir(dir)) != nullptr) {
        if (entry->d_type != DT_REG && entry->d_type != DT_UNKNOWN) {
            continue;
        }
        const std::string name = entry->d_name;

        // Without this check, npos + 1 wraps to 0 and the whole file name
        // would be treated as the extension.
        const std::string::size_type dot = name.find_last_of('.');
        if (dot == std::string::npos) {
            continue;
        }
        const std::string ext = name.substr(dot + 1);

        for (const char *candidate : kExtensions) {
            if (ext == candidate) {
                images.push_back(name);
                break;
            }
        }
    }
    closedir(dir);

    std::sort(images.begin(), images.end());
}
/**
 * Reads a text file line by line into a list of strings.
 *
 * @param path  File to read.
 * @param kinds Output list; cleared first, then filled with one entry per
 *              line, in file order.
 *
 * The file is opened read-only, so files without write permission can be
 * loaded. A trailing carriage return left over from CRLF line endings is
 * removed from each line.
 *
 * Prints an error to stderr and exits with status 1 when the file cannot be
 * opened.
 */
void LoadWords(const std::string &path, std::vector<std::string> &kinds) {
    kinds.clear();

    // std::ifstream opens for input only; std::fstream defaults to in|out and
    // therefore fails on read-only files.
    std::ifstream fkinds(path);
    if (fkinds.fail()) {
        fprintf(stderr, "Error : Open %s failed.\n", path.c_str());
        exit(1);
    }

    std::string kind;
    while (std::getline(fkinds, kind)) {
        if (!kind.empty() && kind.back() == '\r') {
            kind.pop_back();
        }
        kinds.push_back(kind);
    }
    // The stream closes itself when it goes out of scope.
}
/**
 * Prints the k largest values of an array together with their labels.
 *
 * @param d      Array of `size` scores.
 * @param size   Number of elements in `d`; must be positive.
 * @param k      Number of entries to print; must be positive. Values larger
 *               than `size` are clamped to `size`.
 * @param vkinds Label for each index of `d`. Indices that have no label are
 *               printed as "(unknown)".
 *
 * Output goes to stdout, one line per entry, highest score first. Equal
 * scores are ordered by descending index.
 */
void TopK(const float *d, int size, int k, std::vector<std::string> &vkinds) {
    assert(d && size > 0 && k > 0);

    std::priority_queue<std::pair<float, int>> q;
    for (int i = 0; i < size; ++i) {
        q.push(std::pair<float, int>(d[i], i));
    }

    // Never pop more entries than were pushed.
    const int count = std::min(k, size);
    for (int i = 0; i < count; ++i) {
        const std::pair<float, int> ki = q.top();
        q.pop();

        // Guard against a label list shorter than the score array.
        const std::size_t cls = static_cast<std::size_t>(ki.second);
        const char *label =
            cls < vkinds.size() ? vkinds[cls].c_str() : "(unknown)";
        printf("top[%d] prob = %-8f name = %s\n", i, d[ki.second], label);
    }
}
void runResnet50(void* task_v, string baseImagePath) {
DPUTask* taskResnet50 = (DPUTask*)task_v;
vector<string> kinds, images;
ListImages(baseImagePath, images);
if (images.size() == 0) {
cerr << "\nError: No images existing under " << baseImagePath << endl;
return;
}
LoadWords(baseImagePath + "words.txt", kinds);
if (kinds.size() == 0) {
cerr << "\nError: No words exist in file words.txt." << endl;
return;
}
int8_t *outAddr = (int8_t *)dpuGetOutputTensorAddress(
taskResnet50, OUTPUT_NODE);
int size = dpuGetOutputTensorSize(taskResnet50, OUTPUT_NODE);
int channel = dpuGetOutputTensorChannel(taskResnet50, OUTPUT_NODE);
float out_scale = dpuGetOutputTensorScale(taskResnet50, OUTPUT_NODE);
float *softmax = new float[size];
for (auto &imageName : images) {
cout << "\nLoad image : " << imageName << endl;
Mat image = imread(baseImagePath + imageName);
dpuSetInputImage2(task_v, INPUT_NODE, image, 0);
cout << "\nRun DPU Task for ResNet50 ..." << endl;
dpuRunTask(taskResnet50);
long long timeProf = dpuGetTaskProfile(taskResnet50);
cout << " DPU Task Execution time: " << (timeProf * 1.0f) << "us\n";
float prof = (RESNET50_WORKLOAD / timeProf) * 1000000.0f;
cout << " DPU Task Performance: " << prof << "GOPS\n";
dpuRunSoftmax(outAddr, softmax, channel, size/channel, out_scale);
TopK(softmax, channel, 5, kinds);
}
delete[] softmax;
}
// Opens the DPU, loads the ResNet50 kernel, creates one task from it, runs
// the classification demo over `baseImagePath`, then destroys the task and
// the kernel. Always returns 0.
//
// NOTE(review): no dpuClose() call is made after dpuOpen() - confirm whether
// the runtime requires one.
int run(string baseImagePath) {
    dpuOpen();

    DPUKernel *const kernel = dpuLoadKernel(KERNEL_RESNET50);
    DPUTask *const task = dpuCreateTask(kernel, 0);

    runResnet50(static_cast<void*>(task), baseImagePath);

    // Tear down in reverse order of creation.
    dpuDestroyTask(task);
    dpuDestroyKernel(kernel);
    return 0;
}
int main(){
return run("/home/xilinx/jupyter_notebooks/pynq-dpu/img/");
}
{
"version": "2.0.0",
"tasks": [
{
"type": "process",
"label": "C/C++: g++ build active file",
"command": "/usr/bin/g++",
"args": [
"-g",
"-O2",
"-Wall","-Wpointer-arith","-std=c++11","-ffast-math",
"-L/usr/lib","-L/usr/local/lib","-I/usr/include","-I/usr/local/include",
"${file}",
"-o",
"${fileDirname}/${fileBasenameNoExtension}",
"-ln2cube","-lhineon","-lopencv_videoio","-lopencv_imgcodecs","-lopencv_highgui","-lopencv_imgproc","-lopencv_core","-lpthread"
],
"options": {
"cwd": "/usr/bin"
},
"problemMatcher": [
"$gcc"
],
"group": {
"kind": "build",
"isDefault": true
},
"detail": "compiler: /usr/bin/g++"
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment