Skip to content

Instantly share code, notes, and snippets.

@nvnnghia
Created November 7, 2019 01:32
Show Gist options
  • Save nvnnghia/5ccd29ea8198d241f98211638e13f97d to your computer and use it in GitHub Desktop.
#include "common.h"
#include "cudaUtility.h"
#include "mathFunctions.h"
#include "pluginImplement.h"
#include "tensorNet.h"
#include "loadImage.h"
#include "imageBuffer.h"

#include <chrono>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <string>
#include <thread>
#include <vector>
// Caffe prototxt and merged weights for the Pelee SSD detection network.
const char* model = "model/pelee/pelee_deploy_iplugin.prototxt";
const char* weight = "model/pelee/pelee_merged.caffemodel";
// TensorRT blob names: network input and SSD detection output.
const char* INPUT_BLOB_NAME = "data";
const char* OUTPUT_BLOB_NAME = "detection_out";
// Two images per inference: each frame and its horizontal flip.
static const uint32_t BATCH_SIZE = 2;
//image buffer size = 10
//dropFrame = false
// Shared producer/consumer queue between the video-reader thread and main().
ConsumerProducerQueue<cv::Mat> *imageBuffer = new ConsumerProducerQueue<cv::Mat>(10,false);
/**
 * Minimal millisecond stopwatch. Call tic() to start timing and toc() to
 * stop; the elapsed wall time in milliseconds is then available in the
 * public member `t`. A toc() without a preceding tic() leaves `t` untouched.
 */
class Timer {
public:
// Start (or restart) the stopwatch.
void tic() {
    running_ = true;
    begin_ = Clock::now();
}
// Stop the stopwatch and store the elapsed interval (ms) into `t`.
// Does nothing if the stopwatch is not currently running.
void toc() {
    if (!running_) return;
    running_ = false;
    const auto finish = Clock::now();
    t = std::chrono::duration<double, std::milli>(finish - begin_).count();
}
double t; // last measured interval, in milliseconds
private:
using Clock = std::chrono::high_resolution_clock;
bool running_ = false;
Clock::time_point begin_;
};
/**
 * Allocates a unified-memory float buffer (cudaMallocManaged) large enough
 * for a full batch: BATCH_SIZE * C * H * W floats.
 *
 * @param dims per-image tensor dimensions (channels, height, width)
 * @param info human-readable label, printed for logging
 * @return pointer to the managed buffer (release with cudaFree);
 *         aborts the process if the allocation fails
 */
float* allocateMemory(DimsCHW dims, char* info)
{
    std::cout << "Allocate memory: " << info << std::endl;
    // size_t arithmetic up front to avoid int overflow for large tensors.
    const size_t size = (size_t)BATCH_SIZE * dims.c() * dims.h() * dims.w();
    float* ptr = NULL;
    // Check the result explicitly: the original wrapped this call in
    // assert(), which is compiled out entirely under NDEBUG — the
    // allocation would then never even be attempted.
    cudaError_t err = cudaMallocManaged(&ptr, size * sizeof(float));
    if (err != cudaSuccess)
    {
        std::cerr << "cudaMallocManaged failed for " << info << ": "
                  << cudaGetErrorString(err) << std::endl;
        std::abort();
    }
    return ptr;
}
/**
 * Resizes two interleaved-BGR images to (re_width x re_height) and packs them
 * into one planar float buffer laid out as [img1 B | G | R | img2 B | G | R],
 * normalizing every pixel as (value - per-channel mean) * scale.
 *
 * @param input1/input2  source BGR images
 * @param re_width/re_height  target size in pixels
 * @param data_unifrom  destination buffer, at least 2*3*re_width*re_height floats
 * @param mean  per-channel means (x=B, y=G, z=R)
 * @param scale  multiplicative normalization factor
 */
void loadImg( cv::Mat &input1, cv::Mat &input2, int re_width, int re_height, float *data_unifrom,const float3 mean,const float scale )
{
    cv::Mat resized1;
    cv::Mat resized2;
    cv::resize( input1, resized1, cv::Size( re_width, re_height ), (0.0), (0.0), cv::INTER_LINEAR );
    cv::resize( input2, resized2, cv::Size( re_width, re_height ), (0.0), (0.0), cv::INTER_LINEAR );

    const int planeSize = re_width * re_height; // floats per channel plane
    const int imageSize = planeSize * 3;        // floats per packed image

    // Writes one interleaved BGR row into the three planar channels of the
    // image starting at data_unifrom[imageOffset].
    auto writeRow = [&]( const unsigned char* rowPtr, int rowOffset, int imageOffset )
    {
        for( int col = 0; col < re_width; ++col )
        {
            const unsigned char* px = rowPtr + col * 3;
            data_unifrom[ imageOffset + rowOffset + col ]                 = ( float )( px[0] - mean.x ) * scale; // B
            data_unifrom[ imageOffset + planeSize + rowOffset + col ]     = ( float )( px[1] - mean.y ) * scale; // G
            data_unifrom[ imageOffset + 2 * planeSize + rowOffset + col ] = ( float )( px[2] - mean.z ) * scale; // R
        }
    };

    for( int row = 0; row < re_height; ++row )
    {
        const int rowOffset = row * re_width;
        writeRow( resized1.ptr< unsigned char >( row ), rowOffset, 0 );
        writeRow( resized2.ptr< unsigned char >( row ), rowOffset, imageSize );
    }
}
/**
 * Producer thread body: reads frames from the test video and pushes deep
 * copies into the shared imageBuffer queue until the stream is exhausted.
 */
void readPicture()
{
    cv::VideoCapture cap("testVideo/test.avi");
    if (!cap.isOpened())
    {
        std::cerr << "readPicture: failed to open testVideo/test.avi" << std::endl;
        return;
    }
    cv::Mat image;
    // cap.read() returns false at end-of-stream. The original looped on
    // cap.isOpened() — which stays true after EOF — and so spun forever
    // pushing empty Mats into the queue.
    while (cap.read(image))
    {
        // Clone: VideoCapture may reuse its internal frame buffer while the
        // consumer thread is still reading the previously queued Mat.
        imageBuffer->add(image.clone());
    }
}
int main(int argc, char *argv[])
{
std::vector<std::string> output_vector = {OUTPUT_BLOB_NAME};
TensorNet tensorNet;
tensorNet.LoadNetwork(model,weight,INPUT_BLOB_NAME, output_vector,BATCH_SIZE);
DimsCHW dimsData = tensorNet.getTensorDims(INPUT_BLOB_NAME);
DimsCHW dimsOut = tensorNet.getTensorDims(OUTPUT_BLOB_NAME);
float* data = allocateMemory( dimsData , (char*)"input blob");
std::cout << "allocate data" << std::endl;
float* output = allocateMemory( dimsOut , (char*)"output blob");
std::cout << "allocate output" << std::endl;
int height = 304;
int width = 304;
cv::Mat frame,srcImg, fl_frame;
void* imgCPU;
void* imgCUDA;
Timer timer;
int count =0;
std::thread readTread(readPicture);
readTread.detach();
cv::VideoWriter writer;
while(1)
{
count ++;
imageBuffer->consume(frame);
auto start = std::chrono::system_clock::now();
srcImg = frame.clone();
cv::resize(frame, frame, cv::Size(304,304));
cv::flip(frame,fl_frame,1);
const size_t size = width * height * sizeof(float3);
if( CUDA_FAILED( cudaMalloc( &imgCUDA, 2*size)) )
{
cout <<"Cuda Memory allocation error occured."<<endl;
return false;
}
void* imgData = malloc(2*size);
memset(imgData,0,2*size);
loadImg(frame,fl_frame,height,width,(float*)imgData,make_float3(103.94,116.78,123.68),0.017);
cudaMemcpyAsync(imgCUDA,imgData,2*size,cudaMemcpyHostToDevice);
void* buffers[] = { imgCUDA, output };
timer.tic();
tensorNet.imageInference( buffers, output_vector.size() + 1, BATCH_SIZE);
timer.toc();
double msTime = timer.t;
vector<vector<float> > detections;
for (int k=0; k<100; k++)
{
if(output[7*k+1] == -1)
break;
float classIndex = output[7*k+1];
float confidence = output[7*k+2];
float xmin = output[7*k + 3];
float ymin = output[7*k + 4];
float xmax = output[7*k + 5];
float ymax = output[7*k + 6];
//std::cout << classIndex << " , " << confidence << " , " << xmin << " , " << ymin<< " , " << xmax<< " , " << ymax << std::endl;
int x1 = static_cast<int>(xmin * srcImg.cols);
int y1 = static_cast<int>(ymin * srcImg.rows);
int x2 = static_cast<int>(xmax * srcImg.cols);
int y2 = static_cast<int>(ymax * srcImg.rows);
cv::rectangle(srcImg,cv::Rect2f(cv::Point(x1,y1),cv::Point(x2,y2)),cv::Scalar(255,0,255),1);
}
cv::Size size1;
size1.width = srcImg.cols;
size1.height = srcImg.rows;
auto end = std::chrono::system_clock::now();
std::chrono::duration<double> elapsed_seconds = end-start;
float duration = elapsed_seconds.count()*1000;
std::cout<<"Network processing time: "<<duration<< std::endl;
if (count == 1) {
char fname[256];
sprintf(fname,"result.wmv");
printf(fname);
writer.open(fname, cv::VideoWriter::fourcc('M', 'P', '4', 'V'), 20, size1);
}
else {
//cv::cvtColor(seg_gray, seg_gray, CV_BGR2GRAY);
writer << srcImg;
}
free(imgData);
}
writer.release();
cudaFree(imgCUDA);
cudaFreeHost(imgCPU);
cudaFree(output);
tensorNet.destroy();
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment