Skip to content

Instantly share code, notes, and snippets.

@empyre
Created October 5, 2017 22:05
Show Gist options
  • Save empyre/933985cf98b15461d52c85763478b21d to your computer and use it in GitHub Desktop.
Save empyre/933985cf98b15461d52c85763478b21d to your computer and use it in GitHub Desktop.
//--------------------------------------------------------------------------------------
// This is an experimental project. The C++ code below is collected from several files in the project.
// The first part is rendering in direct3D, the second part is hardware accelerated encoding
// using NVENC in GPU.
//--------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// First part: render a frame in Direct3D, H.264-encode it using NVENC, then send it to the
// client using UDP
//--------------------------------------------------------------------------------------
void CalculateFrameStats( )
{
    // Computes the average frames per second, and the average time (ms) it
    // takes to render one frame, over a one-second window. The stats are
    // written to the window caption bar. Called once per rendered frame
    // from the main message loop.
    //
    // Fix: removed the local `std::wstring mMainWndCaption` — it was
    // default-constructed (always empty) on every call, so streaming it
    // only prepended a stray space to the caption.
    static int frameCnt = 0;
    static float timeElapsed = 0.0f;
    frameCnt++;
    // Compute averages once each elapsed second.
    if ((mTimer.TotalTime() - timeElapsed) >= 1.0f)
    {
        float fps = (float)frameCnt; // fps = frameCnt / 1 second
        float mspf = 1000.0f / fps;  // milliseconds per frame
        std::wostringstream outs;
        outs.precision(6);
        outs << L"FPS: " << fps << L" "
            << L"Frame Time: " << mspf << L" (ms) "
            << L"| Encode Time: " << g_nvEncoder.m_encodeTime << L" (ms)";
        SetWindowText(g_hWnd, outs.str().c_str());
        // Reset for the next one-second averaging window.
        frameCnt = 0;
        timeElapsed += 1.0f;
    }
}
// Empty free-function stub; the per-frame encode actually performed by the
// message loop is the member function CNvEncoder::EncodeCurrentFrame()
// (called as g_nvEncoder.EncodeCurrentFrame() in wWinMain). This function is
// never referenced in the visible code and appears to be leftover scaffolding.
void EncodeCurrentFrame()
{
}
//--------------------------------------------------------------------------------------
// Application entry point: creates the window and the Direct3D device, wires the
// NVENC encoder to the D3D11 device, then runs the render/encode loop until WM_QUIT.
//--------------------------------------------------------------------------------------
int WINAPI wWinMain( _In_ HINSTANCE hInstance, _In_opt_ HINSTANCE hPrevInstance, _In_ LPWSTR lpCmdLine, _In_ int nCmdShow )
{
UNREFERENCED_PARAMETER( hPrevInstance );
UNREFERENCED_PARAMETER( lpCmdLine );
if( FAILED( InitWindow( hInstance, nCmdShow ) ) )
return 0;
if( FAILED( InitDevice() ) )
{
CleanupDevice();
return 0;
}
// Bind the encoder to the D3D11 device and register the render-target texture.
// NOTE(review): return values of InitD3D11/EncodeMain are not checked — confirm
// whether a failure here should abort as InitDevice() failures do.
g_nvEncoder.InitD3D11(g_pd3dDevice);
g_nvEncoder.EncodeMain(g_pTexTest, &g_udpServer);
mTimer.Reset();
// Main message loop
MSG msg = {0};
while( WM_QUIT != msg.message )
{
// Drain pending window messages; when idle, run one frame of work.
if( PeekMessage( &msg, nullptr, 0, 0, PM_REMOVE ) )
{
TranslateMessage( &msg );
DispatchMessage( &msg );
}
else
{
// Per-frame pipeline: advance the clock, poll the UDP socket,
// update the caption stats, render, then hardware-encode the frame.
mTimer.Tick();
g_udpServer.receive();
CalculateFrameStats();
Render();
//Sleep(10);
g_nvEncoder.EncodeCurrentFrame();
}
}
// Tear down the encoder before destroying the D3D device it depends on.
g_nvEncoder.EncodeRelease();
CleanupDevice();
return ( int )msg.wParam;
}
//--------------------------------------------------------------------------------------
// Convert the direct3D texture to CUDA resource and encode it to h.264 stream by using
// NVENC hardware encoder in GPU.
//--------------------------------------------------------------------------------------
NVENCSTATUS CNvEncoder::EncodeCurrentFrame()
{
    // Copies the registered Direct3D texture into CUDA device linear memory
    // (the buffer the NVENC input resource was registered against), then
    // submits it to the hardware encoder as one frame.
    //
    // Returns NV_ENC_SUCCESS; CUDA failures inside __cu(...) are handled by
    // that macro.
    //
    // Fix: removed the unused local `numBytesRead` and the dead commented-out
    // cuMemcpyAtoD block superseded by the pitched cuMemcpy2D below.
    CUstream stream = 0;
    const int nbResources = 1;
    CUgraphicsResource ppResources[nbResources] =
    {
        m_cudaResource,
    };
    // Map the shared D3D texture so CUDA can read it, and fetch its backing array.
    __cu(cuGraphicsMapResources(nbResources, ppResources, stream));
    CUarray pArray;
    __cu(cuGraphicsSubResourceGetMappedArray(&pArray, m_cudaResource, 0, 0));

    // Pitched 2D copy: CUDA array (texture) -> device linear memory.
    CUDA_MEMCPY2D copyDesc;
    memset(&copyDesc, 0, sizeof(copyDesc));
    copyDesc.srcMemoryType = CU_MEMORYTYPE_ARRAY;   // source is the mapped texture array
    copyDesc.srcArray      = pArray;
    copyDesc.dstMemoryType = CU_MEMORYTYPE_DEVICE;  // destination is linear device memory
    copyDesc.dstDevice     = m_cudaLinearMemory;
    copyDesc.dstPitch      = m_cudaPitch;
    // NOTE(review): width * 4 assumes a 4-byte-per-pixel texture format
    // (e.g. BGRA8) — confirm against the format registered in InitD3D11.
    copyDesc.WidthInBytes  = m_encodeConfig.width * 4;
    copyDesc.Height        = m_encodeConfig.height;
    __cu(cuMemcpy2D(&copyDesc));
    __cu(cuGraphicsUnmapResources(nbResources, ppResources, stream));

    // Describe the frame geometry and hand it to the encoder.
    EncodeFrameConfig frameCfg;
    memset(&frameCfg, 0, sizeof(frameCfg));
    frameCfg.stride[0] = m_encodeConfig.width;
    frameCfg.stride[1] = m_encodeConfig.width;
    frameCfg.width     = m_encodeConfig.width;
    frameCfg.height    = m_encodeConfig.height;
    EncodeFrame(&frameCfg, false, m_encodeConfig.width, m_encodeConfig.height);
    m_numFramesEncoded++;
    return NV_ENC_SUCCESS;
}
NVENCSTATUS CNvEncoder::EncodeFrame(EncodeFrameConfig *pEncodeFrame, bool bFlush, uint32_t width, uint32_t height)
{
    // Maps the registered CUDA resource as an NVENC input surface, encodes one
    // frame, unmaps the surface, and releases the encode buffer to the queue.
    //
    // pEncodeFrame: frame geometry/stride description; must be non-NULL unless flushing.
    // bFlush:       when true, would drain the encoder; currently a deliberate no-op.
    // width/height: dimensions forwarded to NvEncEncodeFrame.
    // Returns the first failing NVENC status, or NV_ENC_SUCCESS.
    //
    // Fixes: (1) guard against a NULL buffer from GetAvailable() — the
    // recovery path was commented out, leaving a guaranteed null dereference
    // on queue exhaustion; (2) do not let a successful unmap overwrite and
    // mask an encode failure; (3) removed unused locals `ii` and `lockedPitch`.
    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
    if (bFlush)
    {
        // FlushEncoder() intentionally disabled in this experiment.
        // FlushEncoder();
        return NV_ENC_SUCCESS;
    }
    if (!pEncodeFrame)
    {
        return NV_ENC_ERR_INVALID_PARAM;
    }
    EncodeBuffer *pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
    if (!pEncodeBuffer)
    {
        // No free input buffer. The original ProcessOutput(GetPending())
        // recycle path is commented out elsewhere; fail gracefully instead
        // of dereferencing NULL.
        return NV_ENC_ERR_OUT_OF_MEMORY;
    }
    pEncodeBuffer->stInputBfr.nvRegisteredResource = m_nvRegisteredResource;

    static uint32_t fCounter = 0;
    NvQueryPerformanceCounter(&lStart);
    nvStatus = m_pNvHWEncoder->NvEncMapInputResource(pEncodeBuffer->stInputBfr.nvRegisteredResource, &pEncodeBuffer->stInputBfr.hInputSurface);
    if (nvStatus != NV_ENC_SUCCESS)
        return nvStatus;
    nvStatus = m_pNvHWEncoder->NvEncEncodeFrame(pEncodeBuffer, NULL, width, height, (NV_ENC_PIC_STRUCT)m_uPicStruct);
    // Always unmap, but preserve an encode failure rather than replacing it
    // with the unmap status (the original overwrote nvStatus unconditionally).
    NVENCSTATUS unmapStatus = m_pNvHWEncoder->NvEncUnmapInputResource(pEncodeBuffer->stInputBfr.hInputSurface);
    if (nvStatus == NV_ENC_SUCCESS)
        nvStatus = unmapStatus;
    m_EncodeBufferQueue.Release();
    // NOTE(review): NvQueryPerformanceCounter(&lEnd) is commented out above in
    // the original, so m_tempTime is never accumulated and the reported encode
    // time is always 0 — re-enable the lEnd sampling if this stat is wanted.
    fCounter++;
    if (fCounter == 100)
    {
        // Average over the last 100 frames, converting counter ticks to ms.
        m_encodeTime = ((double)m_tempTime) * 1000 / fCounter;
        m_encodeTime = m_encodeTime / m_lFreq;
        m_tempTime = 0;
        fCounter = 0;
    }
    return nvStatus;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment