Skip to content

Instantly share code, notes, and snippets.

@empyre
Created October 5, 2017 22:05
Show Gist options
  • Save empyre/933985cf98b15461d52c85763478b21d to your computer and use it in GitHub Desktop.
Save empyre/933985cf98b15461d52c85763478b21d to your computer and use it in GitHub Desktop.
//--------------------------------------------------------------------------------------
// This is an experimental project. The C++ code below is collected from several files in the project.
// The first part is rendering in direct3D, the second part is hardware accelerated encoding
// using NVENC in GPU.
//--------------------------------------------------------------------------------------
//--------------------------------------------------------------------------------------
// First part: render a frame in Direct3D, H.264-encode it using NVENC, then send it to the
// client using UDP
//--------------------------------------------------------------------------------------
void CalculateFrameStats( )
{
    // Computes the average frames per second, and the average time (ms) it
    // takes to render one frame, over a one-second window. The stats are
    // written to the window caption bar. Called once per rendered frame
    // from the main message loop.
    //
    // Fix: removed the local `std::wstring mMainWndCaption` — it was
    // default-constructed (always empty) on every call, so streaming it
    // only prepended a stray space to the caption.
    static int frameCnt = 0;
    static float timeElapsed = 0.0f;
    frameCnt++;
    // Compute averages once each elapsed second.
    if ((mTimer.TotalTime() - timeElapsed) >= 1.0f)
    {
        float fps = (float)frameCnt; // fps = frameCnt / 1 second
        float mspf = 1000.0f / fps;  // milliseconds per frame
        std::wostringstream outs;
        outs.precision(6);
        outs << L"FPS: " << fps << L" "
            << L"Frame Time: " << mspf << L" (ms) "
            << L"| Encode Time: " << g_nvEncoder.m_encodeTime << L" (ms)";
        SetWindowText(g_hWnd, outs.str().c_str());
        // Reset for the next one-second averaging window.
        frameCnt = 0;
        timeElapsed += 1.0f;
    }
}
// Empty free-function stub; the per-frame encode actually performed by the
// message loop is the member function CNvEncoder::EncodeCurrentFrame()
// (called as g_nvEncoder.EncodeCurrentFrame() in wWinMain). This function is
// never referenced in the visible code and appears to be leftover scaffolding.
void EncodeCurrentFrame()
{
}
//--------------------------------------------------------------------------------------
// Application entry point: creates the window and the Direct3D device, wires the
// NVENC encoder to the D3D11 device, then runs the render/encode loop until WM_QUIT.
//--------------------------------------------------------------------------------------
int WINAPI wWinMain( _In_ HINSTANCE hInstance, _In_opt_ HINSTANCE hPrevInstance, _In_ LPWSTR lpCmdLine, _In_ int nCmdShow )
{
UNREFERENCED_PARAMETER( hPrevInstance );
UNREFERENCED_PARAMETER( lpCmdLine );
if( FAILED( InitWindow( hInstance, nCmdShow ) ) )
return 0;
if( FAILED( InitDevice() ) )
{
CleanupDevice();
return 0;
}
// Bind the encoder to the D3D11 device and register the render-target texture.
// NOTE(review): return values of InitD3D11/EncodeMain are not checked — confirm
// whether a failure here should abort as InitDevice() failures do.
g_nvEncoder.InitD3D11(g_pd3dDevice);
g_nvEncoder.EncodeMain(g_pTexTest, &g_udpServer);
mTimer.Reset();
// Main message loop
MSG msg = {0};
while( WM_QUIT != msg.message )
{
// Drain pending window messages; when idle, run one frame of work.
if( PeekMessage( &msg, nullptr, 0, 0, PM_REMOVE ) )
{
TranslateMessage( &msg );
DispatchMessage( &msg );
}
else
{
// Per-frame pipeline: advance the clock, poll the UDP socket,
// update the caption stats, render, then hardware-encode the frame.
mTimer.Tick();
g_udpServer.receive();
CalculateFrameStats();
Render();
//Sleep(10);
g_nvEncoder.EncodeCurrentFrame();
}
}
// Tear down the encoder before destroying the D3D device it depends on.
g_nvEncoder.EncodeRelease();
CleanupDevice();
return ( int )msg.wParam;
}
//--------------------------------------------------------------------------------------
// Convert the direct3D texture to CUDA resource and encode it to h.264 stream by using
// NVENC hardware encoder in GPU.
//--------------------------------------------------------------------------------------
NVENCSTATUS CNvEncoder::EncodeCurrentFrame()
{
    // Copies the registered Direct3D texture into CUDA device linear memory
    // (the buffer the NVENC input resource was registered against), then
    // submits it to the hardware encoder as one frame.
    //
    // Returns NV_ENC_SUCCESS; CUDA failures inside __cu(...) are handled by
    // that macro.
    //
    // Fix: removed the unused local `numBytesRead` and the dead commented-out
    // cuMemcpyAtoD block superseded by the pitched cuMemcpy2D below.
    CUstream stream = 0;
    const int nbResources = 1;
    CUgraphicsResource ppResources[nbResources] =
    {
        m_cudaResource,
    };
    // Map the shared D3D texture so CUDA can read it, and fetch its backing array.
    __cu(cuGraphicsMapResources(nbResources, ppResources, stream));
    CUarray pArray;
    __cu(cuGraphicsSubResourceGetMappedArray(&pArray, m_cudaResource, 0, 0));

    // Pitched 2D copy: CUDA array (texture) -> device linear memory.
    CUDA_MEMCPY2D copyDesc;
    memset(&copyDesc, 0, sizeof(copyDesc));
    copyDesc.srcMemoryType = CU_MEMORYTYPE_ARRAY;   // source is the mapped texture array
    copyDesc.srcArray      = pArray;
    copyDesc.dstMemoryType = CU_MEMORYTYPE_DEVICE;  // destination is linear device memory
    copyDesc.dstDevice     = m_cudaLinearMemory;
    copyDesc.dstPitch      = m_cudaPitch;
    // NOTE(review): width * 4 assumes a 4-byte-per-pixel texture format
    // (e.g. BGRA8) — confirm against the format registered in InitD3D11.
    copyDesc.WidthInBytes  = m_encodeConfig.width * 4;
    copyDesc.Height        = m_encodeConfig.height;
    __cu(cuMemcpy2D(&copyDesc));
    __cu(cuGraphicsUnmapResources(nbResources, ppResources, stream));

    // Describe the frame geometry and hand it to the encoder.
    EncodeFrameConfig frameCfg;
    memset(&frameCfg, 0, sizeof(frameCfg));
    frameCfg.stride[0] = m_encodeConfig.width;
    frameCfg.stride[1] = m_encodeConfig.width;
    frameCfg.width     = m_encodeConfig.width;
    frameCfg.height    = m_encodeConfig.height;
    EncodeFrame(&frameCfg, false, m_encodeConfig.width, m_encodeConfig.height);
    m_numFramesEncoded++;
    return NV_ENC_SUCCESS;
}
NVENCSTATUS CNvEncoder::EncodeFrame(EncodeFrameConfig *pEncodeFrame, bool bFlush, uint32_t width, uint32_t height)
{
    // Maps the registered CUDA resource as an NVENC input surface, encodes one
    // frame, unmaps the surface, and releases the encode buffer to the queue.
    //
    // pEncodeFrame: frame geometry/stride description; must be non-NULL unless flushing.
    // bFlush:       when true, would drain the encoder; currently a deliberate no-op.
    // width/height: dimensions forwarded to NvEncEncodeFrame.
    // Returns the first failing NVENC status, or NV_ENC_SUCCESS.
    //
    // Fixes: (1) guard against a NULL buffer from GetAvailable() — the
    // recovery path was commented out, leaving a guaranteed null dereference
    // on queue exhaustion; (2) do not let a successful unmap overwrite and
    // mask an encode failure; (3) removed unused locals `ii` and `lockedPitch`.
    NVENCSTATUS nvStatus = NV_ENC_SUCCESS;
    if (bFlush)
    {
        // FlushEncoder() intentionally disabled in this experiment.
        // FlushEncoder();
        return NV_ENC_SUCCESS;
    }
    if (!pEncodeFrame)
    {
        return NV_ENC_ERR_INVALID_PARAM;
    }
    EncodeBuffer *pEncodeBuffer = m_EncodeBufferQueue.GetAvailable();
    if (!pEncodeBuffer)
    {
        // No free input buffer. The original ProcessOutput(GetPending())
        // recycle path is commented out elsewhere; fail gracefully instead
        // of dereferencing NULL.
        return NV_ENC_ERR_OUT_OF_MEMORY;
    }
    pEncodeBuffer->stInputBfr.nvRegisteredResource = m_nvRegisteredResource;

    static uint32_t fCounter = 0;
    NvQueryPerformanceCounter(&lStart);
    nvStatus = m_pNvHWEncoder->NvEncMapInputResource(pEncodeBuffer->stInputBfr.nvRegisteredResource, &pEncodeBuffer->stInputBfr.hInputSurface);
    if (nvStatus != NV_ENC_SUCCESS)
        return nvStatus;
    nvStatus = m_pNvHWEncoder->NvEncEncodeFrame(pEncodeBuffer, NULL, width, height, (NV_ENC_PIC_STRUCT)m_uPicStruct);
    // Always unmap, but preserve an encode failure rather than replacing it
    // with the unmap status (the original overwrote nvStatus unconditionally).
    NVENCSTATUS unmapStatus = m_pNvHWEncoder->NvEncUnmapInputResource(pEncodeBuffer->stInputBfr.hInputSurface);
    if (nvStatus == NV_ENC_SUCCESS)
        nvStatus = unmapStatus;
    m_EncodeBufferQueue.Release();
    // NOTE(review): NvQueryPerformanceCounter(&lEnd) is commented out above in
    // the original, so m_tempTime is never accumulated and the reported encode
    // time is always 0 — re-enable the lEnd sampling if this stat is wanted.
    fCounter++;
    if (fCounter == 100)
    {
        // Average over the last 100 frames, converting counter ticks to ms.
        m_encodeTime = ((double)m_tempTime) * 1000 / fCounter;
        m_encodeTime = m_encodeTime / m_lFreq;
        m_tempTime = 0;
        fCounter = 0;
    }
    return nvStatus;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment