@m1keall1son
Last active February 19, 2022 07:48
C++ Program to read a video file and re-encode it to H.264 / AAC using Windows Media Foundation
// USAGE: program.exe path\to\video-in.mp4 path\to\video-out.mp4
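// Overview: an IMFSourceReader decodes the input file to uncompressed frames (RGB32 video,
// 16-bit PCM audio) and an IMFSinkWriter re-encodes them to H.264 / AAC in an MP4 container.
// All Media Foundation timestamps and durations below are in 100-nanosecond units.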
#include <iostream>
#include <string>
#include <mfidl.h> // Media Foundation interfaces
#include <mfapi.h> // Media Foundation platform APIs
#include <mferror.h> // Media Foundation error codes
#include <mfreadwrite.h>
#include <wmcontainer.h> // ASF-specific components
#include <wmcodecdsp.h> // Windows Media DSP interfaces
#include <Dmo.h> // DMO objects
#include <uuids.h> // Definition for FORMAT_VideoInfo
#include <propvarutil.h>
#include <d3d9.h>
#include <initguid.h>
#include <dxva2api.h>
#include <map>
#include <chrono>
#include <vector>
#include <fstream>
// The required link libraries
#pragma comment(lib, "mfplat")
#pragma comment(lib, "mf")
#pragma comment(lib, "mfuuid")
#pragma comment(lib, "mfreadwrite")
#pragma comment(lib, "msdmo")
#pragma comment(lib, "strmiids")
#pragma comment(lib, "propsys")
#pragma comment(lib, "d3d9")
#pragma comment(lib, "dxva2")
class MFSingleton
{
public:
MFSingleton() {}
~MFSingleton()
{
if (s_pD3D9Ex != NULL)
{
ULONG refCount = s_pD3D9Ex->Release();
if (refCount > 0)
{
std::cout << "Released Direct3D interface but ref cout > 0" << std::endl;
}
s_pD3D9Ex = NULL;
}
if (s_bMFInitialized)
{
HRESULT hr = MFShutdown();
if (FAILED(hr))
{
std::cout << "Failed to shut down Media Foundation" << std::endl;
}
s_bMFInitialized = false;
}
if (s_bCOMInitialized)
{
CoUninitialize();
s_bCOMInitialized = false;
}
}
static IDirect3D9Ex * getD3D9Ex()
{
init();
return s_pD3D9Ex;
}
private:
static IDirect3D9Ex * s_pD3D9Ex;
static bool s_bCOMInitialized;
static bool s_bMFInitialized;
static bool s_bNVAPIInitialized;
static void init()
{
if (!s_bCOMInitialized)
{
// Initialize COM
HRESULT hr = CoInitializeEx(0, COINIT_APARTMENTTHREADED);
if (hr == S_FALSE)
{
std::cout << "COM already initialized on this thread." << std::endl;
}
else if (hr != S_OK)
{
throw std::runtime_error("Failed to initialize COM.");
}
s_bCOMInitialized = true;
}
if (!s_bMFInitialized)
{
// Initialize Media Foundation
HRESULT hr = MFStartup(MF_VERSION);
if (FAILED(hr))
{
throw std::runtime_error("Failed to initialize Media Foundation");
}
s_bMFInitialized = true;
}
if (s_pD3D9Ex == NULL)
{
// Initialize D3D9
HRESULT hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &s_pD3D9Ex);
if (FAILED(hr))
{
throw std::runtime_error("Failed to initialize Direct3D");
}
}
}
};
IDirect3D9Ex * MFSingleton::s_pD3D9Ex = NULL;
bool MFSingleton::s_bCOMInitialized = false;
bool MFSingleton::s_bMFInitialized = false;
bool MFSingleton::s_bNVAPIInitialized = false;
// Create global scope instance of MFSingleton to ensure startup and shutdown occurs once
// Do not use this class anywhere else!
static MFSingleton g_MFSingleton;
template <class T> void SafeRelease(T **ppT)
{
if (*ppT)
{
(*ppT)->Release();
*ppT = NULL;
}
}
IMFSourceReaderEx * m_pSourceReader{nullptr};
IDirect3DDeviceManager9 * m_pD3D9DeviceManager{ nullptr };
IDirect3D9Ex * m_pD3D9Ex{ nullptr };
IDirect3DDevice9Ex * m_pD3D9Device{ nullptr };
IDirect3DTexture9 * m_pD3D9Texture{ nullptr };
IDirect3DSurface9 * m_pD3D9Surface{ nullptr };
unsigned int m_iResetToken{0};
IMFSinkWriter *m_pWriter{ nullptr };
DWORD m_readVideoStreamIndex;
DWORD m_readAudioStreamIndex;
DWORD m_writeVideoStreamIndex;
DWORD m_writeAudioStreamIndex;
//video metadata
LONGLONG mediaDuration{ 0 };
UINT32 frameDurationNum{ 0 }, frameDurationDenom{ 0 };
UINT32 width{ 0 }, height{ 0 };
//audio metadata
UINT32 audioChannels{ 0 };
UINT32 audioSamplesPerSecond{ 0 };
UINT32 audioAvgBitrate{ 0 };
UINT32 audioBitsPerSample{ 0 };
UINT32 audioBlockAlign{0};
UINT32 audioAvgBytesPerSecond{ 0 };
UINT32 audioSamplesPerBlock{ 0 };
UINT32 audioValidBitsPerSample{ 0 };
const UINT32 VIDEO_BIT_RATE = 10000000;
const GUID VIDEO_INPUT_FORMAT = MFVideoFormat_RGB32;
const GUID VIDEO_ENCODING_FORMAT = MFVideoFormat_H264;
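// Wraps one uncompressed RGB32 frame (tightly packed, 4 bytes per pixel) in an IMFSample
// and passes it to the sink writer; timestamp and duration are in 100-ns units.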
HRESULT WriteFrame(
BYTE * buffer,
DWORD streamIndex,
LONGLONG timestamp,
LONGLONG duration
)
{
IMFSample *pSample = NULL;
IMFMediaBuffer * pBuffer = NULL;
BYTE *pData = NULL;
UINT32 numPixels = width * height;
DWORD cbWidth = 4 * width;
DWORD cbBuffer = cbWidth * height;
// Create a new memory buffer.
HRESULT hr = MFCreateMemoryBuffer(cbBuffer, &pBuffer);
if (FAILED(hr))
{
throw std::runtime_error("Failed to initialize sink writer: failed to create memory buffer");
}
// Lock the buffer and copy the video frame to the buffer.
hr = pBuffer->Lock(&pData, NULL, NULL);
if (SUCCEEDED(hr))
{
hr = MFCopyImage(
pData, // Destination buffer.
cbWidth, // Destination stride.
buffer, // First row in source image.
cbWidth, // Source stride.
cbWidth, // Image width in bytes.
height // Image height in pixels.
);
}
if (pBuffer)
{
pBuffer->Unlock();
}
// Set the data length of the buffer.
if (SUCCEEDED(hr))
{
hr = pBuffer->SetCurrentLength(cbBuffer);
}
// Create a media sample and add the buffer to the sample.
if (SUCCEEDED(hr))
{
hr = MFCreateSample(&pSample);
}
if (SUCCEEDED(hr))
{
hr = pSample->AddBuffer(pBuffer);
}
// Set the time stamp and the duration.
if (SUCCEEDED(hr))
{
hr = pSample->SetSampleTime(timestamp);
}
if (SUCCEEDED(hr))
{
hr = pSample->SetSampleDuration(duration);
}
// Send the sample to the Sink Writer.
if (SUCCEEDED(hr))
{
hr = m_pWriter->WriteSample(streamIndex, pSample);
}
if (!SUCCEEDED(hr)) {
std::cout << "Error writing video frame" << std::endl;
}
SafeRelease(&pBuffer);
SafeRelease(&pSample);
return hr;
}
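// Same idea as WriteFrame, but for a block of raw PCM audio: copy the bytes into an
// IMFMediaBuffer, stamp the sample, and hand it to the sink writer's audio stream.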
HRESULT WriteAudioBuffer(BYTE* buffer, size_t bufferSize, LONGLONG timestamp, LONGLONG duration)
{
HRESULT hr;
IMFSample *pSample = NULL;
BYTE *pData = NULL;
// Create a new memory buffer.
const DWORD cbBuffer = bufferSize;
IMFMediaBuffer * aBuffer = NULL;
hr = MFCreateMemoryBuffer(cbBuffer, &aBuffer);
// Lock the buffer and copy the video frame to the buffer.
if (SUCCEEDED(hr)) {
hr = aBuffer->Lock(&pData, NULL, NULL);
}
if (SUCCEEDED(hr)) {
memcpy(pData, buffer, cbBuffer);
}
if (aBuffer) {
aBuffer->Unlock();
}
// Set the data length of the buffer.
if (SUCCEEDED(hr)) {
hr = aBuffer->SetCurrentLength(cbBuffer);
}
// Create a media sample and add the buffer to the sample.
if (SUCCEEDED(hr)) {
hr = MFCreateSample(&pSample);
}
else
{
throw std::runtime_error("Error writing audio frame: unable to create sample");
}
if (SUCCEEDED(hr)) {
hr = pSample->AddBuffer(aBuffer);
}
// Set the sample time
if (SUCCEEDED(hr)) {
hr = pSample->SetSampleTime(timestamp);
}
// Set the sample duration
if (SUCCEEDED(hr)) {
hr = pSample->SetSampleDuration(duration);
}
// Send the sample to the Sink Writer.
if (SUCCEEDED(hr)) {
hr = m_pWriter->WriteSample(m_writeAudioStreamIndex, pSample);
}
if (!SUCCEEDED(hr)) {
std::cout << "Error writing audio frame" << std::endl;
}
SafeRelease(&pSample);
SafeRelease(&aBuffer);
return hr;
}
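// Small RAII helper: locks the sample's (contiguous) media buffer for reading in the
// constructor and unlocks/releases it in the destructor.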
struct MFBufferAccess {
~MFBufferAccess()
{
if (m_pBuffer == NULL)
{
return;
}
HRESULT hr = m_pBuffer->Unlock();
if (hr != S_OK)
{
std::cout << "Unable to unlock audio buffer" << std::endl;
}
SafeRelease(&m_pBuffer);
}
MFBufferAccess(IMFSample* pSample)
{
DWORD numBuffers = 0;
HRESULT hr = pSample->GetBufferCount(&numBuffers);
if (hr != S_OK)
{
std::cout << "Unable to query audio buffer count" << std::endl;
}
if (numBuffers > 1)
{
hr = pSample->ConvertToContiguousBuffer(&m_pBuffer);
}
else
{
hr = pSample->GetBufferByIndex(0, &m_pBuffer);
}
if (hr != S_OK || m_pBuffer == NULL)
{
SafeRelease(&m_pBuffer);
std::cout << "Unable to fetch audio buffer" << std::endl;
return;
}
hr = m_pBuffer->Lock(&data, &maxSize, &size);
if (hr != S_OK)
{
SafeRelease(&m_pBuffer);
std::cout << "Unable to lock audio buffer for reading" << std::endl;
}
}
BYTE* data{ nullptr };
DWORD size{ 0 };
DWORD maxSize{ 0 };
private:
IMFMediaBuffer *m_pBuffer{ NULL };
};
int main(int argc, char *argv[])
{
if (argc < 3)
{
std::cout << "USAGE: program.exe path\\to\\video-in.mp4 path\\to\\video-out.mp4" << std::endl;
return 1;
}
//--------------------------------
// SETUP DEVICES
//--------------------------------
m_pD3D9Ex = MFSingleton::getD3D9Ex();
IMFAttributes * pAttr = nullptr;
//enable hardware decoding to convert yuv formats to RGB32
HRESULT hr = MFCreateAttributes(&pAttr, 1);
if (SUCCEEDED(hr))
{
hr = pAttr->SetUINT32(MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING, TRUE);
}
if (SUCCEEDED(hr))
{
hr = pAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, TRUE);
}
if (SUCCEEDED(hr))
{
hr = pAttr->SetUINT32(MF_LOW_LATENCY, FALSE);
}
if (FAILED(hr))
{
return 1;
}
IMFSourceReader * pReader = NULL;
std::string path(argv[1]);
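// NOTE: this byte-by-byte widening only round-trips ASCII paths; the same applies to the
// output path below.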
hr = MFCreateSourceReaderFromURL(std::wstring(path.begin(), path.end()).c_str(), pAttr, &pReader);
if (hr != S_OK)
{
SafeRelease(&pReader);
return 1;
}
hr = pReader->QueryInterface<IMFSourceReaderEx>(&m_pSourceReader);
if (hr != S_OK)
{
SafeRelease(&pReader);
return 1;
}
SafeRelease(&pReader);
DWORD dwStreamIndex = 0;
IMFMediaType *pNativeType = NULL;
m_readVideoStreamIndex = MF_SOURCE_READER_INVALID_STREAM_INDEX;
m_readAudioStreamIndex = MF_SOURCE_READER_INVALID_STREAM_INDEX;
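// Walk the streams in order, remembering the first video stream index and the first audio stream index.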
do
{
hr = m_pSourceReader->GetNativeMediaType(dwStreamIndex, 0, &pNativeType);
if (hr == S_OK)
{
GUID majorType;
hr = pNativeType->GetGUID(MF_MT_MAJOR_TYPE, &majorType);
if (hr == S_OK)
{
if (m_readVideoStreamIndex == MF_SOURCE_READER_INVALID_STREAM_INDEX && majorType == MFMediaType_Video)
{
m_readVideoStreamIndex = dwStreamIndex;
}
else if (m_readAudioStreamIndex == MF_SOURCE_READER_INVALID_STREAM_INDEX && majorType == MFMediaType_Audio)
{
m_readAudioStreamIndex = dwStreamIndex;
}
}
SafeRelease(&pNativeType);
++dwStreamIndex;
}
} while (hr == S_OK);
SafeRelease(&pNativeType);
// The enumeration above always ends with GetNativeMediaType failing once dwStreamIndex runs
// past the last stream; that failure is expected, so reset hr before continuing.
hr = S_OK;
if (m_readVideoStreamIndex != MF_SOURCE_READER_INVALID_STREAM_INDEX)
{
// read video metadata
PROPVARIANT var;
HRESULT hr = m_pSourceReader->GetPresentationAttribute(MF_SOURCE_READER_MEDIASOURCE,
MF_PD_DURATION, &var);
if (SUCCEEDED(hr))
{
hr = PropVariantToInt64(var, &mediaDuration);
PropVariantClear(&var);
}
if (hr != S_OK)
{
SafeRelease(&m_pSourceReader);
std::cout << "couldn't read media duration" << std::endl;
return 1;
}
std::cout << "Video duration: " << ((double)mediaDuration / 10000000.0) << std::endl;
IMFMediaType *pNativeType = NULL;
// Find the native format of the stream.
hr = m_pSourceReader->GetNativeMediaType(m_readVideoStreamIndex, 0, &pNativeType);
if (FAILED(hr))
{
return hr;
}
hr = MFGetAttributeRatio(pNativeType, MF_MT_FRAME_RATE, &frameDurationNum, &frameDurationDenom);
std::cout << "Video frame rate: " << frameDurationNum << " / " << frameDurationDenom << " ("<< ((double)frameDurationNum/(double)frameDurationDenom) <<" fps)" << std::endl;
if (hr != S_OK)
{
SafeRelease(&m_pSourceReader);
std::cout << "couldn't get frame duration from media" << std::endl;
return 1;
}
hr = MFGetAttributeSize(pNativeType, MF_MT_FRAME_SIZE, &width, &height);
std::cout << "Video width: " << width << " height: " << height << std::endl;
SafeRelease(&pNativeType);
if (hr != S_OK)
{
SafeRelease(&m_pSourceReader);
std::cout << "couldn't get frame size from media." << std::endl;
return 1;
}
}
else
{
// For our purposes, a missing video stream is fatal
SafeRelease(&m_pSourceReader);
std::cout << "media has no video stream" << std::endl;
return 1;
}
if (m_readAudioStreamIndex != MF_SOURCE_READER_INVALID_STREAM_INDEX)
{
IMFMediaType *pNativeType = NULL;
// Find the native format of the stream.
hr = m_pSourceReader->GetNativeMediaType(m_readAudioStreamIndex, 0, &pNativeType);
if (FAILED(hr))
{
return hr;
}
hr = pNativeType->GetUINT32(MF_MT_AUDIO_NUM_CHANNELS, &audioChannels);
std::cout << "Audio channels: " << audioChannels << std::endl;
if (hr != S_OK)
{
std::cout << "couldn't read audio channel num from media." << std::endl;
return 1;
}
hr = pNativeType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, &audioSamplesPerSecond);
if (FAILED(hr))
{
return hr;
}
std::cout << "Audio channels: " << audioSamplesPerSecond << std::endl;
hr = pNativeType->GetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, &audioBlockAlign);
if (SUCCEEDED(hr))
{
std::cout << "Audio block align: " << audioBlockAlign << std::endl;
}
hr = pNativeType->GetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, &audioBitsPerSample);
if (SUCCEEDED(hr))
{
std::cout << "Audio bits per sample: " << audioBitsPerSample << std::endl;
}
hr = pNativeType->GetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, &audioAvgBytesPerSecond);
if (SUCCEEDED(hr))
{
std::cout << "Audio avg bytes per second: " << audioAvgBytesPerSecond << std::endl;
}
hr = pNativeType->GetUINT32(MF_MT_AUDIO_SAMPLES_PER_BLOCK, &audioSamplesPerBlock);
if (SUCCEEDED(hr))
{
std::cout << "Audio samples per block: " << audioSamplesPerBlock << std::endl;
}
hr = pNativeType->GetUINT32(MF_MT_AUDIO_VALID_BITS_PER_SAMPLE, &audioValidBitsPerSample);
if (SUCCEEDED(hr))
{
std::cout << "Audio valid bits per sample: " << audioValidBitsPerSample << std::endl;
}
SafeRelease(&pNativeType);
}
else
{
SafeRelease(&m_pSourceReader);
std::cout << "No audio streams found." << std::endl;
return 1;
}
// Configure the video stream
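// Ask the reader to deliver RGB32. Because MF_SOURCE_READER_ENABLE_ADVANCED_VIDEO_PROCESSING
// is set above, the reader inserts a video processor to convert the native (typically YUV)
// decoder output to the requested format.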
pNativeType = NULL;
IMFMediaType *pType = NULL;
GUID majorType, subtype;
// Find the native format of the stream.
hr = m_pSourceReader->GetNativeMediaType(m_readVideoStreamIndex, 0, &pNativeType);
if (FAILED(hr))
{
std::cout << "couldn't get native media type from video stream." << std::endl;
return 1;
}
hr = pNativeType->GetGUID(MF_MT_MAJOR_TYPE, &majorType);
hr = MFCreateMediaType(&pType);
if (majorType != MFMediaType_Video)
{
std::cout << "wrong type for video stream." << std::endl;
return 1;
}
hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
const GUID videoSubtype = MFVideoFormat_RGB32; // Works on AMD and NVidia with HW Accel, AMD skips second or third frame replacing with duplicate of previous frame
//const GUID videoSubtype = MFVideoFormat_ARGB32; // Works on NVidia with HW Accel, but not AMD
//const GUID videoSubtype = MFVideoFormat_NV12; // fastest but has missing first frame on AMD, Dynamic range issues on NV without changing settings, color space issues everywhere
hr = pType->SetGUID(MF_MT_SUBTYPE, videoSubtype);
hr = m_pSourceReader->SetCurrentMediaType(m_readVideoStreamIndex, NULL, pType);
if (hr != S_OK)
{
SafeRelease(&m_pSourceReader);
switch (hr)
{
case MF_E_INVALIDMEDIATYPE:
std::cout << "At least one decoder was found for the native stream type, but the type specified was rejected." << std::endl;
return 1;
break;
case MF_E_INVALIDREQUEST:
std::cout << "One or more sample requests are still pending." << std::endl;
return 1;
break;
case MF_E_INVALIDSTREAMNUMBER:
std::cout << "The stream index parameter is invalid." << std::endl;
return 1;
break;
case MF_E_TOPO_CODEC_NOT_FOUND:
std::cout << "Could not find a decoder for the native stream type." << std::endl;
return 1;
break;
default:
std::cout << "Failed to configure stream." << std::endl;
return 1;
break;
}
}
SafeRelease(&pNativeType);
SafeRelease(&pType);
pNativeType = NULL;
pType = NULL;
// Find the native format of the stream.
hr = m_pSourceReader->GetNativeMediaType(m_readAudioStreamIndex, 0, &pNativeType);
if (FAILED(hr))
{
std::cout << "failed to get native media type from source reader." << std::endl;
return 1;
}
hr = pNativeType->GetGUID(MF_MT_MAJOR_TYPE, &majorType);
hr = MFCreateMediaType(&pType);
if (majorType != MFMediaType_Audio)
{
std::cout << "wrong type for audio stream." << std::endl;
return 1;
}
hr = pType->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
hr = pType->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
hr = m_pSourceReader->SetCurrentMediaType(m_readAudioStreamIndex, NULL, pType);
SafeRelease(&pType); // release the type we created before re-fetching, so it isn't leaked
if (SUCCEEDED(hr))
{
hr = m_pSourceReader->GetCurrentMediaType(m_readAudioStreamIndex, &pType);
}
SafeRelease(&pNativeType);
SafeRelease(&pType);
if (hr != S_OK)
{
SafeRelease(&m_pSourceReader);
switch (hr)
{
case MF_E_INVALIDMEDIATYPE:
std::cout << "At least one decoder was found for the native stream type, but the type specified was rejected." << std::endl;
return 1;
break;
case MF_E_INVALIDREQUEST:
std::cout << "One or more sample requests are still pending." << std::endl;
return 1;
break;
case MF_E_INVALIDSTREAMNUMBER:
std::cout << "The stream index parameter is invalid." << std::endl;
return 1;
break;
case MF_E_TOPO_CODEC_NOT_FOUND:
std::cout << "Could not find a decoder for the native stream type." << std::endl;
return 1;
break;
default:
std::cout << "Failed to configure stream." << std::endl;
return 1;
break;
}
}
//--------------------------------
// CONFIGURE WRITER
//--------------------------------
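// For each sink writer stream, the "output" media type describes what gets written to the file
// (H.264 video / AAC audio) and the "input" media type describes the uncompressed samples we
// deliver (RGB32 / PCM); the writer loads an encoder MFT to bridge the two.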
{
IMFMediaType *pMediaTypeOutV = NULL;
IMFMediaType *pMediaTypeOutA = NULL;
IMFMediaType *pMediaTypeInV = NULL;
IMFMediaType *pMediaTypeInA = NULL;
HRESULT hr = S_OK;
IMFAttributes * pAttr = nullptr;
// Create attributes
hr = MFCreateAttributes(&pAttr, 1);
if (SUCCEEDED(hr))
{
// Enable HW accelerated encoding
hr = pAttr->SetUINT32(MF_READWRITE_ENABLE_HARDWARE_TRANSFORMS, true);
}
// Create the SinkWriter
if (SUCCEEDED(hr))
{
std::string output(argv[2]);
hr = MFCreateSinkWriterFromURL(std::wstring(output.begin(), output.end()).c_str(), NULL, pAttr, &m_pWriter);
}
//////////////////////////
////WRITER - VIDEO OUTPUT
//////////////////////////
// Set the video output media type.
hr = MFCreateMediaType(&pMediaTypeOutV);
hr = pMediaTypeOutV->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
hr = pMediaTypeOutV->SetGUID(MF_MT_SUBTYPE, VIDEO_ENCODING_FORMAT);
hr = pMediaTypeOutV->SetUINT32(MF_MT_AVG_BITRATE, VIDEO_BIT_RATE);
hr = pMediaTypeOutV->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
hr = MFSetAttributeSize(pMediaTypeOutV, MF_MT_FRAME_SIZE, width, height);
// frame rate is the inverse of frame duration
hr = MFSetAttributeRatio(pMediaTypeOutV, MF_MT_FRAME_RATE, frameDurationNum, frameDurationDenom);
hr = MFSetAttributeRatio(pMediaTypeOutV, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
hr = pMediaTypeOutV->SetUINT32(MF_MT_VIDEO_NOMINAL_RANGE, MFNominalRange_Normal);
hr = m_pWriter->AddStream(pMediaTypeOutV, &m_writeVideoStreamIndex);
if (FAILED(hr))
{
std::cout << "Couldn't add output video type to video writer." << std::endl;
return 1;
}
///////////////////////
////WRITER VIDEO INPUT
///////////////////////
hr = MFCreateMediaType(&pMediaTypeInV);
hr = pMediaTypeInV->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Video);
hr = pMediaTypeInV->SetGUID(MF_MT_SUBTYPE, VIDEO_INPUT_FORMAT);
hr = pMediaTypeInV->SetUINT32(MF_MT_INTERLACE_MODE, MFVideoInterlace_Progressive);
hr = MFSetAttributeSize(pMediaTypeInV, MF_MT_FRAME_SIZE, width, height);
// frame rate is the inverse of frame duration
hr = MFSetAttributeRatio(pMediaTypeInV, MF_MT_FRAME_RATE, frameDurationNum, frameDurationDenom);
hr = MFSetAttributeRatio(pMediaTypeInV, MF_MT_PIXEL_ASPECT_RATIO, 1, 1);
hr = pMediaTypeInV->SetUINT32(MF_MT_DEFAULT_STRIDE, width * 4);
hr = m_pWriter->SetInputMediaType(m_writeVideoStreamIndex, pMediaTypeInV, NULL);
if (FAILED(hr))
{
std::cout << "Couldn't add input video type to video writer." << std::endl;
return 1;
}
//////////////////////////
////WRITER - AUDIO OUTPUT
//////////////////////////
hr = MFCreateMediaType(&pMediaTypeOutA);
hr = pMediaTypeOutA->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
hr = pMediaTypeOutA->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_AAC);
hr = pMediaTypeOutA->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, audioChannels);
hr = pMediaTypeOutA->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, audioSamplesPerSecond);
hr = pMediaTypeOutA->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, 16);
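// 24000 bytes/sec = 192 kbps; the Microsoft AAC encoder only accepts a small set of values
// here (typically 12000, 16000, 20000, or 24000 bytes per second).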
hr = pMediaTypeOutA->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, 24000);
hr = m_pWriter->AddStream(pMediaTypeOutA, &m_writeAudioStreamIndex);
if (FAILED(hr))
{
std::cout << "Couldn't add output video type to video writer." << std::endl;
return 1;
}
//////////////////////////
////WRITER - AUDIO INPUT
//////////////////////////
// Calculate derived values.
UINT32 bitsPerSample = 8 * sizeof(int16_t);
UINT32 blockAlign = audioChannels * bitsPerSample / 8;
UINT32 bytesPerSecond = blockAlign * audioSamplesPerSecond;
hr = MFCreateMediaType(&pMediaTypeInA);
hr = pMediaTypeInA->SetGUID(MF_MT_MAJOR_TYPE, MFMediaType_Audio);
hr = pMediaTypeInA->SetGUID(MF_MT_SUBTYPE, MFAudioFormat_PCM);
hr = pMediaTypeInA->SetUINT32(MF_MT_AUDIO_NUM_CHANNELS, audioChannels);
hr = pMediaTypeInA->SetUINT32(MF_MT_AUDIO_SAMPLES_PER_SECOND, audioSamplesPerSecond);
hr = pMediaTypeInA->SetUINT32(MF_MT_AUDIO_BLOCK_ALIGNMENT, blockAlign);
hr = pMediaTypeInA->SetUINT32(MF_MT_AUDIO_AVG_BYTES_PER_SECOND, bytesPerSecond);
hr = pMediaTypeInA->SetUINT32(MF_MT_AUDIO_BITS_PER_SAMPLE, bitsPerSample);
hr = m_pWriter->SetInputMediaType(m_writeAudioStreamIndex, pMediaTypeInA, NULL);
if (FAILED(hr))
{
std::cout << "Couldn't add input audio type to video writer." << std::endl;
return 1;
}
SafeRelease(&pMediaTypeInV);
SafeRelease(&pMediaTypeOutV);
SafeRelease(&pMediaTypeInA);
SafeRelease(&pMediaTypeOutA);
SafeRelease(&pAttr);
if (FAILED(hr))
{
std::cout << "Couldn't initialized video writer." << std::endl;
return 1;
}
}
//--------------------------------
// READ MEDIA AND WRITE TO DISK
//--------------------------------
//set position to 0
PROPVARIANT var;
hr = InitPropVariantFromInt64(0, &var);
if (SUCCEEDED(hr))
{
hr = m_pSourceReader->SetCurrentPosition(GUID_NULL, var);
PropVariantClear(&var);
}
IMFSample *pSample = NULL;
DWORD streamIndex, flags;
LONGLONG sampleTimeStamp;
LONGLONG videoFrameTimeStamp;
LONGLONG audioFrameTimeStamp;
LONGLONG videoOffset{0};
LONGLONG audioOffset{0};
//Read the first video frame to get the videoOffset
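// Some files do not start the video stream at time 0; capturing the first sample's timestamp
// lets us shift the re-encoded video so the output starts at 0.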
hr = m_pSourceReader->ReadSample(
m_readVideoStreamIndex, // Stream index.
0, // Flags.
&streamIndex, // Receives the actual stream index.
&flags, // Receives status flags.
&videoOffset, // Receives the time stamp.
&pSample // Receives the sample or NULL.
);
SafeRelease(&pSample);
//set position to 0 again after reading first frame
hr = InitPropVariantFromInt64(0, &var);
if (SUCCEEDED(hr))
{
hr = m_pSourceReader->SetCurrentPosition(GUID_NULL, var);
PropVariantClear(&var);
}
hr = m_pWriter->BeginWriting();
size_t frame{ 0 };
while (true)
{
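// MF_SOURCE_READER_ANY_STREAM returns the next available sample from either stream, so video
// and audio arrive interleaved in roughly decode order and are routed by streamIndex below.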
auto frameStart = std::chrono::system_clock::now();
hr = m_pSourceReader->ReadSample(
MF_SOURCE_READER_ANY_STREAM, // Stream index.
0, // Flags.
&streamIndex, // Receives the actual stream index.
&flags, // Receives status flags.
&sampleTimeStamp, // Receives the time stamp.
&pSample // Receives the sample or NULL.
);
if (FAILED(hr))
{
std::cout << "failed to read sample" << std::endl;
break;
}
if (streamIndex != m_readVideoStreamIndex && streamIndex != m_readAudioStreamIndex)
{
SafeRelease(&pSample);
continue;
}
if (!(flags & MF_SOURCE_READERF_ENDOFSTREAM))
{
if (streamIndex == m_readVideoStreamIndex)
{
frame++;
//OFFSET THE VIDEO STREAM TO PRODUCE A COPY WHOSE STREAM STARTS AT 0
videoFrameTimeStamp = sampleTimeStamp - videoOffset;
}
else if (streamIndex == m_readAudioStreamIndex)
{
audioFrameTimeStamp = sampleTimeStamp;
}
}
bool reachedEnd = (flags & MF_SOURCE_READERF_ENDOFSTREAM);
if (reachedEnd || pSample == nullptr)
{
std::cout << "Reached end of video." << std::endl;
SafeRelease(&pSample);
break;
}
if (flags & MF_SOURCE_READERF_NEWSTREAM)
{
std::cout << "New stream." << std::endl;
}
if (flags & MF_SOURCE_READERF_CURRENTMEDIATYPECHANGED)
{
std::cout << "Current type changed." << std::endl;
}
if (flags & MF_SOURCE_READERF_STREAMTICK)
{
std::cout << "Stream tick." << std::endl;
}
if (flags & MF_SOURCE_READERF_NATIVEMEDIATYPECHANGED)
{
std::cout << "Native stream format changed." << std::endl;
return 1;
}
LONGLONG llDuration;
hr = pSample->GetSampleDuration(&llDuration);
if (hr != S_OK)
{
std::cout << "Unable to query sample duration" << std::endl;
return 1;
}
{
MFBufferAccess access(pSample);
std::cout << "Writing " << (streamIndex == m_readAudioStreamIndex ? "Audio" : "Video") << " sample @ " << sampleTimeStamp << " frame: " << (frame) << " duration: " << llDuration / 10000.0 << "ms" << std::endl;
std::cout << "\tbuffer size: " << access.size << " maxSize: " << access.maxSize << std::endl;
if (streamIndex == m_readVideoStreamIndex)
{
WriteFrame(access.data, m_writeVideoStreamIndex, videoFrameTimeStamp, llDuration);
}
else if (streamIndex == m_readAudioStreamIndex)
{
/*
// UNCOMMENT THIS BLOCK TO FIX THE AUDIO OFFSET SYNC ISSUE
// Not sure how many blocks are needed or why.
static bool stored{ false };
if (!stored)
{
size_t numberOfSilenceBlocks = 2; //how to derive how many a file needs!? seems arbitrary
size_t samples = 1024 * numberOfSilenceBlocks;
audioOffset = samples * 10000000 / audioSamplesPerSecond;
std::vector<uint8_t> silence(samples * (LONGLONG)audioChannels * sizeof(int16_t), 0);
WriteAudioBuffer(silence.data(), silence.size(), audioFrameTimeStamp, audioOffset);
stored = true;
}
*/
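// A possible explanation for the needed silence (not verified here): AAC encoders introduce
// priming/encoder-delay samples (commonly about 2112), so pre-padding the audio with roughly
// two 1024-sample blocks would shift it back into sync with the video.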
LONGLONG audioTime = audioFrameTimeStamp + audioOffset;
WriteAudioBuffer(access.data, access.size, audioTime, llDuration);
}
else
{
std::cout << "Read unknown sample from stream: " << streamIndex;
}
}
SafeRelease(&pSample);
}
SafeRelease(&pSample);
m_pWriter->Finalize();
SafeRelease(&m_pWriter);
return 0;
}
emcodem commented Mar 23, 2021

needed to #include "stdafx.h" at the top, otherwise it runs without modification, thanks!
