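// ----------------------------------------------------------------------------
// File: AvFoundationH264Decoder.h
// ----------------------------------------------------------------------------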
#include "ofMain.h"
#include "AvFoundationH264DecoderListener.h"
#import <Foundation/Foundation.h>
#import <AVFoundation/AVFoundation.h>
#import <Accelerate/Accelerate.h>
#import <CoreMedia/CoreMedia.h>
#import <AVFoundation/AVPlayer.h>
#import <VideoToolbox/VideoToolbox.h>
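// Decodes an Annex B H.264 elementary stream with VideoToolbox. Each call to
// readFrameData:withSize: is expected to carry one NAL unit (or an SPS+PPS
// pair) prefixed with a 00 00 00 01 start code; decoded frames are converted
// to RGB and handed to the listener.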
@interface AvFoundationH264Decoder : NSObject
{
@public
AvFoundationH264DecoderListener* listener;
}
@property (nonatomic, assign) CMVideoFormatDescriptionRef videoFormat;
@property (nonatomic, assign) VTDecompressionSessionRef decompressionSession;
@property (nonatomic, assign) int spsSize;
@property (nonatomic, assign) int ppsSize;
@property (nonatomic, retain) NSData* spsData;
@property (nonatomic, retain) NSData* ppsData;
-(void) readFrameData:(unsigned char*)frameData withSize:(int)frameDataSize;
-(void) processDecodedFrame:(CVImageBufferRef)imageBuffer;
@end
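// ----------------------------------------------------------------------------
// File: AvFoundationH264Decoder.mm (Objective-C++; filename assumed from the
// header it imports)
// ----------------------------------------------------------------------------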
#import "AvFoundationH264Decoder.h"
//https://github.com/htaiwan/HWDecoder
//https://stackoverflow.com/a/24890903/553229
//https://stackoverflow.com/a/29525001/553229
@implementation AvFoundationH264Decoder
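// Human-readable names for the 32 H.264 nal_unit_type values (ITU-T H.264
// table 7-1), indexed by the 5-bit type field of the NAL header byte.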
NSString * const naluTypesStrings[] = {
@"Unspecified (non-VCL)",
@"Coded slice of a non-IDR picture (VCL)",
@"Coded slice data partition A (VCL)",
@"Coded slice data partition B (VCL)",
@"Coded slice data partition C (VCL)",
@"Coded slice of an IDR picture (VCL)",
@"Supplemental enhancement information (SEI) (non-VCL)",
@"Sequence parameter set (non-VCL)",
@"Picture parameter set (non-VCL)",
@"Access unit delimiter (non-VCL)",
@"End of sequence (non-VCL)",
@"End of stream (non-VCL)",
@"Filler data (non-VCL)",
@"Sequence parameter set extension (non-VCL)",
@"Prefix NAL unit (non-VCL)",
@"Subset sequence parameter set (non-VCL)",
@"Reserved (non-VCL)",
@"Reserved (non-VCL)",
@"Reserved (non-VCL)",
@"Coded slice of an auxiliary coded picture without partitioning (non-VCL)",
@"Coded slice extension (non-VCL)",
@"Coded slice extension for depth view components (non-VCL)",
@"Reserved (non-VCL)",
@"Reserved (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
@"Unspecified (non-VCL)",
};
void uncaughtExceptionHandler(NSException *exception)
{
NSLog(@"exception %@", exception);
}
-(id) init
{
self = [super init];
if (self)
{
NSSetUncaughtExceptionHandler(&uncaughtExceptionHandler);
_spsSize = 0;
_ppsSize = 0;
_videoFormat = NULL;
_decompressionSession = NULL;
_spsData = nil;
_ppsData = nil;
}
return self;
}
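// Copies a decoded CVImageBuffer into an ofPixels, converting the session's
// 32BGRA output to 24-bit RGB with vImage, then forwards it to the listener.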
-(void) processDecodedFrame: (CVImageBufferRef)imageBuffer
{
if(imageBuffer)
{
// Lock the base address of the pixel buffer
CVPixelBufferLockBaseAddress(imageBuffer, 0);
// Get the base address of the pixel buffer
void *baseAddress = CVPixelBufferGetBaseAddress(imageBuffer);
// Get the number of bytes per row for the pixel buffer
size_t bytesPerRow = CVPixelBufferGetBytesPerRow(imageBuffer);
// Get the pixel buffer width and height
size_t width = CVPixelBufferGetWidth(imageBuffer);
size_t height = CVPixelBufferGetHeight(imageBuffer);
ofPixels rgbConvertPixels;
rgbConvertPixels.allocate(width, height, 3);
vImage_Buffer srcImg;
srcImg.width = width;
srcImg.height = height;
srcImg.data = (unsigned char *)baseAddress;
srcImg.rowBytes = bytesPerRow;
vImage_Buffer dstImg;
dstImg.width = srcImg.width;
dstImg.height = srcImg.height;
dstImg.rowBytes = width*3;
dstImg.data = rgbConvertPixels.getData();
vImage_Error err;
err = vImageConvert_BGRA8888toRGB888(&srcImg, &dstImg, kvImageNoFlags);
if (err != kvImageNoError)
{
ofLogError(__func__) << "vImageConvert_BGRA8888toRGB888 failed: " << err;
}
ofLog(OF_LOG_VERBOSE, "w: %zu h: %zu bytesPerRow: %zu", width, height, bytesPerRow);
CVPixelBufferUnlockBaseAddress(imageBuffer,0);
if(listener)
{
listener->onFrameDecoded(rgbConvertPixels);
}
}else
{
ofLogError(__func__) << "NO IMAGEBUFFFER";
}
}
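// VTDecompressionSession output callback; VideoToolbox may invoke this on its
// own decode thread, once per decoded frame.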
void onDecompress(void *decompressionOutputRefCon,
void *sourceFrameRefCon,
OSStatus status,
VTDecodeInfoFlags infoFlags,
CVImageBufferRef imageBuffer,
CMTime presentationTimeStamp,
CMTime presentationDuration )
{
AvFoundationH264Decoder *streamManager = (__bridge AvFoundationH264Decoder *)decompressionOutputRefCon;
if (status != noErr || !imageBuffer)
{
ofLog(OF_LOG_ERROR, "Error decompressing frame at time: %.3f error: %d infoFlags: %u", (float)presentationTimeStamp.value/presentationTimeStamp.timescale, (int)status, (unsigned int)infoFlags);
return;
}
ofLog(OF_LOG_VERBOSE, "Success decompressing frame at time: %.3f status: %d infoFlags: %u", (float)presentationTimeStamp.value/presentationTimeStamp.timescale, (int)status, (unsigned int)infoFlags);
[streamManager processDecodedFrame:imageBuffer];
}
-(void) readFrameData:(unsigned char*)frameData withSize:(int)frameDataSize
{
int SPS_TYPE = 7;
int PPS_TYPE = 8;
int IDR_FRAME_TYPE = 5; // coded slice of an IDR picture
int NON_IDR_FRAME_TYPE = 1; // coded slice of a non-IDR picture
OSStatus status;
uint8_t* data = (uint8_t *)frameData;
int size = frameDataSize;
// 1. Get the SPS and PPS from the stream data, then create the CMFormatDescription and VTDecompressionSession
if (_spsData == nil && _ppsData == nil)
{
int startCodeSPSIndex = 0;
int startCodePPSIndex = 0;
int spsLength = 0;
int ppsLength = 0;
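// Scan for the two 4-byte Annex B start codes (00 00 00 01): the first one
// found marks the SPS, the last one found marks the PPS.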
for (int i = 0; i < size; i++)
{
if (i >= 3)
{
if (data[i] == 0x01 && data[i-1] == 0x00 && data[i-2] == 0x00 && data[i-3] == 0x00) {
if (startCodeSPSIndex == 0)
{
startCodeSPSIndex = i;
}
if (i > startCodeSPSIndex)
{
startCodePPSIndex = i;
}
}
}
}
spsLength = startCodePPSIndex - startCodeSPSIndex - 4;
ppsLength = size - (startCodePPSIndex + 1);
ofLog(OF_LOG_VERBOSE, "startCodeSPSIndex --> %i", startCodeSPSIndex);
ofLog(OF_LOG_VERBOSE, "startCodePPSIndex --> %i", startCodePPSIndex);
ofLog(OF_LOG_VERBOSE, "spsLength --> %i", spsLength);
ofLog(OF_LOG_VERBOSE, "ppsLength --> %i", ppsLength);
int nalu_type;
nalu_type = ((uint8_t) data[startCodeSPSIndex + 1] & 0x1F);
NSLog(@"NALU with Type \"%@\" received.", naluTypesStrings[nalu_type]);
if (nalu_type == SPS_TYPE)
{
self.spsData = [NSData dataWithBytes:&(data[startCodeSPSIndex + 1]) length:spsLength];
}
nalu_type = ((uint8_t) data[startCodePPSIndex + 1] & 0x1F);
NSLog(@"NALU with Type \"%@\" received.", naluTypesStrings[nalu_type]);
if (nalu_type == PPS_TYPE)
{
self.ppsData = [NSData dataWithBytes:&(data[startCodePPSIndex + 1]) length:ppsLength];
}
// 2. create CMFormatDescription
if (_spsData != nil && _ppsData != nil)
{
const uint8_t* const parameterSetPointers[2] = { (const uint8_t*)[_spsData bytes], (const uint8_t*)[_ppsData bytes] };
const size_t parameterSetSizes[2] = { [_spsData length], [_ppsData length] };
status = CMVideoFormatDescriptionCreateFromH264ParameterSets(kCFAllocatorDefault,
2,
parameterSetPointers,
parameterSetSizes,
4,
&_videoFormat);
if(status == noErr)
{
ofLogVerbose(__func__) << "CMVideoFormatDescription creation PASS";
}else
{
ofLogError(__func__) << "CMVideoFormatDescription creation FAIL";
}
}
// 3. create VTDecompressionSession
VTDecompressionOutputCallbackRecord callback;
callback.decompressionOutputCallback = onDecompress;
callback.decompressionOutputRefCon = (__bridge void *)self;
NSDictionary *destinationImageBufferAttributes = @{
(id)kCVPixelBufferOpenGLCompatibilityKey : @NO,
(id)kCVPixelBufferPixelFormatTypeKey : @(kCVPixelFormatType_32BGRA)
};
status = VTDecompressionSessionCreate(kCFAllocatorDefault,
_videoFormat,
NULL,
(__bridge CFDictionaryRef)destinationImageBufferAttributes,
&callback,
&_decompressionSession);
if(status == noErr)
{
ofLogVerbose(__func__) << "VTDecompressionSessionCreate creation PASS";
}else
{
ofLogError(__func__) << "VTDecompressionSessionCreate creation FAIL";
}
/*
int32_t timeSpan = 90000;
CMSampleTimingInfo timingInfo;
timingInfo.presentationTimeStamp = CMTimeMake(0, timeSpan);
timingInfo.duration = CMTimeMake(3000, timeSpan);
timingInfo.decodeTimeStamp = kCMTimeInvalid;
*/
}
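// Locate the start code of this NAL unit; scanning the first 5 bytes covers
// both 3-byte and 4-byte start code prefixes.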
int startCodeIndex = 0;
for (int i = 0; i < 5; i++)
{
if (data[i] == 0x01)
{
startCodeIndex = i;
break;
}
}
int nalu_type = ((uint8_t)data[startCodeIndex + 1] & 0x1F);
NSLog(@"NALU with Type \"%@\" received.", naluTypesStrings[nalu_type]);
if (nalu_type == IDR_FRAME_TYPE || nalu_type == NON_IDR_FRAME_TYPE)
{
// 4. Put the NAL unit payload into a CMBlockBuffer
CMBlockBufferRef videoBlock = NULL;
status = CMBlockBufferCreateWithMemoryBlock(NULL,
data,
frameDataSize,
kCFAllocatorNull,
NULL,
0,
frameDataSize,
0,
&videoBlock);
if(status == kCMBlockBufferNoErr)
{
ofLogVerbose(__func__) << "CMBlockBufferCreateWithMemoryBlock PASS";
}else
{
ofLogError(__func__) << "CMBlockBufferCreateWithMemoryBlock FAIL";
}
// 5. Replace the 4-byte Annex B start code with a 4-byte big-endian length field (the length of the NAL unit that follows)
int removeHeaderSize = frameDataSize - 4;
const uint8_t sourceBytes[] = { (uint8_t)(removeHeaderSize >> 24),
(uint8_t)(removeHeaderSize >> 16),
(uint8_t)(removeHeaderSize >> 8),
(uint8_t)removeHeaderSize};
status = CMBlockBufferReplaceDataBytes(sourceBytes, videoBlock, 0, 4);
if(status == kCMBlockBufferNoErr)
{
ofLogVerbose(__func__) << "CMBlockBufferReplaceDataBytes PASS";
}else
{
ofLogError(__func__) << "CMBlockBufferReplaceDataBytes FAIL";
}
// 6. create a CMSampleBuffer.
CMSampleBufferRef sampleBuffer = NULL;
const size_t sampleSizeArray[] = {static_cast<size_t>(frameDataSize)};
status = CMSampleBufferCreate(kCFAllocatorDefault,
videoBlock,
true,
NULL,
NULL,
_videoFormat,
1,
0,
NULL,
1,
sampleSizeArray,
&sampleBuffer);
if(status == noErr)
{
ofLogVerbose(__func__) << "CMSampleBufferCreate PASS";
}else
{
ofLogError(__func__) << "CMSampleBufferCreate FAIL";
}
// 7. decode the sample buffer with VTDecompressionSessionDecodeFrame
VTDecodeFrameFlags flags = kVTDecodeFrame_EnableAsynchronousDecompression;
VTDecodeInfoFlags flagOut;
status = VTDecompressionSessionDecodeFrame(_decompressionSession,
sampleBuffer,
flags,
NULL,
&flagOut);
if(status == noErr)
{
ofLogVerbose(__func__) << "VTDecompressionSessionDecodeFrame PASS";
}else
{
ofLogError(__func__) << "VTDecompressionSessionDecodeFrame FAIL";
}
CFRelease(sampleBuffer);
CFRelease(videoBlock);
//[self.delegate startDecodeData];
// /* Flush in-process frames. */
// VTDecompressionSessionFinishDelayedFrames(session);
// /* Block until our callback has been called with the last frame. */
// VTDecompressionSessionWaitForAsynchronousFrames(session);
//
// /* Clean up. */
// VTDecompressionSessionInvalidate(session);
// CFRelease(session);
// CFRelease(videoFormatDescr);
}
}
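// Cleanup sketch (not in the original gist): invalidate the session and
// release the Core Foundation objects so repeated init/close cycles don't
// leak. Assumes manual reference counting, matching the [decoder release]
// in ofxAvFoundationH264Player::closeDecoder.
-(void) dealloc
{
if (_decompressionSession)
{
VTDecompressionSessionWaitForAsynchronousFrames(_decompressionSession);
VTDecompressionSessionInvalidate(_decompressionSession);
CFRelease(_decompressionSession);
_decompressionSession = NULL;
}
if (_videoFormat)
{
CFRelease(_videoFormat);
_videoFormat = NULL;
}
self.spsData = nil;
self.ppsData = nil;
[super dealloc];
}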
@end
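// ----------------------------------------------------------------------------
// File: AvFoundationH264DecoderListener.h
// ----------------------------------------------------------------------------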
#pragma once
#include "ofMain.h"
class AvFoundationH264DecoderListener
{
public:
AvFoundationH264DecoderListener()
{
}
virtual ~AvFoundationH264DecoderListener()
{
}
virtual void onFrameDecoded(ofPixels& pixels) = 0;
};
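// ----------------------------------------------------------------------------
// File: ofxAvFoundationH264Player.h
// ----------------------------------------------------------------------------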
#pragma once
#include "ofMain.h"
#ifdef __OBJC__
#import "AvFoundationH264Decoder.h"
#endif
#include "AvFoundationH264DecoderListener.h"
class ofxAvFoundationH264Player: public AvFoundationH264DecoderListener
{
public:
ofxAvFoundationH264Player();
~ofxAvFoundationH264Player();
ofTexture texture;
vector<ofPixels> framePixels;
void readFrameData(vector<unsigned char>& data);
void readFrameData(unsigned char* bufferData, int size);
void clearFrames();
void update();
void draw();
void onFrameDecoded(ofPixels& pixels) override;
int frameCounter;
int fps;
float previousUpdateTime;
float frameTime;
void setFPS(int fps_);
void initDecoder();
void closeDecoder();
#ifdef __OBJC__
AvFoundationH264Decoder* decoder;
#else
void * decoder;
#endif
};
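// ----------------------------------------------------------------------------
// File: ofxAvFoundationH264Player.mm (Objective-C++; filename assumed from the
// header it includes)
// ----------------------------------------------------------------------------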
#include "ofxAvFoundationH264Player.h"
ofxAvFoundationH264Player::ofxAvFoundationH264Player()
{
initDecoder();
setFPS(25);
previousUpdateTime = ofGetElapsedTimef();
}
void ofxAvFoundationH264Player::readFrameData(vector<unsigned char>& bufferData)
{
readFrameData(bufferData.data(), (int)bufferData.size());
}
void ofxAvFoundationH264Player::readFrameData(unsigned char* bufferData, int size)
{
[decoder readFrameData:bufferData withSize:size];
}
void ofxAvFoundationH264Player::onFrameDecoded(ofPixels& pixels)
{
framePixels.push_back(pixels);
ofLog() << "numFrames: " << framePixels.size();
}
void ofxAvFoundationH264Player::clearFrames()
{
framePixels.clear();
texture.clear();
closeDecoder();
initDecoder();
}
void ofxAvFoundationH264Player::setFPS(int fps_)
{
fps = fps_;
frameTime = 1.0 / fps;
}
void ofxAvFoundationH264Player::update()
{
if(!framePixels.empty())
{
float now = ofGetElapsedTimef();
if (now - previousUpdateTime >= frameTime)
{
if(!texture.isAllocated())
{
texture.allocate(framePixels.front());
}
texture.loadData(framePixels[frameCounter]);
if(frameCounter + 1 < (int)framePixels.size())
{
frameCounter++;
}else
{
frameCounter = 0;
}
previousUpdateTime = now;
}
}
}
void ofxAvFoundationH264Player::draw()
{
if(texture.isAllocated())
{
texture.draw(0, 0);
}
}
void ofxAvFoundationH264Player::initDecoder()
{
frameCounter = 0;
decoder = [[AvFoundationH264Decoder alloc] init];
decoder->listener = this;
}
void ofxAvFoundationH264Player::closeDecoder()
{
if (decoder)
{
[decoder release];
decoder = NULL;
}
}
ofxAvFoundationH264Player::~ofxAvFoundationH264Player()
{
closeDecoder();
}
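// Usage sketch (hypothetical; not part of the original gist). The player
// expects each readFrameData call to carry a single Annex B NAL unit, with
// the SPS/PPS arriving before any slice data. splitIntoNALUnits is an
// assumed helper that cuts a raw .h264 file at its start codes.
//
// class ofApp : public ofBaseApp
// {
// public:
// ofxAvFoundationH264Player player;
// void setup()
// {
// player.setFPS(30);
// for (vector<unsigned char>& nalu : splitIntoNALUnits("stream.h264"))
// {
// player.readFrameData(nalu);
// }
// }
// void update() { player.update(); }
// void draw() { player.draw(); }
// };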