// Source: GitHub gist leandromoreira/d290d4e594dd0f5e49e07d557d790b5d
// by @leandromoreira, created May 6, 2024 13:25.
package main
import (
"errors"
"fmt"
"log"
"strconv"
"strings"
"github.com/asticode/go-astiav"
"github.com/asticode/go-astikit"
)
// Package-level state shared by main and the helper functions in this file.
var (
// c collects the cleanup callbacks registered through c.Add / c.AddWithError;
// main defers c.Close().
c = astikit.NewCloser()
// inputFormatContext is allocated and opened by openInputFile.
inputFormatContext *astiav.FormatContext
// outputFormatContext is allocated by openOutputFile; encoded packets are
// written to it by encodeWriteFrame.
outputFormatContext *astiav.FormatContext
streams = make(map[int]*stream) // Indexed by input stream index
)
// stream bundles the per-stream state of the transcoding pipeline: the input
// and output streams, the decoder, the filter graph sitting between decoder
// and encoder, and the encoder itself. One value exists per audio/video input
// stream and is stored in the streams map.
type stream struct {
	// Demuxer side and muxer side of this stream.
	inputStream  *astiav.Stream
	outputStream *astiav.Stream

	// Decoding stage (filled in by openInputFile).
	decCodec        *astiav.Codec
	decCodecContext *astiav.CodecContext
	decFrame        *astiav.Frame

	// Filtering stage (filled in by initFilters).
	filterGraph       *astiav.FilterGraph
	buffersrcContext  *astiav.FilterContext
	buffersinkContext *astiav.FilterContext
	filterFrame       *astiav.Frame

	// Encoding stage (codec and context are filled in by openOutputFile,
	// the reusable packet by initFilters).
	encCodec        *astiav.Codec
	encCodecContext *astiav.CodecContext
	encPkt          *astiav.Packet
}
// Hard-coded media paths. Each path is also exposed through a pointer
// (input / output), which is the form openInputFile and openOutputFile
// dereference.
var (
inputX = "/app/sample_h264_aac.mp4"
input = &inputX // read as *input by openInputFile
outputX = "/app/sample_h264_converted_aac_opus.mp4"
output = &outputX // read as *output by openOutputFile
)
// main configures FFmpeg logging, validates the package-level input/output
// paths and runs the transcode. Any failure is reported through log.Fatal
// after all registered resources have been released.
func main() {
	// Handle ffmpeg logs
	astiav.SetLogLevel(astiav.LogLevelDebug)
	// The classer and format parameters are unused, so they are left unnamed;
	// naming the latter "fmt" would shadow the fmt package inside the callback.
	astiav.SetLogCallback(func(_ astiav.Classer, l astiav.LogLevel, _, msg string) {
		log.Printf("ffmpeg log: %s (level: %d) \n", strings.TrimSpace(msg), l)
	})

	// Usage. This checks the package-level input/output pointers, i.e. the
	// exact values openInputFile and openOutputFile dereference, rather than
	// local copies that nothing else reads.
	if *input == "" || *output == "" {
		log.Println("Usage: <binary path> -i <input path> -o <output path>")
		return
	}

	// log.Fatal exits without running deferred calls, so the deferred
	// c.Close() lives in run and has completed by the time we get here.
	if err := run(); err != nil {
		log.Fatal(err)
	}

	// Success
	log.Println("success")
}

// run opens the input and output, builds the filter graphs, pushes every
// demuxed packet through decode -> filter -> encode -> mux, flushes the
// filters and encoders and finally writes the trailer. It returns the first
// error encountered. Everything registered on the package-level closer is
// released when run returns, on success and on failure alike.
func run() error {
	// We use an astikit.Closer to free all resources properly
	defer c.Close()

	// Open input file
	if err := openInputFile(); err != nil {
		return fmt.Errorf("main: opening input file failed: %w", err)
	}

	// Open output file
	if err := openOutputFile(); err != nil {
		return fmt.Errorf("main: opening output file failed: %w", err)
	}

	// Init filters
	if err := initFilters(); err != nil {
		return fmt.Errorf("main: initializing filters failed: %w", err)
	}

	// Alloc packet
	pkt := astiav.AllocPacket()
	c.Add(pkt.Free)

	// Loop through packets
	for {
		// Read frame
		if err := inputFormatContext.ReadFrame(pkt); err != nil {
			if errors.Is(err, astiav.ErrEof) {
				break
			}
			return fmt.Errorf("main: reading frame failed: %w", err)
		}

		err := transcodePacket(pkt)
		// Release the demuxed payload before the packet is reused for the
		// next read; this also covers packets of streams we do not process.
		pkt.Unref()
		if err != nil {
			return err
		}
	}

	// NOTE(review): the decoders are never drained with a nil packet at end of
	// input, so frames still buffered inside a decoder are not emitted.
	// Confirm that SendPacket accepts nil before adding a drain step here.

	// Loop through streams
	for _, s := range streams {
		// Flush filter
		if err := filterEncodeWriteFrame(nil, s); err != nil {
			return fmt.Errorf("main: filtering, encoding and writing frame failed: %w", err)
		}

		// Flush encoder
		if err := encodeWriteFrame(nil, s); err != nil {
			return fmt.Errorf("main: encoding and writing frame failed: %w", err)
		}
	}

	// Write trailer
	if err := outputFormatContext.WriteTrailer(); err != nil {
		return fmt.Errorf("main: writing trailer failed: %w", err)
	}
	return nil
}

// transcodePacket decodes one demuxed packet and forwards every frame the
// decoder produces to filterEncodeWriteFrame. Packets belonging to a stream
// that is not in the streams map are ignored. The caller keeps ownership of
// pkt and is responsible for unreferencing it.
func transcodePacket(pkt *astiav.Packet) error {
	// Get stream
	s, ok := streams[pkt.StreamIndex()]
	if !ok {
		return nil
	}

	// Update packet
	// NOTE(review): this rescales into the decoder context's time base; the
	// comments in openOutputFile report that value as 0 for audio. Verify the
	// decoder time base is valid for every processed stream.
	pkt.RescaleTs(s.inputStream.TimeBase(), s.decCodecContext.TimeBase())

	// Send packet
	if err := s.decCodecContext.SendPacket(pkt); err != nil {
		return fmt.Errorf("main: sending packet failed: %w", err)
	}

	// Receive frames until the decoder needs more input
	for {
		if err := s.decCodecContext.ReceiveFrame(s.decFrame); err != nil {
			if errors.Is(err, astiav.ErrEof) || errors.Is(err, astiav.ErrEagain) {
				return nil
			}
			return fmt.Errorf("main: receiving frame failed: %w", err)
		}

		// Filter, encode and write frame
		if err := filterEncodeWriteFrame(s.decFrame, s); err != nil {
			return fmt.Errorf("main: filtering, encoding and writing frame failed: %w", err)
		}
	}
}
// openInputFile opens the file named by *input, reads its stream information
// and, for every audio or video stream, finds and opens a decoder and
// allocates the frame it decodes into. Each prepared stream is stored in the
// streams map under its input stream index. Every allocated resource is
// registered on the package-level closer c.
func openInputFile() (err error) {
	// Alloc input format context
	inputFormatContext = astiav.AllocFormatContext()
	if inputFormatContext == nil {
		return errors.New("main: input format context is nil")
	}
	c.Add(inputFormatContext.Free)

	// Open input
	if err = inputFormatContext.OpenInput(*input, nil, nil); err != nil {
		return fmt.Errorf("main: opening input failed: %w", err)
	}
	c.Add(inputFormatContext.CloseInput)

	// Find stream info
	if err = inputFormatContext.FindStreamInfo(nil); err != nil {
		return fmt.Errorf("main: finding stream info failed: %w", err)
	}

	// Loop through streams
	for _, in := range inputFormatContext.Streams() {
		params := in.CodecParameters()
		mediaType := params.MediaType()

		// Only process audio or video
		if !(mediaType == astiav.MediaTypeAudio || mediaType == astiav.MediaTypeVideo) {
			continue
		}

		// Create stream
		st := &stream{inputStream: in}

		// Find decoder
		st.decCodec = astiav.FindDecoder(params.CodecID())
		if st.decCodec == nil {
			return errors.New("main: codec is nil")
		}

		// Alloc codec context
		st.decCodecContext = astiav.AllocCodecContext(st.decCodec)
		if st.decCodecContext == nil {
			return errors.New("main: codec context is nil")
		}
		c.Add(st.decCodecContext.Free)

		// Copy the stream's codec parameters into the decoder context
		if err = params.ToCodecContext(st.decCodecContext); err != nil {
			return fmt.Errorf("main: updating codec context failed: %w", err)
		}

		// Video decoders additionally get the guessed frame rate
		if mediaType == astiav.MediaTypeVideo {
			st.decCodecContext.SetFramerate(inputFormatContext.GuessFrameRate(in, nil))
		}

		// Open codec context
		if err = st.decCodecContext.Open(st.decCodec, nil); err != nil {
			return fmt.Errorf("main: opening codec context failed: %w", err)
		}

		// Alloc frame
		st.decFrame = astiav.AllocFrame()
		c.Add(st.decFrame.Free)

		// Store stream
		streams[in.Index()] = st
	}
	return nil
}
// openOutputFile allocates the output format context for *output, creates one
// output stream plus encoder for every entry of the streams map (Opus for
// audio, H.264 for video), opens the output IO context when the container
// needs a file, and writes the container header. Every allocated resource is
// registered on the package-level closer c.
func openOutputFile() (err error) {
	// Sample rate forced on the audio encoder; per the original author's note
	// the Opus encoder required 48 kHz. The audio encoder time base is derived
	// from the same value so the two cannot drift apart.
	const audioSampleRate = 48000

	// Alloc output format context
	if outputFormatContext, err = astiav.AllocOutputFormatContext(nil, "", *output); err != nil {
		return fmt.Errorf("main: allocating output format context failed: %w", err)
	}
	if outputFormatContext == nil {
		return errors.New("main: output format context is nil")
	}
	c.Add(outputFormatContext.Free)

	// Flags of the chosen container, used below to decide whether encoders
	// must emit global headers.
	formatFlags := outputFormatContext.OutputFormat().Flags()

	// Loop through streams
	for _, is := range inputFormatContext.Streams() {
		// Get stream
		s, ok := streams[is.Index()]
		if !ok {
			continue
		}

		// Create output stream
		if s.outputStream = outputFormatContext.NewStream(nil); s.outputStream == nil {
			return errors.New("main: output stream is nil")
		}

		isAudio := s.decCodecContext.MediaType() == astiav.MediaTypeAudio

		// Get codec id
		codecID := astiav.CodecIDH264
		if isAudio {
			// #### FORCED OPUS
			codecID = astiav.CodecIDOpus
		}

		// Find encoder
		if s.encCodec = astiav.FindEncoder(codecID); s.encCodec == nil {
			return errors.New("main: codec is nil")
		}

		// Alloc codec context
		if s.encCodecContext = astiav.AllocCodecContext(s.encCodec); s.encCodecContext == nil {
			return errors.New("main: codec context is nil")
		}
		c.Add(s.encCodecContext.Free)

		// Update codec context
		if isAudio {
			// Prefer the encoder's first advertised channel layout, falling
			// back to the decoder's.
			// NOTE(review): the channel count below always comes from the
			// decoder, so it can disagree with an encoder-provided layout
			// (e.g. stereo input with a mono-first encoder) — confirm.
			if v := s.encCodec.ChannelLayouts(); len(v) > 0 {
				s.encCodecContext.SetChannelLayout(v[0])
			} else {
				s.encCodecContext.SetChannelLayout(s.decCodecContext.ChannelLayout())
			}
			s.encCodecContext.SetChannels(s.decCodecContext.Channels())
			log.Println("############# original sample rate", s.decCodecContext.SampleRate())
			// #### FORCED OPUS REQUIRED 48K
			s.encCodecContext.SetSampleRate(audioSampleRate)
			if v := s.encCodec.SampleFormats(); len(v) > 0 {
				s.encCodecContext.SetSampleFormat(v[0])
			} else {
				s.encCodecContext.SetSampleFormat(s.decCodecContext.SampleFormat())
			}
			// #### FORCED TB SINCE TB WAS BEING SET AS 0
			s.encCodecContext.SetTimeBase(astiav.NewRational(1, audioSampleRate))
			log.Println("############# set TB for audio ", s.encCodecContext.TimeBase())
		} else {
			s.encCodecContext.SetHeight(s.decCodecContext.Height())
			if v := s.encCodec.PixelFormats(); len(v) > 0 {
				s.encCodecContext.SetPixelFormat(v[0])
			} else {
				s.encCodecContext.SetPixelFormat(s.decCodecContext.PixelFormat())
			}
			s.encCodecContext.SetSampleAspectRatio(s.decCodecContext.SampleAspectRatio())
			// #### FORCED TB for video as well
			// NOTE(review): only the frame rate's numerator is used, so the
			// tick is 1/Num rather than the frame duration Den/Num; it is also
			// invalid when the guessed frame rate is 0 — confirm.
			s.encCodecContext.SetTimeBase(astiav.NewRational(1, s.decCodecContext.Framerate().Num()))
			log.Println("############# set TB for video ", s.encCodecContext.TimeBase())
			s.encCodecContext.SetWidth(s.decCodecContext.Width())
			// #### FORCED FPS but still the output media is being marked as variable framerate
			s.encCodecContext.SetFramerate(s.decCodecContext.Framerate())
		}

		// Update flags. Whether codec headers must live in the container
		// header is a property of the output format, so the container's
		// global-header flag is honoured here. The previous decoder-based
		// condition is kept as well so nothing that used to set the flag
		// stops doing so.
		if formatFlags.Has(astiav.IOFormatFlagGlobalheader) ||
			s.decCodecContext.Flags().Has(astiav.CodecContextFlagGlobalHeader) {
			s.encCodecContext.SetFlags(s.encCodecContext.Flags().Add(astiav.CodecContextFlagGlobalHeader))
		}

		// Open codec context
		if err = s.encCodecContext.Open(s.encCodec, nil); err != nil {
			return fmt.Errorf("main: opening codec context failed: %w", err)
		}

		// Update codec parameters
		if err = s.outputStream.CodecParameters().FromCodecContext(s.encCodecContext); err != nil {
			return fmt.Errorf("main: updating codec parameters failed: %w", err)
		}

		// Update stream
		s.outputStream.SetTimeBase(s.encCodecContext.TimeBase())
	}

	// If this is a file, we need to use an io context
	if !formatFlags.Has(astiav.IOFormatFlagNofile) {
		var ioContext *astiav.IOContext

		// Open io context
		ioContext, err = astiav.OpenIOContext(*output, astiav.NewIOContextFlags(astiav.IOContextFlagWrite))
		if err != nil {
			return fmt.Errorf("main: opening io context failed: %w", err)
		}
		c.AddWithError(ioContext.Closep)

		// Update output format context
		outputFormatContext.SetPb(ioContext)
	}

	// Write header
	if err = outputFormatContext.WriteHeader(nil); err != nil {
		return fmt.Errorf("main: writing header failed: %w", err)
	}
	return nil
}
// initFilters builds one filter graph per entry of the streams map, linking a
// buffer source ("in") to a buffer sink ("out") through a conversion chain:
//
//   - audio: aformat converts to the encoder's sample rate, sample format and
//     channel layout; when the encoder reports a fixed frame size,
//     asetnsamples re-chunks the audio into frames of exactly that size.
//   - video: format converts to the encoder's pixel format.
//
// It also allocates the frame that receives filtered output and the packet
// that receives encoded output. It must run after openInputFile and
// openOutputFile, since it reads both the decoder and the encoder contexts.
func initFilters() (err error) {
	// Loop through output streams
	for _, s := range streams {
		// Alloc graph
		if s.filterGraph = astiav.AllocFilterGraph(); s.filterGraph == nil {
			return errors.New("main: graph is nil")
		}
		c.Add(s.filterGraph.Free)

		// Alloc outputs
		outputs := astiav.AllocFilterInOut()
		if outputs == nil {
			return errors.New("main: outputs is nil")
		}
		c.Add(outputs.Free)

		// Alloc inputs
		inputs := astiav.AllocFilterInOut()
		if inputs == nil {
			return errors.New("main: inputs is nil")
		}
		c.Add(inputs.Free)

		// Switch on media type
		var args astiav.FilterArgs
		var buffersrc, buffersink *astiav.Filter
		var content string
		switch s.decCodecContext.MediaType() {
		case astiav.MediaTypeAudio:
			log.Println("############# mono check ", s.decCodecContext.ChannelLayout().String())
			args = astiav.FilterArgs{
				// Describe the layout the decoder actually produces instead of
				// assuming mono. The original author observed stray bytes
				// after the layout name, so the string is cut at its first NUL
				// byte before it is embedded in the filter arguments.
				"channel_layout": trimAtNUL(s.decCodecContext.ChannelLayout().String()),
				"sample_fmt":     s.decCodecContext.SampleFormat().Name(),
				"sample_rate":    strconv.Itoa(s.decCodecContext.SampleRate()),
				"time_base":      s.decCodecContext.TimeBase().String(),
			}
			log.Println("############# args for audio ", args)
			buffersrc = astiav.FindFilterByName("abuffer")
			buffersink = astiav.FindFilterByName("abuffersink")
			// Only the encoder's own sample format is offered: listing the
			// decoder's format as an alternative lets the graph hand the
			// encoder frames in a format it was not opened with.
			content = fmt.Sprintf("aformat=sample_rates=%d:sample_fmts=%s:channel_layouts=%s",
				s.encCodecContext.SampleRate(),
				s.encCodecContext.SampleFormat().Name(),
				trimAtNUL(s.encCodecContext.ChannelLayout().String()))
			// Encoders with a fixed frame size must be fed frames of exactly
			// that many samples; let the graph re-chunk the audio (padding the
			// final frame) instead of patching sample counts afterwards.
			if frameSize := s.encCodecContext.FrameSize(); frameSize > 0 {
				content += fmt.Sprintf(",asetnsamples=n=%d:p=1", frameSize)
			}
			log.Println("############# content filter for audio ", content)
		default:
			args = astiav.FilterArgs{
				"pix_fmt":      strconv.Itoa(int(s.decCodecContext.PixelFormat())),
				"pixel_aspect": s.decCodecContext.SampleAspectRatio().String(),
				// #### FORCED tb to be 1/<frame rate numerator>
				// NOTE(review): this is not necessarily the time base of the
				// frames pushed into the graph — confirm.
				"time_base":  astiav.NewRational(1, s.decCodecContext.Framerate().Num()).String(),
				"video_size": strconv.Itoa(s.decCodecContext.Width()) + "x" + strconv.Itoa(s.decCodecContext.Height()),
			}
			log.Println("############# args for video ", args)
			buffersrc = astiav.FindFilterByName("buffer")
			buffersink = astiav.FindFilterByName("buffersink")
			content = fmt.Sprintf("format=pix_fmts=%s", s.encCodecContext.PixelFormat().Name())
			log.Println("############# content filter for video ", content)
		}

		// Check filters
		if buffersrc == nil {
			return errors.New("main: buffersrc is nil")
		}
		if buffersink == nil {
			return errors.New("main: buffersink is nil")
		}

		// Create filter contexts
		if s.buffersrcContext, err = s.filterGraph.NewFilterContext(buffersrc, "in", args); err != nil {
			return fmt.Errorf("main: creating buffersrc context failed (args=%#v) : %w", args, err)
		}
		if s.buffersinkContext, err = s.filterGraph.NewFilterContext(buffersink, "out", nil); err != nil {
			return fmt.Errorf("main: creating buffersink context failed: %w", err)
		}

		// Update outputs: the open output of the graph so far is the source
		outputs.SetName("in")
		outputs.SetFilterContext(s.buffersrcContext)
		outputs.SetPadIdx(0)
		outputs.SetNext(nil)

		// Update inputs: the open input of the graph so far is the sink
		inputs.SetName("out")
		inputs.SetFilterContext(s.buffersinkContext)
		inputs.SetPadIdx(0)
		inputs.SetNext(nil)

		// Parse
		if err = s.filterGraph.Parse(content, inputs, outputs); err != nil {
			return fmt.Errorf("main: parsing filter failed: %w", err)
		}

		// Configure
		if err = s.filterGraph.Configure(); err != nil {
			return fmt.Errorf("main: configuring filter failed: %w", err)
		}

		// Alloc frame
		s.filterFrame = astiav.AllocFrame()
		c.Add(s.filterFrame.Free)

		// Alloc packet
		s.encPkt = astiav.AllocPacket()
		c.Add(s.encPkt.Free)
	}
	return nil
}

// trimAtNUL returns v up to, but not including, its first NUL byte, or v
// unchanged when it contains none.
func trimAtNUL(v string) string {
	if i := strings.IndexByte(v, 0); i >= 0 {
		return v[:i]
	}
	return v
}
// filterEncodeWriteFrame pushes f into the stream's buffer source and then
// pulls every frame currently available from the buffer sink, handing each one
// to encodeWriteFrame. A nil f is passed through to the buffer source
// unchanged (main uses this to flush the filter). Running out of filtered
// frames (EOF or EAGAIN from the sink) ends the call successfully.
func filterEncodeWriteFrame(f *astiav.Frame, s *stream) (err error) {
	// Add frame
	srcFlags := astiav.NewBuffersrcFlags(astiav.BuffersrcFlagKeepRef)
	if err = s.buffersrcContext.BuffersrcAddFrame(f, srcFlags); err != nil {
		return fmt.Errorf("main: adding frame failed: %w", err)
	}

	// Drain the sink
	for {
		// Drop whatever the previous iteration left in the reusable frame
		s.filterFrame.Unref()

		// Get frame
		err = s.buffersinkContext.BuffersinkGetFrame(s.filterFrame, astiav.NewBuffersinkFlags())
		switch {
		case err == nil:
			// A filtered frame is available; fall out of the switch.
		case errors.Is(err, astiav.ErrEof), errors.Is(err, astiav.ErrEagain):
			return nil
		default:
			return fmt.Errorf("main: getting frame failed: %w", err)
		}

		// Reset picture type
		s.filterFrame.SetPictureType(astiav.PictureTypeNone)

		// Encode and write frame
		if err = encodeWriteFrame(s.filterFrame, s); err != nil {
			return fmt.Errorf("main: encoding and writing frame failed: %w", err)
		}
	}
}
// encodeWriteFrame sends f to the stream's encoder and writes every packet the
// encoder makes available to outputFormatContext. A nil f is passed through to
// the encoder unchanged (main uses this to flush the encoder).
//
// Before encoding a non-nil frame:
//   - if the encoder reports a fixed frame size and the frame carries more
//     samples than that, the frame's sample count is truncated to the frame
//     size;
//   - the frame's PTS is rescaled from the decoder context's time base to the
//     encoder context's time base.
//
// Running out of packets (EOF or EAGAIN from the encoder) ends the call
// successfully.
func encodeWriteFrame(f *astiav.Frame, s *stream) (err error) {
	// Unref packet
	s.encPkt.Unref()

	log.Println("############# f", f, "s", s)
	if f != nil {
		// Check for a valid frame size. libavcodec rejects audio frames with
		// more samples than the encoder's frame size ("more samples than
		// frame size"), see
		// https://ffmpeg.org/doxygen/2.8/libavcodec_2utils_8c_source.html#l01883
		//
		// The sample count is only ever reduced. Raising it would make the
		// encoder read past the samples the frame actually holds, and a frame
		// size of 0 (e.g. video) means there is nothing to enforce.
		// Truncation drops the surplus samples; a filter graph that already
		// emits frame-size chunks never reaches the truncating branch.
		if frameSize := s.encCodecContext.FrameSize(); frameSize > 0 {
			log.Println("############# original f nb samples", f.NbSamples())
			log.Println("############# original f size", frameSize)
			if f.NbSamples() > frameSize {
				f.SetNbSamples(frameSize)
				log.Println("############# changed n samples", frameSize)
			}
		}

		// #### FORCED pts since it was complaining about it
		// Mirrors FFmpeg's
		//   frame->pts = av_rescale_q(frame->pts, decoder_ctx->pkt_timebase, encoder_ctx->time_base);
		// NOTE(review): frames arrive here from the filter graph; if the
		// buffer sink's time base differs from the decoder context's, the
		// source time base used below is the wrong unit — confirm.
		log.Println("############# PTS before", f.Pts())
		f.SetPts(astiav.RescaleQ(f.Pts(), s.decCodecContext.TimeBase(), s.encCodecContext.TimeBase()))
		log.Println("############# PTS after", f.Pts())
	} else {
		log.Println("############# encodeWriteFrame NIL")
	}

	// Send frame
	if err = s.encCodecContext.SendFrame(f); err != nil {
		return fmt.Errorf("main: sending frame failed: %w", err)
	}

	// Loop
	for {
		// Receive packet
		if err = s.encCodecContext.ReceivePacket(s.encPkt); err != nil {
			if errors.Is(err, astiav.ErrEof) || errors.Is(err, astiav.ErrEagain) {
				return nil
			}
			return fmt.Errorf("main: receiving packet failed: %w", err)
		}

		// Update pkt: route it to the output stream and express its
		// timestamps in that stream's time base
		s.encPkt.SetStreamIndex(s.outputStream.Index())
		s.encPkt.RescaleTs(s.encCodecContext.TimeBase(), s.outputStream.TimeBase())

		// Write frame
		if err = outputFormatContext.WriteInterleavedFrame(s.encPkt); err != nil {
			return fmt.Errorf("main: writing frame failed: %w", err)
		}
	}
}
// Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment