@MikuAuahDark
Last active June 13, 2024 04:03
NPad Audio Video Decode library example.
// clang++ -Inav/include -Lnav/lib -std=c++17 program.cpp lodepng.cpp -lnav
// Get lodepng.cpp from https://github.com/lvandeve/lodepng
// See https://github.com/MikuAuahDark/nav for more information about NAV.
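// The program takes a mode, an input media file and (for the audio/video modes) an output
// target, as parsed in main() below. Assuming the binary is named "program" (a placeholder,
// as are the file names), invocations would look roughly like:
//   ./program enum  input.mp4              (list all streams)
//   ./program audio input.mp4 output.wav   (decode the first audio stream to a WAV file)
//   ./program video input.mp4 frames/      (decode the first video stream to numbered PNGs)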
#include <algorithm>
#include <array>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <list>
#include <memory>
#include <stdexcept>
#include <string>
#include <sstream>
#include <vector>
#include <type_traits>
#include "lodepng.h"
#include "nav/nav.h"
constexpr size_t MINUS_1 = -1;
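// RAII guard that closes a populated nav_input when it goes out of scope, so the early
// returns in main() don't leave the underlying input open.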
struct NavInputGuard
{
NavInputGuard(nav_input &in)
: input(&in)
{}
~NavInputGuard()
{
if (input->close)
input->closef();
}
nav_input *input;
};
template<typename T>
struct binary_data
{
static_assert(std::is_integral<T>::value, "binary_data requires an integral type");
using remove_signed = std::make_unsigned_t<T>;
static constexpr size_t size = sizeof(T);
binary_data(T v): value(v) {}
binary_data(const binary_data<T> &) = default;
binary_data(binary_data<T> &&) = default;
std::array<uint8_t, size> bytes() const
{
std::array<uint8_t, size> b {};
remove_signed temp = (remove_signed) value;
for (size_t i = 0; i < size && temp; i++)
{
b[i] = temp & 0xFF;
temp >>= 8;
}
return b;
}
T value;
};
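// binary_data, together with the operator<< overload further below, writes an integer to a
// stream in little-endian byte order, which is what the RIFF/WAV header fields written in
// main() require.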
static std::vector<std::string> convertArgs(int argc, char *argv[])
{
std::vector<std::string> result;
for (int i = 0; i < argc; i++)
result.emplace_back(argv[i]);
return result;
}
static void closeInput(nav_input *input)
{
input->closef();
}
static void usage(const std::vector<std::string> &args, bool hasout)
{
std::cout << "Usage: " << args[0] << " <audio|video|enum> <input file>";
if (hasout)
std::cout << " <output file/dir>";
else
std::cout << " [output file/dir]";
std::cout << std::endl;
}
static std::string parseAudioFormat(nav_audioformat fmt)
{
std::stringstream ss;
if (NAV_AUDIOFORMAT_ISFLOAT(fmt))
ss << "pcm_f" << NAV_AUDIOFORMAT_BITSIZE(fmt) << (NAV_AUDIOFORMAT_ISLITTLEENDIAN(fmt) ? "le" : "be");
else
ss << "pcm_" << (NAV_AUDIOFORMAT_ISUNSIGNED(fmt) ? "u" : "s") << NAV_AUDIOFORMAT_BITSIZE(fmt) << (NAV_AUDIOFORMAT_ISLITTLEENDIAN(fmt) ? "le" : "be");
return ss.str();
}
static const char *pixelFormatToString(nav_pixelformat pixfmt)
{
switch (pixfmt)
{
case NAV_PIXELFORMAT_RGB8:
return "rgb8";
case NAV_PIXELFORMAT_YUV420:
return "yuv420p";
case NAV_PIXELFORMAT_YUV444:
return "yuv444p";
case NAV_PIXELFORMAT_NV12:
return "nv12";
default:
return "unknown";
}
}
// https://learn.microsoft.com/en-us/windows/win32/medfound/recommended-8-bit-yuv-formats-for-video-rendering#converting-420-yuv-to-422-yuv
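// The four cases below blend the four nearest chroma samples (a, b, c, d) with bilinear
// weights 9:3:3:1 (divided by 16), choosing the weighting that matches the luma pixel's
// position within its 2x2 block; this appears to mirror libwebp's "fancy upsampling".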
static uint8_t simplewebp__do_uv_fancy_upsampling(uint8_t a, uint8_t b, uint8_t c, uint8_t d, uint8_t x, uint8_t y)
{
switch (y * 2 + x)
{
case 0:
return (9u*a + 3u*b + 3u*c + d + 8u) / 16u;
case 1:
return (3u*a + 9u*b + c + 3u*d + 8u) / 16u;
case 2:
return (3u*a + b + 9u*c + 3u*d + 8u) / 16u;
case 3:
return (a + 3u*b + 3u*c + 9u*d + 8u) / 16u;
default:
return 0;
}
}
static int simplewebp__multhi(int v, int coeff)
{
return (v * coeff) >> 8;
}
static uint8_t simplewebp__yuv2rgb_clip8(int v)
{
return ((v & ~16383) == 0) ? ((uint8_t) (v >> 6)) : (v < 0) ? 0 : 255;
}
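// Fixed-point YUV -> RGB conversion: multhi shifts by 8 and clip8 by a further 6, so the
// coefficients are Q14. They appear to be the usual BT.601 limited-range constants as used
// by libwebp, e.g. 19077/16384 ~= 1.164 (= 255/219) and 26149/16384 ~= 1.596.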
static void simplewebp__yuv2rgb_plain(uint8_t y, uint8_t u, uint8_t v, uint8_t *rgb)
{
int yhi = simplewebp__multhi(y, 19077);
rgb[0] = simplewebp__yuv2rgb_clip8(yhi + simplewebp__multhi(v, 26149) - 14234);
rgb[1] = simplewebp__yuv2rgb_clip8(yhi - simplewebp__multhi(u, 6419) - simplewebp__multhi(v, 13320) + 8708);
rgb[2] = simplewebp__yuv2rgb_clip8(yhi + simplewebp__multhi(u, 33050) - 17685);
}
template<typename T>
constexpr const T &clamp(const T &value, const T &min, const T &max)
{
return std::min<T>(std::max<T>(value, min), max);
}
static std::vector<uint8_t> convertPixelFormat(nav_pixelformat pixfmt, uint32_t width, uint32_t height, const uint8_t *buf)
{
if (pixfmt != NAV_PIXELFORMAT_UNKNOWN)
{
size_t dimension = ((size_t) width) * height;
if (pixfmt == NAV_PIXELFORMAT_RGB8)
return std::vector<uint8_t>(buf, buf + dimension * 3);
std::vector<uint8_t> result(dimension * 3);
const uint8_t *uv = buf + dimension;
uint8_t *dest = result.data();
if (pixfmt == NAV_PIXELFORMAT_YUV420 || pixfmt == NAV_PIXELFORMAT_NV12)
{
size_t uvw = ((size_t) width + 1) / 2;
size_t uvh = ((size_t) height + 1) / 2;
for (size_t i = 0; i < dimension; i++)
{
size_t xp = i % width;
size_t yp = i / width;
uint8_t y = buf[i];
uint8_t ut[4], vt[4];
size_t xpp = (xp + 1) / 2;
size_t ypp = (yp + 1) / 2;
xpp = xpp == 0 ? 0 : (xpp - 1); // NOTE: Can't use std::max(xpp - 1, 0) here because size_t is unsigned and xpp - 1 would underflow.
ypp = ypp == 0 ? 0 : (ypp - 1);
size_t xppm = std::min(xpp + 1, uvw - 1);
size_t yppm = std::min(ypp + 1, uvh - 1);
if (pixfmt == NAV_PIXELFORMAT_YUV420)
{
// UV planar
size_t udim = uvw * uvh;
ut[0] = uv[ypp * uvw + xpp]; // a
ut[1] = uv[ypp * uvw + xppm]; // b
ut[2] = uv[yppm * uvw + xpp]; // c
ut[3] = uv[yppm * uvw + xppm]; // d
vt[0] = uv[udim + ypp * uvw + xpp]; // a
vt[1] = uv[udim + ypp * uvw + xppm]; // b
vt[2] = uv[udim + yppm * uvw + xpp]; // c
vt[3] = uv[udim + yppm * uvw + xppm]; // d
}
else
{
// UV interleaved (NV12)
ut[0] = uv[(ypp * uvw + xpp) * 2]; // a
ut[1] = uv[(ypp * uvw + xppm) * 2]; // b
ut[2] = uv[(yppm * uvw + xpp) * 2]; // c
ut[3] = uv[(yppm * uvw + xppm) * 2]; // d
vt[0] = uv[1 + (ypp * uvw + xpp) * 2]; // a
vt[1] = uv[1 + (ypp * uvw + xppm) * 2]; // b
vt[2] = uv[1 + (yppm * uvw + xpp) * 2]; // c
vt[3] = uv[1 + (yppm * uvw + xppm) * 2]; // d
}
uint8_t u = simplewebp__do_uv_fancy_upsampling(ut[0], ut[1], ut[2], ut[3], (~xp) & 1, (~yp) & 1);
uint8_t v = simplewebp__do_uv_fancy_upsampling(vt[0], vt[1], vt[2], vt[3], (~xp) & 1, (~yp) & 1);
simplewebp__yuv2rgb_plain(y, u, v, dest + i * 3);
}
return result;
}
else if (pixfmt == NAV_PIXELFORMAT_YUV444)
{
for (size_t i = 0; i < dimension; i++)
{
size_t xp = i % width;
size_t yp = i / width;
uint8_t y = buf[i];
uint8_t u = buf[i + dimension];
uint8_t v = buf[i + dimension * 2];
simplewebp__yuv2rgb_plain(y, u, v, dest + i * 3);
}
return result;
}
}
return std::vector<uint8_t>();
}
static std::string joinPath(const std::string &p1, const std::string &p2)
{
std::string newp1 = p1;
std::transform(p1.begin(), p1.end(), newp1.begin(), [](char c) { return c == '\\' ? '/' : c; });
return newp1.back() == '/' ? (newp1 + p2) : (newp1 + "/" + p2);
}
template<typename T>
std::ostream &operator<<(std::ostream &ostr, const binary_data<T> &bd)
{
const auto array = bd.bytes();
return ostr.write((const char*) array.data(), array.size());
}
int main(int argc, char *argv[])
{
using UniqueNAV = std::unique_ptr<nav_t, decltype(&nav_close)>;
std::vector<std::string> args = convertArgs(argc, argv);
std::ios_base::sync_with_stdio(false);
if (args.size() < 3)
{
usage(args, false);
return 1;
}
int mode = -1;
if (args[1] == "audio" || args[1] == "a")
mode = 1;
else if (args[1] == "video" || args[1] == "v")
mode = 2;
else if (args[1] == "enum" || args[1] == "e")
mode = 0;
if (mode == -1)
{
usage(args, false);
return 1;
}
else if (mode > 0 && args.size() < 4)
{
usage(args, true);
return 1;
}
nav_input mediaInput = {}; // Zero-initialize so the guard sees a null close pointer if population fails.
NavInputGuard _g(mediaInput);
if (!nav_input_populate_from_file(&mediaInput, args[2].c_str()))
{
std::cerr << "nav_input_populate_from_file(): " << nav_error() << std::endl;
return 1;
}
UniqueNAV navInst(nav_open(&mediaInput, args[2].c_str()), nav_close);
if (!navInst)
{
std::cerr << "nav_open(): " << nav_error() << std::endl;
return 1;
}
size_t nstreams = nav_nstreams(navInst.get());
size_t streamIndex = MINUS_1;
nav_audioformat audioFormat = 0;
nav_pixelformat pixelFormat = NAV_PIXELFORMAT_UNKNOWN;
uint32_t width = 0, height = 0, sampleRate = 0, nchannels = 0;
if (mode == 0)
{
// Enumerate only
std::cout << "List of streams" << std::endl;
for (size_t i = 0; i < nstreams; i++)
{
nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i);
switch (nav_streaminfo_type(sinfo))
{
case NAV_STREAMTYPE_AUDIO:
{
std::cout << i << " audio stream ";
std::cout << nav_audio_sample_rate(sinfo) << "Hz ";
std::cout << nav_audio_nchannels(sinfo) << "ch ";
std::cout << parseAudioFormat(nav_audio_format(sinfo)) << std::endl;
break;
}
case NAV_STREAMTYPE_VIDEO:
{
uint32_t w, h;
nav_video_dimensions(sinfo, &w, &h);
std::cout << i << " video stream " << w << "x" << h;
std::cout << " " << nav_video_fps(sinfo) << " FPS ";
std::cout << pixelFormatToString(nav_video_pixel_format(sinfo)) << std::endl;
break;
}
default:
{
std::cout << i << " unknown stream" << std::endl;
break;
}
}
}
return 0;
}
else
{
for (size_t i = 0; i < nstreams; i++)
{
nav_streaminfo_t *sinfo = nav_stream_info(navInst.get(), i);
nav_streamtype type = nav_streaminfo_type(sinfo);
if (streamIndex == MINUS_1)
{
if (mode == 1 && type == NAV_STREAMTYPE_AUDIO)
{
streamIndex = i;
audioFormat = nav_audio_format(sinfo);
sampleRate = nav_audio_sample_rate(sinfo);
nchannels = nav_audio_nchannels(sinfo);
}
else if (mode == 2 && type == NAV_STREAMTYPE_VIDEO)
{
streamIndex = i;
pixelFormat = nav_video_pixel_format(sinfo);
nav_video_dimensions(sinfo, &width, &height);
}
else
nav_stream_enable(navInst.get(), i, false);
}
else
nav_stream_enable(navInst.get(), i, false);
}
if (streamIndex == MINUS_1)
{
std::cerr << "Cannot find " << (mode == 1 ? "audio" : "video") << " stream in file." << std::endl;
return 1;
}
}
std::list<std::vector<uint8_t>> audioSamples;
size_t totalAudioSamples = 0;
size_t frameCount = 0;
while (true)
{
using UniqueNAVFrame = std::unique_ptr<nav_frame_t, decltype(&nav_frame_free)>;
UniqueNAVFrame frame(nav_read(navInst.get()), nav_frame_free);
if (!frame)
{
const char *err = nav_error();
if (err)
{
std::cerr << "Cannot read stream: " << err << std::endl;
return 1;
}
break;
}
if (nav_frame_streamindex(frame.get()) == streamIndex)
{
if (mode == 1)
{
// Audio frame
const uint8_t *buf = (const uint8_t*) nav_frame_buffer(frame.get());
size_t size = nav_frame_size(frame.get());
audioSamples.emplace_back(buf, buf + size);
totalAudioSamples += size;
std::cout << "Total sample " << totalAudioSamples << std::endl;
if (sizeof(size_t) > 4 && totalAudioSamples > UINT32_MAX)
{
std::cerr << "Cannot write file larger than 4GB for now" << std::endl;
return 1;
}
}
else if (mode == 2)
{
// Video frame
const uint8_t *buf = (const uint8_t*) nav_frame_buffer(frame.get());
try
{
std::stringstream ss;
ss << ++frameCount << "-" << nav_frame_tell(frame.get()) << ".png";
std::string path = joinPath(args[3], ss.str());
std::vector<uint8_t> rgb = convertPixelFormat(pixelFormat, width, height, buf);
unsigned lodepngerr = lodepng::encode(path.c_str(), rgb, width, height, LCT_RGB);
if (lodepngerr)
throw std::runtime_error(lodepng_error_text(lodepngerr));
std::cout << "Frame " << frameCount << std::endl;
}
catch (const std::exception &e)
{
std::cerr << "Cannot save: " << e.what() << std::endl;
return 1;
}
}
}
}
if (mode == 1)
{
// Encode to WAV
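// The output is a canonical 44-byte WAVE header followed by the raw sample data:
//   "RIFF" <riff size> "WAVE" "fmt " <16> <format tag> <nchannels> <sample rate>
//   <byte rate> <block align> <bits per sample> "data" <data size> <samples...>
// where <riff size> = 36 + <data size>, and format tag 1 means integer PCM, 3 means IEEE float.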
uint32_t size =
12 /* "WAVE" + "fmt " + <fmt chunk size> */
+ 2 /* format tag */
+ 2 /* nchannels */
+ 4 /* sample rate */
+ 4 /* byte rate = sample rate * block align */
+ 2 /* block align = nchannels * bps / 8 */
+ 2 /* bps */
+ 8 /* "data" + <data size> */
+ (uint32_t) totalAudioSamples;
uint32_t sampleSize = nchannels * ((NAV_AUDIOFORMAT_BITSIZE(audioFormat) + 7) / 8);
uint32_t smp = sampleRate * sampleSize;
try
{
std::ofstream f(args[3], std::ios_base::out | std::ios_base::binary);
f.exceptions(std::ios_base::failbit | std::ios_base::badbit); // Make I/O failures throw so the catch below reports them.
f << "RIFF" << binary_data<uint32_t>(size)
<< "WAVEfmt " << binary_data<uint32_t>(16)
<< binary_data<uint16_t>(NAV_AUDIOFORMAT_ISFLOAT(audioFormat) ? 3 : 1)
<< binary_data<uint16_t>(nchannels)
<< binary_data<uint32_t>(sampleRate)
<< binary_data<uint32_t>(smp)
<< binary_data<uint16_t>((uint16_t) sampleSize)
<< binary_data<uint16_t>(NAV_AUDIOFORMAT_BITSIZE(audioFormat))
<< "data"
<< binary_data<uint32_t>((uint32_t) totalAudioSamples);
for (const std::vector<uint8_t> &samples: audioSamples)
f.write((const char*) samples.data(), samples.size());
}
catch (const std::exception &e)
{
std::cerr << "Cannot save WAV: " << e.what() << std::endl;
return 1;
}
}
return 0;
}