Skip to content

Instantly share code, notes, and snippets.

@ayuusweetfish
Last active February 28, 2024 17:25
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ayuusweetfish/7c7791162bf31a9c78dd61f2ac79889a to your computer and use it in GitHub Desktop.
Save ayuusweetfish/7c7791162bf31a9c78dd61f2ac79889a to your computer and use it in GitHub Desktop.
Recording LÖVE animations into a video file
// cc % -O2 -I../kissfft-131.1.0 ../kissfft-131.1.0/libkissfft-float.a
#include <math.h>
#include <stdio.h>
#include "kiss_fftr.h"
/*
 * Small kissfft sanity check: builds a synthetic N-sample signal (a sine plus
 * a sawtooth-like `i % 44` term), prints it, runs a real-input forward FFT
 * and prints real part, imaginary part and magnitude of every output bin,
 * each divided by N / 2.
 *
 * Returns 0 on success, 1 if the FFT configuration cannot be allocated.
 */
int main()
{
  enum { N = 100 };  // FFT length; kiss_fftr produces N / 2 + 1 output bins

  float x[N];
  for (int i = 0; i < N; i++) {
    x[i] = sinf((float)i * 0.75f) * 5000 + i % 44;
    printf("%2d %10.7f\n", i, x[i]);
  }

  // Hann window (deliberately disabled; flip the condition to enable it)
  if (0) for (int i = 0; i < N; i++)
    x[i] *= (1 - cosf((float)i / N * 2 * M_PI)) * 0.5f;

  kiss_fftr_cfg fft_cfg = kiss_fftr_alloc(N, 0, NULL, NULL);
  if (fft_cfg == NULL) {
    fprintf(stderr, "kiss_fftr_alloc failed\n");
    return 1;
  }

  kiss_fft_cpx y[N / 2 + 1];
  kiss_fftr(fft_cfg, x, y);
  for (int i = 0; i <= N / 2; i++)
    printf("%2d %14.7f %14.7f %14.7f\n", i,
      y[i].r / (N / 2), y[i].i / (N / 2),
      sqrtf(y[i].r*y[i].r + y[i].i*y[i].i) / (N / 2));

  // The configuration is heap-allocated by kiss_fftr_alloc; release it
  kiss_fftr_free(fft_cfg);
  return 0;
}
-- Recording mode is switched on by the mere presence of the `record`
-- environment variable; it also selects a larger render scale.
local isRecord = os.getenv('record') ~= nil
local GLOBAL_SCALE
if isRecord then
  GLOBAL_SCALE = 2
else
  GLOBAL_SCALE = 1.25
end
-- Window size: a 960x600 base layout multiplied by the render scale
local W, H = 960 * GLOBAL_SCALE, 600 * GLOBAL_SCALE
love.window.setMode(W, H, { fullscreen = false, highdpi = false })
-- Per-track display metadata, one entry per on-screen track (left to right).
-- Fields of each entry, as consumed elsewhere in this file:
--   [1]     name rendered with the CJK font (`font`)
--   [2]     abbreviated name rendered with the Latin font (`font_en`)
--   [3..5]  red, green, blue components passed to love.graphics.setColor
--   [6]     option table; `strike = true` makes `draw` cap a note's displayed
--           end at 0.1 s after its onset
-- The entry index is also used in the per-track data file name
-- 'tracks/TwAll-22a-<index>.ogg.bin'.
local track_meta = {
{'短笛', 'Picc', 0.7, 0.85, 0.5, {}},
{'长笛', 'Fl', 0.7, 0.85, 0.5, {}},
{'双簧管', 'Ob', 0.7, 0.85, 0.5, {}},
{'单簧管', 'Cl', 0.7, 0.85, 0.5, {}},
{'大管', 'Bsn', 0.7, 0.85, 0.5, {}},
{'圆号', 'Hn', 1.0, 0.7, 0.5, {}},
{'圆号', 'Hn', 1.0, 0.7, 0.5, {}},
{'小号', 'Tpt', 1.0, 0.7, 0.5, {}},
{'长号', 'Tbn', 1.0, 0.7, 0.5, {}},
{'定音鼓', 'Timp', 0.9, 0.65, 0.9, {strike = true}},
{'钟琴', 'Glsp', 0.9, 0.65, 0.9, {strike = true}},
{'颤音琴', 'Vib', 0.9, 0.65, 0.9, {}},
{'马林巴琴', 'Mrm', 0.9, 0.65, 0.9, {strike = true}},
{'小鼓', 'Sn Dr', 0.8, 0.7, 0.85, {strike = true}},
{'大鼓', 'Bs Dr', 0.8, 0.7, 0.85, {strike = true}},
{'筒鼓', 'Toms', 0.8, 0.7, 0.85, {strike = true}},
{'三角铁', 'Tri', 0.8, 0.7, 0.85, {strike = true}},
{'吊钹', 'Susp Cym', 0.8, 0.7, 0.85, {strike = true}},
{'风铃', 'Wn Ch', 0.8, 0.7, 0.85, {}},
{'沙锤', 'Mrcs', 0.8, 0.7, 0.85, {strike = true}},
{'竖琴', 'Hp', 1.0, 0.6, 0.6, {strike = true}},
{'第一小提琴', 'I Vln', 0.7, 0.8, 0.95, {}},
{'第二小提琴', 'II Vln', 0.7, 0.8, 0.95, {}},
{'中提琴', 'Vla', 0.7, 0.8, 0.95, {}},
{'大提琴', 'Vlc', 0.7, 0.8, 0.95, {}},
{'低音提琴', 'Cb', 0.7, 0.8, 0.95, {}},
}
-- Horizontal layout: the window width is divided into `n_tracks` equal
-- columns plus `x_padding` column-widths of margin on each side.
local n_tracks = #track_meta
local x_padding = 0.4
local track_width = W / (n_tracks + 2 * x_padding)

--- X coordinate of the centre of a track column.
-- @param i track index, 1-based
-- @return centre x in window pixels
local function track_x(i)
  local columns_from_left = x_padding + i - 0.5
  return track_width * columns_from_left
end
--- Translate a track number from the event file into a `track_meta` index.
-- Numbers up to and including 20 are shifted up by one; larger numbers are
-- used unchanged (so 20 and 21 both land on index 21).
-- @param midi_track track number as read from the event file
-- @return index into `track_meta`
local function map_midi_track(midi_track)
  if midi_track > 20 then
    return midi_track
  end
  return midi_track + 1
end
-- Titles shown in the top-left corner. Index 0 is the title of the whole
-- work; indices 1..n are its sections.
-- Fields of each entry, as consumed elsewhere in this file:
--   [1]  title rendered with `font_lg`
--   [2]  title in its original language, drawn to the right of [1]
--   [3]  English rendering, drawn as '/ <text>'; '' means nothing is drawn
--   [4]  (sections only) start time, compared against `music_time` in `draw`
local sections = {
[0] = {'蓝星飞行日记', 'Planet Blue Flight Journal', ''},
{'I. 在黄昏起飞', 'Mit der einbrechenden Dämmerung', 'As twilight falls', 0},
{'II. 什锦莓果酱', 'Berry jam medley', '', 107},
{'III. 暮蓝时刻', 'L’Heure bleue', 'Blue hour', 240},
{'IV. 星海拾梦', 'Ubi somnia stellarum lustrant', 'Where stars’ dreams scatter', 310},
{'V. 鸽咕咕', 'ハトなり', 'Call of doves', 472},
{'VI. 晴天圆舞曲', 'Valse du ciel clair', 'Waltz of the clear sky', 499.5},
{'VII. 大块之染', 'Mise à la masse', 'Grounded land', 606},
}
--- Chronological list of events parsed from 'TwAll-22a.txt'.
--
-- File format, as read here: blank lines and lines starting with '#' are
-- ignored; the first remaining line that parses as a number is the tick
-- division; every later line is '<ticks> <track> <type> <numeric args...>'.
--
-- Recognised types:
--   TimeSig a b      bar length becomes a / b * 4 * division ticks
--   Tempo bpm        one tick lasts 60 / (bpm * division) seconds
--   NT pitch len vel a note; `len` is in ticks
-- Other types only advance the clock.
--
-- Resulting entries:
--   { type = 'bar',  time = <s>, number = <bar number> }
--   { type = 'note', time = <s>, track, pitch, len = <s>, vel }
--
-- Raises an error naming the offending line when an event line does not
-- match the expected shape.
local events = (function ()
  local events = {}
  local division = nil
  local bar = 0
  -- Starting one tick before zero with a one-tick bar makes the first event
  -- at tick 0 open bar 1 exactly at tick 0.
  local bar_len = 1
  local bar_start = -1
  local last_ticks = 0
  local tick_secs = 0
  local time_secs = 0
  local line_no = 0

  for line in love.filesystem.lines('TwAll-22a.txt') do
    line_no = line_no + 1
    if line == '' or line:sub(1, 1) == '#' then
      -- blank line or comment: nothing to do
    elseif division == nil then
      division = tonumber(line)
    else
      local ticks, track, ty, args_str =
        string.match(line, '(%d+) (%d+) (%w+) ([%d ]+)')
      if ticks == nil then
        error(string.format(
          'TwAll-22a.txt:%d: malformed event line: %s', line_no, line))
      end
      ticks = tonumber(ticks)
      track = tonumber(track)
      local args = {}
      for w in string.gmatch(args_str, '%d+') do
        args[#args + 1] = tonumber(w)
      end

      -- Emit a bar event for every bar line crossed up to this tick. The
      -- tempo in force before this event applies to the elapsed span.
      while ticks >= bar_start + bar_len do
        bar = bar + 1
        bar_start = bar_start + bar_len
        events[#events + 1] = {
          time = time_secs + (bar_start - last_ticks) * tick_secs,
          type = 'bar',
          number = bar,
        }
      end

      -- Advance the clock using the tempo that was active so far
      time_secs = time_secs + (ticks - last_ticks) * tick_secs
      last_ticks = ticks

      if ty == 'TimeSig' then
        bar_len = args[1] / args[2] * 4 * division
      elseif ty == 'Tempo' then
        tick_secs = (60 / (args[1] * division))
      elseif ty == 'NT' then
        events[#events + 1] = {
          time = time_secs,
          type = 'note',
          track = track,
          pitch = args[1],
          len = args[2] * tick_secs,
          vel = args[3],
        }
      end
    end
  end
  return events
end)()
-- Displayed pitch range, inclusive at both ends: 28 (E1) up to 112 (E8)
local pitch_low, pitch_high = 28, 112
-- Number of distinct pitches in that range
local n_pitches = (pitch_high - pitch_low) + 1
local ffi = require('ffi')
-- C prototypes of the two functions looked up in the spectrogram library
ffi.cdef([[
void spectrogram(const void *data, int len, int *o_n_windows, float **sp, float **enr);
void spectrogram_preprocessed(const void *data, int len, int *o_n_windows, float **sp, float **enr);
]])
-- The dynamic library is read through love.filesystem and copied to a
-- temporary file; that temporary path is what gets handed to ffi.load.
local libspectrogram_path = os.tmpname()
print(libspectrogram_path)
local libspectrogram_data, libspectrogram_data_len =
  love.filesystem.read('data', 'libspectrogram.dylib')
if libspectrogram_data == nil then
  -- On failure the second return value carries the error message
  error('cannot read libspectrogram.dylib: ' .. tostring(libspectrogram_data_len))
end
local libspectrogram_f = assert(io.open(libspectrogram_path, 'wb'))
libspectrogram_f:write(libspectrogram_data:getString())
libspectrogram_f:close()
local libspectrogram = ffi.load(libspectrogram_path)
-- The library is mapped into the process by now, so the on-disk copy can be
-- unlinked instead of leaving one stray temp file behind per launch.
os.remove(libspectrogram_path)
-- Per-track analysis data, indexed like `track_meta`:
--   sp_ns_windows[i]     number of analysis windows
--   spectrograms[i]      float* laid out as [window][channel (2)][pitch bin]
--   energy_waveforms[i]  float* with one energy value per window
-- The buffers are allocated by the C library and kept for the lifetime of
-- the program.
local sp_ns_windows = {}
local spectrograms = {}
local energy_waveforms = {}
for i = 1, #track_meta do
  local sp_path = 'tracks/TwAll-22a-' .. i .. '.ogg.bin'
  local sp_data_bytes, sp_data_len = love.filesystem.read('data', sp_path)
  if sp_data_bytes == nil then
    -- On failure the second return value carries the error message
    error('cannot read ' .. sp_path .. ': ' .. tostring(sp_data_len))
  end
  -- One-element arrays act as out-parameters for the C call
  local n_windows = ffi.new('int[1]')
  local spectrogram = ffi.new('float *[1]')
  local energy_waveform = ffi.new('float *[1]')
  libspectrogram.spectrogram_preprocessed(
    sp_data_bytes:getFFIPointer(), sp_data_len,
    n_windows, spectrogram, energy_waveform)
  n_windows = n_windows[0]
  spectrogram = spectrogram[0]
  energy_waveform = energy_waveform[0]
  print(i, n_windows, spectrogram, energy_waveform)
  sp_ns_windows[i] = n_windows
  spectrograms[i] = spectrogram
  energy_waveforms[i] = energy_waveform
end
-- Offset subtracted from the timeline time to obtain the music time
local MUSIC_DELAY = 3
local music = love.audio.newSource('TwAll-22a.ogg', 'stream')

-- Screen rows of the lowest and highest displayed pitch
local y_low = H * 0.82
local y_high = H * 0.11

--- Screen y coordinate of a pitch.
-- Maps `pitch_low` to `y_low` and `pitch_high` to `y_high`, linearly in
-- between; fractional pitches are accepted.
local function pitch_y(pitch)
  local y_span = y_high - y_low
  local pitch_offset = pitch - pitch_low
  return y_low + y_span * pitch_offset / (pitch_high - pitch_low)
end
--- Frequency of a pitch number: 69 maps to 440 and every 12 steps doubles it.
local function pitch_hz(pitch)
  local steps_from_reference = pitch - 69
  return 440 * 2 ^ (steps_from_reference / 12)
end
-- print(pitch_hz(69), pitch_hz(pitch_low), pitch_hz(pitch_high))
-- Each pitch step is subdivided into `sp_bin_subdiv` spline samples
local sp_bin_subdiv = 4
local sp_n_spline = math.floor((pitch_high - pitch_low) * sp_bin_subdiv)

-- One dynamic triangle-strip mesh per track, holding a left and a right
-- vertex for every spline sample (samples 0..sp_n_spline).
local sp_meshes = {}
do
  local vertex_count = 2 * (sp_n_spline + 1)
  for track = 1, n_tracks do
    sp_meshes[track] = love.graphics.newMesh(vertex_count, 'strip', 'dynamic')
  end
end
-- Fonts: three sizes of the CJK face and two sizes of the Latin face, all
-- scaled by GLOBAL_SCALE.
local font, font_md, font_lg, font_en, font_en_md
do
  local function scaled_font(file, size)
    return love.graphics.newFont(file, size * GLOBAL_SCALE)
  end
  font = scaled_font('AaKaiSong2WanZi2.ttf', 18)
  font_md = scaled_font('AaKaiSong2WanZi2.ttf', 21)
  font_lg = scaled_font('AaKaiSong2WanZi2.ttf', 24)
  font_en = scaled_font('Imprima-Regular.ttf', 18)
  font_en_md = scaled_font('Imprima-Regular.ttf', 21)
end

-- instr_text[i] = { Text of the track's first name, Text of its second name }
local instr_text = {}
for track = 1, n_tracks do
  local meta = track_meta[track]
  local label_cjk = love.graphics.newText(font, meta[1])
  local label_en = love.graphics.newText(font_en, meta[2])
  instr_text[track] = { label_cjk, label_en }
end
-- Pre-rendered Text objects for every entry of `sections` (index 0 included).
-- section_en_text[i] stays nil when the entry's third field is ''.
local section_text = {}
local section_orig_text = {}
local section_en_text = {}
for idx = 0, #sections do
  local entry = sections[idx]

  section_text[idx] = love.graphics.newText(font_lg, entry[1])

  -- Entry 5 is the only one whose second field is drawn with the CJK font
  local orig_font = font_en_md
  if idx == 5 then
    orig_font = font_md
  end
  section_orig_text[idx] = love.graphics.newText(orig_font, entry[2])

  local english = entry[3]
  if english ~= '' then
    section_en_text[idx] = love.graphics.newText(font_en_md, '/ ' .. english)
  end
end
-- Cached Text objects for the bar counter and the timecode, together with
-- the value each one was last built from. `draw` rebuilds a Text only when
-- its value changes. All four must be declared here so that `draw` captures
-- them as upvalues instead of creating globals.
local playhead_text_bar = nil
local playhead_text_timecode = nil
local playhead_last_upd_bar = nil
local playhead_last_upd_timecode = nil
--- Cubic Hermite interpolation of a function sampled at integer positions.
-- Tangents at the two surrounding samples are central differences of their
-- neighbours.
-- @param x position to evaluate; may be fractional
-- @param fn function(i) returning the sample at integer position i; it is
--   called for floor(x) - 1 .. floor(x) + 2
-- @return the interpolated value (exactly fn(x) when x is an integer)
local function cubic_interpolation(x, fn)
  local base = math.floor(x)
  local frac = x - base

  local before = fn(base - 1)
  local left = fn(base)
  local right = fn(base + 1)
  local after = fn(base + 2)

  local slope_left = (right - before) / 2
  local slope_right = (after - left) / 2

  local frac_sq = frac * frac
  local frac_cu = frac_sq * frac

  return (2 * frac_cu - 3 * frac_sq) * (left - right) + left +
    (frac_cu - frac_sq) * (slope_left + slope_right) +
    (frac - frac_sq) * slope_left
end
--- Render one complete frame for timeline time `t`.
-- The music position is `t - MUSIC_DELAY`. Outside recording mode this also
-- keeps audio playback in step with that position.
-- Drawing order: per-track centre line, spectrogram mesh and instrument
-- label; active note bars; section title; bar counter and timecode.
-- @param t timeline time
local draw = function (t)
love.graphics.clear(0.13, 0.12, 0.15, 1)
local music_time = t - MUSIC_DELAY
-- Audio sync (interactive mode only): pause outside the track's duration,
-- otherwise play and re-seek when playback drifts by more than 0.02.
if not isRecord then
if music_time < 0 or music_time > music:getDuration() then
music:pause()
else
music:play()
if math.abs(music:tell() - music_time) > 0.02 then
music:seek(music_time)
end
end
end
-- Instrument labels alternate between their two texts every INSTR_TEXT_INTV;
-- the outgoing text fades out (eased) during the last INSTR_TEXT_FADE_DUR of
-- each interval while the other fades in.
local INSTR_TEXT_INTV = 15
local INSTR_TEXT_FADE_DUR = 0.4
local t1 = t % INSTR_TEXT_INTV
local a1 = 1
if t1 >= INSTR_TEXT_INTV - INSTR_TEXT_FADE_DUR then
local x = (t1 - (INSTR_TEXT_INTV - INSTR_TEXT_FADE_DUR)) / INSTR_TEXT_FADE_DUR
a1 = (x < 0.5 and 1 - x * x * 2 or (1-x) * (1-x) * 2)
end
local instr_name_alpha = {a1, 1 - a1}
-- In every second interval the roles of the two texts are swapped
if t % (INSTR_TEXT_INTV * 2) >= INSTR_TEXT_INTV then
instr_name_alpha[1], instr_name_alpha[2]
= instr_name_alpha[2], instr_name_alpha[1]
end
for i = 1, n_tracks do
local x_cen = track_x(i)
local r, g, b = track_meta[i][3], track_meta[i][4], track_meta[i][5]
-- Central line
love.graphics.setColor(r, g, b, 0.25)
love.graphics.setLineWidth(1 * GLOBAL_SCALE)
love.graphics.line(x_cen, y_low, x_cen, y_high)
-- Spectrogram
local sp_n_windows = sp_ns_windows[i]
local spectrogram = spectrograms[i]
local energy_waveform = energy_waveforms[i]
-- Compresses a raw spectrogram value into [0, 1)
local map_sp_val = function (sp_value)
return 1 - math.exp(-sp_value * 200)
end
-- One analysis window per 2048 samples at 44100 Hz
local sp_window_index = math.floor(music_time / (2048 / 44100))
local spline = {{}, {}}
for ch = 0, 1 do
local sp_values = {}
local spline_ch = spline[ch + 1]
-- Sample the spectrogram at the current window; out-of-range windows read 0.
-- The bin range 0..84 equals pitch_high - pitch_low.
-- NOTE(review): sp_window_index is already floored, so the fractional part
-- inside cubic_interpolation is 0 and this returns the window's own value
-- with no blending over time -- confirm whether that is intended.
-- NOTE(review): the channel offset is `0 * n_pitches`, so both iterations
-- of `ch` read channel 0 and the two sides of the mesh mirror each other --
-- confirm whether `ch * n_pitches` was intended.
for bin = 0, 84 do
sp_values[bin] = cubic_interpolation(
sp_window_index,
function (i)
if i < 0 or i >= sp_n_windows then return 0 end
return spectrogram[i * (2 * n_pitches) + 0 * n_pitches + bin]
end
)
end
-- Cubic spline interpolation between bins
for p = 0, sp_n_spline do
spline_ch[p] = math.max(0, cubic_interpolation(
math.max(0, math.min(84, p / sp_bin_subdiv)),
function (i) return sp_values[math.max(0, math.min(84, i))] end
))
end
end
-- Build the strip: for each spline sample one vertex left of the centre line
-- (from spline[1]) and one right of it (from spline[2]), at most 0.4 track
-- widths away.
local vertices = {}
for p = 0, sp_n_spline do
local y = pitch_y(pitch_low + p / sp_bin_subdiv)
vertices[#vertices + 1] = { -track_width * 0.4 * map_sp_val(spline[1][p]), y }
vertices[#vertices + 1] = { track_width * 0.4 * map_sp_val(spline[2][p]), y }
end
sp_meshes[i]:setVertices(vertices)
love.graphics.setColor(r, g, b, 0.15)
love.graphics.draw(sp_meshes[i], x_cen, 0)
-- Instrument name text
-- The label's opacity follows the track's energy at the current window
local energy = cubic_interpolation(
sp_window_index,
function (i)
if i < 0 or i >= sp_n_windows then return 0 end
return energy_waveform[i]
end
)
energy = 1 - math.exp(-energy * 300)
for j = 1, 2 do
love.graphics.setColor(r, g, b, instr_name_alpha[j] * (0.3 + 0.7 * energy))
local text = instr_text[i][j]
-- Rotated by -60 degrees about the text's right-middle point
love.graphics.draw(text,
x_cen + W * (j == 1 and 0.005 or 0.0015), y_low + H * 0.021, -math.pi / 3,
1, 1, text:getWidth(), text:getHeight() / 2)
end
end
-- Binary chop for event index
-- Afterwards `lo` is the index of the last event whose time is strictly
-- before music_time (0 when there is none).
local lo, hi = 0, #events + 1
while lo < hi - 1 do
local mid = math.floor((lo + hi) / 2)
if events[mid].time >= music_time then hi = mid
else lo = mid end
end
local event_index = lo
-- Walk backwards over at most the last 20 time units of events, picking up
-- the most recent bar number and every note that is sounding or ended no
-- more than 0.6 ago.
local bar = nil
local active_notes = {}
for i = event_index, 1, -1 do
if events[i].time < music_time - 20 then break end
if bar == nil and events[i].type == 'bar' then
bar = events[i].number
end
if events[i].type == 'note'
and events[i].time <= music_time
and events[i].time + events[i].len + 0.6 >= music_time
then
local end_time = events[i].time + events[i].len
-- Tracks flagged `strike` start fading at most 0.1 after the onset
if track_meta[map_midi_track(events[i].track)][6].strike then
end_time = math.min(end_time, events[i].time + 0.1)
end
active_notes[#active_notes + 1] = {
track = map_midi_track(events[i].track),
pitch = events[i].pitch,
vel = events[i].vel,
into = music_time - events[i].time,
fade = math.max(0, music_time - end_time),
}
end
end
if bar == nil then bar = 1 end
-- Each active note is a horizontal bar on its track at the note's pitch
for i = 1, #active_notes do
local track = active_notes[i].track
local r, g, b = track_meta[track][3], track_meta[track][4], track_meta[track][5]
local x = track_x(track)
local y = pitch_y(active_notes[i].pitch)
-- Width grows to full size over the first 0.2 (eased) and scales with the
-- square root of the velocity
local w = math.min(1, active_notes[i].into / 0.2)
w = 1 - (1 - w)^5
w = w * (0.3 + 0.7 * math.sqrt(active_notes[i].vel / 127))
-- a1: settles from 1 to 0.7 over the first 0.3; a2: fade-out over 0.3
-- after the note's end
local a1 = 1 - 0.3 * math.min(1, active_notes[i].into / 0.3)
local a2 = math.max(0, 1 - active_notes[i].fade / 0.3)
love.graphics.setColor(r, g, b, a1 * a2)
love.graphics.setLineWidth(1 * GLOBAL_SCALE)
love.graphics.line(
x - w * track_width * 0.25, y,
x + w * track_width * 0.25, y
)
end
-- Rebuild the cached bar / timecode Text objects only when their value changed
if playhead_last_upd_bar ~= bar then
playhead_last_upd_bar = bar
playhead_text_bar = love.graphics.newText(font_en, string.format('bar %d / 408', bar))
end
local timecode = math.max(0, math.floor(music_time))
if playhead_last_upd_timecode ~= timecode then
playhead_last_upd_timecode = timecode
playhead_text_timecode = love.graphics.newText(font_en,
string.format('%02d:%02d', math.floor(timecode / 60), timecode % 60))
end
-- Section title: a section's own title is shown for the 15 following its
-- start, with 0.25 cross-fades to and from entry 0 on either side. The first
-- section whose window contains music_time wins.
local section_idx = 0
local section_alpha = 1
for i = 1, #sections do
local t1 = music_time - sections[i][4]
if t1 >= -0.25 and t1 <= 15.25 then
if t1 < 0 then
section_alpha = -t1 / 0.25
elseif t1 < 15 then
section_idx = i
section_alpha = math.min(1, t1 / 0.25, (15 - t1) / 0.25)
else
section_alpha = (t1 - 15) / 0.25
end
break
end
end
-- Once the first section has started, entry 0 is shown at quarter opacity
if section_idx == 0 and music_time > sections[1][4] then
section_alpha = section_alpha * 0.25
end
love.graphics.setColor(0.99, 0.99, 0.99, section_alpha)
love.graphics.draw(section_text[section_idx], W * 0.016, W * 0.016)
-- The second and third texts are shifted down by the height difference to
-- the main title
love.graphics.draw(section_orig_text[section_idx],
W * 0.050 + section_text[section_idx]:getWidth(),
W * 0.016 + (section_text[section_idx]:getHeight() - section_orig_text[section_idx]:getHeight()))
love.graphics.setColor(0.99, 0.99, 0.99, section_alpha * 0.5)
if section_en_text[section_idx] ~= nil then
love.graphics.draw(section_en_text[section_idx],
W * 0.064 + section_text[section_idx]:getWidth() + section_orig_text[section_idx]:getWidth(),
W * 0.016 + (section_text[section_idx]:getHeight() - section_orig_text[section_idx]:getHeight()))
end
-- Bar counter and timecode, right-aligned in the top-right corner; shown
-- while music_time is within [-0.25, 713.25] with 0.25 fades at both ends
if music_time >= -0.25 and music_time <= 713.25 then
local alpha = math.min(1, (music_time + 0.25) / 0.25, (713.25 - music_time) / 0.25)
love.graphics.setColor(0.99, 0.99, 0.99, alpha * 0.5)
love.graphics.draw(playhead_text_bar, W * 0.99 - playhead_text_bar:getWidth(), W * 0.01)
love.graphics.setColor(0.99, 0.99, 0.99, alpha * 0.1)
love.graphics.draw(playhead_text_timecode, W * 0.99 - playhead_text_timecode:getWidth(), W * 0.032)
end
end
-- Timeline position used in interactive mode, and the fixed frame duration
-- used when recording
local T = 0
local frameTime = 1 / 60
local frame = 0

--- Advance the interactive timeline.
-- Does nothing in recording mode. Otherwise: holding space freezes time,
-- left/right scrub backwards/forwards at 10x speed (100x with left shift),
-- and with no key held time advances normally.
function love.update(dt)
  if isRecord then
    return
  end

  local is_down = love.keyboard.isDown
  if is_down('space') then
    -- paused: leave T untouched
    return
  end

  if is_down('left') then
    local speed = is_down('lshift') and 100 or 10
    T = T - dt * speed
  elseif is_down('right') then
    local speed = is_down('lshift') and 100 or 10
    T = T + dt * speed
  else
    T = T + dt
  end
end
-- C stdio functions used to stream raw frames into an ffmpeg process.
-- A stream obtained from popen must be closed with pclose, which also waits
-- for the child process to finish.
ffi.cdef [[
void *popen(const char *command, const char *mode);
int pclose(void *stream);
size_t fwrite(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream);
int fclose(void *stream);
]]

--- LÖVE draw callback.
-- Interactive mode: draws the frame for the current timeline position `T`.
-- Recording mode: renders every frame of the whole video inside this single
-- call, pipes the raw RGBA pixels of each screenshot to ffmpeg (writing
-- output.mp4) and then asks LÖVE to quit.
function love.draw()
  if not isRecord then
    draw(T)
    return
  end

  local pipe = ffi.C.popen(
    string.format('ffmpeg -f rawvideo -pixel_format rgba -video_size %dx%d -r 60 -i - -pix_fmt yuv420p -crf 26 output.mp4 -y', W, H),
    'w')
  -- A NULL pointer cdata compares equal to nil
  if pipe == nil then
    error('could not start ffmpeg via popen')
  end

  local n_frames = 60 * 719
  for i = 0, n_frames - 1 do
    draw(i * frameTime)
    love.graphics.present()
    -- NOTE(review): the screenshot is requested after present(), so the image
    -- handed to the callback is presumably taken at the following present --
    -- confirm the frame alignment against the audio if it matters.
    love.graphics.captureScreenshot(function (img)
      ffi.C.fwrite(img:getFFIPointer(), img:getSize(), 1, pipe)
      if i == n_frames - 1 then
        -- Last frame written: close the pipe and wait for ffmpeg to finish
        -- so the output file is complete before the program exits.
        ffi.C.pclose(pipe)
      end
    end)
  end
  love.event.quit()
end
# Track names in score order; the position in this list (1-based) is the
# number used in the per-track file names below.
names=(
  Piccolo_1,_2
  Flute_1,_2
  Oboe_1,_2
  Clarinet_1,_2
  Bassoon_1,_2
  Horn_1,_2
  Horn_3,_4
  Trumpet_1,_2
  Trombone_1,_2
  Timpani
  Glockenspiel
  Vibraphone
  "Marimba_(Single_Stave)"
  Snare_Drum
  Bass_Drum
  Concert_Toms
  Triangle
  Suspended_Cymbal
  Metal_Wind_Chimes
  Maracas
  Harp
  First_Violins
  Second_Violins
  Violas
  Violoncellos
  Contrabasses
)

# Run the spectrogram tool on every numbered track file.
# All expansions are quoted so that names are never word-split or globbed.
i=0
for track in "${names[@]}"; do
  i=$((i + 1))
  echo "$i" "$track"
  # One-off rename from the exported file names to the numbered ones:
  # mv ~/Downloads/"TwAll-22a-${track}.ogg" "./tracks/TwAll-22a-$i.ogg"
  ./spectrogram "./tracks/TwAll-22a-$i.ogg" "./tracks/TwAll-22a-$i.ogg.bin"
done
// cc spectrogram.c -O2 -I../kissfft-131.1.0 ../kissfft-131.1.0/libkissfft-float.a -dynamiclib -o libspectrogram.dylib
// cc spectrogram.c -O2 -I../kissfft-131.1.0 ../kissfft-131.1.0/libkissfft-float.a -o spectrogram
#include "stb_vorbis.h"
#define MA_NO_ENCODING
#define MA_NO_ENGINE
#define MA_NO_MP3
#define MA_NO_FLAC
#define MINIAUDIO_IMPLEMENTATION
#include "miniaudio.h"
#include "kiss_fftr.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h> // malloc
#include <stdint.h> // uint32_t, uint8_t
#include <string.h> // memset
#define FFT_SIZE_LARGE 16384
#define FFT_SIZE_SMALL 8192
#define WINDOW_STEP 2048
#define PITCH_LOW 28
#define PITCH_HIGH 112
void spectrogram(const void *data, int len, int *o_n_windows, float **sp, float **enr)
{
ma_decoder decoder;
ma_decoder_config cfg = ma_decoder_config_init(ma_format_f32, 2, 44100);
assert(ma_decoder_init_memory(data, len, &cfg, &decoder) == MA_SUCCESS);
// Sample = stereo sample frame (L + R)
ma_uint64 n_samples;
assert(ma_decoder_get_length_in_pcm_frames(&decoder, &n_samples) == MA_SUCCESS);
fprintf(stderr, "length = %llu\n", n_samples);
float *pcm = (float *)malloc(sizeof(float) * n_samples * 2);
ma_decoder_read_pcm_frames(&decoder, pcm, n_samples, NULL);
kiss_fftr_cfg fft_cfg_large = kiss_fftr_alloc(FFT_SIZE_LARGE, 0, NULL, NULL);
kiss_fftr_cfg fft_cfg_small = kiss_fftr_alloc(FFT_SIZE_SMALL, 0, NULL, NULL);
float *hann_window_large = (float *)malloc(sizeof(float) * FFT_SIZE_LARGE);
float *hann_window_small = (float *)malloc(sizeof(float) * FFT_SIZE_SMALL);
for (int ty = 0; ty <= 1; ty++) {
int fft_size = (ty == 0 ? FFT_SIZE_LARGE : FFT_SIZE_SMALL);
float *hann_window = (ty == 0 ? hann_window_large : hann_window_small);
for (int j = 0; j < fft_size; j++) {
hann_window[j] = 0.5f * (1 - cosf((float)j / fft_size * (2 * M_PI)));
}
}
float *fft_window = (float *)malloc(sizeof(float) * FFT_SIZE_LARGE);
kiss_fft_cpx *fft_result = (kiss_fft_cpx *)malloc(sizeof(kiss_fft_cpx) * (FFT_SIZE_LARGE / 2 + 1));
int n_windows = (n_samples + (WINDOW_STEP - 1)) / WINDOW_STEP;
int n_bins = PITCH_HIGH - PITCH_LOW + 1;
float *binned_result = (float *)malloc(sizeof(float) * n_windows * 2 * n_bins);
// binned_result[n_windows][2][n_bins]
memset(binned_result, 0, sizeof(float) * n_windows * 2 * n_bins);
float *energy_result = (float *)malloc(sizeof(float) * n_windows);
float last_energy = 0; // EMA
float bin_index_large[n_bins + 1], bin_index_small[n_bins + 1];
for (int i = 0; i <= n_bins; i++) {
float freq = 440 * powf(2, (PITCH_LOW + i - 69 - 0.5f) / 12.f);
// (FFT size / 2) / 22050
bin_index_large[i] = (freq * FFT_SIZE_LARGE / 44100);
bin_index_small[i] = (freq * FFT_SIZE_SMALL / 44100);
}
for (int i = 0; i < n_windows; i++) {
if (i * 100 / n_windows != (i - 1) * 100 / n_windows) fprintf(stderr, "%d%%\n", i * 100 / n_windows);
int sample_start = i * WINDOW_STEP;
for (int ch = 0; ch < 2; ch++) {
for (int ty = 0; ty <= 1; ty++) {
int fft_size = (ty == 0 ? FFT_SIZE_LARGE : FFT_SIZE_SMALL);
kiss_fftr_cfg fft_cfg = (ty == 0 ? fft_cfg_large : fft_cfg_small);
float *bin_index = (ty == 0 ? bin_index_large : bin_index_small);
float *hann_window = (ty == 0 ? hann_window_large : hann_window_small);
for (int j = 0; j < fft_size; j++) {
int index = sample_start + (j - fft_size / 2);
fft_window[j] = hann_window[j] * (index < 0 || index >= n_samples ? 0 : pcm[index * 2 + ch]);
}
kiss_fftr(fft_cfg, fft_window, fft_result);
for (int bin = 0; bin < n_bins; bin++) {
float bin_sum = 0;
for (int j = (int)bin_index[bin] + 1; j < (int)bin_index[bin + 1]; j++) {
#define cplx_norm2(_x) ((_x).r * (_x).r + (_x).i * (_x).i)
bin_sum += cplx_norm2(fft_result[j]);
}
if ((int)bin_index[bin] == (int)bin_index[bin + 1]) {
int j = (int)bin_index[bin];
bin_sum += cplx_norm2(fft_result[j]) * (bin_index[bin + 1] - bin_index[bin]);
} else {
int j1 = (int)bin_index[bin];
float j1f = j1 + 1 - bin_index[bin];
int j2 = (int)bin_index[bin + 1];
float j2f = bin_index[bin + 1] - j2;
bin_sum += (
cplx_norm2(fft_result[j1]) * j1f +
cplx_norm2(fft_result[j2]) * j2f
);
}
float bin_value = sqrtf(bin_sum) / (fft_size / 2);
float bin_weight = (float)bin / (n_bins - 1);
if (ty == 0) bin_weight = 1 - bin_weight;
binned_result[i * (2 * n_bins) + ch * n_bins + bin] += bin_value * bin_weight;
}
if (ty == 0) {
float energy = 0;
for (int i = 1; i <= fft_size / 2; i++)
energy += cplx_norm2(fft_result[i]);
energy = sqrtf(energy) / (fft_size / 2);
last_energy = last_energy * 0.9 + energy * 0.1;
energy_result[i] = last_energy;
}
}
}
}
ma_decoder_uninit(&decoder);
if (o_n_windows != NULL) *o_n_windows = n_windows;
if (sp != NULL) *sp = binned_result;
if (enr != NULL) *enr = energy_result;
}
/* Write a 32-bit unsigned integer to `f` as four bytes, least significant
 * byte first. */
static inline void put_u32(FILE *f, uint32_t x)
{
  for (int shift = 0; shift < 32; shift += 8)
    fputc((x >> shift) & 0xff, f);
}
/* Write a float to `f` as the four bytes of its 32-bit representation,
 * least significant byte first. */
static inline void put_f32(FILE *f, float x)
{
  // memcpy instead of a pointer cast: reading a float object through a
  // uint32_t lvalue violates the strict aliasing rule.
  uint32_t x_i;
  memcpy(&x_i, &x, sizeof x_i);
  put_u32(f, x_i);
}
/* Read a 32-bit unsigned integer stored least significant byte first from
 * the four bytes at `p`. */
static inline uint32_t get_u32(const void *p)
{
  const uint8_t *bytes = (const uint8_t *)p;
  uint32_t value = 0;
  // Fold in the bytes from most to least significant
  for (int k = 3; k >= 0; k--)
    value = (value << 8) | (uint32_t)bytes[k];
  return value;
}
/* Read a float whose 32-bit representation is stored least significant byte
 * first in the four bytes at `p`. */
static inline float get_f32(const void *p)
{
  uint32_t x = get_u32(p);
  // memcpy instead of a pointer cast: reading a uint32_t object through a
  // float lvalue violates the strict aliasing rule.
  float value;
  memcpy(&value, &x, sizeof value);
  return value;
}
/*
 * Load a spectrogram that was written earlier by this program's main().
 *
 * The input is a sequence of records, one per window, each consisting of
 * 2 * n_bins floats of spectrogram data followed by one energy float, all
 * stored as 32-bit values, least significant byte first. The number of
 * windows is derived from `len`; trailing bytes that do not fill a whole
 * record are ignored.
 *
 *   data, len    serialized spectrogram
 *   o_n_windows  receives the number of windows (may be NULL)
 *   sp           receives a malloc'ed array [n_windows][2][n_bins]; the
 *                caller frees it (may be NULL, then it is freed here)
 *   enr          receives a malloc'ed array [n_windows]; the caller frees it
 *                (may be NULL, then it is freed here)
 */
void spectrogram_preprocessed(const void *data, int len, int *o_n_windows, float **sp, float **enr)
{
  int n_bins = PITCH_HIGH - PITCH_LOW + 1;
  int n_windows = len / 4 / (2 * n_bins + 1);
  if (n_windows < 0) n_windows = 0;  // negative length: nothing to read

  float *binned_result = (float *)malloc(sizeof(float) * n_windows * 2 * n_bins);
  float *energy_result = (float *)malloc(sizeof(float) * n_windows);

  // Byte cursor; arithmetic on `void *` is a compiler extension, not ISO C
  const uint8_t *cursor = (const uint8_t *)data;
  for (int i = 0; i < n_windows; i++) {
    for (int j = 0; j < 2 * n_bins; j++) {
      binned_result[i * (2 * n_bins) + j] = get_f32(cursor);
      cursor += 4;
    }
    energy_result[i] = get_f32(cursor);
    cursor += 4;
  }

  if (o_n_windows != NULL) *o_n_windows = n_windows;
  if (sp != NULL) *sp = binned_result; else free(binned_result);
  if (enr != NULL) *enr = energy_result; else free(energy_result);
}
/*
 * Command-line front end: <audio file> [<output file>]
 *
 * Reads the whole audio file into memory, runs spectrogram() on it and
 * writes the result to the output file (or stdout when none is given) as
 * one record per window: 2 * n_bins spectrogram floats followed by the
 * window's energy float. No header is written; spectrogram_preprocessed()
 * derives the window count from the data length.
 *
 * Returns 0 on success or when only usage was printed, 1 on I/O failure.
 */
int main(int argc, char *argv[])
{
  if (argc <= 1) {
    fprintf(stderr, "%s <audio file> [<output file>]\n", argv[0]);
    return 0;
  }
  // argv[2] is NULL when no output file was given; never pass that to %s
  const char *out_path = (argc >= 3 ? argv[2] : NULL);
  fprintf(stderr, "%s %s\n", argv[1], out_path != NULL ? out_path : "(stdout)");

  FILE *f = fopen(argv[1], "rb");
  if (f == NULL) {
    perror(argv[1]);
    return 1;
  }
  fseek(f, 0, SEEK_END);
  long len = ftell(f);
  fseek(f, 0, SEEK_SET);
  if (len <= 0) {
    fprintf(stderr, "%s: empty or unreadable file\n", argv[1]);
    fclose(f);
    return 1;
  }
  void *buf = malloc(len);
  if (buf == NULL || fread(buf, len, 1, f) != 1) {
    fprintf(stderr, "%s: failed to read %ld bytes\n", argv[1], len);
    free(buf);
    fclose(f);
    return 1;
  }
  fclose(f);

  f = (out_path != NULL ? fopen(out_path, "wb") : stdout);
  if (f == NULL) {
    perror(out_path);
    free(buf);
    return 1;
  }

  int n_windows;
  float *sp, *enr;
  spectrogram(buf, len, &n_windows, &sp, &enr);
  free(buf);

  int n_bins = PITCH_HIGH - PITCH_LOW + 1;
  // put_u32(f, n_windows);
  for (int i = 0; i < n_windows; i++) {
    for (int ch = 0; ch < 2; ch++)
      for (int j = 0; j < n_bins; j++)
        put_f32(f, sp[i * (2 * n_bins) + ch * n_bins + j]);
    put_f32(f, enr[i]);
  }
  fclose(f);

  // Both result arrays were malloc'ed by spectrogram()
  free(sp);
  free(enr);
  return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment