Last active
February 28, 2024 17:25
-
-
Save ayuusweetfish/7c7791162bf31a9c78dd61f2ac79889a to your computer and use it in GitHub Desktop.
Recording LÖVE animations into a video file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// cc % -O2 -I../kissfft-131.1.0 ../kissfft-131.1.0/libkissfft-float.a | |
#include <math.h> | |
#include <stdio.h> | |
#include "kiss_fftr.h" | |
int main() | |
{ | |
float x[100]; | |
for (int i = 0; i < 100; i++) { | |
x[i] = sinf((float)i * 0.75f) * 5000 + i % 44; | |
printf("%2d %10.7f\n", i, x[i]); | |
} | |
// Hann window | |
if (0) for (int i = 0; i < 100; i++) | |
x[i] *= (1 - cosf((float)i / 100 * 2 * M_PI)) * 0.5f; | |
kiss_fftr_cfg fft_cfg = kiss_fftr_alloc(100, 0, NULL, NULL); | |
kiss_fft_cpx y[51]; | |
kiss_fftr(fft_cfg, x, y); | |
for (int i = 0; i <= 50; i++) | |
printf("%2d %14.7f %14.7f %14.7f\n", i, | |
y[i].r / 50, y[i].i / 50, | |
sqrtf(y[i].r*y[i].r + y[i].i*y[i].i) / 50); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Recording mode is enabled by the mere presence of the `record` environment
-- variable; its value is not inspected.
local isRecord = os.getenv('record') ~= nil

-- Render scale: 2 while recording, 1.25 for interactive playback.
local GLOBAL_SCALE = 1.25
if isRecord then
  GLOBAL_SCALE = 2
end

-- Window size in pixels: a 960x600 base layout multiplied by the scale.
local W = GLOBAL_SCALE * 960
local H = GLOBAL_SCALE * 600

love.window.setMode(W, H, { highdpi = false, fullscreen = false })
-- One entry per displayed track, in left-to-right order.
-- Fields of each entry:
--   [1] Chinese instrument name
--   [2] abbreviated English instrument name
--   [3], [4], [5] red, green, blue components of the track colour
--   [6] options table; `strike = true` marks instruments whose notes are
--       drawn as short strikes rather than for their full length
local track_meta = {
  -- Woodwinds
  {'短笛', 'Picc', 0.7, 0.85, 0.5, {}},
  {'长笛', 'Fl', 0.7, 0.85, 0.5, {}},
  {'双簧管', 'Ob', 0.7, 0.85, 0.5, {}},
  {'单簧管', 'Cl', 0.7, 0.85, 0.5, {}},
  {'大管', 'Bsn', 0.7, 0.85, 0.5, {}},
  -- Brass
  {'圆号', 'Hn', 1.0, 0.7, 0.5, {}},
  {'圆号', 'Hn', 1.0, 0.7, 0.5, {}},
  {'小号', 'Tpt', 1.0, 0.7, 0.5, {}},
  {'长号', 'Tbn', 1.0, 0.7, 0.5, {}},
  -- Pitched percussion
  {'定音鼓', 'Timp', 0.9, 0.65, 0.9, {strike = true}},
  {'钟琴', 'Glsp', 0.9, 0.65, 0.9, {strike = true}},
  {'颤音琴', 'Vib', 0.9, 0.65, 0.9, {}},
  {'马林巴琴', 'Mrm', 0.9, 0.65, 0.9, {strike = true}},
  -- Unpitched percussion
  {'小鼓', 'Sn Dr', 0.8, 0.7, 0.85, {strike = true}},
  {'大鼓', 'Bs Dr', 0.8, 0.7, 0.85, {strike = true}},
  {'筒鼓', 'Toms', 0.8, 0.7, 0.85, {strike = true}},
  {'三角铁', 'Tri', 0.8, 0.7, 0.85, {strike = true}},
  {'吊钹', 'Susp Cym', 0.8, 0.7, 0.85, {strike = true}},
  {'风铃', 'Wn Ch', 0.8, 0.7, 0.85, {}},
  {'沙锤', 'Mrcs', 0.8, 0.7, 0.85, {strike = true}},
  -- Harp
  {'竖琴', 'Hp', 1.0, 0.6, 0.6, {strike = true}},
  -- Strings
  {'第一小提琴', 'I Vln', 0.7, 0.8, 0.95, {}},
  {'第二小提琴', 'II Vln', 0.7, 0.8, 0.95, {}},
  {'中提琴', 'Vla', 0.7, 0.8, 0.95, {}},
  {'大提琴', 'Vlc', 0.7, 0.8, 0.95, {}},
  {'低音提琴', 'Cb', 0.7, 0.8, 0.95, {}},
}

-- Number of displayed tracks.
local n_tracks = #track_meta
-- Horizontal layout: the window width is divided into n_tracks equal columns
-- plus x_padding column-widths of margin on each side.
local x_padding = 0.4
local track_width = W / (2 * x_padding + n_tracks)

--- X coordinate of the centre line of track `i` (1-based).
local function track_x(i)
  local columns_from_left = x_padding + i - 0.5
  return track_width * columns_from_left
end
--- Convert a track number from the event file into an index into track_meta.
-- Tracks up to and including 20 are shifted up by one; higher track numbers
-- are used unchanged (so 20 and 21 land on the same display track).
local function map_midi_track(midi_track)
  if midi_track > 20 then
    return midi_track
  end
  return midi_track + 1
end
-- Titles shown in the top-left corner.
-- Index 0 is the title of the whole piece; indices 1..7 are its sections.
-- Fields of each entry:
--   [1] Chinese title
--   [2] title in its original language
--   [3] English translation ('' when there is none to show)
--   [4] start of the section in seconds of music time (sections only)
local sections = {
  [0] = {'蓝星飞行日记', 'Planet Blue Flight Journal', ''},
  [1] = {'I. 在黄昏起飞', 'Mit der einbrechenden Dämmerung', 'As twilight falls', 0},
  [2] = {'II. 什锦莓果酱', 'Berry jam medley', '', 107},
  [3] = {'III. 暮蓝时刻', 'L’Heure bleue', 'Blue hour', 240},
  [4] = {'IV. 星海拾梦', 'Ubi somnia stellarum lustrant', 'Where stars’ dreams scatter', 310},
  [5] = {'V. 鸽咕咕', 'ハトなり', 'Call of doves', 472},
  [6] = {'VI. 晴天圆舞曲', 'Valse du ciel clair', 'Waltz of the clear sky', 499.5},
  [7] = {'VII. 大块之染', 'Mise à la masse', 'Grounded land', 606},
}
-- Timeline of the piece, parsed once at load time from the text event dump.
--
-- File format, as consumed here: blank lines and lines starting with '#' are
-- ignored; the first remaining line is the tick division; every other line is
-- `<ticks> <track> <type> <numeric args...>`.
--
-- Result: an array, in file order, of
--   { time, type = 'bar',  number }                  -- one per bar line
--   { time, type = 'note', track, pitch, len, vel }  -- one per 'NT' line
-- with `time` and `len` in seconds.
local events = (function ()
  local EVENTS_FILE = 'TwAll-22a.txt'

  local events = {}
  -- Ticks per quarter note, judging by how bar length and tempo are derived
  -- from it below.
  local division = nil
  local bar = 0
  -- bar_len = 1 and bar_start = -1 make the first event at tick 0 open bar 1
  -- exactly at tick 0, before any time signature has been seen.
  local bar_len = 1
  local bar_start = -1
  local last_ticks = 0
  local tick_secs = 0   -- seconds per tick under the current tempo
  local time_secs = 0   -- time in seconds of the event at last_ticks

  local line_no = 0
  for line in love.filesystem.lines(EVENTS_FILE) do
    line_no = line_no + 1

    if line == '' or line:sub(1, 1) == '#' then
      -- Blank line or comment: nothing to do.
    elseif division == nil then
      division = tonumber(line)
      if division == nil then
        error(string.format('%s:%d: expected the tick division, got: %s',
          EVENTS_FILE, line_no, line))
      end
    else
      local ticks, track, ty, args_str =
        string.match(line, '(%d+) (%d+) (%w+) ([%d ]+)')
      if ticks == nil then
        -- Fail with the offending line instead of an obscure nil-argument
        -- error further down.
        error(string.format('%s:%d: malformed event line: %s',
          EVENTS_FILE, line_no, line))
      end
      ticks = tonumber(ticks)
      track = tonumber(track)

      local args = {}
      for w in string.gmatch(args_str, '%d+') do
        args[#args + 1] = tonumber(w)
      end

      local delta_ticks = ticks - last_ticks

      -- Emit a 'bar' event for every bar line passed since the last event.
      -- Times use the tempo in force before this event is applied.
      while ticks >= bar_start + bar_len do
        bar = bar + 1
        bar_start = bar_start + bar_len
        events[#events + 1] = {
          time = time_secs + (bar_start - last_ticks) * tick_secs,
          type = 'bar',
          number = bar,
        }
      end

      time_secs = time_secs + delta_ticks * tick_secs
      last_ticks = ticks

      if ty == 'TimeSig' then
        -- numerator / denominator of a whole note, times four quarters.
        bar_len = args[1] / args[2] * 4 * division
      elseif ty == 'Tempo' then
        -- args[1] is presumably beats (quarter notes) per minute.
        tick_secs = (60 / (args[1] * division))
      elseif ty == 'NT' then
        events[#events + 1] = {
          time = time_secs,
          type = 'note',
          track = track,
          pitch = args[1],
          len = args[2] * tick_secs,
          vel = args[3],
        }
      end
    end
  end

  return events
end)()
-- Range of MIDI pitches covered by the display and by the spectrogram bins.
local pitch_low = 28 -- E1
local pitch_high = 112 -- E8
-- Both ends are inclusive, hence the extra one.
local n_pitches = 1 + (pitch_high - pitch_low)
local ffi = require('ffi')

-- Entry points exported by the native spectrogram library.
ffi.cdef [[
void spectrogram(const void *data, int len, int *o_n_windows, float **sp, float **enr);
void spectrogram_preprocessed(const void *data, int len, int *o_n_windows, float **sp, float **enr);
]]

-- The library is read through love.filesystem and copied to a temporary file
-- on the real filesystem, which is the path then handed to ffi.load.
local libspectrogram_path = os.tmpname()
print(libspectrogram_path)

-- Each step asserts its result so that a missing or unwritable file is
-- reported with the underlying error message.
local libspectrogram_data, libspectrogram_data_len =
  assert(love.filesystem.read('data', 'libspectrogram.dylib'))
local libspectrogram_f = assert(io.open(libspectrogram_path, 'wb'))
assert(libspectrogram_f:write(libspectrogram_data:getString()))
libspectrogram_f:close()

local libspectrogram = ffi.load(libspectrogram_path)
-- Per-track analysis data, indexed like track_meta:
--   sp_ns_windows[i]    number of analysis windows
--   spectrograms[i]     float* returned by the native library
--   energy_waveforms[i] float* returned by the native library
local sp_ns_windows = {}
local spectrograms = {}
local energy_waveforms = {}

for i = 1, #track_meta do
  local bin_path = 'tracks/TwAll-22a-' .. i .. '.ogg.bin'
  local sp_data_bytes, sp_data_len = love.filesystem.read('data', bin_path)
  if sp_data_bytes == nil then
    -- On failure the second return value carries the error message.
    error(string.format('cannot read %s: %s', bin_path, tostring(sp_data_len)))
  end

  -- One-element arrays serve as the C out-parameters.
  local out_n_windows = ffi.new('int[1]')
  local out_spectrogram = ffi.new('float *[1]')
  local out_energy_waveform = ffi.new('float *[1]')
  libspectrogram.spectrogram_preprocessed(
    sp_data_bytes:getFFIPointer(), sp_data_len,
    out_n_windows, out_spectrogram, out_energy_waveform)

  local n_windows = out_n_windows[0]
  local spectrogram = out_spectrogram[0]
  local energy_waveform = out_energy_waveform[0]
  print(spectrogram, n_windows, spectrogram, energy_waveform)

  sp_ns_windows[i] = n_windows
  spectrograms[i] = spectrogram
  energy_waveforms[i] = energy_waveform
end

-- Seconds of animation time before the music starts.
local MUSIC_DELAY = 3
local music = love.audio.newSource('TwAll-22a.ogg', 'stream')
-- Vertical extent of the pitch axis: pitch_low is drawn at y_low (near the
-- bottom of the window) and pitch_high at y_high (near the top).
local y_low = H * 0.82
local y_high = H * 0.11

--- Y coordinate of a (possibly fractional) MIDI pitch, by linear mapping.
local function pitch_y(pitch)
  local y_span = y_high - y_low
  local pitch_offset = pitch - pitch_low
  return y_low + y_span * pitch_offset / (pitch_high - pitch_low)
end
--- Frequency in Hz of a MIDI pitch in equal temperament, with pitch 69 at 440 Hz.
local function pitch_hz(pitch)
  local semitones_from_a4 = pitch - 69
  return 440 * 2 ^ (semitones_from_a4 / 12)
end
-- print(pitch_hz(69), pitch_hz(pitch_low), pitch_hz(pitch_high)) | |
-- Spectrogram outline resolution: each semitone is subdivided into
-- sp_bin_subdiv spline segments, giving sp_n_spline segments in total.
local sp_bin_subdiv = 4
local sp_n_spline = math.floor((pitch_high - pitch_low) * sp_bin_subdiv)

-- One dynamic triangle-strip mesh per track, using the default vertex format.
-- Every spline point contributes two vertices (left and right edge).
local sp_meshes = {}
do
  local vertex_count = 2 * (sp_n_spline + 1)
  for track = 1, n_tracks do
    sp_meshes[track] = love.graphics.newMesh(vertex_count, 'strip', 'dynamic')
  end
end
-- Fonts: three sizes of the Chinese typeface and two of the Latin one, all
-- sized in proportion to GLOBAL_SCALE.
local font, font_md, font_lg, font_en, font_en_md
do
  local new_font = love.graphics.newFont
  local zh_face = 'AaKaiSong2WanZi2.ttf'
  local en_face = 'Imprima-Regular.ttf'
  font = new_font(zh_face, 18 * GLOBAL_SCALE)
  font_md = new_font(zh_face, 21 * GLOBAL_SCALE)
  font_lg = new_font(zh_face, 24 * GLOBAL_SCALE)
  font_en = new_font(en_face, 18 * GLOBAL_SCALE)
  font_en_md = new_font(en_face, 21 * GLOBAL_SCALE)
end
-- Pre-rendered instrument labels: instr_text[i] = { Chinese name, English name }.
local instr_text = {}
for i = 1, n_tracks do
  local meta = track_meta[i]
  local label_zh = love.graphics.newText(font, meta[1])
  local label_en = love.graphics.newText(font_en, meta[2])
  instr_text[i] = { label_zh, label_en }
end
-- Pre-rendered section titles, indexed like `sections` (0 = whole piece).
local section_text = {}       -- Chinese title
local section_orig_text = {}  -- title in its original language
local section_en_text = {}    -- '/ ' .. English translation; absent if empty
for i = 0, #sections do
  local entry = sections[i]
  local title_zh, title_orig, title_en = entry[1], entry[2], entry[3]

  section_text[i] = love.graphics.newText(font_lg, title_zh)

  -- Section 5's original-language title is Japanese, so it is rendered with
  -- the Chinese typeface rather than the Latin one.
  local orig_font = font_en_md
  if i == 5 then
    orig_font = font_md
  end
  section_orig_text[i] = love.graphics.newText(orig_font, title_orig)

  if title_en ~= '' then
    section_en_text[i] = love.graphics.newText(font_en_md, '/ ' .. title_en)
  end
end
-- Cached Text objects for the playhead read-out (bar counter and timecode),
-- together with the values they were last built from, so that draw() only
-- rebuilds a Text object when its value changes.
local playhead_text_bar = nil
local playhead_text_timecode = nil
local playhead_last_upd_bar = nil
-- Previously `playhead_text_timecode` was declared a second time here and
-- this variable was never declared, which made draw() write a global.
local playhead_last_upd_timecode = nil
--- Catmull-Rom style cubic interpolation over integer-indexed samples.
-- @param x   position to evaluate at; may be fractional
-- @param fn  function returning the sample value at an integer index; it is
--            called for floor(x) - 1, floor(x), floor(x) + 1 and floor(x) + 2
-- @return    interpolated value; equals fn(x) when x is an integer
local function cubic_interpolation(x, fn)
  local base = math.floor(x)
  local frac = x - base

  -- The four neighbouring samples.
  local prev = fn(base - 1)
  local here = fn(base)
  local next1 = fn(base + 1)
  local next2 = fn(base + 2)

  -- Tangents at `here` and `next1`, from central differences.
  local slope_here = (next1 - prev) / 2
  local slope_next = (next2 - here) / 2

  local frac2 = frac * frac
  local frac3 = frac2 * frac

  -- Cubic Hermite polynomial, with its terms grouped by common factors.
  local value_blend = (2 * frac3 - 3 * frac2) * (here - next1)
  local slope_blend = (frac3 - frac2) * (slope_here + slope_next)
  local slope_lead = (frac - frac2) * slope_here
  return value_blend + here + slope_blend + slope_lead
end
-- Renders one complete frame for animation time `t` (seconds).
-- The music position is `t - MUSIC_DELAY`. Outside recording mode this also
-- keeps the audio source playing in step with `t`.
-- Drawn per frame: per-track centre line, spectrogram outline and instrument
-- label; currently sounding notes; section title; bar counter and timecode.
local draw = function (t) | |
love.graphics.clear(0.13, 0.12, 0.15, 1) | |
local music_time = t - MUSIC_DELAY | |
-- Audio sync (interactive mode only): pause outside the music's time range,
-- otherwise play and re-seek when playback is more than 0.02 s off target.
if not isRecord then | |
if music_time < 0 or music_time > music:getDuration() then | |
music:pause() | |
else | |
music:play() | |
if math.abs(music:tell() - music_time) > 0.02 then | |
music:seek(music_time) | |
end | |
end | |
end | |
-- Instrument labels alternate between their two languages every
-- INSTR_TEXT_INTV seconds. a1 stays at 1 and eases down to 0 during the last
-- INSTR_TEXT_FADE_DUR seconds of each interval; the two alphas are swapped in
-- every second interval.
local INSTR_TEXT_INTV = 15 | |
local INSTR_TEXT_FADE_DUR = 0.4 | |
local t1 = t % INSTR_TEXT_INTV | |
local a1 = 1 | |
if t1 >= INSTR_TEXT_INTV - INSTR_TEXT_FADE_DUR then | |
local x = (t1 - (INSTR_TEXT_INTV - INSTR_TEXT_FADE_DUR)) / INSTR_TEXT_FADE_DUR | |
a1 = (x < 0.5 and 1 - x * x * 2 or (1-x) * (1-x) * 2) | |
end | |
local instr_name_alpha = {a1, 1 - a1} | |
if t % (INSTR_TEXT_INTV * 2) >= INSTR_TEXT_INTV then | |
instr_name_alpha[1], instr_name_alpha[2] | |
= instr_name_alpha[2], instr_name_alpha[1] | |
end | |
for i = 1, n_tracks do | |
local x_cen = track_x(i) | |
local r, g, b = track_meta[i][3], track_meta[i][4], track_meta[i][5] | |
-- Central line | |
love.graphics.setColor(r, g, b, 0.25) | |
love.graphics.setLineWidth(1 * GLOBAL_SCALE) | |
love.graphics.line(x_cen, y_low, x_cen, y_high) | |
-- Spectrogram | |
local sp_n_windows = sp_ns_windows[i] | |
local spectrogram = spectrograms[i] | |
local energy_waveform = energy_waveforms[i] | |
-- Compresses a spectrogram magnitude into the range [0, 1).
local map_sp_val = function (sp_value) | |
return 1 - math.exp(-sp_value * 200) | |
end | |
-- Index of the analysis window at music_time; 2048 / 44100 is presumably the
-- window step in samples over the sample rate (TODO confirm against the
-- preprocessing tool).
-- NOTE(review): the index is floored, so cubic_interpolation below always
-- receives an integer position and returns the sample at that index; confirm
-- whether a fractional position was intended.
local sp_window_index = math.floor(music_time / (2048 / 44100)) | |
local spline = {{}, {}} | |
for ch = 0, 1 do | |
local sp_values = {} | |
local spline_ch = spline[ch + 1] | |
-- Cubic interpolation of spectrogram over time
-- (84 is the highest bin index, n_pitches - 1 for the current pitch range)
for bin = 0, 84 do | |
sp_values[bin] = cubic_interpolation( | |
sp_window_index, | |
function (i) | |
if i < 0 or i >= sp_n_windows then return 0 end | |
-- NOTE(review): the channel offset is `0 * n_pitches`, so both iterations of
-- the `ch` loop read the same data; confirm whether `ch` was intended.
return spectrogram[i * (2 * n_pitches) + 0 * n_pitches + bin] | |
end | |
) | |
end | |
-- Cubic spline interpolation between bins | |
-- (bin indices are clamped to 0..84 and negative results are clipped to 0)
for p = 0, sp_n_spline do | |
spline_ch[p] = math.max(0, cubic_interpolation( | |
math.max(0, math.min(84, p / sp_bin_subdiv)), | |
function (i) return sp_values[math.max(0, math.min(84, i))] end | |
)) | |
end | |
end | |
-- Build the strip: for each spline point one vertex left of the centre line
-- (from spline[1]) and one right of it (from spline[2]), at most 0.4 track
-- widths away.
local vertices = {} | |
for p = 0, sp_n_spline do | |
local y = pitch_y(pitch_low + p / sp_bin_subdiv) | |
vertices[#vertices + 1] = { -track_width * 0.4 * map_sp_val(spline[1][p]), y } | |
vertices[#vertices + 1] = { track_width * 0.4 * map_sp_val(spline[2][p]), y } | |
end | |
sp_meshes[i]:setVertices(vertices) | |
love.graphics.setColor(r, g, b, 0.15) | |
love.graphics.draw(sp_meshes[i], x_cen, 0) | |
-- Instrument name text | |
-- (its opacity rises with the track's current energy, from 30% to 100%)
local energy = cubic_interpolation( | |
sp_window_index, | |
function (i) | |
if i < 0 or i >= sp_n_windows then return 0 end | |
return energy_waveform[i] | |
end | |
) | |
energy = 1 - math.exp(-energy * 300) | |
for j = 1, 2 do | |
love.graphics.setColor(r, g, b, instr_name_alpha[j] * (0.3 + 0.7 * energy)) | |
local text = instr_text[i][j] | |
love.graphics.draw(text, | |
x_cen + W * (j == 1 and 0.005 or 0.0015), y_low + H * 0.021, -math.pi / 3, | |
1, 1, text:getWidth(), text:getHeight() / 2) | |
end | |
end | |
-- Binary chop for event index | |
-- (ends with `lo` = index of the last event whose time is before music_time,
-- or 0 when there is none)
local lo, hi = 0, #events + 1 | |
while lo < hi - 1 do | |
local mid = math.floor((lo + hi) / 2) | |
if events[mid].time >= music_time then hi = mid | |
else lo = mid end | |
end | |
local event_index = lo | |
-- Walk backwards over at most the last 20 seconds of events to find the
-- current bar number and the notes that are sounding or still fading out.
local bar = nil | |
local active_notes = {} | |
for i = event_index, 1, -1 do | |
if events[i].time < music_time - 20 then break end | |
if bar == nil and events[i].type == 'bar' then | |
bar = events[i].number | |
end | |
-- A note stays listed until 0.6 s after its end.
if events[i].type == 'note' | |
and events[i].time <= music_time | |
and events[i].time + events[i].len + 0.6 >= music_time | |
then | |
local end_time = events[i].time + events[i].len | |
-- Instruments flagged `strike` start fading at most 0.1 s after the onset.
if track_meta[map_midi_track(events[i].track)][6].strike then | |
end_time = math.min(end_time, events[i].time + 0.1) | |
end | |
active_notes[#active_notes + 1] = { | |
track = map_midi_track(events[i].track), | |
pitch = events[i].pitch, | |
vel = events[i].vel, | |
into = music_time - events[i].time, | |
fade = math.max(0, music_time - end_time), | |
} | |
end | |
end | |
if bar == nil then bar = 1 end | |
-- Each active note is a horizontal line on its track at the note's pitch.
-- The width grows over the first 0.2 s with an ease-out and scales with
-- velocity; the opacity drops to 70% over the first 0.3 s and to zero over
-- the 0.3 s after the note's end.
for i = 1, #active_notes do | |
local track = active_notes[i].track | |
local r, g, b = track_meta[track][3], track_meta[track][4], track_meta[track][5] | |
local x = track_x(track) | |
local y = pitch_y(active_notes[i].pitch) | |
local w = math.min(1, active_notes[i].into / 0.2) | |
w = 1 - (1 - w)^5 | |
w = w * (0.3 + 0.7 * math.sqrt(active_notes[i].vel / 127)) | |
local a1 = 1 - 0.3 * math.min(1, active_notes[i].into / 0.3) | |
local a2 = math.max(0, 1 - active_notes[i].fade / 0.3) | |
love.graphics.setColor(r, g, b, a1 * a2) | |
love.graphics.setLineWidth(1 * GLOBAL_SCALE) | |
love.graphics.line( | |
x - w * track_width * 0.25, y, | |
x + w * track_width * 0.25, y | |
) | |
end | |
-- Rebuild the cached playhead Text objects only when their value changes.
if playhead_last_upd_bar ~= bar then | |
playhead_last_upd_bar = bar | |
playhead_text_bar = love.graphics.newText(font_en, string.format('bar %d / 408', bar)) | |
end | |
local timecode = math.max(0, math.floor(music_time)) | |
if playhead_last_upd_timecode ~= timecode then | |
playhead_last_upd_timecode = timecode | |
playhead_text_timecode = love.graphics.newText(font_en, | |
string.format('%02d:%02d', math.floor(timecode / 60), timecode % 60)) | |
end | |
-- Title selection: a section's title is shown for 15 s from its start time,
-- fading in and out over 0.25 s. Outside those windows entry 0 is shown; it
-- fades out during the 0.25 s before a section starts and back in during the
-- 0.25 s after a section title has gone.
local section_idx = 0 | |
local section_alpha = 1 | |
for i = 1, #sections do | |
local t1 = music_time - sections[i][4] | |
if t1 >= -0.25 and t1 <= 15.25 then | |
if t1 < 0 then | |
section_alpha = -t1 / 0.25 | |
elseif t1 < 15 then | |
section_idx = i | |
section_alpha = math.min(1, t1 / 0.25, (15 - t1) / 0.25) | |
else | |
section_alpha = (t1 - 15) / 0.25 | |
end | |
break | |
end | |
end | |
-- Once the first section has started, entry 0 is drawn at quarter opacity.
if section_idx == 0 and music_time > sections[1][4] then | |
section_alpha = section_alpha * 0.25 | |
end | |
-- Title line: Chinese title, then the original-language title with its
-- bottom edge aligned to it, then (at half opacity) the English translation
-- if there is one.
love.graphics.setColor(0.99, 0.99, 0.99, section_alpha) | |
love.graphics.draw(section_text[section_idx], W * 0.016, W * 0.016) | |
love.graphics.draw(section_orig_text[section_idx], | |
W * 0.050 + section_text[section_idx]:getWidth(), | |
W * 0.016 + (section_text[section_idx]:getHeight() - section_orig_text[section_idx]:getHeight())) | |
love.graphics.setColor(0.99, 0.99, 0.99, section_alpha * 0.5) | |
if section_en_text[section_idx] ~= nil then | |
love.graphics.draw(section_en_text[section_idx], | |
W * 0.064 + section_text[section_idx]:getWidth() + section_orig_text[section_idx]:getWidth(), | |
W * 0.016 + (section_text[section_idx]:getHeight() - section_orig_text[section_idx]:getHeight())) | |
end | |
-- Playhead read-out, right-aligned in the top-right corner; shown only while
-- music_time is within [-0.25, 713.25] with 0.25 s fades at both ends
-- (713 s is presumably the length of the piece; TODO confirm).
if music_time >= -0.25 and music_time <= 713.25 then | |
local alpha = math.min(1, (music_time + 0.25) / 0.25, (713.25 - music_time) / 0.25) | |
love.graphics.setColor(0.99, 0.99, 0.99, alpha * 0.5) | |
love.graphics.draw(playhead_text_bar, W * 0.99 - playhead_text_bar:getWidth(), W * 0.01) | |
love.graphics.setColor(0.99, 0.99, 0.99, alpha * 0.1) | |
love.graphics.draw(playhead_text_timecode, W * 0.99 - playhead_text_timecode:getWidth(), W * 0.032) | |
end | |
end | |
-- Animation clock in seconds for interactive playback.
local T = 0
-- Duration of one frame; used as the time step when recording.
local frameTime = 1 / 60
local frame = 0

--- Advances the animation clock (interactive mode only).
-- Holding space freezes time; left/right scrub backwards/forwards at 10x
-- speed, or 100x with left shift held; otherwise time runs at normal speed.
function love.update(dt)
  if isRecord then
    return
  end
  if love.keyboard.isDown('space') then
    return
  end

  local direction
  if love.keyboard.isDown('left') then
    direction = -1
  elseif love.keyboard.isDown('right') then
    direction = 1
  else
    T = T + dt
    return
  end

  local speed = 10
  if love.keyboard.isDown('lshift') then
    speed = 100
  end
  T = T + direction * dt * speed
end
-- C stdio functions used to stream raw frames to ffmpeg.
ffi.cdef [[
void *popen(const char *command, const char *mode);
size_t fwrite(const void *restrict ptr, size_t size, size_t nitems, void *restrict stream);
int fclose(void *stream);
int pclose(void *stream);
]]

--- LÖVE draw callback.
-- Interactive mode: draws the frame for the current clock value T.
-- Recording mode: renders every frame of the video at a fixed 60 fps inside a
-- single call, pipes the raw RGBA screenshots to ffmpeg (which writes
-- output.mp4) and then asks LÖVE to quit.
function love.draw()
  if not isRecord then
    draw(T)
    return
  end

  local command = string.format('ffmpeg -f rawvideo -pixel_format rgba -video_size %dx%d -r 60 -i - -pix_fmt yuv420p -crf 26 output.mp4 -y', W, H)
  local pipe = ffi.C.popen(command, 'w')
  -- A NULL pointer compares equal to nil in the LuaJIT FFI.
  if pipe == nil then
    error('could not start ffmpeg through popen')
  end

  local n_frames = 60 * 719
  for i = 0, n_frames - 1 do
    draw(i * frameTime)
    love.graphics.present()
    -- NOTE(review): the screenshot is requested after present(), so the
    -- callback presumably runs during a later present() call; confirm that
    -- the captured frames line up with the intended frame index.
    love.graphics.captureScreenshot(function (img)
      ffi.C.fwrite(img:getFFIPointer(), img:getSize(), 1, pipe)
      if i == n_frames - 1 then
        -- A stream opened with popen must be closed with pclose, which also
        -- waits for ffmpeg to exit.
        ffi.C.pclose(pipe)
      end
    end)
  end
  love.event.quit()
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Runs the spectrogram tool over every per-track audio file.
# Requires bash (uses arrays). The position of a name in this list is the
# number used in the track's file name.
names=(
  Piccolo_1,_2
  Flute_1,_2
  Oboe_1,_2
  Clarinet_1,_2
  Bassoon_1,_2
  Horn_1,_2
  Horn_3,_4
  Trumpet_1,_2
  Trombone_1,_2
  Timpani
  Glockenspiel
  Vibraphone
  "Marimba_(Single_Stave)"
  Snare_Drum
  Bass_Drum
  Concert_Toms
  Triangle
  Suspended_Cymbal
  Metal_Wind_Chimes
  Maracas
  Harp
  First_Violins
  Second_Violins
  Violas
  Violoncellos
  Contrabasses
)

i=0
# Quote the expansions so that names are never word-split or glob-expanded.
for track in "${names[@]}"; do
  i=$((i + 1))
  echo "$i" "$track"
  # mv ~/Downloads/TwAll-22a-${track}.ogg ./tracks/TwAll-22a-$i.ogg
  ./spectrogram "./tracks/TwAll-22a-$i.ogg" "./tracks/TwAll-22a-$i.ogg.bin"
done
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// cc spectrogram.c -O2 -I../kissfft-131.1.0 ../kissfft-131.1.0/libkissfft-float.a -dynamiclib -o libspectrogram.dylib | |
// cc spectrogram.c -O2 -I../kissfft-131.1.0 ../kissfft-131.1.0/libkissfft-float.a -o spectrogram | |
#include "stb_vorbis.h" | |
#define MA_NO_ENCODING | |
#define MA_NO_ENGINE | |
#define MA_NO_MP3 | |
#define MA_NO_FLAC | |
#define MINIAUDIO_IMPLEMENTATION | |
#include "miniaudio.h" | |
#include "kiss_fftr.h" | |
#include <assert.h> | |
#include <stdio.h> | |
#include <stdlib.h> // malloc | |
#include <stdint.h> // uint32_t, uint8_t | |
#include <string.h> // memset | |
#define FFT_SIZE_LARGE 16384 | |
#define FFT_SIZE_SMALL 8192 | |
#define WINDOW_STEP 2048 | |
#define PITCH_LOW 28 | |
#define PITCH_HIGH 112 | |
void spectrogram(const void *data, int len, int *o_n_windows, float **sp, float **enr) | |
{ | |
ma_decoder decoder; | |
ma_decoder_config cfg = ma_decoder_config_init(ma_format_f32, 2, 44100); | |
assert(ma_decoder_init_memory(data, len, &cfg, &decoder) == MA_SUCCESS); | |
// Sample = stereo sample frame (L + R) | |
ma_uint64 n_samples; | |
assert(ma_decoder_get_length_in_pcm_frames(&decoder, &n_samples) == MA_SUCCESS); | |
fprintf(stderr, "length = %llu\n", n_samples); | |
float *pcm = (float *)malloc(sizeof(float) * n_samples * 2); | |
ma_decoder_read_pcm_frames(&decoder, pcm, n_samples, NULL); | |
kiss_fftr_cfg fft_cfg_large = kiss_fftr_alloc(FFT_SIZE_LARGE, 0, NULL, NULL); | |
kiss_fftr_cfg fft_cfg_small = kiss_fftr_alloc(FFT_SIZE_SMALL, 0, NULL, NULL); | |
float *hann_window_large = (float *)malloc(sizeof(float) * FFT_SIZE_LARGE); | |
float *hann_window_small = (float *)malloc(sizeof(float) * FFT_SIZE_SMALL); | |
for (int ty = 0; ty <= 1; ty++) { | |
int fft_size = (ty == 0 ? FFT_SIZE_LARGE : FFT_SIZE_SMALL); | |
float *hann_window = (ty == 0 ? hann_window_large : hann_window_small); | |
for (int j = 0; j < fft_size; j++) { | |
hann_window[j] = 0.5f * (1 - cosf((float)j / fft_size * (2 * M_PI))); | |
} | |
} | |
float *fft_window = (float *)malloc(sizeof(float) * FFT_SIZE_LARGE); | |
kiss_fft_cpx *fft_result = (kiss_fft_cpx *)malloc(sizeof(kiss_fft_cpx) * (FFT_SIZE_LARGE / 2 + 1)); | |
int n_windows = (n_samples + (WINDOW_STEP - 1)) / WINDOW_STEP; | |
int n_bins = PITCH_HIGH - PITCH_LOW + 1; | |
float *binned_result = (float *)malloc(sizeof(float) * n_windows * 2 * n_bins); | |
// binned_result[n_windows][2][n_bins] | |
memset(binned_result, 0, sizeof(float) * n_windows * 2 * n_bins); | |
float *energy_result = (float *)malloc(sizeof(float) * n_windows); | |
float last_energy = 0; // EMA | |
float bin_index_large[n_bins + 1], bin_index_small[n_bins + 1]; | |
for (int i = 0; i <= n_bins; i++) { | |
float freq = 440 * powf(2, (PITCH_LOW + i - 69 - 0.5f) / 12.f); | |
// (FFT size / 2) / 22050 | |
bin_index_large[i] = (freq * FFT_SIZE_LARGE / 44100); | |
bin_index_small[i] = (freq * FFT_SIZE_SMALL / 44100); | |
} | |
for (int i = 0; i < n_windows; i++) { | |
if (i * 100 / n_windows != (i - 1) * 100 / n_windows) fprintf(stderr, "%d%%\n", i * 100 / n_windows); | |
int sample_start = i * WINDOW_STEP; | |
for (int ch = 0; ch < 2; ch++) { | |
for (int ty = 0; ty <= 1; ty++) { | |
int fft_size = (ty == 0 ? FFT_SIZE_LARGE : FFT_SIZE_SMALL); | |
kiss_fftr_cfg fft_cfg = (ty == 0 ? fft_cfg_large : fft_cfg_small); | |
float *bin_index = (ty == 0 ? bin_index_large : bin_index_small); | |
float *hann_window = (ty == 0 ? hann_window_large : hann_window_small); | |
for (int j = 0; j < fft_size; j++) { | |
int index = sample_start + (j - fft_size / 2); | |
fft_window[j] = hann_window[j] * (index < 0 || index >= n_samples ? 0 : pcm[index * 2 + ch]); | |
} | |
kiss_fftr(fft_cfg, fft_window, fft_result); | |
for (int bin = 0; bin < n_bins; bin++) { | |
float bin_sum = 0; | |
for (int j = (int)bin_index[bin] + 1; j < (int)bin_index[bin + 1]; j++) { | |
#define cplx_norm2(_x) ((_x).r * (_x).r + (_x).i * (_x).i) | |
bin_sum += cplx_norm2(fft_result[j]); | |
} | |
if ((int)bin_index[bin] == (int)bin_index[bin + 1]) { | |
int j = (int)bin_index[bin]; | |
bin_sum += cplx_norm2(fft_result[j]) * (bin_index[bin + 1] - bin_index[bin]); | |
} else { | |
int j1 = (int)bin_index[bin]; | |
float j1f = j1 + 1 - bin_index[bin]; | |
int j2 = (int)bin_index[bin + 1]; | |
float j2f = bin_index[bin + 1] - j2; | |
bin_sum += ( | |
cplx_norm2(fft_result[j1]) * j1f + | |
cplx_norm2(fft_result[j2]) * j2f | |
); | |
} | |
float bin_value = sqrtf(bin_sum) / (fft_size / 2); | |
float bin_weight = (float)bin / (n_bins - 1); | |
if (ty == 0) bin_weight = 1 - bin_weight; | |
binned_result[i * (2 * n_bins) + ch * n_bins + bin] += bin_value * bin_weight; | |
} | |
if (ty == 0) { | |
float energy = 0; | |
for (int i = 1; i <= fft_size / 2; i++) | |
energy += cplx_norm2(fft_result[i]); | |
energy = sqrtf(energy) / (fft_size / 2); | |
last_energy = last_energy * 0.9 + energy * 0.1; | |
energy_result[i] = last_energy; | |
} | |
} | |
} | |
} | |
ma_decoder_uninit(&decoder); | |
if (o_n_windows != NULL) *o_n_windows = n_windows; | |
if (sp != NULL) *sp = binned_result; | |
if (enr != NULL) *enr = energy_result; | |
} | |
// Writes x to f as four bytes, least significant byte first.
static inline void put_u32(FILE *f, uint32_t x)
{
  for (int shift = 0; shift < 32; shift += 8)
    fputc((x >> shift) & 0xff, f);
}
// Writes the bit pattern of a 32-bit float to f, least significant byte first.
static inline void put_f32(FILE *f, float x)
{
  // Copy the bits with memcpy: reading a float through a uint32_t pointer
  // violates the strict aliasing rule.
  uint32_t x_i;
  memcpy(&x_i, &x, sizeof x_i);
  put_u32(f, x_i);
}
// Reads four bytes at p as a 32-bit unsigned integer, least significant
// byte first.
static inline uint32_t get_u32(const void *p)
{
  const uint8_t *bytes = (const uint8_t *)p;
  uint32_t value = 0;
  for (int k = 3; k >= 0; k--)
    value = (value << 8) | (uint32_t)bytes[k];
  return value;
}
// Reads four bytes at p (least significant byte first) and reinterprets them
// as the bit pattern of a 32-bit float.
static inline float get_f32(const void *p)
{
  // Copy the bits with memcpy: reading a uint32_t through a float pointer
  // violates the strict aliasing rule.
  uint32_t bits = get_u32(p);
  float value;
  memcpy(&value, &bits, sizeof value);
  return value;
}
void spectrogram_preprocessed(const void *data, int len, int *o_n_windows, float **sp, float **enr) | |
{ | |
int n_bins = PITCH_HIGH - PITCH_LOW + 1; | |
int n_windows = len / 4 / (2 * n_bins + 1); | |
float *binned_result = (float *)malloc(sizeof(float) * n_windows * 2 * n_bins); | |
float *energy_result = (float *)malloc(sizeof(float) * n_windows); | |
for (int i = 0; i < n_windows; i++) { | |
for (int j = 0; j < 2 * n_bins; j++) { | |
binned_result[i * (2 * n_bins) + j] = get_f32(data); | |
data += 4; | |
} | |
energy_result[i] = get_f32(data); | |
data += 4; | |
} | |
if (o_n_windows != NULL) *o_n_windows = n_windows; | |
if (sp != NULL) *sp = binned_result; | |
if (enr != NULL) *enr = energy_result; | |
} | |
int main(int argc, char *argv[]) | |
{ | |
if (argc <= 1) { | |
fprintf(stderr, "%s <audio file> [<output file>]\n", argv[0]); | |
return 0; | |
} | |
fprintf(stderr, "%s %s\n", argv[1], argv[2]); | |
FILE *f = fopen(argv[1], "rb"); | |
assert(f != NULL); | |
fseek(f, 0, SEEK_END); | |
long len = ftell(f); | |
fseek(f, 0, SEEK_SET); | |
void *buf = malloc(len); | |
fread(buf, len, 1, f); | |
fclose(f); | |
f = (argc >= 3 ? fopen(argv[2], "wb") : stdout); | |
assert(f != NULL); | |
int n_windows; | |
float *sp, *enr; | |
spectrogram(buf, len, &n_windows, &sp, &enr); | |
int n_bins = PITCH_HIGH - PITCH_LOW + 1; | |
/* | |
printf("%d\n", n_windows); | |
for (int i = 0; i < n_windows; i++) { | |
for (int ch = 0; ch < 2; ch++) | |
for (int j = 0; j < n_bins; j++) | |
printf("%.8f%c", s[i * (2 * n_bins) + ch * n_bins + j], j == n_bins - 1 ? '\n' : ' '); | |
} | |
*/ | |
// put_u32(f, n_windows); | |
for (int i = 0; i < n_windows; i++) { | |
for (int ch = 0; ch < 2; ch++) | |
for (int j = 0; j < n_bins; j++) | |
put_f32(f, sp[i * (2 * n_bins) + ch * n_bins + j]); | |
put_f32(f, enr[i]); | |
} | |
fclose(f); | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment