Skip to content

Instantly share code, notes, and snippets.

@lxl66566
Created August 17, 2024 10:18
Show Gist options
  • Save lxl66566/f7dc49be8a08f2746b4179ccd3b2b378 to your computer and use it in GitHub Desktop.
Save lxl66566/f7dc49be8a08f2746b4179ccd3b2b378 to your computer and use it in GitHub Desktop.
Test audio speed-up using the SoundTouch library
import ctypes
import math
import numpy as np
import scipy.io.wavfile as wavfile
# Load the SoundTouch shared library (path is relative to the working directory).
soundtouch_dll = ctypes.CDLL("soundtouch_dll-2.3.3/SoundTouchDLL_x64.dll")

# Declare the C prototypes so ctypes marshals arguments and results correctly.

# --- Instance lifetime -------------------------------------------------------
# The instance handle is an opaque pointer; without this restype ctypes would
# treat the return value as a C int.
soundtouch_dll.soundtouch_createInstance.restype = ctypes.c_void_p
soundtouch_dll.soundtouch_destroyInstance.argtypes = [ctypes.c_void_p]

# --- Configuration setters ---------------------------------------------------
soundtouch_dll.soundtouch_setSampleRate.argtypes = [ctypes.c_void_p, ctypes.c_uint]
soundtouch_dll.soundtouch_setChannels.argtypes = [ctypes.c_void_p, ctypes.c_uint]
soundtouch_dll.soundtouch_setRate.argtypes = [ctypes.c_void_p, ctypes.c_float]

# --- 32-bit float sample I/O -------------------------------------------------
soundtouch_dll.soundtouch_putSamples.argtypes = [
    ctypes.c_void_p,  # instance handle
    ctypes.POINTER(ctypes.c_float),  # input sample buffer
    ctypes.c_uint,  # sample count
]
soundtouch_dll.soundtouch_receiveSamples.restype = ctypes.c_uint
soundtouch_dll.soundtouch_receiveSamples.argtypes = [
    ctypes.c_void_p,  # instance handle
    ctypes.POINTER(ctypes.c_float),  # output sample buffer
    ctypes.c_uint,  # output buffer capacity
]

# --- 16-bit integer sample I/O -----------------------------------------------
soundtouch_dll.soundtouch_putSamples_i16.argtypes = [
    ctypes.c_void_p,  # instance handle
    ctypes.POINTER(ctypes.c_short),  # input sample buffer
    ctypes.c_uint,  # sample count
]
soundtouch_dll.soundtouch_receiveSamples_i16.restype = ctypes.c_uint
soundtouch_dll.soundtouch_receiveSamples_i16.argtypes = [
    ctypes.c_void_p,  # instance handle
    ctypes.POINTER(ctypes.c_short),  # output sample buffer
    ctypes.c_uint,  # output buffer capacity
]
def process_f32(handle, data, rate):
    """Feed float samples to SoundTouch and return how many samples came back.

    Args:
        handle: SoundTouch instance handle from ``soundtouch_createInstance``.
        data: ctypes ``c_float`` array holding the input samples.
        rate: Divisor used to size the output buffer
            (``ceil(len(data) / rate)`` elements).

    Returns:
        The count reported by ``soundtouch_receiveSamples``. The received
        sample data itself is written to a temporary buffer and discarded.
    """
    # Size the output buffer first: the output is expected to shrink by `rate`.
    capacity = math.ceil(len(data) / rate)
    soundtouch_dll.soundtouch_putSamples(handle, data, len(data))

    # Pull whatever processed audio is currently available.
    out_buffer = (ctypes.c_float * capacity)()
    return soundtouch_dll.soundtouch_receiveSamples(handle, out_buffer, capacity)
def process_i16(handle, data, rate):
    """Feed 16-bit samples to SoundTouch and return how many samples came back.

    Args:
        handle: SoundTouch instance handle from ``soundtouch_createInstance``.
        data: ctypes ``c_short`` array holding the input samples.
        rate: Divisor used to size the output buffer
            (``ceil(len(data) / rate)`` elements).

    Returns:
        The count reported by ``soundtouch_receiveSamples_i16``. The received
        sample data itself is written to a temporary buffer and discarded.
    """
    # Size the output buffer first: the output is expected to shrink by `rate`.
    capacity = math.ceil(len(data) / rate)
    soundtouch_dll.soundtouch_putSamples_i16(handle, data, len(data))

    # Pull whatever processed audio is currently available.
    out_buffer = (ctypes.c_int16 * capacity)()
    return soundtouch_dll.soundtouch_receiveSamples_i16(handle, out_buffer, capacity)
def transform(data):
    """Return a ``float32`` copy of *data*.

    Values are converted as-is; no scaling or clipping is applied.
    """
    as_float32 = data.astype(np.float32)
    return as_float32
def main():
    """Speed up ``origin.wav`` with SoundTouch and write the result to ``output.wav``.

    Reads a stereo WAV file, runs it through one SoundTouch instance at twice
    the playback rate, and writes the processed audio as 16-bit PCM.

    NOTE(review): the output is always written as int16, which assumes the
    input file is 16-bit PCM -- confirm for other sample formats.

    Raises:
        ValueError: If the input file is not stereo.
    """
    sample_rate, data = wavfile.read("origin.wav")

    # Ensure the data is stereo.
    if len(data.shape) != 2 or data.shape[1] != 2:
        raise ValueError("Input WAV file must be stereo (2 channels).")

    channels = 2  # stereo
    rate = 2.0  # +100% playback rate

    # A C-contiguous (frames, 2) float32 array is laid out in memory as
    # L, R, L, R, ... -- the interleaved format a 2-channel instance expects.
    interleaved = np.ascontiguousarray(transform(data))
    frame_count = interleaved.shape[0]
    float_ptr = ctypes.POINTER(ctypes.c_float)

    # Frames pulled per receive call; the value only affects call granularity.
    chunk_frames = 4096
    chunk = np.empty((chunk_frames, channels), dtype=np.float32)
    pieces = []

    handle = soundtouch_dll.soundtouch_createInstance()
    try:
        soundtouch_dll.soundtouch_setRate(handle, rate)
        soundtouch_dll.soundtouch_setChannels(handle, channels)
        soundtouch_dll.soundtouch_setSampleRate(handle, sample_rate)

        # Sample counts are passed in frames (one frame = one sample per channel).
        soundtouch_dll.soundtouch_putSamples(
            handle, interleaved.ctypes.data_as(float_ptr), frame_count
        )

        # Push the tail of the stream out of SoundTouch's internal buffers.
        # No prototype is declared for soundtouch_flush in this file, so wrap
        # the handle explicitly; a bare Python int would be passed as a C int
        # and truncate the 64-bit pointer.
        soundtouch_dll.soundtouch_flush(ctypes.c_void_p(handle))

        # Drain processed audio until the instance reports nothing left.
        chunk_ptr = chunk.ctypes.data_as(float_ptr)
        while True:
            received = soundtouch_dll.soundtouch_receiveSamples(
                handle, chunk_ptr, chunk_frames
            )
            if received == 0:
                break
            # Copy, because `chunk` is overwritten by the next receive call.
            pieces.append(chunk[:received].copy())
    finally:
        # Always release the native instance, even if processing failed.
        soundtouch_dll.soundtouch_destroyInstance(handle)

    if pieces:
        output = np.concatenate(pieces)
    else:
        output = np.empty((0, channels), dtype=np.float32)

    # Clip before the int16 cast so out-of-range values saturate instead of
    # wrapping around.
    pcm = np.clip(output, -32768, 32767).astype(np.int16)
    wavfile.write("output.wav", sample_rate, pcm)
def test_f32():
    """Smoke-test the float path: push the values 1..99 and print the result.

    Prints the count returned by ``process_f32`` followed by the contents of
    the input buffer.
    """
    instance = soundtouch_dll.soundtouch_createInstance()
    ramp = np.arange(1, 100, dtype=np.float32)
    samples = np.ctypeslib.as_ctypes(ramp)
    received_count = process_f32(instance, samples, 2)
    print(received_count, samples[:])
    soundtouch_dll.soundtouch_destroyInstance(instance)
def test_i16():
    """Smoke-test the 16-bit path: push the values 1..99 and print the result.

    Prints the count returned by ``process_i16`` followed by the contents of
    the input buffer.
    """
    instance = soundtouch_dll.soundtouch_createInstance()
    ramp = np.arange(1, 100, dtype=np.int16)
    samples = np.ctypeslib.as_ctypes(ramp)
    received_count = process_i16(instance, samples, 2)
    print(received_count, samples[:])
    soundtouch_dll.soundtouch_destroyInstance(instance)
# Script entry: runs the 16-bit smoke test whenever this module is executed
# (there is no `__main__` guard, so this also runs on import).
test_i16()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment