Skip to content

Instantly share code, notes, and snippets.

@kuguma
Created July 25, 2022 10:34
Show Gist options
  • Star 4 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kuguma/7fe941951c2d73325b323c3b7f1a26c0 to your computer and use it in GitHub Desktop.
Save kuguma/7fe941951c2d73325b323c3b7f1a26c0 to your computer and use it in GitHub Desktop.
waveファイルを無音区間で分割して吐き出すやつ
from struct import unpack
from scipy.io import wavfile
import scipy.io
import numpy as np
import os, sys
import glob
# Largest absolute sample value that still counts as silence.
threshold = 0
# Minimum number of consecutive silent frames that separates two audio parts.
silence_min_frames = 1024


def _find_silence_runs(silent, min_frames):
    """Return inclusive ``(start, end)`` index pairs of long-enough silent runs.

    Args:
        silent: 1-D boolean array, True where a frame is silent.
        min_frames: Minimum run length (in frames) for a run to be reported.

    Returns:
        A list of ``(start, end)`` tuples of Python ints, in ascending order.
    """
    # Pad with False on both sides so every run has a rising and a falling edge.
    padded = np.concatenate(([False], silent, [False]))
    edges = np.diff(padded.astype(np.int8))
    starts = np.flatnonzero(edges == 1)
    ends = np.flatnonzero(edges == -1) - 1
    return [
        (int(start), int(end))
        for start, end in zip(starts, ends)
        if end - start + 1 >= min_frames
    ]


def split_wav(wavfilepath: str, output_dir: str) -> None:
    """Split a WAV file at its silent sections and write each audio part out.

    A frame is silent when every channel's sample magnitude is at most
    ``threshold``; a silent section is a run of at least ``silence_min_frames``
    such frames. Each stretch of audio between two silent sections is written
    to ``{output_dir}/{name} partNN{ext}``.

    Args:
        wavfilepath: Path of the WAV file to split.
        output_dir: Existing directory that receives the parts.

    Raises:
        RuntimeError: If the file is entirely silent, or contains only one
            audio part and therefore does not need splitting.
    """
    print("-----------------------------------------")
    sr, rdata = wavfile.read(wavfilepath, mmap=True)
    # scipy returns a 1-D array for mono files; normalise to (frames, channels).
    frames = rdata.reshape(-1, 1) if rdata.ndim == 1 else rdata
    n_frames, n_ch = frames.shape
    length_sec = n_frames / sr
    print(f"{wavfilepath} | Fs = {sr} | ch = {n_ch} | frames = {n_frames} | length = {length_sec}")

    # For simplicity, add a long-enough silent section at the head and the tail
    # so the data always starts and ends with a detected silent section.
    zero_area = np.zeros((silence_min_frames, n_ch), dtype=frames.dtype)
    data = np.vstack((zero_area, frames, zero_area))

    print("START")

    # Detect silent sections. The range comparison is used instead of np.abs()
    # because abs() of the most negative integer sample overflows.
    silent = np.all((data >= -threshold) & (data <= threshold), axis=1)
    silence_area = _find_silence_runs(silent, silence_min_frames)
    for silence_start, silence_end in silence_area:
        print(f"silence_area : {silence_start} to {silence_end}")

    # Sanity checks
    if len(silence_area) == 1:
        raise RuntimeError("This is silence file")  # completely silent file
    if len(silence_area) == 2:
        raise RuntimeError("This file does not need to be split")  # single audio part

    # Convert to audio sections: everything between consecutive silent sections.
    audio_area = [
        (before[1] + 1, after[0] - 1)
        for before, after in zip(silence_area, silence_area[1:])
    ]

    # Output
    name, ext = os.path.splitext(os.path.basename(wavfilepath))
    for part_no, (audio_start, audio_end) in enumerate(audio_area, start=1):
        output_fname = f"{output_dir}/{name} part{part_no:0=2}{ext}"
        wavfile.write(output_fname, sr, data[audio_start:audio_end + 1])
        # Indices are positions in the padded data, not in the original file.
        print(f"output : {output_fname} | {audio_start} to {audio_end}")
    print("END")
    print("-----------------------------------------")
def main() -> None:
    """Split every ``*.wav`` file directly inside the directory in ``sys.argv[1]``.

    Parts are written to a ``split_data`` sub-directory of the target
    directory, which is created if missing. Files that ``split_wav`` rejects
    with ``RuntimeError`` (entirely silent, or nothing to split) are reported
    on stderr and skipped so that one such file does not abort the batch.
    """
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} TARGET_DIR")
    target_dir = sys.argv[1]
    output_dir = f"{target_dir}/split_data"
    os.makedirs(output_dir, exist_ok=True)
    # Sort so files are processed in a deterministic order.
    for path in sorted(glob.glob(f"{target_dir}/*.wav")):
        try:
            split_wav(path, output_dir)
        except RuntimeError as err:
            print(f"skipped : {path} | {err}", file=sys.stderr)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment