Skip to content

Instantly share code, notes, and snippets.

@aqzlpm11
Created January 21, 2021 06:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aqzlpm11/5517378adadaefb7a23bb45c19ad33c2 to your computer and use it in GitHub Desktop.
Save aqzlpm11/5517378adadaefb7a23bb45c19ad33c2 to your computer and use it in GitHub Desktop.
统计语音时长
from pathlib import Path
import soundfile as sf
from tqdm import tqdm
def get_info(audio_file_list):
    """Collect basic metadata for every audio file in ``audio_file_list``.

    Each path that exists on disk is opened with ``sf.SoundFile`` and
    summarised as a dict with the keys ``file``, ``duration``, ``channels``
    and ``sample_rate``. ``duration`` is the length of the opened file
    divided by its sample rate. Paths that do not exist are reported with
    a printed warning and left out of the result.

    Returns:
        A list with one dict per existing file, in input order.
    """
    records = []
    for audio_path in tqdm(audio_file_list):
        if Path(audio_path).exists():
            with sf.SoundFile(audio_path) as handle:
                rate = handle.samplerate
                records.append(dict(
                    file=audio_path,
                    duration=len(handle) / rate,
                    channels=handle.channels,
                    sample_rate=rate,
                ))
        else:
            # Missing files are skipped rather than aborting the whole scan.
            print(f"Warning: File not exists: {audio_path}")
    return records
import click
@click.command()
@click.argument('dir_path')
@click.option('--ext', default='.wav')
def run(dir_path, ext='.wav'):
    """
    对dir_path下所有扩展名为ext文件,进行统计。
    """
    # NOTE: the docstring above is shown by click as the command's --help
    # text, so it is runtime output and is kept unchanged. In English:
    # "Compute statistics over all files under dir_path whose extension
    # is ext."
    #
    # Prints the number of files, the set of channel counts, the set of
    # sample rates, the shortest/longest duration in seconds, and the total
    # duration in hours.
    print(f"processing {dir_path}, ext={ext}")

    # Recursive search; the pattern is simply '*' followed by ext.
    audio_file_list = [str(w) for w in Path(dir_path).rglob('*' + ext)]
    infos = get_info(audio_file_list)

    print(f"文件数:{len(infos)}")
    print(f"信道数:{ {v['channels'] for v in infos} }")
    print(f"采样率:{ {v['sample_rate'] for v in infos} }")

    durations = [v['duration'] for v in infos]
    if not durations:
        # min()/max() raise ValueError on an empty sequence, which used to
        # crash the command whenever no file matched (or all were skipped).
        print("Warning: no files found; skipping duration statistics")
        return

    print(f"时长区间: [{min(durations):.2f}s ~ {max(durations):.2f}s]")
    print(f"总时长:{sum(durations) / 60 / 60:.2f}h")
# Script entry point: invoke the click command only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment