Skip to content

Instantly share code, notes, and snippets.

@mapa17
Created June 28, 2020 20:58
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mapa17/aa9ef769d8b1a906e75908e9686513a3 to your computer and use it in GitHub Desktop.
Save mapa17/aa9ef769d8b1a906e75908e9686513a3 to your computer and use it in GitHub Desktop.
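# Helper script to extract speaker embeddings (x-vectors) from Kaldi scp files
# and aggregate them into a single pickle file.
# Call with
# python xvector_extractor.py [INPUT FOLDER containing x-vectors as scp] [OUTPUT FILE]
# Note: ".pkl" is appended to [OUTPUT FILE], i.e. the result is written to "[OUTPUT FILE].pkl".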
import torchaudio
import glob
import pickle
import click
@click.command()
@click.argument('folder', type=click.Path(exists=True))
@click.argument('output', type=click.Path(exists=False))
def xvector_extract(folder: str, output: str):
    """Aggregate the x-vectors of every scp file below FOLDER into one pickle.

    FOLDER is searched recursively for ``*.scp`` files. Each file is read
    with ``torchaudio.kaldi_io.read_vec_flt_scp`` and its key/vector pairs
    are merged into a single dictionary, which is pickled to ``OUTPUT.pkl``
    (the ``.pkl`` suffix is always appended to OUTPUT).

    Files that fail to load are reported and skipped. If the same key occurs
    in more than one file, the entry from the file read last is kept.
    """
    # Escape the folder so glob metacharacters in its name ('[', '*', '?')
    # are matched literally. Sort the result for a reproducible read order,
    # because that order decides which entry wins when a key is duplicated.
    pattern = f'{glob.escape(folder)}/**/*.scp'
    xvector_files = sorted(glob.glob(pattern, recursive=True))
    print(f'Found {len(xvector_files)} scp files. Loading them ...')

    xvectors = {}
    for fname in xvector_files:
        try:
            loaded = dict(torchaudio.kaldi_io.read_vec_flt_scp(fname))
        except Exception as e:
            # Best effort: one unreadable scp file must not abort the run.
            print(f'Reading {fname} failed!\n{e}')
            continue
        print(f'Reading {len(loaded)} xvectors from {fname} ...')
        xvectors.update(loaded)
    print(f'Finished reading {len(xvectors)} xvectors!')

    foutname = f'{output}.pkl'
    print(f'Writing aggregated xvector file to {foutname} ...')
    with open(foutname, mode='wb') as fout:
        pickle.dump(xvectors, fout, protocol=pickle.HIGHEST_PROTOCOL)
    print('Done!')
# Script entry point: only run the click command when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    xvector_extract()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment