Created
January 28, 2018 23:37
-
-
Save yamachu/f1074fe631d6581ebfd2248188c2fffe to your computer and use it in GitHub Desktop.
Harvestが少ないサンプル数で動作しない件
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-01-28T23:34:07.697989Z", | |
"start_time": "2018-01-28T23:34:07.688821Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"import world4py\n", | |
"\n", | |
"world4py._WORLD_LIBRARY_PATH = '/Users/yamachu/.anyenv/envs/pyenv/versions/anaconda3-4.3.1/lib/python3.6/site-packages/world4py/libworld.dylib'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-01-28T23:34:08.323488Z", | |
"start_time": "2018-01-28T23:34:08.108810Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"from world4py.np import apis, tools, structures" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-01-28T23:34:08.332717Z", | |
"start_time": "2018-01-28T23:34:08.326787Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"x, fs, nbit = tools.get_wave_parameters('./demo/sample.wav')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-01-28T23:34:08.370992Z", | |
"start_time": "2018-01-28T23:34:08.336913Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([ 0.00000000e+00, 0.00000000e+00, 0.00000000e+00, ...,\n", | |
" 0.00000000e+00, 0.00000000e+00, 3.05175781e-05]), 16000, 16)" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"x, fs, nbit" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-01-28T23:34:08.395165Z", | |
"start_time": "2018-01-28T23:34:08.386219Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Help on function harvest in module world4py.np.apis:\n", | |
"\n", | |
"harvest(x, fs, f0_floor=71.0, f0_ceil=800.0, frame_period=5.0, **dummy)\n", | |
" F0 extract by Harvest\n", | |
" \n", | |
" Args:\n", | |
" x (ndarray(dtype=double, ndim=1)): Input waveform\n", | |
" fs (int): Sampling frequency [Hz]\n", | |
" f0_floor (double, optional): Floor of F0 estimation\n", | |
" f0_ceil (double, optional): Ceil of F0 estimation\n", | |
" frame_period (double, optional): Frame shift [ms]\n", | |
" \n", | |
" Returns:\n", | |
" ndarray(dtype=double, ndim=1): extracted F0\n", | |
" ndarray(dtype=double, ndim=1): Temporal positions\n", | |
" \n", | |
" Notice:\n", | |
" Harvest tend to consider F0 as voiced compared to Dio\n", | |
"\n" | |
] | |
} | |
], | |
"source": [ | |
"help(apis.harvest)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": { | |
"ExecuteTime": { | |
"end_time": "2018-01-28T23:34:08.497691Z", | |
"start_time": "2018-01-28T23:34:08.454609Z" | |
} | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(array([ 0. , 483.68521641, 477.32521021, 462.67253647,\n", | |
" 431.64963385, 386.43432658, 344.71813637, 315.4458332 ,\n", | |
" 294.01678507, 279.44260727, 276.99089551, 288.12248287,\n", | |
" 308.25729845, 327.89784622, 341.95233607, 350.21905512,\n", | |
" 353.63351189, 353.41241359, 349.85006721, 344.08769285, 0. ]),\n", | |
" array([ 0. , 0.005, 0.01 , 0.015, 0.02 , 0.025, 0.03 , 0.035,\n", | |
" 0.04 , 0.045, 0.05 , 0.055, 0.06 , 0.065, 0.07 , 0.075,\n", | |
" 0.08 , 0.085, 0.09 , 0.095, 0.1 ]))" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"apis.harvest(x[0:1600], fs, frame_period=5.0)" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"サンプル数が1410ぐらいになるとセグフォで落ちる \n", | |
"=> 大体100msecぐらいは最低でもHarvestには必要そう" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": { | |
"ExecuteTime": { | |
"start_time": "2018-01-28T23:34:23.652Z" | |
} | |
}, | |
"outputs": [], | |
"source": [ | |
"apis.harvest(x[0:1410], fs, frame_period=5.0)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
もう少し調べてみたら,単純にサンプル数が少ないことが原因ではないみたい
無声区間と評価された部分が大半を占めているとセグフォで落ちるっぽい(1600サンプルのみを使う場合)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
正直100msecあってもF0の抽出誤りが頻発するので,なんとも言えないという感じ.
無声を有声に誤る箇所が多くなる(非周期性指標でカバーするしかない)
一方で,有声区間のF0の誤りのレンジは小さいので無視できるかも...?