Skip to content

Instantly share code, notes, and snippets.

@seungwonpark
Last active November 29, 2021 15:33
Show Gist options
  • Save seungwonpark/78dd69730ecee631e16018228c83af89 to your computer and use it in GitHub Desktop.
Save seungwonpark/78dd69730ecee631e16018228c83af89 to your computer and use it in GitHub Desktop.
Split CSD (Children's Song Dataset)
Display the source blob
Display the rendered blob
Raw
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "683d01fc",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import glob\n",
"import tqdm\n",
"import random\n",
"import hashlib\n",
"import soundfile as sf\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "9cb040c8",
"metadata": {},
"outputs": [],
"source": [
"with open('english/csd_english_meta_rm_abc.txt', 'r', encoding='utf-8') as f:\n",
" lines = [line.strip().split('|') for line in f.readlines()]"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "0aae2596",
"metadata": {},
"outputs": [],
"source": [
"lengths = list()\n",
"\n",
"for line in lines:\n",
" path, text, _ = line\n",
" wavpath = os.path.join('english', path)\n",
" wav, sr = sf.read(wavpath)\n",
" lengths.append(len(wav) / sr)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "deabeda6",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7f07696da550>]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAEWCAYAAAB8LwAVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAtbElEQVR4nO3deXwddb3/8dcne9I0SZc03dKmtBQoW4Ei+6IsAqJ4vSguKKvodcMFuSr+hOv13gt6XVDkYhVEvCxucNkEQZBF9rZ2pYXu+5I2bZNmT87n98dM4DScpCdtz5lJ8n4+HufROd9Zvp+ZnM5n5jsz3zF3R0REpLucqAMQEZF4UoIQEZGUlCBERCQlJQgREUlJCUJERFJSghARkZSUIGSPzOwZM7syDvWb2SfM7Ik05rnTzL6X+egGLm1DUYKQfsXd73b3s6OOIx1mtsrMzow6jqj19QBD2y0+lCBE+hkzy406BhkclCAGATOrNrP7zazWzLaZ2S1mNsXMnjWznWa21cx+lzT9WWa2JBx3C2Bp1nO5mS02s+1m9hczm5g0zs3ss2a21Mx2mNnPzczCcblm9sMwjpVm9oVw+rwUdVxqZn8Ph83MfmxmW8ys3swWmNlhSZMPM7NHzazBzF4xs8lprIOb2efCOBvM7N/NbLKZvRjW8XszK0ia/nwzmxuu04tmdkRY/ltgAvCwme0ys2vD8j+Y2aZw2z5nZoemEdOdZvY/ZvZnM2sE3h0eZX/dzOabWaOZ3W5mVWb2WBj3X81sWBrL3lM8I83syXCZz3b7mx4ajqszs81m9q0Uy/8P4BTglnA73GJmJ4Z/6+pwmiPD38zBPW03iYi76zOAP0AuMA/4MTAEKAJOBu4FriM4SCgCTg6nHwk0ABcC+cBXgA7gyj3UcwGwDDgEyAO+DbyYNN6BR4AKgh1ALXBOOO6zwOvAeGAY8Ndw+rxw/DNd9QOXAn8Ph98LzA6XaWHdY8JxdwLbgHeF8dwN3JfG9nLgQaAMOBRoBZ4CDgDKwzgvCac9CtgCHBdu50uAVUBhOH4VcGa35V8ODAUKgZ8Ac9OI6U5gJ3BS0t9rFfAyUAWMC+OYE8ZUBDwNXJ/GsnuMJ6y3ATg1HH9z0rYfCmwEvhbWNxQ4roc63vr7JZX9RxhjMbAA+ELSuHdsN32i+egMYuB7FzAW+Lq7N7p7i7v/HWgHJgJjk8oAzgMWufsf3b2dYKexKY16Pgv8l7svdvcO4D+B6clHnMCN7r7D3dcAfwOmh+UfAW5293Xuvh24Mc11ayfYMR0MWFj3xqTxD7j7q2E8dyfVtyffd/d6d18ELASecPcV7r4TeIxgJwxwFfALd3/F3Tvd/TcECeX4nhbs7ne4e4O7twI3AEeaWXkaMT3o7i+4e8LdW8Kyn7n7ZndfDzwPvOLu/wjHP5AUZ4/SiOdRd38uHH8dcEJ45H8+sMndfxj+fhrc/ZU01qPLDQQJ91VgPfDzPswrWaIEMfBVA6vDnWSyawmOul81s0VmdnlYPhZY2zWRu3vy915MBG4Om1p2AHXh8sclTZOcaJqA0lR1plkf7v40cAvBzmWLmc00s7I06tuTzUnDzSm+dy1nIvC1rnUO17uaYH3eIWxKu9HMlptZPcGRMgRnbXuSapukG2dKacaT/FvYRfB3HUuwnsvTiDul8ODjTuAw4Ifh70xiRgli4FsLTOjenu/um9z90+4+FvgMcKuZTSFoNqjumi68TlDNnq0FPuPuFUmfYnd/MY15NxI0L3VJp76u9fipux8DTAOmAl9Pd979YC3wH93WucTd7+0Kr9v0HydoijuT4Oi5JixP5xpPJnag6cST/FsoBYYDGwjW/YA063lH7GY2Drge+DXwQzMr7G16iYYSxMD3KsEO+EYzG2JmRWZ2kpl92My6dsrbCf5TJoBHgUPN7ENhUvkSMDqNem4Dvtl1kdPMys3sw2nG+HvgajMbZ2YVwL+mM5OZHWtmx5lZPtAItITrkC2/BD4bxmDh9n2fmQ0Nx29m953oUIImqG1ACUEzXJTSiec8Mzs5vDD/78DL7r6W4HrSGDP7spkVmtlQMzuuh3p22w7hQcedwO3AFQS/z3/vaXqJjhLEAOfuncD7gSnAGmAdcBFwLPCKme0CHgKuDtvZtwIfJrgOsA04EHghjXoeAG4C7gubKxYC56YZ5i+BJ4D5wD+APxNcGO/cw3xl4bzbgdVhvD9Is8595u6zgE8TNHNtJ7hIf2nSJP8FfDtsfroGuCuMcz3Bxe6XsxVrD9KJ5x6CI/064BjgYgB3bwDOIvhtbQKWAu+Gtx5mXJS0jJuBC8M7lX5KcNAxCvh/YdPSZcBlZnZKOH337SYRMTX9SdyY2bnAbe4+cY8Ti0jG6AxCImdmxWZ2npnlJbVNPxB1XCKDnRKEpM3MbgsfXur+uW1fFw38G0EzzT+AxcB39jXelBWZndLDOuzKRH19iGtRD3F9Yh+X+4kelrtoz3PLYKcmJhERSUlnECIiktI7+rqJo5EjR3pNTU3UYYiI9CuzZ8/e6u6Vezt/v0gQNTU1zJo1K+owRET6FTNbvS/zq4lJRERSUoIQEZGUlCBERCQlJQgREUlJCUJERFJSghARkZSUIEREJCUlCBGRGGpu6+S7D7/Omm1NkcWgBCEiEkO/eG45d7ywkk31LXueOEOUIEREYujNzQ2MLC3g2JphkcWgBCEiEkOrtjZxyJgygje0RkMJQkQkZmobWnl9Yz3TqysijUMJQkQkZnY0tQEwtWpopHFkLEGY2R1mtsXMFqYY9zUzczMbman6RUT6q6a2TgBKCnIjjSOTZxB3Aud0LzSzauBsYE0G6xYR6bea24MEUZw/QBOEuz8H1KUY9WPgWkDvOhURSaE5PIMoHsBnEO9gZhcA6919XhrTXmVms8xsVm1tbRaiExGJh407g2cfRpYWRhpH1hKEmZUA3wK+k8707j7T3We4+4zKyr1+Y56ISL+zpSFIEGMriiONI5tnEJOBScA8M1sFjAfmmNnoLMYgIhJ7rR0J8nON3JzonoGALL6T2t0XAKO6vodJYoa7b81WDCIi/UFbR4KC3OifQsjkba73Ai8BB5nZOjO7IlN1iYgMJK0dnRTkRZ8gMnYG4e4f28P4mkzVLSLSn7V1JCjMi/YOJtCT1CIisdPWkYjFGUT0EYiIyG52tXZE/hQ1KEGIiMTOmromqoeXRB2GEoSISNzsaGpnxJCCqMNQghARiZuGlg5KC7P2FEKPlCBERGKkvTNBc3snQ4vyow5FCUJEJE7Wb28GYExFUcSRKEGIiMTK6romAGpGDIk4EiUIEZFYWRMmiAm6i0lERJKtq2uiIDeHUUOj7eoblCBERGJlTV0T44cXkxNxT66gBCEiEitr6ppi0bwEShAiIrHh7qzZ1sREJQgREUlW19hGQ2tHLLrZACUIEZHYeGrJFgCmV1dEG0hICUJEJAYWrNvJdQ8s4ODRQ5UgRETkbfPW7aC905n5yRnkxeB1o6AEISISCy3tnQCUF0ffB1MXJQgRkRho7UgAUJgfn91yfCIRERnEWto7MYPCGLxqtEvGIjGzO8xsi5ktTCr7gZktMbP5ZvaAmVVkqn4Rkf6kpb2TwrwczKJ/grpLJlPVncA53cqeBA5z9yOAN4FvZrB+EZF+o66xnWEl0b9FLlnGEoS7PwfUdSt7wt07wq8vA+MzVb+ISH+ypaGFUWXRvwMiWZSNXZcDj/U00syuMrNZZjartrY2i2GJiGRfW0ciVtcfIKIEYWbXAR3A3T1N4+4z3X2Gu8+orKzMXnAiIhFwh9wYXX8AyPpbsc3sUuB84Ax392zXLyISR53u5Megi+9kWU0QZnYOcC1wmrs3ZbNuEZE4S7iTE7MziEze5nov8BJwkJmtM7MrgFuAocCTZjbXzG7LVP0iIv1JwoldgsjYGYS7fyxF8e2Zqk9EpD9LJJyYtTDpSWoRkThIuJMbswyhBCEiEgMJJ1ZPUYMShIhILCQSHrvbXJUgRERiIOFOTsz2yDELR0RkcEq4q4lJRETeKRHDJ6mVIEREYiB4UC7qKHanBCEiEgOdCScnZhlCCUJEJAY8hk9SK0GIiMSAmphERCSlzoSepBYRkRT0JLWIiKTkriepRUSkm4aWdrY3tVFenB91KLtRghARidh1Dywk4XDWtKqoQ9mNEoSISIQSCeeheRsoK8rjyOqKqMPZjRKEiEiEmts7AfjwjOqII3knJQgRkQg1tQUJomZEScSRvJMShIhIhJrDBFFckLE3QO81JQgRkQg1tXcAUFKQG3Ek75SxBGFmd5jZFjNbmFQ23MyeNLOl4b/DMlW/iEh/0PTWGcQgShDAncA53cq+ATzl7gcCT4XfRUQGra4mppL8QZQg3P05oK5b8QXAb8Lh3wAfzFT9IiL9QdcZRImuQVDl7hvD4U1Aj0+FmNlVZjbLzGbV1tZmJzoRkSxraguuQQy2JqZeubsD3sv4me4+w91nVFZWZjEyEZHseXjeBgBGDCmIOJJ3ynaC2GxmYwDCf7dkuX4RkVh5dWXQEj9MCYKHgEvC4UuAB7Ncv4hIrHQmnCtPnhR1GCll8jbXe4GXgIPMbJ2ZXQHcCJxlZkuBM8PvIiKDVmtHgoK8eD6SlrHL5u7+sR5GnZGpOkVE+pPOhNORcArz4neBGvQktYhIZNo6EgCxPYOIZ1QiIoNAV4IoVIIQEZFkLR3BQ3KF+fHcFcczKhGRQaCxNXhIbkgMn6KGPVykNrPxwEeBU4CxQDOwEHgUeMzdExmPUERkgOrqZmNIYT9LEGb2a2Ac8AhwE8FDbUXAVIJO+K4zs2+EfS6JiEgfvb6hHoAhMexmA3o/g/ihuy9MUb4QuN/MCoAJmQlLRGTgm716OwBTqkojjiS1HhNEquQQvr+h2t3nu3sbsCyTwYmIDER1jW386vkV/G7WWt53xBhGDS2KOqSU9niR2syeMbMyMxsOzAF+aWY/znxoIiID0+9eW8utzyxn4ogS/uW0yVGH06N0royUu3u9mV0J3OXu15vZ/EwHJiIyEDW2dnDT40sozMvhmWtOx8yiDqlH6dzmmhf2vPoRggvWIiKyl257djkApxxYGevkAOkliO8CfwGWuftrZnYAsDSzYYmIDEyLNtSTl2Pc+omjow5lj/bYxOTufwD+kPR9BfDPmQxKRGQg2lzfwtNLtvC+w8fEtv+lZD1GaGbfDi9M9zT+PWZ2fmbCEhEZWOpb2jnnJ8FjY1ecEs/3P3TX2xnEAuBhM2shuHupluBBuQOB6cBfgf/MdIAiIgPBtX+Yz/amdj501DiOnjAs6nDS0ttzEA8CD5rZgcBJwBigHvhf4Cp3b85OiCIi/dumnS08vmgTY8qL+MGHj4w6nLSlcw1iKbooLSKy177/+BIAZn5yBrk58b5zKVn8r5KIiPRjC9bt5P5/rOfYmmEcPr486nD6RAlCRCSDbn7qTXJzjG+ce3DUofSZEoSISIY0tXXw3NKtfOqEiRwzscebQmNrj9cgzKwS+DRQkzy9u1++t5Wa2VeAKwEnuFvqMndv2dvliYjE0YNzN9DWkeCsQ6qiDmWvpNMX04PA8wS3tXbua4VmNg74EjDN3ZvN7PcELyW6c1+XLSISF7NW1fHN+xcwuXIIx07qf2cPkF6CKHH3f81AvcVm1g6UABv28/JFRCL1pznrAPjuBYeRn9s/W/PTifoRMztvf1Xo7uuB/wbWABuBne7+RPfpzOwqM5tlZrNqa2v3V/UiIlnx+sYGTjhgBCdNGRl1KHutt642GsysHriaIEk0m1l9UvleCV86dAEwieA910PM7OLu07n7THef4e4zKisr97Y6EZFIrK1rYlLlkKjD2Ce9PUk9NEN1ngmsdPdaADO7HziR4AltEZF+r6W9k7rGNsaWx/NNcelK541yT6VT1gdrgOPNrMSCztDPABbvw/JERGJlc31wU+bo8uKII9k3PZ5BmFkRMAQYGTYLdT0fXgaM29sK3f0VM/sjQQeAHcA/gJl7uzwRkbhZXrsLgPHDBmiCAD4DfJngOsGcpPJ64JZ9qdTdrweu35dliIjE1SPzN1KQm8OR4yuiDmWf9HYN4mbgZjP7orv/LIsxiYj0W+7Oi8u2cfKBIykuyI06nH2SznMQ683sQ93KdgIL3H1LBmISEem3ltfuYlN9C59/9+SoQ9ln6SSIK4ATgL+F308HZgOTzOy77v7bDMUmItLvzF27E4DDxvWvnltTSSdB5AOHuPtmADOrAu4CjgOeA5QgRGTQ29XawaPzN/DvjyxmyqhSjujn1x8gvQQxvis5hLYA1e5eF3aVISIyqD37Zi1f+/1ctu5qY9qYMn74kSP71YuBepJOgnjGzB4B/hB+/+ewbAiwI1OBiYj0BwvX7+TyO19jWEkBd152LKdNrSR4xKv/SydBfJ4gKZwUfr8L+JO7O/DuTAUmIhJ3DS3tXHXXLIrzc3ngcydSPbwk6pD2q3TeSe3AH8OPiIgAf16wka/9fh7N7Z3cdvExAy45QHovDPoQcBMwiuBpaiPIG2UZjk1EJHZe31DPH2ev49cvrmR0WRG3f2QGJ07uvz229iadJqbvA+93d/WXJCKDlrtz+99X8r1Hg13h9OoK7v308f3+YbjepJMgNis5iMhglEg4K7c10tLeyZubG/jeo4vJzzUe/dIpTK4sHRB3KvUmnQQxy8x+B/wf0NpV6O73ZyooEZE4uOaP87h/zvrdyh76wslMrcrU2xDiJZ0EUQY0AWcnlTmgBCEiA1JbR4LrH1rE/XPWUzOihG+edwgAo4YWcsiYwXP5NZ27mC7LRiAiInHwwrKtXHLHq3QknHMOHc2PLjqSkoJ0jqUHnnReGDTVzJ4ys4Xh9yPM7NuZD01EJLtaOzr505x1dCScWz5+FLd98phBmxwgjQQB/BL4JtAO4O7zgY9mMigRkWxKJJwH567nlJv+xv1z1vPeQ6s4/4ixUYcVuXRSY4m7v9rt0fGODMUjIpJVa+ua+NgvX2bd9mYOHVvG9e8/lDMOGRV1WLGQToLYamaTCS5MY2YXAhszGpWISBa0dST4+K+C5PCd86fxyRMmkp+bTsPK4JBuX0wzgYPNbD2wEvhERqMSEcmg7z++hJVbG2ls62RtXTOfO30yl588KeqwYiedu5hWAGeGvbfmuHuDmX0Z+EmGYxMR2e/un7OOW59ZDsDUqlKOmlCh5NCDtC/Pu3tj0tevsg8JwswqgF8BhxE0XV3u7i/t7fJERNLR1NbBvz38OgeMHMIDnzuJ8pL8qEOKtb29f2tfny+/GXjc3S80swJg4HWDKCKxctuzy3l84SZ2Nrfzk4umKzmkYW8ThO9thWZWDpwKXArg7m1A294uT0RkT+58YSU3PrYEgAuPGc+pUysjjqh/6DFBmFkDqROBAcX7UOckoBb4tZkdCcwGru7WhIWZXQVcBTBhwoR9qE5EBrNbn1nG9x9/gwnDS3jiK6dSlD9we1/d33q8n8vdh7p7WYrPUHffl0cL84Cjgf9x96OARuAbKeqf6e4z3H1GZaWyvYj0TUt7Jzc8tIjvP/4GB48eyj2fPk7JoY+iuOF3HbDO3V8Jv/+RIGGIiOwXzW2d/OefF3Pni6s4dWol9376eMYP06XOvsp6JyPuvsnM1prZQe7+BnAG8Hq24xCRgWnh+p1cdudr1Da0cuEx4/nBhUfQrScISVNUvVB9Ebg7vINpBaAeY0VknyUSzpfu+we5Ztxz5XGcMHmEksM+iCRBuPtcYEYUdYvIwLVqWyMrahv5z386nBOnDMz3RGeTOh0RkQHjtVV1ABwxvjziSAaGwdvRuYgMGC8u28rvZ63lkfkbmTamjINHD45XgmaaEoSI9FuNrR389Oml/OLZFeTlGBcfP5EvvmcKeeqRdb9QghCRfqetI8HX/jCPh+dtAGDGxGH84pPHMKK0MOLIBhYlCBHpd/7t4UU8PG8Dh40r4+LjJvLRd6m3hUxQghCR2Ht9Qz2rtzXS0tHJlvpW7n5lDXk5xt1XHk95sTrdyxQlCBGJJXensa2Tmx5bwm9fXr3buCmjSrnnyuOUHDJMCUJEYuepxZu5+r657GrtAODkKSP5+nsPoqw4n+L8XEYNLSQnRw/AZZoShIjEhrtz81NLue3Z5eSa8dWzplIzcggfOHJs1KENSkoQIhIbLy3fxk/+upRpY8r48UXTOUjPM0RKCUJEYmFF7S6uvGsWhXk53PPp46goKYg6pEFPCUJEItPQ0s7zS7fys6eXsXhjPfm5xi8/NUPJISaUIEQkqxpbO3htVR2/eHYFL63YBsCE4SV8+32HcOYhVdSMHBJxhNJFCUJEMqa9M8Gzb9Ty6qo65q/bwfLaRmobWgEoK8rjM6cdwHGThnPqgZXqHiOGlCBEZL9qbO1g1urtPDR3A39ZtIldrR2YwWFjyzl9aiU1I4dQM2IIJ04ewbAhakqKMyUIEdlnLe2dPDh3PX+as55Zq+pIOJQW5nH2tCrOOKSKU6aOpKxID7X1N0oQIrJXOhPOkk31LFpfz8znV7Bsyy5GlxXxudOncGR1BaccOJKi/Nyow5R9oAQhImlbW9fE80u3MmtVHc++Wcu2xjYAqsoK+dWnZnDaQZXk61rCgKEEISK92tXawd+XbuWOF1by6srgjW0jSws4+cCRvOfgUUwcMYRpY8ooyFNiGGgiSxBmlgvMAta7+/lRxSEiu9vR1Ma8dTt5YdlWnl+6lcUb6wEYWVrIteccxNnTqphcWYqZ+kIa6KI8g7gaWAyURRiDiBD0gTRv3U5+9OSbPPdmLQAFeTlMrSrlq2dN5dCxZZwweQQlBWp0GEwi+Wub2XjgfcB/AF+NIgYRCdQ1tnHdAwt4bOEmcnOMfzl9MsfWDOPEybrIPNhFdTjwE+BaQD1xiUTg9Q31/O2NLdz32hrW1jUDcOmJNXzxPVP02k55S9YThJmdD2xx99lmdnov010FXAUwYYJeJyjSF50JZ9uuVtbtaGbJxgbmrd3B5oYWtu5qZdPOVrbuCp5mnl5dwUUzqjlzWhUHj1Zrr+wuijOIk4APmNl5QBFQZmb/6+4XJ0/k7jOBmQAzZszw7IcpEh/uTnun09rRyZJNDdQ1trGrpYMNO5rZ2dxOQ0sHtbta2dLQwub6VrbtaiWR9L9maFEeE0eUUFlayKFjyjl8fDlnH1rFqKFF0a2UxF7WE4S7fxP4JkB4BnFN9+QgMpi4O29sbuCl5dve2uFvrm9lc30Lm+pbqG9u321n311JQS6lhXmMLC2kqixIAFVlhYwqK6KqrIiDRw9lTHmR+jqSPtMtCSJZ0NrRyc6mdhxwDzqxm7NmO3PX7uC5N2tZXtsIQFF+DhXFBYwqK2T8sBJm1AyjoriAvFwjL8fIy81hcmUpY8qLGFKYx5jyIl1IloyJNEG4+zPAM1HGIJJJ63c0c8Wdr7F0yy46U5wGFOfncmBVKdecPZXzjxirrq4lVnQGIbIfNbZ28PjCTWyqb+GFZVtZtKGenc3tfGTGeA4fX0GOgWGYwYGjSjlqwjByc/TAmcSTEoTIXmpq6+D1DfUs3tTAuromFm2oZ86a7TS1dQLBk8enTa3knMNGc97hYyKOVqTvlCBE0rBw/U7mrNkOQHNbJ08v2cKs1dvfajYqyM1h6uhSzj9iDO89dDQnTdFDZtL/KUGIdPPS8m38ZdEmmts6ae3oZOW2Juat3bHbNKPLivjMqQdwxPgKjqwup7K0UHcJyYCjBCGDjrvTmXA63XEPHiq799U1PPtmLRt2NLO8tpGSglzKivIpyMuhoiSf75w/jXMPH01BmATKivPVrbUMeEoQMii8ubmBb//fQuav20FLeyLlNOOHFTN+WDGXnTSJC48ZryYiGfSUIGTAcXeW1zZS39LOhh3N3PPKGl5cvg2Ai4+fwIghheTmGLk5wd1EuWZUlOTzwaPGUZinpCDSRQlCBoTmtk5eWrGVV1bW8ewbtSzZ1LDb+PceWsU1Zx/EgVXqH1IkXUoQ0q+0dyZYUdvI/XPWsX5H0AtpQ0sHz4bvMAAYV1HMl844kOnV5YwaWsT4YcVUlBREFbJIv6UEIbHn7jy+cBN/WbSJ55Zupa6xjfxco3pYCRjkmHHhMeM5btJwTp1aSVWZOqAT2R+UICSWOhPOyq2NPDxvA/e9tobN9a0U5edwbM1w3n/kWE6eMpKxFcVRhykyoClBSORa2juZt3YHC9bvZOPOFhpa2vnbG7XUNgTvLHj3QZV85czR/NPRuogskk1KEJI1nQlnS0MLq7Y2MXt1HSu2NrK8tpHFG+pp6wxuPS0pyKU4P5ejJw7jrGlVHFVdoQvLIhFRgpD9pqMzQV1TG7UNrWxpaGX99mY27Wxh5bZG5q/bwYYdLbv1aDqmvIhJI4dw6Uk1HDdpOFNGlTJxhHozFYkLJQhJSyLhbGtsY3N9C1saWlhR28iauibW1DWxoyl43qB2VyverUfr3BxjdFkRR1aXc8GR4xhTUcTY8mKOmlChO4tEYk4JYhBr70zQ3N5Jc1sn9c3ttHYk2NnczuptTaza1si67U1s2hm8wnJLQwvtnbvv/Yvzc5k8agjDSgo4dWolY8uLqBxaSOXQQkaWFjK2opiqsiJ1Zy3STylBDHCNrR08vWQLf5i9jlVbG2lu76Ql/HTf4ScryM2hengxo8uLOO6A4VSVFTE6fIXliNICakYMYWRpAWba+YsMVEoQA0BHZ4JXV9bxj7U7WLh+Jw0tHW91Rrdo/U4a2zoZWVrAuyYNp7w4n6L84EJwUX4uJQXBv2XF+RTl5TCkMI9JI4foyF9ElCD6q407m3li0WZqG1r588KNrAjfaTyytIAJw0ve6mvohMkjuPCYat59cKVuERWRPlGC6EcSCeeVlXXc+swynl+69a3yqVWlfOf8aZx+UCU1I4aQoyN/EdkPsp4gzKwauAuoAhyY6e43ZzuO/qS5rZNfPr+C3768mtqGVqrKCjlrWhUfnD6Ocw8brYQgIhkRxRlEB/A1d59jZkOB2Wb2pLu/HkEskWvvTPDYwk3UNrTi7jS0dDBnzXYWb2ygraOT1o4EbZ0J3OHEySP41nkHc/a00Qwp1MmfiGRW1vcy7r4R2BgON5jZYmAcMCgSxK7WDlZvC64XzFm9nV+/sIoVWxt3m2Z0WRGnTa1kaFEehXk5FObnctLkEbxr0nDdNSQiWRPpYaiZ1QBHAa9EGcf+1taRYMOOZjbXt1DX2EZdUxsL1u1kwfqdLNnUsNvTxNPGlPFvHziUC6aPDV9gYwwpyFUiEJHIRZYgzKwU+BPwZXevTzH+KuAqgAkTJmQ5uj3btquVl1fUUdfUxuqtjTy9ZAu1u1p7fL5gaFEe06sr+NzpkzlkTBm5OcaooYVMr65QMhCRWIokQZhZPkFyuNvd7081jbvPBGYCzJgxo+cnurJoV2sHP3tqKbNXb2fOmu10nQjk5xqTK0v54PRxlBblUZKfy+jyIkaXFzF8SAHDSgoYXVaki8ki0q9EcReTAbcDi939R9muf2+0dya4/e8rufVvy6hv6WBy5RA+e9pkTpoykgOrSqkoLqAgLyfqMEVE9qsoziBOAj4JLDCzuWHZt9z9zxHE0qtEwrnn1TX84rnlrK1rZnp1BZ86YSIfOnp81KGJiGRcFHcx/R2IXVtLR2eCbY1trNvezGur6lhZ28i8dTtYsqmB8uJ8fv7xoznv8NG6XiAig8agvpl+zbYmvvfo6yzdsou1dU10JN1dNHxIAVMqS7n+/dP41Ak16pdIRAadQZ0grv3TPF5eUcf7Dh/DuYeNZkxFMZWlhRxbM4wRpYVRhyciEqlBmyA6E86rK+u44uRJ/L/zp0UdjohI7AzaW28aWtpJOIytKI46FBGRWBq0CWLd9mYAyovzI45ERCSeBl0T0+zVdfzXn5cwa/V2zODg0UOjDklEJJYGVYKYs2Y7F//qVYaV5POFd0/hrGlVHDauPOqwRERiaVAkiOa2Tp5aspnHFmyiub2Th684iSmjdOYgItKbQZEgPn/PHJ5esgWAKaNKlRxERNIw4BPE7NXbeXrJFkaWFvL7zxxP5VA93yAiko4BnSB++tRSfvTkmwA88LkTqR5eEnFEIiL9x4BOEFVlhXxw+ljeffAoJQcRkT4a0AniomMncNGx8XvZkIhIfzBoH5QTEZHeKUGIiEhKShAiIpKSEoSIiKSkBCEiIikpQYiISEpKECIikpIShIiIpGTuHnUMe2RmtcDqvZx9JLB1P4aTDf0xZuifcSvm7FDM2ZMc90R3r9zbBfWLBLEvzGyWu8+IOo6+6I8xQ/+MWzFnh2LOnv0Zt5qYREQkJSUIERFJaTAkiJlRB7AX+mPM0D/jVszZoZizZ7/FPeCvQYiIyN4ZDGcQIiKyF5QgREQkpQGdIMzsHDN7w8yWmdk3oo6ni5lVm9nfzOx1M1tkZleH5TeY2Xozmxt+zkua55vherxhZu+NKO5VZrYgjG1WWDbczJ40s6Xhv8PCcjOzn4YxzzezoyOI96CkbTnXzOrN7Mtx3M5mdoeZbTGzhUllfd62ZnZJOP1SM7skgph/YGZLwrgeMLOKsLzGzJqTtvltSfMcE/6uloXrZVmOuc+/h2zuW3qI+XdJ8a4ys7lh+f7dzu4+ID9ALrAcOAAoAOYB06KOK4xtDHB0ODwUeBOYBtwAXJNi+mlh/IXApHC9ciOIexUwslvZ94FvhMPfAG4Kh88DHgMMOB54JQa/h03AxDhuZ+BU4Ghg4d5uW2A4sCL8d1g4PCzLMZ8N5IXDNyXFXJM8XbflvBquh4XrdW6WY+7T7yHb+5ZUMXcb/0PgO5nYzgP5DOJdwDJ3X+HubcB9wAURxwSAu2909znhcAOwGBjXyywXAPe5e6u7rwSWEaxfHFwA/CYc/g3wwaTyuzzwMlBhZmMiiK/LGcByd+/tifzItrO7PwfUpYinL9v2vcCT7l7n7tuBJ4Fzshmzuz/h7h3h15eB8b0tI4y7zN1f9mAvdhdvr+d+18N27klPv4es7lt6izk8C/gIcG9vy9jb7TyQE8Q4YG3S93X0vhOOhJnVAEcBr4RFXwhPz+/oalIgPuviwBNmNtvMrgrLqtx9Yzi8CagKh+MSc5ePsvt/ojhv5y593bZxi/9ygiPVLpPM7B9m9qyZnRKWjSOIs0tUMffl9xCn7XwKsNndlyaV7bftPJATROyZWSnwJ+DL7l4P/A8wGZgObCQ4dYyTk939aOBc4PNmdmryyPDIJHb3TZtZAfAB4A9hUdy38zvEddv2xMyuAzqAu8OijcAEdz8K+Cpwj5mVRRVfN/3u95DkY+x+4LNft/NAThDrgeqk7+PDslgws3yC5HC3u98P4O6b3b3T3RPAL3m7eSMW6+Lu68N/twAPEMS3uavpKPx3Szh5LGIOnQvMcffNEP/tnKSv2zYW8ZvZpcD5wCfCxEbYTLMtHJ5N0IY/NYwvuRkq6zHvxe8hLts5D/gQ8Luusv29nQdygngNONDMJoVHkB8FHoo4JuCtdsPbgcXu/qOk8uQ2+n8Cuu5aeAj4qJkVmtkk4ECCC05ZY2ZDzGxo1zDBxciFYWxdd8tcAjyYFPOnwjtujgd2JjWXZNtuR1lx3s7d9HXb/gU428yGhc0kZ4dlWWNm5wDXAh9w96ak8kozyw2HDyDYtivCuOvN7Pjw/8WneHs9sxVzX38Pcdm3nAkscfe3mo72+3bO1JX3OHwI7vZ4kyCLXhd1PElxnUzQXDAfmBt+zgN+CywIyx8CxiTNc124Hm+Qwbs8eon5AIK7NeYBi7q2JzACeApYCvwVGB6WG/DzMOYFwIyItvUQYBtQnlQWu+1MkMA2Au0E7cNX7M22JWj3XxZ+Losg5mUE7fNdv+vbwmn/OfzdzAXmAO9PWs4Mgp3ycuAWwh4eshhzn38P2dy3pIo5LL8T+Gy3affrdlZXGyIiktJAbmISEZF9oAQhIiIpKUGIiEhKShAiIpKSEoSIiKSkBCF7ZGYfNDM3s4OjjqW7sCfLkXuY5lt7sdxLzeyWvY+sz/XVJPfWmcF6Cs3sr2FPnxftSzzhNB/f/1FKXChBSDo+Bvw9/Lc/6nOC6G/Cp2rTcRSAu09399/taeI9qAGUIAYwJQjpVdhf1MkEDxR9NKk818z+28wWhp2cfTEsP9bMXjSzeWb2qpkN7X40bmaPmNnp4fAuC94hsCg8sn2XmT1jZivM7APhND3O3y3W/ws7ElzU1Zmgmd0IFIdHzHeHZReHsc01s18kPXl6mZm9aWavAif1sD1usKBDt64YvxSW73bEbWbXmNkN4fAzZvZjM5tlZovDbXS/Be9s+F7S4vPM7O5wmj+aWUk4/zEWdLw228z+Ym93v/GMmf3EgndzXN0tzuHh9phvZi+b2RFmNgr4X+DYcN0nd5vnmPDvNg/4fFJ5jZk9b2Zzws+J4agbgVPCZX2ll+mkv8rWk6L69M8P8Ang9nD4ReCYcPhfgD/ydt//wwn6xl8BHBuWlQF5wKXALUnLfAQ4PRx2widUCfp3egLIB44E5oblvc2/ivAdFbz9pHExwROjI8Lvu5LmPQR4GMgPv99K0O3AGGANUBmuxwvJdSbNf0O4HQqBkQRPaefTrR9+4BrghnD4Gd5+L8LVwIawvkKCJ2NHhPM7cFI43R3hMvLD+irD8ouAO5KWe2sPf7efAdeHw+9J2panA4/0MM984NRw+Add6wOUAEXh8IHArFTL6mk6ffrvJ93TUhm8PgbcHA7fF36fTdAPzG0e9v3v7nVmdjiw0d1fC8vqAaz3F1e1AY+HwwuAVndvN7MFBDvNvviSmf1TOFxNsJPa1m2aM4BjgNfCuIoJOsE7DnjG3WvDmH9H0MlZKo+6eyvQamZbeLsb7t509dWzAFjkYb9UZrYijHUHsNbdXwin+1/gSwTb5jDgyTDeXIJuF7r01Ex0MkG3C7j702Y2wnrp1dOCN79VePDuAQi6nzg3HM4HbjGz6UAnPW+XdKeTfkIJQnpkZsMJjj4PNzMn2Dm5mX29j4vqYPfmzKKk4XYPDzmBBNAK4O6JpHb13ubvivV0gqR1grs3mdkzqaYj6MfoN+7+zW7zfzDNdaErxlAnwf+jPcXYNU+C3edP8Pb/w+793ngY7yJ3P6GHWBrTjHlffAXYTHBWlwO07ON00k/oGoT05kLgt+4+0d1r3L0aWEnwkpIngc907cTDZPIGMMbMjg3LhobjVwHTzSzHzKrp+1va0pm/HNgeJoeDCV6t2KXdgu7VIej87sKwPb6rrX4iwQubTguPtPOBD/cxxs3AqHD+QoLurvtqgpl1JYKPE9wY8AZQ2VVuZvlmdmgay3qeoHmwK3lu7TqjS8XddwA7zOzksOgTSaPLCc4ME8AnCQ4UABoIXpm7p+mkn1KCkN58jOC6QLI/heW/Imiznx9e1Py4B69fvAj4WVj2JMGR9AsEieV14KcEvUz2RTrzP05wkXcxwcXTl5PGzQzjvNvdXwe+TfBmvPlhjGPCJp8bgJfC+hb3JUB3bwe+S9Ad9JPAkr7MH3qD4EVMiwneKf0/4Ta9ELgp3KZzgXQu/t4AHBOu44283W14by4Dfm5mcwnOXLrcClwS1n8wb5+1zAc6wwvbX+llOumn1JuriIikpDMIERFJSQlCRERSUoIQEZGUlCBERCQlJQgREUlJCUJERFJSghARkZT+P7r3yOzZIqVLAAAAAElFTkSuQmCC\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.title('csd_english_meta_rm_abc.txt')\n",
"plt.xlabel('Accumulated number of data')\n",
"plt.ylabel('Length (s)')\n",
"plt.plot(sorted(lengths))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3ec9fb91",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1684/1684 [00:00<00:00, 2689.83it/s]\n"
]
}
],
"source": [
"train_list = list()\n",
"val_list = list()\n",
"test_list = list()\n",
"\n",
"for line in tqdm.tqdm(lines):\n",
" path, text, _ = line\n",
" path = path.replace('.wav', '-22k.wav')\n",
" wavpath = os.path.join('english', path)\n",
" wav, sr = sf.read(wavpath)\n",
" assert sr == 22050\n",
" length = len(wav) / sr\n",
" \n",
" entry = '%s|%s|%s' % (os.path.join('CSD', 'english', path), text, \"CSD\")\n",
" h = hashlib.md5(entry.encode()).hexdigest()\n",
" h = int(h, 16)\n",
" \n",
" if length > 10.0:\n",
" if h % 2 == 0:\n",
" val_list.append(entry)\n",
" else:\n",
" test_list.append(entry)\n",
" else:\n",
" if h % 16 == 0:\n",
" val_list.append(entry)\n",
" elif h % 16 == 1:\n",
" test_list.append(entry)\n",
" else:\n",
" train_list.append(entry)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1dd56461",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1435 131 118\n"
]
}
],
"source": [
"print(len(train_list), len(val_list), len(test_list))"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "c41e8b9a",
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join('english', 'csd_en_train_22k.txt'), 'w', encoding='utf-8') as f:\n",
" for line in train_list:\n",
" f.write('%s\\n' % line)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "24bce6fe",
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join('english', 'csd_en_val_22k.txt'), 'w', encoding='utf-8') as f:\n",
" for line in val_list:\n",
" f.write('%s\\n' % line)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "9994423e",
"metadata": {},
"outputs": [],
"source": [
"with open(os.path.join('english', 'csd_en_test_22k.txt'), 'w', encoding='utf-8') as f:\n",
" for line in test_list:\n",
" f.write('%s\\n' % line)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d34493c9",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment