Skip to content

Instantly share code, notes, and snippets.

@lifeeric
Forked from AlexTMjugador/dataset-download.py
Created September 2, 2021 18:43
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lifeeric/6baa3ca5b5c2c98bfb75d3e034a9750c to your computer and use it in GitHub Desktop.
Save lifeeric/6baa3ca5b5c2c98bfb75d3e034a9750c to your computer and use it in GitHub Desktop.
Freesound API Python script to download original sound files
#!/usr/bin/python3
from freesound import FreesoundClient
from pyrate_limiter import Limiter, RequestRate, Duration
import requests
import fileinput
import os
# Credentials and the download location are taken from the environment.
# A missing variable raises KeyError and stops the script right here.
CLIENT_ID = os.environ["CLIENT_ID"]
CLIENT_SECRET = os.environ["CLIENT_SECRET"]
DATASET_DIRECTORY = os.environ["DATASET_DIRECTORY"]

RESULTS_PER_PAGE = 150  # Maximum allowed by Freesound

# URL the user has to visit to obtain an authorization code.
OAUTH_AUTHORIZE_URL = (
    "https://freesound.org/apiv2/oauth2/authorize/"
    f"?client_id={CLIENT_ID}&response_type=code"
)

print("Reading OAuth2 authorization code from file or standard input")
print("Authorization endpoint:", OAUTH_AUTHORIZE_URL)

# fileinput reads from the files named on the command line, or from
# standard input when none are given. Only the first line is used.
with fileinput.input() as auth_source:
    first_line = auth_source.readline()
oauth2_auth_code = first_line.rstrip()
# Exchange the one-time authorization code for an OAuth2 access token.
print("Exchanging OAuth2 authorization token for access token")
oauth2_token_response = requests.post(
    "https://freesound.org/apiv2/oauth2/access_token/",
    data={
        "client_id": CLIENT_ID,
        "client_secret": CLIENT_SECRET,
        "grant_type": "authorization_code",
        "code": oauth2_auth_code,
    },
    # requests applies no timeout by default, so an unresponsive server
    # would otherwise block this script forever.
    timeout=30,
).json()

if "access_token" not in oauth2_token_response:
    print("Error response:", oauth2_token_response)
    # The bare exit() helper is injected by the site module and is meant for
    # interactive use; raising SystemExit works in every interpreter setup.
    raise SystemExit(1)

oauth2_token = oauth2_token_response["access_token"]
# Freesound client authenticated with the OAuth2 access token obtained above.
fs = FreesoundClient()
fs.set_token(oauth2_token, auth_type="oauth")

# Limit API method call rate as per Freesound policy.
# See: https://freesound.org/docs/api/overview.html#throttling
_per_minute_rate = RequestRate(60, Duration.MINUTE)
_per_day_rate = RequestRate(2000, Duration.DAY)
fs_api_limiter = Limiter(_per_minute_rate, _per_day_rate)
@fs_api_limiter.ratelimit("fs", delay=True)
def get_sounds(fs):
    """Run the Freesound text search that selects the sounds to download.

    The search is restricted to sounds of type Ogg, asks only for the
    ``id``, ``name`` and ``download`` fields, and requests
    ``RESULTS_PER_PAGE`` results per page. The call is counted against the
    shared "fs" rate limit.

    Args:
        fs: The client object whose ``text_search`` method is called.

    Returns:
        Whatever ``fs.text_search`` returns for the first page of results.
    """
    search_options = {
        "filter": "type:ogg",
        "fields": "id,name,download",
        "page_size": RESULTS_PER_PAGE,
    }
    return fs.text_search(**search_options)
@fs_api_limiter.ratelimit("fs", delay=True)
def retrieve_sound(sound):
    """Download one sound into ``DATASET_DIRECTORY``.

    The call is counted against the shared "fs" rate limit.

    Args:
        sound: The search result whose ``retrieve`` method is called.
    """
    target_directory = DATASET_DIRECTORY
    sound.retrieve(target_directory)
@fs_api_limiter.ratelimit("fs", delay=True)
def next_page(pager):
    """Fetch the page of results that follows ``pager``.

    The call is counted against the shared "fs" rate limit.

    Args:
        pager: The current results page; its ``next_page`` method is called.

    Returns:
        Whatever ``pager.next_page()`` returns.
    """
    following_page = pager.next_page()
    return following_page
# Get the first page of Ogg sounds from Freesound
sounds_pager = get_sounds(fs)

# Make sure the target directory exists before the first download. Without
# it every download is expected to fail, and because failures are retried
# without limit the script would then never make progress.
os.makedirs(DATASET_DIRECTORY, exist_ok=True)

# Download their original, user-uploaded Ogg file to a directory.
# While this code is simple and works well in most circumstances,
# it is not extremely robust. Currently, the OAuth token expires
# after 86400 seconds (a day). Therefore, this script will cease to
# work if its execution lasts longer than a day, because it does not
# refresh the token. Also, errors are handled by retrying until the
# operation succeeds, which may not always be desirable.
remaining_sounds = sounds_pager.count
print("Downloading", remaining_sounds, "sounds")
while remaining_sounds > 0:
    # Download every sound on the current page.
    for sound in sounds_pager:
        print(f"- Downloading \"{sound.name}\"")
        # Retry the same sound until its download succeeds.
        while True:
            try:
                retrieve_sound(sound)
                break
            except Exception as exc:
                print(f"! Exception occurred while downloading sound \"{sound.name}\": {exc}. Retrying...")
        remaining_sounds -= 1

    # Only request another page while sounds are still outstanding.
    if remaining_sounds > 0:
        print("- Moving to the next page")
        sounds_pager = next_page(sounds_pager)
certifi==2021.5.30
charset-normalizer==2.0.4
freesound-python==1.0
idna==3.2
pyrate-limiter==2.3.4
requests==2.26.0
urllib3==1.26.6
#!/bin/sh -e
# Sets up a temporary Python 3 virtual environment with the appropriate
# dependencies, and then runs the dataset download script.
#
# All arguments are passed through to the download script, and its exit
# status becomes the exit status of this script.
#
# This script and the download script were tested to work with Python 3.9.2.

# The -e in the shebang is not applied when the script is started as
# "sh script.sh", so enable exit-on-error explicitly as well.
set -e

FREESOUND_ZIP_FILE=
FREESOUND_PYTHON_WORKDIR=
WORKDIR=$(pwd)
readonly WORKDIR

# Removes the temporary archive and working directory. Registered as an EXIT
# trap so that it also runs when a step fails and -e aborts the script.
cleanup() {
    # Leave the temporary directory before deleting it.
    cd /
    [ -z "$FREESOUND_ZIP_FILE" ] || rm -f "$FREESOUND_ZIP_FILE"
    [ -z "$FREESOUND_PYTHON_WORKDIR" ] || rm -rf "$FREESOUND_PYTHON_WORKDIR"
}
trap cleanup EXIT
# Turn termination signals into a regular exit so the EXIT trap still runs.
trap 'exit 1' HUP INT TERM

FREESOUND_ZIP_FILE=$(mktemp --tmpdir freesound-pythonXXX.zip)
readonly FREESOUND_ZIP_FILE
FREESOUND_PYTHON_WORKDIR=$(mktemp --tmpdir -d freesound-pythonXXX)
readonly FREESOUND_PYTHON_WORKDIR

cd "$FREESOUND_PYTHON_WORKDIR"

# Download the revision of freesound-python originally used
echo "> Downloading freesound-python"
wget -nv --show-progress -O "$FREESOUND_ZIP_FILE" \
    https://github.com/MTG/freesound-python/archive/1861ad0f557d8d567525e5c91ae9c2727f2c7959.zip
unzip "$FREESOUND_ZIP_FILE"
rm -f "$FREESOUND_ZIP_FILE"

# Create temporary virtual environment to install Python modules into
echo "> Setting up temporary Python virtual environment"
python3 -m venv .
. bin/activate

# Install freesound-python and pip requirements
echo "> Installing dependencies"
cd freesound-python-1861ad0f557d8d567525e5c91ae9c2727f2c7959
python3 setup.py install
pip install -r "$WORKDIR"/requirements.txt

echo
echo "*****************************"
echo "* Executing download script *"
echo "*****************************"
echo

# No "|| true" needed: the EXIT trap cleans up even when the script fails,
# so its exit status can be reported to the caller instead of being hidden.
python3 "$WORKDIR"/dataset-download.py "$@"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment