Last active
January 29, 2024 17:06
-
-
Save napolitano/2192f290bb11a55e3b20a9e038aaa1af to your computer and use it in GitHub Desktop.
Python script to download the full sound loop library from Soundraw.ai for the purpose of research related to a blog article
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Proof-of-concept script to retrieve content
# files from Soundraw for private research only.
#
# Provided "as is" for free use, without any
# warranty.
# | |
# NOTICE:
# Soundraw's library is very large: it contains
# more than 550,000 files with a total size of
# more than 190 GB. You need a fast internet
# connection, time and enough free storage space.
# There is no exception handling in this script!
# | |
# WARNING:
# Be aware that these files are copyrighted
# and are not "free for use" or redistribution.
# If you use or redistribute them, you may be
# subject to legal action.
#
# If in doubt, contact soundraw.io and ask.
# ---------------------------------------------- | |
import requests | |
import os | |
import time | |
import urllib.parse | |
import urllib.request | |
import xml.etree.ElementTree as ET | |
# Base URL of the bucket; listing queries and object keys are appended to it.
sourceurl = "https://soundraw-web.s3-ap-northeast-1.amazonaws.com"
# Filename prefix of the saved listing pages ("<prefix><index>.xml").
xmlfile = "soundraw-contents"
# Local root directory that receives the downloaded files.
subfolder = "soundraw-loops"
def recursivelyLoadFileList(sourceurl, getParameter, targetfile, numberOfFiles):
    """Download every page of the bucket listing into numbered XML files.

    Each page is requested from ``sourceurl + "/" + getParameter`` and saved
    as ``<targetfile><index>.xml``. While a page carries a
    ``NextContinuationToken`` element, the next page is requested with
    ``?list-type=2&continuation-token=<token>`` and the index is incremented.

    Despite the (kept) name, the pages are fetched in a loop, so the number
    of pages is not limited by the interpreter's recursion depth.

    Args:
        sourceurl: Base URL of the bucket, without a trailing slash.
        getParameter: Query string of the first request, e.g. "?list-type=2".
        targetfile: Filename prefix for the saved listing pages.
        numberOfFiles: Index used for the first saved page.

    Returns:
        The index of the last listing page that was written.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    while True:
        fullXmlFileName = targetfile + str(numberOfFiles) + ".xml"
        response = requests.get(sourceurl + "/" + getParameter)
        # Fail loudly instead of saving an error document as a listing page,
        # which would look like the (premature) end of the listing.
        response.raise_for_status()
        with open(fullXmlFileName, "wb") as file:
            file.write(response.content)

        root = ET.parse(fullXmlFileName).getroot()
        nextPage = ""
        # Iterate the element directly: Element.getchildren() no longer
        # exists on Python 3.9+.
        for child in root:
            # endswith() because the tag is prefixed with the XML namespace.
            if child.tag.endswith("NextContinuationToken") and child.text:
                nextPage = urllib.parse.quote(child.text)
                print("token: " + nextPage)

        if nextPage == "":
            return numberOfFiles

        numberOfFiles += 1
        getParameter = "?list-type=2&continuation-token=" + nextPage
# Maps the short code in field 4 of an object key to its folder name.
# Codes that are not listed are used as the folder name unchanged.
_LOOP_PART_NAMES = {
    "dr": "Drums",
    "ff": "Fill Front",
    "fe": "Fill End",
    "me": "Melody",
    "bs": "Bass",
    "bc": "Background Chords",
    "bc1": "Background Chords Extra 1",
    "bc2": "Background Chords Extra 2",
    "se": "Ad-Lib",
}


def _targetDirectory(subfolder, key):
    """Return the directory an object key is stored in, or None if it has too few fields.

    The key is split on "_" and the directory is built as
    ``subfolder/<field 0>/<part name>/<variant>/<field 3>`` where the part
    name is field 4 translated through ``_LOOP_PART_NAMES`` and the variant is

    * "vocals" when the key has exactly 7 fields,
    * field 6 when field 7 is the trailing "...m4a" file part,
    * field 7 otherwise.
    """
    fields = key.split("_")
    if len(fields) < 7:
        return None
    partName = _LOOP_PART_NAMES.get(fields[4], fields[4])
    if len(fields) == 7:
        variant = "vocals"
    elif fields[7].endswith("m4a"):
        variant = fields[6]
    else:
        variant = fields[7]
    # Joined with "/" (not os.sep) so the printed paths stay as they always were.
    return "/".join((subfolder, fields[0], partName, variant, fields[3]))


def loadContent(sourceurl, xmlfile, subfolder, xmlFileCount, maxXmlFileCount,
                pauseAfterSteps=300, pauseValue=0):
    """Download every object named in the saved listing pages.

    Reads ``<xmlfile><index>.xml`` for ``index`` from ``xmlFileCount`` up to
    and including ``maxXmlFileCount`` (the first page is always processed,
    even if ``xmlFileCount`` is already larger). For each ``Key`` inside a
    ``Contents`` element the target directory is derived from the key (see
    ``_targetDirectory``), created if necessary, and the object is downloaded
    unless the target file already exists, so an interrupted run can be
    restarted.

    Args:
        sourceurl: Base URL of the bucket, without a trailing slash.
        xmlfile: Filename prefix of the saved listing pages.
        subfolder: Local root directory for the downloads.
        xmlFileCount: Index of the first listing page to process.
        maxXmlFileCount: Index of the last listing page to process.
        pauseAfterSteps: Number of downloads after which to pause.
        pauseValue: Length of that pause in seconds (0 disables it).

    Returns:
        None.
    """
    os.makedirs(subfolder, exist_ok=True)
    steps = 0

    while True:
        root = ET.parse(xmlfile + str(xmlFileCount) + ".xml").getroot()

        print("Processing S3-Bucket-List " + str(xmlFileCount) + " of " + str(maxXmlFileCount))
        print("======================================")
        print(" ")

        # Iterate elements directly: Element.getchildren() no longer exists
        # on Python 3.9+. endswith() is used because tags carry the namespace.
        for contents in root:
            if not contents.tag.endswith("Contents"):
                continue
            for field in contents:
                if not field.tag.endswith("Key") or not field.text:
                    continue
                key = field.text

                targetDir = _targetDirectory(subfolder, key)
                if targetDir is None:
                    # Key does not have enough "_"-separated fields to be
                    # sorted into a folder; skip it instead of crashing.
                    print("Skipping: " + key)
                    continue

                destination = targetDir + "/" + key
                # dirname() rather than targetDir so a key containing "/"
                # gets its intermediate directories as well.
                os.makedirs(os.path.dirname(destination), exist_ok=True)
                if os.path.exists(destination):
                    continue

                steps += 1
                if steps > pauseAfterSteps:
                    steps = 0
                    time.sleep(pauseValue)

                print("Loading: " + sourceurl + '/' + key)
                print("Writing: " + destination)
                # Download to a temporary name first: an interrupted transfer
                # must not leave a partial file that the exists() check above
                # would treat as complete on the next run.
                partial = destination + ".part"
                # The key is quoted so that spaces or other special
                # characters do not produce an invalid URL.
                urllib.request.urlretrieve(sourceurl + "/" + urllib.parse.quote(key), partial)
                os.replace(partial, destination)

        if xmlFileCount >= maxXmlFileCount:
            return
        xmlFileCount += 1
# Step 1 (disabled): fetch the bucket listing into numbered XML files and use
# the returned index as the last page. Enable this line when the listing
# files are not on disk yet.
#maxXmlFileCount = recursivelyLoadFileList(sourceurl, "?list-type=2", xmlfile, 0)
# Index of the last listing page; hard-coded, presumably taken from an
# earlier listing run -- TODO confirm against the files on disk.
maxXmlFileCount = 553
# Step 2: download the objects named in listing pages 500..maxXmlFileCount.
# NOTE(review): starting at 500 looks like a resume point of an interrupted
# run; pass 0 to process every page -- confirm before a fresh run.
loadContent(sourceurl, xmlfile, subfolder, 500, maxXmlFileCount)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment