Last active
September 17, 2017 21:00
-
-
Save kosso/4ed65e4bb74efaa60867643df7918d64 to your computer and use it in GitHub Desktop.
A simple script to download all your Audioboom.com recordings, photos and metadata. Version 0.1. Could eventually be extended to upload to Archive.org, or anywhere else.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import json
import sys

import requests

# archiveboom.py : Downloads all Audioboom audio clips, associated photos and metadata.
# Eventually, it will also upload to Archive.org.
# Read the blog post which led to this: https://blog.kosso.co.uk/2017/09/06/about-audioboom/
# Author : Kosso

# Audioboom.com username.
# EDIT THIS to hard-code the account instead of being prompted:
# audioboom_username = 'yourusername'

# Otherwise prompt for it. `raw_input` only exists on Python 2; on Python 3 the
# equivalent builtin is `input`, so pick whichever one this interpreter has.
try:
    _prompt = raw_input
except NameError:
    _prompt = input
# Strip stray whitespace so it does not end up inside the API query string.
audioboom_username = _prompt('Enter your Audioboom username: ').strip()

# Edit these if you need to.
# First API results page to fetch, and how many clips to request per page.
start_page = 1
per_page = 100

# Maximum number of clips to download.
# Set to zero for no limit, i.e. it won't stop until Ctrl+C.
max_downloads = 0

# Leave this: running total of clips processed so far (updated by get_booms).
count = 0
def _download_file(url, filename, show_progress=False):
    """Stream `url` into `filename` in binary mode.

    Returns True when the file was written, False when the request failed
    (network error or non-2xx HTTP status). When `show_progress` is true and
    the server sent a usable content-length header, a 50-column progress bar
    is drawn on stdout.
    """
    response = None
    try:
        response = requests.get(url, stream=True)
        # Without this an HTML error page would be saved as if it were media.
        response.raise_for_status()
        try:
            total_length = int(response.headers.get('content-length') or 0)
        except ValueError:
            # Malformed header: still download, just without a progress bar.
            total_length = 0
        with open(filename, 'wb') as f:
            dl = 0
            # Fixed-size chunks: the iter_content() default is 1 byte per chunk.
            for chunk in response.iter_content(chunk_size=4096):
                f.write(chunk)
                dl += len(chunk)
                if show_progress and total_length > 0:
                    # Clamp: decoded bytes can exceed the advertised length
                    # when the body is content-encoded.
                    done = min(50, int(50 * dl / total_length))
                    sys.stdout.write("\r[%s%s]" % ('=' * done, ' ' * (50 - done)))
                    sys.stdout.flush()
    except requests.exceptions.RequestException as err:
        print('\ndownload failed: ' + url + ' (' + str(err) + ')')
        return False
    finally:
        # Streamed responses hold their connection until explicitly closed.
        if response is not None:
            response.close()
    return True


def get_booms(p):
    """Download every clip of `audioboom_username`, starting at results page `p`.

    For each clip returned by the Audioboom API this saves, in the current
    directory, the high-quality mp3, the waveform image and post image when
    the clip lists them, and a JSON file holding the clip's metadata.
    Following pages are fetched until the reported total has been processed,
    a page comes back empty, or `max_downloads` (when > 0) is reached.

    Reads the module-level `audioboom_username`, `per_page` and
    `max_downloads`, and updates the module-level `count`.

    Raises requests.exceptions.HTTPError if the API listing request itself
    returns an error status. Failures of individual file downloads are
    reported on stdout and skipped.
    """
    global count
    # Iterate over pages instead of recursing once per page, so a long or
    # never-satisfied run cannot exhaust the interpreter's recursion limit.
    while True:
        url = ('https://api.audioboom.com/audio_clips?username=' + audioboom_username
               + '&page[items]=' + str(per_page) + '&page[number]=' + str(p))
        print('URL : ' + url)
        r = requests.get(url)
        r.raise_for_status()
        body = r.json()['body']
        totals = body['totals']
        clips = body['audio_clips']
        if p == 1:
            print('user: ' + audioboom_username + ' - total: ' + str(totals['count']))
            print('')
        if not clips:
            # Ran past the last page (e.g. started after page 1, or the total
            # over-reports): asking for more pages would never end.
            break
        for clip in clips:
            print('-----------------------------------------------')
            urls = clip['urls']
            mp3_url = urls['high_mp3']
            print('TITLE : ' + clip['title'])
            print('mp3 url : ' + mp3_url)
            # Local file names are derived from the last path segment of the mp3 URL.
            mp3_name = mp3_url.split('/')[-1]
            base_name = mp3_name.split('.mp3')[0]
            json_name = base_name + '.json'
            print('mp3 name : ' + mp3_name)
            print('downloading mp3 ...')
            # Show a progress bar, since these might be large.
            if _download_file(mp3_url, mp3_name, show_progress=True):
                print('\ndownloaded: ' + mp3_name)
            # Download waveform image if available
            if 'wave_img' in urls:
                print('downloading waveform image ...')
                _download_file(urls['wave_img'], base_name + '-wave.png')
            # Download photo image if available
            if 'post_image' in urls:
                print('downloading post image ...')
                _download_file(urls['post_image']['original'], base_name + '.jpg')
            # Write the JSON data for each mp3 file separately
            with open(json_name, 'w') as outfile:
                json.dump(clip, outfile)
            print('writing JSON file: ' + json_name)
            count = count + 1
            print('done ' + str(count) + '/' + str(totals['count']) + ' files')
            if max_downloads > 0 and count >= max_downloads:
                print('MAX DOWNLOADS REACHED. Aborting.')
                return
        # Get next page if needed
        if count >= totals['count']:
            break
        p += 1
# Script entry: run the full download starting from the configured page.
print('===== ArchiveBoom =============================')
try:
    get_booms(start_page)
except KeyboardInterrupt:
    # Ctrl+C is how an unlimited run (max_downloads = 0) is meant to be
    # stopped, so exit with a short message instead of a traceback.
    print('')
    print('Interrupted by user. Stopping.')
    sys.exit(1)
print('===============================================')
print('ALL DONE!')
print('')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment