Skip to content

Instantly share code, notes, and snippets.

@joelanders
Created September 4, 2017 15:46
Show Gist options
  • Save joelanders/c594837d166b7d9234fd91597719220e to your computer and use it in GitHub Desktop.
Save joelanders/c594837d166b7d9234fd91597719220e to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
import requests
import json
import os.path
import sys
import traceback
import time
# https://www.discogs.com/label/3889-Itiswhatitis-Recordings -> 3889
def url_to_id(url):
    """Return the id prefix of the last path segment of a label URL.

    The id is whatever precedes the first "-" in the final "/"-separated
    segment. A "#fragment" or "?query" suffix and any trailing slashes are
    dropped first, so ".../label/3889-Itiswhatitis-Recordings/" still
    yields "3889" instead of an empty string.

    Args:
        url: Label URL as a string.

    Returns:
        The id as a string ("" if the last segment is empty).
    """
    # Cut the fragment first, then the query, then any trailing slashes.
    path = url.partition("#")[0].partition("?")[0].rstrip("/")
    return path.split("/")[-1].split("-")[0]
# https://www.discogs.com/label/3889-Itiswhatitis-Recordings -> Itiswhatitis-Recordings
def url_to_name(url):
    """Return the name part of the last path segment of a label URL.

    The name is everything after the first "-" in the final "/"-separated
    segment. A "#fragment" or "?query" suffix and any trailing slashes are
    dropped first, so they cannot leak into the returned name.

    Args:
        url: Label URL as a string.

    Returns:
        The name as a string ("" if the last segment contains no "-").
    """
    # Cut the fragment first, then the query, then any trailing slashes.
    path = url.partition("#")[0].partition("?")[0].rstrip("/")
    return "-".join(path.split("/")[-1].split("-")[1:])
def all_rels_from_label(label_id):
    """Fetch the release list of a Discogs label, following pagination.

    Requests ``https://api.discogs.com/labels/<label_id>/releases`` with
    100 entries per page and keeps requesting the next page until the page
    count reported in the response's ``pagination`` entry is reached.

    Args:
        label_id: Label id as a string (it is concatenated into the URL).

    Returns:
        A list holding the ``releases`` entries of every page fetched. If
        a response has no ``releases`` key, the payload is printed and the
        entries collected so far are returned.
    """
    params = {'per_page': 100, 'page': 1}
    headers = {'User-Agent': 'FooBarApp/3.0'}
    base = 'https://api.discogs.com/labels/'
    # base = 'https://api.discogs.com/artists/'
    url = base + label_id + '/releases'
    all_releases = []
    while True:
        r = requests.get(url, params=params, headers=headers)
        # Decode the body once per page instead of re-parsing it for every
        # lookup below.
        payload = r.json()
        try:
            page_releases = payload['releases']
        except KeyError:
            print("KeyError on %s" % (payload,))
            break  # TODO: can do better?
        all_releases += page_releases
        print('added %s release(s) from page %s and have %s in total' %
              (len(page_releases), params['page'], len(all_releases)))
        if payload['pagination']['pages'] > params['page']:
            params['page'] += 1
        else:
            break
    return all_releases
def full_rel_from_rel(rel):
    """Fetch the full release document for one release-list entry.

    Args:
        rel: Mapping with a "resource_url" key pointing at the release.

    Returns:
        The decoded JSON body of the response.
    """
    url = rel["resource_url"]
    print(url)
    # Send the same User-Agent as the label listing request so both API
    # calls identify the client consistently.
    headers = {'User-Agent': 'FooBarApp/3.0'}
    return requests.get(url, headers=headers).json()
def vids_from_full_rel(full):
    """Return the "uri" of every entry under the release's "videos" key.

    Args:
        full: Mapping describing a full release.

    Returns:
        A list of the video URIs, in order; an empty list when the mapping
        has no "videos" key.
    """
    # Guard clause: nothing to collect when the release has no videos entry.
    if "videos" not in full:
        return []
    uris = []
    for video in full["videos"]:
        uris.append(video["uri"])
    return uris
def main(url):
    """Collect the video links of every release on a Discogs label.

    The release list is read from the ``rels-<label id>`` file in the
    current directory when it exists; otherwise it is fetched and written
    to that file. Then, for each release, the full release document is
    fetched and its video URIs are appended to
    ``<label name>/<catno>/.urls`` ("/" in the catalogue number is
    replaced by ".").

    A release that fails with a lookup, I/O or decoding error is reported
    with a traceback and skipped, so one bad entry does not abort the run.

    Args:
        url: Label URL, e.g.
            https://www.discogs.com/label/3889-Itiswhatitis-Recordings
    """
    l_id = url_to_id(url)
    cache_path = 'rels-' + l_id
    if os.path.isfile(cache_path):
        with open(cache_path, 'r') as f:
            rels = json.load(f)
        print('got %s item(s) from %s (from disk)' % (len(rels), l_id))
    else:
        rels = all_rels_from_label(l_id)
        print('got %s rels from %s' % (len(rels), l_id))
        # Write the cache only when the list was freshly fetched; there is
        # no point rewriting a file that was just read from disk.
        with open(cache_path, 'w') as f:
            json.dump(rels, f, indent=2)

    l_name = url_to_name(url)
    os.makedirs(l_name, exist_ok=True)

    for r in rels:
        try:
            # Pause before every release request (presumably to stay under
            # the API's rate limit -- confirm against the Discogs docs).
            time.sleep(2)
            full = full_rel_from_rel(r)
            vids = vids_from_full_rel(full)
            print('got %s vid(s) from %s - %s - %s' %
                  (len(vids), r["artist"], r["title"], r["catno"]))
            # "/" would create nested directories, so flatten it.
            dirname = r["catno"].replace("/", ".")
            rel_dir = os.path.join(l_name, dirname)
            os.makedirs(rel_dir, exist_ok=True)
            # Append so that re-runs keep previously collected links.
            with open(os.path.join(rel_dir, ".urls"), "a") as f:
                f.writelines("%s\n" % v for v in vids)
        except (KeyError, OSError, ValueError):
            # KeyError: missing field in the release entry or document;
            # OSError: file-system failures (requests' exceptions are
            # expected to derive from it as well -- see library docs);
            # ValueError: response body that is not valid JSON.
            print("-"*60)
            print("error in %s - %s" % (r.get("artist"), r.get("title")))
            traceback.print_exc(file=sys.stdout)
            print("-"*60)
if __name__ == '__main__':
    # Expect exactly one argument whose fourth "/"-separated part is
    # "label", as in https://www.discogs.com/label/<id>-<name>.
    cli_args = sys.argv[1:]
    url_parts = cli_args[0].split("/") if len(cli_args) == 1 else []
    # The length check keeps a short argument (e.g. "foo") from raising
    # IndexError instead of showing the usage message.
    if len(url_parts) < 4 or url_parts[3] != "label":
        print("takes 1 argument: the url of the label")
        # Non-zero status so callers can tell the invocation was rejected.
        sys.exit(1)
    main(cli_args[0])
    print("now do something like `for d in *; do (cd $d && youtube-dl --format bestaudio --ignore-errors --batch-file .urls); done`")
smallville-recs
├── GROOVE 159 . CD 68
├── PUDELVILLE01
│   └── Benjamin Brunn - 28.10.2012 Live at Golden Pudel Club-EjbE8RJt4l4.webm
├── S 104LP
├── SMALLVILLE
│   ├── Christopher Rau - Girl-FH3xfd5tlzs.webm
│   ├── Christopher Rau - Swag Lude-nwVYyr3vFOI.webm
│   ├── Christopher Rau - Unlimited Dancemoves-Jh4M9YlG5wc.webm
│   └── Christopher Rau - Weird Alps-EK7N5Vs_No4.webm
├── SMALLVILLE 01
│   └── Julius Steinhoff & Hammouda - Chestnuts Way [Smallville]-71jvVM8kfsM.webm
├── SMALLVILLE 02
│   ├── Denis Karimani Infinite-8mRr9kKHUfY.webm
│   ├── Denis Karimani - Realtime Island (Ndru Remix)-lFJKfmNhJB0.webm
│   └── Dennis Karimani - Realtime Island-hVk2c7q1zoA.webm
├── SMALLVILLE 03
│   └── Laps - Fog Off-HnwVKRlao7M.webm
├── SMALLVILLE 04
│   ├── DJ SWAP - Consequence (The Walk EP [Smallville Records] )-pNaFmBeVUOE.webm
│   └── Orbeat - Richie Hawtin @ Lido Circe (Naples) 29_06_2007--DDhgIcYap4.webm
├── SMALLVILLE 05
│   └── Jacek Sienkiewicz - Living in Oblivion-kKcpiQXO0QE.webm
[...etc...]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment