Skip to content

Instantly share code, notes, and snippets.

@xziyue
Created December 26, 2020 05:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save xziyue/7093a1eccebdfd9505d0ed9669577568 to your computer and use it in GitHub Desktop.
Save xziyue/7093a1eccebdfd9505d0ed9669577568 to your computer and use it in GitHub Desktop.
Pull song lists from Apple Music for Groovy BOT
import json
import os
import sys
from urllib.request import urlopen

from bs4 import BeautifulSoup
from youtube_search import YoutubeSearch
# --- Configuration ---------------------------------------------------------
# Apple Music playlist page whose HTML is scraped for song entries.
list_url = 'https://music.apple.com/us/playlist/a-list-pop/pl.5ee8333dbe944d9f9151e97d92d1ead9'
# Prefix joined with the 'url_suffix' of a YouTube search result.
youtube_url = 'https://www.youtube.com'
# When True, a previously saved copy of the page is reused if one exists.
use_cached = True
cached_filename = 'crawl_list.html'
has_cached = os.path.exists(cached_filename)

# --- Obtain the playlist HTML as raw bytes ---------------------------------
html_src = None
if use_cached and has_cached:
    with open(cached_filename, 'rb') as cache_in:
        html_src = cache_in.read()

if html_src is None:
    # Nothing was read from the cache: download the page, then save it to
    # disk. The file is written on every download, regardless of use_cached.
    with urlopen(list_url) as page:
        html_src = page.read()
    with open(cached_filename, 'wb') as cache_out:
        cache_out.write(html_src)

assert html_src is not None
# Build song_list: one {'label': ..., 'artist': ...} dict for every
# <div class="song-wrapper"> element found in the playlist HTML.
song_list = []
soup = BeautifulSoup(html_src, 'html5lib')
for song_item in soup.find_all('div', {'class': 'song-wrapper'}):
    label_tag = song_item.find('div', {'class': 'typography-label'})
    artist_tag = song_item.find('div', {'class': 'typography-caption'})
    # find() returns None when the element is absent, so the check has to
    # happen on the tags themselves, before .text is accessed. Fail with a
    # descriptive error rather than an AttributeError on None.
    if label_tag is None or artist_tag is None:
        raise ValueError(
            'song-wrapper entry is missing its typography-label or '
            'typography-caption element; the page markup may have changed'
        )
    song_list.append({
        'label': label_tag.text.strip(),
        'artist': artist_tag.text.strip(),
    })
# Look up each song on YouTube, print a "-queue <url>" command for the first
# hit, and record the URL on the song entry.
for song in song_list:
    keyword = '{artist:} - {label:}'.format(**song)
    yt_search_result = YoutubeSearch(keyword, max_results=3).to_dict()
    if not yt_search_result:
        # An empty result list would make [0] raise IndexError and abort the
        # run before the JSON file is written. Record the miss and move on.
        # The warning goes to stderr so stdout holds only queue commands.
        song['url'] = None
        print('no YouTube result for {!r}; skipping'.format(keyword),
              file=sys.stderr)
        continue
    song['url'] = youtube_url + yt_search_result[0]['url_suffix']
    print('-queue {}'.format(song['url']))

# Persist the song list (label, artist, url) for later reuse.
with open('crawl_list.json', 'w') as outfile:
    json.dump(song_list, outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment