Skip to content

Instantly share code, notes, and snippets.

@tomatosoupcan
Created April 24, 2023 12:49
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tomatosoupcan/399adc52e516746a1d254c536b3f00b0 to your computer and use it in GitHub Desktop.
Save tomatosoupcan/399adc52e516746a1d254c536b3f00b0 to your computer and use it in GitHub Desktop.
Scrape Data from Music League for Analysis
import requests
import spotipy
import pandas as pd
from spotipy.oauth2 import SpotifyClientCredentials
# Music League API settings: the account to query, the API root, the
# endpoint listing that account's leagues, and the browser session cookie
# sent with every request.
user = 'ml_userid'
base_url = 'https://app.musicleague.com/api/v1/'
url = '{}users/{}/leagues'.format(base_url, user)
cookie = dict(session='session cookie from browser')
def get_member(members, id):
    """Return the name of the first member whose user id equals ``id``.

    Args:
        members: Iterable of member records. Each record is indexed as
            ``member['user']['id']`` and ``member['user']['name']``.
        id: User id to look for. The name shadows the builtin but is kept
            so existing keyword callers continue to work.

    Returns:
        The matching member's name, or the placeholder string
        ``'Error collecting name'`` when no record matches.
    """
    matching_names = (
        member['user']['name']
        for member in members
        if member['user']['id'] == id
    )
    # next() with a default stops at the first match and falls back to the
    # placeholder when the generator is exhausted.
    return next(matching_names, 'Error collecting name')
def spoticheck(uri, client=None):
    """Look up a track and return its name, artist name and artist genres.

    Args:
        uri: Track identifier, passed unchanged to the client's ``track``
            call.
        client: Optional object exposing ``track(uri)`` and
            ``artist(artist_id)``. When omitted, the module-level ``sp``
            client is used, so existing one-argument calls are unaffected.

    Returns:
        A ``(name, artist, genres)`` tuple. ``genres`` is the artist's
        genre list joined with commas, or ``''`` when the list is empty.
    """
    if client is None:
        client = sp
    track = client.track(uri)
    t_name = track['name']
    # NOTE(review): the artist is taken from the track's *album* entry, not
    # from track['artists']; the two can differ (e.g. on compilations).
    # Behaviour is kept as-is; confirm that this is intended.
    album_artist = track['album']['artists'][0]
    t_artist = album_artist['name']
    artist = client.artist(album_artist['id'])
    t_genres = ','.join(artist['genres'])
    return t_name, t_artist, t_genres
# Spotify API credentials and the shared client built from them.
s_client_id = 'spotify api client id'
s_client_secret = 'spotify api client secret'
s_credentials = SpotifyClientCredentials(
    client_id=s_client_id,
    client_secret=s_client_secret,
)
sp = spotipy.Spotify(auth_manager=s_credentials)
# Result table: one row per (submission, voter) pair.
df = pd.DataFrame(columns=['League', 'Round', 'Song', 'Artist', 'Genres', 'Submitter', 'Voter', 'Points'])

# Fetch the leagues listed for the configured user.
req_leagues = requests.get(url, cookies=cookie)

for league in req_leagues.json():
    l_name, l_id = league['name'], league['id']

    # Member records for this league; used to turn user ids into names.
    url = base_url + 'leagues/' + l_id + '/members'
    req_members = requests.get(url, cookies=cookie)

    # Rounds of this league.
    url = base_url + 'leagues/' + l_id + '/rounds'
    req_rounds = requests.get(url, cookies=cookie)

    # Named ``league_round`` so the builtin ``round`` is not shadowed.
    for league_round in req_rounds.json():
        try:
            r_name, r_id = league_round['name'], league_round['id']
            url = base_url + 'leagues/' + l_id + '/rounds/' + r_id + '/results'
            req_subs = requests.get(url, cookies=cookie)
            standings = req_subs.json()['standings']
            # Decode the member payload once per round instead of once per
            # lookup. It stays inside the try so an undecodable payload
            # still only skips the round.
            members = req_members.json()
            for result in standings:
                # s_score is not written to the CSV. The lookup is kept
                # because a missing 'pointsPossible' key skips the round
                # through the handler below.
                s_score = result['pointsPossible']
                s_uri = result['submission']['spotifyUri']
                s_votes = result['votes']
                s_submitter = result['submission']['submitterId']
                s_name, s_artist, s_genres = spoticheck(s_uri)
                s_submitter = get_member(members, s_submitter)
                for vote in s_votes:
                    v_id, v_points = vote['voterId'], vote['weight']
                    v_id = get_member(members, v_id)
                    df.loc[len(df)] = [l_name, r_name, s_name, s_artist, s_genres, s_submitter, v_id, v_points]
        except Exception:
            # Best-effort: a round that is still running, or that fails for
            # any other reason, is skipped. Rows written before the failure
            # are kept. Catching Exception rather than using a bare except
            # lets Ctrl-C and SystemExit stop the script.
            continue

# index=False is the documented way to leave the row index out of the file.
df.to_csv('output.csv', index=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment