Skip to content

Instantly share code, notes, and snippets.

@niklasvincent
Last active May 1, 2020 07:04
Show Gist options
  • Save niklasvincent/62ebad21352124dd9ac7296407e5506f to your computer and use it in GitHub Desktop.
Save niklasvincent/62ebad21352124dd9ac7296407e5506f to your computer and use it in GitHub Desktop.
Most played songs by year on last.fm

Most played songs by year from last.fm

How to use

Download last_fm_to_csv.py from this Gist (you might have to click "Raw" and then right-click and select "Save as").

You'll need an API key from last.fm, which you can get by creating an API account at https://www.last.fm/api/account/create. You can leave every field apart from Contact email and Application name blank.

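Once you've got the API key, run the following in a terminal (assuming last_fm_to_csv.py was downloaded to your Downloads folder). The script is written for Python 2 (it imports urllib2), so make sure python refers to a Python 2 interpreter: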

python ~/Downloads/last_fm_to_csv.py LAST_FM_USERNAME LAST_FM_API_KEY

Replace LAST_FM_USERNAME with your last.fm username and LAST_FM_API_KEY with the API key you got when you registered a new API account in the previous step.

Final results

The script will output the total number of pages of tracks it will have to go through and finally output two CSV files:

17 total pages of tracks to retrieve from last.fm for LAST_FM_USERNAME
10
Wrote all played songs to LAST_FM_USERNAME_played_songs.csv
Wrote most played songs by year to LAST_FM_USERNAME_most_played_songs_by_year.csv
  • LAST_FM_USERNAME_played_songs.csv will contain all the songs the user has ever reported as played to last.fm
  • LAST_FM_USERNAME_most_played_songs_by_year.csv will contain all songs ordered by year and number of times they were played that year.
import csv
import json
import time
import urllib2
import sys
from collections import defaultdict, namedtuple
from datetime import datetime
# A track identified by its artist and title; hashable, so it can be used
# as a dictionary key when counting plays.
Song = namedtuple("Song", "artist_name track_name")

# One scrobble: the moment a Song was played (a datetime) plus the Song itself.
PlayedSong = namedtuple("PlayedSong", "time_played song")
def get_played_songs(username, api_key, limit=200, page=1):
    """Fetch every scrobble that last.fm reports for ``username``.

    One initial request (for ``page``) is made only to read the total number
    of pages. Every page from 1 up to that total is then downloaded through
    ``_fetch_json``. A page that still fails after its retries is reported on
    stdout and skipped, so the returned list may be incomplete.

    Args:
        username: last.fm user name.
        api_key: last.fm API key.
        limit: Number of tracks requested per page.
        page: Page used for the initial request that reads the page count.

    Returns:
        A list of ``PlayedSong`` tuples, in the order the API returned them.
        ``time_played`` is a naive ``datetime`` in the local time zone.

    Raises:
        Whatever ``urllib2.urlopen`` or ``json.load`` raise when the initial
        request fails; that request is not retried.
    """
    url = "https://ws.audioscrobbler.com/2.0/?method=user.getrecenttracks&user={}&api_key={}&limit={}&page={}&format=json"

    request_url = url.format(username, api_key, limit, page)
    response = urllib2.urlopen(request_url)
    try:
        data = json.load(response)
    finally:
        # Release the connection even when the body is not valid JSON.
        response.close()
    pages = int(data["recenttracks"]["@attr"]["totalPages"])
    print("{} total pages of tracks to retrieve from last.fm for {}".format(pages, username))

    responses = []
    # A separate loop variable keeps the ``page`` argument from being clobbered.
    for page_number in range(1, pages + 1):
        if page_number % 10 == 0:
            # Lightweight progress indicator.
            print(page_number)
        try:
            request_url = url.format(username, api_key, limit, page_number)
            responses.append(_fetch_json(request_url))
        except Exception as err:
            # Best effort: report the failed page and carry on with the rest.
            print("Got error whilst fetching page {}: {}".format(page_number, str(err)))

    played_songs = []
    for scrobbles in responses:
        tracks = scrobbles["recenttracks"]["track"]
        if isinstance(tracks, dict):
            # NOTE(review): last.fm's JSON appears to collapse a one-element
            # track list into a bare object; normalise so iteration below
            # always sees track dicts. Confirm against the API.
            tracks = [tracks]
        for scrobble in tracks:
            if "date" not in scrobble:
                # The track that is playing right now is reported without a
                # timestamp; it has not been scrobbled yet, so skip it rather
                # than crash with a KeyError.
                continue
            song = Song(
                artist_name=scrobble["artist"]["#text"],
                track_name=scrobble["name"],
            )
            played_songs.append(PlayedSong(
                time_played=datetime.fromtimestamp(float(scrobble["date"]["uts"])),
                song=song,
            ))
    return played_songs
def _fetch_json(request_url, nbr_of_retries=3):
if nbr_of_retries == 0:
raise Exception("Failed to fetch page {} after multiple attempts")
try:
response = urllib2.urlopen(request_url)
data = json.load(response)
return data
except:
time.sleep(0.5)
return _fetch_json(request_url, nbr_of_retries=nbr_of_retries-1)
def build_most_played_songs_by_year(played_songs):
    """Group plays by calendar year and rank the songs within each year.

    Args:
        played_songs: Iterable of ``PlayedSong`` tuples.

    Returns:
        A ``defaultdict`` mapping each year (taken from ``time_played.year``)
        to the list produced by ``sort_songs_by_most_played`` for the plays
        of that year.
    """
    plays_grouped_by_year = defaultdict(list)
    for entry in played_songs:
        plays_grouped_by_year[entry.time_played.year].append(entry)

    # Swap each year's raw play list for its ranked (count, song) list.
    for year, plays in list(plays_grouped_by_year.items()):
        plays_grouped_by_year[year] = sort_songs_by_most_played(plays)
    return plays_grouped_by_year
def sort_songs_by_most_played(played_songs):
    """Count the plays of each song and rank the songs by that count.

    Args:
        played_songs: Iterable of objects exposing a hashable ``song``
            attribute.

    Returns:
        A list of ``(number_of_plays, song)`` tuples, highest count first.
        Songs with equal counts keep the dictionary's iteration order.
    """
    play_counts = defaultdict(int)
    for entry in played_songs:
        play_counts[entry.song] += 1

    ranking = [(count, song) for song, count in play_counts.items()]
    # Sort on the count alone; the sort is stable, so ties are left untouched.
    ranking.sort(key=lambda pair: pair[0], reverse=True)
    return ranking
def main(username, api_key):
    """Download a user's scrobbles and write two CSV reports.

    Two files are created in the current working directory:

    * ``<username>_played_songs.csv`` - one row per play, with the time
      played, artist and track.
    * ``<username>_most_played_songs_by_year.csv`` - one row per song and
      year, in ascending year order and, within a year, in the order
      returned by ``build_most_played_songs_by_year``.

    Args:
        username: last.fm user name; also used as the file name prefix.
        api_key: last.fm API key.
    """
    played_songs = get_played_songs(username=username, api_key=api_key)
    most_played_songs_by_year = build_most_played_songs_by_year(played_songs)

    # Output raw data to a CSV. The file is opened in binary mode and text is
    # encoded to UTF-8 by hand, as the Python 2 csv module requires.
    played_songs_csv_filename = "{}_played_songs.csv".format(username)
    with open(played_songs_csv_filename, "wb") as csvfile:
        writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["Time Played", "Artist", "Track"])
        for played_song in played_songs:
            writer.writerow([
                played_song.time_played.strftime("%m/%d/%Y %H:%M:%S"),
                played_song.song.artist_name.encode('utf-8'),
                played_song.song.track_name.encode('utf-8'),
            ])
    print("Wrote all played songs to {}".format(played_songs_csv_filename))

    # Output the top lists by year to a CSV.
    most_played_songs_by_year_csv_filename = "{}_most_played_songs_by_year.csv".format(username)
    with open(most_played_songs_by_year_csv_filename, "wb") as csvfile:
        writer = csv.writer(csvfile, delimiter=",", quotechar='"', quoting=csv.QUOTE_MINIMAL)
        writer.writerow(["Year", "Number of Times Played", "Artist", "Track"])
        # Dictionary order is arbitrary; sort so the rows really are grouped
        # in ascending year order.
        for year in sorted(most_played_songs_by_year):
            for nbr_of_times_played, song in most_played_songs_by_year[year]:
                writer.writerow([
                    year,
                    nbr_of_times_played,
                    song.artist_name.encode('utf-8'),
                    song.track_name.encode('utf-8'),
                ])
    print("Wrote most played songs by year to {}".format(most_played_songs_by_year_csv_filename))
# Script entry point: expects the last.fm user name and API key as the first
# two command-line arguments; anything beyond those is ignored.
if __name__ == "__main__":
    if len(sys.argv) <= 2:
        # Too few arguments: show how the script is meant to be invoked.
        print("Usage: {} LAST_FM_USERNAME LAST_FM_API_KEY".format(sys.argv[0]))
    else:
        main(username=sys.argv[1], api_key=sys.argv[2])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment