Skip to content

Instantly share code, notes, and snippets.

package isabelpong;
import java.awt.Color;
import java.awt.Point;
import java.awt.Rectangle;
import java.awt.event.KeyEvent;
import java.util.Random;
import processing.core.PApplet;
@MartinWeiss12
MartinWeiss12 / spotify-wrapped.py
Last active December 11, 2023 02:41
JSON ETL
# Path to the folder that holds all of the endsong JSON files.
path = ''
# Path where the Excel files will be saved.
output_path = ''

# Every regular file directly inside `path` whose name ends with '.json'.
files = [
    os.path.join(path, f)
    for f in os.listdir(path)
    if os.path.isfile(os.path.join(path, f)) and f.endswith('.json')
]

# Parse each file exactly once. Previously every file was parsed three times:
# twice while building `data_frames` and a third time for `data`.
raw_frames = [pd.read_json(file) for file in files]

# Per-file frames with every boolean column cast to float64.
data_frames = [
    frame.astype(
        {col: 'float64' for col in frame.select_dtypes(include='bool').columns}
    )
    for frame in raw_frames
]

# All files stacked into a single frame (built from the uncast frames, as before).
data = pd.concat(raw_frames)
@MartinWeiss12
MartinWeiss12 / spotifyWrapped.py
Last active December 17, 2022 05:16
DataFrame ETL
msPlayedList = (data['ms_played']).tolist()
est = pytz.timezone('US/Eastern')
utc = pytz.utc
fmt = '%Y-%m-%d %H:%M:%S'
for ind in data.index:
date = (data['ts'][ind])
holdTS = datetime(int(date[0:4]), int(date[5:7]),
int(date[8:10]), int(date[11:13]),
int(date[14:16]), int(date[17:19]), tzinfo = utc)
estTS = holdTS.astimezone(est).strftime(fmt)
@MartinWeiss12
MartinWeiss12 / spotify-wrapped.py
Last active December 11, 2023 19:44
Convert UTC to desired timezone
# Timezone the stream timestamps are converted to.
timezone = pytz.timezone('US/Eastern')
# Parse 'ts' as UTC, shift it into `timezone`, then render it as a
# 'YYYY-MM-DD HH:MM:SS' string. A bare `datetime.now(tz=timezone)` call used
# to sit before this statement; its result was discarded, so it was dropped.
filtered_data['ts'] = (
    pd.to_datetime(filtered_data['ts'], utc=True)
    .dt.tz_convert(timezone)
    .dt.strftime('%Y-%m-%d %H:%M:%S')
)
# filter FROM a date
# start_date = '2020-01-01'
# filtered_data = filtered_data[filtered_data['ts'] >= start_date]
@MartinWeiss12
MartinWeiss12 / spotify-wrapped.py
Last active December 11, 2023 22:03
Get Album Image URL
# Distinct track URIs, split into sublists of at most 20 entries each.
track_uris = cleaned_df['Track URI']
unique_track_uris = list(set(track_uris))

# Number of sublists: one per full group of 20, plus one for any remainder.
full_groups, remainder = divmod(len(unique_track_uris), 20)
n_unique_track_sublists = full_groups + (1 if remainder else 0)
unique_track_sublists = [
    unique_track_uris[start:start + 20]
    for start in range(0, n_unique_track_sublists * 20, 20)
]

# Empty result table with the four output columns.
track_artist_album_df = pd.DataFrame(
    columns=['Track URI', 'Artist URI', 'Album URI', 'Album Image URL']
)
# Wall-clock reference taken just before the batched requests start.
start_time = time.time()
for track_list in unique_track_sublists:
params = {'ids': ','.join(track_list)}
response = requests.get('https://api.spotify.com/v1/tracks', headers=headers, params=params)
@MartinWeiss12
MartinWeiss12 / spotifyWrapped.py
Last active December 17, 2022 05:34
Top Albums
from collections import Counter


def most_frequent(albumList):
    """Return the element that occurs most often in albumList."""
    return max(set(albumList), key=albumList.count)


albumList = data['Album'].tolist()

# Count every album in a single pass and keep the 50 most streamed
# (change the argument to see a different number of top albums).
# The previous loop never removed the album it had just picked, so every
# iteration selected the same album; it also relied on DataFrame.append,
# which pandas 2.0 removed. If there are fewer than 50 distinct albums,
# most_common simply returns all of them.
ranked_albums = Counter(albumList).most_common(50)

topAlbums = [album for album, _ in ranked_albums]
topAlbumsDf = pd.DataFrame(
    [
        {'Rank': rank, 'Album': album, 'Streams': streams}
        for rank, (album, streams) in enumerate(ranked_albums, start=1)
    ],
    columns=['Rank', 'Album', 'Streams'],
)
@MartinWeiss12
MartinWeiss12 / spotify-wrapped.py
Last active March 4, 2024 02:35
URI Matching
# Map each row's artist URI to an image URL via unique_artist_image_url_dict.
# The expression is parenthesised because a continuation line that starts
# with '.' is only valid Python inside brackets.
track_artist_album_df['Artist Image URL'] = (
    track_artist_album_df['Artist URI'].map(unique_artist_image_url_dict)
)
# Left-join the per-track columns onto cleaned_df by 'Track URI', so rows
# of cleaned_df without a match are kept.
spotify_data = pd.merge(cleaned_df, track_artist_album_df, on='Track URI', how='left')
def get_top_100(entity, spotify_data):
if entity == 'Track':
spotify_data[['Track', 'Album']] = spotify_data[['Track', 'Album']]
.apply(lambda x: x.str.replace('Feat', 'feat'))
track_artist_groups = spotify_data.groupby(['Track', 'Artist'])
import os

# Build the three rankings and write each to its own Excel file.
# os.path.join supplies the separator, so output_path works with or without
# a trailing slash; plain string concatenation produced names such as
# 'outtop-100-tracks.xlsx' when the slash was missing. An empty output_path
# still resolves to the bare file name, as before.
top_100_tracks = get_top_100('Track', spotify_data)[['Rank', 'Track', 'Album', 'Artist', 'Streams']]
top_100_tracks.to_excel(os.path.join(output_path, 'top-100-tracks.xlsx'), index=False)

top_100_artists = get_top_100('Artist', spotify_data)[['Rank', 'Artist', 'Streams', 'Artist Image URL']]
top_100_artists.to_excel(os.path.join(output_path, 'top-100-artists.xlsx'), index=False)

top_100_albums = get_top_100('Album', spotify_data)[['Rank', 'Album', 'Artist', 'Streams', 'Album Image URL']]
top_100_albums.to_excel(os.path.join(output_path, 'top-100-albums.xlsx'), index=False)
@MartinWeiss12
MartinWeiss12 / spotify-wrapped.py
Last active December 11, 2023 22:01
Most Consecutive Streams
mpi = 0
max_count = 0
unique_uri_list = []
for i in range(len(uri_list) - 1):
if uri_list[i] == uri_list[i+1]:
count = count + 1
if count > max_count:
max_count = count
mpi = i
unique_uri_list.append(uri_list[i])
# Path of your Spotify top-tracks Excel file.
path = r''
# Path where your Spotify grid is written.
outputPath = ''
# Folder that will store the album cover images.
imageFolderPath = ''

data = pd.read_excel(path)

# Pull the four columns out of the sheet as plain Python lists.
rankList, trackList, streamList, trackUriList = (
    data[column].tolist() for column in ('Rank', 'Track', 'Streams', 'URI')
)
albumUriList = []