Skip to content

Instantly share code, notes, and snippets.

@Luuk3333
Last active May 23, 2024 10:52
Show Gist options
  • Save Luuk3333/2dfae8ad0960569afa993f8382e7244d to your computer and use it in GitHub Desktop.
Save Luuk3333/2dfae8ad0960569afa993f8382e7244d to your computer and use it in GitHub Desktop.
Calculates your most skipped Spotify songs
import json
import os
#
# Place this script in spotify data request > MyData directory.
#
THRESHOLD = 3000  # time in milliseconds to determine a 'skip'
AMOUNT = 100  # amount of skipped tracks to display


def load_history(directory='.'):
    """Read the consecutive StreamingHistory<N>.json files found in *directory*.

    Files are read starting at N = 0 and reading stops at the first missing
    number.

    Returns:
        A tuple ``(items, files_read)``: the concatenated entries of all files
        and the number of files that were actually read.
    """
    items = []
    files_read = 0
    while True:
        filename = os.path.join(
            directory, 'StreamingHistory{}.json'.format(files_read))
        if not os.path.isfile(filename):
            break
        with open(filename, 'r', encoding='utf-8') as myfile:
            items.extend(json.load(myfile))
        files_read += 1
    return items, files_read


def count_skips(items, threshold=THRESHOLD):
    """Count how often each track was skipped.

    An entry counts as a skip when its 'msPlayed' value is below *threshold*.

    Args:
        items: iterable of dicts with 'artistName', 'trackName' and
            'msPlayed' keys.
        threshold: play time in milliseconds below which an entry is a skip.

    Returns:
        A tuple ``(total_count, data)``: the number of entries seen and a dict
        mapping ``(artist, track)`` to ``{'artist', 'track', 'count'}``.
    """
    data = {}
    total_count = 0
    for item in items:
        total_count += 1
        if item['msPlayed'] >= threshold:
            continue
        # A tuple key cannot collide the way concatenated strings can
        # ('ab' + 'c' == 'a' + 'bc').
        key = (item['artistName'], item['trackName'])
        entry = data.get(key)
        if entry is None:
            data[key] = {
                'artist': item['artistName'],
                'track': item['trackName'],
                'count': 1
            }
        else:
            entry['count'] += 1
    return total_count, data


def main():
    """Print the most skipped tracks from the history files in the cwd."""
    items, files_read = load_history()
    # Report the number of files actually read (previously off by one).
    print('Read {} history json files.'.format(files_read))

    total_count, data = count_skips(items)
    print('\nTracks listened:', total_count)
    # NOTE: `data` only holds tracks that were skipped at least once.
    print('Unique tracks: ', len(data))
    print('\nSkipped tracks:')

    # Most skipped first; sorted() is stable, so ties keep first-seen order.
    ranked = sorted(data.values(), key=lambda entry: entry['count'], reverse=True)
    for index, item in enumerate(ranked[:AMOUNT]):
        print('#{} {} - {} ({}x)'.format(index+1, item['artist'], item['track'], item['count']))


if __name__ == '__main__':
    main()
@RobertJanVlak
Copy link

RobertJanVlak commented Nov 6, 2023

I made a new revision of your script to have more control over the result and a little more documentation. I'm not very good at Python, but this seems to work as intended:

import json
import os
from datetime import datetime

# Go to Privacy settings in spotify account
# Download your data (this request will take a couple of days for spotify)
# Place this script in the Spotify data request > MyData directory.
# Run this script in a terminal

THRESHOLD = 3000  # Time in milliseconds to determine a 'skip'
THRESHOLD_PLAYED = 100000  # Time in milliseconds to determine a 'play'
AMOUNT = 100  # Amount of skipped tracks to display
FILTER_MORE_PLAYED_THEN_SKIPPED = True

# Define your start and end date to filter (in the format 'YYYY-MM-DD HH:MM').
# The start is inclusive, the end is exclusive.
start_date = '2023-01-01 00:00'
end_date = '2023-12-31 00:00'

start_datetime = datetime.strptime(start_date, '%Y-%m-%d %H:%M')
end_datetime = datetime.strptime(end_date, '%Y-%m-%d %H:%M')


def read_history_files(directory='.'):
    """Read the consecutive StreamingHistory<N>.json files found in *directory*.

    Files are read starting at N = 0 and reading stops at the first missing
    number.

    Returns:
        A tuple ``(items, files_read)``: the concatenated entries of all files
        and the number of files that were actually read.
    """
    items = []
    files_read = 0
    while True:
        filename = os.path.join(
            directory, 'StreamingHistory{}.json'.format(files_read))
        if not os.path.isfile(filename):
            break
        with open(filename, 'r', encoding='utf-8') as myfile:
            items.extend(json.load(myfile))
        files_read += 1
    return items, files_read


def collect_stats(items, start, end,
                  skip_threshold=THRESHOLD, play_threshold=THRESHOLD_PLAYED):
    """Count skips and plays per track for entries with start <= endTime < end.

    Every in-range track gets a record, whether its first entry is a play, a
    skip or neither. Plays are therefore never lost because they happened
    before the track's first skip, and the result does not depend on the
    order of the entries.

    Args:
        items: iterable of dicts with 'endTime' ('YYYY-MM-DD HH:MM'),
            'artistName', 'trackName' and 'msPlayed' keys.
        start: inclusive lower bound (datetime) for 'endTime'.
        end: exclusive upper bound (datetime) for 'endTime'.
        skip_threshold: entries played for fewer milliseconds are skips.
        play_threshold: entries played for at least this many milliseconds
            are plays.

    Returns:
        A tuple ``(total_count, data)``: the number of in-range entries and a
        dict mapping ``(artist, track)`` to
        ``{'artist', 'track', 'count', 'play_count'}``.
    """
    data = {}
    total_count = 0
    for item in items:
        end_time = datetime.strptime(item['endTime'], '%Y-%m-%d %H:%M')
        if not start <= end_time < end:
            continue
        total_count += 1

        # A tuple key cannot collide the way concatenated strings can
        # ('ab' + 'c' == 'a' + 'bc').
        key = (item['artistName'], item['trackName'])
        entry = data.get(key)
        if entry is None:
            entry = data[key] = {
                'artist': item['artistName'],
                'track': item['trackName'],
                'count': 0,
                'play_count': 0
            }

        if item['msPlayed'] >= play_threshold:
            entry['play_count'] += 1
        elif item['msPlayed'] < skip_threshold:
            entry['count'] += 1
    return total_count, data


def top_played_tracks(data, limit=10):
    """Return up to *limit* records with at least one play, most played first."""
    played = [entry for entry in data.values() if entry['play_count'] > 0]
    played.sort(key=lambda entry: entry['play_count'], reverse=True)
    return played[:limit]


def skipped_tracks(data, filter_more_played=FILTER_MORE_PLAYED_THEN_SKIPPED):
    """Return the records skipped at least once, most skipped first.

    When *filter_more_played* is true, tracks that were played more often
    than they were skipped are left out.
    """
    skipped = [
        entry for entry in data.values()
        if entry['count'] > 0
        and (not filter_more_played or entry['play_count'] <= entry['count'])
    ]
    skipped.sort(key=lambda entry: entry['count'], reverse=True)
    return skipped


def main():
    """Print play and skip statistics for the history files in the cwd."""
    items, files_read = read_history_files()
    # Report the number of files actually read (previously off by one).
    print('Read {} history json files.'.format(files_read))

    total_count, data = collect_stats(items, start_datetime, end_datetime)

    print('\nTracks listened:', total_count)
    print('Unique tracks:', len(data))
    print('TOP 10 most played between dates:')

    for index, item in enumerate(top_played_tracks(data, 10)):
        print('#{} {} - {} (Played: {}x)'.format(index + 1, item['artist'], item['track'], item['play_count']))

    print('\nSkipped tracks between dates:')

    ranked = skipped_tracks(data, FILTER_MORE_PLAYED_THEN_SKIPPED)
    for index, item in enumerate(ranked[:AMOUNT]):
        print('#{} {} - {} (Skipped: {}x, Played: {}x)'.format(index + 1, item['artist'], item['track'], item['count'], item['play_count']))


if __name__ == '__main__':
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment