Skip to content

Instantly share code, notes, and snippets.

@badescunicu
Last active December 1, 2022 21:26
Show Gist options
  • Save badescunicu/e80aaa54e3987ae89889 to your computer and use it in GitHub Desktop.
Save badescunicu/e80aaa54e3987ae89889 to your computer and use it in GitHub Desktop.
Beatport top 100 track names parser
#!/usr/bin/python
import sys
import urllib
import re
from datetime import datetime
# usage: ./top100beatport <genre> <url_to_top100>
def extract_file_names(url):
    """Scrape (artist, track name) pairs from a Beatport top-100 page.

    The document at *url* is downloaded and searched for a
    ``<td class="secondColumn">`` cell followed by a ``title="..."``
    attribute (the track name), then a bare ``<td>`` cell followed by
    another ``title="..."`` attribute (the artist).

    Args:
        url: Address of the chart page to download.

    Returns:
        A list of ``(artist, track_name)`` tuples in the order they occur
        in the page.
    """
    # Function-scope imports keep the block self-contained and let it run on
    # both Python 2 (urllib.urlopen) and Python 3 (urllib.request.urlopen).
    from contextlib import closing
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    # closing() guarantees the connection is released even if read() raises;
    # Python 2 response objects are not context managers on their own.
    with closing(urlopen(url)) as response:
        url_bulk_text = response.read()

    # Python 3 hands back bytes, which a str pattern cannot search.
    # On Python 2 the payload is already a str and is left untouched.
    if not isinstance(url_bulk_text, str):
        url_bulk_text = url_bulk_text.decode('utf-8', 'replace')

    # Capture track and artist in ONE match. Running two separate findall
    # passes and zipping them pairs values by position only, so a single row
    # without an artist cell would shift every following pairing.
    # '.' does not match newlines here, so both cells must share a line.
    pattern = (r'<td class="secondColumn">.*?title="(.*?)"'
               r'.*?<td>.*?title="(.*?)"')
    return [(artist, track_name)
            for track_name, artist in re.findall(pattern, url_bulk_text)]
def main():
    """Command-line entry point: save a Beatport top-100 chart as text.

    Reads the genre label from ``sys.argv[1]`` and the chart URL from
    ``sys.argv[2]``, scrapes the chart with ``extract_file_names`` and writes
    one ``artist - track`` line per entry to
    ``beatportTop100_<genre>_<day>_<month>.txt`` (a relative path, using
    today's day and month numbers).

    If fewer than two arguments are supplied, the usage line is printed and
    the function returns without doing anything else.
    """
    if len(sys.argv) < 3:
        # A single parenthesised argument prints the same text under both the
        # Python 2 print statement and the Python 3 print function.
        print("usage: ./top100beatport <genre> <url_to_top100>")
        return

    genre, url = sys.argv[1], sys.argv[2]
    today = datetime.now()
    top100 = extract_file_names(url)

    out_name = "beatportTop100_{genre}_{day}_{month}.txt".format(
        genre=genre, day=today.day, month=today.month)
    # The with-block closes the file even when a write fails part-way.
    with open(out_name, "w") as f_out:
        for artist, track in top100:
            f_out.write(artist + ' - ' + track + '\n')
# Run the scraper only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()
@badescunicu
Copy link
Author

The output of this program can be fed to roll.io to listen to the songs.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment