Skip to content

Instantly share code, notes, and snippets.

@andrewmd5
Last active August 29, 2015 14:03
Show Gist options
  • Save andrewmd5/cf2ef6fd8f7e168966fe to your computer and use it in GitHub Desktop.
Save andrewmd5/cf2ef6fd8f7e168966fe to your computer and use it in GitHub Desktop.
Getting Reddit's Summer Go-To Music
import urllib2
import re
import HTMLParser
# Shared parser instance; only its unescape() method is used, to turn HTML
# entities (e.g. "&amp;") back into plain characters.
html_parser = HTMLParser.HTMLParser()
# Output log, opened in append mode so results accumulate across runs; it is
# closed at the end of the script.
text_file = open("music.txt", "a")
def get_between(source, start, stop):
    """Return the text between the first ``start``/``stop`` pair in ``source``.

    ``start`` and ``stop`` are spliced into a regular expression as-is, so
    they are interpreted as regex fragments rather than literal strings.

    The captured span may cross line boundaries; any newline characters inside
    it are removed from the returned value.

    Returns the captured text, or the string ``'none'`` when no
    ``start``...``stop`` pair is found.
    """
    # ``[\s\S]`` matches newlines too, unlike ``.``, so a value that is
    # wrapped over several lines (e.g. a multi-line <title>) is still found.
    # Kept inside the group so the meaning of ``start``/``stop`` is unchanged.
    match = re.search(start + r'([\s\S]*?)' + stop, source)
    if match is None:
        return 'none'
    return match.group(1).replace('\n', '')
# Download the Reddit thread, follow every link whose href contains "youtube",
# and log "title|url" for each page that could be fetched.
THREAD_URL = 'http://www.reddit.com/r/Music/comments/29tt6a/what_is_your_goto_summer_driving_song/?limit=500'
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}
# Captures (href, link text) for each anchor tag; only the href is used below.
ANCHOR_RE = re.compile(r"<a.*?\s*href=\"(.*?)\".*?>(.*?)</a>")


def fetch_page(url):
    """Download ``url`` and return its body as unicode text.

    The body is decoded as UTF-8 with undecodable bytes replaced, so later
    entity unescaping never mixes raw non-ASCII bytes with unicode (which
    raises UnicodeDecodeError on Python 2).

    Raises whatever ``urllib2`` raises for a bad or unreachable URL
    (``urllib2.URLError`` for network/HTTP failures, ``ValueError`` for a URL
    without a usable scheme).
    """
    request = urllib2.Request(url, headers=REQUEST_HEADERS)
    response = urllib2.urlopen(request)
    try:
        return response.read().decode('utf-8', 'replace')
    finally:
        response.close()


try:
    thread_html = fetch_page(THREAD_URL)
    for href, _link_text in ANCHOR_RE.findall(thread_html):
        if 'youtube' not in href:
            continue
        # The href is still entity-escaped as it appeared in the HTML
        # ("&amp;" between query parameters); unescape it BEFORE requesting so
        # the real URL is fetched and logged.
        song_url = html_parser.unescape(href)
        try:
            video_html = fetch_page(song_url)
        except (IOError, ValueError) as error:
            # IOError covers urllib2.URLError/HTTPError; ValueError covers
            # relative or scheme-less hrefs. One dead link must not abort the
            # whole run.
            print(('Skipping %s (%s)' % (song_url, error)).encode('utf-8'))
            continue
        title = get_between(video_html, '<title>', '</title>')
        title = html_parser.unescape(title.replace(' - YouTube', ''))
        # Encode explicitly: titles are unicode and may contain non-ASCII
        # characters that the implicit ASCII codec would reject.
        print(('Song: %s | %s \n' % (title, song_url)).encode('utf-8'))
        text_file.write(('%s|%s\n' % (title, song_url)).encode('utf-8'))
finally:
    # Always release the log file, even if the thread download itself failed.
    text_file.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment