# andrewmd5/redditmusic.py

## redditmusic.py
import urllib2
import re

import HTMLParser

# Shared parser instance; the script below calls its unescape() method to
# decode HTML entities in links and titles.
html_parser = HTMLParser.HTMLParser()
# Output file, opened in append mode ("a") so existing contents are kept.
text_file = open("music.txt", "a")


def get_between(source, start, stop):
    """Return the text found between ``start`` and ``stop`` in ``source``.

    ``start`` and ``stop`` are interpolated into a regular expression
    without escaping, so regex metacharacters in them keep their special
    meaning. The first, shortest match is used, and any newlines inside
    the captured text are removed.

    Returns the string 'none' when nothing matches.
    """
    # DOTALL lets '.' cross line breaks; without it a span containing a
    # newline could never match and the newline stripping below was dead code.
    match = re.search(start + '(.*?)' + stop, source, re.DOTALL)
    if match is None:
        return 'none'
    return match.group(1).replace('\n', '')


# Download the Reddit thread, follow every YouTube link posted in it, and
# record each video's title next to its URL in the output file.
try:
    req = urllib2.Request('http://www.reddit.com/r/Music/comments/29tt6a/what_is_your_goto_summer_driving_song/?limit=500',
                          headers={'User-Agent': 'Mozilla/5.0'})
    thread_html = urllib2.urlopen(req).read()

    # findall yields one (href, anchor text) tuple per matched <a> element.
    for href, _anchor_text in re.findall(r"<a.*?\s*href=\"(.*?)\".*?>(.*?)</a>", thread_html):
        if 'youtube' not in href:
            continue

        # The href is HTML-escaped in the page source (e.g. '&amp;'), so it
        # must be decoded before it is requested, not only before logging.
        song_link = html_parser.unescape(href)
        video_req = urllib2.Request(
            song_link,
            headers={'User-Agent': 'Mozilla/5.0'})
        try:
            video_html = urllib2.urlopen(video_req).read()
        except urllib2.URLError as err:
            # One dead or blocked link should not abort the whole run.
            print('Skipping %s (%s)' % (song_link, err))
            continue

        title = get_between(video_html, '<title>', '</title>')
        title = title.replace(' - YouTube', '')
        # Decode the raw bytes first: unescape() produces unicode for
        # entities, and mixing that with non-ASCII bytes raises
        # UnicodeDecodeError on Python 2. Assumes the page is UTF-8;
        # undecodable bytes are replaced rather than raising.
        unescape_title = html_parser.unescape(title.decode('utf-8', 'replace'))
        print(('Song: %s | %s \n' % (unescape_title, song_link)).encode('utf-8'))
        text_file.write(('%s|%s\n' % (unescape_title, song_link)).encode('utf-8'))
finally:
    # Close the output file even if a request or write fails part-way.
    text_file.close()
	import urllib2
	import re

	import HTMLParser

	# NOTE(review): from the tab-indented imports just above to the end of the
	# file, the script at the top of the file appears to be repeated with
	# extraction damage — confirm whether this copy should be kept at all.
	# Shared parser instance; the code below calls its unescape() method.
	html_parser = HTMLParser.HTMLParser()
	# Output file, opened in append mode ("a") so existing contents are kept.
	text_file = open("music.txt", "a")


	def get_between(source, start, stop):
	    """Return the text found between ``start`` and ``stop`` in ``source``.

	    ``start`` and ``stop`` are interpolated into a regular expression
	    without escaping, so regex metacharacters in them keep their special
	    meaning. The first, shortest match is used, and any newlines inside
	    the captured text are removed.

	    Returns the string 'none' when nothing matches.
	    """
	    # DOTALL lets '.' cross line breaks; without it a span containing a
	    # newline could never match and the newline stripping would be dead code.
	    match = re.search(start + '(.*?)' + stop, source, re.DOTALL)
	    if match is None:
	        return 'none'
	    return match.group(1).replace('\n', '')


	# Download the Reddit thread, follow every YouTube link posted in it, and
	# record each video's title next to its URL in the output file.
	try:
	    req = urllib2.Request('http://www.reddit.com/r/Music/comments/29tt6a/what_is_your_goto_summer_driving_song/?limit=500',
	                          headers={'User-Agent': 'Mozilla/5.0'})
	    thread_html = urllib2.urlopen(req).read()

	    # The '*' quantifiers are restored here: with '.?' in their place the
	    # pattern could match at most one character per gap and found no links.
	    # findall yields one (href, anchor text) tuple per matched <a> element.
	    for href, _anchor_text in re.findall(r"<a.*?\s*href=\"(.*?)\".*?>(.*?)</a>", thread_html):
	        if 'youtube' not in href:
	            continue

	        # The href is HTML-escaped in the page source (e.g. '&amp;'), so it
	        # must be decoded before it is requested, not only before logging.
	        song_link = html_parser.unescape(href)
	        video_req = urllib2.Request(
	            song_link,
	            headers={'User-Agent': 'Mozilla/5.0'})
	        try:
	            video_html = urllib2.urlopen(video_req).read()
	        except urllib2.URLError as err:
	            # One dead or blocked link should not abort the whole run.
	            print('Skipping %s (%s)' % (song_link, err))
	            continue

	        title = get_between(video_html, '<title>', '</title>')
	        title = title.replace(' - YouTube', '')
	        # Decode the raw bytes first: unescape() produces unicode for
	        # entities, and mixing that with non-ASCII bytes raises
	        # UnicodeDecodeError on Python 2. Assumes the page is UTF-8;
	        # undecodable bytes are replaced rather than raising.
	        unescape_title = html_parser.unescape(title.decode('utf-8', 'replace'))
	        # Plain '|' separators: a '\|' in the literal would emit a backslash.
	        print(('Song: %s | %s \n' % (unescape_title, song_link)).encode('utf-8'))
	        text_file.write(('%s|%s\n' % (unescape_title, song_link)).encode('utf-8'))
	finally:
	    # Close the output file even if a request or write fails part-way.
	    text_file.close()