# adityatj/asrbot.py

## asrbot.py
import requests,json,re,time
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser

## Dependencies
##
## BeautifulSoup 4
## Requests 2.30

__author__ = '/u/something'
__version__= '1.0a'

# When True, sleep after each MyAnimeList request (see _mal_pause).
delay=True
# User-Agent sent with the reddit requests.
headers = {'User-Agent' : 'asrbot/1.0a by /u/zackdota'}
# Browser-like User-Agent sent with the MyAnimeList requests.
malhead = {'User-Agent' : 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36'}
# Maps a MyAnimeList anime URL to {'count': times it was linked, 'name': scraped title}.
master = dict()
parser = HTMLParser()


def _mal_pause():
    """Give delay for MAL requests: sleep 10 seconds when `delay` is enabled."""
    if delay:
        time.sleep(10)


def _record_anime(link):
    """Scrape one MyAnimeList page, register it in `master`, and save an image.

    The title is taken from the page's first <h1>, minus the text of the
    first <div> nested inside it. The first <img> on the page is downloaded
    to '<sanitised title>.jpg' in the current working directory.

    Raises IndexError when the page has no <h1>, no <div> inside that <h1>,
    or no <img> (same as the original inline code).
    """
    #Get Anime info
    page = requests.get(link, headers=malhead)
    soup2 = BeautifulSoup(page.text)
    head = soup2.find_all('h1')[0]
    div = head.find_all('div')[0]
    # Drop the leading characters that belong to the nested <div>.
    name = head.text[len(div.text):]
    master[link] = {'count' : 1, 'name' : name}
    _mal_pause()

    #Download Anime image
    # NOTE(review): this is simply the first <img> on the page; presumably the
    # anime's cover image - confirm against the current MAL markup.
    img = soup2.find_all('img')[0].get('src')
    image = requests.get(img, stream=True, headers=malhead)
    # `with` guarantees the file is flushed and closed even if a chunk fails.
    with open(re.sub(r'[^a-zA-Z0-9]','_', name)+'.jpg', 'wb') as f:
        for chunk in image.iter_content(2048):
            f.write(chunk)
    _mal_pause()


# Fetch the newest submissions of the subreddit. These are plain reads, so
# GET is used rather than a POST without a body.
r = requests.get('http://www.reddit.com/r/Animesuggest/new/.json?limit=100', headers=headers)
data=json.loads(r.text)
for post in data['data']['children']:
    # Only submissions flaired as requests are processed.
    if post['data']['link_flair_text'] != '[Request]':
        continue
    print('Processing Link Id: %s...' % post['data']['id'])
    # NOTE(review): assumes the submission's url is its own reddit comments
    # page (i.e. a self post) - confirm for link posts.
    r = requests.get(post['data']['url'] + '.json?limit=100', headers=headers)
    data2=json.loads(r.text)
    # Element 1 of the response holds the comment listing.
    for comment in data2[1]['data']['children']:
        body_html = comment['data'].get('body_html')
        if not body_html:
            # Entries without a comment body (presumably reddit's "more
            # comments" placeholders) have nothing to scan.
            continue
        soup = BeautifulSoup(parser.unescape(body_html))
        for a in soup.find_all('a'):
            link = a.get('href')
            # Anchors without an href yield None; skip them and non-MAL links.
            if not link or 'http://myanimelist.net/anime/' not in link:
                continue
            if link in master:
                master[link]['count'] += 1
            else:
                _record_anime(link)


# Report every anime found together with how often it was linked.
for val in master.values():
    print('%s\t%s' %(val['name'],val['count']))