Skip to content

Instantly share code, notes, and snippets.

@adityatj
Last active December 10, 2017 17:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save adityatj/ac9b40e8e54caec4e880 to your computer and use it in GitHub Desktop.
Save adityatj/ac9b40e8e54caec4e880 to your computer and use it in GitHub Desktop.
AS Recommendation Bot [rough version]
import requests,json,re,time
from bs4 import BeautifulSoup
from HTMLParser import HTMLParser
## Dependencies
##
## BeautifulSoup 4
## Requests 2.3.0
# ---------------------------------------------------------------------------
# Module metadata and shared state used by the rest of the script.
# ---------------------------------------------------------------------------
__author__ = '/u/something'
__version__ = '1.0a'

# When true, the script sleeps for 10 seconds after each MyAnimeList request.
delay = True

# User-Agent sent with the reddit requests.
headers = {
    'User-Agent': 'asrbot/1.0a by /u/zackdota',
}

# Browser-like User-Agent sent with the MyAnimeList requests.
malhead = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/36.0.1944.0 Safari/537.36'
    ),
}

# Recommendation tally: MyAnimeList link -> {'count': int, 'name': str}.
master = {}

# Used to unescape the HTML-escaped comment bodies before they are parsed.
parser = HTMLParser()
# ---------------------------------------------------------------------------
# Crawl /r/Animesuggest: for every thread flaired "[Request]", scan the
# comments for MyAnimeList links and tally them in `master`. The first time a
# link is seen, its MyAnimeList page is fetched to obtain the anime name and
# the first image on that page is saved as "<sanitised name>.jpg" in the
# current working directory.
#
# All fetches are plain reads, so they use GET (not POST).
# ---------------------------------------------------------------------------
listing = requests.get(
    'http://www.reddit.com/r/Animesuggest/new/.json?limit=100',
    headers=headers)
data = json.loads(listing.text)

for post in data['data']['children']:
    # Only request threads are scanned for recommendations.
    if post['data']['link_flair_text'] != '[Request]':
        continue

    print('Processing Link Id: %s...' % post['data']['id'])
    thread = requests.get(post['data']['url'] + '.json?limit=100',
                          headers=headers)
    # The comment entries are read from the second element of the thread JSON.
    comments = json.loads(thread.text)[1]['data']['children']

    for comment in comments:
        # Entries without a 'body_html' (reddit's "more comments" stubs, for
        # example) have nothing to scan; indexing them would raise KeyError.
        body_html = comment['data'].get('body_html')
        if not body_html:
            continue

        soup = BeautifulSoup(parser.unescape(body_html))
        for anchor in soup.find_all('a'):
            link = anchor.get('href')
            # <a> tags without an href yield None, which cannot be searched.
            if not link or 'http://myanimelist.net/anime/' not in link:
                continue

            if link in master:
                master[link]['count'] += 1
                continue

            # First sighting of this link: get the anime info.
            page = requests.get(link, headers=malhead)
            page_soup = BeautifulSoup(page.text)
            heading = page_soup.find_all('h1')[0]
            # The name is the heading text with the leading characters that
            # belong to the heading's first <div> dropped (presumably that
            # <div> precedes the title inside the <h1> -- verify against the
            # current page layout).
            heading_div = heading.find_all('div')[0]
            name = heading.text[len(heading_div.text):]
            master[link] = {'count': 1, 'name': name}

            # Give delay for MAL requests.
            if delay:
                time.sleep(10)

            # Download the first image on the page (presumably the cover
            # art). The file name is the anime name with every
            # non-alphanumeric character replaced by '_'.
            image_url = page_soup.find_all('img')[0].get('src')
            image = requests.get(image_url, stream=True, headers=malhead)
            filename = re.sub(r'[^a-zA-Z0-9]', '_', name) + '.jpg'
            # `with` guarantees the file is closed even if a chunk fails.
            with open(filename, 'wb') as image_file:
                for chunk in image.iter_content(2048):
                    image_file.write(chunk)

            # Give delay for MAL requests.
            if delay:
                time.sleep(10)
# Report: one "<name><TAB><count>" line per anime collected in `master`.
for entry in master.values():
    print('%s\t%s' % (entry['name'], entry['count']))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment