Skip to content

Instantly share code, notes, and snippets.

@adammenges
Last active August 29, 2015 14:20
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
Star You must be signed in to star a gist
Embed
What would you like to do?
ml-bot
##############################################################################
## ##
## ##
## ( \/ )( ) ___( _ \ / \(_ _) ##
## / \/ \/ (_/\(___)) _ (( O ) )( ##
## \_)(_/\____/ (____/ \__/ (__) ##
## ##
## ##
## ##
## The beginnings of an ml-bot. To start, he'll let us know when ##
## there is a trending post on r/machinelearning ##
## ##
## ##
##############################################################################
import json
import os
import pickle
import time

import requests
from boltons.cacheutils import LRU
# File used to persist the set of already-announced URLs across restarts.
pushed_path = 'pushed.pickle'

# Identify ourselves to reddit per their API etiquette.
USER_AGENT = "linux:com.adammenges.herpaderp-mlbot:v0.00000001 (by adammenges)"

# In-memory store of recently seen posts, keyed by URL.
cache = LRU(max_size=10000)

# Restore the record of already-pushed URLs if a previous run saved one;
# otherwise start with a fresh bounded map.
if not os.path.exists(pushed_path):
    pushed = LRU(max_size=2000)
else:
    with open(pushed_path, 'rb') as f:
        pushed = pickle.load(f)
# Should be moved over to helper lib
# Should be moved over to helper lib
def requests_get(url):
    """Fetch *url* and return the response body as text.

    Returns the body for HTML or JSON responses and "" for any error,
    rate-limit pause is handled by sleeping and retrying the same URL.

    Args:
        url: The URL to fetch.

    Returns:
        str: response text for HTML/JSON content, "" otherwise.
    """
    try:
        response = requests.get(
            url,
            headers={'User-Agent': USER_AGENT},
            timeout=60,
            # NOTE(review): verify=False disables TLS certificate checks.
            # Kept to preserve behavior, but this should be removed unless
            # there is a known reason reddit's cert fails to validate.
            verify=False,
        )
    except Exception as e:
        print("ERROR: " + str(e))
        return ""
    # Reddit rate-limits aggressively; back off and retry the same URL.
    if 'Too Many Requests' in response.reason:
        print("Pausing...")
        time.sleep(30)
        return requests_get(url)
    content_type = response.headers.get('content-type', '')
    if 'html' in content_type or 'json' in content_type:
        return response.text
    print('Not HTML/JSON')  # Proper logging at some point
    return ""  # Support PDFs, etc., later...
# Do some work here once we have enough data to choose a good threshold
def running_mean():
    """Placeholder for a running-mean score computation; does nothing yet."""
    pass
def refresh_data():
    """Refresh the module-level `cache` with r/machinelearning's listing.

    Fetches the subreddit's JSON front page and stores each post's data
    dict in `cache`, keyed by the post URL.  Any failure (network, bad
    JSON, unexpected structure) is logged and swallowed so the polling
    loop keeps running.
    """
    try:
        print('Starting to get r/ml data')
        body = requests_get('http://reddit.com/r/machinelearning.json')
        listing = json.loads(body)
        for child in listing['data']['children']:
            post = child['data']
            cache[post['url']] = post
    except Exception as e:
        # Best-effort refresh: log and carry on with the stale cache.
        print('ERROR: ' + str(e))
def push(url):
    """Announce *url* on Slack and persist it as already pushed.

    Posts a message to the Slack incoming webhook, then records the URL
    in `pushed` and dumps `pushed` to disk so restarts don't re-announce.

    Args:
        url: The post URL being announced.

    Returns:
        The `requests.Response` from the Slack webhook call.
    """
    push_url = 'https://hooks.slack.com/services/****'
    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    payload = {
        'text': "New top post on r/machinelearning! \n\n <{}>".format(url),
        "username": "ml-bot",
        "icon_emoji": ":smiling_imp:"
    }
    # Bug fix: `headers` was built but never passed to requests.post.
    r = requests.post(push_url, data=json.dumps(payload), headers=headers)
    # Record the URL only after the POST attempt so a raised exception
    # doesn't leave it marked as pushed without ever being announced.
    pushed[url] = True
    with open(pushed_path, 'wb') as f:
        pickle.dump(pushed, f)
    return r
def run():
    """Main loop: poll r/machinelearning forever, announcing hot posts.

    Every 10 minutes the cache is refreshed and any post with more than
    50 upvotes that hasn't been announced yet is pushed to Slack.
    """
    import time
    while True:
        refresh_data()
        for post_url, post in cache.items():
            is_hot = post['ups'] > 50
            if is_hot and post_url not in pushed:
                push(post_url)
        print('sleeping...')
        time.sleep(600)
if __name__ == '__main__':
    # Grace period so an accidental start can be killed before the loop
    # begins and risks ruining the dumped cache.
    print('about to begin...')
    # Bug fix: `time` was previously only imported inside run()'s local
    # scope, so this module-level sleep raised NameError; `time` is now
    # imported at the top of the file.
    time.sleep(20)
    run()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment