Skip to content

Instantly share code, notes, and snippets.

@jaseg
Last active August 29, 2015 14:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jaseg/17ebf3144c89f4494d25 to your computer and use it in GitHub Desktop.
Save jaseg/17ebf3144c89f4494d25 to your computer and use it in GitHub Desktop.
Pipe new releases from torrent index RSS feeds into transmission
#!/usr/bin/env python3
import time
import sys
from datetime import datetime
import feedparser
import requests
import json
import sqlite3
import ast
from dateutil.parser import parse as parsetime
try:
import re2 as re
except ImportError:
import re
from settings import TRANSMISSION_URL
def log(*args):
    """Print `args` to stdout behind a yellow ``[month-day time]`` stamp.

    A trailing ANSI reset sequence is appended as the last printed item so
    colour codes used by the caller do not bleed into later output.
    """
    stamp = time.strftime('\x1B[93m[%m-%d %H:%M:%S]\x1B[0m')
    print(stamp, *args, '\x1B[0m')
# Announce (in green) that the script has started; runs at module execution time.
log('\033[92mStarted up.\033[0m')
def feed_gen(term):
    """Yield ``(title, link)`` for each 'Trusted' entry of the nyaa search feed.

    Result pages are fetched one after another, starting at page 1, until a
    page comes back empty or the feed cannot be retrieved.

    Args:
        term: Search term to send to the tracker. Characters that are not
            URL-safe (spaces, ``&``, ``#`` ...) are percent-escaped; ``+`` and
            ``%`` are passed through so already-encoded terms keep working.

    Yields:
        Tuples of the entry title and the entry link.
    """
    from urllib.parse import quote

    # A raw space or '&' in the term would otherwise produce an invalid URL
    # or silently inject extra query parameters.
    quoted_term = quote(term, safe='+%')
    page = 1
    while True:
        log('\033[37mFetching page {}\033[0m'.format(page))
        data = feedparser.parse('http://www.nyaa.se/?page=rss&term={}&sort=2&offset={}'.format(quoted_term, page))
        # `status` is missing from the result when no HTTP response was
        # received (e.g. connection failure), so do not use attribute access.
        status = data.get('status')
        if status != requests.codes.ok:
            log('\033[91mProblem receiving feed:\033[0m HTTP status code {}.'.format(status))
            return
        if not data.entries:
            # An empty page marks the end of the result list.
            return
        for item in data.entries:
            if 'Trusted' in item.get('summary_detail', {}).get('value', ''):
                yield item.title, item.link
        page += 1
def download(url):
    """Ask Transmission (at TRANSMISSION_URL) to add the torrent found at `url`.

    Failures are logged instead of raised: connection problems, a missing
    session id, a non-OK HTTP status, and the duplicate-torrent reply.

    Args:
        url: Location of the torrent, passed as the ``filename`` argument of
            the ``torrent-add`` RPC method.
    """
    try:
        # The RPC endpoint hands out its session id in a response header; the
        # header is absent when e.g. authentication fails, hence `.get`.
        session = requests.get(TRANSMISSION_URL).headers.get('X-Transmission-Session-Id')
        if session is None:
            log('\033[91mProblem talking to transmission:\033[0m', 'no X-Transmission-Session-Id header in response')
            return
        res = requests.post(TRANSMISSION_URL, data=json.dumps({'method': 'torrent-add', 'arguments': {'filename': url}}),
                            headers={'X-Transmission-Session-Id': session})
    except requests.RequestException as err:
        # Transmission unreachable etc.: report it rather than abort the caller.
        log('\033[91mProblem talking to transmission:\033[0m', err)
        return
    if res.status_code != requests.codes.ok:
        log('\033[91mProblem talking to transmission:\033[0m', res.status_code, res.text)
    elif 'torrent-duplicate' in res.text:
        log('\033[93mTorrent was already added.\033[0m nop()')
# Command line entry point: <script> regex searchterm
if len(sys.argv) != 3:
    print('Usage: {} regex searchterm'.format(sys.argv[0]))
    # Stop here; continuing would fail with an IndexError on sys.argv below.
    sys.exit(1)

ex = re.compile(sys.argv[1])
term = sys.argv[2]

# Hand every search result whose title matches the regex to Transmission.
for title, url in feed_gen(term):
    if ex.match(title):
        log('\033[92mMatched:\033[0m "{}" with url {}'.format(title, url))
        download(url)
log('\033[92mDone.\033[0m')
# URL of the Transmission RPC endpoint that download() talks to.
# NOTE(review): presumably the content of the `settings` module the scripts
# import TRANSMISSION_URL from — confirm.
TRANSMISSION_URL='http://localhost:9091/transmission/rpc'
#!/usr/bin/env python3
import time
import sys
from datetime import datetime
import sqlite3
from dateutil.parser import parse as parsetime
from collections import namedtuple
import re
# Poll-interval bounds, in seconds (three hours and five minutes).
LOW_POLL_INTERVAL = 3 * 3600
HIGH_POLL_INTERVAL = 5 * 60
# Slack, in seconds, applied by get_snooze() when judging whether a due time
# already lies in the past.
TIME_TOLERANCE = 60 * 60
# Snooze entries whose end date is older than this many seconds are ignored.
OBSOLETION_TIME = 24 * 3600
# File holding one regex entry per line.
REGEX_FILE = 'titles.regex'
# Interval code -> number of days (the values get multiplied by 3600*24).
# False (code 'x') is falsy and contributes a zero offset.
# NOTE(review): presumably d=daily, w=weekly, b=biweekly — confirm.
INTERVALS = {
    'd': 1,
    'w': 7,
    'b': 14,
    '': 7,
    'x': False,
}
RegexEntry = namedtuple('RegexEntry', 'due interval regex')

# Somewhat of a complete overkill
conn = sqlite3.connect('torrents.db')
# `c` is deliberately left bound at module level: get_snooze() uses it.
with conn as c:
    c.execute(
        "CREATE TABLE IF NOT EXISTS snoozelist (expression text UNIQUE, enddate int)"
    )
def get_snooze(ex, fallback, iv):
    """Return the Unix timestamp at which the expression `ex` is due.

    Args:
        ex: Regex source string, used as the key into the snoozelist table.
        fallback: Object with a ``timestamp()`` method (callers pass a parsed
            datetime); used when no usable snooze entry is stored.
        iv: Interval in days.

    Returns:
        The stored snooze end date if one exists and is not older than
        OBSOLETION_TIME; otherwise the fallback time, which is moved forward
        by `iv` days minus TIME_TOLERANCE when it lies more than
        TIME_TOLERANCE in the past.
    """
    fallback_ts = int(fallback.timestamp())
    now = time.time()
    if now - TIME_TOLERANCE > fallback_ts:
        fallback_ts += 3600*24 * iv - TIME_TOLERANCE

    # `c` is the module-level sqlite connection.
    row = c.execute("SELECT enddate FROM snoozelist WHERE expression=?", (ex,)).fetchone()
    snooze_is_current = bool(row) and row[0] > now-OBSOLETION_TIME
    return row[0] if snooze_is_current else fallback_ts
def load_regex_list():
    """Parse REGEX_FILE into a list of RegexEntry tuples, in file order.

    Every non-blank line has the form ``<interval code>/<time>/<regex>``.
    Only the first two slashes separate fields, so the regex itself may
    contain ``/``.

    Returns:
        A list of ``RegexEntry(due, interval, regex)`` with `due` computed by
        get_snooze() and `regex` compiled.

    Raises:
        KeyError: if an interval code is not a key of INTERVALS.
        ValueError: if a line has fewer than three fields.
    """
    # Local import keeps this function independent of any module-level
    # rebinding of the name `re`.
    import re
    entries = []
    with open(REGEX_FILE) as f:
        for line in f:
            # Strip only the line terminator; the last line may lack one.
            line = line.rstrip('\n')
            if not line.strip():
                continue
            ivs, timespec, ex = line.split('/', 2)
            interval = INTERVALS[ivs]
            due = get_snooze(ex, parsetime(timespec), interval)
            entries.append(RegexEntry(due, interval, re.compile(ex)))
    return entries
# Command line entry point.
#   no argument -> list the loaded regexes together with their index
#   <index>     -> snooze the regex with that index for one more interval
regexes = load_regex_list()
if len(sys.argv) == 1:
    print('\033[37mLoaded regexes:\033[0m')
    # Iterate over whole entries so the `re` module name is not shadowed.
    for idx, entry in enumerate(regexes):
        print('\033[96m('+str(idx)+')\033[0m\t'+str(datetime.fromtimestamp(entry.due)), '\033[37m'+entry.regex.pattern+'\033[0m')
else:
    try:
        elem = regexes[int(sys.argv[1])]
    except (ValueError, IndexError):
        # Give a readable message instead of a traceback for a bad index.
        sys.exit('No regex with index {!r}; run without arguments to list them.'.format(sys.argv[1]))
    print('Putting regex /{}/ into snooze mode.'.format(elem.regex.pattern))
    # Interval is in days; the snooze ends one interval after the due time.
    enddate = int(elem.due + elem.interval * 3600*24)
    with conn as c:
        c.execute("INSERT OR REPLACE INTO snoozelist (expression, enddate) VALUES (?, ?)", (elem.regex.pattern, enddate))
#!/usr/bin/env python3
import time
from datetime import datetime
import feedparser
import requests
import json
import sqlite3
import ast
import os
import signal
from dateutil.parser import parse as parsetime
from threading import Lock
from collections import namedtuple
import re
from settings import TRANSMISSION_URL
# Poll-interval bounds, in seconds (three hours and five minutes).
LOW_POLL_INTERVAL = 3 * 3600
HIGH_POLL_INTERVAL = 5 * 60
# Slack, in seconds, applied by get_snooze() when judging whether a due time
# already lies in the past.
TIME_TOLERANCE = 60 * 60
# Snooze entries whose end date is older than this many seconds are ignored.
OBSOLETION_TIME = 24 * 3600
# File holding one regex entry per line.
REGEX_FILE = 'titles.regex'
# Interval code -> number of days (the values get multiplied by 3600*24).
# False (code 'x') is falsy, so such entries are never put into snooze mode.
# NOTE(review): presumably d=daily, w=weekly, b=biweekly — confirm.
INTERVALS = {
    'd': 1,
    'w': 7,
    'b': 14,
    '': 7,
    'x': False,
}
RegexEntry = namedtuple('RegexEntry', 'due interval regex')

# Somewhat of a complete overkill
conn = sqlite3.connect('torrents.db')
# `c` is deliberately left bound at module level: get_snooze() uses it.
with conn as c:
    # Key/value store for feed bookkeeping (etag, modified, last_published).
    c.execute(
        "CREATE TABLE IF NOT EXISTS meta (key text UNIQUE, value text)"
    )
    c.execute(
        "CREATE TABLE IF NOT EXISTS snoozelist (expression text UNIQUE, enddate int)"
    )
    # Seed the marker so feed_gen() always finds a value to evaluate.
    c.execute(
        "INSERT OR IGNORE INTO meta (key, value) VALUES ('last_published', '(0,)')"
    )
def log(*args):
    """Print `args` to stdout behind a yellow ``[pid@month-day time]`` stamp.

    A trailing ANSI reset sequence is appended as the last printed item so
    colour codes used by the caller do not bleed into later output.
    """
    prefix = '\x1B[93m[{}@{}]\x1B[0m'.format(os.getpid(), time.strftime('%m-%d %H:%M:%S'))
    print(prefix, *args, '\x1B[0m')
def get_snooze(ex, fallback, iv):
    """Return the Unix timestamp at which the expression `ex` is due.

    Args:
        ex: Regex source string, used as the key into the snoozelist table.
        fallback: Object with a ``timestamp()`` method (callers pass a parsed
            datetime); used when no usable snooze entry is stored.
        iv: Interval in days.

    Returns:
        The stored snooze end date if one exists and is not older than
        OBSOLETION_TIME; otherwise the fallback time, which is moved forward
        by `iv` days minus TIME_TOLERANCE when it lies more than
        TIME_TOLERANCE in the past.
    """
    fallback_ts = int(fallback.timestamp())
    now = time.time()
    if now - TIME_TOLERANCE > fallback_ts:
        fallback_ts += 3600*24 * iv - TIME_TOLERANCE

    # `c` is the module-level sqlite connection.
    row = c.execute("SELECT enddate FROM snoozelist WHERE expression=?", (ex,)).fetchone()
    snooze_is_current = bool(row) and row[0] > now-OBSOLETION_TIME
    return row[0] if snooze_is_current else fallback_ts
# Format for regex list (one entry per line):
#w/Tue 08h30/^foobar.baz
# Where the slashes separate three fields: an interval code (one of the keys of
# INTERVALS, may be left empty), the approximate release time and the regex.
# The release time can be anything dateutil comprehends. It does not need to
# be accurate, it is perfectly fine if this is off by a time zone or two—this
# script will compensate for that.
def load_regex_list():
    """Parse REGEX_FILE into a set of RegexEntry tuples and log them.

    Every non-blank line has the form ``<interval code>/<time>/<regex>``.
    Only the first two slashes separate fields, so the regex itself may
    contain ``/``.

    Returns:
        A set of ``RegexEntry(due, interval, regex)`` with `due` computed by
        get_snooze() and `regex` compiled.

    Raises:
        KeyError: if an interval code is not a key of INTERVALS.
        ValueError: if a line has fewer than three fields.
    """
    import re
    entries = set()
    with open(REGEX_FILE) as f:
        for line in f:
            # Strip only the line terminator; the last line may lack one.
            line = line.rstrip('\n')
            if not line.strip():
                continue
            ivs, timespec, ex = line.split('/', 2)
            interval = INTERVALS[ivs]
            due = get_snooze(ex, parsetime(timespec), interval)
            entries.add(RegexEntry(due, interval, re.compile(ex)))
    log('\033[37mLoaded regexes:\033[0m')
    # List the entries soonest-due first.
    for entry in sorted(entries, key=lambda e: e.due):
        log(datetime.fromtimestamp(entry.due), '\033[37m'+entry.regex.pattern+'\033[0m')
    return entries
# Currently loaded RegexEntry set, and the modification time of REGEX_FILE
# that this set was loaded from (0 = never loaded).
regexes = None
regex_timestamp = 0


def poll_regex_list():
    """Reload the regex list if REGEX_FILE changed since the last load.

    Updates the module-level `regexes` and `regex_timestamp`.
    """
    global regex_timestamp, regexes
    mtime = os.path.getmtime(REGEX_FILE)
    if mtime > regex_timestamp:
        regexes = load_regex_list()
        # Remember the mtime that was actually observed rather than the
        # current wall-clock time: an edit made while the list was loading
        # then still shows up as newer on the next poll.
        regex_timestamp = mtime
# Announce (in green) that the daemon has started; runs at module execution time.
log('\033[92mStarted up.\033[0m')
def feed_gen():
    """Yield ``(title, link)`` for new 'Trusted' entries of the nyaa RSS feed.

    "New" means published after the `last_published` marker stored in the
    meta table. The stored etag/modified values are sent along with the
    request. Once the consumer has exhausted the generator, the etag,
    modified value and marker are written back to the meta table.

    Yields:
        Tuples of the entry title and the entry link.
    """
    with conn as c:
        etag = (c.execute("SELECT value FROM meta WHERE key='etag'").fetchone() or (0,))[0]
        modified = (c.execute("SELECT value FROM meta WHERE key='modified'").fetchone() or (0,))[0]
        # little hack... the marker is stored as the repr of a tuple
        last_published = ast.literal_eval(c.execute("SELECT value FROM meta WHERE key='last_published'").fetchone()[0])
    data = feedparser.parse('http://www.nyaa.se/?page=rss', etag=etag, modified=modified)
    # `status` is missing from the result when no HTTP response was received
    # (e.g. connection failure), so do not use attribute access.
    status = data.get('status')
    if status == 200:
        for item in data.entries:
            if item.published_parsed > last_published and 'Trusted' in item.get('summary_detail', {}).get('value', ''):
                yield item.title, item.link
    elif status != 304:
        log('\033[91mProblem receiving feed:\033[0m HTTP status code {}.'.format(status))
    # A 304 or failed fetch carries no entries; max() of an empty sequence
    # would raise, so keep the previous marker in that case.
    if data.entries:
        last_published = max(item.published_parsed for item in data.entries)
    with conn as c:
        if 'etag' in data:
            c.execute("INSERT OR REPLACE INTO meta (key, value) VALUES ('etag', ?)", (data.etag,))
        if 'modified' in data:
            c.execute("INSERT OR REPLACE INTO meta (key, value) VALUES ('modified', ?)", (data.modified,))
        c.execute("INSERT OR REPLACE INTO meta (key, value) VALUES ('last_published', ?)", (str(tuple(last_published)),))
def download(url):
    """Ask Transmission (at TRANSMISSION_URL) to add the torrent found at `url`.

    Failures are logged instead of raised: connection problems, a missing
    session id, a non-OK HTTP status, and the duplicate-torrent reply.

    Args:
        url: Location of the torrent, passed as the ``filename`` argument of
            the ``torrent-add`` RPC method.
    """
    try:
        # The RPC endpoint hands out its session id in a response header; the
        # header is absent when e.g. authentication fails, hence `.get`.
        session = requests.get(TRANSMISSION_URL).headers.get('X-Transmission-Session-Id')
        if session is None:
            log('\033[91mProblem talking to transmission:\033[0m', 'no X-Transmission-Session-Id header in response')
            return
        res = requests.post(TRANSMISSION_URL, data=json.dumps({'method': 'torrent-add', 'arguments': {'filename': url}}),
                            headers={'X-Transmission-Session-Id': session})
    except requests.RequestException as err:
        # Transmission unreachable etc.: report it rather than kill the poll loop.
        log('\033[91mProblem talking to transmission:\033[0m', err)
        return
    if res.status_code != requests.codes.ok:
        log('\033[91mProblem talking to transmission:\033[0m', res.status_code, res.text)
    elif 'torrent-duplicate' in res.text:
        log('\033[93mTorrent was already added.\033[0m nop()')
# Guards update() against re-entry.
update_lock = Lock()


def update():
    """Poll the feed once, download matches and schedule the next poll.

    Steps: reload the regex list if its file changed, hand every feed entry
    whose title matches a regex to download(), put matching regexes that have
    an interval into snooze mode (in memory and in the snoozelist table), and
    finally arm SIGALRM for the next poll.

    If another update is already running, this call returns immediately.
    """
    global update_lock, regexes
    # Non-blocking on purpose: update() is invoked from signal handlers, which
    # run on the main thread. A blocking acquire on this non-reentrant lock
    # would deadlock if a signal arrived while an update is in progress.
    if not update_lock.acquire(blocking=False):
        log('Update already in progress, skipping this trigger.')
        return
    try:
        poll_regex_list()
        for title, url in feed_gen():
            matching = {ex for ex in regexes if ex.regex.match(title)}
            if not matching:
                continue
            log('\033[92mMatched:\033[0m "{}" with url {}'.format(title, url))
            download(url)
            with conn as c:
                for elem in matching:
                    # Entries with a falsy interval are never snoozed.
                    if elem.interval:
                        log('Putting regex /{}/ into snooze mode.'.format(elem.regex.pattern))
                        regexes.remove(elem)
                        enddate = int(time.time() + elem.interval * 3600*24)
                        regexes.add(RegexEntry(enddate, elem.interval, elem.regex))
                        c.execute("INSERT OR REPLACE INTO snoozelist (expression, enddate) VALUES (?, ?)", (elem.regex.pattern, enddate))
        now = time.time()
        # Without any interval-carrying regex there is no due date to aim for;
        # the default makes the computation below yield LOW_POLL_INTERVAL.
        next_date = min((e.due for e in regexes if e.interval), default=now + 2*LOW_POLL_INTERVAL)
        # Sleep half the time until the next due date, clamped to
        # [HIGH_POLL_INTERVAL, LOW_POLL_INTERVAL].
        iv = int(max(min(2*LOW_POLL_INTERVAL, next_date-now)/2, HIGH_POLL_INTERVAL))
        log('\033[96mPolled.\033[0m Next poll scheduled in {} seconds.'.format(iv))
        signal.alarm(iv)
    finally:
        update_lock.release()
def sig_handler(signum, frame):
    """Signal callback: run one update cycle; both arguments are ignored."""
    update()


# SIGHUP triggers a poll on demand, SIGALRM is the timer armed by update().
for _signo in (signal.SIGHUP, signal.SIGALRM):
    signal.signal(_signo, sig_handler)

# Poll once right away, then sleep until a signal starts the next cycle.
update()
while True:
    signal.pause()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment