Last active
August 29, 2015 14:02
-
-
Save jaseg/17ebf3144c89f4494d25 to your computer and use it in GitHub Desktop.
Pipe new releases from torrent index RSS feeds into transmission
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import time | |
import sys | |
from datetime import datetime | |
import feedparser | |
import requests | |
import json | |
import sqlite3 | |
import ast | |
from dateutil.parser import parse as parsetime | |
try: | |
import re2 as re | |
except ImportError: | |
import re | |
from settings import TRANSMISSION_URL | |
def log(*args):
    """Print `args` to stdout behind a yellow ``[MM-DD HH:MM:SS]`` stamp.

    A colour-reset escape sequence is appended as the last printed item so
    colours opened inside `args` do not leak into the next line.
    """
    stamp = time.strftime('\x1B[93m[%m-%d %H:%M:%S]\x1B[0m')
    print(stamp, *args, '\x1B[0m')


log('\033[92mStarted up.\033[0m')
def feed_gen(term):
    """Yield ``(title, link)`` for every 'Trusted' entry matching `term`.

    Result pages of the nyaa.se RSS search are fetched one after another,
    starting at page 1, until a page comes back empty or a fetch fails.

    `term` is interpolated into the query string verbatim, so it has to be
    URL-safe already.
    """
    page = 1
    while True:
        log('\033[37mFetching page {}\033[0m'.format(page))
        data = feedparser.parse('http://www.nyaa.se/?page=rss&term={}&sort=2&offset={}'.format(term, page))
        # feedparser only sets `status` when it actually got an HTTP
        # response; on a network-level failure the attribute is missing, so
        # look it up defensively instead of raising AttributeError.
        status = data.get('status')
        if status != requests.codes.ok:
            log('\033[91mProblem receiving feed:\033[0m HTTP status code {}.'.format(status))
            return
        if not data.entries:
            return
        for item in data.entries:
            if 'Trusted' in item.get('summary_detail', {}).get('value', ''):
                yield item.title, item.link
        page += 1
def download(url):
    """Send a ``torrent-add`` request for `url` to the Transmission RPC endpoint.

    Problems are only logged; nothing is returned.
    """
    # The GET exists only to read the session id header from the response;
    # that id is then echoed back in the real request.
    session_id = requests.get(TRANSMISSION_URL).headers['X-Transmission-Session-Id']
    payload = json.dumps({'method': 'torrent-add', 'arguments': {'filename': url}})
    response = requests.post(
        TRANSMISSION_URL,
        data=payload,
        headers={'X-Transmission-Session-Id': session_id},
    )
    if response.status_code != requests.codes.ok:
        log('\033[91mProblem talking to transmission:\033[0m', response.status_code, response.text)
        return
    if 'torrent-duplicate' in response.text:
        log('\033[93mTorrent was already added.\033[0m nop()')
# Command line: <script> regex searchterm
if len(sys.argv) != 3:
    print('Usage: {} regex searchterm'.format(sys.argv[0]))
    # Stop here: without both arguments the lookups below would fail with
    # an IndexError right after the usage line was printed.
    sys.exit(1)

ex = re.compile(sys.argv[1])
term = sys.argv[2]

# Hand every trusted search result whose title matches the regex (anchored
# at the start of the title, since `match` is used) to transmission.
for title, url in feed_gen(term):
    if ex.match(title):
        log('\033[92mMatched:\033[0m "{}" with url {}'.format(title, url))
        download(url)
log('\033[92mDone.\033[0m')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# RPC endpoint of the transmission instance that receives the torrents.
TRANSMISSION_URL = 'http://localhost:9091/transmission/rpc'
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import time | |
import sys | |
from datetime import datetime | |
import sqlite3 | |
from dateutil.parser import parse as parsetime | |
from collections import namedtuple | |
import re | |
# Timing constants, all in seconds. The two poll intervals are not used by
# the visible code of this script.
LOW_POLL_INTERVAL = 3 * 3600
HIGH_POLL_INTERVAL = 300
TIME_TOLERANCE = 3600
OBSOLETION_TIME = 24 * 3600

# File holding one "interval/time/regex" entry per line.
REGEX_FILE = 'titles.regex'

# Interval code (first field of a line) -> length in days; an empty code
# falls back to 7 and 'x' maps to False.
INTERVALS = {
    'd': 1,
    'w': 7,
    'b': 14,
    '': 7,
    'x': False,
}

# One loaded line: due timestamp, interval in days, compiled regex.
RegexEntry = namedtuple('RegexEntry', 'due interval regex')
# Somewhat of a complete overkill
conn = sqlite3.connect('torrents.db')

_CREATE_SNOOZELIST = "CREATE TABLE IF NOT EXISTS snoozelist (expression text UNIQUE, enddate int)"

# `c` is the connection itself (the context manager only wraps the
# statement in a transaction); it stays bound at module level and is used
# for queries further down.
with conn as c:
    c.execute(_CREATE_SNOOZELIST)
def get_snooze(ex, fallback, iv):
    """Return the due timestamp (seconds since the epoch) for pattern `ex`.

    ex       -- regex source text, used as the key into the snoozelist table
    fallback -- object with a ``timestamp()`` method giving the nominal time
    iv       -- interval in days

    A snoozelist end date newer than ``now - OBSOLETION_TIME`` takes
    precedence; otherwise the (possibly shifted) fallback time is returned.
    """
    fallback_ts = int(fallback.timestamp())
    current = time.time()

    # A fallback lying more than the tolerance in the past is moved one
    # interval ahead, minus the tolerance.
    if fallback_ts < current - TIME_TOLERANCE:
        fallback_ts += 3600*24 * iv - TIME_TOLERANCE

    row = c.execute("SELECT enddate FROM snoozelist WHERE expression=?", (ex,)).fetchone()
    if not row or row[0] <= current - OBSOLETION_TIME:
        return fallback_ts
    return row[0]
def load_regex_list():
    """Parse REGEX_FILE into a list of RegexEntry tuples, in file order.

    Each non-empty line has the form ``interval/time/regex``: `interval` is
    a key of INTERVALS, `time` is handed to the dateutil parser and `regex`
    is everything after the second slash, so the pattern itself may contain
    slashes.

    Raises ValueError for a line with fewer than three fields and KeyError
    for an unknown interval code.
    """
    # Imported locally so the function does not depend on whatever the
    # module-level name `re` is bound to when it runs.
    import re
    entries = []
    with open(REGEX_FILE) as f:
        for line in f:
            # Drop only the newline; a final line without one must not lose
            # its last character.
            line = line.rstrip('\n')
            if not line:
                continue
            # Split on the first two slashes only, keeping the regex intact.
            ivs, timespec, ex = line.split('/', 2)
            interval = INTERVALS[ivs]
            due = get_snooze(ex, parsetime(timespec), interval)
            entries.append(RegexEntry(due, interval, re.compile(ex)))
    return entries
regexes = load_regex_list()

if len(sys.argv) == 1:
    # No argument: list every loaded regex with its index and due date.
    print('\033[37mLoaded regexes:\033[0m')
    # The loop variable is deliberately not called `re`, which would rebind
    # the imported module at module scope.
    for idx, entry in enumerate(regexes):
        print('\033[96m('+str(idx)+')\033[0m\t'+str(datetime.fromtimestamp(entry.due)), '\033[37m'+entry.regex.pattern+'\033[0m')
else:
    # One argument: index (as printed by the listing) of the regex to snooze.
    try:
        elem = regexes[int(sys.argv[1])]
    except (ValueError, IndexError):
        print('Invalid regex index: {}'.format(sys.argv[1]))
        sys.exit(1)
    print('Putting regex /{}/ into snooze mode.'.format(elem.regex.pattern))
    # Snooze until one interval (in days) after the current due date.
    enddate = int(elem.due + elem.interval * 3600*24)
    with conn as c:
        c.execute("INSERT OR REPLACE INTO snoozelist (expression, enddate) VALUES (?, ?)", (elem.regex.pattern, enddate))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import time | |
from datetime import datetime | |
import feedparser | |
import requests | |
import json | |
import sqlite3 | |
import ast | |
import os | |
import signal | |
from dateutil.parser import parse as parsetime | |
from threading import Lock | |
from collections import namedtuple | |
import re | |
from settings import TRANSMISSION_URL | |
# Timing constants, all in seconds. The polling code clamps its delay to
# the range HIGH_POLL_INTERVAL .. LOW_POLL_INTERVAL.
LOW_POLL_INTERVAL = 3 * 3600
HIGH_POLL_INTERVAL = 300
TIME_TOLERANCE = 3600
OBSOLETION_TIME = 24 * 3600

# File holding one "interval/time/regex" entry per line.
REGEX_FILE = 'titles.regex'

# Interval code (first field of a line) -> length in days; an empty code
# falls back to 7 and 'x' maps to False (such entries are never snoozed).
INTERVALS = {
    'd': 1,
    'w': 7,
    'b': 14,
    '': 7,
    'x': False,
}

# One loaded line: due timestamp, interval in days, compiled regex.
RegexEntry = namedtuple('RegexEntry', 'due interval regex')
# Somewhat of a complete overkill
conn = sqlite3.connect('torrents.db')

# Schema plus the initial 'last_published' marker; every statement is a
# no-op when the table or row already exists.
_SETUP_STATEMENTS = (
    "CREATE TABLE IF NOT EXISTS meta (key text UNIQUE, value text)",
    "CREATE TABLE IF NOT EXISTS snoozelist (expression text UNIQUE, enddate int)",
    "INSERT OR IGNORE INTO meta (key, value) VALUES ('last_published', '(0,)')",
)

# `c` is the connection itself and stays bound at module level, where it is
# used for queries further down.
with conn as c:
    for _statement in _SETUP_STATEMENTS:
        c.execute(_statement)
def log(*args):
    """Print `args` to stdout behind a yellow ``[pid@MM-DD HH:MM:SS]`` tag.

    A colour-reset escape sequence is appended as the last printed item so
    colours opened inside `args` do not leak into the next line.
    """
    tag = '\x1B[93m[{}@{}]\x1B[0m'.format(os.getpid(), time.strftime('%m-%d %H:%M:%S'))
    print(tag, *args, '\x1B[0m')
def get_snooze(ex, fallback, iv):
    """Work out when the regex `ex` is next due, as a Unix timestamp.

    ex       -- regex source text; key into the snoozelist table
    fallback -- nominal release time, an object offering ``timestamp()``
    iv       -- interval in days

    A stored snooze end date wins as long as it is newer than
    ``now - OBSOLETION_TIME``; otherwise the fallback time is used, shifted
    ahead when it already lies in the past.
    """
    nominal = int(fallback.timestamp())
    current = time.time()

    already_passed = nominal < current - TIME_TOLERANCE
    if already_passed:
        # Move one interval ahead, minus the tolerance.
        nominal += 3600*24 * iv - TIME_TOLERANCE

    stored = c.execute("SELECT enddate FROM snoozelist WHERE expression=?", (ex,)).fetchone()
    if not stored or stored[0] <= current - OBSOLETION_TIME:
        return nominal
    return stored[0]
# Format for regex list, one entry per line:
#w/Tue 08h30/^foobar.baz
# Where the first two slashes separate the interval code (a key of
# INTERVALS, may be empty), the approximate release time and the regex. The
# release time can be anything dateutil comprehends. It does not need to be
# accurate, it is perfectly fine if this is off by a time zone or two—this
# script will compensate for that. Everything after the second slash belongs
# to the regex, so the regex itself may contain slashes.
def load_regex_list():
    """Parse REGEX_FILE into a set of RegexEntry tuples and log them by due date.

    Blank lines are skipped. Raises ValueError for a line with fewer than
    three fields and KeyError for an unknown interval code.
    """
    # Imported locally so the function does not depend on whatever the
    # module-level name `re` is bound to when it runs.
    import re
    regexes = set()
    with open(REGEX_FILE) as f:
        for line in f:
            # Drop only the newline; a final line without one must not lose
            # its last character.
            line = line.rstrip('\n')
            if not line:
                continue
            # Split on the first two slashes only, keeping the regex intact.
            ivs, timespec, ex = line.split('/', 2)
            interval = INTERVALS[ivs]
            due = get_snooze(ex, parsetime(timespec), interval)
            regexes.add(RegexEntry(due, interval, re.compile(ex)))
    log('\033[37mLoaded regexes:\033[0m')
    for entry in sorted(regexes, key=lambda entry: entry.due):
        log(datetime.fromtimestamp(entry.due), '\033[37m'+entry.regex.pattern+'\033[0m')
    return regexes
# Currently loaded RegexEntry collection (None until the first load) and the
# wall-clock time of the last load.
regexes = None
regex_timestamp = 0


def poll_regex_list():
    """Reload the regex list when REGEX_FILE changed since the last load.

    Updates the module-level `regexes` and `regex_timestamp`.
    """
    global regex_timestamp, regexes
    file_is_newer = os.path.getmtime(REGEX_FILE) > regex_timestamp
    if not file_is_newer:
        return
    regexes = load_regex_list()
    regex_timestamp = time.time()


log('\033[92mStarted up.\033[0m')
def feed_gen():
    """Yield ``(title, link)`` for new 'Trusted' entries of the nyaa.se RSS feed.

    "New" means published after the `last_published` marker kept in the
    `meta` table. The ETag / Last-Modified values of the previous fetch are
    sent along so an unchanged feed can be answered with HTTP 304. Once the
    generator has been consumed completely, the marker and the caching
    headers are written back to the database.
    """
    with conn as c:
        etag = (c.execute("SELECT value FROM meta WHERE key='etag'").fetchone() or (0,))[0]
        modified = (c.execute("SELECT value FROM meta WHERE key='modified'").fetchone() or (0,))[0]
        # little hack... the marker is stored as the text of a tuple and
        # turned back into one with literal_eval.
        last_published = ast.literal_eval(c.execute("SELECT value FROM meta WHERE key='last_published'").fetchone()[0])
    data = feedparser.parse('http://www.nyaa.se/?page=rss', etag=etag, modified=modified)
    # `status` is absent when no HTTP response was received at all.
    status = data.get('status')
    if status == 200:
        for item in data.entries:
            if item.published_parsed > last_published and 'Trusted' in item.get('summary_detail', {}).get('value', ''):
                yield item.title, item.link
    elif status != 304:
        log('\033[91mProblem receiving feed:\033[0m HTTP status code {}.'.format(status))
    # A 304 or a failed fetch carries no entries; keep the old marker then
    # instead of letting max() raise on an empty sequence.
    last_published = max((item.published_parsed for item in data.entries), default=last_published)
    with conn as c:
        if 'etag' in data:
            c.execute("INSERT OR REPLACE INTO meta (key, value) VALUES ('etag', ?)", (data.etag,))
        if 'modified' in data:
            c.execute("INSERT OR REPLACE INTO meta (key, value) VALUES ('modified', ?)", (data.modified,))
        c.execute("INSERT OR REPLACE INTO meta (key, value) VALUES ('last_published', ?)", (str(tuple(last_published)),))
def download(url):
    """Hand the torrent at `url` to transmission via a ``torrent-add`` RPC call.

    Failures and duplicates are reported through `log`; nothing is returned.
    """
    # First request: made only to read the session id from its response
    # headers, which the actual RPC call then sends back.
    handshake = requests.get(TRANSMISSION_URL)
    rpc_headers = {'X-Transmission-Session-Id': handshake.headers['X-Transmission-Session-Id']}
    rpc_body = json.dumps({'method': 'torrent-add', 'arguments': {'filename': url}})

    res = requests.post(TRANSMISSION_URL, data=rpc_body, headers=rpc_headers)
    if res.status_code == requests.codes.ok:
        if 'torrent-duplicate' in res.text:
            log('\033[93mTorrent was already added.\033[0m nop()')
    else:
        log('\033[91mProblem talking to transmission:\033[0m', res.status_code, res.text)
# Guards update() against re-entry from a signal handler.
update_lock = Lock()


def update():
    """Run one polling cycle and schedule the next one via SIGALRM.

    Reloads the regex list if needed, downloads every new feed entry whose
    title matches a regex, snoozes the matching regexes that have an
    interval, and arms ``signal.alarm`` for the next poll.
    """
    global update_lock, regexes
    # Signal handlers run on the main thread. If a signal arrives while a
    # cycle is in progress, a blocking acquire would wait forever on a lock
    # held by the very code it interrupted, so skip the nested run instead.
    if not update_lock.acquire(blocking=False):
        log('Update already in progress, skipping.')
        return
    try:
        poll_regex_list()
        for title, url in feed_gen():
            matching = {ex for ex in regexes if ex.regex.match(title)}
            if matching:
                log('\033[92mMatched:\033[0m "{}" with url {}'.format(title, url))
                download(url)
                with conn as c:
                    for elem in matching:
                        # Entries with a falsy interval are never snoozed.
                        if elem.interval:
                            log('Putting regex /{}/ into snooze mode.'.format(elem.regex.pattern))
                            regexes.remove(elem)
                            enddate = int(time.time() + elem.interval * 3600*24)
                            regexes.add(RegexEntry(enddate, elem.interval, elem.regex))
                            c.execute("INSERT OR REPLACE INTO snoozelist (expression, enddate) VALUES (?, ?)", (elem.regex.pattern, enddate))
        now = time.time()
        # With no interval-carrying regex there is no due date to aim for;
        # fall back to the slowest polling rate instead of raising.
        next_date = min((e.due for e in regexes if e.interval), default=now + 2*LOW_POLL_INTERVAL)
        # Wait half the time until the next due date, clamped to the range
        # HIGH_POLL_INTERVAL .. LOW_POLL_INTERVAL.
        iv = int(max(min(2*LOW_POLL_INTERVAL, next_date-now)/2, HIGH_POLL_INTERVAL))
        log('\033[96mPolled.\033[0m Next poll scheduled in {} seconds.'.format(iv))
        signal.alarm(iv)
    finally:
        update_lock.release()
def sig_handler(signum, frame):
    """Signal callback: run one polling cycle. Both arguments are ignored."""
    update()


# SIGALRM is what update() arms for the next scheduled poll. SIGHUP
# triggers the same cycle, presumably so a poll can be forced by hand.
for _signo in (signal.SIGHUP, signal.SIGALRM):
    signal.signal(_signo, sig_handler)

# Poll once right away, then sleep until a signal arrives.
update()
while True:
    signal.pause()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment