Skip to content

Instantly share code, notes, and snippets.

@Waester
Last active March 22, 2023 21:09
Show Gist options
  • Save Waester/59df0fb9c35e461e342cf83221193fc6 to your computer and use it in GitHub Desktop.
Save Waester/59df0fb9c35e461e342cf83221193fc6 to your computer and use it in GitHub Desktop.
# Feeds to poll. Every entry needs a 'url'; the optional 'regex' is matched
# (re.match, i.e. anchored at the start) against each entry title, and only
# matching entries are downloaded.
RSS = [
    {'url': 'https://example.com'},
    {'url': 'https://example.com', 'regex': '^.+720p.*$'},
]

# Proxy mapping handed to every HTTP request as its ``proxies`` argument.
PROXIES = {
    'http': 'http://example.com:8080',
    'https': 'https://example.com:8443',
}

# Directory that fetched .torrent files are written to (created on demand).
TORRENT_DIR = 'torrents'

# When not False, new items are only printed: nothing is fetched and the
# stored timestamps are left untouched.
DRYRUN = False
#!/usr/bin/env python3
import feedparser
import os
import re
import requests
import subprocess
import json
import config
from datetime import datetime
# Load the persisted state: a mapping of feed URL -> {regex: epoch seconds of
# the newest item already handled}. A missing file simply means a first run.
try:
    _stored = open('.timestamps', 'r')
except FileNotFoundError:
    timestamps = {}
else:
    with _stored:
        timestamps = json.load(_stored)
def get_torrent(session, url):
    """Fetch the .torrent file at *url* and store it in ``config.TORRENT_DIR``.

    The file is named after the last path segment of *url*.

    Args:
        session: ``requests.Session`` (or compatible) used for the request.
        url: HTTP(S) address of the torrent file.

    Returns:
        None. On a timeout, proxy error or non-success HTTP status a message
        is printed and nothing is written.
    """
    filename = url.split('/')[-1]
    os.makedirs(config.TORRENT_DIR, exist_ok=True)
    try:
        torrent = session.get(url, timeout=5, proxies=config.PROXIES)
    except (requests.exceptions.Timeout, requests.exceptions.ProxyError):
        print('Request timeout: {}'.format(url))
        return
    # Without this check a 404/500 error page would be saved to disk and
    # later be mistaken for a valid torrent file.
    if not torrent.ok:
        print('Bad response ({}): {}'.format(torrent.status_code, url))
        return
    with open(f'{config.TORRENT_DIR}/{filename}', 'wb') as _file:
        _file.write(torrent.content)
def get_items(parsed_feed, feed):
    """Collect the feed entries whose title matches the feed's regex.

    Args:
        parsed_feed: Mapping with an ``'entries'`` list; each entry provides
            ``'title'``, ``'link'`` and either ``'date_parsed'`` or
            ``'published_parsed'`` (a time tuple whose first six fields are
            year, month, day, hour, minute, second).
        feed: Feed configuration; the optional ``'regex'`` key is applied with
            ``re.match``. A missing regex matches every title.

    Returns:
        A list of ``{'date': datetime, 'title': str, 'link': str}`` dicts in
        the same order as the feed entries.

    Raises:
        KeyError: If a matching entry carries neither date field.
    """
    # Read the optional pattern without writing a default back into *feed*:
    # the dict belongs to the caller's configuration and must stay untouched.
    regex = feed.get('regex', '')
    items = []
    for item in parsed_feed['entries']:
        if not re.match(regex, item['title']):
            continue
        if 'date_parsed' in item:
            stamp = item['date_parsed']
        else:
            stamp = item['published_parsed']
        items.append({
            'date': datetime(*stamp[:6]),
            'title': item['title'],
            'link': item['link'],
        })
    return items
def get_lastitemdate(feed):
    """Return the stored timestamp of the newest item handled for *feed*.

    Lookup order within the module-level ``timestamps`` mapping:
    the entry for the feed's own regex, then the URL's regex-less entry
    (key ``''``), then a fixed default of 2000-01-01.

    Args:
        feed: Feed configuration with a ``'url'`` and an optional ``'regex'``.

    Returns:
        Integer epoch seconds.
    """
    default = int(datetime(2000, 1, 1).timestamp())
    per_url = timestamps.get(feed['url'])
    if per_url is None:
        return default
    regex = feed.get('regex', '')
    if regex in per_url:
        return per_url[regex]
    # The URL is known but this regex is not (e.g. the regex was changed or
    # newly added). Fall back to the regex-less entry when one exists instead
    # of raising KeyError on a missing '' key.
    return per_url.get('', default)
def set_lastitemdate(items, feed):
    """Record the newest item date of *items* for *feed* and persist it.

    Updates the module-level ``timestamps`` mapping under
    ``timestamps[url][regex]`` (``''`` when the feed has no regex) and
    rewrites the ``.timestamps`` JSON file.

    Args:
        items: Item dicts as produced by ``get_items``; each has a ``'date'``
            datetime. An empty list is a no-op.
        feed: Feed configuration with a ``'url'`` and an optional ``'regex'``.
    """
    if not items:
        return
    # Take the maximum rather than items[0]: nothing guarantees that a feed
    # lists its entries newest-first, and storing an older date would make
    # every later run download the same items again.
    newest = max(item['date'] for item in items)
    last_item_date = int(newest.timestamp())
    per_url = timestamps.setdefault(feed['url'], {})
    per_url[feed.get('regex', '')] = last_item_date
    with open('.timestamps', 'w') as _file:
        json.dump(timestamps, _file)
def download(session, items, feed):
    """Download every item newer than the feed's stored timestamp.

    Links starting with ``http`` are fetched with ``get_torrent``; anything
    else is handed to ``transmission-remote --add``. In dry-run mode
    (``config.DRYRUN`` truthy) new items are only printed and the stored
    timestamp is not advanced.

    Args:
        session: ``requests.Session`` used for HTTP downloads.
        items: Item dicts as produced by ``get_items``.
        feed: Feed configuration the items belong to.
    """
    if not items:
        return
    # Read the flag once instead of comparing it to False at three places.
    live = not config.DRYRUN
    last_item_date = get_lastitemdate(feed)
    for item in items:
        if int(item['date'].timestamp()) <= last_item_date:
            continue
        print('Downloading: {}'.format(item['title']))
        if not live:
            continue
        link = item['link']
        if link.startswith('http'):
            # NOTE(review): get_torrent reports failures only by printing, so
            # the timestamp below still advances past an item that failed.
            get_torrent(session, link)
        else:
            # subprocess.DEVNULL silences the client's output without opening
            # os.devnull by hand.
            subprocess.run(['transmission-remote', '--add', link], stdout=subprocess.DEVNULL)
    if live:
        set_lastitemdate(items, feed)
def parse(session, feed):
    """Fetch one configured feed and pass its matching entries to download().

    Prints a message and returns early when the request times out or hits a
    proxy error, or when feedparser sets a non-zero ``bozo`` flag on the
    parsed result.

    Args:
        session: ``requests.Session`` used for the feed request and for any
            follow-up torrent downloads.
        feed: Feed configuration with a ``'url'`` and an optional ``'regex'``.
    """
    url = feed['url']
    try:
        response = session.get(url, timeout=5, proxies=config.PROXIES)
    except (requests.exceptions.Timeout, requests.exceptions.ProxyError):
        print('Request timeout: {}'.format(url))
        return

    parsed_feed = feedparser.parse(response.text)
    if parsed_feed['bozo'] != 0:
        print('Bad feed: {}'.format(feed['url']))
        return

    matching = get_items(parsed_feed, feed)
    download(session, matching, feed)
if __name__ == '__main__':
    # Process the configured feeds one after another; each feed gets its own
    # HTTP session, which is closed as soon as that feed is done.
    for feed_config in config.RSS:
        with requests.Session() as http_session:
            parse(http_session, feed_config)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment