Skip to content

Instantly share code, notes, and snippets.

@zYeoman
Last active November 4, 2019 10:29
Show Gist options
  • Save zYeoman/1d841c5a1227697bc82c81f4acf1f2ad to your computer and use it in GitHub Desktop.
Save zYeoman/1d841c5a1227697bc82c81f4acf1f2ad to your computer and use it in GitHub Desktop.
自动从bt.byr.cn上下载免费种子文件,保存到指定位置
#! /usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright © 2018 Yongwen Zhuang <zeoman@163.com>
#
# Distributed under terms of the MIT license.
"""
Byr
Automatically download free torrent files from bt.byr.cn and save them to a specified location.
Author: Yongwen Zhuang
Create: 2018-06-13
"""
import os
from PIL import Image
from io import BytesIO
import logging
import pickle
try:
from urllib.parse import urlparse, parse_qs
except ImportError:
from urlparse import urlparse, parse_qs
import requests
from bs4 import BeautifulSoup
from userpass import User
from decaptcha.decaptcha import DeCaptcha
class Byr(object):
    """Client for bt.byr.cn: log in, list free torrents and download them.

    Instance state:
      * ``logger``   - the shared "byr" logger.
      * ``_session`` - ``requests`` session that carries the login cookies.
      * ``_root``    - base URL every relative path is appended to.
      * ``_user``    - object built with ``User('.byr')``; its ``username``
                       and ``password`` attributes are used by ``login``.
      * ``list``     - torrent ids already recorded in ``list.csv``.
    """

    def __init__(self):
        """Configure logging, the HTTP session and the known-id list."""
        console = logging.StreamHandler()
        console.setLevel(logging.DEBUG)
        formatter = logging.Formatter(
            '[%(filename)s:%(lineno)4s - %(funcName)10s ] %(message)s'
        )
        console.setFormatter(formatter)
        self.logger = logging.getLogger("byr")
        # getLogger("byr") always returns the same logger object, so adding
        # a handler on every instantiation would print each record once per
        # instance created.  Attach the console handler only once.
        if not self.logger.handlers:
            self.logger.addHandler(console)
        self.logger.setLevel(logging.INFO)
        self._session = requests.session()
        self._session.headers = {
            'User-Agent': 'Magic Browser'
        }
        self._root = 'https://bt.byr.cn/'
        self._user = User('.byr')
        self.list = []
        if os.path.exists('list.csv'):
            self.logger.debug('Read list.csv')
            with open('list.csv', 'r') as f:
                for line in f:
                    # First column is the torrent id; strip so that a line
                    # without a comma does not keep its trailing newline.
                    self.list.append(line.split(',')[0].strip())

    def login(self):
        """Log in to bt.byr.cn and save the session cookies on success.

        Fetches the login page, downloads the CAPTCHA image, decodes it with
        ``DeCaptcha`` and posts the credentials to ``takelogin.php``.

        :returns: True if the response URL is ``index.php`` (login
                  accepted), False otherwise
        """
        login_page = self.get_url('login.php')
        image_url = login_page.find('img', alt='CAPTCHA')['src']
        image_hash = login_page.find(
            'input', attrs={'name': 'imagehash'})['value']
        self.logger.debug('Image url: %s', image_url)
        self.logger.debug('Image hash: %s', image_hash)
        req = self._session.get(self._root + image_url)
        image_file = Image.open(BytesIO(req.content))
        decaptcha = DeCaptcha()
        decaptcha.load_model('./decaptcha/captcha_classifier.pkl')
        captcha_text = decaptcha.decode(image_file)
        self.logger.debug('Captcha text: %s', captcha_text)
        login_data = {
            'username': self._user.username,
            'password': self._user.password,
            'imagestring': captcha_text,
            'imagehash': image_hash
        }
        main_page = self._session.post(
            self._root + 'takelogin.php', login_data)
        if main_page.url != self._root + 'index.php':
            self.logger.error('Login error')
            return False
        self._save()
        return True

    def _save(self):
        """Pickle the session cookies into the file ``cookie``."""
        self.logger.debug('Save cookies')
        with open('cookie', 'wb') as f:
            pickle.dump(self._session.cookies, f)

    def _load(self):
        """Restore cookies from the file ``cookie``, or log in if missing."""
        if os.path.exists('cookie'):
            with open('cookie', 'rb') as f:
                self.logger.debug('Load cookies from file.')
                # NOTE(review): pickle.load can execute arbitrary code from
                # the file.  This is acceptable only as long as 'cookie' is
                # a local file written by _save(); never point it at
                # untrusted data.
                self._session.cookies = pickle.load(f)
        else:
            self.logger.debug('Load cookies by login')
            # login() saves the cookies itself, and only when the login
            # succeeded.  Saving here unconditionally would persist the
            # cookies of a failed login, and later runs would then load
            # them instead of logging in again.
            self.login()

    @property
    def pages(self):
        """Yield a ByrPage for every other free row of ``torrents.php``.

        :returns: generator of ByrPage objects
        """
        # spstate=2 is the "free" listing according to the original comment.
        self.logger.debug('Get pages')
        page = self.get_url('torrents.php?spstate=2')
        rows = page.find_all('tr', class_='free_bg')
        for index, row in enumerate(rows):
            # Only rows at even positions are used; presumably each torrent
            # matches twice (outer row plus a nested one) - TODO confirm
            # against the live markup.
            if index % 2 == 0:
                yield ByrPage(row)

    def get_url(self, url):
        """Fetch a page relative to the site root and parse it.

        :url: path relative to ``self._root``
        :returns: BeautifulSoup document parsed with the lxml parser
        """
        self.logger.debug('Get url: %s', url)
        req = self._session.get(self._root + url)
        return BeautifulSoup(req.text, 'lxml')

    def start(self):
        """Run the spider: download every new, acceptable free torrent.

        Each downloaded torrent is appended to ``list.csv`` as ``id,name``
        and remembered in ``self.list`` so it is not fetched again.
        """
        self.logger.info('Start Spider')
        self._load()
        with open('list.csv', 'a') as f:
            for page in self.pages:
                self.logger.debug('Check %s', page.name)
                if page.id in self.list or not page.ok:
                    continue
                self.logger.info('Download %s', page.name)
                self.download(page.id)
                f.write(page.id + ',' + page.name + '\n')
                # Keep the in-memory list in sync with the file so the same
                # id is not downloaded twice within one run.
                self.list.append(page.id)

    def download(self, id_):
        """Download the torrent with the given id into ``./tmp/``.

        The file is written as ``./tmp/<id_>.torrent``; the directory is
        created when it does not exist yet.

        :id_: torrent id as a string
        """
        url = self._root + 'download.php?id=' + id_
        req = self._session.get(url)
        target_dir = './tmp/'
        # isdir/makedirs instead of exist_ok=True keeps the Python 2
        # compatibility the file's import fallback aims for.
        if not os.path.isdir(target_dir):
            os.makedirs(target_dir)
        with open(target_dir + id_ + '.torrent', 'wb') as f:
            f.write(req.content)
class ByrPage(object):
    """Information about one torrent, parsed from a listing table row.

    Attributes set by ``__init__``: ``name``, ``type``, ``size`` (in GB),
    ``seeders``, ``snatched`` and ``id`` (string).
    """

    # Factor converting a value in the given unit to GB.  Suffixes are
    # matched case-insensitively; every factor is a power of two, so the
    # multiplication is exact.
    _UNIT_TO_GB = (
        ('TIB', 1024.0),
        ('GIB', 1.0),
        ('MIB', 1.0 / 1024),
        ('KIB', 1.0 / (1024 * 1024)),
        ('TB', 1024.0),
        ('GB', 1.0),
        ('MB', 1.0 / 1024),
        ('KB', 1.0 / (1024 * 1024)),
    )

    def __init__(self, soup):
        """Extract the torrent fields from a table row.

        :soup: BeautifulSoup element for one row of the torrent listing
        """
        title_cell = soup.find(class_='torrentname')
        cells = soup.find_all('td')
        url = title_cell.a['href']
        self.name = title_cell.b.text
        self.type = soup.img['title']
        # Columns are addressed from the end of the row: size is the 5th
        # from last cell, seeders the 4th from last, snatched the 2nd.
        self.size = self.tosize(cells[-5].text)
        self.seeders = int(cells[-4].text.replace(',', ''))
        self.snatched = int(cells[-2].text.replace(',', ''))
        # The torrent id is the ``id`` query parameter of the detail link.
        self.id = parse_qs(urlparse(url).query)['id'][0]

    @property
    def ok(self):
        """Tell whether the torrent is worth downloading.

        :returns: True when it is smaller than 20 GB and has fewer than
                  10 seeders
        """
        return self.size < 20 and self.seeders < 10

    def tosize(self, text):
        """Convert a size string such as '123GB' to a number of GB.

        KB, MB, GB and TB (and the KiB/MiB/GiB/TiB spellings) are
        recognised; thousands separators and surrounding whitespace are
        ignored.  Any other two-character suffix is treated as GB, which
        was the previous behaviour for every unit except MB.

        :text: e.g. '123GB', '123MB', '1.5TB' or '1,024KB'
        :returns: size in GB as a float
        """
        cleaned = text.strip().replace(',', '')
        upper = cleaned.upper()
        for suffix, factor in self._UNIT_TO_GB:
            if upper.endswith(suffix):
                return float(cleaned[:-len(suffix)]) * factor
        return float(cleaned[:-2])
def main():
    """Create a Byr spider and run it once."""
    Byr().start()
# Run the spider only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment