YieldNull/download.py

## readme.md

      
    Raw
  

              readme.md
            
          
## 功能
将飞图网某页面下所有发布在百度云上的资源转存到自己的百度云，
然后下载到本地并解压，之后再把解压后的文件上传到百度云。
示例网址为 http://www.ftoow.com/read.php?tid-727.html 。
如需处理其他页面，更改 fetch.py 中的网址即可。
## 爬取链接及密码
运行 fetch.py，
得到 result.txt 文件，每行的格式为“链接 密码”。
## 转存到百度云
运行 upload.sh，
将资源转存到百度云的 '/ftoow' 目录下。
## 下载
运行 download.py。
需要先把源代码中的工作路径 'dest' 改为实际的下载目录，并切换到该目录后再执行。
## 解压
运行 extract.py，参数为压缩包所在的目录。
## 上传
bp u ftoow /ftoow_bin -t r -R

  
## download.py
#!/usr/bin/env python
#coding:utf-8

"""
download files from pan.baidu.com using https://github.com/PeterDing/iScript/blob/master/pan.baidu.com.py#pan.baidu.com.py
use crontab to run it in a specific time interval to check if it has failed
if failed, restart it

Created on 2015.10.31
"""

import commands
import os

# Working directory the download must be (re)started from; edit before use.
dest = 'YOUR_DEST_DIR'

# Lines of the process table that mention "bp".
ps_listing = commands.getoutput('ps -aux | grep bp')
# Exactly one line break is taken to mean that bp itself is not running
# (presumably only the shell wrapper and grep match then -- verify on the
# target system).
newline_count = ps_listing.count('\n')

# The download is only resumed when the script runs from inside `dest`.
current_dir = commands.getoutput('pwd')

# Any directory entry matching 32[0-9] marks the download as finished.
finished_marker = commands.getoutput('ls | grep 32[0-9]')

bp_idle = newline_count == 1
if bp_idle and current_dir == dest and not finished_marker:
    os.system('bp d /ftoow -R -a 10')

## extract.py
#!/usr/bin/env python
#coding:utf-8

"""
Extract zip or rar files
When finished, use 'bp u <localpath> <remotepath> -R' to upload all the files

Created on 2015.10.31
"""

import os
import re
import sys

# Password handed to unzip (-P) and unrar (-p) for every archive.
passwd='http://www.ftoow.com'

def unzip(file):
    """Extract a password-protected zip archive with the external ``unzip``.

    No target directory is passed, so ``unzip`` unpacks to its default
    location. The module-level ``passwd`` is used as the archive password.

    Args:
        file: Path of the zip archive.

    Returns:
        The exit status of ``unzip`` (0 on success), or 127 when the
        program could not be started.
    """
    import subprocess

    # use '-O gbk' if supported to avoid encoding problem
    # On raspberry pi(Debian jessie),
    # program 'unzip' does not have the '-O' option
    #
    # An argument list (no shell) keeps file names that contain quotes,
    # '$' or backticks from being interpreted by the shell.
    try:
        return subprocess.call(['unzip', '-P', passwd, file])
    except OSError as err:
        sys.stderr.write('cannot run unzip: %s\n' % err)
        return 127

def unrar(file):
    """Extract a password-protected rar archive with the external ``unrar``.

    Runs ``unrar x`` with ``-o-`` and the module-level ``passwd`` as the
    archive password.

    Args:
        file: Path of the rar archive.

    Returns:
        The exit status of ``unrar`` (0 on success), or 127 when the
        program could not be started.
    """
    import subprocess

    # An argument list (no shell) keeps file names that contain quotes,
    # '$' or backticks from being interpreted by the shell.
    try:
        return subprocess.call(['unrar', 'x', '-p' + passwd, '-o-', file])
    except OSError as err:
        sys.stderr.write('cannot run unrar: %s\n' % err)
        return 127

def extract(path):
    """Extract every zip/rar archive located directly inside ``path``.

    Entries are handled in sorted name order. Each archive is passed to
    ``unzip`` or ``unrar`` and removed afterwards unless the helper
    reported a failure.

    Args:
        path: Directory that contains the archives.
    """
    for name in sorted(os.listdir(path)):
        # Anchor at the end of the name so that e.g. 'notes.zipinfo.txt'
        # is not mistaken for an archive (and deleted afterwards).
        ma = re.search(r'\.(zip|rar)$', name)
        if not ma:
            continue

        # os.listdir() returns bare names; join them with ``path`` so the
        # archive is found even when ``path`` is not the current directory.
        archive = os.path.join(path, name)
        if not os.path.isfile(archive):
            continue

        if ma.group(1) == 'zip':
            status = unzip(archive)
        else:
            status = unrar(archive)

        # Keep the archive when the helper reports a non-zero status.
        # A helper returning None gives no status, so the archive is
        # removed as it always was.
        if status:
            continue
        os.remove(archive)

if __name__=='__main__':
    # The first argument is the directory holding the archives; fail with
    # a usage message instead of a bare IndexError when it is missing.
    if len(sys.argv) < 2:
        sys.stderr.write('usage: %s <directory>\n' % sys.argv[0])
        sys.exit(2)
    path= sys.argv[1] # file location
    extract(path)


## fetch.py
#!/usr/bin/env python
# encoding:utf-8

"""
Fetch url and secret code from http://www.ftoow.com/read.php?tid-727.html

Store in local file as a format of "url code"
"""

import re
import urllib2
import gzip
import cookielib
import codecs
import cStringIO

# Cookies are kept in a Mozilla-format jar that get_source() saves to disk.
cookie_file = 'cookie.txt'
cookie = cookielib.MozillaCookieJar(cookie_file)
handler = urllib2.HTTPCookieProcessor(cookie)
opener = urllib2.build_opener(handler)
# Browser-like request headers sent with every request.
opener.addheaders = [
    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
    # Only gzip is advertised: the response reader in this script can only
    # decompress gzip, not deflate or sdch.
    ('Accept-Encoding', 'gzip'),
    ('Accept-Language', 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4'),
    ('Connection', 'keep-alive'),
    ('Host', 'www.ftoow.com'),
    ('User-Agent',
     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36')
]
# Make urllib2.urlopen() use this opener from now on.
urllib2.install_opener(opener)


def get_source(url):
    """Fetch ``url`` and return the page body decoded from GBK.

    The cookie jar is written to ``cookie_file`` after each request.

    Args:
        url: Address of the page to download.

    Returns:
        The page content as a unicode string.
    """
    res = urllib2.urlopen(url)
    try:
        body = res.read()
    finally:
        # Release the connection even when reading fails.
        res.close()
    cookie.save(cookie_file)

    # Gunzip only when the body starts with the gzip magic number, so an
    # uncompressed response no longer makes GzipFile raise.
    if body[:2] == b'\x1f\x8b':
        body = gzip.GzipFile(fileobj=cStringIO.StringIO(body)).read()
    return body.decode('gbk')


source = get_source('http://www.ftoow.com/read.php?tid-727.html')
# '.' and '?' are escaped so that they match literally; unescaped, the '?'
# only made the final 'p' of 'php' optional.
match = re.findall(u'"(http://www\\.ftoow\\.com/read\\.php\\?.*?)"', source)

# Share link followed, later on the same line, by its four-character code.
pattern = re.compile(u'\"(http://pan.baidu.com/.*?)\".*?百度网盘.*?密码：(\w{4})')

# Pages already visited, so a link that appears twice is fetched (and its
# share written to result.txt) only once.
seen = set()

f = codecs.open('result.txt', 'w', 'gbk')
try:
    for url in match:
        if url in seen:
            continue
        seen.add(url)
        s = get_source(url)
        m = pattern.search(s)
        if m is not None:
            # One "url code" pair per line.
            data = u'%s %s\n' % (m.group(1), m.group(2))
            f.write(data)
            print(data)
finally:
    # Close the file even when one of the requests fails.
    f.close()

## upload.sh
#!/bin/sh

# upload to baidupan
# https://github.com/PeterDing/iScript/blob/master/pan.baidu.com.py#pan.baidu.com.py

# Log in first; replace the placeholders with real credentials.
bp login YOUR_USER_NAME YOUR_PASSWD

# Each line of result.txt is "<share url> <secret code>".
# `read -r` keeps backslashes literal and splits the two fields directly.
while read -r url secret
do
    # Skip blank or incomplete lines instead of calling bp with empty arguments.
    if [ -z "$url" ] || [ -z "$secret" ]
    then
        continue
    fi
    bp s "$url" /ftoow -s "$secret"
done < 'result.txt'
	#!/usr/bin/env python
	#coding:utf-8

	"""
	download files from pan.baidu.com using https://github.com/PeterDing/iScript/blob/master/pan.baidu.com.py#pan.baidu.com.py
	use crontab to run it in a specific time interval to check if it has failed
	if failed, restart it

	Created on 2015.10.31
	"""

	import commands
	import os

	# Working directory the download must be (re)started from; edit before use.
	dest = 'YOUR_DEST_DIR'

	# Lines of the process table that mention "bp". The pipe must not be
	# backslash-escaped, otherwise the shell passes a literal '|' to ps.
	ps_listing = commands.getoutput('ps -aux | grep bp')
	# Exactly one line break is taken to mean that bp itself is not running
	# (presumably only the shell wrapper and grep match then -- verify on the
	# target system).
	newline_count = ps_listing.count('\n')

	# The download is only resumed when the script runs from inside `dest`.
	current_dir = commands.getoutput('pwd')

	# Any directory entry matching 32[0-9] marks the download as finished.
	finished_marker = commands.getoutput('ls | grep 32[0-9]')

	bp_idle = newline_count == 1
	if bp_idle and current_dir == dest and not finished_marker:
	    os.system('bp d /ftoow -R -a 10')
	#!/usr/bin/env python
	#coding:utf-8

	"""
	Extract zip or rar files
	When finished, use 'bp u <localpath> <remotepath> -R' to upload all the files

	Created on 2015.10.31
	"""

	import os
	import re
	import sys

	# Password handed to unzip (-P) and unrar (-p) for every archive.
	passwd='http://www.ftoow.com'

	def unzip(file):
	    """Extract a password-protected zip archive with the external ``unzip``.

	    No target directory is passed, so ``unzip`` unpacks to its default
	    location. The module-level ``passwd`` is used as the archive password.

	    Args:
	        file: Path of the zip archive.

	    Returns:
	        The exit status of ``unzip`` (0 on success), or 127 when the
	        program could not be started.
	    """
	    import subprocess

	    # use '-O gbk' if supported to avoid encoding problem
	    # On raspberry pi(Debian jessie),
	    # program 'unzip' does not have the '-O' option
	    #
	    # An argument list (no shell) keeps file names that contain quotes,
	    # '$' or backticks from being interpreted by the shell.
	    try:
	        return subprocess.call(['unzip', '-P', passwd, file])
	    except OSError as err:
	        sys.stderr.write('cannot run unzip: %s\n' % err)
	        return 127

	def unrar(file):
	    """Extract a password-protected rar archive with the external ``unrar``.

	    Runs ``unrar x`` with ``-o-`` and the module-level ``passwd`` as the
	    archive password.

	    Args:
	        file: Path of the rar archive.

	    Returns:
	        The exit status of ``unrar`` (0 on success), or 127 when the
	        program could not be started.
	    """
	    import subprocess

	    # An argument list (no shell) keeps file names that contain quotes,
	    # '$' or backticks from being interpreted by the shell.
	    try:
	        return subprocess.call(['unrar', 'x', '-p' + passwd, '-o-', file])
	    except OSError as err:
	        sys.stderr.write('cannot run unrar: %s\n' % err)
	        return 127

	def extract(path):
	    """Extract every zip/rar archive located directly inside ``path``.

	    Entries are handled in sorted name order. Each archive is passed to
	    ``unzip`` or ``unrar`` and removed afterwards unless the helper
	    reported a failure.

	    Args:
	        path: Directory that contains the archives.
	    """
	    for name in sorted(os.listdir(path)):
	        # Anchor at the end of the name so that e.g. 'notes.zipinfo.txt'
	        # is not mistaken for an archive (and deleted afterwards). The
	        # alternation bar must be a plain '|', not an escaped one.
	        ma = re.search(r'\.(zip|rar)$', name)
	        if not ma:
	            continue

	        # os.listdir() returns bare names; join them with ``path`` so the
	        # archive is found even when ``path`` is not the current directory.
	        archive = os.path.join(path, name)
	        if not os.path.isfile(archive):
	            continue

	        if ma.group(1) == 'zip':
	            status = unzip(archive)
	        else:
	            status = unrar(archive)

	        # Keep the archive when the helper reports a non-zero status.
	        # A helper returning None gives no status, so the archive is
	        # removed as it always was.
	        if status:
	            continue
	        os.remove(archive)

	if __name__=='__main__':
	    # The first argument is the directory holding the archives; fail with
	    # a usage message instead of a bare IndexError when it is missing.
	    if len(sys.argv) < 2:
	        sys.stderr.write('usage: %s <directory>\n' % sys.argv[0])
	        sys.exit(2)
	    path= sys.argv[1] # file location
	    extract(path)
	#!/usr/bin/env python
	# encoding:utf-8

	"""
	Fetch url and secret code from http://www.ftoow.com/read.php?tid-727.html

	Store in local file as a format of "url code"
	"""

	import re
	import urllib2
	import gzip
	import cookielib
	import codecs
	import cStringIO

	# Cookies are kept in a Mozilla-format jar that get_source() saves to disk.
	cookie_file = 'cookie.txt'
	cookie = cookielib.MozillaCookieJar(cookie_file)
	handler = urllib2.HTTPCookieProcessor(cookie)
	opener = urllib2.build_opener(handler)
	# Browser-like request headers sent with every request.
	opener.addheaders = [
	    # The wildcard media range is '*/*'; the asterisks had been lost here.
	    ('Accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8'),
	    # Only gzip is advertised: the response reader in this script can only
	    # decompress gzip, not deflate or sdch.
	    ('Accept-Encoding', 'gzip'),
	    ('Accept-Language', 'en-US,en;q=0.8,zh-CN;q=0.6,zh;q=0.4'),
	    ('Connection', 'keep-alive'),
	    ('Host', 'www.ftoow.com'),
	    ('User-Agent',
	     'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/44.0.2403.89 Chrome/44.0.2403.89 Safari/537.36')
	]
	# Make urllib2.urlopen() use this opener from now on.
	urllib2.install_opener(opener)


	def get_source(url):
	    """Fetch ``url`` and return the page body decoded from GBK.

	    The cookie jar is written to ``cookie_file`` after each request.

	    Args:
	        url: Address of the page to download.

	    Returns:
	        The page content as a unicode string.
	    """
	    res = urllib2.urlopen(url)
	    try:
	        body = res.read()
	    finally:
	        # Release the connection even when reading fails.
	        res.close()
	    cookie.save(cookie_file)

	    # Gunzip only when the body starts with the gzip magic number, so an
	    # uncompressed response no longer makes GzipFile raise.
	    if body[:2] == b'\x1f\x8b':
	        body = gzip.GzipFile(fileobj=cStringIO.StringIO(body)).read()
	    return body.decode('gbk')


	source = get_source('http://www.ftoow.com/read.php?tid-727.html')
	# '.' and '?' are escaped so that they match literally; unescaped, the '?'
	# only made the final 'p' of 'php' optional.
	match = re.findall(u'"(http://www\\.ftoow\\.com/read\\.php\\?.*?)"', source)

	# Share link followed, later on the same line, by its four-character code.
	# Both lazy wildcards are '.*?'; the asterisks had been lost here.
	pattern = re.compile(u'\"(http://pan.baidu.com/.*?)\".*?百度网盘.*?密码：(\w{4})')

	# Pages already visited, so a link that appears twice is fetched (and its
	# share written to result.txt) only once.
	seen = set()

	f = codecs.open('result.txt', 'w', 'gbk')
	try:
	    for url in match:
	        if url in seen:
	            continue
	        seen.add(url)
	        s = get_source(url)
	        m = pattern.search(s)
	        if m is not None:
	            # One "url code" pair per line.
	            data = u'%s %s\n' % (m.group(1), m.group(2))
	            f.write(data)
	            print(data)
	finally:
	    # Close the file even when one of the requests fails.
	    f.close()
	#!/bin/sh

	# upload to baidupan
	# https://github.com/PeterDing/iScript/blob/master/pan.baidu.com.py#pan.baidu.com.py

	# Log in first; replace the placeholders with real credentials.
	bp login YOUR_USER_NAME YOUR_PASSWD

	# Each line of result.txt is "<share url> <secret code>".
	# `read -r` keeps backslashes literal and splits the two fields directly;
	# reading via redirection also avoids the backslash-escaped pipe, which the
	# shell would have handed to cat as a literal '|' argument.
	while read -r url secret
	do
	    # Skip blank or incomplete lines instead of calling bp with empty arguments.
	    if [ -z "$url" ] || [ -z "$secret" ]
	    then
	        continue
	    fi
	    bp s "$url" /ftoow -s "$secret"
	done < 'result.txt'