Last active
October 10, 2016 15:04
-
-
Save tyan-boot/cf8788a4ad9decfa28b316393e2a959b to your computer and use it in GitHub Desktop.
scan dytt download link
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Schema for the dytt scraper: one row per scraped movie.
-- utf8mb4 matches the charset the Python client connects with.
CREATE DATABASE dytt DEFAULT CHARACTER SET utf8mb4;

-- Select the new database so the table is created inside it.
USE dytt;

CREATE TABLE gndy
(
    uid INT PRIMARY KEY NOT NULL AUTO_INCREMENT,  -- surrogate row id
    name TEXT,                                    -- movie title taken from the listing page
    url TEXT                                      -- download link taken from the movie's detail page
);
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import requests | |
import pymysql | |
import threading | |
# Prefix of every listing page; the page number and '.html' are appended.
baseurl = 'http://www.dytt8.net/html/gndy/oumei/list_7_'

# Captures the text that starts at 'ftp' and runs up to a closing '</a'
# on the same line of a movie detail page.
redown = re.compile(r'a[\s\S].*(ftp.*)</a')

# Keyword arguments handed unchanged to pymysql.connect().
db_config = dict(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='root',
    db='dytt',
    charset='utf8mb4',
)
def getnexturl():
    """Yield listing-page URLs without end, starting at page 1.

    Each URL is ``baseurl`` followed by the page number and ``.html``.
    The generator never finishes on its own; the consumer must decide
    when to stop pulling from it.
    """
    page = 0
    while True:
        page += 1
        yield ''.join((baseurl, str(page), '.html'))
def SaveToDB(name, url):
    """Insert one ``(name, url)`` row into the ``gndy`` table.

    A fresh connection is opened from ``db_config`` on every call and is
    always closed again, including when the insert or commit fails.

    Args:
        name: Movie title to store.
        url: Download link to store.

    Raises:
        Whatever pymysql raises on connection, execute or commit failure;
        the exception propagates after the connection has been closed.
    """
    conn = pymysql.connect(**db_config)
    try:
        # The cursor context manager closes the cursor on exit.
        with conn.cursor() as cur:
            # Parameterized query: the driver escapes name and url.
            cur.execute(
                'insert into gndy (name,url) values (%s, %s)', [name, url]
            )
        conn.commit()
    finally:
        # Close even when execute/commit raises so a failed insert does
        # not leak the connection.
        conn.close()
    print('save %s' % name)
def GetDownLink(movuri):
    """Fetch a movie detail page and return the download link found on it.

    Args:
        movuri: Site-relative path of the detail page; it is appended to
            ``http://www.dytt8.net``.

    Returns:
        The text captured by the ``redown`` pattern (starts with ``ftp``).

    Raises:
        ValueError: If the page contains no match for ``redown``.
        requests.exceptions.RequestException: If the HTTP request fails
            or times out.
    """
    movurl = 'http://www.dytt8.net' + movuri
    # Without a timeout a stalled server would block this call forever.
    rdata = requests.get(movurl, timeout=30)
    # Decode the body as gb2312 rather than the encoding requests guessed.
    rdata.encoding = 'gb2312'
    mdown = redown.search(rdata.text)
    if mdown is None:
        # search() returns None on no match; fail with a message that names
        # the page instead of an AttributeError on None.
        raise ValueError('no ftp download link found on %s' % movurl)
    return mdown.group(1)
def FindAndSave(name, uri):
    """Look up the download link for ``uri`` and store it under ``name``.

    Args:
        name: Movie title to save alongside the link.
        uri: Site-relative path of the movie's detail page.
    """
    SaveToDB(name, GetDownLink(uri))
# Script entry: walk the listing pages in order and, for every movie found,
# resolve and store its download link on a worker thread.

# Matches one listing entry and captures (detail-page path, link text).
# Compiled once here instead of on every loop iteration.
remov = re.compile(r'table[\s\S]+?a\shref="(/html/gndy/\w*?/\d+/\d+\.html)"\sclass="ulink">(.+?)<[\s\S]*?/table')

for page_url in getnexturl():
    # Without a timeout a stalled server would hang the crawl forever.
    rdata = requests.get(page_url, timeout=30)
    # Decode the body as gb2312 rather than the encoding requests guessed.
    rdata.encoding = 'gb2312'
    movs = remov.findall(rdata.text)
    print('1')
    # findall() always returns a list, so an isinstance check can never end
    # the loop. A page with no entries is the signal to stop.
    if not movs:
        break
    workers = [
        threading.Thread(target=FindAndSave, args=(name, uri))
        for uri, name in movs
    ]
    for worker in workers:
        worker.start()
    # Wait for this page's workers before fetching the next page, so the
    # number of live threads (and open DB connections) stays bounded by
    # one page's worth of entries.
    for worker in workers:
        worker.join()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment