Skip to content

Instantly share code, notes, and snippets.

@tyan-boot
Last active October 10, 2016 15:04
Show Gist options
  • Save tyan-boot/cf8788a4ad9decfa28b316393e2a959b to your computer and use it in GitHub Desktop.
Save tyan-boot/cf8788a4ad9decfa28b316393e2a959b to your computer and use it in GitHub Desktop.
Scan dytt (dytt8.net) listing pages and save each movie's download link to MySQL
-- Schema for the dytt scraper: one database holding a single table of movies.
-- utf8mb4 matches the charset the Python client requests in db_config, so
-- non-ASCII movie titles are stored without conversion errors.
CREATE DATABASE dytt CHARACTER SET utf8mb4;

-- Make sure the table is created inside the new database rather than in
-- whichever database happens to be selected for the session.
USE dytt;

-- gndy: one row per movie whose download link was extracted.
CREATE TABLE gndy
(
    uid INT PRIMARY KEY NOT NULL AUTO_INCREMENT, -- surrogate key, assigned by MySQL
    name TEXT,                                   -- movie title taken from the listing page
    url TEXT                                     -- download link taken from the movie's detail page
);
import re
import requests
import pymysql
import threading
# Leading part of every listing-page URL; getnexturl() appends the page
# number and '.html'.
baseurl = 'http://www.dytt8.net/html/gndy/oumei/list_7_'

# Pattern used on movie detail pages: group 1 captures text that begins with
# 'ftp' and ends just before a closing '</a' on the same line.
redown = re.compile(r'a[\s\S].*(ftp.*)</a')

# Keyword arguments passed straight to pymysql.connect().
# NOTE(review): credentials are hard-coded here; consider loading them from
# the environment before sharing or deploying this script.
db_config = dict(
    host='127.0.0.1',
    port=3306,
    user='root',
    password='root',
    db='dytt',
    charset='utf8mb4',
)
def getnexturl():
    """Yield listing-page URLs without end: page 1, page 2, page 3, ...

    Each URL is ``baseurl`` followed by the page number and ``'.html'``.
    The generator never finishes on its own; the consumer decides when to
    stop pulling values from it.
    """
    page = 1
    while True:
        yield '{}{}.html'.format(baseurl, page)
        page += 1
def SaveToDB(name, url):
    """Insert one movie row into the ``gndy`` table and commit it.

    A new connection is opened from ``db_config`` on every call, so calls
    made from different threads do not share a connection.

    Args:
        name: Movie title, stored in the ``name`` column.
        url: Download link, stored in the ``url`` column.

    Raises:
        Any exception raised by ``pymysql`` while connecting, executing or
        committing. The cursor and connection are closed before the error
        propagates, so a failed insert no longer leaks a connection.
    """
    conn = pymysql.connect(**db_config)
    try:
        cur = conn.cursor()
        try:
            # Parameterized query: the driver escapes both values.
            cur.execute('insert into gndy (name,url) values (%s, %s)', [name, url])
            conn.commit()
        finally:
            cur.close()
    finally:
        conn.close()
    print('save %s' % name)
def GetDownLink(movuri):
    """Fetch a movie detail page and return the download link found on it.

    Args:
        movuri: Site-relative path of the detail page; it is appended to
            ``http://www.dytt8.net`` to build the request URL.

    Returns:
        The text captured by group 1 of the module-level ``redown`` pattern.

    Raises:
        ValueError: If nothing on the page matches ``redown``.
        requests.RequestException: If the request fails or times out.
    """
    movurl = 'http://www.dytt8.net' + movuri
    # Without a timeout a stalled server would block this worker forever.
    rdata = requests.get(movurl, timeout=30)
    # Decode the body as gb2312 instead of the encoding requests guessed.
    rdata.encoding = 'gb2312'
    mdown = redown.search(rdata.text)
    if mdown is None:
        # search() returns None on a miss; fail with a message that names
        # the page instead of an opaque AttributeError on ``.group``.
        raise ValueError('no download link found on %s' % movurl)
    return mdown.group(1)
def FindAndSave(name, uri):
    """Look up the download link for one movie and store it in the database.

    Args:
        name: Movie title to store alongside the link.
        uri: Site-relative path of the movie's detail page, passed to
            ``GetDownLink``.
    """
    SaveToDB(name, GetDownLink(uri))
# Matches one entry on a listing page: group 1 is the site-relative path of
# the movie's detail page, group 2 is the link text (the movie title).
# Compiled once here rather than on every pass through the loop.
remov = re.compile(r'table[\s\S]+?a\shref="(/html/gndy/\w*?/\d+/\d+\.html)"\sclass="ulink">(.+?)<[\s\S]*?/table')

# Walk the listing pages in order and hand every movie found to a worker
# thread that resolves and stores its download link.
url = getnexturl()
while True:
    # Without a timeout a stalled server would hang the crawl forever.
    rdata = requests.get(next(url), timeout=30)
    rdata.encoding = 'gb2312'
    movs = remov.findall(rdata.text)
    print('1')
    # findall() always returns a list, so an isinstance check can never end
    # the loop; an empty result is what signals that the listing pages have
    # run out.
    if not movs:
        break
    for mov in movs:
        # mov is (detail_page_path, title); FindAndSave takes (name, uri).
        t = threading.Thread(target=FindAndSave, args=(mov[1], mov[0]))
        t.start()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment