Created
February 22, 2014 12:05
-
-
Save shede333/9152952 to your computer and use it in GitHub Desktop.
借鉴了 https://gist.github.com/qiaoxueshi/5976402 这里的代码,并修改为下载 WWDC 所有 PDF 的代码。使用方法: 1. 必须在 Safari 浏览器中打开 https://developer.apple.com/wwdc/videos/ ,接着使用 Apple 开发者账户登录,一定要登录才行。 2. 登录成功之后,将页面保存为 HTML 格式,文件名为 wwdc_video.html。 3. 创建文件 extract.py,将本代码复制进去。 4. 将 extract.py 和 wwdc_video.html 放在同一目录下,然后在此目录下执行以下语句(shell 一定要切换到这个目录下执行): python extract.py < ~/wwdc…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#_*_ coding:UTF-8 _*_ | |
__author__ = 'shaowei' | |
import requests | |
import os | |
import re | |
# Single-argument print(...) calls are used throughout so the script produces
# the same output under both Python 2 and Python 3.
print("\nAll files will be downloaded here: %s" % os.getcwd())

# Link patterns for the three kinds of WWDC 2013 assets found in the saved page.
re_video_HD = re.compile(r'<a href="(http://devstreaming.apple.com/videos/wwdc/2013/[^"]*-HD\.mov)')
re_video_SD = re.compile(r'<a href="(http://devstreaming.apple.com/videos/wwdc/2013/[^"]*-SD\.mov)')
re_pdf = re.compile(r'<a href="(http://devstreaming.apple.com/videos/wwdc/2013/[^"]*\.pdf)')

# The saved videos page must sit in the current working directory.
html_file_path = os.path.join(os.getcwd(), "wwdc_video.html")
if not os.path.exists(html_file_path):
    print("no exist file: %s" % html_file_path)
    # Stop here with a non-zero exit status instead of failing on open() below.
    raise SystemExit(1)
print("file is exist")

# Read the whole page and close the handle as soon as the content is in memory.
with open(html_file_path) as html_file:
    html_content = html_file.read()

# Each list holds the captured URLs (group 1 of the matching pattern).
pdf_url_list = re_pdf.findall(html_content)
sd_url_list = re_video_SD.findall(html_content)
hd_url_list = re_video_HD.findall(html_content)

print("pdf file count: %d" % len(pdf_url_list))
print("video-sd file count: %d" % len(sd_url_list))
print("video-hd file count: %d" % len(hd_url_list))
# start downLoad | |
def down_files(url_list, save_folder):
    """Download every URL in *url_list* into *save_folder*.

    The local file name is the text after the last "/" of the URL. A file
    that already exists in *save_folder* is skipped, so an interrupted run
    can be resumed by running the script again.

    Each download is streamed to a temporary "<name>.part" file and renamed
    to its final name only after it completes, so a failed transfer never
    leaves a truncated file that a later run would mistake for a finished
    download. A URL answered with an HTTP error status is reported and
    skipped rather than saved.

    Args:
        url_list: Sequence of file URLs to fetch.
        save_folder: Existing directory that receives the downloaded files.

    Returns:
        None.
    """
    dl_count = len(url_list)
    for dl_index, file_url in enumerate(url_list, 1):
        # Progress is shown as "current/total".
        dl_progress = "%d/%d" % (dl_index, dl_count)
        file_name = file_url.rsplit("/", 1)[-1]
        file_path = os.path.join(save_folder, file_name)

        # Already downloaded on a previous run: nothing to do.
        if os.path.exists(file_path):
            print("file %s has exist,don`t downLoad  %s" % (dl_progress, file_path))
            continue

        print("\nstart download:  %s %s" % (dl_progress, file_url))
        part_path = file_path + ".part"
        # stream=True avoids holding a whole (possibly very large) file in memory.
        response = requests.get(file_url, stream=True)
        try:
            if not response.ok:
                # Do not store an error page under the asset's file name.
                print("download failed %s, HTTP status %s: %s"
                      % (dl_progress, response.status_code, file_url))
                continue
            with open(part_path, "wb") as file_obj:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file_obj.write(chunk)
            # Publish the file under its real name only once it is complete.
            os.rename(part_path, file_path)
        finally:
            response.close()
            # Still present only if the transfer failed part-way through.
            if os.path.exists(part_path):
                os.remove(part_path)

        print("downLoad finish %s, file saved in: %s \n" % (dl_progress, file_path))
# One output folder per asset kind, all created under the current directory.
pdf_folder_path = os.path.join(os.getcwd(), "pdf")
sd_folder_path = os.path.join(os.getcwd(), "video-sd")
hd_folder_path = os.path.join(os.getcwd(), "video-hd")

for folder_path in (pdf_folder_path, sd_folder_path, hd_folder_path):
    # isdir (not exists): if a regular file occupies the name, mkdir fails
    # right here instead of every later download failing inside the folder.
    if not os.path.isdir(folder_path):
        os.mkdir(folder_path)

# Single-argument print(...) gives the same output on Python 2 and Python 3.
print("start downLoad pdf file: %d \n" % len(pdf_url_list))
down_files(pdf_url_list, pdf_folder_path)
print(" all pdf file download finish.")

# Video downloads are disabled by default; uncomment a group to enable it.
# print("start downLoad video-sd file: %d \n" % len(sd_url_list))
# down_files(sd_url_list, sd_folder_path)
# print(" all video-sd file download finish.")
#
# print("start downLoad video-hd file: %d \n" % len(hd_url_list))
# down_files(hd_url_list, hd_folder_path)
# print(" all video-hd file download finish.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment