Skip to content

Instantly share code, notes, and snippets.

@shede333
Created February 22, 2014 12:05
Show Gist options
  • Save shede333/9152952 to your computer and use it in GitHub Desktop.
Save shede333/9152952 to your computer and use it in GitHub Desktop.
借鉴了 https://gist.github.com/qiaoxueshi/5976402 这里的代码,修改为下载 WWDC 所有 PDF 的代码。使用方法: 1. 必须在 Safari 浏览器中打开 https://developer.apple.com/wwdc/videos/ ,接着使用 Apple 开发者账户登录,一定要登录才行。 2. 登录成功之后,将页面保存为 HTML 格式,文件名为 wwdc_video.html。 3. 创建文件 extract.py,将本代码复制进去。 4. 将 extract.py 和 wwdc_video.html 放在同一目录下,然后在此目录下执行以下语句(shell 一定要切换到这个目录下执行): python extract.py < ~/wwdc…
#_*_ coding:UTF-8 _*_
__author__ = 'shaowei'
import requests
import os
import re
# Tell the user where the downloads will end up (the current working directory).
print("\nAll files will be downloaded here: %s" % os.getcwd())

# Patterns that pull WWDC 2013 asset URLs out of the saved video page.
# Each captures the URL inside an `<a href="...` attribute, up to and including
# the file extension; anything after the extension (e.g. a query string) is
# left out of the capture.
# The dots of the host name are escaped so that only the literal host
# "devstreaming.apple.com" is accepted rather than any character in their place.
re_video_HD = re.compile(r'<a href="(http://devstreaming\.apple\.com/videos/wwdc/2013/[^"]*-HD\.mov)')
re_video_SD = re.compile(r'<a href="(http://devstreaming\.apple\.com/videos/wwdc/2013/[^"]*-SD\.mov)')
re_pdf = re.compile(r'<a href="(http://devstreaming\.apple\.com/videos/wwdc/2013/[^"]*\.pdf)')
# The saved WWDC video page is expected in the current working directory.
html_file_path = os.path.join(os.getcwd(), "wwdc_video.html")
if not os.path.exists(html_file_path):
    # Nothing can be extracted without the page: stop here with a non-zero
    # exit status instead of falling through to open() and a raw traceback.
    print("no exist file: %s" % html_file_path)
    raise SystemExit(1)
print("file is exist")

# Read the whole page; the context manager closes the handle afterwards.
with open(html_file_path) as html_file:
    html_content = html_file.read()

# Collect every matching asset URL, in the order it appears in the page.
pdf_url_list = re_pdf.findall(html_content)
sd_url_list = re_video_SD.findall(html_content)
hd_url_list = re_video_HD.findall(html_content)

print("pdf file count: %d" % len(pdf_url_list))
print("video-sd file count: %d" % len(sd_url_list))
print("video-hd file count: %d" % len(hd_url_list))
# start downLoad
def down_files(url_list, save_folder):
    """Download every URL in *url_list* into *save_folder*.

    The local file name is the part of the URL after its last "/". A URL
    whose target file already exists is skipped, so a run can be restarted
    without fetching finished files again.

    Args:
        url_list: URLs to fetch; each must contain at least one "/".
        save_folder: Existing directory that receives the files.

    Raises:
        ValueError: If a URL contains no "/" (raised by ``str.rindex``).

    Connection-level errors raised by ``requests`` are not caught here.
    Responses with an HTTP error status are reported and skipped.
    """
    dl_count = len(url_list)
    for dl_finish_count, file_url in enumerate(url_list, 1):
        # Progress label reads "current/total".
        dl_progress = "%d/%d" % (dl_finish_count, dl_count)
        file_path = os.path.join(save_folder, file_url[file_url.rindex("/") + 1:])

        # check file exist
        if os.path.exists(file_path):
            print("file %s has exist,don`t downLoad  %s" % (dl_progress, file_path))
            continue  # file has exist,don`t downLoad

        print("\nstart download:  %s %s" % (dl_progress, file_url))
        # Stream the body so large files are never held in memory at once.
        response = requests.get(file_url, stream=True)
        try:
            if not response.ok:
                # Do not store an error page under the document's name: it
                # would be mistaken for a finished download on the next run.
                print("downLoad failed %s, HTTP status %s: %s"
                      % (dl_progress, response.status_code, file_url))
                continue
            # Write to a temporary name first; the final name only appears
            # once the file is complete, so an interrupted transfer is not
            # skipped as "already downloaded" later.
            part_path = file_path + ".part"
            with open(part_path, "wb") as file_obj:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    if chunk:
                        file_obj.write(chunk)
        finally:
            response.close()
        os.rename(part_path, file_path)
        print("downLoad finish %s, file saved in: %s \n" % (dl_progress, file_path))
# Destination folders, one per asset kind, all below the working directory.
pdf_folder_path = os.path.join(os.getcwd(), "pdf")
sd_folder_path = os.path.join(os.getcwd(), "video-sd")
hd_folder_path = os.path.join(os.getcwd(), "video-hd")

# Create whichever of the three folders is still missing.
for _folder_path in (pdf_folder_path, sd_folder_path, hd_folder_path):
    if not os.path.exists(_folder_path):
        os.mkdir(_folder_path)

# Fetch the PDFs found in the saved page.
print("start downLoad pdf file: %d \n" % len(pdf_url_list))
down_files(pdf_url_list, pdf_folder_path)
print(" all pdf file download finish.")
# print "start downLoad video-sd file:", len(sd_url_list), "\n"
# down_files(sd_url_list, sd_folder_path)
# print " all video-sd file download finish."
#
# print "start downLoad video-hd file:", len(hd_url_list), "\n"
# down_files(hd_url_list, hd_folder_path)
# print " all video-hd file download finish."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment