Created
November 3, 2016 13:23
-
-
Save anonymous/29ca31b52689d9c0ce99781097a7fe1b to your computer and use it in GitHub Desktop.
downloadfacebook1.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# coding: utf-8 | |
import webbrowser, os, requests, re, bs4 | |
import appex | |
import console | |
import dialogs | |
data_dir = os.path.join(os.path.abspath('.'),'Videos') | |
if not os.path.isdir (data_dir): | |
#print(data_dir) | |
os.makedirs(data_dir) | |
os.chdir(data_dir) | |
#ßprint(os.path.abspath('.')) | |
def download_file(url, tmpfile=None): | |
if not tmpfile: | |
local_filename = url.split('/')[-1] | |
else: | |
local_filename = tmpfile | |
with open(local_filename, 'wb') as f: | |
r = requests.get(url, stream=True) | |
total_length = r.headers.get('content-length') | |
if not total_length: | |
f.write(r.content) | |
else: | |
dl = 0 | |
total_length = float(total_length) | |
for chunk in r.iter_content(1024): | |
dl += len(chunk) | |
f.write(chunk) | |
return local_filename | |
def DownloadFile(url, filename): | |
res = requests.get(url) | |
try: | |
res.raise_for_status() | |
except Exception as exc: | |
print('There was a problem: %s' % (exc)) | |
with open(filename, "wb") as code: | |
code.write(res.content) | |
'''' playFile = open(filename, 'wb') | |
for chunk in res.iter_content(100000): | |
playFile.write(chunk) | |
print('Writing 100000 bytes') | |
playFile.close() | |
''' | |
def OpenURL(url): | |
res = requests.get(url) | |
res.encoding = 'utf-8' # 中文顯示必用 | |
try: | |
res.raise_for_status() | |
except Exception as exc: | |
print('There was a problem: %s' % (exc)) | |
return res | |
def DownloadFacebookVideo(url, title_length): | |
r = OpenURL(url) | |
doc = r.text | |
soup = bs4.BeautifulSoup(doc, 'html5lib') | |
#elements = soup.find_all('div', attrs={class: '_5pbx userContent'}) | |
elements = soup.find_all(class_ = '_5pcq') | |
video_list = [] | |
title_list = [] | |
for i in range(len(elements)): # Search all the posts which contains video | |
#print (str(elements[i]['href'])) | |
r1 = (re.search(r'videos/(\d+)/', str(elements[i]['href']))) | |
if not r1: | |
# Skipped all posts without video | |
continue | |
video_id = r1.group(1) | |
#print (video_id) | |
x = elements[i].findNext('p') | |
t = x.getText() # Get the text of the post which contains video | |
if len(t) > title_length: | |
t = t[0:title_length] + '...' | |
temp_dict = {'text': t, 'video_id': video_id, 'hd_src': None, 'sd_src': None, 'sd_src_no_ratelimit':None, 'hd_src_no_ratelimit': None} | |
video_list.append(temp_dict) | |
title_list.append(t) | |
#print(len(video_list)) | |
for i in range(len(video_list)): | |
video_id = video_list[i]['video_id'] | |
#print ('video_id: ' + video_id) | |
patt = r'\[\{"is_hds":(?:false|true),"video_id":"' + video_id + '".*?"sd_src_no_ratelimit":"(.*?)".*?"hd_src_no_ratelimit":"(.*?)".*?"hd_src":"(.*?)".*?"sd_src":"(.*?)".*?\}\]' | |
r2 = re.search(patt, doc) | |
if r2.group(1) != '': | |
video_list[i]['sd_src_no_ratelimit'] = r2.group(1).replace('\/','/') | |
if r2.group(2) != '': | |
video_list[i]['hd_src_no_ratelimit'] = r2.group(2).replace('\/','/') | |
if r2.group(3) != '': | |
video_list[i]['hd_src'] = r2.group(3).replace('\/','/') | |
if r2.group(4) != '': | |
video_list[i]['sd_src'] = r2.group(4).replace('\/','/') | |
# print(video_list) | |
for i in range(len(video_list)): | |
t = video_list[i]['text'] | |
if video_list[i]['hd_src'] != None: | |
url = video_list[i]['hd_src'] | |
video_quality = 'hd_src' | |
elif video_list[i]['hd_src_no_ratelimit'] != None: | |
url = video_list[i]['hd_src_no_ratelimit'] | |
video_quality = 'hd_src_no_ratelimit' | |
elif video_list[i]['sd_src_no_ratelimit'] != None: | |
url = video_list[i]['sd_src_no_ratelimit'] | |
video_quality = 'sd_src_no_ratelimit' | |
elif video_list[i]['sd_src'] != None: | |
url = video_list[i]['sd_src'] | |
video_quality = 'sd_src' | |
else: | |
video_quality = None | |
print('No video found!!') | |
item = dialogs.list_dialog('Pick', title_list) | |
if item != None: | |
for i, text in enumerate(title_list): | |
if item == text: | |
vq_list = {} | |
if video_list[i]['hd_src'] != None: | |
vq_list['hd_src'] = video_list[i]['hd_src'] | |
if video_list[i]['hd_src_no_ratelimit'] != None: | |
vq_list['hd_src_no_ratelimit'] = video_list[i]['hd_src_no_ratelimit'] | |
if video_list[i]['sd_src'] != None: | |
vq_list['sd_src'] = video_list[i]['sd_src'] | |
if video_list[i]['sd_src_no_ratelimit'] != None: | |
vq_list['sd_src_no_ratelimit'] = video_list[i]['sd_src_no_ratelimit'] | |
video_quality = dialogs.list_dialog('Pick the Video Quality', sorted(vq_list.keys())) | |
if video_quality != None: | |
fname = (video_list[i]['text'])[0:20] + '(' + video_quality +').mp4' | |
console.set_color(0,0,255) | |
print('URL:' + vq_list[video_quality]) | |
print ('Downloading ' + fname + '...',end='') | |
#dialogs.alert('Downloading ' + fname + '...') | |
fname = 'temp.mp4' | |
#DownloadFile(vq_list[video_quality], fname) | |
download_file(vq_list[video_quality], fname) | |
print('Done!') | |
return fname | |
def main(): | |
if not appex.is_running_extension(): | |
print('This script is intended to be run from the sharing extension.') | |
return | |
url = appex.get_url() | |
if not url: | |
print('No input URL found.') | |
return | |
#url = 'https://www.facebook.com/Spurs/?ref=ts&fref=ts' | |
fname = DownloadFacebookVideo(url, 35) | |
console.quicklook(fname) | |
os.remove(fname) | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment