Last active
July 4, 2022 20:25
-
-
Save nkpro2000/253d64a51ecb9a631531ced001d265bf to your computer and use it in GitHub Desktop.
Download the reels you have saved on Facebook.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import json | |
import requests | |
# reeldels.py: archive the reels recorded in ./reels/deleted into ./reels/dels.
# For every record this writes NNN.txt (description + urls), NNN.png (the
# image, when it can be fetched) and appends [reel url, 'NNN.mp4'] to
# ./reels/dels-todl.
os.chdir('reels')

# Continue numbering after the highest numbered entry already in ./dels.
# A missing or empty directory (or one without numbered files) starts at 0.
try:
    existing = os.listdir('./dels')
except FileNotFoundError:
    existing = []
stems = (name.split('.')[0] for name in existing)
li = max((int(stem) for stem in stems if stem.isdigit()), default=0)

os.makedirs('./dels', exist_ok=True)

with open('./deleted') as f:
    # Each record occupies four lines in the file (the JSON line, a blank
    # line, a dashed separator, a blank line), so keep every fourth line.
    records = f.read().splitlines()[::4]

for i, reel in enumerate(records):
    # rj is indexed as: [1] reel url, [2] description, [3] video url,
    # [4] image url.
    rj = json.loads(reel)
    name = str(li + (i + 1)).zfill(3)

    with open(f'./dels/{name}.txt', 'w') as f_:
        f_.write(rj[2] + '\n\n')
        f_.write(rj[1] + '\n\n')
        f_.write(rj[3] + '\n\n')
        f_.write(rj[4] + '\n')

    # Only fetch when there is an image url, and only create the .png when
    # the download succeeded, so no empty image files are left behind.
    if rj[4] == '':
        print(name, 'No Image')
    else:
        r = requests.get(rj[4])
        if r.status_code == 200:
            with open(f'./dels/{name}.png', 'wb') as f_:
                f_.write(r.content)
        else:
            print(name, r.text)

    with open('dels-todl', 'a') as f_:
        f_.write(json.dumps([rj[1], f'{name}.mp4']))
        f_.write('\n')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Keep scrolling the page down, five pages at a time with a one second
// pause between steps, until the body's HTML matches `pattern`.
// Resolves (with no value) once the pattern is present.
async function scroll_untill(pattern) {
  const pause = ms => new Promise(resolve => setTimeout(resolve, ms));
  for (;;) {
    if (document.body.innerHTML.match(pattern)) {
      break;
    }
    window.scrollByPages(5);
    await pause(1000);
  }
  console.log('#########> Scrolled <#########')
}
/*const reel_url = /https\:\/\/www.facebook.com\/reel\/\d+\//g; | |
function get_urls(pattern) { | |
urls = '' | |
rb = [...document.body.innerHTML.matchAll(pattern)] | |
rb = [...new Set(rb.map(e => e[0]))] | |
rb.forEach(e => urls+=(e+'\n')) | |
return urls | |
}*/ | |
// Diagnostics collected by get_all: entries are [reason, index, element].
// Left as a global so it can be inspected from the console afterwards.
log = []

// Collect one [reelUrl, innerText, videoUrl, imageUrl] entry for every item
// selected by the XPath below, stopping after the first item whose HTML
// matches `breakp`.
//   - the first href with role="link" becomes reelUrl, the third becomes
//     videoUrl ('' when there are fewer than three links);
//   - the first src with alt="image" becomes imageUrl ('' when absent);
//   - items without any link are skipped and recorded in `log`.
// Uses the console helper $x, so this must be run from the browser console.
function get_all(breakp) {
  // Working variables are declared locally so they no longer leak into (or
  // collide with) globals used by the other console snippets.
  const a = $x('/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div[2]/div')
  const urls = []
  for (let i = 0; i < a.length; i++) {
    const node = a[i]
    const it = node.innerText
    console.log(it)
    const ls = [...node.innerHTML.matchAll(/href\=\"(.*?)\"\ role\=\"link\"/g)]
    const imgMatch = [...node.innerHTML.matchAll(/src\=\"(.*?)\"\ alt\=\"image\"/g)][0]
    let img = ''
    if (imgMatch) {
      img = imgMatch[1]
    } else {
      console.log('No image')
      log.push(['No image', i, node])
    }
    if (ls[0]) {
      console.log(ls[0][1])
      if (ls[2]) {
        console.log(ls[2][1])
        urls.push([ls[0][1], it, ls[2][1], img])
      } else {
        console.log(ls)
        log.push(['No video url', i, node])
        urls.push([ls[0][1], it, '', img])
      }
    } else {
      console.log(node.innerHTML)
      log.push(['No links', i, node])
    }
    if (node.innerHTML.match(breakp)) {
      break
    }
  }
  return urls
}

// NOTE: break_pattern is not defined anywhere in this file as shown; it has
// to be defined in the console before this line is run.
all_urls = get_all(break_pattern)
///////////////////////////////////////////////////////////////////////////////////////////////////// | |
// $nano all_urls.json #paste the JSON copied from firefox
//=> all_urls.json | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
// $python reelorder.py #order reels; this script generates files listing each reel's url and the filename to save its mp4 as
//-> reels_temp/*/* | |
//=> reels/.directory | |
//=> reels/deleted | |
//=> reels/*/.directory , reels/*-todl | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
// $python reeltodl.py #this script gives the reels url array which is used for download link scraping | |
//=> reels/todl.json | |
//=> reels/rurls.txt | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
// $mkdir /tmp/tmpgcsp #just a temporary profile directory
// $google-chrome-stable --disable-web-security --incognito --user-data-dir='/tmp/tmpgcsp' | |
//? why not firefox : firefox requires installing an extension to unblock cross-origin access
// Matches `playable_url":"<url>"` / `playable_url_quality_hd":"<url>"` pairs
// in a page's HTML; group 1 is the quality key, group 2 the (escaped) url.
const dlurlp=/(playable_url|playable_url_quality_hd)":"([^"]+)"/g
// Quality keys ordered from least to most preferred.
var qualities = ["playable_url", "playable_url_quality_hd"]
// https://github.com/vikas5914/Facebook-Video-Downloader/blob/9e4cd6bc51c9ba66b48271d9873742bf88a0faaf/app/main.php#L98

// Open every url in `rurls` in a new window, wait until its HTML contains a
// playable url, and return the list of picked download urls (same order as
// `rurls`).
// For each page the leading run of matches with strictly increasing quality
// is scanned and the last url of that run is kept.
// The wait has no timeout: a page that never exposes a playable url blocks
// the whole run.
// Throws when the browser refuses to open the window (popup blocked).
async function sp_urls(rurls) {
  const durls = []
  const pause = ms => new Promise(resolve => setTimeout(resolve, ms))
  // Loop variables are block-scoped so they do not leak into the globals
  // shared with the other console snippets.
  for (const rurl of rurls) {
    const w = window.open(rurl)
    if (!w) {
      throw new Error('Could not open a window for ' + rurl + ' (popup blocked?)')
    }
    // Poll every 500 ms and keep the matches from the successful poll
    // so the page is not scanned a second time afterwards.
    let durl = []
    for (;;) {
      const html = w.document && w.document.body && w.document.body.innerHTML
      if (html) {
        durl = [...html.matchAll(dlurlp)]
        if (durl.length > 0) {
          break
        }
      }
      await pause(500)
    }
    let x = null
    let k = -2
    for (const m of durl) {
      const q = qualities.indexOf(m[1])
      if (q <= k) {
        break
      }
      k = q
      x = m[2]
    }
    w.close()
    durls.push(x)
  }
  return durls
}
dlurls = await sp_urls(rurls)
///////////////////////////////////////////////////////////////////////////////////////////////////// | |
// $nano reels/dlurls.json #paste the JSON copied from google-chrome
//=> reels/dlurls.json | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
// $python reeldow.py | |
///////////////////////////////////////////////////////////////////////////////////////////////// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import json | |
import requests | |
# reeldow.py: download the mp4 of every reel listed in reels/todl.json using
# the download urls scraped into reels/dlurls.json (same order).
os.chdir('reels')

if os.path.isfile('todl.json'):
    with open('todl.json') as f:
        todl = json.load(f)
else:
    print('Run reeltodl.py first')
    sys.exit()

if os.path.isfile('dlurls.json'):
    with open('dlurls.json') as f:
        dlurls = json.load(f)
else:
    print('Get dlurls from chrome with rurls using reeldow.js')
    sys.exit()

# The urls were cut out of page HTML: undo the \u0025 escape, drop the
# remaining backslashes and decode the '&amp;' entity. Entries that are null
# in the JSON are kept as None and skipped below.
dlurls = [
    None if i is None
    else i.replace('\\u0025', '\u0025').replace('\\', '').replace('&amp;', '&')
    for i in dlurls
]

# zip() below stops at the shorter list; make a mismatch visible.
if len(dlurls) != len(todl):
    print(f'Warning: {len(dlurls)} download urls for {len(todl)} reels')

print('\nDownloding mp4s:\n')
for i, j in zip(dlurls, todl):
    # j is [reel url, path of the mp4 to write]
    if i is None:
        print(j[0], 'No download url', sep='\n', end='\n\n')
        continue
    r = requests.get(i)
    if r.status_code == 200:
        with open(j[1], 'wb') as f:
            f.write(r.content)
        # Record the url that was actually used next to the video.
        with open(j[1].partition('.mp4')[0] + '.txt', 'a') as f:
            f.write(f'\n{i}\n')
    else:
        print(j[0], i, r.text, sep='\n', end='\n\n')

print('Now you can clear these files :')
print('\t./reels/dlurls.json')
print('\t./reels/rurls.txt')
print('\t./reels/todl.json')
# Paths look like './<group>/<nn>.mp4'; list each group once, in order.
for i in list(dict.fromkeys([i[1].split('/')[1] for i in todl])):
    print(f'\t./reels/{i}-todl')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import json | |
import requests | |
import datetime | |
def parse(j):
    """Normalise the scraped entries into ordered, numbered records.

    Args:
        j: list of ``[reel_url, inner_text, video_url, image_url]`` entries.

    Returns:
        A list of ``[id_, rurl, desc, vurl, iurl]`` lists where ``id_`` is the
        1-based position zero-padded to the width of ``len(j)``, the urls
        have ``&amp;`` decoded to ``&`` and ``desc`` is ``inner_text`` without
        its first 'Add to collection' line (if it has one).
    """
    width = len(str(len(j)))
    d = []
    for i, e in enumerate(j, start=1):
        id_ = str(i).zfill(width)
        rurl = e[0].replace('&amp;', '&')
        tl = e[1].splitlines()
        # Not every entry carries the button label; only drop it when present.
        if 'Add to collection' in tl:
            tl.remove('Add to collection')
        desc = '\n'.join(tl)
        vurl = e[2].replace('&amp;', '&')
        iurl = e[3].replace('&amp;', '&')
        # Download urls are cleaned up in reeldow so that only the required
        # groups need to be downloaded.
        d.append([id_, rurl, desc, vurl, iurl])
    return d
def order(jl):
    """Run the interactive ordering prompt until the user enters 'q'.

    Commands (first character selects the action):
        h                   print the help text
        q                   leave the loop
        s<a>-<b>,<dir>[,<start>]  call order_save(a, b, dir, start, jl);
                            start defaults to 1
        d<name>             call order_delete(name, jl)
        t<name>             call order_touch(name)

    Args:
        jl: the parsed records, passed through to order_save/order_delete.

    Malformed input prints a hint and re-prompts instead of raising, so the
    session is not lost to a typo.
    """
    while True:
        cmd = input('cmd: ')
        if not cmd:
            # cmd[0] below would raise IndexError on an empty line.
            print('command not matching (h for help)')
        elif cmd == 'h':
            # Raw strings: \w and \d are meant literally.
            print(r't\w : touch \w')
            print(r'd\w : delete \w')
            print(r's\w-\w,\w(,\d)? : will save these range in folder')
            print(r' with name as 3rd\w and image')
            print(r' with name starting from \d')
            print(' defaults to 1')
            print('q : quit (save before quiting)')
            print('h : show this help message')
        elif cmd == 'q':
            break
        elif cmd[0] == 's':
            args = cmd[1:].split(',')
            bounds = args[0].split('-')
            if len(args) not in (2, 3) or len(bounds) != 2:
                print('see help')
                continue
            start = 1
            if len(args) == 3:
                try:
                    start = int(args[2])
                except ValueError:
                    print('see help')
                    continue
            try:
                order_save(bounds[0], bounds[1], args[1], start, jl)
            except (ValueError, IndexError) as exc:
                # e.g. a name that is not in the directory listing
                print(f'error: {exc}')
        elif cmd[0] == 'd':
            try:
                order_delete(cmd[1:], jl)
            except (ValueError, IndexError) as exc:
                # e.g. a .png whose name is not a record number
                print(f'error: {exc}')
        elif cmd[0] == 't':
            order_touch(cmd[1:])
        else:
            print('command not matching (h for help)')
def order_delete(n, jl: list):
    """Delete file ``n`` from the current directory and re-pad the listing.

    When ``n`` has a ``png`` extension, the record ``jl[int(stem) - 1]`` is
    also appended as a JSON line, followed by a dashed separator, to
    ``../../reels/deleted``.

    Args:
        n: file name as typed by the user.
        jl: the parsed records, indexed by the numeric file stem minus one.

    Raises:
        ValueError / IndexError: if a ``.png`` name's stem is not a valid
            record number.
    """
    # Remove through the os module: the name comes straight from the prompt
    # and must not be interpreted by a shell.
    try:
        os.remove(n)
    except OSError as exc:
        print(f'rm: {exc}')
    parts = n.split('.')
    if len(parts) > 1 and parts[1] == 'png':
        with open('../../reels/deleted', 'a') as f:
            f.write(json.dumps(jl[int(parts[0]) - 1]))
            f.write('\n\n' + ('-' * 20) + '\n\n')
    order_reorder()
def order_touch(n):
    """Create file ``n`` (or refresh its timestamps) and re-pad the listing.

    Args:
        n: file name as typed by the user.
    """
    # Done through the os module: the name comes straight from the prompt
    # and must not be interpreted by a shell.
    try:
        with open(n, 'a'):
            pass
        os.utime(n)
    except OSError as exc:
        print(f'touch: {exc}')
    order_reorder()
def order_reorder():
    """Rebuild the ``_`` filler files in the current directory.

    Every existing file whose name starts with ``_`` is removed, then
    ``3 - (count % 3)`` empty fillers named ``_``, ``__``, ``___`` are
    created, where ``count`` is the number of remaining entries. This always
    adds between one and three fillers (three when ``count`` is already a
    multiple of three).
    """
    for name in os.listdir():
        if not name.startswith('_'):
            continue
        # Real directories are left alone; only files (and links) are removed.
        if os.path.isdir(name) and not os.path.islink(name):
            continue
        os.remove(name)
    missing = 3 - (len(os.listdir()) % 3)
    for i in range(missing):
        with open('_' * (i + 1), 'a'):
            pass
def order_save(a, b, d, n: int, jl: list):
    """Save the listing entries from ``a`` through ``b`` into group ``d``.

    The current directory is listed and sorted; the slice from ``a`` to ``b``
    (inclusive) is numbered in reverse, so the first entry gets
    ``n + count - 1`` and the last gets ``n``. For each entry:

    * if ``../<entry>`` is a file, its ``.txt`` and ``.png`` are copied from
      the parent directory to ``../../reels/<d>/<NN>.*`` and
      ``[reel url, '<NN>.mp4']`` is appended to ``../../reels/<d>-todl``;
    * otherwise empty ``<NN>.txt/.png/.mp4`` placeholders are created and a
      ``['DUMMY', '<NN>.mp4']`` line is appended instead.

    The entry is then removed from the current directory. A Dolphin
    ``.directory`` file is written for the group if it does not exist yet,
    and the filler files are rebuilt at the end.

    Args:
        a: first entry name of the range.
        b: last entry name of the range.
        d: name of the target folder under ``../../reels``.
        n: number assigned to the last entry of the range.
        jl: the parsed records, indexed by the numeric file stem minus one.

    Raises:
        ValueError: if ``a`` or ``b`` is not in the directory listing.
    """
    time = datetime.datetime.now()
    names = sorted(os.listdir())
    first = names.index(a)
    last = names.index(b)
    chosen = names[first:last + 1]

    dest = f'../../reels/{d}'
    os.makedirs(dest, exist_ok=True)
    marker = f'{dest}/.directory'
    if not os.path.isfile(marker):
        with open(marker, 'w') as fh:
            fh.write(f'[Dolphin]\nSortOrder=1\nTimestamp={time.strftime("%Y,%-m,%-d,%-H,%-M,%-S.%f")[:-3]}\nVersion=4\n')

    # Count down from the highest number so the first entry gets the largest.
    number = n + len(chosen) - 1
    for f in chosen:
        slot = str(number).zfill(2)
        is_reel = os.path.isfile(f'../{f}')
        if is_reel:
            fn = f.split('.')[0]
            os.system(f'cp -i ../{fn}.txt {dest}/{slot}.txt')
            os.system(f'cp -i ../{fn}.png {dest}/{slot}.png')
        else:
            os.system(f'touch {dest}/{slot}.txt {dest}/{slot}.png {dest}/{slot}.mp4')
        with open(f'{dest}-todl', 'a') as fd:
            source = jl[int(fn) - 1][1] if is_reel else 'DUMMY'
            fd.write(json.dumps([source, f'{slot}.mp4']))
            fd.write('\n')
        os.system(f'rm {f}')
        number -= 1
    order_reorder()
if __name__ == '__main__':
    # reelorder.py entry point: load all_urls.json, write one .txt per reel
    # into reels_temp/, download the images, copy them to reels_temp/2order/
    # and start the interactive ordering prompt there.
    if not os.path.isfile('all_urls.json'):
        print('Get all_urls from firefox using reeldow.js')
        sys.exit()
    with open('all_urls.json') as f:
        j = json.load(f)

    a = parse(j)
    print(a[0])
    print(a[-1])

    os.mkdir('reels_temp')
    os.makedirs('reels', exist_ok=True)
    time = datetime.datetime.now()
    if not os.path.isfile('./reels/.directory'):
        with open('./reels/.directory', 'w') as f:
            f.write(f'[Dolphin]\nSortOrder=1\nTimestamp={time.strftime("%Y,%-m,%-d,%-H,%-M,%-S.%f")[:-3]}\nVersion=4\n')

    os.chdir('reels_temp')
    print('\nWorkingDir:', end=' ', flush=True)
    os.system('pwd')

    # Records whose url is neither a /reel/ nor a /watch/ link are reported
    # and skipped; every other record gets a text file.
    print('\nFaulty:\n')
    faulty = []
    for rec in a:
        id_, rurl, desc, vurl, iurl = rec[0], rec[1], rec[2], rec[3], rec[4]
        if not ('/reel/' in rurl or '/watch/' in rurl):
            print(id_, desc, rurl, vurl, sep='\n', end='\n\n')
            faulty.append(id_)
        else:
            with open(f'{id_}.txt', 'w') as f:
                f.write(desc + '\n\n' + rurl + '\n\n' + vurl + '\n\n' + iurl + '\n')

    print('\nDownloding imgs:\n')
    for rec in a:
        id_, iurl = rec[0], rec[4]
        if id_ in faulty:
            print(id_, 'Faulty')
        elif iurl == '':
            print(id_, 'No Image')
        else:
            r = requests.get(iurl)
            if r.status_code != 200:
                print(id_, r.text)
            else:
                with open(f'{id_}.png', 'wb') as f:
                    f.write(r.content)

    print('\nCopying images to 2order/')
    os.system('mkdir 2order && cp *.png 2order/')
    os.chdir('2order')
    print('WorkingDir:', end=' ', flush=True)
    os.system('pwd')
    order_reorder()

    print('\nStart ordering ...')
    os.system(f'dolphin --new-window {os.getcwd()} &')
    order(a)

    for line in (
        'Now you can clear there files (if saved all requireds) :',
        '\t./all_urls.json',
        '\t./reels_temp/',
        '\t./reels/deleted',
    ):
        print(line)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import sys | |
import json | |
# reeltodl.py: let the user pick and order the saved groups, then merge their
# <group>-todl files into todl.json and write the reel urls to rurls.txt.
os.chdir('reels')

ls = [i.partition('-todl')[0] for i in os.listdir() if i.endswith('-todl')]
ls.sort()
if len(ls) == 0:
    print('Run reelorder.py first')
    sys.exit()

print(f'ls = {ls}')
print(tuple(zip(range(len(ls)), ls)))
print('Edit ls :')
while True:
    raw = input('new_ls = ')
    # SECURITY NOTE: eval() runs whatever is typed at the prompt. It is kept
    # so that expressions over `ls` (e.g. ls[::-1]) keep working; never feed
    # this prompt from an untrusted source.
    try:
        ls_ = eval(raw)
    except Exception as exc:  # interactive boundary: report and re-prompt
        print(f'could not evaluate: {exc!r}')
        continue
    # Each name is opened as '<name>-todl' below, so reject anything that is
    # not a list/tuple of the groups found above.
    if not isinstance(ls_, (list, tuple)) or any(g not in ls for g in ls_):
        print(f'new_ls must be a list of names from {ls}')
        continue
    print(f'new_ls = {ls_}')
    o = input("'s' to set OR 'r' to retry : ")
    if o == 's':
        ls = ls_
        break
print(f'final\nls = {ls}')

todl = []
for i in ls:
    with open(f'{i}-todl') as f:
        for j in f:
            # each line is a JSON pair [reel url, mp4 file name]
            todl_ = json.loads(j.strip())
            if todl_[0] == 'DUMMY':
                continue
            todl_[0] = todl_[0].replace('https://www.facebook.com', '')
            todl_[1] = f'./{i}/' + todl_[1]
            todl.append(todl_)

with open('todl.json', 'w') as f:
    json.dump(todl, f, indent=2)

with open('rurls.txt', 'w') as f:
    rurls = [i[0] for i in todl]
    # written as `rurls=[...]` so it can be pasted as an initialiser
    f.write(f'{rurls=}')
print('reel urls array is saved with litral for initialization, in rurls.txt')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ tree -l SampleWdir/ | |
SampleWdir/ | |
├── reeldels.py | |
├── reeldow.js | |
├── reeldow.py | |
├── reelorder.py | |
├── reels | |
│ ├── 01 | |
│ │ ├── 01.mp4 | |
│ │ ├── 01.png | |
│ │ ├── 01.txt | |
│ │ ├── 02.mp4 | |
│ │ ├── 02.png | |
│ │ ├── 02.txt | |
│ │ ├── . | |
│ │ ├── . | |
│ │ └── . | |
│ ├── 01.png | |
│ ├── 02 | |
│ │ ├── . | |
│ │ ├── . | |
│ │ └── . | |
│ ├── 02.png | |
│ ├── . | |
│ ├── . | |
│ ├── . | |
│ ├── dels | |
│ │ ├── 001.mp4 | |
│ │ ├── 001.png | |
│ │ ├── 001.txt | |
│ │ ├── 002.mp4 | |
│ │ ├── 002.png | |
│ │ ├── 002.txt | |
│ │ ├── . | |
│ │ ├── . | |
│ │ └── . | |
│ └── todo & others | |
│ ├── . | |
│ ├── . | |
│ │ ├── . | |
│ │ └── . | |
│ └── . | |
└── reeltodl.py |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment