@nkpro2000
Last active July 4, 2022 20:25
To download the reels you saved on Facebook, from Facebook.
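The gist is a small pipeline (a sample working directory is shown in the tree at the end): the firefox part of reeldow.js scrapes the saved-reels page into all_urls.json; reelorder.py downloads the thumbnails and lets you sort the reels into groups under reels/; reeltodl.py merges the per-group *-todl files into todl.json and rurls.txt; the chrome part of reeldow.js resolves the direct mp4 URLs into dlurls.json; reeldow.py downloads the mp4s; and reeldels.py appears to re-save anything logged in reels/deleted. The exact run order is also spelled out in the // $... comments inside reeldow.js.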
# reeldels.py : re-save reels that were logged to reels/deleted by reelorder.py
import os
import json
import requests

os.chdir('reels')

# continue numbering after the last entry already present in ./dels
try:
    li = int(sorted(os.listdir('./dels'))[-1].split('.')[0])
except (FileNotFoundError, IndexError):  # no ./dels yet, or it is empty
    li = 0

with open('./deleted') as f:
    # each deleted entry occupies 4 lines (json, blank, separator, blank); keep only the json lines
    for i, reel in enumerate(f.read().splitlines()[::4]):
        rj = json.loads(reel)  # [id, reel_url, description, video_url, image_url]
        os.makedirs('./dels', exist_ok=True)
        with open(f'./dels/{str(li+(i+1)).zfill(3)}.txt', 'w') as f_:
            f_.write(rj[2]+'\n\n')
            f_.write(rj[1]+'\n\n')
            f_.write(rj[3]+'\n\n')
            f_.write(rj[4]+'\n')
        with open(f'./dels/{str(li+(i+1)).zfill(3)}.png', 'wb') as f_:
            r = requests.get(rj[4])
            if r.status_code == 200:
                f_.write(r.content)
        with open('dels-todl', 'a') as f_:
            f_.write(json.dumps([rj[1], f'{str(li+(i+1)).zfill(3)}.mp4']))
            f_.write('\n')
// reeldow.js (firefox part) : run in the firefox console on the facebook saved-reels page

// scroll down until `pattern` appears somewhere in the page body
async function scroll_untill(pattern) {
    while (!document.body.innerHTML.match(pattern)) {
        window.scrollByPages(5);
        await new Promise(r => setTimeout(r, 1000));
    }
    console.log('#########> Scrolled <#########')
}

/* unused alternative: collect only the de-duplicated reel URLs
const reel_url = /https\:\/\/www.facebook.com\/reel\/\d+\//g;
function get_urls(pattern) {
    urls = ''
    rb = [...document.body.innerHTML.matchAll(pattern)]
    rb = [...new Set(rb.map(e => e[0]))]
    rb.forEach(e => urls += (e+'\n'))
    return urls
} */
log = []

// collect [reel_url, card_text, video_url, image_url] for every saved-reel card,
// stopping once `breakp` matches ($x is the devtools XPath helper)
function get_all(breakp) {
    a = $x('/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div[2]/div')
    urls = []
    for (i in a) {
        it = a[i].innerText
        console.log(it)
        ls = [...a[i].innerHTML.matchAll(/href\=\"(.*?)\"\ role\=\"link\"/g)]
        img = [...a[i].innerHTML.matchAll(/src\=\"(.*?)\"\ alt\=\"image\"/g)][0]
        if (!!img) {
            img = img[1]
        } else {
            img = ''
            console.log('No image')
            log.push(['No image', i, a[i]])
        }
        if (!!ls[0]) {
            console.log(ls[0][1])
            if (!!ls[2]) {
                console.log(ls[2][1])
                urls.push([ls[0][1], it, ls[2][1], img])
            } else {
                console.log(ls)
                log.push(['No video url', i, a[i]])
                urls.push([ls[0][1], it, '', img])
            }
        } else {
            console.log(a[i].innerHTML)
            log.push(['No links', i, a[i]])
        }
        if (!!a[i].innerHTML.match(breakp)) {
            break
        }
    }
    return urls
}
// break_pattern is not defined in the gist: presumably a string/regexp matching the oldest
// reel you want, used with scroll_untill() above and as the stop condition for get_all()
all_urls = get_all(break_pattern)
// copy all_urls as JSON from the console (e.g. copy(JSON.stringify(all_urls)))
/////////////////////////////////////////////////////////////////////////////////////////////////////
// $nano all_urls.json #paste the JSON copied from firefox
//=> all_urls.json
/////////////////////////////////////////////////////////////////////////////////////////////////
// $python reelorder.py #order reels #this script generates files with the reel URL and filename to use when saving each mp4
//-> reels_temp/*/*
//=> reels/.directory
//=> reels/deleted
//=> reels/*/.directory , reels/*-todl
/////////////////////////////////////////////////////////////////////////////////////////////////
// $python reeltodl.py #this script builds the reel-URL array used for download-link scraping
//=> reels/todl.json
//=> reels/rurls.txt
/////////////////////////////////////////////////////////////////////////////////////////////////
// $mkdir /tmp/tmpgcsp #just a temporary profile directory
// $google-chrome-stable --disable-web-security --incognito --user-data-dir='/tmp/tmpgcsp'
//? why not firefox : firefox requires installing an extension to unblock cross-origin access
// reeldow.js (chrome part) : run in the chrome console opened as above, after pasting the rurls array from reels/rurls.txt
const dlurlp = /(playable_url|playable_url_quality_hd)":"([^"]+)"/g
var qualities = ["playable_url", "playable_url_quality_hd"] // later entries are preferred (HD over SD)
// https://github.com/vikas5914/Facebook-Video-Downloader/blob/9e4cd6bc51c9ba66b48271d9873742bf88a0faaf/app/main.php#L98
// open each reel page in a new tab, wait until a playable_url shows up in its HTML,
// pick the best available quality and collect the direct download URLs
// (reading w.document across origins is why chrome was started with --disable-web-security)
async function sp_urls(rurls) {
    var durls = []
    for (i in rurls) {
        var w = window.open(rurls[i])
        while (!w.document || !w.document.body || !w.document.body.innerHTML || ![...w.document.body.innerHTML.matchAll(dlurlp)][0]) {
            await new Promise(r => setTimeout(r, 500));
        }
        durl = [...w.document.body.innerHTML.matchAll(dlurlp)]
        var x = null
        var k = -2
        for (j in durl) {
            if (qualities.indexOf(durl[j][1]) > k) {
                k = qualities.indexOf(durl[j][1])
                x = durl[j][2]
            } else {
                break
            }
        }
        w.close()
        durls.push(x)
    }
    return durls
}
dlurls = await sp_urls(rurls)
// copy dlurls as JSON from the console (e.g. copy(JSON.stringify(dlurls)))
/////////////////////////////////////////////////////////////////////////////////////////////////////
// $nano reels/dlurls.json #paste the JSON copied from google-chrome
//=> reels/dlurls.json
/////////////////////////////////////////////////////////////////////////////////////////////////
// $python reeldow.py
/////////////////////////////////////////////////////////////////////////////////////////////////
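For reference, the quality selection that sp_urls performs can be sketched in Python against a saved page source. pick_playable_url and the sample string below are hypothetical (not part of the gist), and this version simply scans every regex match instead of breaking early the way sp_urls does:

import re

DLURL_RE = re.compile(r'(playable_url|playable_url_quality_hd)":"([^"]+)"')  # same pattern as dlurlp
QUALITIES = ["playable_url", "playable_url_quality_hd"]  # later entries are preferred (HD over SD)

def pick_playable_url(page_source):
    """Return the highest-quality playable URL found in page_source, or None."""
    best_rank, best_url = -1, None
    for key, url in DLURL_RE.findall(page_source):
        rank = QUALITIES.index(key)
        if rank > best_rank:
            best_rank, best_url = rank, url
    return best_url

# the captured URLs stay backslash-escaped, exactly like the entries in dlurls.json
sample = '"playable_url":"https:\\/\\/example\\/sd.mp4","playable_url_quality_hd":"https:\\/\\/example\\/hd.mp4"'
print(pick_playable_url(sample))  # -> https:\/\/example\/hd.mp4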
# reeldow.py : download the mp4s listed in todl.json using the direct URLs in dlurls.json
import os
import sys
import json
import requests

os.chdir('reels')

if os.path.isfile('todl.json'):
    with open('todl.json') as f:
        todl = json.load(f)
else:
    print('Run reeltodl.py first')
    sys.exit()

if os.path.isfile('dlurls.json'):
    with open('dlurls.json') as f:
        dlurls = json.load(f)
else:
    print('Get dlurls from chrome with rurls using reeldow.js')
    sys.exit()

# unescape the URLs scraped from the page source (backslash escapes, \u0025, &amp;)
dlurls_ = []
for i in dlurls:
    dlurls_.append(i.replace('\\u0025', '\u0025').replace('\\', '').replace('&amp;', '&'))
dlurls = dlurls_

print('\nDownloading mp4s:\n')
for i, j in zip(dlurls, todl):
    r = requests.get(i)
    if r.status_code == 200:
        with open(j[1], 'wb') as f:
            f.write(r.content)
        with open(j[1].partition('.mp4')[0]+'.txt', 'a') as f:
            f.write(f'\n{i}\n')
    else:
        print(j[0], i, r.text, sep='\n', end='\n\n')

print('Now you can clear these files:')
print('\t./reels/dlurls.json')
print('\t./reels/rurls.txt')
print('\t./reels/todl.json')
for i in list(dict.fromkeys([i[1].split('/')[1] for i in todl])):
    print(f'\t./reels/{i}-todl')
# reelorder.py : download thumbnails from all_urls.json and interactively sort the reels into groups
import os
import sys
import json
import requests
import datetime

def parse(j):
    n = len(str(len(j)))
    d = []
    for i, e in enumerate(j):
        id_ = str(i+1).zfill(n)
        rurl = e[0].replace('&amp;', '&')
        tl = e[1].splitlines()
        tl.remove('Add to collection')
        desc = '\n'.join(tl)
        vurl = e[2].replace('&amp;', '&')
        iurl = e[3].replace('&amp;', '&')
        # dlurl = e[4].replace('\\u0025','\u0025').replace('\\','').replace('&amp;','&')
        ## moved to reeldow so that only the required group has to be downloaded
        d.append([id_, rurl, desc, vurl, iurl])  # , dlurl])
    return d
def order(jl):
    while True:
        cmd = input('cmd: ')
        if cmd == 'h':
            print(r't\w : touch \w')
            print(r'd\w : delete \w')
            print(r's\w-\w,\w(,\d)? : saves this range into a folder')
            print(r'    named by the 3rd \w, with the images')
            print(r'    numbered starting from \d')
            print(r'    (defaults to 1)')
            print('q : quit (save before quitting)')
            print('h : show this help message')
        elif cmd == 'q':
            break
        elif cmd[0] == 's':
            cmd = cmd[1:].split(',')
            if len(cmd) == 2:
                order_save(*cmd[0].split('-'), cmd[1], 1, jl)
            elif len(cmd) == 3:
                order_save(*cmd[0].split('-'), cmd[1], int(cmd[2]), jl)
            else:
                print('see help')
        elif cmd[0] == 'd':
            order_delete(cmd[1:], jl)
        elif cmd[0] == 't':
            order_touch(cmd[1:])
        else:
            print('command not matching (h for help)')
def order_delete(n, jl: list):
    os.system(f'rm {n}')
    if len(n.split('.')) > 1 and n.split('.')[1] == 'png':
        # log the deleted reel so reeldels.py can recover it later
        with open('../../reels/deleted', 'a') as f:
            f.write(json.dumps(jl[int(n.split('.')[0])-1]))
            f.write('\n\n'+('-'*20)+'\n\n')
    order_reorder()

def order_touch(n):
    os.system(f'touch {n}')
    order_reorder()

def order_reorder():
    # pad the listing with '_' dummy files so the file count stays a multiple of 3
    os.system("sh -c 'rm -f _*'")
    for i in range(3-(len(os.listdir()) % 3)):
        os.system(f'touch {"_"*(i+1)}')

def order_save(a, b, d, n: int, jl: list):
    time = datetime.datetime.now()
    ls = os.listdir()
    ls.sort()
    ai = ls.index(a)
    bi = ls.index(b)
    ls = ls[ai:bi+1]
    os.makedirs(f'../../reels/{d}', exist_ok=True)
    if not os.path.isfile(f'../../reels/{d}/.directory'):
        with open(f'../../reels/{d}/.directory', 'w') as f:
            f.write(f'[Dolphin]\nSortOrder=1\nTimestamp={time.strftime("%Y,%-m,%-d,%-H,%-M,%-S.%f")[:-3]}\nVersion=4\n')
    # assign numbers n..n+len(ls)-1 in reverse order over the sorted selection
    for i, f in zip(range(n+len(ls)-1, n-1, -1), ls):
        if os.path.isfile(f'../{f}'):
            fn = f.split('.')[0]
            os.system(f'cp -i ../{fn}.txt ../../reels/{d}/{str(i).zfill(2)}.txt')
            os.system(f'cp -i ../{fn}.png ../../reels/{d}/{str(i).zfill(2)}.png')
            with open(f'../../reels/{d}-todl', 'a') as fd:
                fd.write(json.dumps([jl[int(fn)-1][1], f'{str(i).zfill(2)}.mp4']))
                fd.write('\n')
        else:
            # touched placeholder: create empty files and a DUMMY todl entry
            os.system(f'touch ../../reels/{d}/{str(i).zfill(2)}.txt ../../reels/{d}/{str(i).zfill(2)}.png ../../reels/{d}/{str(i).zfill(2)}.mp4')
            with open(f'../../reels/{d}-todl', 'a') as fd:
                fd.write(json.dumps(['DUMMY', f'{str(i).zfill(2)}.mp4']))
                fd.write('\n')
        os.system(f'rm {f}')
    order_reorder()
if __name__ == '__main__':
    if os.path.isfile('all_urls.json'):
        with open('all_urls.json') as f:
            j = json.load(f)
    else:
        print('Get all_urls from firefox using reeldow.js')
        sys.exit()
    a = parse(j)
    print(a[0])
    print(a[-1])
    os.mkdir('reels_temp')
    os.makedirs('reels', exist_ok=True)
    time = datetime.datetime.now()
    if not os.path.isfile('./reels/.directory'):
        with open('./reels/.directory', 'w') as f:
            f.write(f'[Dolphin]\nSortOrder=1\nTimestamp={time.strftime("%Y,%-m,%-d,%-H,%-M,%-S.%f")[:-3]}\nVersion=4\n')
    os.chdir('reels_temp')
    print('\nWorkingDir:', end=' ', flush=True); os.system('pwd')

    print('\nFaulty:\n')
    faulty = []
    for i in a:
        if '/reel/' not in i[1] and '/watch/' not in i[1]:
            # entry whose first link is not a reel/watch URL; skip it
            print(i[0], i[2], i[1], i[3], sep='\n', end='\n\n')
            faulty.append(i[0])
            continue
        with open(f'{i[0]}.txt', 'w') as f:
            f.write(i[2]+'\n\n')
            f.write(i[1]+'\n\n')
            f.write(i[3]+'\n\n')
            f.write(i[4]+'\n')  # \n')
            # f.write(i[5]+'\n')

    print('\nDownloading imgs:\n')
    for i in a:
        if i[0] in faulty:
            print(i[0], 'Faulty')
            continue
        if i[4] == '':
            print(i[0], 'No Image')
            continue
        r = requests.get(i[4])
        if r.status_code == 200:
            with open(f'{i[0]}.png', 'wb') as f:
                f.write(r.content)
        else:
            print(i[0], r.text)

    print('\nCopying images to 2order/')
    os.system('mkdir 2order && cp *.png 2order/')
    os.chdir('2order')
    print('WorkingDir:', end=' ', flush=True); os.system('pwd')
    order_reorder()

    print('\nStart ordering ...')
    os.system('dolphin --new-window '+os.getcwd()+' &')
    order(a)

    print('Now you can clear these files (if all required reels are saved):')
    print('\t./all_urls.json')
    print('\t./reels_temp/')
    print('\t./reels/deleted')
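# Example ordering session inside order() (hypothetical file names and group name),
# based on the help text and the order_* functions above:
#   cmd: t000.png                  # create a placeholder (if it ends up in a saved range it
#                                  # becomes empty files plus a DUMMY line in the -todl file)
#   cmd: d017.png                  # delete 017.png and log reel 017 to ./reels/deleted
#   cmd: s003.png-010.png,funny,1  # copy reels 003..010 into ./reels/funny/, numbered in
#                                  # reverse (003 gets the highest number), and append their
#                                  # reel URLs to ./reels/funny-todl
#   cmd: q                         # quit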
# reeltodl.py : merge the per-group *-todl files into todl.json and write the reel-URL array to rurls.txt
import os
import sys
import json

os.chdir('reels')

ls = [i.partition('-todl')[0] for i in os.listdir() if i.endswith('-todl')]
ls.sort()
if len(ls) == 0:
    print('Run reelorder.py first')
    sys.exit()
print(f'ls = {ls}')
print(tuple(zip(range(len(ls)), ls)))

# let the user reorder or drop groups before merging (the new list is entered as a Python literal)
print('Edit ls :')
while True:
    ls_ = eval(input('new_ls = '))
    print(f'new_ls = {ls_}')
    o = input("'s' to set OR 'r' to retry : ")
    if o == 's':
        ls = ls_
        break
print(f'final\nls = {ls}')

todl = []
for i in ls:
    with open(f'{i}-todl') as f:
        for j in f:
            todl_ = json.loads(j.strip())
            if todl_[0] == 'DUMMY':
                continue
            todl_[0] = todl_[0].replace('https://www.facebook.com', '')
            todl_[1] = f'./{i}/'+todl_[1]
            todl.append(todl_)

with open('todl.json', 'w') as f:
    json.dump(todl, f, indent=2)

with open('rurls.txt', 'w') as f:
    rurls = []
    for i in todl:
        rurls.append(i[0])
    f.write(f'{rurls=}')
print('The reel-URL array is saved as a literal assignment in rurls.txt (paste it into the chrome console before running sp_urls)')
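# Example of what rurls.txt ends up containing (hypothetical reel IDs); it can be pasted
# into the chrome console as-is before calling sp_urls:
#   rurls=['/reel/1234567890123456/', '/reel/9876543210987654/']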
$ tree -l SampleWdir/
SampleWdir/
├── reeldels.py
├── reeldow.js
├── reeldow.py
├── reelorder.py
├── reels
│   ├── 01
│   │   ├── 01.mp4
│   │   ├── 01.png
│   │   ├── 01.txt
│   │   ├── 02.mp4
│   │   ├── 02.png
│   │   ├── 02.txt
│   │   ├── .
│   │   ├── .
│   │   └── .
│   ├── 01.png
│   ├── 02
│   │   ├── .
│   │   ├── .
│   │   └── .
│   ├── 02.png
│   ├── .
│   ├── .
│   ├── .
│   ├── dels
│   │   ├── 001.mp4
│   │   ├── 001.png
│   │   ├── 001.txt
│   │   ├── 002.mp4
│   │   ├── 002.png
│   │   ├── 002.txt
│   │   ├── .
│   │   ├── .
│   │   └── .
│   └── todo & others
│       ├── .
│       ├── .
│       │   ├── .
│       │   └── .
│       └── .
└── reeltodl.py