@nkpro2000
Last active July 4, 2022 20:25
To download the reels you saved on Facebook, from Facebook.
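The gist is a small pipeline (a sample working directory is shown in the tree at the end): the firefox part of reeldow.js scrapes the saved-reels page into all_urls.json; reelorder.py downloads the thumbnails and lets you sort the reels into groups under reels/; reeltodl.py merges the per-group *-todl files into todl.json and rurls.txt; the chrome part of reeldow.js resolves the direct mp4 URLs into dlurls.json; reeldow.py downloads the mp4s; and reeldels.py appears to re-save anything logged in reels/deleted. The exact run order is also spelled out in the // $... comments inside reeldow.js.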
# reeldels.py : re-save reels that were logged to reels/deleted by reelorder.py
import os
import json
import requests

os.chdir('reels')

# continue numbering after the last entry already present in ./dels
try:
    li = int(sorted(os.listdir('./dels'))[-1].split('.')[0])
except (FileNotFoundError, IndexError):  # no ./dels yet, or it is empty
    li = 0

with open('./deleted') as f:
    # each deleted entry occupies 4 lines (json, blank, separator, blank); keep only the json lines
    for i, reel in enumerate(f.read().splitlines()[::4]):
        rj = json.loads(reel)  # [id, reel_url, description, video_url, image_url]
        os.makedirs('./dels', exist_ok=True)
        with open(f'./dels/{str(li+(i+1)).zfill(3)}.txt', 'w') as f_:
            f_.write(rj[2]+'\n\n')
            f_.write(rj[1]+'\n\n')
            f_.write(rj[3]+'\n\n')
            f_.write(rj[4]+'\n')
        with open(f'./dels/{str(li+(i+1)).zfill(3)}.png', 'wb') as f_:
            r = requests.get(rj[4])
            if r.status_code == 200:
                f_.write(r.content)
        with open('dels-todl', 'a') as f_:
            f_.write(json.dumps([rj[1], f'{str(li+(i+1)).zfill(3)}.mp4']))
            f_.write('\n')
// reeldow.js (firefox part) : run in the firefox console on the facebook saved-reels page

// scroll down until `pattern` appears somewhere in the page body
async function scroll_untill(pattern) {
    while (!document.body.innerHTML.match(pattern)) {
        window.scrollByPages(5);
        await new Promise(r => setTimeout(r, 1000));
    }
    console.log('#########> Scrolled <#########')
}

/* unused alternative: collect only the de-duplicated reel URLs
const reel_url = /https\:\/\/www.facebook.com\/reel\/\d+\//g;
function get_urls(pattern) {
    urls = ''
    rb = [...document.body.innerHTML.matchAll(pattern)]
    rb = [...new Set(rb.map(e => e[0]))]
    rb.forEach(e => urls += (e+'\n'))
    return urls
} */
log = []

// collect [reel_url, card_text, video_url, image_url] for every saved-reel card,
// stopping once `breakp` matches ($x is the devtools XPath helper)
function get_all(breakp) {
    a = $x('/html/body/div[1]/div/div[1]/div/div[3]/div/div/div[1]/div[1]/div[2]/div/div/div/div/div[2]/div')
    urls = []
    for (i in a) {
        it = a[i].innerText
        console.log(it)
        ls = [...a[i].innerHTML.matchAll(/href\=\"(.*?)\"\ role\=\"link\"/g)]
        img = [...a[i].innerHTML.matchAll(/src\=\"(.*?)\"\ alt\=\"image\"/g)][0]
        if (!!img) {
            img = img[1]
        } else {
            img = ''
            console.log('No image')
            log.push(['No image', i, a[i]])
        }
        if (!!ls[0]) {
            console.log(ls[0][1])
            if (!!ls[2]) {
                console.log(ls[2][1])
                urls.push([ls[0][1], it, ls[2][1], img])
            } else {
                console.log(ls)
                log.push(['No video url', i, a[i]])
                urls.push([ls[0][1], it, '', img])
            }
        } else {
            console.log(a[i].innerHTML)
            log.push(['No links', i, a[i]])
        }
        if (!!a[i].innerHTML.match(breakp)) {
            break
        }
    }
    return urls
}
// break_pattern is not defined in the gist: presumably a string/regexp matching the oldest
// reel you want, used with scroll_untill() above and as the stop condition for get_all()
all_urls = get_all(break_pattern)
// copy all_urls as JSON from the console (e.g. copy(JSON.stringify(all_urls)))
/////////////////////////////////////////////////////////////////////////////////////////////////////
// $nano all_urls.json #paste the JSON copied from firefox
//=> all_urls.json
/////////////////////////////////////////////////////////////////////////////////////////////////
// $python reelorder.py #order reels #this script generates files with the reel URL and filename to use when saving each mp4
//-> reels_temp/*/*
//=> reels/.directory
//=> reels/deleted
//=> reels/*/.directory , reels/*-todl
/////////////////////////////////////////////////////////////////////////////////////////////////
// $python reeltodl.py #this script builds the reel-URL array used for download-link scraping
//=> reels/todl.json
//=> reels/rurls.txt
/////////////////////////////////////////////////////////////////////////////////////////////////
// $mkdir /tmp/tmpgcsp #just a temporary profile directory
// $google-chrome-stable --disable-web-security --incognito --user-data-dir='/tmp/tmpgcsp'
//? why not firefox : firefox requires installing an extension to unblock cross-origin access
// reeldow.js (chrome part) : run in the chrome console opened as above, after pasting the rurls array from reels/rurls.txt
const dlurlp = /(playable_url|playable_url_quality_hd)":"([^"]+)"/g
var qualities = ["playable_url", "playable_url_quality_hd"] // later entries are preferred (HD over SD)
// https://github.com/vikas5914/Facebook-Video-Downloader/blob/9e4cd6bc51c9ba66b48271d9873742bf88a0faaf/app/main.php#L98
// open each reel page in a new tab, wait until a playable_url shows up in its HTML,
// pick the best available quality and collect the direct download URLs
// (reading w.document across origins is why chrome was started with --disable-web-security)
async function sp_urls(rurls) {
    var durls = []
    for (i in rurls) {
        var w = window.open(rurls[i])
        while (!w.document || !w.document.body || !w.document.body.innerHTML || ![...w.document.body.innerHTML.matchAll(dlurlp)][0]) {
            await new Promise(r => setTimeout(r, 500));
        }
        durl = [...w.document.body.innerHTML.matchAll(dlurlp)]
        var x = null
        var k = -2
        for (j in durl) {
            if (qualities.indexOf(durl[j][1]) > k) {
                k = qualities.indexOf(durl[j][1])
                x = durl[j][2]
            } else {
                break
            }
        }
        w.close()
        durls.push(x)
    }
    return durls
}
dlurls = await sp_urls(rurls)
// copy dlurls as JSON from the console (e.g. copy(JSON.stringify(dlurls)))
/////////////////////////////////////////////////////////////////////////////////////////////////////
// $nano reels/dlurls.json #paste the JSON copied from google-chrome
//=> reels/dlurls.json
/////////////////////////////////////////////////////////////////////////////////////////////////
// $python reeldow.py
/////////////////////////////////////////////////////////////////////////////////////////////////
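For reference, the quality selection that sp_urls performs can be sketched in Python against a saved page source. pick_playable_url and the sample string below are hypothetical (not part of the gist), and this version simply scans every regex match instead of breaking early the way sp_urls does:

import re

DLURL_RE = re.compile(r'(playable_url|playable_url_quality_hd)":"([^"]+)"')  # same pattern as dlurlp
QUALITIES = ["playable_url", "playable_url_quality_hd"]  # later entries are preferred (HD over SD)

def pick_playable_url(page_source):
    """Return the highest-quality playable URL found in page_source, or None."""
    best_rank, best_url = -1, None
    for key, url in DLURL_RE.findall(page_source):
        rank = QUALITIES.index(key)
        if rank > best_rank:
            best_rank, best_url = rank, url
    return best_url

# the captured URLs stay backslash-escaped, exactly like the entries in dlurls.json
sample = '"playable_url":"https:\\/\\/example\\/sd.mp4","playable_url_quality_hd":"https:\\/\\/example\\/hd.mp4"'
print(pick_playable_url(sample))  # -> https:\/\/example\/hd.mp4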
# reeldow.py : download the mp4s listed in todl.json using the direct URLs in dlurls.json
import os
import sys
import json
import requests

os.chdir('reels')

if os.path.isfile('todl.json'):
    with open('todl.json') as f:
        todl = json.load(f)
else:
    print('Run reeltodl.py first')
    sys.exit()

if os.path.isfile('dlurls.json'):
    with open('dlurls.json') as f:
        dlurls = json.load(f)
else:
    print('Get dlurls from chrome with rurls using reeldow.js')
    sys.exit()

# unescape the URLs scraped from the page source (backslash escapes, \u0025, &amp;)
dlurls_ = []
for i in dlurls:
    dlurls_.append(i.replace('\\u0025', '\u0025').replace('\\', '').replace('&amp;', '&'))
dlurls = dlurls_

print('\nDownloading mp4s:\n')
for i, j in zip(dlurls, todl):
    r = requests.get(i)
    if r.status_code == 200:
        with open(j[1], 'wb') as f:
            f.write(r.content)
        with open(j[1].partition('.mp4')[0]+'.txt', 'a') as f:
            f.write(f'\n{i}\n')
    else:
        print(j[0], i, r.text, sep='\n', end='\n\n')

print('Now you can clear these files:')
print('\t./reels/dlurls.json')
print('\t./reels/rurls.txt')
print('\t./reels/todl.json')
for i in list(dict.fromkeys([i[1].split('/')[1] for i in todl])):
    print(f'\t./reels/{i}-todl')
# reelorder.py : download thumbnails from all_urls.json and interactively sort the reels into groups
import os
import sys
import json
import requests
import datetime

def parse(j):
    n = len(str(len(j)))
    d = []
    for i, e in enumerate(j):
        id_ = str(i+1).zfill(n)
        rurl = e[0].replace('&amp;', '&')
        tl = e[1].splitlines()
        tl.remove('Add to collection')
        desc = '\n'.join(tl)
        vurl = e[2].replace('&amp;', '&')
        iurl = e[3].replace('&amp;', '&')
        # dlurl = e[4].replace('\\u0025','\u0025').replace('\\','').replace('&amp;','&')
        ## moved to reeldow so that only the required group has to be downloaded
        d.append([id_, rurl, desc, vurl, iurl])  # , dlurl])
    return d
def order(jl):
    while True:
        cmd = input('cmd: ')
        if cmd == 'h':
            print(r't\w : touch \w')
            print(r'd\w : delete \w')
            print(r's\w-\w,\w(,\d)? : saves this range into a folder')
            print(r'    named by the 3rd \w, with the images')
            print(r'    numbered starting from \d')
            print(r'    (defaults to 1)')
            print('q : quit (save before quitting)')
            print('h : show this help message')
        elif cmd == 'q':
            break
        elif cmd[0] == 's':
            cmd = cmd[1:].split(',')
            if len(cmd) == 2:
                order_save(*cmd[0].split('-'), cmd[1], 1, jl)
            elif len(cmd) == 3:
                order_save(*cmd[0].split('-'), cmd[1], int(cmd[2]), jl)
            else:
                print('see help')
        elif cmd[0] == 'd':
            order_delete(cmd[1:], jl)
        elif cmd[0] == 't':
            order_touch(cmd[1:])
        else:
            print('command not matching (h for help)')
def order_delete(n, jl: list):
    os.system(f'rm {n}')
    if len(n.split('.')) > 1 and n.split('.')[1] == 'png':
        # log the deleted reel so reeldels.py can recover it later
        with open('../../reels/deleted', 'a') as f:
            f.write(json.dumps(jl[int(n.split('.')[0])-1]))
            f.write('\n\n'+('-'*20)+'\n\n')
    order_reorder()

def order_touch(n):
    os.system(f'touch {n}')
    order_reorder()

def order_reorder():
    # pad the listing with '_' dummy files so the file count stays a multiple of 3
    os.system("sh -c 'rm -f _*'")
    for i in range(3-(len(os.listdir()) % 3)):
        os.system(f'touch {"_"*(i+1)}')

def order_save(a, b, d, n: int, jl: list):
    time = datetime.datetime.now()
    ls = os.listdir()
    ls.sort()
    ai = ls.index(a)
    bi = ls.index(b)
    ls = ls[ai:bi+1]
    os.makedirs(f'../../reels/{d}', exist_ok=True)
    if not os.path.isfile(f'../../reels/{d}/.directory'):
        with open(f'../../reels/{d}/.directory', 'w') as f:
            f.write(f'[Dolphin]\nSortOrder=1\nTimestamp={time.strftime("%Y,%-m,%-d,%-H,%-M,%-S.%f")[:-3]}\nVersion=4\n')
    # assign numbers n..n+len(ls)-1 in reverse order over the sorted selection
    for i, f in zip(range(n+len(ls)-1, n-1, -1), ls):
        if os.path.isfile(f'../{f}'):
            fn = f.split('.')[0]
            os.system(f'cp -i ../{fn}.txt ../../reels/{d}/{str(i).zfill(2)}.txt')
            os.system(f'cp -i ../{fn}.png ../../reels/{d}/{str(i).zfill(2)}.png')
            with open(f'../../reels/{d}-todl', 'a') as fd:
                fd.write(json.dumps([jl[int(fn)-1][1], f'{str(i).zfill(2)}.mp4']))
                fd.write('\n')
        else:
            # touched placeholder: create empty files and a DUMMY todl entry
            os.system(f'touch ../../reels/{d}/{str(i).zfill(2)}.txt ../../reels/{d}/{str(i).zfill(2)}.png ../../reels/{d}/{str(i).zfill(2)}.mp4')
            with open(f'../../reels/{d}-todl', 'a') as fd:
                fd.write(json.dumps(['DUMMY', f'{str(i).zfill(2)}.mp4']))
                fd.write('\n')
        os.system(f'rm {f}')
    order_reorder()
if __name__ == '__main__':
    if os.path.isfile('all_urls.json'):
        with open('all_urls.json') as f:
            j = json.load(f)
    else:
        print('Get all_urls from firefox using reeldow.js')
        sys.exit()
    a = parse(j)
    print(a[0])
    print(a[-1])
    os.mkdir('reels_temp')
    os.makedirs('reels', exist_ok=True)
    time = datetime.datetime.now()
    if not os.path.isfile('./reels/.directory'):
        with open('./reels/.directory', 'w') as f:
            f.write(f'[Dolphin]\nSortOrder=1\nTimestamp={time.strftime("%Y,%-m,%-d,%-H,%-M,%-S.%f")[:-3]}\nVersion=4\n')
    os.chdir('reels_temp')
    print('\nWorkingDir:', end=' ', flush=True); os.system('pwd')

    print('\nFaulty:\n')
    faulty = []
    for i in a:
        if '/reel/' not in i[1] and '/watch/' not in i[1]:
            # entry whose first link is not a reel/watch URL; skip it
            print(i[0], i[2], i[1], i[3], sep='\n', end='\n\n')
            faulty.append(i[0])
            continue
        with open(f'{i[0]}.txt', 'w') as f:
            f.write(i[2]+'\n\n')
            f.write(i[1]+'\n\n')
            f.write(i[3]+'\n\n')
            f.write(i[4]+'\n')  # \n')
            # f.write(i[5]+'\n')

    print('\nDownloading imgs:\n')
    for i in a:
        if i[0] in faulty:
            print(i[0], 'Faulty')
            continue
        if i[4] == '':
            print(i[0], 'No Image')
            continue
        r = requests.get(i[4])
        if r.status_code == 200:
            with open(f'{i[0]}.png', 'wb') as f:
                f.write(r.content)
        else:
            print(i[0], r.text)

    print('\nCopying images to 2order/')
    os.system('mkdir 2order && cp *.png 2order/')
    os.chdir('2order')
    print('WorkingDir:', end=' ', flush=True); os.system('pwd')
    order_reorder()

    print('\nStart ordering ...')
    os.system('dolphin --new-window '+os.getcwd()+' &')
    order(a)

    print('Now you can clear these files (if all required reels are saved):')
    print('\t./all_urls.json')
    print('\t./reels_temp/')
    print('\t./reels/deleted')
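# Example ordering session inside order() (hypothetical file names and group name),
# based on the help text and the order_* functions above:
#   cmd: t000.png                  # create a placeholder (if it ends up in a saved range it
#                                  # becomes empty files plus a DUMMY line in the -todl file)
#   cmd: d017.png                  # delete 017.png and log reel 017 to ./reels/deleted
#   cmd: s003.png-010.png,funny,1  # copy reels 003..010 into ./reels/funny/, numbered in
#                                  # reverse (003 gets the highest number), and append their
#                                  # reel URLs to ./reels/funny-todl
#   cmd: q                         # quit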
# reeltodl.py : merge the per-group *-todl files into todl.json and write the reel-URL array to rurls.txt
import os
import sys
import json

os.chdir('reels')

ls = [i.partition('-todl')[0] for i in os.listdir() if i.endswith('-todl')]
ls.sort()
if len(ls) == 0:
    print('Run reelorder.py first')
    sys.exit()
print(f'ls = {ls}')
print(tuple(zip(range(len(ls)), ls)))

# let the user reorder or drop groups before merging (the new list is entered as a Python literal)
print('Edit ls :')
while True:
    ls_ = eval(input('new_ls = '))
    print(f'new_ls = {ls_}')
    o = input("'s' to set OR 'r' to retry : ")
    if o == 's':
        ls = ls_
        break
print(f'final\nls = {ls}')

todl = []
for i in ls:
    with open(f'{i}-todl') as f:
        for j in f:
            todl_ = json.loads(j.strip())
            if todl_[0] == 'DUMMY':
                continue
            todl_[0] = todl_[0].replace('https://www.facebook.com', '')
            todl_[1] = f'./{i}/'+todl_[1]
            todl.append(todl_)

with open('todl.json', 'w') as f:
    json.dump(todl, f, indent=2)

with open('rurls.txt', 'w') as f:
    rurls = []
    for i in todl:
        rurls.append(i[0])
    f.write(f'{rurls=}')
print('The reel-URL array is saved as a literal assignment in rurls.txt (paste it into the chrome console before running sp_urls)')
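# Example of what rurls.txt ends up containing (hypothetical reel IDs); it can be pasted
# into the chrome console as-is before calling sp_urls:
#   rurls=['/reel/1234567890123456/', '/reel/9876543210987654/']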
$ tree -l SampleWdir/
SampleWdir/
├── reeldels.py
├── reeldow.js
├── reeldow.py
├── reelorder.py
├── reels
│   ├── 01
│   │   ├── 01.mp4
│   │   ├── 01.png
│   │   ├── 01.txt
│   │   ├── 02.mp4
│   │   ├── 02.png
│   │   ├── 02.txt
│   │   ├── .
│   │   ├── .
│   │   └── .
│   ├── 01.png
│   ├── 02
│   │   ├── .
│   │   ├── .
│   │   └── .
│   ├── 02.png
│   ├── .
│   ├── .
│   ├── .
│   ├── dels
│   │   ├── 001.mp4
│   │   ├── 001.png
│   │   ├── 001.txt
│   │   ├── 002.mp4
│   │   ├── 002.png
│   │   ├── 002.txt
│   │   ├── .
│   │   ├── .
│   │   └── .
│   └── todo & others
│       ├── .
│       ├── .
│       │   ├── .
│       │   └── .
│       └── .
└── reeltodl.py