Last active
August 29, 2015 14:08
-
-
Save lunks/a00652cafbeb6797c240 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env lua | |
-- This file: | |
-- http://angg.twu.net/fbcache/urls.lua | |
-- http://angg.twu.net/fbcache/urls.lua.html | |
-- (find-angg "fbcache/urls.lua") | |
-- 2014sep07 | |
-- (defun u () (interactive) (find-angg-upload-links "fbcache/" "urls.lua")) | |
-- | |
-- (find-lua51manualw3m "") | |
-- (find-books "__comp/__comp.el" "ierusalimschy") | |
-- (find-pil2page 8 "Contents") | |
-- (find-pil2text 8 "Contents") | |
-- «.basic» (to "basic") | |
-- «.pypp» (to "pypp") | |
-- «.pypp-cache» (to "pypp-cache") | |
-- «.lua-cache» (to "lua-cache") | |
-- «.translations0» (to "translations0") | |
-- «.urls-by-kind» (to "urls-by-kind") | |
-- «.all-fb-urls» (to "all-fb-urls") | |
-- «.url-to-ids» (to "url-to-ids") | |
-- «.wget» (to "wget") | |
-- «.examples» (to "examples") | |
-- Dependencies: | |
-- rocks (for penlight) | |
-- posix (find-es "lua5" "lua-posix-wheezy") | |
-- lpeg: re.lua (find-es "lua-intro" "lpeg-re-1") | |
-- penlight: pretty, wrap (find-es "lua5" "penlight") | |
-- ____ _ | |
-- | __ ) __ _ ___(_) ___ | |
-- | _ \ / _` / __| |/ __| | |
-- | |_) | (_| \__ \ | (__ | |
-- |____/ \__,_|___/_|\___| | |
-- | |
-- «basic» (to ".basic") | |
-- Some functions copied from my LUA_INIT file. | |
-- (This is an attempt to make this script self-contained). | |
-- (find-angg "LUA/lua50init.lua") | |
-- Short global aliases for the formatting/output helpers used below.
format = string.format
write  = io.write
-- printf(fmt, ...): build a string with `format` and emit it through
-- `write`. No newline is appended and nothing is returned.
printf = function (...)
  local text = format(...)
  write(text)
end
-- readfile(fname): return the entire contents of the file `fname` as a
-- single string. Raises (via assert) when the file cannot be opened.
function readfile(fname)
  local handle = assert(io.open(fname, "r"))
  local contents = handle:read("*a")
  handle:close()
  return contents
end
-- writefile(fname, bigstr): replace the contents of `fname` with `bigstr`
-- (the file is opened in "w+" mode, i.e. created or truncated).
-- Raises (via assert) when the file cannot be opened. Returns nothing.
function writefile(fname, bigstr)
  local handle = assert(io.open(fname, "w+"))
  handle:write(bigstr)
  handle:close()
end
-- readfile_or_nil(fname): like readfile, but any error raised while
-- reading is swallowed and nothing is returned instead.
function readfile_or_nil(fname)
  local ok, contents = pcall(readfile, fname)
  if ok then return contents end
end
-- file_exists(fname): quick hack - "exists" means "could be read"; the
-- value returned is the file contents (truthy), or nothing on failure.
function file_exists(fname)
  return readfile_or_nil(fname)
end
-- cfmt(fmt): "curried format" - returns a function that applies
-- format(fmt, ...) to its arguments.
function cfmt(fmt)
  local formatter = function (...) return format(fmt, ...) end
  return formatter
end
-- ee_fmt(fmt): same as cfmt, but `fmt` is first passed through ee_expand.
function ee_fmt(fmt)
  return cfmt(ee_expand(fmt))
end
require "re" | |
require "posix" | |
userocks() -- (find-angg "LUA/lua50init.lua" "userocks") | |
pretty = require 'pl.pretty' -- (find-es "lua5" "pl.pretty") | |
-- pp0(o): pretty-printed text for `o`, as produced by pretty.write.
function pp0(o)
  return pretty.write(o)
end
-- pp(o): print the pretty-printed text for `o`.
function pp(o)
  print(pretty.write(o))
end
-- ee_readfile_pp(fname): read `fname` with ee_readfile and hand the text
-- to `expr`; returns whatever `expr` returns.
function ee_readfile_pp(fname)
  return expr(ee_readfile(fname))
end
-- ee_writefile_pp(fname, o): write the pretty-printed form of `o` to
-- `fname` through ee_writefile.
function ee_writefile_pp(fname, o)
  ee_writefile(fname, pp0(o))
end
-- (find-es "lua5" "pl.pretty-fix") | |
-- longquote(str): wrap `str` in a Lua long-bracket literal,
--   "[" eqs "[\n" str "]" eqs "]",
-- where eqs is a run of "=" chosen so that the closing bracket cannot
-- occur inside `str` (nor be formed together with the end of `str`).
--
-- Every "]" in `str` is followed by a maximal run of k "=" (k >= 0); a
-- closing bracket of level k could only appear at such a spot, so any
-- level that is not among those k's is safe. The smallest such level is
-- used, which makes the result deterministic (the previous version took
-- the level from `#` on a table with holes and had an unreachable
-- fallback return after an always-true `if`).
longquote = function (str)
  local used = {}
  for eqs in str:gmatch("%](=*)") do used[#eqs] = true end
  local level = 0
  while used[level] do level = level + 1 end
  local eqs = string.rep("=", level)
  return '['..eqs..'[\n'..str..']'..eqs..']'
end
-- (find-es "lua5" "pl.text.wrap") | |
wrap = (require "pl.text").wrap | |
-- wraps(bigstr): re-wrap every non-empty line of `bigstr`. Each maximal
-- run of non-newline characters is passed to `wrap`, and the list it
-- returns is joined with "\n" and followed by one extra "\n".
function wraps(bigstr)
  local rewrap_line = function (line)
    local pieces = wrap(line)
    return table.concat(pieces, "\n") .. "\n"
  end
  local result = bigstr:gsub("([^\n]+)", rewrap_line)
  return result
end
-- (find-angg "LUA/lua50init.lua" "ee_ls") | |
-- no_dots(L): remove the entries "." and ".." from the list L, in place,
-- and return L. The list is walked from the end so that removals do not
-- shift entries that are still to be visited.
function no_dots(L)
  local i = #L
  while i >= 1 do
    local entry = L[i]
    if entry == "." or entry == ".." then table.remove(L, i) end
    i = i - 1
  end
  return L
end
-- ee_ls(dir): first value returned by posix.dir for the ee_expand'ed
-- directory name.
function ee_ls(dir)
  local entries = posix.dir(ee_expand(dir))
  return entries
end
-- ee_ls_no_dots(dir): ee_ls(dir) without "." and "..", passed through
-- `sorted`.
function ee_ls_no_dots(dir)
  local entries = no_dots(ee_ls(dir))
  return sorted(entries)
end
require "re" | |
-- repat0: text appended to every pattern given to recomp.
-- redefs: definitions table handed to re.compile by recomp.
repat0 = ""
redefs = { concat = table.concat }
-- recomp(pat): compile the "re" pattern `pat` (plus repat0) with redefs.
function recomp(pat)
  return re.compile(pat .. repat0, redefs)
end
-- retest(p): returns a function that matches its argument against the
-- compiled pattern `p` and prints the results.
function retest(p)
  return function (s)
    print(re.match(s, recomp(p)))
  end
end
--[==[ | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
= lpeg.version() | |
retest [=[ | |
cs <- { c* } | |
c <- { [a-z] } | |
]=] "bop" | |
--]==] | |
-- | |
-- _ __ _ _ _ __ _ __ | |
-- | '_ \| | | | '_ \| '_ \ | |
-- | |_) | |_| | |_) | |_) | | |
-- | .__/ \__, | .__/| .__/ | |
-- |_| |___/|_| |_| | |
-- | |
-- «pypp» (to ".pypp") | |
-- A "pypp object" is a "Python pretty-printed object", i.e., a string | |
-- produced by Python's "pprint.pprint". We have to handle these | |
-- objects because we use facebook-sdk (in Python) to talk to | |
-- Facebook; facebook-sdk returns dict objects, that we then save into | |
-- files. | |
-- | |
-- The main function here is pypp_parse, that converts pypp objects | |
-- into Lua tables. | |
-- | |
-- Note that fetching information from Facebook is very slow and | |
-- error-prone - Facebook posts can be deleted or set to restricted, | |
-- our access token may expire, our internet connection may be | |
-- failing, and so on - so we want to keep a cache with the returned | |
-- objects, as pypp objects (one for each Facebook id). Reading pypp | |
-- objects is also relatively slow (up to 1/100 second for each), so | |
-- we want to keep a cache of "luapp objects" for speed; a luapp | |
-- object is a string containing a pretty-printed version of a Lua | |
-- table. | |
-- (find-angg "fbcache/p.py" "basic") | |
-- (find-es "python" "facebook-sdk") | |
-- (find-es "lua-intro" "lpeg-re-infix-1") | |
-- file:///usr/share/doc/lua-lpeg-dev/lpeg.html | |
-- file:///usr/share/doc/lua-lpeg-dev/re.html#ex | |
-- http://www.inf.puc-rio.br/~roberto/lpeg/ | |
-- http://www.inf.puc-rio.br/~roberto/lpeg/re.html | |
-- (find-file "~/fbcache/cache_by_id/141539566016667") | |
-- pypp_grammar0: source text of the "re" grammar that is compiled into
-- pypp_grammar (pypp_defs supplies the functions named after "->").
--   objrest - start rule: one obj, then a position capture and, after
--             optional whitespace, the remaining text as a capture
--   obj     - bool / num / str / table / list
--   table   - "{" key ":" value pairs "}" folded by totable
--   list    - "[" values "]" folded by tolist
--   str     - optionally "u"-prefixed, single- or double-quoted string;
--             its pieces are matched by the strc* rules and concatenated
-- NOTE(review): obj has no alternative for Python's None - confirm that
-- the cached objects never contain it.
pypp_grammar0 = [=[ | |
objrest <- obj {} [%s]* {.*} | |
obj <- bool / num / str / table / list | |
objp <- obj colon obj | |
table <- ("{" (objp (comma objp)*)* "}") -> totable | |
list <- ("[" (obj (comma obj )*)* "]") -> tolist | |
colon <- [%s]* ":" [%s]* | |
comma <- [%s]* "," [%s]* | |
bool <- ("True" / "False") -> tobool | |
num <- ("-"? [0-9.]+) -> tonum | |
str <- (ustr / str0) | |
ustr <- "u" str0 | |
str0 <- ("'" (stritem / {'"'})* -> concat "'") / | |
('"' (stritem / {"'"})* -> concat '"') | |
stritem <- strcnormals / strcc / strcx / strcu / strcU / strcother | |
strcnormals <- {[^'"\]+} | |
strcc <- "\" {['"\]} | |
strcx <- "\x" ({[0-9a-f][0-9a-f]} -> hextoc) | |
strcu <- { "\u" [0-9a-f][0-9a-f][0-9a-f][0-9a-f] } | |
strcU <- { "\U" [%x][%x][%x][%x][%x][%x][%x][%x] } | |
strcother <- "\" . -> otherc | |
]=] | |
-- pypp_defs: functions referenced by name ("-> name") in pypp_grammar0.
pypp_defs = {}

-- "True" -> true, anything else -> false.
pypp_defs.tobool = function (s) return s == "True" end
-- Numeric text -> number (nil when tonumber cannot convert it).
pypp_defs.tonum = function (s) return tonumber(s) end
-- Identity on strings.
pypp_defs.tostr = function (s) return s end
-- Collect all captures into a list.
pypp_defs.tolist = function (...) return {...} end
-- Fold a flat sequence key1, value1, key2, value2, ... into a table;
-- a trailing key without a value is ignored.
pypp_defs.totable = function (...)
  local flat, dict = {...}, {}
  local i = 1
  while i < #flat do
    dict[flat[i]] = flat[i + 1]
    i = i + 2
  end
  return dict
end
-- Two hex digits -> the character with that code.
pypp_defs.hextoc = function (cc) return string.char(tonumber(cc, 16)) end
-- Concatenate all captures into one string.
pypp_defs.concat = function (...) return table.concat {...} end
-- Character following a backslash -> the escape it denotes. Only n, r
-- and t are translated; any other escape is printed (as a diagnostic)
-- and returned unchanged, backslash included.
pypp_defs.otherc = function (c)
  local known = { n = "\n", r = "\r", t = "\t" }
  local translated = known[c]
  if translated then return translated end
  print("\\"..c)
  return "\\"..c
end
-- pypp_grammar: pypp_grammar0 compiled with the functions in pypp_defs.
pypp_grammar = re.compile(pypp_grammar0, pypp_defs)
-- pypp_parse(bigstr, pos): match pypp_grammar against `bigstr`, starting
-- at `pos` when given; returns the captures of the match.
function pypp_parse(bigstr, pos)
  return pypp_grammar:match(bigstr, pos)
end
-- pypp_test(bigstr, pos): pretty-print the result of pypp_parse.
function pypp_test(bigstr, pos)
  pp(pypp_parse(bigstr, pos))
end
-- _ | |
-- _ __ _ _ _ __ _ __ ___ __ _ ___| |__ ___ | |
-- | '_ \| | | | '_ \| '_ \ / __/ _` |/ __| '_ \ / _ \ | |
-- | |_) | |_| | |_) | |_) | | (_| (_| | (__| | | | __/ | |
-- | .__/ \__, | .__/| .__/ \___\__,_|\___|_| |_|\___| | |
-- |_| |___/|_| |_| | |
-- | |
-- «pypp-cache» (to ".pypp-cache") | |
-- | |
-- Conversions: | |
-- readfile parse | |
-- id ---> pyppf ----------> pypp -------> obj ---> sexp | |
-- obj ---> date | |
-- obj ---> txt | |
-- | |
-- (find-fline "~/fbcache/cache_by_id/") | |
-- (find-fline "~/fbcache/cache_by_id_lua/") | |
-- Directories holding the cached objects: one file per Facebook id, as
-- Python pretty-printed text (pypp_dir) and as Lua text (luapp_dir).
pypp_dir  = "~/fbcache/cache_by_id/"
luapp_dir = "~/fbcache/cache_by_id_lua/"
-- id -> file name inside each cache directory (format built by ee_fmt).
id_to_pyppf  = ee_fmt("~/fbcache/cache_by_id/%s")
id_to_luappf = ee_fmt("~/fbcache/cache_by_id_lua/%s")
-- Listings of the two cache directories, via ee_ls_no_dots.
function pypp_ls()  return ee_ls_no_dots(pypp_dir)  end
function luapp_ls() return ee_ls_no_dots(luapp_dir) end
-- pypp_fname(id, e): file name for `id` inside pypp_dir, with the
-- optional suffix `e` appended. The "~" in pypp_dir is not expanded here.
pypp_fname = function (id, e) return pypp_dir..id..(e or "") end
-- pypp_readfile(id, e): contents (via ee_readfile) of the file named by
-- pypp_fname(id, e). The suffix `e` used to be accepted but silently
-- dropped; it is now forwarded, so calls without `e` behave as before.
pypp_readfile = function (id, e) return ee_readfile(pypp_fname(id, e)) end
-- pypp_id_obj(id, p): parse the cached file for `id` with pypp_parse,
-- starting at position `p` when given.
pypp_id_obj = function (id, p) return pypp_parse(pypp_readfile(id), p) end
-- pypp_id_sexp(id): the text '(find-fline "<luapp_dir><id>")', i.e. a
-- sexp naming the Lua cache file for `id`.
pypp_id_sexp = function (id)
  return format('(find-fline "%s%s")', luapp_dir, id)
end
-- obj_date(o): the created_time of `o`, else its updated_time; a false
-- or nil `o` is returned as is.
function obj_date(o)
  if not o then return o end
  return o.created_time or o.updated_time
end
-- pypp_obj_txt(o): plain-text rendering of a parsed object: sender,
-- date, link, a sexp pointing to its Lua cache file, and the wrapped
-- body (name or message). Missing fields get "(no ...)" placeholders.
function pypp_obj_txt(o)
  local sender = "(no FROM)"
  if o.from and o.from.name then sender = o.from.name end
  local values = {
    FROM = sender,
    DATE = obj_date(o) or "(no DATE)",
    URL  = o.link or "(no URL)",
    SEXP = pypp_id_sexp(o.id),
    BODY = wraps(o.name or o.message or "(no NAME or MESSAGE)"),
  }
  local template = "From: $FROM\nDate: $DATE\n$URL\n $SEXP\n\n$BODY"
  -- Each $NAME in the template must have a value; anything else is a bug.
  local lookup = function (name)
    local value = values[name]
    if value then return value end
    error(name.." is nil")
  end
  local text = template:gsub("%$([A-Z]+)", lookup)
  return text
end
-- pypp_id_txt(id): pypp_obj_txt of the object parsed from the Python
-- cache file for `id`.
function pypp_id_txt(id)
  return pypp_obj_txt(pypp_id_obj(id))
end
-- _ _ | |
-- | | _ _ __ _ ___ __ _ ___| |__ ___ | |
-- | | | | | |/ _` | / __/ _` |/ __| '_ \ / _ \ | |
-- | |__| |_| | (_| | | (_| (_| | (__| | | | __/ | |
-- |_____\__,_|\__,_| \___\__,_|\___|_| |_|\___| | |
-- | |
-- «lua-cache» (to ".lua-cache") | |
-- id_objs: id -> parsed object; filled by read_lua_cache.
id_objs = {}
-- write_lua_cache(verbose): for every id listed by pypp_ls, parse its
-- Python cache file and write the result, pretty-printed as Lua, to the
-- corresponding Lua cache file. With a true `verbose`, print each id.
function write_lua_cache(verbose)
  for _, fbid in ipairs(pypp_ls()) do
    if verbose then print(fbid) end
    local obj = pypp_parse(ee_readfile(id_to_pyppf(fbid)))
    ee_writefile_pp(id_to_luappf(fbid), obj)
  end
end
-- test_lua_cache(): run read_lua_cache in verbose mode, so that every id
-- is printed as its Lua cache file is read.
function test_lua_cache()
  read_lua_cache("verbose")
end
-- read_lua_cache(verbose): reload the Lua cache. Sets the global `ids`
-- to the listing given by luapp_ls and the global `id_objs` to a fresh
-- table mapping each id to the object read from its Lua cache file.
-- With a true `verbose`, print each id as it is processed.
function read_lua_cache(verbose)
  ids = luapp_ls()
  id_objs = {}
  for _, fbid in ipairs(ids) do
    if verbose then print(fbid) end
    id_objs[fbid] = ee_readfile_pp(id_to_luappf(fbid))
  end
end
--[==[ | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
write_lua_cache("verbose") | |
test_lua_cache() | |
read_lua_cache() | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
read_lua_cache() | |
= table.concat(ids, "\n") | |
pp(id_objs[ids[1]]) | |
-- (find-fline "~/fbcache/") | |
-- (find-fline "~/fbcache/cache_by_id/") | |
-- (find-fline "~/fbcache/cache_by_id_lua/") | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
for _,id in ipairs(pypp_ls()) do | |
print(id) | |
print(pypp_id_txt(id)) | |
print("\n--------------\n") | |
end | |
hugestr = mapconcat(pypp_id_txt, pypp_ls(), "\n--------------\n") | |
writefile("huge.txt", hugestr) | |
# (find-fline "~/fbcache/cache_by_id/") | |
# (find-fline "~/fbcache/huge.txt") | |
# (find-sh "grep ^From: ~/fbcache/huge.txt") | |
# (find-sh "grep ^From: ~/fbcache/huge.txt | sort") | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
cd ~/fbcache/ | |
Scp-np huge.txt $TWUP/fbcache/ | |
Scp-np huge.txt $TWUS/fbcache/ | |
# http://angg.twu.net/fbcache/huge.txt | |
--]==] | |
-- _ _ _ | |
-- | |_ _ __ __ _ _ __ ___| | __ _| |_ ___ _ __ ___ | |
-- | __| '__/ _` | '_ \/ __| |/ _` | __/ _ \| '__/ __| | |
-- | |_| | | (_| | | | \__ \ | (_| | || (_) | | \__ \ | |
-- \__|_| \__,_|_| |_|___/_|\__,_|\__\___/|_| |___/ | |
-- | |
-- «translations0» (to ".translations0") | |
-- Idea: Facebook URLs are split into "parts" by fb_url_to_iparts, | |
-- like this: ... | |
-- /permalink.php?story_fbid=N&set=OR pesfs_L_L 0 | |
-- /W/photos/W/WR photos_L_L_L 3 | |
-- (find-angg "fbcache/urls.lua" "translations0" " /W/photos/W/WR ") | |
-- /W/media_set?set=OR media0_L_L 0 | |
-- Python version: (find-angg "fbcache/p.py" "translations0") | |
-- fb_translations0: the translation table, one entry per line, with
-- three whitespace-separated columns (split by the gmatch loop that
-- builds fb_translations1):
--   1. URL path pattern; fb_abc_to_tr prefixes it with
--      "https://www.facebook.com"
--   2. a second, "_"-separated pattern for the same entry (the "f" field)
--   3. the "etc" field: all_ids and url_to_ids take its leading run of
--      digits 1-9 and use each digit as an index into the parts captured
--      from the URL to pick the Facebook ids ("0" means no ids)
-- In columns 1 and 2 the characters W, N, O, R, L, "." and "?" are
-- placeholders, expanded through fb_char_to_re and fb_char_to_fmt.
fb_translations0 = [[ | |
/events/N/permalink/N/R evpe_L_L 1_2 | |
/events/N/N/R ev2_L_L 12 | |
/events/N/R ev1_L 1 | |
/groups/W/permalink/N/R grpe_L_L 2 | |
/groups/N/NR groups_L_L 1 | |
/groups/W/NR groups_L_L 2 | |
/groups/NR groups_L 1 | |
/groups/WR groups_L 0 | |
/W/media_set?set=a.N.N.NR media4_L_L_L 0 | |
/media/set/?set=OR media_L 0 | |
/notes/W/W/NR notes_L_L_L 3 | |
/notes/N/R notes_L 1 | |
/pages/W/NR pages_L_L 2 | |
/permalink.php?story_fbid=N&id=NR pesfi_L_L 1 | |
/photo.php?fbid=N&set=OR photofs_L_L 1 | |
/N/photos/t.N/N/R photos_L_t.L_L 13 | |
/N/photos/gm.N/N/R photos_L_gm.L_L 123 | |
/N/photos/a.N.N.N/N/R photos_L_a.L.L.L_L 1245 | |
/photo.php?v=N&set=OR photovs_L_L 1 | |
/photo.php?v=NR photov_L 1 | |
/W/posts/NR posts_L_L 2 | |
/video.php?v=NR video_L 1 | |
]] | |
-- fb_chars_to_tbl(str): parse a whitespace-separated list of items of
-- the form "C:replacement" into a table mapping the single character C
-- to its replacement (which may be empty, as in "R:").
fb_chars_to_tbl = function (str)
  local tbl = {}
  for char,rgx in str:gmatch("(%S):(%S*)") do
    tbl[char] = rgx
  end
  return tbl
end
-- Placeholder character -> Lua pattern fragment.
fb_char_to_re = fb_chars_to_tbl(
  " W:([^/]+) N:([0-9]+) O:([^/&?]+) R:(.*) L:([^_]+) .:%. ?:%? ")
-- Placeholder character -> string.format fragment.
fb_char_to_fmt = fb_chars_to_tbl(
  " W:%s N:%s O:%s R: L:%s .:. ?:? ")
-- fb_abc_to_tr(a, b, c): build one translation entry from the three
-- columns of a line of the translation table.
--   a: URL path pattern (prefixed here with "https://www.facebook.com")
--   b: second pattern for the same entry
--   c: stored unchanged in the `etc` field
-- Returns { u = <entry for the full URL>, f = <entry for b>, etc = c },
-- where each entry is { orig = text, re = anchored Lua pattern,
-- fmt = format string }, obtained by expanding the placeholder
-- characters W N O R L ? . through the two tables above.
fb_abc_to_tr = function (a, b, c)
  -- gsub also returns a match count; the parentheses keep only the string.
  local expand = function (str, map) return (str:gsub("[WNORL?.]", map)) end
  local entry = function (str)
    return { orig = str,
             re   = "^"..expand(str, fb_char_to_re).."$",
             fmt  = expand(str, fb_char_to_fmt),
           }
  end
  -- `ha` used to be assigned without `local`, leaking a global.
  local ha = "https://www.facebook.com"..a
  return { u = entry(ha), f = entry(b), etc = c}
end
-- fb_translations1: list of translation entries, one per line of
-- fb_translations0 that has three whitespace-separated columns, each
-- built by fb_abc_to_tr. The order of the lines is preserved.
fb_translations1 = {}
for urlpat, fnamepat, etc in fb_translations0:gmatch("(%S+)[ \t]+(%S+)[ \t]+(%S+)") do
  fb_translations1[#fb_translations1 + 1] = fb_abc_to_tr(urlpat, fnamepat, etc)
end
--[==[ | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
pp(fb_translations1) | |
--]==] | |
-- fb_url_to_iparts(url): find the first entry of fb_translations1 whose
-- URL pattern (tr.u.re) matches `url`. Returns the index of that entry
-- and a list with (at most) the first five captures; returns nothing
-- when no entry matches.
function fb_url_to_iparts(url)
  for index, tr in ipairs(fb_translations1) do
    local p1, p2, p3, p4, p5 = url:match(tr.u.re)
    if p1 then
      return index, {p1, p2, p3, p4, p5}
    end
  end
end
-- fb_u1toc = function (url1) | |
-- for i,entry in ipairs(fb_translations) do | |
-- local a,b,c,d,e = url1:match(entry.upat) | |
-- if a then return format(entry.cfmt, a, b, c, d, e) end | |
-- end | |
-- end | |
-- fb_ctou1 = function (cfname) | |
-- for i,entry in ipairs(fb_translations) do | |
-- local a,b,c,d,e = cfname:match(entry.cpat) | |
-- if a then return format(entry.ufmt, a, b, c, d, e) end | |
-- end | |
-- end | |
-- _ _ _ _ _ | |
-- _ _ _ __| |___ | |__ _ _ | | _(_)_ __ __| | | |
-- | | | | '__| / __| | '_ \| | | | | |/ / | '_ \ / _` | | |
-- | |_| | | | \__ \ | |_) | |_| | | <| | | | | (_| | | |
-- \__,_|_| |_|___/ |_.__/ \__, | |_|\_\_|_| |_|\__,_| | |
-- |___/ | |
-- | |
-- «urls-by-kind» (to ".urls-by-kind") | |
-- fb_urls_by_kind(bigstr): classify the whitespace-separated URLs in
-- `bigstr`. The result maps each index of fb_translations1 to the list
-- of URLs matched by that entry, and 0 to the list of URLs matched by
-- none. The result is returned and also stored in the global
-- `urls_by_kind`.
function fb_urls_by_kind(bigstr)
  urls_by_kind = {}
  for kind = 0, #fb_translations1 do urls_by_kind[kind] = {} end
  for url in bigstr:gmatch("(%S+)") do
    local kind = fb_url_to_iparts(url) or 0
    table.insert(urls_by_kind[kind], url)
  end
  return urls_by_kind
end
-- ubyk = fb_urls_by_kind(readfile("/tmp/o0")) | |
-- prubyk(kind, n): print the URLs of one kind from the global `ubyk`
-- (a table as returned by fb_urls_by_kind), one "<kind> <url>" per
-- line, stopping after `n` URLs when `n` is given. For kind > 0 a
-- (find-angg ...) line showing the entry's URL pattern is printed
-- first; sub(25) drops the 24-character "https://www.facebook.com"
-- prefix from it.
function prubyk(kind, n)
  if kind > 0 then
    local pattern = fb_translations1[kind].u.orig:sub(25)
    printf(' (find-angg "fbcache/urls.lua" "translations0" " %s ")\n', pattern)
  end
  for count, url in ipairs(ubyk[kind]) do
    print(kind.." "..url)
    if count == n then break end
  end
end
-- prubyks(n): call prubyk(kind, n) for every kind from 1 up to the
-- number of entries in fb_translations1, printing an empty line before
-- each one.
function prubyks(n)
  for kind = 1, #fb_translations1 do
    print()
    prubyk(kind, n)
  end
end
-- pyhead: header text that all_ids prints (when called without a
-- callback) before the Python lines it generates.
pyhead = [=[ | |
(eepitch-python) | |
(eepitch-kill) | |
(eepitch-python) | |
execfile("p.py", globals()) | |
]=] | |
-- all_ids(bigstr, f): extract the Facebook ids from the
-- whitespace-separated URLs in `bigstr`.
-- For each URL matched by fb_url_to_iparts, the leading digits (1-9) of
-- the entry's `etc` field select which captured parts are ids.
--   With a callback `f`: f(id) is called for every id.
--   Without `f`: pyhead is printed first, then "# <url>" for every URL
--   and a 'pp(fi("<id>"))' line for every id.
-- The ids are collected, in order, in the global `url_ids`, which is
-- also returned.
function all_ids(bigstr, f)
  local emit_python = not f
  if emit_python then print(pyhead) end
  url_ids = {}
  for url in bigstr:gmatch("(%S+)") do
    if emit_python then print("# "..url) end
    local kind, parts = fb_url_to_iparts(url)
    local digits = kind and fb_translations1[kind].etc:match("^[1-9]+")
    if digits then
      for digit in digits:gmatch(".") do
        local id = parts[tonumber(digit)]
        table.insert(url_ids, id)
        if f then
          f(id)
        else
          printf('pp(fi("%s"))\n', id)
        end
      end
    end
  end
  return url_ids
end
--[==[ | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
(eek "M-o <<wrap>> M-o") | |
dofile "urls.lua" | |
all_ids(ee_readfile "/tmp/o1") | |
print(pyhead) | |
all_ids(ee_readfile "/tmp/o1", function (id) print('fc("'..id..'")') end) | |
all_ids(ee_readfile "/tmp/o0", function (id) print('fc("'..id..'")') end) | |
(eek "M-o M-< 2*<down>") | |
(eepitch-python) | |
(eepitch-kill) | |
(eepitch-python) | |
execfile("p.py", globals()) | |
execfile("/tmp/o2", globals()) | |
-- (find-fline "/tmp/o1") | |
-- (find-file "~/fbcache/cache_by_id/") | |
--]==] | |
-- digits = etc:match("^([1-9]*)") | |
-- if #digits > 0 then | |
-- local parts = {a, b, c, d, e} | |
-- return parts[0 + digits[1]] | |
-- end | |
-- _ _ _ _____ ____ _ | |
-- / \ | | | | ___| __ ) _ _ _ __| |___ | |
-- / _ \ | | | | |_ | _ \ | | | | '__| / __| | |
-- / ___ \| | | | _| | |_) | | |_| | | | \__ \ | |
-- /_/ \_\_|_| |_| |____/ \__,_|_| |_|___/ | |
-- | |
-- «all-fb-urls» (to ".all-fb-urls") | |
--[==[ | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
cat ~/TODO \ | |
~/ORG/index.org \ | |
~/TH/ee.blogme \ | |
~/TH/2014-xs.blogme \ | |
~/TH/links-sobre-gaza.blogme \ | |
| grep https://www.facebook.com/ \ | |
| tr ' ' '\n' \ | |
| grep https://www.facebook.com/ \ | |
| sort | uniq \ | |
| tee /tmp/o0 | |
laf /tmp/o0 ~/fbcache/all-fb-urls.lst | |
cp -v /tmp/o0 ~/fbcache/all-fb-urls.lst | |
# (find-fline "/tmp/o0") | |
# (find-fline "~/fbcache/all-fb-urls.lst") | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
(eek "M-o <<wrap>> M-o") | |
dofile "urls.lua" | |
ubyk = fb_urls_by_kind(readfile("/tmp/o0")) | |
prubyks(4) | |
prubyk(0) | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
ubyk = fb_urls_by_kind(readfile("/tmp/o0")) | |
for i=0,#ubyk do | |
print(i,#ubyk[i]) | |
end | |
prubyks(4) | |
pp(fb_translations1[1]) | |
prubyk(0) | |
prubyk(0, 10) | |
prubyk(1) | |
prubyk(1, 10) | |
--]==] | |
-- _ _ _ _ | |
-- _ _ _ __| | | |_ ___ (_) __| |___ | |
-- | | | | '__| | | __/ _ \ | |/ _` / __| | |
-- | |_| | | | | | || (_) | | | (_| \__ \ | |
-- \__,_|_| |_| \__\___/ |_|\__,_|___/ | |
-- | |
-- «url-to-ids» (to ".url-to-ids") | |
-- url_to_ids(url): list of the Facebook ids contained in `url`.
-- The URL is matched with fb_url_to_iparts; the leading digits (1-9) of
-- the matching entry's `etc` field say which captured parts are ids.
-- Returns nothing when the URL matches no entry, or when the entry's
-- `etc` field does not start with such a digit.
function url_to_ids(url)
  local kind, parts = fb_url_to_iparts(url)
  if not kind then return end
  local digits = fb_translations1[kind].etc:match("^[1-9]+")
  if not digits then return end
  local ids = {}
  for digit in digits:gmatch(".") do
    local id = parts[tonumber(digit)]
    table.insert(ids, id)
  end
  return ids
end
-- fb_urls: list of URLs, filled by read_fb_urls.
-- fb_urls_file: the file the URLs are read from.
fb_urls = {}
fb_urls_file = "~/fbcache/all-fb-urls.lst"
-- read_fb_urls(): set fb_urls to the result of applying `split` to the
-- contents of fb_urls_file.
function read_fb_urls()
  fb_urls = split(ee_readfile(fb_urls_file))
end
-- (find-fline "~/fbcache/all-fb-urls.lst") | |
-- id_urls: id -> list of the URLs in which that id occurs.
id_urls = {}
-- read_id_urls(verbose): for every URL in fb_urls, append the URL to
-- the id_urls list of each id that url_to_ids finds in it. With a true
-- `verbose`, print each URL and, indented by one space, each of its ids.
function read_id_urls(verbose)
  for _, url in ipairs(fb_urls) do
    if verbose then print(url) end
    local ids = url_to_ids(url) or {}
    for _, id in ipairs(ids) do
      if verbose then print(" "..id) end
      local urls = id_urls[id]
      if not urls then
        urls = {}
        id_urls[id] = urls
      end
      table.insert(urls, url)
    end
  end
end
-- id_obj_txt0(id): text (pypp_obj_txt) of the cached object for `id`.
function id_obj_txt0(id)
  return pypp_obj_txt(id_objs[id])
end
-- id_obj_txt1(id): the URLs recorded for `id` in id_urls, one per line,
-- followed by id_obj_txt0(id).
function id_obj_txt1(id)
  local url_lines = mapconcat(cfmt "%s\n", id_urls[id] or {})
  return url_lines..id_obj_txt0(id)
end
-- id_obj_txt2(id): id_obj_txt1(id) preceded by a line with the id
-- between the bytes \171 and \187.
function id_obj_txt2(id)
  local anchor = cfmt("\171%s\187\n")(id)
  return anchor..id_obj_txt1(id)
end
-- id_objs_txt(ids): id_obj_txt2 of every id in `ids`, joined by a
-- separator line of dashes.
function id_objs_txt(ids)
  return mapconcat(id_obj_txt2, ids, "\n\n---------------\n")
end
-- id_date(id): the date (obj_date) of the cached object for `id`; when
-- there is none, the id itself preceded by a space.
function id_date(id)
  local date = obj_date(id_objs[id])
  if not date then date = " "..id end
  return date
end
-- ids_sorted_by_date(): a copy of the global list `ids`, ordered by
-- id_date (ascending string comparison); `ids` itself is not modified.
-- The comparison must be a strict "less than": table.sort requires
-- comp(a, b) to be false when a and b are equivalent, and a "<="
-- comparator can make it raise "invalid order function for sorting"
-- (or compare against nil) as soon as two ids share the same date.
ids_sorted_by_date = function ()
  local lt = function (a, b) return id_date(a) < id_date(b) end
  return sorted(shallowcopy(ids), lt)
end
-- raw_text_unix: trailer appended to generated text (see write_huge_txt
-- and the "-utot" option): three newlines followed by an Emacs
-- "Local Variables" block that sets the coding to raw-text-unix.
raw_text_unix = "\n\n\n"..[[ | |
-- Local Variables: | |
-- coding: raw-text-unix | |
-- End: | |
]] | |
-- write_huge_txt(): write to "huge.txt" (through ee_writefile) the text
-- of all cached objects, ordered by date, followed by raw_text_unix.
function write_huge_txt()
  local sorted_ids = ids_sorted_by_date()
  local body = id_objs_txt(sorted_ids)
  ee_writefile("huge.txt", body..raw_text_unix)
end
-- all_fb_urls0 = function () return ee_readfile("~/fbcache/all-fb-urls.lst") end | |
-- all_fb_urls1 = function () return split(all_fb_urls0()) end | |
-- gen_all_fb_urls = function () return all_fb_urls0():gmatch("%S+") end | |
--[==[ | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
read_fb_urls() | |
all_fb_ids = Set.new() -- (find-angg "LUA/lua50init.lua" "Set") | |
for _,url in ipairs(fb_urls) do | |
for _,id in ipairs(url_to_ids(url) or {}) do | |
all_fb_ids._[id] = id | |
end | |
end | |
= all_fb_ids | |
python_ids = Set.from(pypp_ls()) | |
= python_ids | |
missing_ids = all_fb_ids - python_ids | |
= missing_ids | |
= missing_ids:ksc() | |
= pyhead..mapconcat(cfmt 'pp(fi("%s"))\n', missing_ids:ks()) | |
--]==] | |
--[==[ | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
read_fb_urls() | |
read_id_urls() | |
read_lua_cache() | |
write_huge_txt() | |
-- (find-fline "huge.txt") | |
-- (find-angg "fbcache/huge.txt") | |
-- (find-angg "fbcache/huge.txt" "10152622815229198") | |
pp(id_urls["701795806545331"]) | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
laf huge.txt | |
lua5.1 ~/blogme3/blogme3.lua -o huge.txt.html -a2html huge.txt | |
cd ~/fbcache/ | |
Scp -v huge.txt huge.txt.html edrx@angg.twu.net:/home/edrx/slow_html/fbcache/ | |
Scp -v huge.txt huge.txt.html edrx@angg.twu.net:/home/edrx/public_html/fbcache/ | |
http://angg.twu.net/fbcache/huge.txt.html | |
= id_obj_txt2(ids[1]) | |
= id_obj_txt2(ids[1])..id_obj_txt2(ids[2]) | |
= cfmt "\187" "a" | |
= cfmt "\171%s\187" "a" | |
-- for _,id in ipairs(ids) do print(id, id_date(id)) end | |
for _,id in ipairs(ids_sorted_by_date()) do print(id.." "..id_date(id)) end | |
= id_date "701795806545331" | |
= mapconcat(cfmt " %s\n", id_urls["701795806545331"] or {}) | |
id = "701795806545331" | |
pp(id_objs[id]) | |
= id_obj_txt(id) | |
pp(id_urls[id]) | |
= | |
= id_obj_txt0(id) | |
id = "10151841953998086" | |
pypp_id_txt = pypp_obj_txtfunction (o) | |
all_fb_urls = all_fb_urls1() | |
url = "https://www.facebook.com/usauncut/photos/a.190167221017767.44131.186219261412563/823206274380522/?type=1" | |
url = "https://www.facebook.com/video.php?v=1009188935762835" | |
pp(url_to_ids(url)) | |
id_urls = {} | |
for url in | |
= #all_fb_urls | |
get_id_urls = function | |
id_urls = {} | |
for url in | |
id_urls[id] = id_urls[id] or {} | |
table.insert(id_urls[id], url) | |
pp(url_to_ids) | |
gen_all_fb_urls | |
--]==] | |
-- _ | |
-- __ ____ _ ___| |_ | |
-- \ \ /\ / / _` |/ _ \ __| | |
-- \ V V / (_| | __/ |_ | |
-- \_/\_/ \__, |\___|\__| | |
-- |___/ | |
-- | |
-- «wget» (to ".wget") | |
-- fb_linesplit0(line): split `line` around the first http(s) URL in it.
-- Returns five strings:
--   before - text preceding the URL
--   scheme - "http://" or "https://"
--   host   - the following printable characters up to the first "/"
--   tail   - the following printable characters (up to the first space
--            or other character outside "!".."~")
--   after  - the rest of the line
-- All five are nil when the line contains no "http://" or "https://".
function fb_linesplit0(line)
  local before, scheme, host, tail, after =
    line:match("^(.-)(https?://)([!-.0-~]*)([!-~]*)(.*)$")
  return before, scheme, host, tail, after
end
-- fb_goodpre(pre): truthy when the text before a URL is acceptable,
-- i.e. it is empty (returns true) or ends in a space or tab (returns
-- that character). Returns nil otherwise; a nil/false `pre` is returned
-- as is.
function fb_goodpre(pre)
  if not pre then return pre end
  if pre == "" then return true end
  return pre:sub(-1):match("[ \t]")
end
-- fb_linesplit1(line): split `line` with fb_linesplit0 and, when the
-- URL is preceded by acceptable text (fb_goodpre) and its host contains
-- "facebook", return the five parts plus a sixth value: fb_u1toc
-- applied to the path. Returns nothing otherwise.
-- NOTE(review): the only definition of fb_u1toc visible in this file is
-- a commented-out draft, so it presumably has to come from elsewhere
-- (e.g. LUA_INIT) - confirm, otherwise this call fails.
function fb_linesplit1(line)
  local pre, prot, site, path, rest = fb_linesplit0(line)
  if not (fb_goodpre(pre) and site:match "facebook") then return end
  local cfname = fb_u1toc(path)
  return pre, prot, site, path, rest, cfname
end
-- fb_linesplit(line): when `line` contains a Facebook URL preceded by
-- acceptable text (see fb_goodpre), print the value that fb_u1toc
-- returns for the URL's path, if any. Returns nothing.
-- NOTE(review): the only definition of fb_u1toc visible in this file is
-- a commented-out draft - confirm it is provided from elsewhere.
function fb_linesplit(line)
  local pre, prot, site, path, rest = fb_linesplit0(line)
  if not (fb_goodpre(pre) and site:match "facebook") then return end
  local cfname = fb_u1toc(path)
  if cfname then print(cfname) end
end
-- myerror(fmt, ...): print the formatted message followed by a newline,
-- then raise an error (with no error value), so that callers can trap
-- the failure with pcall - as the "-wins" option does.
-- An earlier definition that called os.exit(2) instead of error() was
-- immediately overridden by this one and has been removed as dead code.
myerror = function (fmt, ...) printf(fmt.."\n", ...); error() end
-- fb_utof(url): "URL to file name" - the sixth value returned by
-- fb_linesplit1 for `url`. When there is none, the failure is reported
-- through myerror, naming the offending URL.
-- (The message used to be formatted with the undefined global `line`
-- instead of `url`.)
fb_utof = function (url)
  local pre,prot,site,path,rest,fname = fb_linesplit1(url)
  if not fname then myerror("No FB URL in: %s", url) end
  return fname
end
-- fb_user_agent: the User-Agent string passed to wget.
fb_user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) "..
                "Gecko/20070310 Iceweasel/2.0.0.3 (Debian-2.0.0.3-1)"
-- fb_wget(fname, url): download `url` into the file `fname` by running
-- wget through getoutput.
-- Every value interpolated into the command line is single-quoted for
-- the shell, with embedded single quotes escaped as '\''. Previously the
-- file name was not quoted at all, and a URL containing a single quote
-- could break out of its quotes.
fb_wget = function (fname, url)
  local shq = function (str) return "'"..(str:gsub("'", "'\\''")).."'" end
  local A = {UA=shq(fb_user_agent), FNAME=shq(fname), URL=shq(url)}
  -- gsub looks each $NAME up in A; the parentheses drop the match count.
  local a = function (str) return (str:gsub("%$([A-Z]+)", A)) end
  local cmd = a "wget --progress=dot -U $UA -O $FNAME $URL"
  getoutput(cmd)
end
-- fb_wget_if_needed(url): download `url` into "<fb_utof(url)>.wget"
-- unless that file already exists.
function fb_wget_if_needed(url)
  local fname = fb_utof(url)..".wget"
  if file_exists(fname) then return end
  fb_wget(fname, url)
end
-- fb_cache(url): contents of the file named fb_utof(url).
-- NOTE(review): fb_wget_if_needed writes to the name with a ".wget"
-- suffix, while this reads the name without it - confirm which file is
-- meant.
function fb_cache(url)
  return readfile(fb_utof(url))
end
-- (find-ydb "ydb" "dooptions") | |
-- dooptions(optionname, a, b): command-line dispatcher.
--   -utof URL   print the value computed by fb_linesplit1 for URL (its
--               sixth result), or fail through myerror
--   -utot URL   print the text for the first id found in URL, read from
--               its Lua cache file, followed by raw_text_unix
--   -win  URL   download URL with wget unless already downloaded
--   -wins FILE  do the same for every line of FILE, printing each URL;
--               failures on individual URLs are ignored
-- Any other option name (including none) does nothing. `b` is unused.
dooptions = function (optionname, a, b)
  if optionname == "-utof" then
    local pre,prot,site,path,rest,cfname = fb_linesplit1(a)
    if cfname then print(cfname) else myerror("No FB URL in: %s", a) end
  elseif optionname == "-utot" then
    local url = a
    -- url_to_ids returns nothing for URLs it cannot translate; report
    -- that explicitly instead of failing on an index of nil.
    local ids = url_to_ids(url)
    local id = ids and ids[1]
    if not id then myerror("No FB id in: %s", tostring(url)) end
    local o = ee_readfile_pp(id_to_luappf(id))
    id_urls[id] = {url}
    id_objs[id] = o
    print(id_obj_txt1(id)..raw_text_unix)
  elseif optionname == "-win" then
    fb_wget_if_needed(a)
  elseif optionname == "-wins" then
    local bigstr = readfile(a)
    for url in bigstr:gmatch "([^\n]+)" do
      print(url)
      -- Best effort: a URL that cannot be fetched must not stop the rest.
      pcall(function () fb_wget_if_needed(url) end)
    end
  end
end
-- Process the arguments this script was invoked with.
dooptions(...)
-- myerror "Not implemented" | |
--[[ | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
cd /tmp/ | |
U=https://www.facebook.com/gustavo.gindre/posts/515590031857632 | |
lua5.1 ~/fbcache/urls.lua -utot $U | |
--]] | |
--[[ | |
-- (find-sh "grep facebook ~/TODO | sort") | |
-- (find-sh "grep facebook ~/TODO | grep photo.php | sort") | |
-- (find-sh "grep facebook ~/TODO | grep posts | sort") | |
-- (find-sh "grep facebook ~/TODO | grep -v photo.php | grep -v posts | sort") | |
# «examples» (to ".examples") | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
export LUA_INIT= | |
lua51 | |
dofile "urls.lua" | |
dooptions("-utof", "https://www.facebook.com/christian.fischgold/posts/713430788717068") | |
dooptions("-ftou", "posts_FIPRJ_487608268040090.wget") | |
dooptions("-wins", "urls.lst") | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
-- (find-fline "~/fbcache/urls.lst") | |
bigstr = ee_readfile "~/fbcache/urls.lst" | |
for li in splitlines(bigstr) do | |
end | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
mkdir ~/fbcache/tmp/ | |
cd ~/fbcache/tmp/ | |
URL=https://www.facebook.com/sergio.martins.984991/posts/10152616093738086 | |
# Convert a URL to a filename: | |
~/fbcache/urls.lua -utof $URL | |
# outputs | |
# posts_sergio.martins.984991_10152616093738086 | |
# Run wget if needed: | |
~/fbcache/urls.lua -win $URL | |
# as the file "posts_sergio.martins.984991_10152616093738086.wget" | |
# does not exist yet, this runs wget on the URL with output to | |
# "posts_sergio.martins.984991_10152616093738086.wget". | |
# Run wget if needed: | |
~/fbcache/urls.lua -win $URL | |
# We are running this a second time - | |
# "posts_sergio.martins.984991_10152616093738086.wget" exists, so this | |
# does nothing. | |
~/fbcache/urls.lua -wins ../urls.lst | |
# Bad news: running this yields no output! | |
python ~/usrc/code/python/edu/fbtxt.py pos* | |
# (find-fline "~/usrc/code/python/edu/") | |
# (find-fline "~/usrc/code/python/edu/fbtxt.py") | |
# (find-fline "~/fbcache/tmp/") | |
# (find-fline "~/usrc/") | |
# (find-oilercodefile "python/edu/") | |
(eepitch-shell2) | |
(eepitch-kill) | |
(eepitch-shell2) | |
cd ~/fbcache/tmp/ | |
tar -cvzf /tmp/posts-wget.tgz * | |
laf /tmp/posts-wget.tgz | |
# Old stuff: | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
dofile "urls.lua" | |
bigstr = getoutput "grep facebook ~/TODO | sort" | |
bigstr:gsub("([^\n]+)", fb_linesplit) | |
fb_linesplit2 = function (line) | |
local pre,prot,site,path,rest = fb_linesplit0(line) | |
if fb_goodpre(pre) and site:match "facebook" then | |
local cfname = fb_u1toc(path) | |
if cfname then | |
print(cfname) | |
print(prot..site..path) | |
print() | |
end | |
end | |
end | |
bigstr:gsub("([^\n]+)", fb_linesplit2) | |
bigstr = [=[ | |
https://www.facebook.com/sergio.martins.984991/posts/10152616093738086 | |
https://www.facebook.com/jornalanovademocracia/photos/a.288492381220437.66632.187051701364506/679809862088685/ | |
https://www.facebook.com/permalink.php?story_fbid=921476867869306&id=347772661906399 | |
https://www.facebook.com/photo.php?fbid=10201336092313990&set=a.1569106477271.73917.1523735650 | |
]=] | |
bigstr:gsub("([^\n]+)", fb_linesplit) | |
bigstr:gsub("([^\n]+)", linesplit1) | |
bigstr:gsub("([^\n]+)", linesplit2) | |
= bigstr:gsub("([^\n]+)", url_to_cache) | |
PP(fbdictionary) | |
= #bigstr | |
-- = bigstr:gsub("([^\n]+)", "_%1_") | |
-- = bigstr:gsub("([^\n]+)", linesplit1) | |
s = "/photo.php?fbid=664059120317353&set=a.140340059355931.27893.100001398127964&type=1" | |
= s:match "^/photo%.php%?fbid=([0-9]+)&set=([^/&?]+)(.*)" | |
A = {} | |
bigstr:gsub("([^\n]+)", function (li) A[#A+1] = linesplit1(li) end) | |
table.sort(A) | |
print(table.concat(A, "\n")) | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
(eepitch-lua51) | |
(eepitch-kill) | |
(eepitch-lua51) | |
u="/W/photos/W/WR" | |
T = {W="([^/]+)", R="(.*)"} | |
= u:gsub("[WR]", T) | |
local P = t" N:([0-9]+) O:([^/&?]+) R:(.*) L:([^_]+) .:%. ?:%? " | |
# Tau's translation to Python: | |
# (find-faceutilsfile "url.py") | |
# (find-angg "faceutils/README") | |
(eepitch-shell) | |
(eepitch-kill) | |
(eepitch-shell) | |
--]] | |
-- Local Variables: | |
-- coding: raw-text-unix | |
-- End: |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment