Skip to content

Instantly share code, notes, and snippets.

@lunks
Last active August 29, 2015 14:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lunks/a00652cafbeb6797c240 to your computer and use it in GitHub Desktop.
Save lunks/a00652cafbeb6797c240 to your computer and use it in GitHub Desktop.
#!/usr/bin/env lua
-- This file:
-- http://angg.twu.net/fbcache/urls.lua
-- http://angg.twu.net/fbcache/urls.lua.html
-- (find-angg "fbcache/urls.lua")
-- 2014sep07
-- (defun u () (interactive) (find-angg-upload-links "fbcache/" "urls.lua"))
--
-- (find-lua51manualw3m "")
-- (find-books "__comp/__comp.el" "ierusalimschy")
-- (find-pil2page 8 "Contents")
-- (find-pil2text 8 "Contents")
-- «.basic» (to "basic")
-- «.pypp» (to "pypp")
-- «.pypp-cache» (to "pypp-cache")
-- «.lua-cache» (to "lua-cache")
-- «.translations0» (to "translations0")
-- «.urls-by-kind» (to "urls-by-kind")
-- «.all-fb-urls» (to "all-fb-urls")
-- «.url-to-ids» (to "url-to-ids")
-- «.wget» (to "wget")
-- «.examples» (to "examples")
-- Dependencies:
-- rocks (for penlight)
-- posix (find-es "lua5" "lua-posix-wheezy")
-- lpeg: re.lua (find-es "lua-intro" "lpeg-re-1")
-- penlight: pretty, wrap (find-es "lua5" "penlight")
-- ____ _
-- | __ ) __ _ ___(_) ___
-- | _ \ / _` / __| |/ __|
-- | |_) | (_| \__ \ | (__
-- |____/ \__,_|___/_|\___|
--
-- «basic» (to ".basic")
-- Some functions copied from my LUA_INIT file.
-- (This is an attempt to make this script self-contained).
-- (find-angg "LUA/lua50init.lua")
-- Short global aliases for the two standard-library functions used all
-- over this script.
format, write = string.format, io.write

-- printf(fmt, ...): build a string with `format` and send it to `write`.
-- No newline is appended; returns nothing.
function printf(...)
  write(format(...))
end
-- readfile(fname): return the whole contents of file `fname` as one string.
-- Raises (through `assert`) the message from io.open if the file cannot
-- be opened.
function readfile(fname)
  local handle = assert(io.open(fname, "r"))
  local contents = handle:read("*a")
  handle:close()
  return contents
end
-- writefile(fname, bigstr): create or truncate `fname` (mode "w+") and
-- write `bigstr` into it. Raises (through `assert`) if the file cannot be
-- opened. Returns nothing.
function writefile(fname, bigstr)
  local handle = assert(io.open(fname, "w+"))
  handle:write(bigstr)
  handle:close()
end
-- readfile_or_nil(fname): like readfile, but any error raised while
-- reading is swallowed and nothing is returned instead.
function readfile_or_nil(fname)
  local ok, contents = pcall(readfile, fname)
  if ok then return contents end
end

-- file_exists(fname): quick hack - "exists" means "could be read".
-- Returns the file contents (a truthy value, even when empty) rather than
-- a boolean, and reads the whole file to find out.
function file_exists(fname)
  return readfile_or_nil(fname)
end
-- cfmt(fmt): "curried format" - returns a function that formats its
-- arguments with the fixed format string `fmt`.
function cfmt(fmt)
  return function (...)
    return format(fmt, ...)
  end
end

-- ee_fmt(fmt): same as cfmt, but `fmt` is first passed through ee_expand.
-- ee_expand is not defined in this file; presumably it expands "~" and
-- similar prefixes in file names (TODO confirm in lua50init.lua).
function ee_fmt(fmt)
  local expanded = ee_expand(fmt)
  return cfmt(expanded)
end
require "re"
require "posix"
userocks() -- (find-angg "LUA/lua50init.lua" "userocks")
pretty = require 'pl.pretty' -- (find-es "lua5" "pl.pretty")
-- pp0(o): return the pretty-printed form of `o` (via pretty.write).
function pp0(o)
  return pretty.write(o)
end

-- pp(o): print the pretty-printed form of `o`.
function pp(o)
  print(pretty.write(o))
end

-- ee_readfile_pp(fname): read `fname` with ee_readfile and hand the text
-- to `expr`. Neither helper is defined in this file; presumably `expr`
-- evaluates the text as a Lua expression (TODO confirm in lua50init.lua).
function ee_readfile_pp(fname)
  return expr(ee_readfile(fname))
end

-- ee_writefile_pp(fname, o): write the pretty-printed form of `o` to
-- `fname` using ee_writefile (defined elsewhere).
function ee_writefile_pp(fname, o)
  ee_writefile(fname, pp0(o))
end
-- (find-es "lua5" "pl.pretty-fix")
-- longquote(str): return Lua source text for a long-bracket string literal
-- whose value is `str`.
--
-- The bracket level is chosen as one more than the longest run of "="
-- that follows a "]" inside `str`, so the closing bracket can never occur
-- inside the quoted text; a string without any "]" gets plain [[ ... ]].
-- A newline is emitted right after the opening bracket because Lua
-- discards the first newline of a long string.
--
-- (The previous version tested the result of str:gsub, which is always
-- truthy, and derived the level from `#T` on a table with holes, whose
-- value is unspecified.)
longquote = function (str)
  local level = 0
  for eqs in str:gmatch("%](=*)") do
    if #eqs + 1 > level then level = #eqs + 1 end
  end
  local eqs = string.rep("=", level)
  return '['..eqs..'[\n'..str..']'..eqs..']'
end
-- (find-es "lua5" "pl.text.wrap")
wrap = (require "pl.text").wrap
-- wraps(bigstr): re-wrap every non-empty line of `bigstr` separately.
-- Each line is passed to `wrap` (pl.text's wrap, bound above); the pieces
-- it returns are joined with newlines and one extra "\n" is appended.
-- The original newlines between lines are left in place.
function wraps(bigstr)
  local function wrap_line(li)
    return table.concat(wrap(li), "\n") .. "\n"
  end
  local wrapped = bigstr:gsub("([^\n]+)", wrap_line)
  return wrapped
end
-- (find-angg "LUA/lua50init.lua" "ee_ls")
-- no_dots(L): remove every "." and ".." entry from the list `L`, in
-- place, and return that same list. Walks from the end so that removals
-- do not disturb the indices still to be visited.
function no_dots(L)
  local i = #L
  while i >= 1 do
    local name = L[i]
    if name == "." or name == ".." then
      table.remove(L, i)
    end
    i = i - 1
  end
  return L
end
-- ee_ls(dir): list directory `dir` with posix.dir, after passing the
-- name through ee_expand (defined elsewhere). Only the first value
-- returned by posix.dir is kept.
function ee_ls(dir)
  local names = posix.dir(ee_expand(dir))
  return names
end

-- ee_ls_no_dots(dir): the listing without "." and "..", passed through
-- `sorted` (defined elsewhere; presumably returns a sorted list).
function ee_ls_no_dots(dir)
  return sorted(no_dots(ee_ls(dir)))
end
require "re"
-- Helpers for experimenting with lpeg's "re" module from the REPL.
-- repat0: text appended to every pattern given to recomp (empty here).
-- redefs: definitions table passed to re.compile.
repat0 = ""
redefs = { concat = table.concat }

-- recomp(pat): compile pat..repat0 with re.compile, using redefs.
function recomp(pat)
  return re.compile(pat .. repat0, redefs)
end

-- retest(p): return a function that matches a subject string against
-- pattern `p` and prints the results. The pattern is recompiled on every
-- call of the returned function.
function retest(p)
  return function (s)
    print(re.match(s, recomp(p)))
  end
end
--[==[
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
= lpeg.version()
retest [=[
cs <- { c* }
c <- { [a-z] }
]=] "bop"
--]==]
--
-- _ __ _ _ _ __ _ __
-- | '_ \| | | | '_ \| '_ \
-- | |_) | |_| | |_) | |_) |
-- | .__/ \__, | .__/| .__/
-- |_| |___/|_| |_|
--
-- «pypp» (to ".pypp")
-- A "pypp object" is "Python pretty-printed object", i.e., a string
-- produced by Python's "pprint.pprint". We have to handle these
-- objects because we use facebook-sdk (in Python) to talk to
-- Facebook; facebook-sdk returns dict objects, that we then save into
-- files.
--
-- The main function here is pypp_parse, that converts pypp objects
-- into Lua tables.
--
-- Note that fetching information from Facebook is very slow and
-- error-prone - Facebook posts can be deleted or set to restricted,
-- our access token may expire, our internet connection may be
-- failing, and so on - so we want to keep a cache with the returned
-- objects, as pypp objects (one for each Facebook id). Reading pypp
-- objects is also relatively slow (up to 1/100 second for each), so
-- we want to keep a cache of "luapp objects" for speed; a luapp
-- object is a string containing a pretty-printed version of a Lua
-- table.
-- (find-angg "fbcache/p.py" "basic")
-- (find-es "python" "facebook-sdk")
-- (find-es "lua-intro" "lpeg-re-infix-1")
-- file:///usr/share/doc/lua-lpeg-dev/lpeg.html
-- file:///usr/share/doc/lua-lpeg-dev/re.html#ex
-- http://www.inf.puc-rio.br/~roberto/lpeg/
-- http://www.inf.puc-rio.br/~roberto/lpeg/re.html
-- (find-file "~/fbcache/cache_by_id/141539566016667")
-- Source text of the lpeg "re" grammar for pypp objects; it is compiled
-- below with re.compile(pypp_grammar0, pypp_defs), so every "-> name"
-- refers to an entry of pypp_defs.
--   objrest   first (initial) rule: one obj, then a position capture and,
--             after optional whitespace, a capture of the rest of the
--             subject.
--   obj       a bool, a number, a string, a {...} table or a [...] list.
--   objp      a "key: value" pair inside a table.
--   str       a single- or double-quoted string, optionally prefixed by
--             "u"; its pieces are joined with concat.
--   strc*     the pieces of a string: plain runs, backslash-escaped
--             quote/backslash, \xHH (converted with hextoc), \uHHHH and
--             \UHHHHHHHH (kept as written), and any other backslash
--             escape (handled by otherc).
-- NOTE(review): there is no rule for Python's None - confirm it never
-- occurs in the cached objects.
pypp_grammar0 = [=[
objrest <- obj {} [%s]* {.*}
obj <- bool / num / str / table / list
objp <- obj colon obj
table <- ("{" (objp (comma objp)*)* "}") -> totable
list <- ("[" (obj (comma obj )*)* "]") -> tolist
colon <- [%s]* ":" [%s]*
comma <- [%s]* "," [%s]*
bool <- ("True" / "False") -> tobool
num <- ("-"? [0-9.]+) -> tonum
str <- (ustr / str0)
ustr <- "u" str0
str0 <- ("'" (stritem / {'"'})* -> concat "'") /
('"' (stritem / {"'"})* -> concat '"')
stritem <- strcnormals / strcc / strcx / strcu / strcU / strcother
strcnormals <- {[^'"\]+}
strcc <- "\" {['"\]}
strcx <- "\x" ({[0-9a-f][0-9a-f]} -> hextoc)
strcu <- { "\u" [0-9a-f][0-9a-f][0-9a-f][0-9a-f] }
strcU <- { "\U" [%x][%x][%x][%x][%x][%x][%x][%x] }
strcother <- "\" . -> otherc
]=]
-- Semantic actions referenced by name ("-> name") in pypp_grammar0.
pypp_defs = {}

-- "True" -> true, anything else -> false.
pypp_defs.tobool = function (s) return s == "True" end

-- Numeric text -> number (nil when tonumber cannot parse it).
pypp_defs.tonum = function (s) return tonumber(s) end

-- Identity on strings.
pypp_defs.tostr = function (s) return s end

-- All captured items, in order, as a Lua list.
pypp_defs.tolist = function (...) return {...} end

-- Captures arrive flattened as key1, value1, key2, value2, ...;
-- a trailing unpaired item is ignored.
pypp_defs.totable = function (...)
  local flat, T = {...}, {}
  local i = 1
  while i < #flat do
    T[flat[i]] = flat[i + 1]
    i = i + 2
  end
  return T
end

-- Two hex digits -> the character with that code.
pypp_defs.hextoc = function (cc) return string.char(tonumber(cc, 16)) end

-- Join all captured string pieces.
pypp_defs.concat = function (...) return table.concat {...} end

do
  local simple_escapes = { n = "\n", r = "\r", t = "\t" }
  -- Character following a backslash -> the character it denotes.
  -- Unknown escapes are reported on stdout and kept as written
  -- (backslash included).
  pypp_defs.otherc = function (c)
    local ch = simple_escapes[c]
    if ch then return ch end
    print("\\"..c)
    return "\\"..c
  end
end
-- The compiled pypp grammar (built once, when this file is loaded).
pypp_grammar = re.compile(pypp_grammar0, pypp_defs)

-- pypp_parse(bigstr, pos): match `bigstr` against the grammar, starting
-- at the optional position `pos`; returns whatever the match yields.
function pypp_parse(bigstr, pos)
  return pypp_grammar:match(bigstr, pos)
end

-- pypp_test(bigstr, pos): parse and pretty-print the result.
function pypp_test(bigstr, pos)
  pp(pypp_parse(bigstr, pos))
end
-- _
-- _ __ _ _ _ __ _ __ ___ __ _ ___| |__ ___
-- | '_ \| | | | '_ \| '_ \ / __/ _` |/ __| '_ \ / _ \
-- | |_) | |_| | |_) | |_) | | (_| (_| | (__| | | | __/
-- | .__/ \__, | .__/| .__/ \___\__,_|\___|_| |_|\___|
-- |_| |___/|_| |_|
--
-- «pypp-cache» (to ".pypp-cache")
--
-- Conversions:
-- readfile parse
-- id ---> pyppf ----------> pypp -------> obj ---> sexp
-- obj ---> date
-- obj ---> txt
--
-- (find-fline "~/fbcache/cache_by_id/")
-- (find-fline "~/fbcache/cache_by_id_lua/")
-- Cache directories: pypp objects (as saved by the Python side) and their
-- luapp counterparts, one file per Facebook id.
pypp_dir, luapp_dir = "~/fbcache/cache_by_id/", "~/fbcache/cache_by_id_lua/"

-- id -> file name in each cache (built with ee_fmt).
id_to_pyppf = ee_fmt("~/fbcache/cache_by_id/%s")
id_to_luappf = ee_fmt("~/fbcache/cache_by_id_lua/%s")

-- Listings of each cache directory, via ee_ls_no_dots.
function pypp_ls()
  return ee_ls_no_dots(pypp_dir)
end

function luapp_ls()
  return ee_ls_no_dots(luapp_dir)
end
-- pypp_fname(id, e): name of the pypp cache file for `id`, with the
-- optional suffix `e` appended.
pypp_fname = function (id, e) return pypp_dir..id..(e or "") end

-- pypp_readfile(id, e): contents of that file, read with ee_readfile.
-- The suffix `e` is now forwarded to pypp_fname (it used to be accepted
-- but silently ignored).
pypp_readfile = function (id, e) return ee_readfile(pypp_fname(id, e)) end

-- pypp_id_obj(id, p): parse the pypp file for `id`, starting at the
-- optional position `p`.
pypp_id_obj = function (id, p) return pypp_parse(pypp_readfile(id), p) end
-- pypp_id_sexp(id): an eev-style "(find-fline ...)" hyperlink to the
-- file for `id`. Note that it points into luapp_dir, not pypp_dir.
function pypp_id_sexp(id)
  local sexp = format('(find-fline "%s%s")', luapp_dir, id)
  return sexp
end

-- obj_date(o): the object's created_time, else its updated_time.
-- A nil/false `o` is returned unchanged.
function obj_date(o)
  if not o then return o end
  return o.created_time or o.updated_time
end
-- pypp_obj_txt(o): render a parsed Facebook object as a block of text
-- with From/Date headers, its link, an eev hyperlink to the cached file,
-- and the wrapped body (the object's name, else its message).
-- Missing fields are replaced by "(no ...)" placeholders.
function pypp_obj_txt(o)
  local fields = {}
  fields.FROM = o.from and o.from.name or "(no FROM)"
  fields.DATE = obj_date(o) or "(no DATE)"
  fields.URL  = o.link or "(no URL)"
  fields.SEXP = pypp_id_sexp(o.id)
  fields.BODY = wraps(o.name or o.message or "(no NAME or MESSAGE)")
  local template = "From: $FROM\nDate: $DATE\n$URL\n $SEXP\n\n$BODY"
  -- Every $NAME in the template must have a value; anything else is a bug.
  local function lookup(name)
    local value = fields[name]
    if not value then error(name.." is nil") end
    return value
  end
  local txt = template:gsub("%$([A-Z]+)", lookup)
  return txt
end

-- pypp_id_txt(id): the same text, for the pypp object cached under `id`.
function pypp_id_txt(id)
  return pypp_obj_txt(pypp_id_obj(id))
end
-- _ _
-- | | _ _ __ _ ___ __ _ ___| |__ ___
-- | | | | | |/ _` | / __/ _` |/ __| '_ \ / _ \
-- | |__| |_| | (_| | | (_| (_| | (__| | | | __/
-- |_____\__,_|\__,_| \___\__,_|\___|_| |_|\___|
--
-- «lua-cache» (to ".lua-cache")
-- id_objs: id -> parsed object; filled by read_lua_cache.
id_objs = {}

-- write_lua_cache(verbose): for every file in the pypp cache, parse it
-- and write the resulting object, pretty-printed, to the file of the
-- same id in the luapp cache. With a truthy `verbose`, each id is printed
-- as it is processed.
function write_lua_cache(verbose)
  local names = pypp_ls()
  for k = 1, #names do
    local id = names[k]
    if verbose then print(id) end
    local o = pypp_parse(ee_readfile(id_to_pyppf(id)))
    ee_writefile_pp(id_to_luappf(id), o)
  end
end
-- test_lua_cache(): read the whole luapp cache, printing each id.
function test_lua_cache()
  read_lua_cache("verbose")
end

-- read_lua_cache(verbose): (re)load the luapp cache into two globals:
--   ids      the list of ids found in the luapp directory;
--   id_objs  id -> object read back from the corresponding luapp file.
-- With a truthy `verbose`, each id is printed as it is read.
function read_lua_cache(verbose)
  ids = luapp_ls()
  id_objs = {}
  for k = 1, #ids do
    local id = ids[k]
    if verbose then print(id) end
    id_objs[id] = ee_readfile_pp(id_to_luappf(id))
  end
end
--[==[
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
write_lua_cache("verbose")
test_lua_cache()
read_lua_cache()
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
read_lua_cache()
= table.concat(ids, "\n")
pp(id_objs[ids[1]])
-- (find-fline "~/fbcache/")
-- (find-fline "~/fbcache/cache_by_id/")
-- (find-fline "~/fbcache/cache_by_id_lua/")
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
for _,id in ipairs(pypp_ls()) do
print(id)
print(pypp_id_txt(id))
print("\n--------------\n")
end
hugestr = mapconcat(pypp_id_txt, pypp_ls(), "\n--------------\n")
writefile("huge.txt", hugestr)
# (find-fline "~/fbcache/cache_by_id/")
# (find-fline "~/fbcache/huge.txt")
# (find-sh "grep ^From: ~/fbcache/huge.txt")
# (find-sh "grep ^From: ~/fbcache/huge.txt | sort")
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
cd ~/fbcache/
Scp-np huge.txt $TWUP/fbcache/
Scp-np huge.txt $TWUS/fbcache/
# http://angg.twu.net/fbcache/huge.txt
--]==]
-- _ _ _
-- | |_ _ __ __ _ _ __ ___| | __ _| |_ ___ _ __ ___
-- | __| '__/ _` | '_ \/ __| |/ _` | __/ _ \| '__/ __|
-- | |_| | | (_| | | | \__ \ | (_| | || (_) | | \__ \
-- \__|_| \__,_|_| |_|___/_|\__,_|\__\___/|_| |___/
--
-- «translations0» (to ".translations0")
-- Idea: Facebook URLs are split into "parts" by fb_url_to_iparts,
-- like this: ...
-- /permalink.php?story_fbid=N&set=OR pesfs_L_L 0
-- /W/photos/W/WR photos_L_L_L 3
-- (find-angg "fbcache/urls.lua" "translations0" " /W/photos/W/WR ")
-- /W/media_set?set=OR media0_L_L 0
-- Python version: (find-angg "fbcache/p.py" "translations0")
-- The translation table, as text: one rule per line, three
-- whitespace-separated columns (split by the gmatch loop that builds
-- fb_translations1 below).
--   column 1: URL path template; "https://www.facebook.com" is prepended
--             by fb_abc_to_tr.
--   column 2: template for the short name.
--   column 3: stored as `etc`; all_ids and url_to_ids read its leading
--             run of digits 1-9 as the positions of the captures that are
--             Facebook ids ("0" therefore selects none).
-- In columns 1 and 2 the characters W N O R L . ? are placeholders,
-- expanded with fb_char_to_re / fb_char_to_fmt.
fb_translations0 = [[
/events/N/permalink/N/R evpe_L_L 1_2
/events/N/N/R ev2_L_L 12
/events/N/R ev1_L 1
/groups/W/permalink/N/R grpe_L_L 2
/groups/N/NR groups_L_L 1
/groups/W/NR groups_L_L 2
/groups/NR groups_L 1
/groups/WR groups_L 0
/W/media_set?set=a.N.N.NR media4_L_L_L 0
/media/set/?set=OR media_L 0
/notes/W/W/NR notes_L_L_L 3
/notes/N/R notes_L 1
/pages/W/NR pages_L_L 2
/permalink.php?story_fbid=N&id=NR pesfi_L_L 1
/photo.php?fbid=N&set=OR photofs_L_L 1
/N/photos/t.N/N/R photos_L_t.L_L 13
/N/photos/gm.N/N/R photos_L_gm.L_L 123
/N/photos/a.N.N.N/N/R photos_L_a.L.L.L_L 1245
/photo.php?v=N&set=OR photovs_L_L 1
/photo.php?v=NR photov_L 1
/W/posts/NR posts_L_L 2
/video.php?v=NR video_L 1
]]
-- fb_chars_to_tbl(str): parse a string of "c:text" items separated by
-- whitespace into a table mapping each one-character key `c` to its text
-- (which may be empty).
function fb_chars_to_tbl(str)
  local map = {}
  local function record(char, rgx) map[char] = rgx end
  -- gsub is used only to visit the matches; its result is discarded.
  str:gsub("(%S):(%S*)", record)
  return map
end
-- Placeholder expansions used by fb_abc_to_tr:
--   fb_char_to_re   placeholder -> Lua pattern fragment (for matching);
--   fb_char_to_fmt  placeholder -> string.format fragment (for building).
do
  local re_spec  = " W:([^/]+) N:([0-9]+) O:([^/&?]+) R:(.*) L:([^_]+) .:%. ?:%? "
  local fmt_spec = " W:%s N:%s O:%s R: L:%s .:. ?:? "
  fb_char_to_re  = fb_chars_to_tbl(re_spec)
  fb_char_to_fmt = fb_chars_to_tbl(fmt_spec)
end
-- fb_abc_to_tr(a, b, c): build one translation entry from the three
-- columns of a fb_translations0 line.
--   a  URL path template; "https://www.facebook.com" is prepended to it
--   b  short-name template
--   c  the third column, stored untouched as `etc`
-- Returns { u = <a>, f = <b>, etc = c }, where <a> and <b> are tables
--   { orig = the template,
--     re   = anchored Lua pattern (placeholders -> fb_char_to_re),
--     fmt  = format string       (placeholders -> fb_char_to_fmt) }.
-- The full URL template is now held in a local; it used to be assigned to
-- an accidental global named `ha`.
fb_abc_to_tr = function (a, b, c)
  -- The extra parentheses drop gsub's second result (the match count).
  local f_re  = function (str) return (str:gsub("[WNORL?.]", fb_char_to_re)) end
  local f_fmt = function (str) return (str:gsub("[WNORL?.]", fb_char_to_fmt)) end
  local f = function (str)
    return { orig = str,
             re   = "^"..f_re(str).."$",
             fmt  = f_fmt(str),
           }
  end
  local ha = "https://www.facebook.com"..a
  return { u = f(ha), f = f(b), etc = c }
end
-- fb_translations1: the parsed translation table - one fb_abc_to_tr
-- entry per line of fb_translations0, in the same order.
fb_translations1 = {}
for a, b, c in fb_translations0:gmatch("(%S+)[ \t]+(%S+)[ \t]+(%S+)") do
  fb_translations1[#fb_translations1 + 1] = fb_abc_to_tr(a, b, c)
end
--[==[
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
pp(fb_translations1)
--]==]
-- fb_url_to_iparts(url): find the first entry of fb_translations1 whose
-- URL pattern matches `url`.
-- Returns the entry's index and a list with (at most) the first five
-- captures; returns nothing when no entry matches.
function fb_url_to_iparts(url)
  for i = 1, #fb_translations1 do
    local a, b, c, d, e = url:match(fb_translations1[i].u.re)
    if a then
      return i, { a, b, c, d, e }
    end
  end
end
-- fb_u1toc = function (url1)
-- for i,entry in ipairs(fb_translations) do
-- local a,b,c,d,e = url1:match(entry.upat)
-- if a then return format(entry.cfmt, a, b, c, d, e) end
-- end
-- end
-- fb_ctou1 = function (cfname)
-- for i,entry in ipairs(fb_translations) do
-- local a,b,c,d,e = cfname:match(entry.cpat)
-- if a then return format(entry.ufmt, a, b, c, d, e) end
-- end
-- end
-- _ _ _ _ _
-- _ _ _ __| |___ | |__ _ _ | | _(_)_ __ __| |
-- | | | | '__| / __| | '_ \| | | | | |/ / | '_ \ / _` |
-- | |_| | | | \__ \ | |_) | |_| | | <| | | | | (_| |
-- \__,_|_| |_|___/ |_.__/ \__, | |_|\_\_|_| |_|\__,_|
-- |___/
--
-- «urls-by-kind» (to ".urls-by-kind")
-- fb_urls_by_kind(bigstr): classify every whitespace-separated word of
-- `bigstr` by the index of the translation rule that matches it.
-- Returns a table with one list per rule index, plus list 0 for the URLs
-- that no rule matches. The table is also left in the global
-- `urls_by_kind`.
function fb_urls_by_kind(bigstr)
  urls_by_kind = {}
  for kind = 0, #fb_translations1 do
    urls_by_kind[kind] = {}
  end
  for url in bigstr:gmatch("(%S+)") do
    local kind = fb_url_to_iparts(url) or 0
    table.insert(urls_by_kind[kind], url)
  end
  return urls_by_kind
end
-- ubyk = fb_urls_by_kind(readfile("/tmp/o0"))
-- prubyk(kind, n): print the URLs of one kind, taken from the global
-- `ubyk` (which the caller sets to the result of fb_urls_by_kind).
-- For kind > 0 a hyperlink to the rule is printed first; sub(25) drops
-- the 24-character "https://www.facebook.com" prefix of the template.
-- At most `n` URLs are printed; all of them when `n` is nil.
function prubyk(kind, n)
  if kind > 0 then
    local template = fb_translations1[kind].u.orig
    printf(' (find-angg "fbcache/urls.lua" "translations0" " %s ")\n',
           template:sub(25))
  end
  for i, url in ipairs(ubyk[kind]) do
    print(kind.." "..url)
    if i == n then break end
  end
end

-- prubyks(n): run prubyk on every kind from 1 up, with a blank line
-- before each one.
function prubyks(n)
  for kind = 1, #fb_translations1 do
    print()
    prubyk(kind, n)
  end
end
pyhead = [=[
 (eepitch-python)
 (eepitch-kill)
 (eepitch-python)
execfile("p.py", globals())
]=]
-- all_ids(bigstr, f): extract the Facebook ids from every
-- whitespace-separated URL in `bigstr`.
-- For a URL matched by rule i, the leading digits 1-9 of that rule's
-- `etc` field say which captures are ids. Each id is appended to the
-- global list `url_ids` (reset on every call), which is also returned.
--   with f:    f(id) is called for each id, and nothing is printed;
--   without f: a Python script is printed instead - `pyhead`, then a
--              "# url" line per URL and a pp(fi("id")) line per id.
function all_ids(bigstr, f)
  local printing = not f
  if printing then print(pyhead) end
  url_ids = {}
  for url in bigstr:gmatch("(%S+)") do
    if printing then print("# "..url) end
    local i, parts = fb_url_to_iparts(url)
    local digits = i and fb_translations1[i].etc:match("^[1-9]+")
    if digits then
      for d in digits:gmatch(".") do
        local id = parts[tonumber(d)]
        table.insert(url_ids, id)
        if printing then
          printf('pp(fi("%s"))\n', id)
        else
          f(id)
        end
      end
    end
  end
  return url_ids
end
--[==[
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
 (eek "M-o <<wrap>> M-o")
dofile "urls.lua"
all_ids(ee_readfile "/tmp/o1")
print(pyhead)
all_ids(ee_readfile "/tmp/o1", function (id) print('fc("'..id..'")') end)
all_ids(ee_readfile "/tmp/o0", function (id) print('fc("'..id..'")') end)

 (eek "M-o M-< 2*<down>")
 (eepitch-python)
 (eepitch-kill)
 (eepitch-python)
execfile("p.py", globals())
execfile("/tmp/o2", globals())
-- (find-fline "/tmp/o1")
-- (find-file "~/fbcache/cache_by_id/")
--]==]
-- digits = etc:match("^([1-9]*)")
-- if #digits > 0 then
-- local parts = {a, b, c, d, e}
-- return parts[0 + digits[1]]
-- end
-- _ _ _ _____ ____ _
-- / \ | | | | ___| __ ) _ _ _ __| |___
-- / _ \ | | | | |_ | _ \ | | | | '__| / __|
-- / ___ \| | | | _| | |_) | | |_| | | | \__ \
-- /_/ \_\_|_| |_| |____/ \__,_|_| |_|___/
--
-- «all-fb-urls» (to ".all-fb-urls")
--[==[
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
cat ~/TODO \
~/ORG/index.org \
~/TH/ee.blogme \
~/TH/2014-xs.blogme \
~/TH/links-sobre-gaza.blogme \
| grep https://www.facebook.com/ \
| tr ' ' '\n' \
| grep https://www.facebook.com/ \
| sort | uniq \
| tee /tmp/o0
laf /tmp/o0 ~/fbcache/all-fb-urls.lst
cp -v /tmp/o0 ~/fbcache/all-fb-urls.lst
# (find-fline "/tmp/o0")
# (find-fline "~/fbcache/all-fb-urls.lst")
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
 (eek "M-o <<wrap>> M-o")
dofile "urls.lua"
ubyk = fb_urls_by_kind(readfile("/tmp/o0"))
prubyks(4)
prubyk(0)
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
ubyk = fb_urls_by_kind(readfile("/tmp/o0"))
for i=0,#ubyk do
print(i,#ubyk[i])
end
prubyks(4)
pp(fb_translations1[1])
prubyk(0)
prubyk(0, 10)
prubyk(1)
prubyk(1, 10)
--]==]
-- _ _ _ _
-- _ _ _ __| | | |_ ___ (_) __| |___
-- | | | | '__| | | __/ _ \ | |/ _` / __|
-- | |_| | | | | | || (_) | | | (_| \__ \
-- \__,_|_| |_| \__\___/ |_|\__,_|___/
--
-- «url-to-ids» (to ".url-to-ids")
-- url_to_ids(url): the list of Facebook ids contained in `url`.
-- The URL is matched with fb_url_to_iparts; the leading digits 1-9 of
-- the matching rule's `etc` field select which captures are ids.
-- Returns nothing when no rule matches, or when the rule selects no
-- captures.
function url_to_ids(url)
  local i, parts = fb_url_to_iparts(url)
  if not i then return end
  local digits = fb_translations1[i].etc:match("^[1-9]+")
  if not digits then return end
  local ids = {}
  for d in digits:gmatch(".") do
    table.insert(ids, parts[tonumber(d)])
  end
  return ids
end
-- fb_urls: the list of known Facebook URLs (empty until read_fb_urls runs).
-- fb_urls_file: the file they are read from.
fb_urls = {}
fb_urls_file = "~/fbcache/all-fb-urls.lst"

-- read_fb_urls(): reload fb_urls from fb_urls_file. `split` and
-- `ee_readfile` are defined elsewhere; presumably `split` breaks the text
-- at whitespace (TODO confirm in lua50init.lua).
function read_fb_urls()
  fb_urls = split(ee_readfile(fb_urls_file))
end
-- (find-fline "~/fbcache/all-fb-urls.lst")
-- id_urls: id -> list of the URLs in which that id occurs.
id_urls = {}

-- read_id_urls(verbose): add every URL of fb_urls to the id_urls lists
-- of the ids it contains. id_urls is not cleared first, so calling this
-- twice records every URL twice. With a truthy `verbose`, each URL and
-- each of its ids is printed.
function read_id_urls(verbose)
  for _, url in ipairs(fb_urls) do
    if verbose then print(url) end
    local ids = url_to_ids(url) or {}
    for _, id in ipairs(ids) do
      if verbose then print(" "..id) end
      local urls = id_urls[id]
      if not urls then
        urls = {}
        id_urls[id] = urls
      end
      table.insert(urls, url)
    end
  end
end
-- Text rendering of cached objects, in layers:
--   id_obj_txt0(id)  the object's text (pypp_obj_txt of id_objs[id]);
--   id_obj_txt1(id)  the URLs recorded for the id, one per line, then txt0;
--   id_obj_txt2(id)  an anchor line - the id between bytes 171 and 187 -
--                    then txt1;
--   id_objs_txt(ids) txt2 of every id, joined by a dashed separator.
-- `mapconcat` is defined elsewhere; presumably it maps a function over a
-- list and concatenates the results (TODO confirm in lua50init.lua).
function id_obj_txt0(id)
  return pypp_obj_txt(id_objs[id])
end

function id_obj_txt1(id)
  local url_lines = mapconcat(cfmt "%s\n", id_urls[id] or {})
  return url_lines .. id_obj_txt0(id)
end

function id_obj_txt2(id)
  local anchor = cfmt "\171%s\187\n" (id)
  return anchor .. id_obj_txt1(id)
end

function id_objs_txt(ids)
  return mapconcat(id_obj_txt2, ids, "\n\n---------------\n")
end
-- id_date(id): the date of the cached object for `id` (see obj_date).
-- When there is no date, the id prefixed by a space is returned instead.
function id_date(id)
  return obj_date(id_objs[id]) or " "..id
end
-- ids_sorted_by_date(): a copy of the global list `ids`, ordered by
-- id_date. `sorted` and `shallowcopy` are defined elsewhere.
-- The comparison is a strict "<": an order function that answers true
-- for equal keys (the previous "<=") is invalid for table.sort-style
-- sorting and can raise "invalid order function for sorting" when two
-- ids share a date.
ids_sorted_by_date = function ()
  local lt = function (a, b) return id_date(a) < id_date(b) end
  return sorted(shallowcopy(ids), lt)
end
-- Trailer appended to the generated text (see write_huge_txt and the
-- "-utot" option): three newlines, then a "Local Variables" block whose
-- only setting is "coding: raw-text-unix".
raw_text_unix = "\n\n\n"..[[
-- Local Variables:
-- coding: raw-text-unix
-- End:
]]
-- write_huge_txt(): write the text of every cached object, ordered by
-- date and followed by the raw_text_unix trailer, to "huge.txt"
-- (a relative name, handed as is to ee_writefile).
function write_huge_txt()
  local body = id_objs_txt(ids_sorted_by_date())
  ee_writefile("huge.txt", body..raw_text_unix)
end
-- all_fb_urls0 = function () return ee_readfile("~/fbcache/all-fb-urls.lst") end
-- all_fb_urls1 = function () return split(all_fb_urls0()) end
-- gen_all_fb_urls = function () return all_fb_urls0():gmatch("%S+") end
--[==[
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
read_fb_urls()
all_fb_ids = Set.new() -- (find-angg "LUA/lua50init.lua" "Set")
for _,url in ipairs(fb_urls) do
for _,id in ipairs(url_to_ids(url) or {}) do
all_fb_ids._[id] = id
end
end
= all_fb_ids
python_ids = Set.from(pypp_ls())
= python_ids
missing_ids = all_fb_ids - python_ids
= missing_ids
= missing_ids:ksc()
= pyhead..mapconcat(cfmt 'pp(fi("%s"))\n', missing_ids:ks())
--]==]
--[==[
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
read_fb_urls()
read_id_urls()
read_lua_cache()
write_huge_txt()
-- (find-fline "huge.txt")
-- (find-angg "fbcache/huge.txt")
-- (find-angg "fbcache/huge.txt" "10152622815229198")
pp(id_urls["701795806545331"])
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
laf huge.txt
lua5.1 ~/blogme3/blogme3.lua -o huge.txt.html -a2html huge.txt
cd ~/fbcache/
Scp -v huge.txt huge.txt.html edrx@angg.twu.net:/home/edrx/slow_html/fbcache/
Scp -v huge.txt huge.txt.html edrx@angg.twu.net:/home/edrx/public_html/fbcache/
http://angg.twu.net/fbcache/huge.txt.html
= id_obj_txt2(ids[1])
= id_obj_txt2(ids[1])..id_obj_txt2(ids[2])
= cfmt "\187" "a"
= cfmt "\171%s\187" "a"
-- for _,id in ipairs(ids) do print(id, id_date(id)) end
for _,id in ipairs(ids_sorted_by_date()) do print(id.." "..id_date(id)) end
= id_date "701795806545331"
= mapconcat(cfmt " %s\n", id_urls["701795806545331"] or {})
id = "701795806545331"
pp(id_objs[id])
= id_obj_txt(id)
pp(id_urls[id])
=
= id_obj_txt0(id)
id = "10151841953998086"
pypp_id_txt = pypp_obj_txtfunction (o)
all_fb_urls = all_fb_urls1()
url = "https://www.facebook.com/usauncut/photos/a.190167221017767.44131.186219261412563/823206274380522/?type=1"
url = "https://www.facebook.com/video.php?v=1009188935762835"
pp(url_to_ids(url))
id_urls = {}
for url in
= #all_fb_urls
get_id_urls = function
id_urls = {}
for url in
id_urls[id] = id_urls[id] or {}
table.insert(id_urls[id], url)
pp(url_to_ids)
gen_all_fb_urls
--]==]
-- _
-- __ ____ _ ___| |_
-- \ \ /\ / / _` |/ _ \ __|
-- \ V V / (_| | __/ |_
-- \_/\_/ \__, |\___|\__|
-- |___/
--
-- «wget» (to ".wget")
-- fb_linesplit0(line): split `line` around the first http(s) URL in it.
-- Returns five strings: the text before the URL, the protocol
-- ("http://" or "https://"), the site (printable characters up to the
-- first "/"), the path (the following run of printable, non-space
-- characters) and the rest of the line. All five are nil when the line
-- contains no URL.
function fb_linesplit0(line)
  local url_pat = "^(.-)(https?://)([!-.0-~]*)([!-~]*)(.*)$"
  local before, scheme, host, tail, after = line:match(url_pat)
  return before, scheme, host, tail, after
end

-- fb_goodpre(pre): truthy when `pre`, the text before a URL, lets the
-- URL stand on its own - i.e. when it is empty or ends in a space or
-- tab. A nil `pre` gives nil.
function fb_goodpre(pre)
  if not pre then return pre end
  if pre == "" then return true end
  return (pre:sub(-1):match("[ \t]"))
end
-- fb_linesplit1(line): like fb_linesplit0, but only for lines whose URL
-- stands on its own (see fb_goodpre) and whose site contains "facebook".
-- Returns pre, prot, site, path, rest, cfname - the five pieces from
-- fb_linesplit0 plus the cache file name for the URL (nil when no rule
-- applies). Returns nothing for any other line.
--
-- The cache name used to come from fb_u1toc, which survives in this file
-- only as commented-out code, so calling it failed with "attempt to call
-- a nil value". A global fb_u1toc is still used when one is defined;
-- otherwise the name is derived from fb_translations1: the first rule
-- whose URL pattern matches supplies the captures, which are fed to that
-- rule's short-name format.
-- NOTE(review): confirm that names built this way are the ones expected
-- for the ".wget" cache files.
fb_linesplit1 = function (line)
  local pre,prot,site,path,rest = fb_linesplit0(line)
  if not (fb_goodpre(pre) and site:match "facebook") then return end
  local cfname
  if fb_u1toc then
    cfname = fb_u1toc(path)
  else
    -- The rules are anchored on this exact prefix (see fb_abc_to_tr), so
    -- rebuild the URL with it instead of the protocol/site actually found.
    local url = "https://www.facebook.com"..path
    for _,tr in ipairs(fb_translations1 or {}) do
      local a,b,c,d,e = url:match(tr.u.re)
      if a then
        cfname = format(tr.f.fmt, a, b, c, d, e)
        break
      end
    end
  end
  return pre,prot,site,path,rest,cfname
end
-- fb_linesplit(line): print the cache file name of the Facebook URL in
-- `line`, if there is one; print nothing otherwise. Returns nothing.
-- Written to be used as a gsub callback over the lines of a text.
--
-- This used to repeat the body of fb_linesplit1, including its call to
-- the undefined fb_u1toc; it now delegates to fb_linesplit1 so that the
-- two cannot drift apart.
fb_linesplit = function (line)
  local pre,prot,site,path,rest,cfname = fb_linesplit1(line)
  if cfname then print(cfname) end
end
-- myerror(fmt, ...): report a fatal problem - print the formatted
-- message followed by a newline, then raise an error.
--
-- There used to be two consecutive definitions here; the first one
-- (which called os.exit(2)) was dead, being overwritten at once. The
-- error is now raised with the message as its value instead of nil, so
-- that pcall callers and the top-level handler can show what went wrong.
-- Level 0 keeps position information out of the message.
myerror = function (fmt, ...)
  local msg = format(fmt, ...)
  printf("%s\n", msg)
  error(msg, 0)
end
-- fb_utof(url): the cache file name for `url` ("URL to file"), i.e. the
-- sixth value returned by fb_linesplit1.
-- Calls myerror when fb_linesplit1 yields no file name. The message now
-- shows `url`; it used to format an undefined variable named `line`.
fb_utof = function (url)
  local pre,prot,site,path,rest,fname = fb_linesplit1(url)
  if not fname then myerror("No FB URL in: %s", url) end
  return fname
end
-- User-Agent string sent by fb_wget.
fb_user_agent = "Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.8.1.3) "..
  "Gecko/20070310 Iceweasel/2.0.0.3 (Debian-2.0.0.3-1)"

-- fb_wget(fname, url): download `url` into the file `fname` by running
-- wget through `getoutput` (defined elsewhere).
--
-- URLs come from text files, so every value substituted into the command
-- line is quoted for the shell: it is wrapped in single quotes, and each
-- single quote inside it becomes '\''. Previously the file name was not
-- quoted at all, and a "'" in the URL ended the quoting early, letting
-- the rest of the URL run as shell code.
fb_wget = function (fname, url)
  local shquote = function (s)
    return "'"..tostring(s):gsub("'", "'\\''").."'"
  end
  local A = {UA=shquote(fb_user_agent), FNAME=shquote(fname), URL=shquote(url)}
  -- Table replacements are inserted literally by gsub (no "%" processing).
  local a = function (str) return (str:gsub("%$([A-Z]+)", A)) end
  local cmd = a "wget --progress=dot -U $UA -O $FNAME $URL"
  getoutput(cmd)
end
-- fb_wget_if_needed(url): download `url` into "<cache name>.wget",
-- unless that file is already there.
function fb_wget_if_needed(url)
  local fname = fb_utof(url)..".wget"
  if file_exists(fname) then return end
  fb_wget(fname, url)
end

-- fb_cache(url): contents of the cache file for `url`.
-- NOTE(review): this reads the bare cache name, while fb_wget_if_needed
-- writes to that name plus ".wget" - confirm which file is meant here.
function fb_cache(url)
  return readfile(fb_utof(url))
end
-- (find-ydb "ydb" "dooptions")
-- dooptions(optionname, a, b): command-line front end.
--   -utof URL   print the cache file name for URL
--   -utot URL   print the text of the object cached for the first id in
--               URL, preceded by the URL itself
--   -win  URL   download URL with wget unless it is already cached
--   -wins FILE  run "-win" on every line of FILE
-- Any other option name (including none at all, as when this file is
-- loaded with dofile) does nothing. `b` is not used.
dooptions = function (optionname, a, b)
  if optionname == "-utof" then
    local pre,prot,site,path,rest,cfname = fb_linesplit1(a)
    if cfname then print(cfname) else myerror("No FB URL in: %s", a) end
  elseif optionname == "-utot" then
    local url = a
    -- url_to_ids returns nothing for a URL it does not recognize;
    -- report that instead of failing on an attempt to index nil.
    local ids = url_to_ids(url)
    local id = ids and ids[1]
    if not id then return myerror("No FB id in: %s", url) end
    -- PP(url, id, id_to_luappf(id))
    local o = ee_readfile_pp(id_to_luappf(id))
    -- pp(o)
    id_urls[id] = {url}
    id_objs[id] = o
    print(id_obj_txt1(id)..raw_text_unix)
  elseif optionname == "-win" then
    fb_wget_if_needed(a)
  elseif optionname == "-wins" then
    local bigstr = readfile(a)
    for url in bigstr:gmatch "([^\n]+)" do
      print(url)
      -- Best effort: an error on one line must not stop the others.
      pcall(function () fb_wget_if_needed(url) end)
    end
  end
end
-- Run the option handler on the arguments this chunk received - the
-- command-line arguments when the file is run as a script.
dooptions(...)
-- myerror "Not implemented"
--[[
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
cd /tmp/
U=https://www.facebook.com/gustavo.gindre/posts/515590031857632
lua5.1 ~/fbcache/urls.lua -utot $U
--]]
--[[
-- (find-sh "grep facebook ~/TODO | sort")
-- (find-sh "grep facebook ~/TODO | grep photo.php | sort")
-- (find-sh "grep facebook ~/TODO | grep posts | sort")
-- (find-sh "grep facebook ~/TODO | grep -v photo.php | grep -v posts | sort")
# «examples» (to ".examples")
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
export LUA_INIT=
lua51
dofile "urls.lua"
dooptions("-utof", "https://www.facebook.com/christian.fischgold/posts/713430788717068")
dooptions("-ftou", "posts_FIPRJ_487608268040090.wget")
dooptions("-wins", "urls.lst")
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
-- (find-fline "~/fbcache/urls.lst")
bigstr = ee_readfile "~/fbcache/urls.lst"
for li in splitlines(bigstr) do
end
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
mkdir ~/fbcache/tmp/
cd ~/fbcache/tmp/
URL=https://www.facebook.com/sergio.martins.984991/posts/10152616093738086
# Convert a URL to a filename:
~/fbcache/urls.lua -utof $URL
# outputs
# posts_sergio.martins.984991_10152616093738086
# Run wget if needed:
~/fbcache/urls.lua -win $URL
# as the file "posts_sergio.martins.984991_10152616093738086.wget"
# does not exist yet, this runs wget on the URL with output to
# "posts_sergio.martins.984991_10152616093738086.wget".
# Run wget if needed:
~/fbcache/urls.lua -win $URL
# We are running this a second time -
# "posts_sergio.martins.984991_10152616093738086.wget" exists, so this
# does nothing.
~/fbcache/urls.lua -wins ../urls.lst
# Bad news: running this yields no output!
python ~/usrc/code/python/edu/fbtxt.py pos*
# (find-fline "~/usrc/code/python/edu/")
# (find-fline "~/usrc/code/python/edu/fbtxt.py")
# (find-fline "~/fbcache/tmp/")
# (find-fline "~/usrc/")
# (find-oilercodefile "python/edu/")
 (eepitch-shell2)
 (eepitch-kill)
 (eepitch-shell2)
cd ~/fbcache/tmp/
tar -cvzf /tmp/posts-wget.tgz *
laf /tmp/posts-wget.tgz
# Old stuff:
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
dofile "urls.lua"
bigstr = getoutput "grep facebook ~/TODO | sort"
bigstr:gsub("([^\n]+)", fb_linesplit)
fb_linesplit2 = function (line)
local pre,prot,site,path,rest = fb_linesplit0(line)
if fb_goodpre(pre) and site:match "facebook" then
local cfname = fb_u1toc(path)
if cfname then
print(cfname)
print(prot..site..path)
print()
end
end
end
bigstr:gsub("([^\n]+)", fb_linesplit2)
bigstr = [=[
https://www.facebook.com/sergio.martins.984991/posts/10152616093738086
https://www.facebook.com/jornalanovademocracia/photos/a.288492381220437.66632.187051701364506/679809862088685/
https://www.facebook.com/permalink.php?story_fbid=921476867869306&id=347772661906399
https://www.facebook.com/photo.php?fbid=10201336092313990&set=a.1569106477271.73917.1523735650
]=]
bigstr:gsub("([^\n]+)", fb_linesplit)
bigstr:gsub("([^\n]+)", linesplit1)
bigstr:gsub("([^\n]+)", linesplit2)
= bigstr:gsub("([^\n]+)", url_to_cache)
PP(fbdictionary)
= #bigstr
-- = bigstr:gsub("([^\n]+)", "_%1_")
-- = bigstr:gsub("([^\n]+)", linesplit1)
s = "/photo.php?fbid=664059120317353&set=a.140340059355931.27893.100001398127964&type=1"
= s:match "^/photo%.php%?fbid=([0-9]+)&set=([^/&?]+)(.*)"
A = {}
bigstr:gsub("([^\n]+)", function (li) A[#A+1] = linesplit1(li) end)
table.sort(A)
print(table.concat(A, "\n"))
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
 (eepitch-lua51)
 (eepitch-kill)
 (eepitch-lua51)
u="/W/photos/W/WR"
T = {W="([^/]+)", R="(.*)"}
= u:gsub("[WR]", T)
local P = t" N:([0-9]+) O:([^/&?]+) R:(.*) L:([^_]+) .:%. ?:%? "
# Tau's translation to Python:
# (find-faceutilsfile "url.py")
# (find-angg "faceutils/README")
 (eepitch-shell)
 (eepitch-kill)
 (eepitch-shell)
--]]
-- Local Variables:
-- coding: raw-text-unix
-- End:
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment