Skip to content

Instantly share code, notes, and snippets.

@appgurueu
Last active September 9, 2021 13:10
Show Gist options
  • Save appgurueu/6b99957fd597d0528080c9c457bdc877 to your computer and use it in GitHub Desktop.
Save appgurueu/6b99957fd597d0528080c9c457bdc877 to your computer and use it in GitHub Desktop.
A simple file type scanner (by file extension) for ContentDB. Requires a ramdisk mounted at /tmp/ramdisk.
-- See https://content.minetest.net/help/api/
local http_request = require"http.request"
local lunajson = require"lunajson"
local zip = require"zip"
do
	-- URI escaping utilities.
	-- The set of characters left unescaped matches JavaScript's encodeURIComponent:
	-- see https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent

	-- Set of single-character strings that are emitted verbatim.
	local uri_unescaped_chars = {}
	for char in ("-_.!~*'()"):gmatch(".") do
		uri_unescaped_chars[char] = true
	end

	-- Marks every character in the inclusive byte range [from, to] as unescaped.
	-- `from` and `to` are single-character strings.
	local function add_unescaped_range(from, to)
		for byte = from:byte(), to:byte() do
			uri_unescaped_chars[string.char(byte)] = true
		end
	end
	add_unescaped_range("0", "9")
	add_unescaped_range("a", "z")
	add_unescaped_range("A", "Z")

	-- Percent-encodes every byte of `str` whose single-character string is not a key
	-- of `allowed_chars`. Returns exactly one value: the encoded string.
	local function encode(str, allowed_chars)
		-- string.gsub returns (result, substitution_count); the outer parentheses
		-- truncate that to the result only, so the count does not leak to callers
		-- that use the return value at the end of an argument list.
		return (str:gsub(".", function(char)
			if allowed_chars[char] then
				return char
			end
			return ("%%%02X"):format(char:byte())
		end))
	end

	--- Percent-encodes the string byte by byte, leaving only
	-- `A-Z a-z 0-9 - _ . ! ~ * ' ( )` untouched.
	-- NOTE: this adds a method to the shared `string` table, so it is available
	-- on every string value once this block has run.
	-- @treturn string the encoded string (single return value)
	function string:encode_uri_component()
		return encode(self, uri_unescaped_chars)
	end
end
--- Performs a request for `uri` and returns the response body as a string.
-- Raises an error if the request or reading the body fails, and raises the
-- response body itself as the error message when the `:status` header is not "200".
local function get_body(uri)
	local request = http_request.new_from_uri(uri)
	local headers, stream = assert(request:go())
	local body = assert(stream:get_body_as_string())
	local status = headers:get(":status")
	if status == "200" then
		return body
	end
	-- Any other status: surface whatever the server sent back.
	error(body)
end
-- Tally: lowercased file extension -> number of archive entries seen with it,
-- accumulated over every release of every package.
local file_extensions = {}
local packages = lunajson.decode(get_body"https://content.minetest.net/api/packages/")
-- The loop variable is called `pkg` so it does not shadow Lua's global `package` table.
for package_index, pkg in ipairs(packages) do
	local path = pkg.author:encode_uri_component() .. "/" .. pkg.name
	print("Scanning package " .. path .. ": " .. package_index .. " of " .. #packages)
	local releases = lunajson.decode(get_body("https://content.minetest.net/api/packages/" .. path .. "/releases/"))
	for release_index, release in ipairs(releases) do
		print("Scanning release " .. release_index .. " of " .. #releases)
		-- os.execute"clear" -- io.write("\027[H\027[2J")
		local zipfile = get_body("https://content.minetest.net" .. release.url)
		local rampath = "/tmp/ramdisk/data.zip"
		-- "wb": the archive is binary data and must not go through text-mode translation.
		local ramfile = assert(io.open(rampath, "wb"))
		-- Check the write and close results: a full ramdisk would otherwise leave a
		-- truncated archive behind and fail later with a confusing error.
		assert(ramfile:write(zipfile))
		assert(ramfile:close())
		-- Release the only reference to the downloaded data so the collect below can free it.
		zipfile = nil
		-- Fail with the library's error message instead of indexing nil on a bad archive.
		local zfile = assert(zip.open(rampath))
		for file in zfile:files() do
			assert(type(file.filename) == "string")
			-- Extension = non-empty text after the last dot of the final path component.
			-- Excluding "/" and "\" keeps a dot in a directory name ("dir.d/README")
			-- from being mistaken for a file extension.
			local ext = file.filename:match("%.([^./\\]+)$")
			if ext then
				ext = ext:lower()
				file_extensions[ext] = (file_extensions[ext] or 0) + 1
			end
		end
		zfile:close()
		-- HACK drop that large string immediately
		collectgarbage"collect"
	end
end
print"File extension counts from all releases of all ContentDB content:"
do
	-- pairs() iterates in an unspecified order, so collect the keys and sort them
	-- to make the report reproducible: most frequent first, ties broken by name.
	local extensions = {}
	for ext in pairs(file_extensions) do
		extensions[#extensions + 1] = ext
	end
	table.sort(extensions, function(a, b)
		local count_a, count_b = file_extensions[a], file_extensions[b]
		if count_a ~= count_b then
			return count_a > count_b
		end
		return a < b
	end)
	-- One line per extension: extension and count, as separate print arguments.
	for _, ext in ipairs(extensions) do
		print(ext, file_extensions[ext])
	end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment