-
-
Save appgurueu/6b99957fd597d0528080c9c457bdc877 to your computer and use it in GitHub Desktop.
A simple file type scanner (by file extension) for ContentDB. It needs a mounted ramdisk under /tmp/ramdisk.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- See https://content.minetest.net/help/api/ | |
local http_request = require"http.request" | |
local lunajson = require"lunajson" | |
local zip = require"zip" | |
do
	-- URI escaping utilities.
	-- The characters left unescaped are ASCII letters, decimal digits and - _ . ! ~ * ' ( )
	-- which is the set JavaScript's encodeURIComponent leaves untouched.
	-- See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURIComponent
	local uri_unescaped_chars = {}
	for char in ("-_.!~*'()"):gmatch(".") do
		uri_unescaped_chars[char] = true
	end

	-- Marks every character in the inclusive range from..to as not needing escaping.
	local function add_unescaped_range(from, to)
		for byte = from:byte(), to:byte() do
			uri_unescaped_chars[string.char(byte)] = true
		end
	end
	add_unescaped_range("0", "9")
	add_unescaped_range("a", "z")
	add_unescaped_range("A", "Z")

	-- Percent-encodes every byte of str that is not a key of allowed_chars.
	-- Returns exactly one value: the encoded string.
	local function encode(str, allowed_chars)
		-- gsub returns (result, number of matches). Only the result is kept so that
		-- the count cannot leak into a caller's argument list, for example
		-- table.insert(t, s:encode_uri_component()).
		local encoded = str:gsub(".", function(char)
			if allowed_chars[char] then
				return char
			end
			return ("%%%02X"):format(char:byte())
		end)
		return encoded
	end

	--- Percent-encodes this string for use as a single URI component.
	-- Defined on the string table so it can be called as a method on any string.
	-- @treturn string the encoded string (a single return value)
	function string:encode_uri_component()
		return encode(self, uri_unescaped_chars)
	end
end
--- Requests `uri` through lua-http and returns the response body as a string.
-- @tparam string uri the URI to request
-- @treturn string the response body
-- Raises an error if the request fails, if the body cannot be read,
-- or if the response status is not "200".
local function get_body(uri)
	local headers, stream = assert(http_request.new_from_uri(uri):go())
	local body = assert(stream:get_body_as_string())
	local status = headers:get(":status")
	if status ~= "200" then
		-- Include the URI and status: the body alone may be empty, and a failure
		-- in the middle of a long scan must be attributable to a request.
		error(("request to %s failed with status %s: %s"):format(uri, tostring(status), body))
	end
	return body
end
-- Maps lowercased file extension -> number of files seen with that extension.
local file_extensions = {}

--- Returns the lowercased extension of the last path segment of `filename`.
-- Dots in directory names are ignored ("mod.v2/README" has no extension),
-- as are directory entries and names ending in a dot.
-- @tparam string filename path of an entry inside an archive
-- @treturn string|nil the extension without the dot, or nil if there is none
local function file_extension(filename)
	local ext = filename:match("%.([^./]+)$")
	return ext and ext:lower()
end

--- Downloads `uri` and writes the response body to `path`.
-- The body is held only in this function's locals, so the (potentially large)
-- string is unreferenced as soon as the function returns.
local function download(uri, path)
	local data = get_body(uri)
	-- "wb": the payload is a binary zip archive.
	local file = assert(io.open(path, "wb"))
	local ok, err = file:write(data)
	file:close()
	assert(ok, err)
end

--- Opens the zip archive at `path` and adds the extension of each entry to file_extensions.
-- Raises an error if the archive cannot be opened.
local function count_extensions(path)
	local zfile = assert(zip.open(path))
	for file in zfile:files() do
		assert(type(file.filename) == "string")
		local ext = file_extension(file.filename)
		if ext then
			file_extensions[ext] = (file_extensions[ext] or 0) + 1
		end
	end
	zfile:close()
end

local rampath = "/tmp/ramdisk/data.zip"
local packages = lunajson.decode(get_body"https://content.minetest.net/api/packages/")
for package_index, package in ipairs(packages) do
	local path = package.author:encode_uri_component() .. "/" .. package.name
	print("Scanning package " .. path .. ": " .. package_index .. " of " .. #packages)
	local releases = lunajson.decode(get_body("https://content.minetest.net/api/packages/" .. path .. "/releases/"))
	for release_index, release in ipairs(releases) do
		print("Scanning release " .. release_index .. " of " .. #releases)
		-- os.execute"clear" -- io.write("\027[H\027[2J")
		download("https://content.minetest.net" .. release.url, rampath)
		count_extensions(rampath)
		-- The downloaded archive string went out of scope when download() returned;
		-- force a full collection so it is released before the next download.
		collectgarbage("collect")
	end
end

print"File extension counts from all releases of all ContentDB content:"
-- pairs() order is unspecified; sort for a stable report:
-- most frequent extension first, ties broken alphabetically.
local sorted_exts = {}
for ext in pairs(file_extensions) do
	sorted_exts[#sorted_exts + 1] = ext
end
table.sort(sorted_exts, function(a, b)
	local count_a, count_b = file_extensions[a], file_extensions[b]
	if count_a ~= count_b then
		return count_a > count_b
	end
	return a < b
end)
for _, ext in ipairs(sorted_exts) do
	print(ext, file_extensions[ext])
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment