Skip to content

Instantly share code, notes, and snippets.

@kaeza
Last active January 7, 2017 06:08
Show Gist options
  • Save kaeza/611b22c81835894887643c06684e2cac to your computer and use it in GitHub Desktop.
Save kaeza/611b22c81835894887643c06684e2cac to your computer and use it in GitHub Desktop.
---
-- Table serialization with minification.
--
-- This module contains functions to serialize tables to strings, and
-- deserialize these strings back to a table.
--
-- The serialized string is a representation of the table in Lua code.
-- The module does its best to generate the most compact code possible.
--
-- Tables with consecutive numerical indices starting from 1 ("arrays")
-- are efficiently stored by omitting the key. Numerical indices after
-- the first nil element are output adorned.
--
--     local t = { 42, "Hello!", nil, "blah" }
--     print(serialize(t)) --> {42,"Hello!",[4]="blah"}
--
-- Keys that are considered valid identifiers are output unadorned; other
-- keys (including reserved words) are serialized as `[key]`.
--
--     local t = { a=1, ["b"]=2, c=3 }
--     t["true"] = true
--     -- Note that this is just an example; the order of non-array
--     -- fields is unspecified, so they may not appear as shown here.
--     print(serialize(t)) --> {a=1,b=2,c=3,["true"]=true}
--
-- A key is a valid identifier if and only if all the following are true:
--
-- * It is a string, and is not empty.
-- * It consists of only letters, digits, or the underscore.
-- * It does not begin with a digit.
-- * It is not a reserved word as listed in the "Lexical Conventions"
-- of the Lua manual (sec. 2.1 in 5.1, 3.1 in 5.3).
--
-- Since what Lua considers a "letter" or "digit" depends on the locale,
-- we take a shortcut and only take into account ASCII letters and digits.
--
-- As you have seen above, the serialization algorithm only inserts a
-- comma if needed, and it doesn't add any spaces (except in actual
-- strings). The serialized data does not contain the `return` statement,
-- so this must be added if needed. The `deserialize` function provided by
-- this module takes care of adding the return statement if needed.
--
-- Please note that not all tables can be serialized:
--
-- * For keys, only strings, numbers, and booleans are supported. For
-- values, tables are supported in addition to the types for keys.
-- An unsupported type will raise an error.
-- * Tables containing circular references may cause a stack overflow
-- error or lock up the interpreter.
-- * Tables referenced more than once in the tree will be serialized
-- separately each time, and will result in references to different
-- tables on deserialization.
--
-- Also note that this module does not handle persistence; that must
-- be handled by the caller.
--
-- @module minser
-- @author kaeza <https://github.com/kaeza>
local M = {
  -- Module metadata: name, release date used as version, and license.
  _NAME    = "minser",
  _VERSION = "2016.12.27",
  _LICENSE = "Unlicense <https://unlicense.org>",
}
-- Localize everything.
local error, tostring, type, assert, loadstring, setfenv, load =
error, tostring, type, assert, loadstring, setfenv, load
local tconcat, strformat, strfind, strmatch =
table.concat, string.format, string.find, string.match
-- Set of reserved words in the Lua language, keyed by the word itself
-- (`reserved[word] == true`). The words are taken from section 3.1
-- "Lexical Conventions" in the manual for Lua 5.3.
local reserved = { }
do
  local words = {
    "and", "break", "do", "else", "elseif", "end", "false", "for",
    "function", "goto", "if", "in", "local", "nil", "not", "or",
    "repeat", "return", "then", "true", "until", "while",
  }
  -- Fill the lookup set from the list; the list itself is discarded
  -- when this scope ends.
  for i = 1, #words do
    reserved[words[i]] = true
  end
end
-- Tell whether the string `k` can be written as a bare identifier:
-- it must be non-empty, must not be a reserved word, must not start
-- with a digit, and must contain only ASCII letters, digits, or `_`.
-- Returns a boolean.
local function isvalidkey(k)
  if k == "" or reserved[k] then
    return false
  end
  if strfind(k, "^[0-9]") then
    return false
  end
  if strfind(k, "[^A-Za-z0-9_]") then
    return false
  end
  return true
end
-- Return a compact piece of Lua source that evaluates to the number `n`.
-- Shared by `reprkey` and `reprval` so keys and values are written the
-- same way.
local function reprnum(n)
  -- NaN and the infinities have no numeral; `tostring` typically yields
  -- names such as `nan` or `inf`, which would be read back as (nil)
  -- global variables. Emit constant expressions instead.
  if n ~= n then
    return "0/0"
  elseif n == 1/0 then
    return "1/0"
  elseif n == -1/0 then
    return "-1/0"
  end
  local s = tostring(n)
  -- `tostring` may round floats; when the short form does not read back
  -- as the same number, fall back to 17 significant digits.
  if tonumber(s) ~= n then
    s = strformat("%.17g", n)
  end
  -- Drop integer part if possible ("0.5" -> ".5").
  return strmatch(s, "^0(%..*)") or s
end
-- Return the representation of a key.
--
-- Strings that are valid identifiers are returned as-is; every other
-- supported key is wrapped in brackets (`["a b"]`, `[4]`, `[true]`).
-- Raises an error for key types other than string, number, or boolean.
local function reprkey(k)
  local t = type(k)
  if t == "string" then
    return isvalidkey(k) and k or strformat("[%q]", k)
  elseif t == "number" then
    return "["..reprnum(k).."]"
  elseif t == "boolean" then
    return "["..tostring(k).."]"
  else
    error("unsupported key type: "..t)
  end
end
-- Forward declaration: `reprval` and `serialize` are mutually recursive.
local serialize
-- Return the representation of a value.
--
-- Strings are quoted with `%q`, numbers are written compactly, booleans
-- and nil use their keyword, and tables are serialized recursively.
-- Raises an error for any other value type.
local function reprval(v)
  local t = type(v)
  if t == "string" then
    return strformat("%q", v)
  elseif t == "number" then
    return reprnum(v)
  elseif t == "boolean" or t == "nil" then
    return tostring(v)
  elseif t == "table" then
    return serialize(v)
  else
    error("unsupported value type: "..t)
  end
end
---
-- Serializes a table to a string.
--
-- Consecutive entries starting at index 1 are written without keys;
-- every other entry is written as `key=value`. Entries are separated
-- by commas and no whitespace is added.
--
-- @function serialize
-- @tparam table t Table to serialize. See notes in the module
-- description for possible issues.
-- @return A string.
function serialize(t) -- local
  assert(type(t) == "table", "argument #1 must be a table")
  local out, n = { }, 0
  -- Serialize array part if possible: walk 1, 2, ... until the first nil.
  -- Afterwards `cutoff` is the first index that holds nil.
  local cutoff = 1
  local item = t[cutoff]
  while item ~= nil do
    n = n + 1
    out[n] = reprval(item)
    cutoff = cutoff + 1
    item = t[cutoff]
  end
  for k, v in pairs(t) do
    -- Only serialize non-numbers, or numbers not part of the "array"
    -- (the array part is exactly the integer keys 1 .. cutoff-1).
    if type(k) ~= "number" or k < 1 or k >= cutoff or k % 1 ~= 0 then
      -- Use fresh locals rather than assigning to the loop variables,
      -- which are read-only in Lua 5.5. The key is converted first so
      -- that an unsupported key is reported before an unsupported value.
      local key = reprkey(k)
      local val = reprval(v)
      n = n + 1
      out[n] = key.."="..val
    end
  end
  return "{"..tconcat(out, ",").."}"
end
---
-- Deserialize a string into a table.
--
-- **IMPORTANT NOTE: Don't ever pass strings received from untrusted
-- sources to this function! It loads the data as Lua code in order to
-- deserialize it, and while it has some measures in place to thwart some
-- attacks, it can't be guaranteed it will prevent all kinds of code
-- injection!**
--
-- If some error occurs parsing the Lua code, or the value to be returned
-- is not a table, this function raises an error.
--
-- @function deserialize
-- @tparam string s String to deserialize.
-- @return The deserialized table.
local function deserialize(s)
  assert(type(s) == "string", "argument #1 must be a string")
  -- Precompiled chunks start with the escape character.
  if s:sub(1, 1) == "\27" then
    error("refused to load bytecode")
  end
  -- The chunk runs with this empty table as its environment, so global
  -- names inside the data resolve to nil.
  local env = { }
  env._ENV, env._G = env, env
  -- Serialized data has no `return`; add it unless already present.
  if not strmatch(s, "^%s*return%s*%{") then
    s = "return"..s
  end
  local f
  if setfenv then -- Lua 5.1
    f = assert(loadstring(s))
    setfenv(f, env)
  else -- Lua 5.2 and later
    -- `load` returns nil plus a message on failure; assert so the parse
    -- error is raised here instead of a confusing "attempt to call a nil
    -- value" below. Mode "t" makes the loader refuse binary chunks too.
    f = assert(load(s, s, "t", env))
  end
  local t = f()
  assert(type(t) == "table", "invalid data")
  return t
end
-- Publish the two public functions on the module table and return that
-- table as the result of the chunk.
M.serialize = serialize
M.deserialize = deserialize
return M
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment