Last active
December 26, 2023 19:11
-
-
Save Beherith/c6f09bfa0acc2f2e81743996d2cafa7b to your computer and use it in GitHub Desktop.
Base lua json module at 0.9.50, updated to correctly parse empty strings and to not pollute global namespace
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-----------------------------------------------------------------------------
-- JSON4Lua: JSON encoding / decoding support for the Lua language.
-- json Module.
-- Author: Craig Mason-Jones
-- Homepage: http://json.luaforge.net/
-- Version: 0.9.50
-- This module is released under the MIT License (MIT).
-- Please see LICENCE.txt for details.
--
-- USAGE:
-- This module exposes two functions:
--   encode(o)
--     Returns the table / string / boolean / number / nil / json.null value as a JSON-encoded string.
--   decode(json_string)
--     Returns a Lua object populated with the data encoded in the JSON string json_string.
--
-- REQUIREMENTS:
--   compat-5.1 if using Lua 5.0
--
-- CHANGELOG
--   0.9.50 Radical performance improvement on decode from Eike Decker. Many thanks!
--   0.9.40 Changed licence to MIT License (MIT)
--   0.9.20 Introduction of local Lua functions for private functions (removed _ function prefix).
--          Fixed Lua 5.1 compatibility issues.
--          Introduced json.null to have null values in associative arrays.
--          encode() performance improvement (more than 50%) through table.concat rather than ..
--          Introduced decode ability to ignore /**/ comments in the JSON string.
--   0.9.10 Fix to array encoding / decoding to correctly manage nil/null values in arrays.
-----------------------------------------------------------------------------
-----------------------------------------------------------------------------
-- Imports and dependencies
-----------------------------------------------------------------------------
-- Cache the standard libraries as locals so the rest of the module does not
-- depend on (or write to) the global environment. `base` is the global table
-- itself and is used for everything that is not cached individually.
local math = math
local string = string
local table = table
local tostring = tostring
local base = _G
-----------------------------------------------------------------------------
-- Module declaration
-----------------------------------------------------------------------------
-- Forward declarations of the private helpers. They are assigned further down
-- (via `function name(...)`) but must already be visible as locals to the
-- public functions defined before them. The former decode_scan* names were
-- never assigned or read anywhere in the file and have been dropped.
local encodeString
local isArray
local isEncodable
-----------------------------------------------------------------------------
-- PUBLIC FUNCTIONS
-----------------------------------------------------------------------------

--- The null function allows one to specify a null value in an associative array (which is
-- otherwise discarded if you set the value with 'nil' in Lua). Simply set t = { first=json.null }.
-- It is defined before encode() on purpose: encode() compares values against this local, and a
-- local declared after encode() would not be visible inside it (the name would silently resolve
-- to the global `null`, i.e. nil).
-- @return The null function itself, so json.null() also yields json.null.
local function null()
  return null
end

--- Encodes an arbitrary Lua object / variable.
-- Tables that isArray() accepts are written as JSON arrays (holes become null); all other tables
-- are written as JSON objects, skipping pairs whose key or value is not encodable.
-- @param v The Lua object / variable to be JSON encoded.
-- @return String containing the JSON encoding in internal Lua string format (i.e. not unicode)
local function encode(v)
  -- Handle nil values
  if v == nil then
    return "null"
  end

  local vtype = base.type(v)

  -- Handle strings
  if vtype == 'string' then
    return '"' .. encodeString(v) .. '"'
  end

  -- Handle numbers and booleans
  if vtype == 'number' or vtype == 'boolean' then
    return base.tostring(v)
  end

  -- Handle tables
  if vtype == 'table' then
    local rval = {}
    local bArray, maxCount = isArray(v)
    if bArray then
      for i = 1, maxCount do
        rval[i] = encode(v[i])
      end
      return '[' .. table.concat(rval, ',') .. ']'
    end
    -- An object, not an array
    for key, value in base.pairs(v) do
      if isEncodable(key) and isEncodable(value) then
        rval[#rval + 1] = '"' .. encodeString(key) .. '":' .. encode(value)
      end
    end
    return '{' .. table.concat(rval, ',') .. '}'
  end

  -- Handle the json.null sentinel
  if vtype == 'function' and v == null then
    return 'null'
  end

  base.assert(false, 'encode attempt to encode unsupported type ' .. vtype .. ':' .. base.tostring(v))
end

--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
-- Only declared here so the name is a module-level local; the implementation is assigned
-- further down, once the token tables it relies on have been built.
-- @param s The string to scan.
-- @return Lua object that was scanned, as a Lua table / string / number / boolean or nil.
local decode
-----------------------------------------------------------------------------
-- Internal, PRIVATE functions.
-----------------------------------------------------------------------------

-- Replacement table for the characters that must be backslash-escaped inside a JSON string.
-- Carriage return, backspace and form feed are included in addition to the original four:
-- emitting them raw produces invalid JSON, and a raw CR is rejected by this module's own
-- string scanner (its string token tables exclude "\n" and "\r").
local qrep = {
  ["\\"] = "\\\\",
  ['"'] = '\\"',
  ['\n'] = '\\n',
  ['\r'] = '\\r',
  ['\t'] = '\\t',
  ['\b'] = '\\b',
  ['\f'] = '\\f',
}

--- Encodes a string to be JSON-compatible.
-- @param s The value to escape; it is passed through tostring() first, so numbers are accepted.
-- @return The string appropriately escaped (exactly one return value).
function encodeString(s)
  -- The extra parentheses drop gsub's second result (the substitution count) so it cannot
  -- leak into callers that forward all return values.
  return (tostring(s):gsub('["\\\n\r\t\b\f]', qrep))
end
--- Determines whether the given table should be encoded as a JSON array or as an object.
-- A table counts as an array when every key is a positive integer, with two tolerated
-- exceptions: a key 'n' whose value equals the table length, and non-integer keys whose
-- values are not encodable (those would be skipped by the encoder anyway).
-- An empty table is reported as an array with maximum index 0.
-- @param t The table to evaluate as an array
-- @return boolean, number True if the table can be represented as an array, false otherwise.
--   If true, the second returned value is the largest integer index found in the table.
function isArray(t)
  local maxIndex = 0
  for k, v in base.pairs(t) do
    if base.type(k) == 'number' and math.floor(k) == k and 1 <= k then
      -- k,v is an indexed pair: all array elements must be encodable
      if not isEncodable(v) then return false end
      maxIndex = math.max(maxIndex, k)
    elseif k == 'n' then
      -- 'n' is only acceptable when it really holds the number of elements.
      -- `#t` replaces table.getn, which is deprecated in 5.1 and absent from later versions.
      if v ~= #t then return false end
    elseif isEncodable(v) then
      -- Any other key carrying encodable data makes this an object.
      return false
    end
  end
  return true, maxIndex
end
--- Determines whether the given Lua object / table / variable can be JSON encoded. The only
-- types that are JSON encodable are: string, boolean, number, nil, table and json.null.
-- In this implementation, all other types are ignored.
-- @param o The object to examine.
-- @return boolean True if the object should be JSON encoded, false if it should be ignored.
function isEncodable(o)
  local t = base.type(o)
  if t == 'string' or t == 'boolean' or t == 'number' or t == 'nil' or t == 'table' then
    return true
  end
  -- The only encodable function value is the json.null sentinel.
  return t == 'function' and o == null
end
local instrumentedDecode | |
-- Radical performance improvement for decode from Eike Decker! | |
do | |
-- Local aliases of the base functions used by the decoder.
local type = base.type
local error = base.error
local assert = base.assert
local print = base.print
-- When a global `Spring` table with an Echo function is present, use it for output instead.
if Spring and Spring.Echo then print = Spring.Echo end
local tonumber = base.tonumber
--- Builds a small fluent helper for filling a token table.
-- A token table maps byte values to what the scanner should do on that byte: another token
-- table, a marker value, or `true` to signal the end of a token sequence. Scanning with such
-- a table continues until a byte is found that the table does not know (an error) or that
-- maps to something other than tt_ignore.
--
-- Usage: init_token_table(tt) "name" :link(target) :to "chars" :link(other) :to "more"
--   * the returned function stores `name` in tt.name (used in error messages)
--   * :link(x) selects the value to assign next
--   * :to(chars) assigns that value to every byte of `chars` in tt (later calls overwrite
--     earlier ones for the same byte)
-- @param tt The token table to fill.
-- @return function(name) that names the table and returns the builder object.
local function init_token_table(tt)
  local struct = {}
  local value -- the value most recently selected with :link()

  function struct:link(other_tt)
    value = other_tt
    return struct
  end

  function struct:to(chars)
    for i = 1, #chars do
      tt[chars:byte(i)] = value
    end
    return struct
  end

  return function(name)
    tt.name = name
    return struct
  end
end
-- Keep "named" byte values at hand: one local per character of the literal below, in order
-- (backslash, e, l, r, u, f, a, s, slash). They are compared against raw bytes while scanning.
local c_esc, c_e, c_l, c_r, c_u, c_f, c_a, c_s, c_slash = ("\\elrufas/"):byte(1, 9)
-- Token tables, one per scanner state. Each starts empty and is filled in below.
-- tt_boolean and tt_null stay empty: they are only used as markers telling the reader which
-- literal to expect. tt_ignore is special - bytes mapped to it are skipped by the scanner.
-- (The original initialiser listed 14 tables for these 13 names; the surplus one was discarded.)
local
  tt_object_key,
  tt_object_colon,
  tt_object_value,
  tt_doublequote_string,
  tt_singlequote_string,
  tt_array_value,
  tt_array_seperator,
  tt_numeric,
  tt_boolean,
  tt_null,
  tt_comment_start,
  tt_comment_middle,
  tt_ignore
  = {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}
-- Strings to be used in certain token tables:
--   strchars: every byte that may appear raw inside a string (all except "\n" and "\r")
--   allchars: every byte value 0..255, valid inside comments
local strchars, allchars
do
  local string_parts, all_parts = {}, {}
  for i = 0, 0xff do
    local c = string.char(i)
    if c ~= "\n" and c ~= "\r" then
      string_parts[#string_parts + 1] = c
    end
    all_parts[i + 1] = c
  end
  strchars = table.concat(string_parts)
  allchars = table.concat(all_parts)
end
-- Wiring of the scanner states. For every table, later :to() calls override earlier ones for
-- the same byte, which is why the string tables first mark all string characters as ignorable
-- and only then assign the backslash and the closing quote.

-- obj key reader, expects the end of the object or a quoted string as key
init_token_table (tt_object_key) "tt_object_key: object (' or \" or } or , expected)"
  :link(tt_singlequote_string) :to "'"
  :link(tt_doublequote_string) :to '"'
  :link(true) :to "}"
  :link(tt_object_key) :to ","
  :link(tt_comment_start) :to "/"
  :link(tt_ignore) :to " \t\r\n"

-- after the key, a colon is expected (or comment)
init_token_table (tt_object_colon) "tt_object_colon: object (: expected)"
  :link(tt_object_value) :to ":"
  :link(tt_comment_start) :to "/"
  :link(tt_ignore) :to " \t\r\n"

-- as values, anything is possible, numbers, arrays, objects, boolean, null, strings
init_token_table (tt_object_value) "tt_object_value: object ({ or [ or ' or \" or number or boolean or null expected)"
  :link(tt_object_key) :to "{"
  :link(tt_array_seperator) :to "["
  :link(tt_singlequote_string) :to "'"
  :link(tt_doublequote_string) :to '"'
  :link(tt_numeric) :to "0123456789.-"
  :link(tt_boolean) :to "tf"
  :link(tt_null) :to "n"
  :link(tt_comment_start) :to "/"
  :link(tt_ignore) :to " \t\r\n"

-- token tables for reading strings
init_token_table (tt_doublequote_string) "tt_doublequote_string: double quoted string"
  :link(tt_ignore) :to (strchars)
  :link(c_esc) :to "\\"
  :link(true) :to '"'

init_token_table (tt_singlequote_string) "single quoted string"
  :link(tt_ignore) :to (strchars)
  :link(c_esc) :to "\\"
  :link(true) :to "'"

-- array reader that expects termination of the array or a comma that indicates the next value
init_token_table (tt_array_value) "tt_array_value: array (, or ] expected)"
  :link(tt_array_seperator) :to ","
  :link(true) :to "]"
  :link(tt_comment_start) :to "/"
  :link(tt_ignore) :to " \t\r\n"

-- a value, pretty similar to tt_object_value; additionally accepts "]" so empty arrays work
init_token_table (tt_array_seperator) "tt_array_seperator: array ({ or [ or ' or \" or number or boolean or null expected)"
  :link(tt_object_key) :to "{"
  :link(tt_array_seperator) :to "["
  :link(tt_singlequote_string) :to "'"
  :link(tt_doublequote_string) :to '"'
  :link(tt_comment_start) :to "/"
  :link(tt_numeric) :to "0123456789.-"
  :link(tt_boolean) :to "tf"
  :link(tt_null) :to "n"
  :link(tt_ignore) :to " \t\r\n"
  :link(true) :to "]"

-- valid number tokens (continuation characters of a number). "+" is included so that
-- exponents with an explicit sign such as 1e+5 are read as a single number; it is
-- deliberately NOT a valid first character of a value in the tables above.
init_token_table (tt_numeric) "tt_numeric: number"
  :link(tt_ignore) :to "0123456789.-+Ee"

-- once a comment has been started with /, a * is expected
init_token_table (tt_comment_start) "comment start (* expected)"
  :link(tt_comment_middle) :to "*"

-- now everything is allowed, watch out for * though. The next char is then checked manually
init_token_table (tt_comment_middle) "comment end"
  :link(tt_ignore) :to (allchars)
  :link(true) :to "*"
-- Characters that stand for something else when preceded by a backslash inside a string.
-- Any character not listed here (including ", ', \ and /) stands for itself.
local simple_escapes = { b = "\b", f = "\f", n = "\n", r = "\r", t = "\t", a = "\a", v = "\v" }

-- Encodes a Unicode code point as a UTF-8 byte string (arithmetic only, no bit library needed).
local function codepoint_to_utf8(cp)
  local char, floor = string.char, math.floor
  if cp < 0x80 then
    return char(cp)
  elseif cp < 0x800 then
    return char(0xC0 + floor(cp / 0x40), 0x80 + cp % 0x40)
  elseif cp < 0x10000 then
    return char(0xE0 + floor(cp / 0x1000), 0x80 + floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40)
  end
  return char(0xF0 + floor(cp / 0x40000), 0x80 + floor(cp / 0x1000) % 0x40,
    0x80 + floor(cp / 0x40) % 0x40, 0x80 + cp % 0x40)
end

-- Resolves the backslash escapes in the raw contents of a string literal (quotes excluded).
-- Handles the JSON escapes including \uXXXX (with surrogate pairs, emitted as UTF-8) and, for
-- compatibility with the previous loadstring-based reader, decimal \ddd escapes.
local function unescape_string(raw)
  local bs = raw:find("\\", 1, true)
  if not bs then return raw end -- fast path: nothing to unescape

  local out, n, i = {}, 0, 1
  while bs do
    n = n + 1
    out[n] = raw:sub(i, bs - 1)
    local c = raw:sub(bs + 1, bs + 1)
    local piece
    i = bs + 2
    if c == "u" then
      local hex = raw:match("^%x%x%x%x", i)
      if hex then
        local cp = tonumber(hex, 16)
        i = i + 4
        if cp >= 0xD800 and cp <= 0xDBFF then
          -- high surrogate: combine with a directly following low surrogate, if any
          local low = raw:match("^\\u(%x%x%x%x)", i)
          local lowcp = low and tonumber(low, 16)
          if lowcp and lowcp >= 0xDC00 and lowcp <= 0xDFFF then
            cp = 0x10000 + (cp - 0xD800) * 0x400 + (lowcp - 0xDC00)
            i = i + 6
          end
        end
        piece = codepoint_to_utf8(cp)
      else
        piece = "u" -- not a well-formed \uXXXX: keep the character itself
      end
    elseif c:find("^%d") then
      local digits = raw:match("^%d%d?%d?", bs + 1)
      local code = tonumber(digits)
      if code > 255 then
        error("Invalid decimal escape \\" .. digits .. " in JSON string")
      end
      piece = string.char(code)
      i = bs + 1 + #digits
    else
      piece = simple_escapes[c] or c
    end
    n = n + 1
    out[n] = piece
    bs = raw:find("\\", i, true)
  end
  n = n + 1
  out[n] = raw:sub(i)
  return table.concat(out)
end

--- Decodes a JSON string and returns the decoded value as a Lua data structure / value.
-- Accepted beyond strict JSON: /* */ comments, single-quoted strings, stray commas between
-- object members. JSON null decodes to nil (so null object members are absent and null array
-- elements leave holes). Characters after the first complete value are ignored.
-- @param js_string The string to scan.
-- @return The decoded table / string / number / boolean, or nil for null.
-- Raises an error (with line / character position where available) on malformed input.
function decode(js_string)
  if type(js_string) ~= "string" then
    error("decode expects a JSON string, got " .. type(js_string))
  end

  local pos = 1 -- position of the next unread byte in js_string

  -- in case of error, report the location using line numbers
  local function location()
    local n = ("\n"):byte()
    local line, lpos = 1, 0
    for i = 1, pos do
      if js_string:byte(i) == n then
        line, lpos = line + 1, 1
      else
        lpos = lpos + 1
      end
    end
    return "Line " .. line .. " character " .. lpos
  end

  -- Read the next token according to the passed token table: skip bytes mapped to tt_ignore,
  -- consume and return the first other mapping, and raise on a byte the table does not know.
  local function next_token(tok)
    while pos <= #js_string do
      local b = js_string:byte(pos)
      local t = tok[b]
      if not t then
        error("Unexpected character at " .. location() .. ": " ..
          string.char(b) .. " (" .. b .. ") when reading " .. tok.name .. "\nContext: \n" ..
          js_string:sub(math.max(1, pos - 30), pos + 30) .. "\n" ..
          (" "):rep(pos + math.min(-1, 30 - pos)) .. "^")
      end
      pos = pos + 1
      if t ~= tt_ignore then return t end
    end
    error("unexpected termination of JSON while looking for " .. tok.name)
  end

  -- Read a string, double or single quoted; `tok` is the matching string token table.
  -- On entry pos is just past the opening quote; on exit it is just past the closing quote.
  local function read_string(tok)
    local start = pos
    repeat
      local t = next_token(tok)
      if t == c_esc then
        pos = pos + 1 -- jump over the escaped char, no matter what it is
      end
    until t == true
    -- The contents are unescaped by hand rather than compiled with loadstring: input is never
    -- executed as Lua, \uXXXX is understood, and no Lua-version-specific escape rules apply.
    return unescape_string(js_string:sub(start, pos - 2))
  end

  -- Read a number. The first character was already consumed by the token scanner, hence start-1.
  local function read_num()
    local start = pos
    while pos <= #js_string do
      local b = js_string:byte(pos)
      if not tt_numeric[b] then break end
      pos = pos + 1
    end
    local text = js_string:sub(start - 1, pos - 1)
    local value = tonumber(text)
    if value == nil then
      error("Invalid number '" .. text .. "' at " .. location())
    end
    return value
  end

  -- Read `true` / `false`. The first letter was already consumed, so the literal starts at
  -- pos-1; the whole word is compared so that e.g. "talse" is rejected.
  local function read_bool()
    local start = pos - 1
    if js_string:sub(start, start + 3) == "true" then
      pos = start + 4
      return true
    end
    if js_string:sub(start, start + 4) == "false" then
      pos = start + 5
      return false
    end
    error("Invalid boolean: " .. js_string:sub(math.max(1, pos - 5), pos + 5))
  end

  -- Read `null` (the leading 'n' was already consumed). Yields nil.
  local function read_null()
    pos = pos + 3
    local u, l1, l2 = js_string:byte(pos - 3, pos - 1)
    if u == c_u and l1 == c_l and l2 == c_l then return nil end
    error("Invalid value (expected null):" .. js_string:sub(pos - 4, pos - 1))
  end

  local read_object_value, read_object_key, read_array, read_value, read_comment

  -- Read a value depending on what token was returned; `fromt` is the token table that
  -- produced `t`, needed to resume scanning after a comment.
  function read_value(t, fromt)
    if t == tt_object_key then return read_object_key({}) end
    if t == tt_array_seperator then return read_array({}) end
    if t == tt_singlequote_string or
       t == tt_doublequote_string then return read_string(t) end
    if t == tt_numeric then return read_num() end
    if t == tt_boolean then return read_bool() end
    if t == tt_null then return read_null() end
    if t == tt_comment_start then return read_value(read_comment(fromt), fromt) end
    error("unexpected termination - " .. js_string:sub(math.max(1, pos - 10), pos + 10))
  end

  -- Skip one or more consecutive /* */ comments (the leading '/' was already consumed) and
  -- return the first non-comment token, read with the token table that was in use.
  function read_comment(fromt)
    while true do
      next_token(tt_comment_start) -- requires the '*' after '/'
      while true do
        next_token(tt_comment_middle) -- returns once a '*' has been consumed
        -- Only consume the following byte when it really is the closing '/'; otherwise
        -- leave it in place so that a run such as "**/" still terminates the comment.
        if js_string:byte(pos) == c_slash then
          pos = pos + 1
          local t = next_token(fromt)
          if t ~= tt_comment_start then return t end
          break -- another comment follows directly
        end
      end
    end
  end

  -- Read an array into `o` (expected empty), starting at index `i` (default 1).
  function read_array(o, i)
    i = i or 1
    while true do
      -- At this point, the array might be empty (or end after a trailing comma)!
      local t = next_token(tt_array_seperator)
      if t == tt_comment_start then
        t = read_comment(tt_array_seperator)
      end
      if t == true then return o end
      o[i] = read_value(t, tt_array_seperator)
      t = next_token(tt_array_value)
      if t == tt_comment_start then
        t = read_comment(tt_array_value)
      end
      if t == true then -- found the terminating ']'
        return o
      end
      i = i + 1
    end
  end

  -- Read whatever value comes next in object-value position.
  function read_object_value(o)
    local t = next_token(tt_object_value)
    return read_value(t, tt_object_value)
  end

  -- Read key/value pairs into `o` until the closing '}' is found.
  function read_object_key(o)
    while true do
      local t = next_token(tt_object_key)
      if t == tt_comment_start then
        t = read_comment(tt_object_key)
      end
      if t == true then return o end
      if t ~= tt_object_key then -- a ',' simply moves on to the next key
        local k = read_string(t)
        if next_token(tt_object_colon) == tt_comment_start then
          read_comment(tt_object_colon) -- scans on until the ':' has been consumed
        end
        o[k] = read_object_value(o)
      end
    end
  end

  -- Read the document as if it were an object value. Characters left over after the first
  -- complete value are deliberately ignored, as before.
  return read_object_value()
end
--- decode() wrapped in a profiler zone when a global `tracy` table is available.
-- @param js_string The JSON text to decode.
-- @return Whatever decode(js_string) returns.
-- Errors raised by decode are re-raised unchanged after the zone has been closed.
instrumentedDecode = function(js_string)
  if not tracy then
    return decode(js_string)
  end
  tracy.ZoneBegin()
  -- pcall keeps ZoneBegin/ZoneEnd balanced even when decoding fails.
  local ok, result = base.pcall(decode, js_string)
  tracy.ZoneEnd()
  if not ok then
    error(result, 0) -- level 0: do not prepend another position to the message
  end
  -- The original assigned to a global `localret` and returned the unrelated `ret`,
  -- so every successful decode came back as nil.
  return result
end
-- Simple test if things work.
-- Decoding empty containers needs no external resources, so it always runs at load time.
instrumentedDecode('{"a":{}, "b" : []}')

-- Developer-only smoke test against a config file on the original author's machine.
-- It is skipped when the `io` library or the file is unavailable; previously a missing file
-- made the module fail to load with "attempt to index a nil value".
local configjsonf = io and io.open("C:/Users/Peti/Documents/My Games/Spring/games/Beyond-All-Reason.sdd/common/luaUtilities/config.json", 'r')
if configjsonf then
  local configjson = configjsonf:read("*all")
  configjsonf:close()
  if configjson then
    instrumentedDecode(configjson)
  end
end
end | |
-- Public interface of the module; nothing is written to the global namespace.
--   encode(v)    Lua value -> JSON text
--   decode(s)    JSON text -> Lua value (the profiler-instrumented wrapper)
--   null         sentinel to store an explicit JSON null in a table
return {
  encode = encode,
  decode = instrumentedDecode,
  null = null,
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Tested by loading a 'A Head Start' scenario (where the issue was first reported). Still doesn't work. Now there are no clear error strings to identify what was broken during the load either.
For reference, the Json.decode in luarules/gadgets/unit_scenario_script.lua:23 is the one returning nil where it shouldn't be.