Last active
October 11, 2022 11:15
-
-
Save bruceCzK/91abe395c72c5b08f186d5ae8add03e6 to your computer and use it in GitHub Desktop.
CK2TxtConfigFileParser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const {set, get, isString} = require('lodash') | |
/**
 * Function entry point: extracts the config text from the incoming event and
 * returns it parsed into a plain object tree.
 *
 * @param {Buffer|Object} event - Either a Buffer holding a JSON document or an
 *   already-decoded object. The text is read from `text`, falling back to
 *   `content` when `text` is falsy.
 * @param {*} context - Not used by this handler.
 * @param {Function} [callback] - Optional node-style callback, invoked as
 *   `callback(error, null)` on invalid input or `callback(null, result)` on
 *   success.
 * @returns {Object} The tree produced by `parse`.
 * @throws {Error} 'Invalid Arguments' when the event carries no text. The
 *   error is thrown even when it has already been handed to `callback`.
 * @throws {SyntaxError} When a Buffer event does not contain valid JSON.
 */
module.exports.handler = function (event, context, callback) {
  const data = Buffer.isBuffer(event) ? JSON.parse(event) : event
  // A missing event (or a JSON `null` body) carries no text. Route it through
  // the 'Invalid Arguments' path below instead of letting the property access
  // raise an unrelated TypeError that would also skip the callback.
  const text = data == null ? undefined : (data.text || data.content)

  if (!text) {
    const error = new Error('Invalid Arguments')
    if (callback) {
      callback(error, null)
    }
    throw error
  }

  const result = parse(text)
  if (callback) {
    callback(null, result)
  }
  return result
}
/**
 * Parses a `key = value` / `key = { ... }` text config into a plain object.
 *
 * Output shape, as built by the code below:
 *  - scalar values are stored as trimmed strings (surrounding double quotes
 *    are kept as part of the string);
 *  - `{ a b c }` blocks that contain no operator become arrays of tokens;
 *  - `{ }` becomes an empty object, other `{ k = v }` blocks nested objects;
 *  - comparison operators are folded into the value, e.g. `a >= 5` yields
 *    `{ a: '>= 5' }`;
 *  - a key that already holds a truthy value at the same level gets a
 *    `$$<HEX index>` suffix, and an all-digit key gets a `#` suffix.
 *
 * Relies on lodash `get`, `set` and `isString` from the enclosing module.
 *
 * @param {*} content - Source text; coerced with `String()`.
 * @returns {Object} The parsed tree.
 */
function parse(content) {
  const SPACE = ' '.codePointAt()
  const EQUAL = '='.codePointAt()
  const POUND = '#'.codePointAt()
  const LBRACE = '{'.codePointAt()
  const RBRACE = '}'.codePointAt()
  const BR = '\n'.codePointAt()
  const DBQUOTE = '"'.codePointAt()
  const SLASH = '\\'.codePointAt()

  const json = {}
  // Stack of keys leading to the value currently being read.
  const paths = []

  let source = String(content).split(/\r\n|\n|\r/)
    .map(line => line.replace(/\t/g, ' ')) // replace tabs with spaces
    .map(line => line.replace(/(["\s]\w+?")(\w)/g, '$1 $2')) // separate a closing quote glued to the next word
    .map(line => line.replace(/(\s=)(\[.+])(\s|$)/g, '$1"$2"$3')) // quote bracketed values such as [From.GetID]
    .map(line => line.replace(/^\s+/, '')) // remove leading spaces
    .map(line => line.replace(/#(?=(?:(?:[^"]*"){2})*[^"]*$).+(?:(\s|$))/g, '')) // remove comment
    .map(line => line.replace(/#\s*$/g, '')) // remove comment
    .map(line => line.replace(/^\s*#.+/, ''))
    // Rewrite every operator other than a plain equal sign as `= <placeholder>`
    // so the scanner below only has to understand `=`. The placeholders must
    // not contain `<`, `>` or `=` themselves, otherwise the later passes in
    // this chain would rewrite them a second time.
    .map(line => line.replace(/<=/g, '= &lte;'))
    .map(line => line.replace(/>=/g, '= &gte;'))
    .map(line => line.replace(/>/g, '= &gt;'))
    .map(line => line.replace(/</g, '= &lt;'))
    .map(line => line.replace(/==/g, '= &eqeq;'))
    .join('\n')
  source += '\n' // add extra line so the last value is flushed

  let token = ''
  let leftHand = true
  let mayBeArray = false
  let string = false
  let escape = false
  let comment = false

  for (const char of source) {
    const codePoint = char.codePointAt()

    if (comment) {
      if (codePoint === BR) {
        // a comment only ends at the line break
        comment = false
      }
      continue
    }

    switch (codePoint) {
      case EQUAL:
        if (string) {
          token += char
          continue
        }
        if (!leftHand) {
          // Another equal sign before the previous value was terminated,
          // i.e. a single-line structure such as { a = 1 b = 2 c = 3 }
          const parts = token.trim().split(' ')
          const key = parts.pop() // get last part as key
          const val = parts.join(' ').trim() // join the rest
          correctPaths()
          setValue(val)
          paths.pop()
          paths.push(key.trim())
          correctPaths()
          token = ''
          break
        }
        if (token) {
          let key = token.trim()
          if (/^\d+$/.test(key)) {
            // Suffix all-digit keys so they are not reordered when the
            // result is converted to JSON
            key += '#'
          }
          paths.push(key)
          token = ''
        }
        correctPaths()
        leftHand = false // an equal sign starts the right-hand side
        mayBeArray = false // and what follows cannot be array items
        break

      case LBRACE:
        if (string) {
          token += char
          continue
        }
        // `{` puts us back on the left-hand side, and what follows may turn
        // out to be an array
        if (leftHand) {
          if (token) {
            // `{` appeared before any operator: treat as ordinary content
            token += ' '
          } else {
            paths.push('#')
          }
          correctPaths()
        }
        leftHand = true
        mayBeArray = true
        break

      case RBRACE:
        if (string) {
          token += char
          continue
        }
        if (token) {
          // end of content
          token = token.trim()
          correctPaths()
          if (mayBeArray) {
            // No equal sign was seen, so mayBeArray is still true and this
            // is an array. Split on whitespace, but not inside quotes.
            const array = token.match(/(?:[^\s"]+|"[^"]*")+/g)
            setValue(array)
          } else {
            setValue(token)
            /* b_aaaa = {
             *   #1
             *   c = { a = 'xxxx' #2}
             * }
             *
             * For a structure like this the path stack holds [b_aaaa, c, a],
             * so getting from #2 back to #1 requires leaving two levels.
             */
            paths.pop() // leave one level here
          }
          token = ''
        } else if (leftHand && mayBeArray) {
          // nothing between { and }: store an empty object
          setValue({})
        }
        // return to the parent level
        paths.pop()
        mayBeArray = false
        leftHand = true
        break

      case BR:
        if (string) {
          token += char
          continue
        }
        if (leftHand && token) {
          // we are inside an array, which only ends at `}`
          token += ' '
        } else if (token) {
          correctPaths()
          leftHand = true
          setValue(token) // ordinary string content
          token = ''
          paths.pop()
        }
        break

      case DBQUOTE:
        if (escape) {
          token += char
          escape = false
          continue
        }
        string = !string
        token += char
        break

      case POUND:
        if (!string) {
          comment = true
        } else {
          token += char
        }
        break

      case SLASH:
        escape = true
        token += char
        continue

      default:
        if (!token && codePoint === SPACE) {
          // ignore spaces while the token is still empty
          break
        }
        token += char
    }

    escape = false // escape only applies to the very next character
  }

  return json

  /**
   * Stores `val` at the location described by `paths`, turning operator
   * placeholders in string values back into the operators they stand for.
   */
  function setValue(val) {
    if (isString(val)) {
      // Only the first occurrence of each placeholder is restored.
      val = val
        .replace('&gt;', '>')
        .replace('&lt;', '<')
        .replace('&gte;', '>=')
        .replace('&lte;', '<=')
        .replace('&eqeq;', '==')
        .trim()
    }
    // Pre-create placeholder objects along the path so that numeric keys do
    // not make lodash `set` create arrays
    paths.reduce((path, key) => {
      path.push(key)
      if (!get(json, path)) {
        set(json, path, {})
      }
      return path
    }, [])
    set(json, paths, val)
  }

  /**
   * Renames the last key in `paths` to `<key>$$<HEX num>` for as long as the
   * current path already holds a truthy value, so repeated keys (for example
   * many `holy_site` entries) do not overwrite each other.
   */
  function correctPaths(num = 1) {
    const splitter = '$$'
    if (get(json, paths)) {
      const suffix = splitter + Number(num).toString(16).toUpperCase()
      paths[paths.length - 1] = paths[paths.length - 1].split(splitter)[0] + suffix
      // try the next index until a free key is found
      correctPaths(num + 1)
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment