Skip to content

Instantly share code, notes, and snippets.

@nyatla
Last active March 7, 2025 08:00
Show Gist options
  • Select an option

  • Save nyatla/2b887054ada61c84a210d3069b35f486 to your computer and use it in GitHub Desktop.

Select an option

Save nyatla/2b887054ada61c84a210d3069b35f486 to your computer and use it in GitHub Desktop.
JSON Canonicalization Scheme (JCS)の簡易バリデータです。ハッシュを取る前のお掃除にどうぞ
from typing import Iterator,Tuple;
from typing import Iterator, Iterable, Sequence, Union
class CharIterator(Iterator[str]):
    """Iterator over a fully buffered run of characters with a visible cursor.

    The whole input is copied into a list up front so that the cursor
    position and the already-consumed items stay available for error
    reporting (``InvalidJcsException`` reads both ``s`` and ``p``).

    Args:
        s: Any iterable of characters: a ``str``, ``list``, ``tuple``,
            generator, or other iterator. Iterators are drained completely,
            so they must be finite.
    """

    def __init__(self, s: Union[Iterable[str], Iterator[str], Sequence[str]]):
        # Index of the next item that __next__ will return.
        self.p = 0
        # Buffered input. Every kind of iterable is materialised in full;
        # keeping only the first item of an iterator would make all
        # non-sequence input look truncated to a single character.
        self.s = list(s)

    def __next__(self) -> str:
        """Return the item under the cursor and advance by one.

        Raises:
            StopIteration: when the buffer is exhausted.
        """
        if self.p >= len(self.s):
            raise StopIteration()
        c = self.s[self.p]
        self.p += 1
        return c
class InvalidJcsException(Exception):
    """Error raised when the input is rejected by the JCS validator.

    Background notes carried over from the original module description:
    this file is a simple validator for the JSON Canonicalization Scheme
    (JCS), https://datatracker.ietf.org/doc/html/rfc8785, intended for
    pre-checks such as the one done before hashing a JSON document.
    The following parts are not implemented:
    - length of numbers
    - validity checks for \\u and \\x

    The message has the form ``Error <cursor> at <context>`` where
    ``<cursor>`` is ``iter.p`` and ``<context>`` is taken from ``iter.s``.

    Args:
        iter: The character iterator being validated; only its ``s``
            (buffered items) and ``p`` (cursor index) attributes are read.
    """

    def __init__(self, iter: "CharIterator"):
        if isinstance(iter.s, list) and len(iter.s) > 1:
            # Up to ten items immediately before the cursor.
            error_context = iter.s[max(0, iter.p - 10):iter.p]
        elif iter.s:
            # Only one buffered item (or a non-list buffer): show its head.
            error_context = iter.s[0]
        else:
            # Empty buffer: there is nothing to show, and indexing it would
            # replace the validation error with an IndexError.
            error_context = ""
        super().__init__(f"Error {iter.p} at {error_context}")
class JCSValidator:
    """Single-pass structural checker for JCS (RFC 8785) style JSON text.

    Every ``isXxx`` method consumes characters from the iterator it is given
    and reports a violation by raising ``InvalidJcsException``. Input that
    ends too early surfaces as the ``StopIteration`` raised by ``next``.
    No whitespace is accepted anywhere outside string contents.

    Areas that are intentionally loose:
    - numbers are checked for shape only (optional sign, digits, optional
      fraction, optional exponent), not for the exact JCS serialization;
    - the characters following ``\\u`` are not verified;
    - object keys are ordered by comparing their raw, still-escaped text
      with Python string comparison, and empty keys are rejected.
    """

    # ASCII digits only; str.isdigit() would also accept other Unicode digits.
    _DIGITS = frozenset("0123456789")
    # Remaining characters expected after the first letter of each literal.
    _LITERAL_TAILS = {'t': "rue", 'f': "alse", 'n': "ull"}

    def isJcsToken(self, iter: "CharIterator") -> None:
        """Validate one complete document: a single object or array.

        Raises:
            InvalidJcsException: if the document does not start with ``{``
                or ``[``, if its content is invalid, or if any character
                follows the closing bracket.
            StopIteration: if the input ends before the value is complete.
        """
        c = next(iter)
        if c == '{':
            self.isValidObject(iter)
        elif c == '[':
            self.isValidArray(iter)
        else:
            raise InvalidJcsException(iter)
        # The top-level value must be the whole input; anything left over
        # (a second value, stray brackets, whitespace) is an error.
        if next(iter, None) is not None:
            raise InvalidJcsException(iter)

    def _validate_value(self, c: str, iter: "CharIterator") -> str:
        """Validate the value starting with ``c``; return the character after it."""
        if c == '"':
            self.isValidString(iter)
        elif c == '{':
            self.isValidObject(iter)
        elif c == '[':
            self.isValidArray(iter)
        elif c in 'tfn':
            self.isValidLiteral(c, iter)
        else:
            # Numbers have no closing delimiter, so the number scanner has
            # already consumed (and returns) the following character.
            return self.isValidNumber(c, iter)
        return next(iter)

    def isValidArray(self, iter: "CharIterator") -> bool:
        """Validate array content; the opening ``[`` is already consumed.

        Returns:
            True once the matching ``]`` has been consumed.

        Raises:
            InvalidJcsException: on an invalid element, a trailing comma, or
                anything other than ``,`` / ``]`` after an element.
        """
        last_c = None
        while True:
            c = next(iter)
            if c == ']':
                # ']' where an element is expected: fine for an empty array,
                # an error directly after a comma.
                if last_c == ',':
                    raise InvalidJcsException(iter)
                return True
            c = self._validate_value(c, iter)
            if c == ']':
                return True
            if c != ',':
                raise InvalidJcsException(iter)
            last_c = c

    def isValidSet(self, iter: "CharIterator") -> Tuple[str, str]:
        """Validate one ``key":value`` member; the key's opening quote is consumed.

        Returns:
            A ``(terminator, key)`` pair: the character that follows the
            value, and the raw key text.

        Raises:
            InvalidJcsException: if the key, the ``:`` separator, or the
                value is invalid.
        """
        k = self.isValidKey(iter)
        if next(iter) != ':':
            raise InvalidJcsException(iter)
        c = self._validate_value(next(iter), iter)
        return c, k

    def isValidObject(self, iter: "CharIterator") -> None:
        """Validate object content; the opening ``{`` is already consumed.

        Members must be ``"key":value`` pairs whose keys are strictly
        increasing, which also rejects duplicate keys.

        Raises:
            InvalidJcsException: on a member that does not start with a
                quoted key, keys out of order, a trailing comma, or anything
                other than ``,`` / ``}`` after a member.
        """
        last_c = None
        last_key = None
        while True:
            c = next(iter)
            if c == '}':
                if last_c == ',':
                    raise InvalidJcsException(iter)
                return
            if c != '"':
                # Bare values (numbers, literals, nested containers) are not
                # valid object members; every member needs a quoted key.
                raise InvalidJcsException(iter)
            c, k = self.isValidSet(iter)
            if last_key is not None and k <= last_key:
                raise InvalidJcsException(iter)
            last_key = k
            if c == '}':
                return
            if c != ',':
                raise InvalidJcsException(iter)
            last_c = c

    def _skip_digits(self, iter: "CharIterator") -> Tuple[str, int]:
        """Consume a run of ASCII digits.

        Returns:
            ``(terminator, count)``: the first non-digit character read and
            the number of digits consumed before it.
        """
        count = 0
        while True:
            c = next(iter)
            if c in self._DIGITS:
                count += 1
                continue
            return c, count

    def isValidNumber(self, pre_c: str, iter: "CharIterator") -> str:
        """Validate a number whose first character ``pre_c`` is already read.

        Accepted shape: ``[+-]?digits?(.digits?)?([eE][+-]?digits)?`` with at
        least one digit in the mantissa and one in the exponent.

        Returns:
            The first character after the number (already consumed).

        Raises:
            InvalidJcsException: if ``pre_c`` cannot start a number, or the
                mantissa or exponent contains no digit.
        """
        # First character.
        num = 0
        if pre_c in ('+', '-', '.'):
            pass
        elif pre_c in self._DIGITS:
            num += 1
        else:
            raise InvalidJcsException(iter)
        # Integer part.
        if pre_c == '.':
            c = pre_c
        else:
            c, seen = self._skip_digits(iter)
            num += seen
        # Fractional part.
        if c == '.':
            c, seen = self._skip_digits(iter)
            num += seen
        if num == 0:
            raise InvalidJcsException(iter)  # no digits in the mantissa
        # Exponent part.
        if c in ('e', 'E'):
            num = 0
            c = next(iter)
            if c in ('+', '-'):
                pass
            elif c in self._DIGITS:
                num += 1
            else:
                raise InvalidJcsException(iter)  # neither sign nor digit
            c, seen = self._skip_digits(iter)
            num += seen
            if num == 0:
                raise InvalidJcsException(iter)  # no digits in the exponent
        return c

    def _scan_string(self, iter: Iterator[str]) -> str:
        """Consume string content up to and including the closing quote.

        Returns:
            The raw content with escape sequences left as written.

        Raises:
            InvalidJcsException: on a backslash followed by a character
                outside ``"\\/bfnrtu``, or on a raw control character
                (below U+0020), which JSON requires to be escaped.
        """
        raw = []
        while True:
            c = next(iter)
            if c == '"':  # end of string
                return "".join(raw)
            if c == '\\':  # start of an escape sequence
                raw.append(c)
                c = next(iter)  # the escaped character
                if c not in '"\\/bfnrtu':
                    raise InvalidJcsException(iter)
            elif c < ' ':
                raise InvalidJcsException(iter)
            raw.append(c)

    # String validation (escape sequences supported).
    def isValidString(self, iter: Iterator[str]):
        """Validate a string value; the opening quote is already consumed.

        Raises:
            InvalidJcsException: on an invalid escape or raw control character.
        """
        self._scan_string(iter)

    def isValidKey(self, iter: Iterator[str]) -> str:
        """Validate an object key; the opening quote is already consumed.

        Returns:
            The raw key text with escape sequences left as written.

        Raises:
            InvalidJcsException: on an empty key, an invalid escape, or a
                raw control character.
        """
        k = self._scan_string(iter)
        if len(k) == 0:
            raise InvalidJcsException(iter)
        return k

    def isValidLiteral(self, c: str, iter: "CharIterator"):
        """Validate the rest of ``true`` / ``false`` / ``null`` after its first letter.

        Raises:
            InvalidJcsException: if ``c`` is not ``t``, ``f`` or ``n``, or
                the following characters do not complete the literal.
        """
        expected = self._LITERAL_TAILS.get(c)
        if expected is None:
            raise InvalidJcsException(iter)
        for expected_char in expected:
            if next(iter) != expected_char:
                raise InvalidJcsException(iter)
# 単体実行時にテスト実行
# Self-test: runs only when the file is executed directly.
if __name__ == "__main__":
    # Documents the validator is expected to accept.
    S = [
        "[true,false,null]",
        "{\"a\":true,\"b\":false,\"c\":null}",
        "{\"age\":30,\"name\":\"Alice\"}",
        "{\"address\":{\"city\":\"New York\",\"state\":\"NY\"},\"name\":\"Alice\"}",
        "[\"apple\",\"banana\",\"cherry\"]",
        "[{\"age\":30,\"name\":\"Alice\"},{\"age\":25,\"name\":\"Bob\"}]",
        "{}",
        "[]",
        "{\"location\":{\"city\":\"New York\",\"country\":\"USA\"},\"person\":{\"age\":30,\"name\":\"Alice\"}}",
        "[[1,2e3,+3.0],[-4.1e5,5,6],[7,8,9,.5,5.e+1,.5e-3,1.111e0]]",
        # Escapes (patterns the validator treats as acceptable).
        "{\"key\":\"value with \\n newline\"}",
        "{\"key\":\"value with \\t tab\"}",
        "{\"key\":\"value with \\\" quote\"}",
        "{\"key\":\"backslash \\\\\"}",
        "{\"key\":\"unicode \\u0041\"}",  # \u0041 = "A"
        "{\"key\":\"multiple escapes \\\" \\\\ \\n \\t \\b \\f \\r\"}",
        # Number formats (boundary and edge cases).
        "[0,1,-1,1.0,-1.0,1e10,1E-10,-1.23456789,0.5,.5,5.]",
        "[12345678901234567890]",
        "[1e-1,1e+1,1E-1,1E+1]",
        # Nested and mixed structures.
        "{\"a\":{\"b\":[1,2,3],\"c\":true},\"d\":[false,null,3.14]}",
        "[{\"nested\":{\"key\":\"value\"}},[\"array in array\"],{\"k\":1}]",
    ]
    # Documents the validator is expected to reject.
    F = [
        "{\"a\":1,}",
        "1",
        "true",
        "\"Hello, World!\"",
        "\"Hello \\xWorld\"",
        "\"Hello \\\"World\\\"\"",
        "[true,false,null ]",
        "[\"a\":true,\"b\":false,\"c\":null]",
        "{ }",
        "[ ]",
        "[, ]",
        "{\"z\": 1, \"a\": 2}",
        "[5.+e1]",
        "{\"a\":1,\"a\":2}",
        "{\"person\":{\"name\":\"Alice\",\"age\":30},\"location\":{\"city\":\"New York\",\"country\":\"USA\"}}",
        "[{\"name\":\"Alice\",\"age\":30},{\"name\":\"Bob\",\"age\":25}]",
        "{\"name\":\"Alice\",\"age\":30}",
        "{\"age\":30\"name\":\"Alice\"}",
        "{\"name\":\"Alice\",\"address\":{\"city\":\"New York\",\"state\":\"NY\"}}",
        "\"Hello, World!",
        "{: \"value\"}",
        "{123: \"value\"}",
        "[\"apple\" \"banana\"]",
        "[{\"name\": \"Alice\", \"age\": \"thirty\"}, {\"name\": \"Bob\", \"age\": \"twenty\"}]",
        "{\"name\" \"Alice\"}",
        "[\"apple\", \"banana\",]",
        "{\"name\": \"Alice\", \"age\": 30,}",
        # The comma after the next entry matters: without it Python joins
        # this literal with "[1e]" below into a single bogus test case.
        "[{\"name\": \"Alice\"}, \"banana\"]",
        # Escapes (containing invalid escape characters) - left disabled,
        # as in the original list.
        # "{\"key\":\"value with \\x invalid escape\"}",
        # "{\"key\":\"value with \\u123 invalid unicode\"}",
        # "{\"key\":\"value with \\u12 invalid unicode\"}",
        # "{\"key\":\"value with \\uGGGG invalid unicode\"}",
        # Number formats (malformed numbers).
        "[1e]",
        "[1e+]",
        "[1e-]",
        "[.e1]",
        # Trailing commas / extra spaces.
        "[1,2,3,]",
        "{\"a\":1, \"b\":2,}",
        # Top-level scalars, which the validator does not accept.
        "true",
        "null",
        "\"string\"",
        "123",
        # Key order violation (reversed order).
        "{\"b\":1,\"a\":2}",
    ]

    # Test runner.
    def run_tests():
        """Validate every sample in S and F and print one verdict line each.

        A sample counts as rejected when validation raises either
        InvalidJcsException or StopIteration (input ended too early).
        """
        jcs_validator = JCSValidator()
        print("=== PASS CASES (Expected to pass) ===")
        for i in S:
            try:
                jcs_validator.isJcsToken(CharIterator(i))
                print(f"PASS: {i}")
            except InvalidJcsException as e:
                print(f"FAIL (Unexpected Failure): {i} => {e}")
            except StopIteration:
                print(f"FAIL (Unexpected StopIteration): {i}")
        print("\n=== FAIL CASES (Expected to fail) ===")
        for i in F:
            try:
                jcs_validator.isJcsToken(CharIterator(i))
                print(f"FAIL (Should have failed): {i}")
            except InvalidJcsException as e:
                print(f"PASS (Expected Failure): {i} => {e}")
            except StopIteration:
                print(f"PASS (Expected Failure): {i} => StopIteration")

    run_tests()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment