Skip to content

Instantly share code, notes, and snippets.

@okuoku
Created Jun 3, 2018
Embed
What would you like to do?
#
# YuniSexpTokenize:
#
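# yuni_sexp_tokenize_ctx_start(<CTX> str)
#   str: NAME of the variable that holds the text to tokenize
# yuni_sexp_tokenize_ctx_next(<CTX> out_result out_start out_end)
# yuni_sexp_tokenize_ctx_token(<CTX> out_result start end)
#   Extracts the input text in the range [start, end) into out_result
# result: Type of the token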
# === Specials ===
# ( ) -- paren
# [ ] -- paren
# # -- is for vector (*)
# #vu8 -- is for bytevector (*)
# #u8 -- is for bytevector (*)
# ' -- is for quote
# ` -- is for quasiquote
# , -- is for unquote
# ,@ -- is for unquote-splicing
# #f -- is for false (*)
# #t -- is for true (*)
# #; -- next-datum-comment
#
# (*) -- Returned as DATUM
#
#
# === Datum types ===
# STRING
# DATUM
#
# === Command ===
# AGAIN
# EOF
#
# (We don't have COMMENT type for now)
# internal:
#
# <ctx>_cur -- Current position in buffer
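# <ctx>_buf -- stream buffer (the not-yet-consumed rest of the input)
# <ctx>_buf0 -- The whole input string (token offsets index into this)
# <ctx>_end -- Length of the whole input string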
# <ctx>_tkns -- Token start location
# <ctx>_bcdep -- Depth for block-comment
# <ctx>_acc -- Accumulator for the state
# <ctx>_st -- current state
# NUL -- Space or BoS
# NRM -- Normal object state
# STR -- String state
# BCM -- Block comment state
# LCM -- Line comment state
#
# yuni_sexp_tokenize_ctx_start(<CTX> str)
#
# Initialise the tokenizer context <CTX> for the text stored in the variable
# whose NAME is passed as `str`.
#
# Being a macro, every <CTX>_* variable (and the scratch variable __end) is
# written directly into the caller's scope.
macro(yuni_sexp_tokenize_ctx_start ctx str)
  # Total input length; the scanner derives the cursor from it.
  string(LENGTH "${${str}}" __end)

  # Cursor starts at the first character.
  set(${ctx}_cur 0)

  # _buf is the shrinking work buffer, _buf0 keeps the untouched input so
  # that token offsets can be resolved later.
  set(${ctx}_buf "${${str}}")
  set(${ctx}_buf0 "${${str}}")

  # Scanner bookkeeping: input length, initial state, no pending token
  # start, no open block comment, empty accumulator.
  set(${ctx}_end "${__end}")
  set(${ctx}_st NUL)
  set(${ctx}_tkns -1)
  unset(${ctx}_bcdep)
  unset(${ctx}_acc)
endmacro()
# yuni_sexp_tokenize_ctx_next(<CTX> out_result out_start out_end)
#
# Fetch the next token from <CTX>. Single scanner steps are repeated for as
# long as the step reports AGAIN; the first other result is left in
# out_result / out_start / out_end (variables in the caller's scope).
macro(yuni_sexp_tokenize_ctx_next ctx out_result out_start out_end)
  # Seed the result with AGAIN so the first step always runs; each step
  # overwrites it.
  set(${out_result} AGAIN)
  while(${out_result} STREQUAL AGAIN)
    yuni_sexp_tokenize_ctx__itr(${ctx} ${out_result} ${out_start} ${out_end})
  endwhile()
endmacro()
# yuni_sexp_tokenize_ctx_token(<CTX> out_result start end)
#
# Copy the characters of the original input of <CTX> in the half-open range
# [start, end) into the variable named by out_result.
# The scratch variable __len is written into the caller's scope.
macro(yuni_sexp_tokenize_ctx_token ctx out_result start end)
  # SUBSTRING takes a length, not an end offset.
  math(EXPR __len "${end}-${start}")
  string(SUBSTRING "${${ctx}_buf0}" "${start}" "${__len}" ${out_result})
endmacro()
# yuni_sexp_tokenize_ctx__itr(<CTX> out_result out_start out_end)
#
# Internal: perform ONE scanner step on <CTX>. The step looks at the head of
# <CTX>_buf according to the state in <CTX>_st, consumes the recognised
# prefix (updating <CTX>_buf and <CTX>_cur) and reports:
#
#   out_result -- AGAIN  (nothing to report yet; call again)
#                 EOF    (cursor has reached the end of the input)
#                 STRING (range covers the text between the quotes)
#                 DATUM
#                 or the text of a special token itself, e.g. ( ) [ ] ' #;
#   out_start  -- offset of the first character of the token in the input
#   out_end    -- offset one past the last character of the token
#
# out_start / out_end are not meaningful when the result is AGAIN or EOF.
# EOF is reported whenever the input is exhausted, whatever the state is.
#
# Being a macro, the scratch variables __has_input, __st, __result, __start,
# __end and __len are written into the caller's scope.
macro(yuni_sexp_tokenize_ctx__itr ctx out_result out_start out_end)
  set(__has_input)
  if(NOT ${${ctx}_cur} EQUAL ${${ctx}_end})
    set(__has_input ON)
  endif()
  set(__st ${${ctx}_st})
  set(__result UNKNOWN)
  set(__start -9999)
  set(__end -8888)
  message(STATUS "${__st} ${${ctx}_cur} ${${ctx}_end}")

  if(NOT __has_input)
    set(__result EOF)
  elseif("${__st}" STREQUAL "NUL")
    # NUL: between tokens.
    # In every branch the new cursor is derived from the length of the
    # unconsumed remainder: cur = end - length(rest).
    if("${${ctx}_buf}" MATCHES "^[ \r\n\t]+(.*)")
      # Whitespace. Again.
      string(LENGTH "${CMAKE_MATCH_1}" __len)
      math(EXPR ${ctx}_cur "${${ctx}_end}-${__len}")
      set(${ctx}_buf "${CMAKE_MATCH_1}")
      set(__result AGAIN)
    elseif("${${ctx}_buf}" MATCHES "^;[^\r\n]*(.*)")
      # Line comment. Only the comment text is consumed; the line break (if
      # any) is skipped as whitespace by the next step. Not requiring the
      # line break lets a comment on the very last line be recognised too.
      string(LENGTH "${CMAKE_MATCH_1}" __len)
      math(EXPR ${ctx}_cur "${${ctx}_end}-${__len}")
      set(${ctx}_buf "${CMAKE_MATCH_1}")
      set(__result AGAIN)
    elseif("${${ctx}_buf}" MATCHES "^#\\|([^|#]*)(#?\\|?#?)(.*)")
      # Block comment: consume up to and including the first run of
      # comment-delimiter characters, then decide from that run.
      string(LENGTH "${CMAKE_MATCH_3}" __len)
      math(EXPR ${ctx}_cur "${${ctx}_end}-${__len}")
      set(${ctx}_buf "${CMAKE_MATCH_3}")
      set(__result AGAIN)
      if("${CMAKE_MATCH_2}" STREQUAL "|#")
        # Consumed full block
      elseif("${CMAKE_MATCH_2}" STREQUAL "#|")
        # Begin with level 2
        set(${ctx}_bcdep x x)
        set(${ctx}_st BCM)
      elseif("${CMAKE_MATCH_2}" STREQUAL "#|#")
        message(FATAL_ERROR "FIXME")
      else()
        # Begin with level 1
        set(${ctx}_bcdep x)
        set(${ctx}_st BCM)
      endif()
    elseif("${${ctx}_buf}" MATCHES "^\"([^\\\\\"]*)(\\\\.?|\")?(.*)")
      # String. Group 2 is either the closing quote or a complete escape
      # (backslash plus the escaped character), so an escaped backslash
      # right before the closing quote is not mistaken for an escaped quote.
      math(EXPR __start "${${ctx}_cur}+1")
      if("${CMAKE_MATCH_2}" STREQUAL "\"")
        # Captured full string
        string(LENGTH "${CMAKE_MATCH_1}" __len)
        math(EXPR __end "${__start}+${__len}")
        math(EXPR ${ctx}_cur "${__end}+1")
        set(__result STRING)
      else()
        # Partial string: remember where it started and continue in STR.
        string(LENGTH "${CMAKE_MATCH_3}" __len)
        math(EXPR ${ctx}_cur "${${ctx}_end}-${__len}")
        set(${ctx}_st STR)
        set(${ctx}_tkns ${__start})
        set(__result AGAIN)
      endif()
      set(${ctx}_buf "${CMAKE_MATCH_3}")
    elseif("${${ctx}_buf}" MATCHES "^(\\(|\\)|\\[|\\]|'|`|#vu8|#u8|,@|,|#f|#t|#\;|#\\\\[a-z]+)(.*)")
      # Delimiters: the token text itself is the result. [ and ] are
      # separate tokens, just like ( and ).
      set(__result "${CMAKE_MATCH_1}")
      set(__start "${${ctx}_cur}")
      string(LENGTH "${CMAKE_MATCH_1}" __len)
      math(EXPR __end "${${ctx}_cur}+${__len}")
      math(EXPR ${ctx}_cur "${__end}")
      set(${ctx}_buf "${CMAKE_MATCH_2}")
    elseif("${${ctx}_buf}" MATCHES "^([^][ \t\r\n()'`@,\"]+)(.*)")
      # Identifiers and other atoms. ] must come first inside the bracket
      # expression to be taken literally.
      set(__result DATUM)
      set(__start "${${ctx}_cur}")
      string(LENGTH "${CMAKE_MATCH_1}" __len)
      math(EXPR __end "${${ctx}_cur}+${__len}")
      math(EXPR ${ctx}_cur "${__end}")
      set(${ctx}_buf "${CMAKE_MATCH_2}")
    else()
      message(FATAL_ERROR "Unmatched.${${ctx}_buf}")
    endif()
  elseif("${__st}" STREQUAL "STR")
    # STR: inside a string whose opening quote was seen at <CTX>_tkns - 1.
    if("${${ctx}_buf}" MATCHES "^([^\\\\\"]*)(\\\\.?|\")?(.*)")
      if("${CMAKE_MATCH_2}" STREQUAL "\"")
        # Captured full string
        string(LENGTH "${CMAKE_MATCH_1}" __len)
        math(EXPR __end "${${ctx}_cur}+${__len}")
        math(EXPR ${ctx}_cur "${__end}+1")
        set(${ctx}_st NUL)
        set(__start ${${ctx}_tkns})
        set(__result STRING)
      else()
        # Partial string
        string(LENGTH "${CMAKE_MATCH_3}" __len)
        math(EXPR ${ctx}_cur "${${ctx}_end}-${__len}")
        set(__result AGAIN)
      endif()
      set(${ctx}_buf "${CMAKE_MATCH_3}")
    else()
      # Defensive only: the pattern above accepts any input.
      message(FATAL_ERROR "Perhaps unterminated string.")
    endif()
  elseif("${__st}" STREQUAL "BCM")
    # BCM: inside a block comment; <CTX>_bcdep holds one element per open
    # nesting level.
    set(__result AGAIN)
    # The leading text may be empty so that delimiter characters directly at
    # the head of the buffer (e.g. the second |# of "|#|#") are handled.
    # Progress is still guaranteed: the buffer is non-empty here, so its
    # first character lands in group 1 or in group 2.
    if("${${ctx}_buf}" MATCHES "^([^|#]*)(#?\\|?#?)(.*)")
      if("${CMAKE_MATCH_2}" STREQUAL "#|")
        # Inc. level
        list(APPEND ${ctx}_bcdep x)
      elseif("${CMAKE_MATCH_2}" STREQUAL "|#")
        # Dec. level
        list(REMOVE_AT ${ctx}_bcdep 0)
      elseif("${CMAKE_MATCH_2}" STREQUAL "#|#")
        message(FATAL_ERROR "FIXME")
      else()
        # Do nothing
      endif()
      if(NOT ${ctx}_bcdep)
        # Outermost comment closed.
        set(${ctx}_st NUL)
      endif()
      string(LENGTH "${CMAKE_MATCH_3}" __len)
      set(${ctx}_buf "${CMAKE_MATCH_3}")
      math(EXPR ${ctx}_cur "${${ctx}_end}-${__len}")
      set(__result AGAIN)
    else()
      # Defensive only: the pattern above accepts any input.
      message(FATAL_ERROR "Perhaps unterminated block comment.")
    endif()
  else()
    message(FATAL_ERROR "Invalid state: ${__st}")
  endif()

  # Quoted so that a result containing ";" (the #; token) is not split into
  # list elements and truncated.
  set(${out_result} "${__result}")
  set(${out_start} "${__start}")
  set(${out_end} "${__end}")
endmacro()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment