ruv/file-01.string-literal-via-recognizer-v5.fth Secret

## file-01.string-literal-via-recognizer-v5.fth
\ String literals like "abc def"
\ via Resolver API v5 (a possible variant)
\ https://forth-standard.org/proposals/minimalistic-core-api-for-recognizers

\ NB: additional parsing (if any) takes place not on recognizing but on further token translating

: parse-slit-end ( sd.start -- sd.full )
  over + char+ source drop >in @ + <> ( addr.start|0 flag )
  \ Do we have a padding between the string sd.start and the parse area?
  if 0<> -11 and throw drop '"' parse exit then \ -11 "result out of range"
  '"' parse + over -
;

: tt-slit-parsing ( c-addr1 u1 -- c-addr2 u2 | ) parse-slit-end tt-slit ;
\ NB: tt-slit is a provided token translator

: quot ( -- sd.quot ) s\" \"" ;

: recognize-string ( sd.lexeme -- sd tt.slit|tt.slit-parsing | 0 )
  quot match-head 0= if 2drop 0 exit then quot match-tail if ['] tt-slit exit then
  2dup quot contains if 2drop 0 exit then \ fail if '"' is found in the middle of the string
  ['] tt-slit-parsing
;

## file-02.string-literal-via-resolver.fth
\ String literals like "abc def"
\ via Resolver API v1
\ https://github.com/ruv/forth-design-exp/blob/master/docs/resolver-api.md

\ NB: additional parsing (if any) takes place not on resolving but on further token translating

: parse-slit-end ( c-addr u1 -- c-addr u2 )
  over + char+ source drop >in @ + <>
  \ ensure that (c-addr u1) is inside SOURCE and before PARSE-AREA
  if 0<> -11 and throw drop '"' parse exit then
  \ -11 "result out of range"
  '"' parse + over -
;

: tt-slit-parsing ( c-addr1 u1 -- c-addr2 u2 | ) parse-slit-end tt-slit ;
\ NB: tt-slit is a provided token translator

: resolve-sliteral ( c-addr1 u1 -- c-addr2 u2 tt-slit|tt-slit-parsing | c-addr1 u1 0 )
  `" match-head ?E0 `" match-tail if 'tt-slit exit then
  2dup `" contains if -1 /string 0 exit then \ fail if '"' is found in the middle of the string
  'tt-slit-parsing
;

\ Appending at the top for testing
' resolve-sliteral preempt-resolver

## file-03.string-literal-via-recognizer-v4.fth
\ String literals like "abc def"
\ via Recognizer API v4
\ http://amforth.sourceforge.net/pr/Recognizer-rfc-D.html


\ NB: additional parsing (if any) takes place on further interpreting or compiling

\ NB: POSTPONE is not applicable to a string literal containing blanks.


' noop  \ actually we need to copy to a buffer here - carbon-slit
' slit, dup
rectype: rectype-slit

: parse-slit-end ( c-addr u1 -- c-addr u2 )
  over + char+ source drop >in @ + <>
  \ ensure that (c-addr u1) is inside SOURCE and before PARSE-AREA
  if 0<> -11 and throw drop '"' parse exit then
  \ -11 "result out of range"
  '"' parse + over -
;
\ NB: parse-slit-end may accept the first part of a string from any location (even outside SOURCE)
\ and concatanate it with the second part in a special buffer. And it may even parse multiple
\ input lines to support multiline string literals.
\ For simplicity of the example, just a basic variant is implemented.


[: parse-slit-end  ;] \ carbon-slit is missed for simplicity
[: parse-slit-end slit, ;]
[: -32 throw ;] \ -32 invalid name argument
rectype: rectype-slit-parsing

: rec-sliteral ( c-addr1 u1 -- c-addr2 u2 rectype-slit|rectype-slit-parsing | 0 )
  dup 0= if 2drop rectype-null exit then
  over c@ '"' <> if 2drop rectype-null exit then
  1 /string dup 0= if rectype-slit-parsing exit then
  \ todo: fail if '"' is found in the middle of the string (c-addr1 u1)
  2dup + char- c@ '"' = if char- rectype-slit exit then
  rectype-slit-parsing
;


\ Appending at the top for testing
forth-recognizer get-recognizer
'rec-markup swap 1+ forth-recognizer set-recognizer
	\ String literals like "abc def"
	\ via Resolver API v5 (a possible variant)
	\ https://forth-standard.org/proposals/minimalistic-core-api-for-recognizers

	\ NB: additional parsing (if any) takes place not on recognizing but on further token translating

	: parse-slit-end ( sd.start -- sd.full )
	over + char+ source drop >in @ + <> ( addr.start\|0 flag )
	\ Do we have a padding between the string sd.start and the parse area?
	if 0<> -11 and throw drop '"' parse exit then \ -11 "result out of range"
	'"' parse + over -
	;

	: tt-slit-parsing ( c-addr1 u1 -- c-addr2 u2 \| ) parse-slit-end tt-slit ;
	\ NB: tt-slit is a provided token translator

	: quot ( -- sd.quot ) s\" \"" ;

	: recognize-string ( sd.lexeme -- sd tt.slit\|tt.slit-parsing \| 0 )
	quot match-head 0= if 2drop 0 exit then quot match-tail if ['] tt-slit exit then
	2dup quot contains if 2drop 0 exit then \ fail if '"' is found in the middle of the string
	['] tt-slit-parsing
	;
	\ String literals like "abc def"
	\ via Resolver API v1
	\ https://github.com/ruv/forth-design-exp/blob/master/docs/resolver-api.md

	\ NB: additional parsing (if any) takes place not on resolving but on further token translating

	: parse-slit-end ( c-addr u1 -- c-addr u2 )
	over + char+ source drop >in @ + <>
	\ ensure that (c-addr u1) is inside SOURCE and before PARSE-AREA
	if 0<> -11 and throw drop '"' parse exit then
	\ -11 "result out of range"
	'"' parse + over -
	;

	: tt-slit-parsing ( c-addr1 u1 -- c-addr2 u2 \| ) parse-slit-end tt-slit ;
	\ NB: tt-slit is a provided token translator

	: resolve-sliteral ( c-addr1 u1 -- c-addr2 u2 tt-slit\|tt-slit-parsing \| c-addr1 u1 0 )
	`" match-head ?E0 `" match-tail if 'tt-slit exit then
	2dup `" contains if -1 /string 0 exit then \ fail if '"' is found in the middle of the string
	'tt-slit-parsing
	;

	\ Appending at the top for testing
	' resolve-sliteral preempt-resolver
	\ String literals like "abc def"
	\ via Recognizer API v4
	\ http://amforth.sourceforge.net/pr/Recognizer-rfc-D.html


	\ NB: additional parsing (if any) takes place on further interpreting or compiling

	\ NB: POSTPONE is not applicable to a string literal containing blanks.


	' noop \ actually we need to copy to a buffer here - carbon-slit
	' slit, dup
	rectype: rectype-slit

	: parse-slit-end ( c-addr u1 -- c-addr u2 )
	over + char+ source drop >in @ + <>
	\ ensure that (c-addr u1) is inside SOURCE and before PARSE-AREA
	if 0<> -11 and throw drop '"' parse exit then
	\ -11 "result out of range"
	'"' parse + over -
	;
	\ NB: parse-slit-end may accept the first part of a string from any location (even outside SOURCE)
	\ and concatanate it with the second part in a special buffer. And it may even parse multiple
	\ input lines to support multiline string literals.
	\ For simplicity of the example, just a basic variant is implemented.


	[: parse-slit-end ;] \ carbon-slit is missed for simplicity
	[: parse-slit-end slit, ;]
	[: -32 throw ;] \ -32 invalid name argument
	rectype: rectype-slit-parsing

	: rec-sliteral ( c-addr1 u1 -- c-addr2 u2 rectype-slit\|rectype-slit-parsing \| 0 )
	dup 0= if 2drop rectype-null exit then
	over c@ '"' <> if 2drop rectype-null exit then
	1 /string dup 0= if rectype-slit-parsing exit then
	\ todo: fail if '"' is found in the middle of the string (c-addr1 u1)
	2dup + char- c@ '"' = if char- rectype-slit exit then
	rectype-slit-parsing
	;


	\ Appending at the top for testing
	forth-recognizer get-recognizer
	'rec-markup swap 1+ forth-recognizer set-recognizer