meijeru/unicode-multiple.r

## unicode-multiple.r
REBOL []

utf8-to-cps: func [
	"Decodes UTF-8 bytes into a block of code points (integers >= 0 and < 1114112)."
	u [binary!] "UTF-8 encoded input; the caller's series position is not changed"
	/local bcp b1 b2 b3 b4 cp
][
	; Coding errors are skipped one byte at a time: stray continuation bytes,
	; truncated sequences, lead bytes followed by non-continuation bytes,
	; invalid lead bytes (248-255) and values beyond the Unicode range.
	; Overlong encodings and surrogate code points are NOT rejected.
	bcp: make block! length? u ; overestimated: at most one code point per byte
	while [not tail? u][
		b1: u/1
		case [
			b1 < 128 [
				; single byte (ASCII)
				append bcp b1
				u: next u
			]
			b1 < 192 [
				; continuation byte without a lead byte
				u: next u
			]
			b1 < 224 [
				; two-byte sequence: 110xxxxx 10xxxxxx
				either all [
					not tail? skip u 1
					(b2: u/2) >= 128 b2 < 192
				][
					append bcp (shift/left (b1 - 192) 6) or (b2 - 128)
					u: skip u 2
				][
					u: next u
				]
			]
			b1 < 240 [
				; three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
				either all [
					not tail? skip u 2
					(b2: u/2) >= 128 b2 < 192
					(b3: u/3) >= 128 b3 < 192
				][
					append bcp (shift/left (b1 - 224) 12)
						or (shift/left (b2 - 128) 6) or (b3 - 128)
					u: skip u 3
				][
					u: next u
				]
			]
			b1 < 248 [
				; four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
				either all [
					not tail? skip u 3
					(b2: u/2) >= 128 b2 < 192
					(b3: u/3) >= 128 b3 < 192
					(b4: u/4) >= 128 b4 < 192
					; an integer is always truthy inside ALL, so this line only
					; computes the value; the next line enforces the upper bound
					cp: (shift/left (b1 - 240) 18)
						or (shift/left (b2 - 128) 12)
						or (shift/left (b3 - 128) 6) or (b4 - 128)
					cp < 1114112
				][
					append bcp cp
					u: skip u 4
				][
					u: next u
				]
			]
			true [
				; 248-255 can never start a sequence; advancing here is what
				; keeps the loop from spinning forever on such bytes
				u: next u
			]
		]
	]
	bcp
]
	REBOL []

	utf8-to-cps: func [
		"Decodes UTF-8 bytes into a block of code points (integers >= 0 and < 1114112)."
		u [binary!] "UTF-8 encoded input; the caller's series position is not changed"
		/local bcp b1 b2 b3 b4 cp
	][
		; NOTE(review): this definition repeats an earlier utf8-to-cps in this
		; file and, being evaluated later, is the one that stays in effect.
		; Coding errors are skipped one byte at a time: stray continuation bytes,
		; truncated sequences, lead bytes followed by non-continuation bytes,
		; invalid lead bytes (248-255) and values beyond the Unicode range.
		; Overlong encodings and surrogate code points are NOT rejected.
		bcp: make block! length? u ; overestimated: at most one code point per byte
		while [not tail? u][
			b1: u/1
			case [
				b1 < 128 [
					; single byte (ASCII)
					append bcp b1
					u: next u
				]
				b1 < 192 [
					; continuation byte without a lead byte
					u: next u
				]
				b1 < 224 [
					; two-byte sequence: 110xxxxx 10xxxxxx
					either all [
						not tail? skip u 1
						(b2: u/2) >= 128 b2 < 192
					][
						append bcp (shift/left (b1 - 192) 6) or (b2 - 128)
						u: skip u 2
					][
						u: next u
					]
				]
				b1 < 240 [
					; three-byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
					either all [
						not tail? skip u 2
						(b2: u/2) >= 128 b2 < 192
						(b3: u/3) >= 128 b3 < 192
					][
						append bcp (shift/left (b1 - 224) 12)
							or (shift/left (b2 - 128) 6) or (b3 - 128)
						u: skip u 3
					][
						u: next u
					]
				]
				b1 < 248 [
					; four-byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
					either all [
						not tail? skip u 3
						(b2: u/2) >= 128 b2 < 192
						(b3: u/3) >= 128 b3 < 192
						(b4: u/4) >= 128 b4 < 192
						; an integer is always truthy inside ALL, so this line only
						; computes the value; the next line enforces the upper bound
						cp: (shift/left (b1 - 240) 18)
							or (shift/left (b2 - 128) 12)
							or (shift/left (b3 - 128) 6) or (b4 - 128)
						cp < 1114112
					][
						append bcp cp
						u: skip u 4
					][
						u: next u
					]
				]
				true [
					; 248-255 can never start a sequence; advancing here is what
					; keeps the loop from spinning forever on such bytes
					u: next u
				]
			]
		]
		bcp
	]