Skip to content

Instantly share code, notes, and snippets.

@skial
Last active September 23, 2015 11:49
Show Gist options
  • Save skial/2c9a03e34251727b3330 to your computer and use it in GitHub Desktop.
Save skial/2c9a03e34251727b3330 to your computer and use it in GitHub Desktop.
Unifill Utf8 decoding? problem
-lib unifill
-cp src
-main Main
-dce no
--each
-neko bin/UnifillUTF8.n
--next
-neko bin/utf8.n
--next
-js bin/utf8.js
--next
-python bin/utf8.py
--next
-php bin/php/utf8/
--next
-java bin/java/utf8/
--next
-cmd neko bin/utf8.n
--next
-cmd python bin/utf8.py
--next
-cmd java -jar "bin/java/utf8/Main.jar"
--next
-cmd node bin/utf8.js
haxe build.hxml
haxelib run hxjava hxjava_build.txt --haxe-version 3200 --feature-level 1
C:\Program Files\java\jdk1.8.0_60\bin\javac.exe "-sourcepath" "src" "-d" "obj" "
-g:none" "@cmd"
Main.hx:15: ---python---
Main.hx:28: 3,true
Main.hx:29: 3,true
Main.hx:30: 8594,true
Main.hx:31: 966,true
Main.hx:32: 171,true
Main.hx:33: [2192,3C6,AB]
Main.hx:36: 7
Main.hx:37: 3,true
Main.hx:38: 8594,true
Main.hx:43: InvalidCodeUnitSequence(1)
Main.hx:48: InvalidCodeUnitSequence(2)
Main.hx:49: [E2,86,92,CF,86,C2,AB]
Main.hx:57: 7
Main.hx:58: 3,true
Main.hx:59: 171,true
Main.hx:64: InvalidCodeUnitSequence(1)
Main.hx:67: 966,false
Main.hx:70: [C2,AB,CF,86,E2,86,92]
Main.hx:78: 7
Main.hx:79: 3,true
Main.hx:80: 171,true
Main.hx:85: InvalidCodeUnitSequence(1)
Main.hx:88: 8594,false
Main.hx:91: [C2,AB,E2,86,92,CF,86]
Main.hx:99: 7
Main.hx:100: 3,true
Main.hx:101: 171,true
Main.hx:106: InvalidCodeUnitSequence(1)
Main.hx:109: 8594,false
Main.hx:112: [C2,AB,E2,86,92,CF,86]
Main.hx:116: 7
Main.hx:117: 3,true
Main.hx:118: 171,true
Main.hx:123: InvalidCodeUnitSequence(1)
Main.hx:126: 8594,false
Main.hx:129: [C2,AB,E2,86,92,CF,86]
Main.hx:15: ---java---
Main.hx:28: 3,true
Main.hx:29: 3,true
Main.hx:30: 8594,true
Main.hx:31: 966,true
Main.hx:32: 171,true
Main.hx:33: [2192,3C6,AB]
Main.hx:36: 7
Main.hx:37: 3,true
Main.hx:38: 8594,true
Main.hx:43: InvalidCodeUnitSequence(1)
Main.hx:48: InvalidCodeUnitSequence(2)
Main.hx:49: [E2,86,92,CF,86,C2,AB]
Main.hx:57: 7
Main.hx:58: 3,true
Main.hx:59: 171,true
Main.hx:64: InvalidCodeUnitSequence(1)
Main.hx:67: 966,false
Main.hx:70: [C2,AB,CF,86,E2,86,92]
Main.hx:78: 7
Main.hx:79: 3,true
Main.hx:80: 171,true
Main.hx:85: InvalidCodeUnitSequence(1)
Main.hx:88: 8594,false
Main.hx:91: [C2,AB,E2,86,92,CF,86]
Main.hx:99: 7
Main.hx:100: 3,true
Main.hx:101: 171,true
Main.hx:106: InvalidCodeUnitSequence(1)
Main.hx:109: 8594,false
Main.hx:112: [C2,AB,E2,86,92,CF,86]
Main.hx:116: 7
Main.hx:117: 3,true
Main.hx:118: 171,true
Main.hx:123: InvalidCodeUnitSequence(1)
Main.hx:126: 8594,false
Main.hx:129: [C2,AB,E2,86,92,CF,86]
Main.hx:15: ---js---
Main.hx:28: 3,true
Main.hx:29: 3,true
Main.hx:30: 8594,true
Main.hx:31: 966,true
Main.hx:32: 171,true
Main.hx:33: [2192,3C6,AB]
Main.hx:36: 7
Main.hx:37: 3,true
Main.hx:38: 8594,true
C:\Users\skial\Dropbox\dev\skialbainn\src\UnifillUTF8\bin\utf8.js:172
haxe_CallStack.lastException = e;
^
ReferenceError: haxe_CallStack is not defined
at Function.Main.main (C:\Users\skial\Dropbox\dev\skialbainn\src\UnifillUTF8
\bin\utf8.js:172:3)
at console.undefined.log (C:\Users\skial\Dropbox\dev\skialbainn\src\UnifillU
TF8\bin\utf8.js:3316:6)
at Object.<anonymous> (C:\Users\skial\Dropbox\dev\skialbainn\src\UnifillUTF8
\bin\utf8.js:3317:3)
at Module._compile (module.js:460:26)
at Object.Module._extensions..js (module.js:478:10)
at Module.load (module.js:355:32)
at Function.Module._load (module.js:310:12)
at Function.Module.runMain (module.js:501:10)
at startup (node.js:129:16)
at node.js:814:3
Error: Command failed with error 1
package;
import unifill.Utf8;
import unifill.InternalEncoding;
using unifill.Unifill;
/**
* ...
* @author Skial Bainn
*/
class Main {
static function main() {
trace(
#if js '---js---'
#elseif neko '---neko---'
#elseif java '---java---'
#elseif python '---python---'
#else '---test---'
#end
);
// char, number, utf-8
// →, 8594, E2 86 92
// φ, 966, CF 86
// «, 171, C2 AB
var value = '→φ«';
trace( value.uLength(), value.uLength() == 3 ); // 3
trace( InternalEncoding.codePointCount( value, 0, value.length ), InternalEncoding.codePointCount( value, 0, value.length ) == 3 ); // 3
trace( value.uCodePointAt( 0 ).toInt(), value.uCodePointAt( 0 ).toInt() == 8594 );
trace( value.uCodePointAt( 1 ).toInt(), value.uCodePointAt( 1 ).toInt() == 966 );
trace( value.uCodePointAt( 2 ).toInt(), value.uCodePointAt( 2 ).toInt() == 171 );
trace( [for (i in 0...value.length) StringTools.hex(InternalEncoding.codeUnitAt(value, i)) ] ); //[E2, 86, 92, CF, 86, C2, AB]
var utf8 = Utf8.fromString( value );
trace( utf8.length );
trace( utf8.codePointCount(0, utf8.length), utf8.codePointCount(0, utf8.length) == 3 );
trace( utf8.codePointAt( 0 ), utf8.codePointAt( 0 ) == 8594 );
try {
trace( utf8.codePointAt( 1 ), utf8.codePointAt( 1 ) == 966 ); // Causes InvalidCodeUnitSequence(1)
} catch (e:Dynamic) trace( e );
try {
trace( utf8.codePointAt( 2 ), utf8.codePointAt( 2 ) == 171 ); // Causes InvalidCodeUnitSequence(2)
} catch (e:Dynamic) trace( e );
trace( [for (i in 0...utf8.length) StringTools.hex(utf8.codeUnitAt(i)) ] ); //[E2, 86, 92, CF, 86, C2, AB]
// char, number, utf-8
// «, 171, C2 AB
// φ, 966, CF 86
// →, 8594, E2 86 92
value = '«φ→';
utf8 = Utf8.fromString( value );
trace( utf8.length );
trace( utf8.codePointCount(0, utf8.length), utf8.codePointCount(0, utf8.length) == 3 );
trace( utf8.codePointAt( 0 ), utf8.codePointAt( 0 ) == 171 );
try {
trace( utf8.codePointAt( 1 ), utf8.codePointAt( 1 ) == 966 ); // Causes InvalidCodeUnitSequence(1)
} catch (e:Dynamic) trace( e );
try {
trace( utf8.codePointAt( 2 ), utf8.codePointAt( 2 ) == 8594 ); // Outputs 966
} catch (e:Dynamic) trace( e );
trace( [for (i in 0...utf8.length) StringTools.hex(utf8.codeUnitAt(i)) ] ); //[C2, AB, CF, 86, E2, 86, 92]
// char, number, utf-8
// «, 171, C2 AB
// →, 8594, E2 86 92
// φ, 966, CF 86
value = '«→φ';
utf8 = Utf8.fromString( value );
trace( utf8.length );
trace( utf8.codePointCount(0, utf8.length), utf8.codePointCount(0, utf8.length) == 3 );
trace( utf8.codePointAt( 0 ), utf8.codePointAt( 0 ) == 171 );
try {
trace( utf8.codePointAt( 1 ), utf8.codePointAt( 1 ) == 8594 ); // Casuses InvalidCodeUnitSequence(1)
} catch (e:Dynamic) trace( e );
try {
trace( utf8.codePointAt( 2 ), utf8.codePointAt( 2 ) == 966 ); // Outputs 8594
} catch (e:Dynamic) trace( e );
trace( [for (i in 0...utf8.length) StringTools.hex(utf8.codeUnitAt(i)) ] ); //[C2, AB, E2, 86, 92, CF, 86]
// char, number, utf-8
// «, 171, C2 AB
// →, 8594, E2 86 92
// φ, 966, CF 86
value = '\u00AB\u2192\u03C6';
utf8 = Utf8.fromString( value );
trace( utf8.length );
trace( utf8.codePointCount(0, utf8.length), utf8.codePointCount(0, utf8.length) == 3 );
trace( utf8.codePointAt( 0 ), utf8.codePointAt( 0 ) == 171 );
try {
trace( utf8.codePointAt( 1 ), utf8.codePointAt( 1 ) == 8594 ); // Causes InvalidCodeUnitSequence(1)
} catch (e:Dynamic) trace( e );
try {
trace( utf8.codePointAt( 2 ), utf8.codePointAt( 2 ) == 966 ); // Outputs 8594
} catch (e:Dynamic) trace( e );
trace( [for (i in 0...utf8.length) StringTools.hex(utf8.codeUnitAt(i)) ] ); //[C2, AB, E", 86, 92, CF 86]
value = InternalEncoding.fromCodePoints( [171, 8594, 966] );
utf8 = Utf8.fromCodePoints( [171, 8594, 966] );
trace( utf8.length );
trace( utf8.codePointCount(0, utf8.length), utf8.codePointCount(0, utf8.length) == 3 );
trace( utf8.codePointAt( 0 ), utf8.codePointAt( 0 ) == 171 );
try {
trace( utf8.codePointAt( 1 ), utf8.codePointAt( 1 ) == 8594 ); // Causes InvalidCodeUnitSequence(1)
} catch (e:Dynamic) trace( e );
try {
trace( utf8.codePointAt( 2 ), utf8.codePointAt( 2 ) == 966 ); // Outputs 8594
} catch (e:Dynamic) trace( e );
trace( [for (i in 0...utf8.length) StringTools.hex(utf8.codeUnitAt(i)) ] ); //[C2, AB, E", 86, 92, CF 86]
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment