-
-
Save zengargoyle/985145 to your computer and use it in GitHub Desktop.
x86 assembly UTF-16 -> UTF-8 test (GAS Syntax)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * out2 - write one UTF-16 code unit to stdout encoded as UTF-8.
 *
 * Bit layout of the conversion:
 *   0000 0000 0xxx xxxx -> 0xxx xxxx
 *   0000 0yyy yyxx xxxx -> 110y yyyy 10xx xxxx
 *   zzzz yyyy yyxx xxxx -> 1110 zzzz 10yy yyyy 10xx xxxx
 *
 * t: the 16-bit code unit. Surrogate halves are not paired up; each one
 *    takes the three-byte branch like any other value >= 0x0800.
 *
 * Returns the number of UTF-8 bytes emitted (1, 2 or 3). The function was
 * always declared int but previously fell off the end without returning.
 */
int out2(unsigned short int t) {
    unsigned char c[3];
    int len = 0;
    int i;

    if (!(t & 0xFF80)) {
        /* 7-bit value: single byte, high bit clear. */
        c[len++] = t & 0x007F;
    } else {
        if (!(t & 0xF800)) {
            /* 11-bit value: lead byte carries the top five bits. */
            c[len++] = 0xC0 | ((t & 0x07C0) >> 6);
        } else {
            /* 16-bit value: lead byte carries the top four bits ... */
            c[len++] = 0xE0 | ((t & 0xF000) >> 12);
            /* ... followed by a continuation byte with the middle six. */
            c[len++] = 0x80 | ((t & 0x0FC0) >> 6);
        }
        /* Final continuation byte: low six bits (both multi-byte forms). */
        c[len++] = 0x80 | (t & 0x003F);
    }

    for (i = 0; i < len; i++)
        printf("%c", c[i]);

    return len;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# ----------------------------------------------------------------------
# Assemble-time constants used with the int $0x80 system-call interface.
# ----------------------------------------------------------------------
        .equ    SYS_EXIT,  1            # system-call number loaded into %eax by done
        .equ    SYS_WRITE, 4            # system-call number loaded into %eax by putc
        .equ    STDOUT,    1            # file descriptor putc writes to
        .equ    RC_OK,     0            # exit status passed by done

# ubuf: 3-byte, byte-aligned, file-local scratch buffer that receives the
# UTF-8 bytes of one encoded code unit.
        .local  ubuf
        .comm   ubuf, 3, 1

# utwo: table of 16-bit code units to convert. Each test value is followed
# by 0x000a (line feed); a 0x0000 unit terminates the table.
        .data
utwo:
        .short  0x0024, 0x000a          # one-byte case
        .short  0x00A2, 0x000a          # two-byte case
        .short  0x20AC, 0x000a          # three-byte case
        .short  0x0000                  # terminator
# ----------------------------------------------------------------------
# _start: program entry point (i386, AT&T / GAS syntax).
#
# Walks the 16-bit table utwo. Each non-zero code unit is handed to coder
# and the result is printed with putc; a zero unit jumps to done.
#
# Register roles inside the loop:
#   %ebx = index of the current code unit (scaled by 2 in the load)
#   %cx  = the code unit just loaded
# ----------------------------------------------------------------------
        .text
        .global _start
_start:
        xorl    %ecx, %ecx
        xorl    %ebx, %ebx              # start at the first table entry
        xorl    %eax, %eax
.Lnext_unit:
        movw    utwo(,%ebx,2), %cx      # cx = utwo[ebx]
        testw   %cx, %cx
        je      done                    # 0x0000 marks the end of the table
        call    coder                   # per its header: ecx -> bytes, edx = length
        call    putc
        incl    %ebx                    # advance to the next code unit
        jmp     .Lnext_unit
# ----------------------------------------------------------------------
# coder: encode one 16-bit UTF-16 code unit as UTF-8 into ubuf.
#
#   0000 0000 0xxx xxxx -> 0xxx xxxx
#   0000 0yyy yyxx xxxx -> 110y yyyy 10xx xxxx
#   zzzz yyyy yyxx xxxx -> 1110 zzzz 10yy yyyy 10xx xxxx
#
# In:    %cx  = code unit to encode
# Out:   %ecx = address of ubuf (holds the encoded bytes)
#        %edx = number of bytes stored (1, 2 or 3)
# Clobb: %ax, flags
# Note:  surrogate halves are not combined; each takes the 3-byte path.
#
# All shifts are logical (shr). The previous arithmetic shift (sar) by 12
# sign-extended code units >= 0x8000 and produced a lead byte of 0xF8..0xFF
# instead of 0xE8..0xEF.
# ----------------------------------------------------------------------
coder:
        xorl    %edx, %edx              # edx = write index into ubuf
        testw   $0xFF80, %cx
        jnz     .Lcoder_multi           # needs more than 7 bits
        movw    %cx, %ax
        andw    $0x007F, %ax            # 0xxx xxxx
        jmp     .Lcoder_store_last

.Lcoder_multi:
        testw   $0xF800, %cx
        jnz     .Lcoder_three           # needs more than 11 bits
        movw    %cx, %ax
        andw    $0x07C0, %ax
        shrw    $6, %ax
        orw     $0x00C0, %ax            # 110y yyyy
        movb    %al, ubuf(,%edx,1)
        jmp     .Lcoder_tail

.Lcoder_three:
        movw    %cx, %ax
        andw    $0xF000, %ax
        shrw    $12, %ax                # logical: bit 15 must not be smeared
        orw     $0x00E0, %ax            # 1110 zzzz
        movb    %al, ubuf(,%edx,1)
        incl    %edx
        movw    %cx, %ax
        andw    $0x0FC0, %ax
        shrw    $6, %ax
        orw     $0x0080, %ax            # 10yy yyyy
        movb    %al, ubuf(,%edx,1)

.Lcoder_tail:
        incl    %edx                    # step past the byte just stored
        movw    %cx, %ax
        andw    $0x003F, %ax
        orw     $0x0080, %ax            # 10xx xxxx

.Lcoder_store_last:
        leal    ubuf, %ecx              # cx is consumed; ecx becomes the result pointer
        movb    %al, (%ecx,%edx,1)      # store the final (or only) byte
        incl    %edx                    # index of last byte + 1 = length
        ret
# ----------------------------------------------------------------------
# putc: issue the SYS_WRITE system call on STDOUT via int $0x80.
#
# In:    %ecx, %edx = passed through unchanged to the system call
#                     (coder leaves the buffer address and byte count there)
# Out:   %eax = whatever the system call leaves there; callers ignore it
# Saves: %ebx (the caller's table index) across the call
# ----------------------------------------------------------------------
putc:
        pushl   %ebx                    # ebx is needed for the fd argument
        movl    $SYS_WRITE, %eax
        movl    $STDOUT, %ebx
        int     $0x80
        popl    %ebx
        ret
# ----------------------------------------------------------------------
# done: end the program through the SYS_EXIT system call with status RC_OK.
# Reached by a jump from the main loop when the table terminator is read.
# ----------------------------------------------------------------------
done:
        movl    $SYS_EXIT, %eax
        movl    $RC_OK, %ebx            # exit status
        int     $0x80
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment