Instantly share code, notes, and snippets.

@R32 /Main.hx
Last active Sep 11, 2018

Embed
What would you like to do?
Chinese text handling for hxcpp on Windows
/**
	Small demo: converts a UTF-8 Haxe string into the multibyte encoding of
	the current C locale and traces the result.
**/
class Main {
	static function main() {
		// Select the C runtime's LC_CTYPE locale first; the empty locale name
		// asks setlocale() for the environment's default, which is the
		// encoding the multibyte conversion below will target.
		Mbs.setlocale(LC_CTYPE, "");

		var greeting = "你好, 世界!";
		var localized = Mbs.utf8tombs(greeting);
		trace(localized);
	}
}
#ifndef FRAW_MBS
#define FRAW_MBS
#include "stdlib.h"
#include "stdint.h"
#include "string.h"
#include "locale.h"
#include "wchar.h"
#include "errno.h"
#include <malloc.h>
#if defined(__GNUC__) &&!defined(__MINGW32__)
# include <alloca.h>
#endif
#include "hxcpp.h"
// Copyright (c) 2008-2010 Bjoern Hoehrmann <bjoern@hoehrmann.de>
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.
// Decoder state meaning "a complete code point has just been decoded"
// (also the initial state).
#define UTF8_ACCEPT 0
// Decoder state meaning "the byte sequence is not valid UTF-8".
#define UTF8_REJECT 12
// Lookup table driving decode(): 256 byte-class entries followed by the
// state transition table (indexed as utf8d[256 + state + class]).
static const uint8_t utf8d[] = {
// The first part of the table maps bytes to character classes, which
// reduces the size of the transition table and creates bitmasks.
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
8, 8, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
10, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 3, 11, 6, 6, 6, 5, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
// The second part is a transition table that maps a combination
// of a state of the automaton and a character class to a state.
0, 12, 24, 36, 60, 96, 84, 12, 12, 12, 48, 72, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
12, 0, 12, 12, 12, 12, 12, 0, 12, 0, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 24, 12, 12,
12, 12, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 24, 12, 12, 12, 12, 12, 12, 12, 24, 12, 12,
12, 12, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12, 12, 36, 12, 12, 12, 12, 12, 36, 12, 36, 12, 12,
12, 36, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
};
// Feeds one input byte to the table-driven UTF-8 decoder.
//   state - in/out automaton state; start at UTF8_ACCEPT.
//   codep - in/out code point accumulator; holds a complete code point
//           whenever the returned state is UTF8_ACCEPT.
//   byte  - next byte of the input.
// Returns the new state (also stored in *state).
static uint32_t inline decode(uint32_t* state, uint32_t* codep, uint8_t byte) {
	uint8_t cls = utf8d[byte];
	if (*state == UTF8_ACCEPT) {
		// Lead byte: the class doubles as a shift that masks off the
		// length-marker bits and keeps only the payload bits.
		*codep = (0xff >> cls) & (byte);
	} else {
		// Continuation byte: append its low six bits.
		*codep = (byte & 0x3fu) | (*codep << 6);
	}
	*state = utf8d[256 + *state + cls];
	return *state;
}
// Converts a NUL-terminated UTF-8 string to UTF-16 code units stored in
// wchar_t elements.
//   out - destination buffer, or NULL to only count the units required.
//         The terminating NUL is neither written nor counted.
//   src - NUL-terminated UTF-8 input.
// Returns the number of wchar_t units produced. On an invalid byte sequence
// sets errno to EILSEQ and returns (uint32_t)-1.
// An incomplete multi-byte sequence at the very end of src is dropped
// silently (the decoder never reaches UTF8_ACCEPT for it).
// Code points above U+FFFF are always written as a surrogate pair, regardless
// of how wide wchar_t is on the target platform.
uint32_t utf8towcs(wchar_t* out, const unsigned char* src) {
	uint8_t byte;
	uint32_t codep = 0;
	uint32_t state = UTF8_ACCEPT;
	uint32_t i = 0;
	while ((byte = (uint8_t)*src++) != 0) {
		decode(&state, &codep, byte);
		if (state == UTF8_REJECT) {
			errno = EILSEQ;
			return (uint32_t)-1;
		}
		if (state != UTF8_ACCEPT) {
			continue; // still in the middle of a multi-byte sequence
		}
		// U+FFFF itself is a BMP code point and fits in a single unit; only
		// code points strictly above it need a surrogate pair.
		if (codep <= 0xFFFF) {
			if (out != NULL) {
				out[i] = (wchar_t)codep;
			}
			i += 1;
		} else {
			if (out != NULL) {
				// 0xD7C0 + (cp >> 10) == 0xD800 + ((cp - 0x10000) >> 10)
				out[i] = (wchar_t)(0xD7C0 + (codep >> 10));
				out[i + 1] = (wchar_t)(0xDC00 + (codep & 0x3FF));
			}
			i += 2;
		}
	}
	return i;
}
// Converts a NUL-terminated wide string to UTF-8.
//   out - destination buffer, or NULL to only count the bytes required.
//         The terminating NUL is neither written nor counted.
//   src - NUL-terminated wide string. UTF-16 surrogate pairs are combined
//         into one code point; where wchar_t is wider than 16 bits, values
//         up to U+10FFFF are also accepted directly.
// Returns the number of UTF-8 bytes produced.
// An unpaired surrogate or a value above U+10FFFF is encoded as U+FFFD
// (REPLACEMENT CHARACTER) and never consumes the following unit, so the
// scan cannot step over the terminating NUL.
uint32_t wcstoutf8(unsigned char* out, const wchar_t* src) {
	uint32_t i = 0;
	while (*src) {
		uint32_t cp = (uint32_t)*src++;
		if (cp >= 0xD800 && cp <= 0xDBFF) {
			// High surrogate: only combine when a low surrogate really follows.
			uint32_t lo = (uint32_t)*src;
			if (lo >= 0xDC00 && lo <= 0xDFFF) {
				cp = 0x10000 + (((cp - 0xD800) << 10) | (lo - 0xDC00));
				src++;
			} else {
				cp = 0xFFFD;
			}
		} else if ((cp >= 0xDC00 && cp <= 0xDFFF) || cp > 0x10FFFF) {
			// Stray low surrogate, or a value outside the Unicode range.
			cp = 0xFFFD;
		}

		if (cp < 0x80) {
			if (out != NULL) {
				out[i] = (unsigned char)cp;
			}
			i += 1;
		} else if (cp < 0x800) {
			if (out != NULL) {
				out[i] = (unsigned char)(0xC0 | (cp >> 6));
				out[i + 1] = (unsigned char)(0x80 | (cp & 63));
			}
			i += 2;
		} else if (cp < 0x10000) {
			if (out != NULL) {
				out[i] = (unsigned char)(0xE0 | (cp >> 12));
				out[i + 1] = (unsigned char)(0x80 | ((cp >> 6) & 63));
				out[i + 2] = (unsigned char)(0x80 | (cp & 63));
			}
			i += 3;
		} else {
			if (out != NULL) {
				out[i] = (unsigned char)(0xF0 | (cp >> 18));
				out[i + 1] = (unsigned char)(0x80 | ((cp >> 12) & 63));
				out[i + 2] = (unsigned char)(0x80 | ((cp >> 6) & 63));
				out[i + 3] = (unsigned char)(0x80 | (cp & 63));
			}
			i += 4;
		}
	}
	return i;
}
// Converts a UTF-8 Haxe string to the multibyte encoding of the current C
// locale (as used by wcstombs).
//   hxstr - UTF-8 input string.
// Returns the converted string. Returns an empty string when the input is
// not valid UTF-8 or contains a character the locale cannot represent.
// A string whose data pointer is NULL is returned unchanged.
// NOTE(review): the intermediate wide buffer lives on the stack (alloca), so
// very long inputs could exhaust the stack.
static String utf8tombs(String hxstr) {
	const char* src = hxstr.__s;
	if (src == NULL) return hxstr;

	uint32_t wcs_len = utf8towcs(NULL, (const unsigned char*)src);
	if (wcs_len == (uint32_t)-1) return String("", 0);

	wchar_t* wcs = (wchar_t*) alloca(sizeof(wchar_t) * (wcs_len + 1));
	utf8towcs(wcs, (const unsigned char*)src);
	wcs[wcs_len] = 0;

	// wcstombs reports an unconvertible character as (size_t)-1; without this
	// check the length arithmetic below would wrap around and the terminator
	// write would land far outside the allocation.
	size_t mbs_len = wcstombs(NULL, wcs, 0);
	if (mbs_len == (size_t)-1) return String("", 0);

	char* out = (char*) hx::InternalNew((int)(mbs_len + 1), false);
	wcstombs(out, wcs, mbs_len);
	out[mbs_len] = 0;
	return String(out, (int)mbs_len);
}
// Converts a Haxe string holding text in the current C locale's multibyte
// encoding (as read by mbstowcs) to UTF-8.
//   hxstr - multibyte input string.
// Returns the UTF-8 string, or an empty string when mbstowcs rejects the
// input. A string whose data pointer is NULL is returned unchanged.
// NOTE(review): the intermediate wide buffer lives on the stack (alloca), so
// very long inputs could exhaust the stack.
static String mbstoutf8(String hxstr) {
	const char* src = hxstr.__s;
	if (src == NULL) return hxstr;

	// Keep the result as size_t so the (size_t)-1 error value is compared
	// without truncation.
	size_t wcs_len = mbstowcs(NULL, src, 0);
	if (wcs_len == (size_t)-1) return String("", 0);

	wchar_t* wcs = (wchar_t*) alloca(sizeof(wchar_t) * (wcs_len + 1));
	mbstowcs(wcs, src, wcs_len);
	wcs[wcs_len] = 0;

	uint32_t utf8_len = wcstoutf8(NULL, wcs);
	char* out = (char*) hx::InternalNew(utf8_len + 1, false);
	wcstoutf8((unsigned char*)out, wcs);
	out[utf8_len] = 0;
	return String(out, utf8_len);
}
#endif
import cpp.RawConstPointer;
/**
	Locale category passed as the first argument of `Mbs.setlocale`.

	NOTE(review): the integer values are hard-coded here rather than taken
	from the C header. They look like the MSVC `<locale.h>` numbering; other
	C libraries may number the categories differently, so confirm before
	using this off Windows.
**/
@:enum abstract LocaleCategory(Int) {
var LC_ALL = 0;
var LC_COLLATE = 1;
var LC_CTYPE = 2;
var LC_MONETARY = 3;
var LC_NUMERIC = 4;
var LC_TIME = 5;
}
/**
	Haxe-side bindings for the native helpers in `mbs.h` plus the C
	`setlocale` function.
**/
@:include("./mbs.h")
extern class Mbs {
// Raw binding to the global C `setlocale`; use the `setlocale` wrapper below.
@:native("::setlocale")
private static function _setlocale(cat: LocaleCategory, locale: RawConstPointer<cpp.Char>): RawConstPointer<cpp.Char>;
// Sets the C locale for `cat` from a Haxe string. The value returned by the
// native call is discarded, so a failed call is not reported to the caller.
static inline function setlocale(cat: LocaleCategory, locale: String): Void {
_setlocale(cat, cpp.NativeString.raw(locale));
}
// Converts a UTF-8 string to the current locale's multibyte encoding.
@:native("::utf8tombs") static function utf8tombs(str: String): String;
// Converts a string in the current locale's multibyte encoding to UTF-8.
@:native("::mbstoutf8") static function mbstoutf8(str: String): String;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment