Created
July 14, 2014 14:01
-
-
Save anonymous/1052abb7c6d542f16037 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* Implementation of character set conversions */ | |
/* Maps Atari ST characters 0x80..0xFF to unicode code points.
 * Index with (atariChar & 127); every entry is a BMP code point.
 * The table is read-only lookup data, hence const.
 * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT
 */
static const int mapAtariToUnicode[128] =
{
    0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
    0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
    0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
    0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x00DF, 0x0192,
    0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
    0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
    0x00E3, 0x00F5, 0x00D8, 0x00F8, 0x0153, 0x0152, 0x00C0, 0x00C3,
    0x00D5, 0x00A8, 0x00B4, 0x2020, 0x00B6, 0x00A9, 0x00AE, 0x2122,
    0x0133, 0x0132, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5,
    0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DB, 0x05DC, 0x05DE, 0x05E0,
    0x05E1, 0x05E2, 0x05E4, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA,
    0x05DF, 0x05DA, 0x05DD, 0x05E3, 0x05E5, 0x00A7, 0x2227, 0x221E,
    0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
    0x03A6, 0x0398, 0x03A9, 0x03B4, 0x222E, 0x03C6, 0x2208, 0x2229,
    0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
    0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x00B3, 0x00AF
};
/* Maps Windows codepage 1252 characters 0x80..0xFF to unicode code points.
 * Index with (windowsChar & 127). Entries of -1 mark codes that have no
 * unicode assignment in this table.
 * The table is read-only lookup data, hence const.
 * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
 */
static const int mapWindowsToUnicode[128] =
{
    0x20AC,     -1, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    0x02C6, 0x2030, 0x0160, 0x2039, 0x0152,     -1, 0x017D,     -1,
        -1, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    0x02DC, 0x2122, 0x0161, 0x203A, 0x0153,     -1, 0x017E, 0x0178,
    0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
/* Atari ST -> Windows1252 lookup for characters 0x80..0xFF, where a
 * counterpart exists. Starts out all zero; initCharacterMappings() stores
 * either the target character code or -1 (no mapping) in every slot.
 */
static int mapAtariToWindows[128] = { 0 };

/* Windows1252 -> Atari ST lookup for characters 0x80..0xFF, where a
 * counterpart exists. Starts out all zero; initCharacterMappings() stores
 * either the target character code or -1 (no mapping) in every slot.
 */
static int mapWindowsToAtari[128] = { 0 };
/** | |
* This function initializes the mapping tables mapAtariToWindows[] and mapWindowsToAtari[]. | |
* For characters which do not have a mapping, -1 is inserted into the table. | |
*/ | |
void initCharacterMappings() | |
{ | |
int i, j, u; | |
for (i = 0; i < 128; i++) | |
{ | |
u = mapAtariToUnicode[i]; | |
for (j = 0; j < 128 && mapWindowsToUnicode[j] != u; j++) | |
; | |
mapAtariToWindows[i] = (j < 128 ? j + 128 : -1); | |
u = mapWindowsToUnicode[i]; | |
for (j = 0; j < 128 && mapAtariToUnicode[j] != u; j++) | |
; | |
mapWindowsToAtari[i] = (j < 128 ? j + 128 : -1); | |
} | |
} | |
/** | |
* Convert a 0-terminated string in the AtariST character set to a | |
* 0-terminated utf-8 encoded string. | |
* dest[] must have space for at most 3*strlen(source)+1 chars, as a single | |
* character of the AtariST charset can consume up to 3 bytes in utf-8. | |
*/ | |
void Str_AtariToUtf8(const char *source, char *dest) | |
{ | |
int c; | |
while (*source) | |
{ | |
c = *source++ & 255; | |
if (c >= 128) | |
{ | |
c = mapAtariToUnicode[c & 127]; | |
} | |
if (c < 128) | |
{ | |
*dest++ = c; // 0xxxxxxx | |
} | |
else if (c < 2048) | |
{ | |
*dest++ = (c >> 6) | 192; // 110xxxxx | |
*dest++ = (c & 63) | 128; // 10xxxxxx | |
} | |
else | |
{ | |
*dest++ = (c >> 12) | 224; // 1110xxxx | |
*dest++ = ((c >> 6) & 63) | 128; // 10xxxxxx | |
*dest++ = (c & 63) | 128; // 10xxxxxx | |
} | |
} | |
*dest = 0; | |
} | |
/** | |
* Convert a 0-terminated utf-8 encoded string to a 0-terminated string | |
* in the AtariST character set. | |
* replacementChar is inserted where there is no mapping. | |
*/ | |
void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar) | |
{ | |
int c, c2, c3, i; | |
while (*source) | |
{ | |
c = *source++ & 255; | |
if (c < 128) { // single-byte utf-8 code | |
*dest++ = c; | |
} | |
else if (c < 192) { // invalid utf-8 encoding | |
*dest++ = replacementChar; | |
} | |
else // multi-byte utf-8 code | |
{ | |
if ((c >= 192) && (c < 224)) | |
{ | |
c2 = *source++; | |
c = ((c & 31) << 6) | (c2 & 63); | |
} | |
else if (c >= 224) | |
{ | |
c2 = *source++; | |
c3 = *source++; | |
c = ((c & 15) << 12) | ((c2 & 63) << 6) | (c3 & 63); | |
} | |
// search AtariST character code for unicode codepoint c | |
for (i = 0; i < 128; i++) | |
{ | |
if (mapAtariToUnicode[i] == c) { break; } | |
} | |
*dest++ = (i < 128 ? i + 128 : replacementChar); | |
} | |
} | |
*dest = 0; | |
} | |
/** | |
* Convert a 0-terminated string in the AtariST character set to a | |
* 0-terminated string in Windows1252 character set. | |
* replacementChar is inserted where there is no mapping. | |
*/ | |
void Str_AtariToWindows(const char *source, char *dest, char replacementChar) | |
{ | |
int c; | |
if (mapAtariToWindows[0] == 0) | |
{ | |
initCharacterMappings(); | |
} | |
while (*source) | |
{ | |
c = *source++ & 255; | |
if (c >= 128) | |
{ | |
c = mapAtariToWindows[c & 127]; | |
} | |
*dest++ = (c < 0 ? replacementChar : c); | |
} | |
*dest = 0; | |
} | |
/** | |
* Convert a 0-terminated string in the Windows1252 character set to a | |
* 0-terminated string in AtariST character set. | |
* replacementChar is inserted where there is no mapping. | |
*/ | |
void Str_WindowsToAtari(const char *source, char *dest, char replacementChar) | |
{ | |
int c; | |
if (mapWindowsToAtari[0] == 0) | |
{ | |
initCharacterMappings(); | |
} | |
while (*source) | |
{ | |
c = *source++ & 255; | |
if (c >= 128) | |
{ | |
c = mapWindowsToAtari[c & 127]; | |
} | |
*dest++ = (c < 0 ? replacementChar : c); | |
} | |
*dest = 0; | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment