Skip to content

Instantly share code, notes, and snippets.

Created July 14, 2014 14:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/1052abb7c6d542f16037 to your computer and use it in GitHub Desktop.
Save anonymous/1052abb7c6d542f16037 to your computer and use it in GitHub Desktop.
/* Implementation of character set conversions */
/* Maps Atari ST characters 0x80..0xFF to unicode code points.
 * The table is indexed by (character - 0x80); the comment at the start of
 * each row gives the Atari ST code of the first entry in that row.
 * The table is read-only lookup data, hence const.
 * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT
 */
static const int mapAtariToUnicode[128] =
{
    /* 0x80 */ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
    /* 0x88 */ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
    /* 0x90 */ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
    /* 0x98 */ 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x00DF, 0x0192,
    /* 0xA0 */ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
    /* 0xA8 */ 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
    /* 0xB0 */ 0x00E3, 0x00F5, 0x00D8, 0x00F8, 0x0153, 0x0152, 0x00C0, 0x00C3,
    /* 0xB8 */ 0x00D5, 0x00A8, 0x00B4, 0x2020, 0x00B6, 0x00A9, 0x00AE, 0x2122,
    /* 0xC0 */ 0x0133, 0x0132, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5,
    /* 0xC8 */ 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DB, 0x05DC, 0x05DE, 0x05E0,
    /* 0xD0 */ 0x05E1, 0x05E2, 0x05E4, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA,
    /* 0xD8 */ 0x05DF, 0x05DA, 0x05DD, 0x05E3, 0x05E5, 0x00A7, 0x2227, 0x221E,
    /* 0xE0 */ 0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
    /* 0xE8 */ 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x222E, 0x03C6, 0x2208, 0x2229,
    /* 0xF0 */ 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
    /* 0xF8 */ 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x00B3, 0x00AF
};
/* Maps Windows codepage 1252 characters 0x80..0xFF to unicode code points.
 * The table is indexed by (character - 0x80); the comment at the start of
 * each row gives the Windows-1252 code of the first entry in that row.
 * An entry of -1 marks a code that has no unicode mapping in this table.
 * The table is read-only lookup data, hence const.
 * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP1252.TXT
 */
static const int mapWindowsToUnicode[128] =
{
    /* 0x80 */ 0x20AC,     -1, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
    /* 0x88 */ 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152,     -1, 0x017D,     -1,
    /* 0x90 */     -1, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
    /* 0x98 */ 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153,     -1, 0x017E, 0x0178,
    /* 0xA0 */ 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
    /* 0xA8 */ 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
    /* 0xB0 */ 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
    /* 0xB8 */ 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
    /* 0xC0 */ 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
    /* 0xC8 */ 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
    /* 0xD0 */ 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
    /* 0xD8 */ 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
    /* 0xE0 */ 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
    /* 0xE8 */ 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
    /* 0xF0 */ 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
    /* 0xF8 */ 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
};
/* Maps Atari ST characters 0x80..0xFF to Windows1252 characters, where possible.
 * Indexed by (character - 0x80). As an object with static storage duration the
 * table starts out all zero; initCharacterMappings() then sets every entry to
 * either a Windows1252 code in 0x80..0xFF or -1 when there is no equivalent. */
static int mapAtariToWindows[128];
/* Maps Windows1252 characters 0x80..0xFF to Atari ST characters, where possible.
 * Indexed by (character - 0x80). As an object with static storage duration the
 * table starts out all zero; initCharacterMappings() then sets every entry to
 * either an Atari ST code in 0x80..0xFF or -1 when there is no equivalent. */
static int mapWindowsToAtari[128];
/**
 * Look up codePoint in a 128-entry code point table.
 * Returns the index (0..127) of the first matching entry, or -1 when the
 * table does not contain codePoint.
 */
static int findCodePointIndex(const int *table, int codePoint)
{
    int idx;

    for (idx = 0; idx < 128; idx++)
    {
        if (table[idx] == codePoint)
        {
            return idx;
        }
    }
    return -1;
}

/**
 * Fills the derived tables mapAtariToWindows[] and mapWindowsToAtari[] by
 * matching the unicode code points of both character sets against each other.
 * Every entry ends up as the equivalent character code (0x80..0xFF) in the
 * other character set, or as -1 when the other set has no such character.
 */
void initCharacterMappings()
{
    int ch;

    for (ch = 0; ch < 128; ch++)
    {
        /* position of the Atari character's code point in the Windows table */
        int inWindows = findCodePointIndex(mapWindowsToUnicode, mapAtariToUnicode[ch]);
        /* position of the Windows character's code point in the Atari table */
        int inAtari = findCodePointIndex(mapAtariToUnicode, mapWindowsToUnicode[ch]);

        /* table indices are offsets from 0x80, so add 128 to get a character code */
        mapAtariToWindows[ch] = (inWindows < 0) ? -1 : inWindows + 128;
        mapWindowsToAtari[ch] = (inAtari < 0) ? -1 : inAtari + 128;
    }
}
/**
 * Translate a 0-terminated AtariST string into a 0-terminated utf-8 string.
 * Characters below 0x80 are copied unchanged; characters 0x80..0xFF are
 * looked up in mapAtariToUnicode[] and written as a 2- or 3-byte sequence.
 * dest[] must provide room for at least 3*strlen(source)+1 chars, because a
 * single AtariST character can expand to as many as 3 utf-8 bytes.
 */
void Str_AtariToUtf8(const char *source, char *dest)
{
    const char *in;
    char *out = dest;

    for (in = source; *in != '\0'; in++)
    {
        int cp = *in & 0xFF;

        if (cp & 0x80)
        {
            /* upper half of the charset: translate to its unicode code point */
            cp = mapAtariToUnicode[cp - 0x80];
        }

        if (cp < 0x80)
        {
            /* one byte: 0xxxxxxx */
            *out++ = cp;
        }
        else if (cp < 0x800)
        {
            /* two bytes: 110xxxxx 10xxxxxx */
            out[0] = 0xC0 | (cp >> 6);
            out[1] = 0x80 | (cp & 0x3F);
            out += 2;
        }
        else
        {
            /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
            out[0] = 0xE0 | (cp >> 12);
            out[1] = 0x80 | ((cp >> 6) & 0x3F);
            out[2] = 0x80 | (cp & 0x3F);
            out += 3;
        }
    }
    *out = '\0';
}
/**
 * Convert a 0-terminated utf-8 encoded string to a 0-terminated string
 * in the AtariST character set.
 *
 * source          0-terminated utf-8 input.
 * dest            output buffer; at most strlen(source)+1 chars are written,
 *                 since every input byte yields at most one output char.
 * replacementChar inserted where there is no mapping.
 *
 * One replacementChar is written for:
 *  - a code point that has no AtariST equivalent (including every code point
 *    encoded as a 4-byte sequence),
 *  - a byte that cannot start a sequence (stray continuation byte 0x80..0xBF
 *    or a lead byte 0xF8..0xFF),
 *  - a sequence that is cut short, either by the end of the string or by a
 *    byte that is not a continuation byte. The offending byte is not
 *    consumed, so the 0 terminator is never skipped and a following
 *    character is not swallowed,
 *  - an overlong encoding (a code point encoded with more bytes than needed).
 */
void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar)
{
    while (*source)
    {
        int lead = *source++ & 255;
        int codePoint;   /* code point assembled from the sequence */
        int pending;     /* continuation bytes still expected */
        int minimum;     /* smallest code point legal for this sequence length */
        int i;

        if (lead < 0x80)                        /* 0xxxxxxx: single-byte code */
        {
            *dest++ = lead;
            continue;
        }

        if (lead >= 0xC0 && lead < 0xE0)        /* 110xxxxx: 2-byte sequence */
        {
            codePoint = lead & 0x1F;
            pending = 1;
            minimum = 0x80;
        }
        else if (lead >= 0xE0 && lead < 0xF0)   /* 1110xxxx: 3-byte sequence */
        {
            codePoint = lead & 0x0F;
            pending = 2;
            minimum = 0x800;
        }
        else if (lead >= 0xF0 && lead < 0xF8)   /* 11110xxx: 4-byte sequence */
        {
            codePoint = lead & 0x07;
            pending = 3;
            minimum = 0x10000;
        }
        else                                    /* not a valid lead byte */
        {
            *dest++ = replacementChar;
            continue;
        }

        /* Consume continuation bytes (10xxxxxx) only. The 0 terminator is not
         * a continuation byte, so this loop can never run past the string. */
        while (pending > 0 && ((unsigned char)*source & 0xC0) == 0x80)
        {
            codePoint = (codePoint << 6) | (*source++ & 0x3F);
            pending--;
        }

        if (pending > 0 || codePoint < minimum) /* truncated or overlong */
        {
            *dest++ = replacementChar;
            continue;
        }

        /* search AtariST character code for unicode codepoint */
        for (i = 0; i < 128; i++)
        {
            if (mapAtariToUnicode[i] == codePoint) { break; }
        }
        *dest++ = (i < 128 ? i + 128 : replacementChar);
    }
    *dest = 0;
}
/**
 * Translate a 0-terminated AtariST string into a 0-terminated Windows1252
 * string. Characters below 0x80 are copied unchanged; characters 0x80..0xFF
 * are translated through mapAtariToWindows[].
 * replacementChar is written for every character without a Windows1252
 * equivalent. dest[] receives exactly strlen(source)+1 chars.
 */
void Str_AtariToWindows(const char *source, char *dest, char replacementChar)
{
    const char *in;
    char *out = dest;

    /* The table starts out zeroed and initCharacterMappings() never stores 0,
     * so a zero first entry means the tables have not been built yet. */
    if (mapAtariToWindows[0] == 0)
    {
        initCharacterMappings();
    }

    for (in = source; *in != '\0'; in++)
    {
        int code = (unsigned char)*in;

        if (code & 0x80)
        {
            code = mapAtariToWindows[code - 128];
        }

        if (code < 0)
        {
            /* -1 in the table: no Windows1252 equivalent */
            *out++ = replacementChar;
        }
        else
        {
            *out++ = (char)code;
        }
    }
    *out = '\0';
}
/**
 * Translate a 0-terminated Windows1252 string into a 0-terminated AtariST
 * string. Characters below 0x80 are copied unchanged; characters 0x80..0xFF
 * are translated through mapWindowsToAtari[].
 * replacementChar is written for every character without an AtariST
 * equivalent. dest[] receives exactly strlen(source)+1 chars.
 */
void Str_WindowsToAtari(const char *source, char *dest, char replacementChar)
{
    const char *in;
    char *out = dest;

    /* The table starts out zeroed and initCharacterMappings() never stores 0,
     * so a zero first entry means the tables have not been built yet. */
    if (mapWindowsToAtari[0] == 0)
    {
        initCharacterMappings();
    }

    for (in = source; *in != '\0'; in++)
    {
        int code = (unsigned char)*in;

        if (code & 0x80)
        {
            code = mapWindowsToAtari[code - 128];
        }

        if (code < 0)
        {
            /* -1 in the table: no AtariST equivalent */
            *out++ = replacementChar;
        }
        else
        {
            *out++ = (char)code;
        }
    }
    *out = '\0';
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment