Created
July 24, 2014 08:34
-
-
Save anonymous/b53a3a3e8b06cdf0881c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Only in hatari: build | |
Only in hatari: build.mingw | |
Binary files hatari-orig/.hg/dirstate and hatari/.hg/dirstate differ | |
diff -ruwb hatari-orig/src/gemdos.c hatari/src/gemdos.c | |
--- hatari-orig/src/gemdos.c 2014-07-24 01:19:28.713746178 +0200 | |
+++ hatari/src/gemdos.c 2014-07-24 10:21:41.638299913 +0200 | |
@@ -1114,7 +1114,11 @@ | |
*/ | |
modified = false; | |
- /* catch potentially invalid characters */ | |
+ /* Commented out because this is risky. When deleting files which | |
+ * contain invalid characters, the inserted wildcard '?' may match | |
+ * also on other files, and an arbitrary file may be deleted. | |
+ * | |
+ * catch potentially invalid characters. | |
for (tmp = name; *tmp; tmp++) | |
{ | |
if (*tmp == INVALID_CHAR) | |
@@ -1123,6 +1127,7 @@ | |
modified = true; | |
} | |
} | |
+ */ | |
/* catch potentially too long extension */ | |
for (dot = 0; name[dot] && name[dot] != '.'; dot++); | |
@@ -1229,8 +1234,12 @@ | |
void GemDOS_CreateHardDriveFileName(int Drive, const char *pszFileName, | |
char *pszDestName, int nDestNameLen) | |
{ | |
- const char *s, *filename = pszFileName; | |
+ const char *s, *filename; | |
int minlen; | |
+ char pszFileNameHost[FILENAME_MAX]; | |
+ | |
+ Str_AtariToHost(pszFileName, pszFileNameHost, FILENAME_MAX, INVALID_CHAR); | |
+ filename = pszFileName = pszFileNameHost; | |
/* Is it a valid hard drive? */ | |
if (Drive < 2) | |
diff -ruwb hatari-orig/src/includes/str.h hatari/src/includes/str.h | |
--- hatari-orig/src/includes/str.h 2014-07-24 01:19:28.733746178 +0200 | |
+++ hatari/src/includes/str.h 2014-07-23 20:54:21.319988000 +0200 | |
@@ -31,4 +31,15 @@ | |
extern void Str_Filename2TOSname(const char *src, char *dst); | |
extern void Str_Dump_Hex_Ascii ( char *p , int Len , int Width , const char *Suffix , FILE *pFile ); | |
+/* Interface of character set conversions between the AtariST character set | |
+ * and the host encoding (UTF-8, or the locale's character set when str.c is | |
+ * built for WIN32 or with USE_LOCALE_CHARSET). | |
+ * | |
+ * - destLen is the number of bytes available in dest[]. | |
+ * - replacementChar is stored for characters which have no mapping. | |
+ * - The ...ToAtari() functions and Str_DecomposedToPrecomposedUtf8() take no | |
+ *   destination size. NOTE(review): dest presumably has to provide at least | |
+ *   strlen(source) + 1 bytes - confirm against the callers. | |
+ */ | |
+extern void initCharacterMappings(void); | |
+extern void Str_AtariToUtf8(const char *source, char *dest, int destLen); | |
+extern void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar); | |
+extern void Str_AtariToLocal(const char *source, char *dest, int destLen, char replacementChar); | |
+extern void Str_LocalToAtari(const char *source, char *dest, char replacementChar); | |
+extern void Str_DecomposedToPrecomposedUtf8(const char *source, char * dest); | |
+extern void Str_AtariToHost(const char *source, char *dest, int destLen, char replacementChar); | |
+extern void Str_HostToAtari(const char *source, char *dest, char replacementChar); | |
+ | |
+ | |
#endif /* HATARI_STR_H */ | |
diff -ruwb hatari-orig/src/str.c hatari/src/str.c | |
--- hatari-orig/src/str.c 2014-07-24 01:19:28.750746178 +0200 | |
+++ hatari/src/str.c 2014-07-24 10:31:19.265324715 +0200 | |
@@ -12,6 +12,7 @@ | |
#include <ctype.h> | |
#include <stdbool.h> | |
#include <stdlib.h> | |
+#include <locale.h> | |
#include <SDL_types.h> | |
#include "configuration.h" | |
#include "str.h" | |
@@ -136,6 +137,9 @@ | |
int len; | |
src = strdup(source); /* dup so that it can be modified */ | |
+ | |
+ /* convert host string encoding to AtariST character set */ | |
+ Str_HostToAtari(source, src, INVALID_CHAR); | |
len = strlen(src); | |
/* does filename have an extension? */ | |
@@ -164,7 +168,8 @@ | |
/* upcase and replace rest of invalid characters */ | |
for (tmp = dst; *tmp; tmp++) | |
{ | |
- if (*tmp < 33 || *tmp > 126) | |
+ /* invalid characters above 0x80 have already been replaced */ | |
+ if (((unsigned char)*tmp) < 32 || *tmp == 127) | |
*tmp = INVALID_CHAR; | |
else | |
{ | |
@@ -180,6 +185,7 @@ | |
*tmp = INVALID_CHAR; | |
break; | |
default: | |
+ if (((unsigned char)*tmp) < 128) | |
*tmp = toupper((unsigned char)*tmp); | |
} | |
} | |
@@ -187,6 +193,325 @@ | |
} | |
+/* ---------------------------------------------------------------------- */ | |
+ | |
+/* Implementation of character set conversions */ | |
+ | |
+/* Maps AtariST characters 0x80..0xFF to Unicode code points, | |
+ * see http://www.unicode.org/Public/MAPPINGS/VENDORS/MISC/ATARIST.TXT | |
+ * | |
+ * The table is indexed by (AtariST character code - 0x80), i.e. entry 0 | |
+ * belongs to AtariST character 0x80. AtariST characters below 0x80 are not | |
+ * listed; the conversion functions use their value as code point unchanged. | |
+ */ | |
+static int mapAtariToUnicode[128] = | |
+{ | |
+ 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7, | |
+ 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5, | |
+ 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9, | |
+ 0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x00DF, 0x0192, | |
+ 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA, | |
+ 0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB, | |
+ 0x00E3, 0x00F5, 0x00D8, 0x00F8, 0x0153, 0x0152, 0x00C0, 0x00C3, | |
+ 0x00D5, 0x00A8, 0x00B4, 0x2020, 0x00B6, 0x00A9, 0x00AE, 0x2122, | |
+ 0x0133, 0x0132, 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, | |
+ 0x05D6, 0x05D7, 0x05D8, 0x05D9, 0x05DB, 0x05DC, 0x05DE, 0x05E0, | |
+ 0x05E1, 0x05E2, 0x05E4, 0x05E6, 0x05E7, 0x05E8, 0x05E9, 0x05EA, | |
+ 0x05DF, 0x05DA, 0x05DD, 0x05E3, 0x05E5, 0x00A7, 0x2227, 0x221E, | |
+ 0x03B1, 0x03B2, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4, | |
+ 0x03A6, 0x0398, 0x03A9, 0x03B4, 0x222E, 0x03C6, 0x2208, 0x2229, | |
+ 0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248, | |
+ 0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x00B3, 0x00AF | |
+}; | |
+ | |
+/* Hashtable which maps Unicode code points to AtariST characters 0x80..0xFF. | |
+ * The lowest 9 bits of the code point (c & 511) select the slot; for the | |
+ * 128 code points listed in mapAtariToUnicode[] this hash function has no | |
+ * collisions. | |
+ * Each slot stores an index into mapAtariToUnicode[] (AtariST code - 0x80). | |
+ * Slots which are never written stay 0, so a lookup has to confirm the hit | |
+ * by checking that mapAtariToUnicode[slot value] equals the code point. | |
+ */ | |
+static char mapUnicodeToAtari[512]; | |
+/* Set to true by initCharacterMappings() once the hashtable is filled. */ | |
+static bool characterMappingsInitialized = false; | |
+ | |
+/* Define this only for an old Linux system which does not store | |
+ * pathnames in UTF-8. If this is defined, pathnames are converted | |
+ * to the host character set as defined by the locale. | |
+ * Do not define this for OSX, as the unicode pathnames then won't | |
+ * be converted from the decomposed to the precomposed form. | |
+ */ | |
+/* #define USE_LOCALE_CHARSET 1 */ | |
+ | |
+ | |
+/** | |
+ * Initialize the mapUnicodeToAtari[] hashtable from mapAtariToUnicode[] and, | |
+ * on builds which convert via the host locale (WIN32 or USE_LOCALE_CHARSET), | |
+ * select the user's character encoding for wctomb() / mbtowc(). | |
+ * | |
+ * Only the LC_CTYPE category is switched: the multibyte conversions depend | |
+ * on nothing else, and switching LC_ALL would also change LC_NUMERIC (the | |
+ * decimal separator used by printf/scanf/strtod) for the whole program. | |
+ */ | |
+void initCharacterMappings(void) | |
+{ | |
+	int i; | |
+ | |
+	for (i = 0; i < 128; i++) | |
+	{ | |
+		/* slot = low 9 bits of the code point, value = AtariST code - 0x80 */ | |
+		mapUnicodeToAtari[mapAtariToUnicode[i] & 511] = i; | |
+	} | |
+	characterMappingsInitialized = true; | |
+ | |
+#if defined(WIN32) || defined(USE_LOCALE_CHARSET) | |
+	setlocale(LC_CTYPE, ""); | |
+#endif | |
+} | |
+ | |
+/** | |
+ * Convert a 0-terminated string in the AtariST character set to a 0-terminated | |
+ * UTF-8 encoded string. | |
+ * | |
+ * source:  string in the AtariST character set | |
+ * dest:    receives the UTF-8 string | |
+ * destLen: number of available bytes in dest[], including the terminator | |
+ * | |
+ * A single character of the AtariST charset can consume up to 3 bytes in UTF-8. | |
+ * If dest[] is too small, the output is cut off in front of the first | |
+ * character which does not fit completely; no character is dropped from the | |
+ * middle of the string. If destLen is not positive, dest[] is not touched. | |
+ */ | |
+void Str_AtariToUtf8(const char *source, char *dest, int destLen) | |
+{ | |
+	int c, need; | |
+ | |
+	if (destLen <= 0) | |
+	{ | |
+		return;		/* not even room for the terminator */ | |
+	} | |
+ | |
+	while (*source) | |
+	{ | |
+		c = *source++ & 255; | |
+		if (c >= 128) | |
+		{ | |
+			c = mapAtariToUnicode[c & 127]; | |
+		} | |
+ | |
+		/* number of UTF-8 bytes needed for code point c */ | |
+		need = (c < 128) ? 1 : (c < 2048) ? 2 : 3; | |
+		if (destLen <= need) | |
+		{ | |
+			break;	/* one byte stays reserved for the terminator */ | |
+		} | |
+ | |
+		if (need == 1) | |
+		{ | |
+			*dest++ = c;				/* 0xxxxxxx */ | |
+		} | |
+		else if (need == 2) | |
+		{ | |
+			*dest++ = (c >> 6) | 192;		/* 110xxxxx */ | |
+			*dest++ = (c & 63) | 128;		/* 10xxxxxx */ | |
+		} | |
+		else | |
+		{ | |
+			*dest++ = (c >> 12) | 224;		/* 1110xxxx */ | |
+			*dest++ = ((c >> 6) & 63) | 128;	/* 10xxxxxx */ | |
+			*dest++ = (c & 63) | 128;		/* 10xxxxxx */ | |
+		} | |
+		destLen -= need; | |
+	} | |
+	*dest = 0; | |
+} | |
+ | |
+/** | |
+ * Convert a 0-terminated UTF-8 encoded string to a 0-terminated string | |
+ * in the AtariST character set. | |
+ * | |
+ * source:          UTF-8 encoded string | |
+ * dest:            receives the AtariST string. At most one byte is written | |
+ *                  per byte read, so strlen(source) + 1 bytes are sufficient. | |
+ *                  Every byte is read before the corresponding output byte is | |
+ *                  stored, so dest may be the same buffer as source. | |
+ * replacementChar: stored for every character without a mapping, i.e. for | |
+ *                  code points outside the AtariST character set, stray | |
+ *                  continuation bytes and truncated multi-byte sequences. | |
+ * | |
+ * Continuation bytes are only consumed if they really have the form 10xxxxxx. | |
+ * Thus a sequence which is cut off by the end of the string never makes the | |
+ * function read behind the terminating 0, and an ASCII character following | |
+ * an invalid lead byte is not swallowed. | |
+ */ | |
+void Str_Utf8ToAtari(const char *source, char *dest, char replacementChar) | |
+{ | |
+	int c, extra; | |
+	int i = 0; | |
+	bool mappable; | |
+ | |
+	if (!characterMappingsInitialized) | |
+	{ | |
+		initCharacterMappings(); | |
+	} | |
+ | |
+	while (*source) | |
+	{ | |
+		c = *source++ & 255; | |
+		if (c < 128)		/* single-byte code (0xxxxxxx) */ | |
+		{ | |
+			*dest++ = c; | |
+			continue; | |
+		} | |
+		if (c < 192)		/* stray continuation byte (10xxxxxx) */ | |
+		{ | |
+			*dest++ = replacementChar; | |
+			continue; | |
+		} | |
+ | |
+		/* lead byte: how many continuation bytes have to follow? */ | |
+		mappable = true; | |
+		if (c < 224)		/* 110xxxxx, 10xxxxxx */ | |
+		{ | |
+			c &= 31; | |
+			extra = 1; | |
+		} | |
+		else if (c < 240)	/* 1110xxxx, 10xxxxxx, 10xxxxxx */ | |
+		{ | |
+			c &= 15; | |
+			extra = 2; | |
+		} | |
+		else			/* 4-byte code: never an AtariST character */ | |
+		{ | |
+			c &= 7; | |
+			extra = 3; | |
+			mappable = false; | |
+		} | |
+ | |
+		/* The terminating 0 is not a continuation byte, so this loop | |
+		 * cannot run past the end of the string. */ | |
+		while (extra > 0 && (*source & 0xC0) == 0x80) | |
+		{ | |
+			c = (c << 6) | (*source++ & 63); | |
+			extra--; | |
+		} | |
+		if (extra > 0) | |
+		{ | |
+			mappable = false;	/* truncated or malformed sequence */ | |
+		} | |
+ | |
+		if (mappable) | |
+		{ | |
+			/* find AtariST character code for Unicode code point c; | |
+			 * the slot may be unused or belong to another code point */ | |
+			i = mapUnicodeToAtari[c & 511]; | |
+			mappable = (mapAtariToUnicode[i] == c); | |
+		} | |
+		*dest++ = mappable ? i + 128 : replacementChar; | |
+	} | |
+	*dest = 0; | |
+} | |
+ | |
+ | |
+/** | |
+ * Convert a 0-terminated string from the AtariST character set into the | |
+ * multibyte encoding of the current locale. | |
+ * | |
+ * source:          string in the AtariST character set | |
+ * dest:            receives the converted, 0-terminated string | |
+ * destLen:         number of available bytes in dest[] | |
+ * replacementChar: stored for characters which wctomb() cannot represent | |
+ *                  in the character set of the current locale | |
+ * | |
+ * Conversion stops as soon as no more than MB_CUR_MAX bytes are left in | |
+ * dest[]. The Unicode code point is handed to wctomb() as is. | |
+ * NOTE(review): this presumes that wchar_t values are Unicode code points | |
+ * on the host - confirm for the supported platforms. | |
+ */ | |
+void Str_AtariToLocal(const char *source, char *dest, int destLen, char replacementChar) | |
+{ | |
+	int codePoint, written; | |
+ | |
+	if (!characterMappingsInitialized) | |
+	{ | |
+		initCharacterMappings(); | |
+	} | |
+ | |
+	for (; *source != '\0' && destLen > (int)MB_CUR_MAX; dest += written, destLen -= written) | |
+	{ | |
+		codePoint = *source++ & 255; | |
+		if (codePoint >= 128) | |
+		{ | |
+			codePoint = mapAtariToUnicode[codePoint - 128]; | |
+		} | |
+ | |
+		/* encode the code point in the character set of the locale */ | |
+		written = wctomb(dest, codePoint); | |
+		if (written < 0) | |
+		{ | |
+			*dest = replacementChar; | |
+			written = 1; | |
+		} | |
+	} | |
+	*dest = 0; | |
+} | |
+ | |
+/** | |
+ * Convert a 0-terminated string from the multibyte encoding of the current | |
+ * locale into the AtariST character set. | |
+ * | |
+ * source:          string in the encoding of the current locale | |
+ * dest:            receives the AtariST string; one byte is written per | |
+ *                  converted character, so strlen(source) + 1 bytes suffice | |
+ * replacementChar: stored for characters which do not exist in the AtariST | |
+ *                  character set and for bytes which mbtowc() rejects | |
+ * | |
+ * NOTE(review): the lookup presumes that wchar_t values are Unicode code | |
+ * points on the host - confirm for the supported platforms. | |
+ */ | |
+void Str_LocalToAtari(const char *source, char *dest, char replacementChar) | |
+{ | |
+	int len, i; | |
+	wchar_t wc; | |
+ | |
+	if (!characterMappingsInitialized) | |
+	{ | |
+		initCharacterMappings(); | |
+	} | |
+ | |
+	while (*source) | |
+	{ | |
+		/* convert one character of the current locale into a code point; | |
+		 * MB_CUR_MAX is the longest character the locale can produce */ | |
+		len = mbtowc(&wc, source, MB_CUR_MAX); | |
+		if (len <= 0) | |
+		{ | |
+			/* Invalid sequence: bring mbtowc() back to its initial | |
+			 * conversion state, skip one byte and go on. */ | |
+			mbtowc(NULL, NULL, 0); | |
+			source++; | |
+			*dest++ = replacementChar; | |
+			continue; | |
+		} | |
+		source += len; | |
+ | |
+		if (wc >= 128) | |
+		{ | |
+			/* find AtariST character code for Unicode code point wc; | |
+			 * the slot may be unused or belong to another code point */ | |
+			i = mapUnicodeToAtari[wc & 511]; | |
+			*dest++ = (mapAtariToUnicode[i] == (int)wc) ? i + 128 : replacementChar; | |
+		} | |
+		else | |
+		{ | |
+			*dest++ = (char)wc; | |
+		} | |
+	} | |
+	*dest = 0; | |
+} | |
+ | |
+/* This table is needed to convert the UTF-8 representation of paths with | |
+ * diacritical marks from the decomposed form (as returned by OSX) into the | |
+ * precomposed form. Combining Unicode characters are 0x0300..0x036F. | |
+ * | |
+ * Layout: a flat list of triples | |
+ *   base letter, combining character, precomposed code point | |
+ * terminated by a single 0 in the position of a base letter. | |
+ * | |
+ * NOTE(review): not every precomposed code point listed here is part of the | |
+ * AtariST character set; e.g. 0xC1, 0xC8 and 0xFD do not occur in | |
+ * mapAtariToUnicode[]. | |
+ */ | |
+static int mapDecomposedPrecomposed[] = | |
+{ | |
+ 'A', 0x0300, 0xC0, /* À */ | |
+ 'A', 0x0301, 0xC1, /* Á */ | |
+ 'A', 0x0302, 0xC2, /* Â */ | |
+ 'A', 0x0303, 0xC3, /* Ã */ | |
+ 'A', 0x0308, 0xC4, /* Ä */ | |
+ 'A', 0x030A, 0xC5, /* Å */ | |
+ 'C', 0x0327, 0xC7, /* Ç */ | |
+ 'E', 0x0300, 0xC8, /* È */ | |
+ 'E', 0x0301, 0xC9, /* É */ | |
+ 'E', 0x0302, 0xCA, /* Ê */ | |
+ 'E', 0x0308, 0xCB, /* Ë */ | |
+ 'I', 0x0300, 0xCC, /* Ì */ | |
+ 'I', 0x0301, 0xCD, /* Í */ | |
+ 'I', 0x0302, 0xCE, /* Î */ | |
+ 'I', 0x0308, 0xCF, /* Ï */ | |
+ 'N', 0x0303, 0xD1, /* Ñ */ | |
+ 'O', 0x0300, 0xD2, /* Ò */ | |
+ 'O', 0x0301, 0xD3, /* Ó */ | |
+ 'O', 0x0302, 0xD4, /* Ô */ | |
+ 'O', 0x0303, 0xD5, /* Õ */ | |
+ 'O', 0x0308, 0xD6, /* Ö */ | |
+ 'U', 0x0300, 0xD9, /* Ù */ | |
+ 'U', 0x0301, 0xDA, /* Ú */ | |
+ 'U', 0x0302, 0xDB, /* Û */ | |
+ 'U', 0x0308, 0xDC, /* Ü */ | |
+ 'Y', 0x0301, 0xDD, /* Ý */ | |
+ 'a', 0x0300, 0xE0, /* à */ | |
+ 'a', 0x0301, 0xE1, /* á */ | |
+ 'a', 0x0302, 0xE2, /* â */ | |
+ 'a', 0x0303, 0xE3, /* ã */ | |
+ 'a', 0x0308, 0xE4, /* ä */ | |
+ 'a', 0x030A, 0xE5, /* å */ | |
+ 'c', 0x0327, 0xE7, /* ç */ | |
+ 'e', 0x0300, 0xE8, /* è */ | |
+ 'e', 0x0301, 0xE9, /* é */ | |
+ 'e', 0x0302, 0xEA, /* ê */ | |
+ 'e', 0x0308, 0xEB, /* ë */ | |
+ 'i', 0x0300, 0xEC, /* ì */ | |
+ 'i', 0x0301, 0xED, /* í */ | |
+ 'i', 0x0302, 0xEE, /* î */ | |
+ 'i', 0x0308, 0xEF, /* ï */ | |
+ 'n', 0x0303, 0xF1, /* ñ */ | |
+ 'o', 0x0300, 0xF2, /* ò */ | |
+ 'o', 0x0301, 0xF3, /* ó */ | |
+ 'o', 0x0302, 0xF4, /* ô */ | |
+ 'o', 0x0303, 0xF5, /* õ */ | |
+ 'o', 0x0308, 0xF6, /* ö */ | |
+ 'u', 0x0300, 0xF9, /* ù */ | |
+ 'u', 0x0301, 0xFA, /* ú */ | |
+ 'u', 0x0302, 0xFB, /* û */ | |
+ 'u', 0x0308, 0xFC, /* ü */ | |
+ 'y', 0x0301, 0xFD, /* ý */ | |
+ 'y', 0x0308, 0xFF, /* ÿ */ | |
+ 0 | |
+}; | |
+ | |
+/** | |
+ * Convert decomposed characters in an UTF-8 encoded string into the | |
+ * precomposed form. This is needed as OSX returns filesystem paths in the | |
+ * decomposed form (NFD). | |
+ * | |
+ * source: 0-terminated UTF-8 string | |
+ * dest:   receives the 0-terminated UTF-8 string. A pair "base letter + | |
+ *         combining character" (3 bytes) listed in mapDecomposedPrecomposed[] | |
+ *         is replaced by the 2-byte UTF-8 encoding of the precomposed code | |
+ *         point; all other bytes are copied unchanged. The result is never | |
+ *         longer than the input and every byte is read before its place is | |
+ *         overwritten, so dest may be the same buffer as source. | |
+ */ | |
+void Str_DecomposedToPrecomposedUtf8(const char *source, char * dest) | |
+{ | |
+	int c, mark, i; | |
+ | |
+	while (*source) | |
+	{ | |
+		c = *source++ & 255; | |
+ | |
+		/* Is the current character followed by a combining character? | |
+		 * 0x03XX is in UTF-8: 110011xx 10xxxxxx. The second byte is | |
+		 * checked as well, so that a lone lead byte at the end of the | |
+		 * string never makes us step over the terminating 0. */ | |
+		if ((source[0] & 0xFC) == 0xCC && (source[1] & 0xC0) == 0x80) | |
+		{ | |
+			mark = ((source[0] & 31) << 6) | (source[1] & 63); | |
+			for (i = 0; mapDecomposedPrecomposed[i]; i += 3) | |
+			{ | |
+				if (mapDecomposedPrecomposed[i] == c && mapDecomposedPrecomposed[i + 1] == mark) | |
+				{ | |
+					c = mapDecomposedPrecomposed[i + 2]; | |
+					source += 2; | |
+					/* All precomposed code points of the table are | |
+					 * 0xC0..0xFF and need two bytes in UTF-8: store | |
+					 * the lead byte here, the continuation byte below. */ | |
+					*dest++ = 0xC0 | (c >> 6);	/* 110xxxxx */ | |
+					c = 0x80 | (c & 63);		/* 10xxxxxx */ | |
+					break; | |
+				} | |
+			} | |
+		} | |
+		*dest++ = c; | |
+	} | |
+	*dest = 0; | |
+} | |
+ | |
+ | |
+/** | |
+ * Convert a 0-terminated string from the AtariST character set into the | |
+ * encoding of the host: the character set of the current locale if built | |
+ * for WIN32 or with USE_LOCALE_CHARSET, UTF-8 otherwise. | |
+ * destLen is the number of available bytes in dest[]. replacementChar is | |
+ * only used by the locale variant; the UTF-8 variant does not need it. | |
+ */ | |
+void Str_AtariToHost(const char *source, char *dest, int destLen, char replacementChar) | |
+{ | |
+#if defined(WIN32) || defined(USE_LOCALE_CHARSET) | |
+	Str_AtariToLocal(source, dest, destLen, replacementChar); | |
+#else | |
+	Str_AtariToUtf8(source, dest, destLen); | |
+#endif | |
+} | |
+ | |
+/** | |
+ * Convert a 0-terminated string from the encoding of the host (see | |
+ * Str_AtariToHost) into the AtariST character set. Characters without a | |
+ * mapping are replaced by replacementChar. The result is never longer than | |
+ * the input. | |
+ */ | |
+void Str_HostToAtari(const char *source, char *dest, char replacementChar) | |
+{ | |
+#if defined(WIN32) || defined(USE_LOCALE_CHARSET) | |
+	Str_LocalToAtari(source, dest, replacementChar); | |
+#else | |
+	/* Compose first, while the string is still UTF-8 (for OSX): after the | |
+	 * conversion to the AtariST character set the combining characters | |
+	 * would already be lost. Then convert in place; Str_Utf8ToAtari() | |
+	 * writes at most one byte per byte read and never overtakes its | |
+	 * read position. */ | |
+	Str_DecomposedToPrecomposedUtf8(source, dest); | |
+	Str_Utf8ToAtari(dest, dest, replacementChar); | |
+#endif | |
+} | |
+ | |
+/* ---------------------------------------------------------------------- */ | |
+ | |
+ | |
+ | |
/** | |
* Print an Hex/Ascii dump of Len bytes located at *p | |
* Each line consists of Width bytes, printed as an hexa value and as a char |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment