Skip to content

Instantly share code, notes, and snippets.

@ynkdir
Last active September 21, 2024 14:19
Show Gist options
  • Save ynkdir/b92727e2a52e55a4010f to your computer and use it in GitHub Desktop.
Save ynkdir/b92727e2a52e55a4010f to your computer and use it in GitHub Desktop.
NLS file format memo
svn://svn.reactos.org/reactos/trunk/reactos/tools/create_nls/
HKEY_LOCAL_MACHINE\System\CurrentControlSet\Control\Nls
format of NLS file such as C:\Windows\System32\C_932.NLS
================================================================================
NLS CODEPAGE (C_XXX.NLS)
================================================================================
+---------------------------------------------------------------
HEADER | WORD wSize header size in words (0x0D)
| WORD CodePage
| WORD MaxCharSize
| BYTE[2] DefaultChar
| WORD UnicodeDefaultChar
| WORD unknown1 (maybe Unicode char of DefaultChar)
| WORD unknown2 (maybe CodePage char of UnicodeDefaultChar)
| BYTE[12] LeadByte
+---------------------------------------------------------------
MB2WC TABLE | WORD offset of Unicode to CP table in words
| WORD[256] primary CP to Unicode table
| WORD OEM glyph table size in words
| WORD[size] OEM to Unicode table
| WORD Number of DBCS LeadByte ranges
| if range != 0:
| WORD[256] offsets
| WORD[num_of_leadbyte][256] sub table
+---------------------------------------------------------------
WC2MB TABLE | WORD Unknown (It seems 0x0000 for MaxCharSize==1, 0x0004 for MaxCharSize==2)
| BYTE[65536] or WORD[65536] (depends on MaxCharSize) Unicode To CP table
+---------------------------------------------------------------
================================================================================
#include <windows.h>
#include <stdio.h>
#include <stdlib.h>
/*
 * Fixed header found at the very start of a code page NLS file (C_XXX.NLS).
 * main() overlays this struct directly on the file image, so the order and
 * size of the fields must not change.
 */
typedef struct {
WORD wSize; /* header size counted in 16-bit words (0x000D) */
WORD CodePage; /* code page number, printed in decimal by main() */
WORD MaxCharSize; /* 1 or 2; selects BYTE vs WORD entries in the Unicode-to-CP table */
BYTE DefaultChar[MAX_DEFAULTCHAR]; /* default code page character; main() prints byte [1] then [0] */
WCHAR UnicodeDefaultChar; /* default Unicode character */
WCHAR unknown1; /* meaning unconfirmed: maybe the Unicode char of DefaultChar */
WCHAR unknown2; /* meaning unconfirmed: maybe the code page char of UnicodeDefaultChar */
BYTE LeadByte[MAX_LEADBYTES]; /* lead byte values; main() prints 12 of them */
} NLS_FILE_HEADER;
/*
 * Return the total size in bytes of the stream f and leave the stream
 * positioned where it was on entry.
 *
 * ftell() reports failure as -1L, so its result is kept in a long and
 * checked before being converted to size_t.  If the position cannot be
 * queried or the stream cannot be repositioned, 0 is returned.
 */
static size_t
fsize(FILE *f)
{
    long saved;
    long end;

    saved = ftell(f);
    if (saved < 0) {
        return 0;
    }
    if (fseek(f, 0L, SEEK_END) != 0) {
        return 0;
    }
    end = ftell(f);
    /* Try to restore the caller's position even when the ftell above failed. */
    if (fseek(f, saved, SEEK_SET) != 0 || end < 0) {
        return 0;
    }
    return (size_t)end;
}
/*
 * Read the whole file at path into a freshly malloc()ed buffer.
 *
 * path   name of the file, opened in binary mode.
 * psize  receives the number of bytes read; set to 0 on failure.
 *
 * Returns the buffer, which the caller must free(), or NULL when the file
 * cannot be opened, the buffer cannot be allocated, or fewer bytes than
 * fsize() reported could be read.  When fsize() reports 0 the result is a
 * valid non-NULL buffer with *psize == 0.
 */
static void *
readfile(char *path, size_t *psize)
{
    FILE *f;
    size_t s;
    void *p;

    *psize = 0;
    if (path == NULL) {
        return NULL;
    }
    f = fopen(path, "rb");
    if (f == NULL) {
        return NULL;
    }
    s = fsize(f);
    /* malloc(0) may return NULL; ask for one byte so NULL always means failure. */
    p = malloc(s != 0 ? s : 1);
    if (p != NULL && fread(p, 1, s, f) != s) {
        free(p);
        p = NULL;
    }
    fclose(f);
    if (p != NULL) {
        *psize = s;
    }
    return p;
}
/*
 * Write size bytes from data to the file at path, creating or truncating it
 * (binary mode).
 *
 * The function has no return value, so failures are reported on stderr with
 * perror(): a file that cannot be opened (nothing is written in that case),
 * a short write, or a failed close.
 */
static void
writefile(char *path, void *data, size_t size)
{
    FILE *f = fopen(path, "wb");

    if (f == NULL) {
        perror(path);
        return;
    }
    if (fwrite(data, 1, size, f) != size) {
        perror(path);
    }
    /* fclose flushes buffered data, so its result matters for a write stream. */
    if (fclose(f) != 0) {
        perror(path);
    }
}
int main(int argc, char **argv)
{
size_t s;
WORD *base;
base = readfile(argv[1], &s);
NLS_FILE_HEADER *nls = (NLS_FILE_HEADER *)&base[0];
printf("Size = %d\n", nls->wSize);
printf("CodePage = %d\n", nls->CodePage);
printf("MaxCharSize = %d\n", nls->MaxCharSize);
printf("DefaultChar = %02x%02x\n", nls->DefaultChar[1], nls->DefaultChar[0]);
printf("UnicodeDefaultChar = %04x\n", nls->UnicodeDefaultChar);
printf("unknown1 = %04x\n", nls->unknown1);
printf("unknown1 = %04x\n", nls->unknown2);
printf("LeadByte = %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x %02x\n", nls->LeadByte[0], nls->LeadByte[1], nls->LeadByte[2], nls->LeadByte[3], nls->LeadByte[4], nls->LeadByte[5], nls->LeadByte[6], nls->LeadByte[7], nls->LeadByte[8], nls->LeadByte[9], nls->LeadByte[10], nls->LeadByte[11]);
WORD *cp_to_wc_table_size = base + nls->wSize;
WORD *cp_to_wc_table_base = base + nls->wSize + 1;
printf("offset of Unicode To CP table in words = %d\n", *cp_to_wc_table_size);
WORD *primary_table_base = cp_to_wc_table_base;
printf("primary table\n");
for (int i = 0; i < 256; ++i)
printf("0x%02x => U+%04x\n", i, primary_table_base[i]);
WORD *oem_table_size = primary_table_base + 256;
WORD *oem_table_base = primary_table_base + 256 + 1;
printf("oem size = %d\n", *oem_table_size);
for (int i = 0; i < *oem_table_size; ++i)
printf("oem[0x%02x] => U+%04x\n", i, oem_table_base[i]);
WORD *num_of_dbcs_leadbyte_range = oem_table_base + *oem_table_size;
WORD *dbcs_table_base = oem_table_base + *oem_table_size + 1;
WORD *dbcs_leadbyte_offs = dbcs_table_base;
WORD num_of_dbcs_leadbyte = 0;
if (*num_of_dbcs_leadbyte_range != 0) {
for (int i = 0; i < 256; ++i) {
if (dbcs_leadbyte_offs[i] != 0)
num_of_dbcs_leadbyte++;
}
}
WORD dbcs_table_size = 0;
if (*num_of_dbcs_leadbyte_range != 0)
dbcs_table_size = 256 + 256 * num_of_dbcs_leadbyte;
printf("num_of_dbcs_leadbyte_range = %d\n", *num_of_dbcs_leadbyte_range);
printf("num_of_dbcs_leadbyte = %d\n", num_of_dbcs_leadbyte);
if (*num_of_dbcs_leadbyte_range != 0) {
for (int i = 0; i < 256; ++i)
printf("off[%d] = %d\n", i, dbcs_leadbyte_offs[i]);
for (int i = 0; i < 256; ++i) {
if (dbcs_leadbyte_offs[i] == 0)
continue;
for (int j = 0; j < 256; ++j)
printf("0x%02x%02x => U+%04x\n", i, j, dbcs_table_base[dbcs_leadbyte_offs[i] + j]);
}
}
WORD *p_unknown = cp_to_wc_table_base + *cp_to_wc_table_base;
printf("p_unknown = %04x\n", *p_unknown);
WORD *wc_to_cp_table_base2 = cp_to_wc_table_base + *cp_to_wc_table_size + 1;
BYTE *wc_to_cp_table_base1 = (BYTE *)wc_to_cp_table_base2;
for (int i = 0; i < 65536; ++i) {
if (nls->MaxCharSize == 1)
printf("U+%04x => 0x%02x\n", i, wc_to_cp_table_base1[i]);
else
printf("U+%04x => 0x%04x\n", i, wc_to_cp_table_base2[i]);
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment