yetanotherchris/gist:4964045

## gistfile1.cs
// Demonstrates how the same 7-character string is turned into bytes by the
// ASCII, system-default, UTF-8, UTF-16 and UTF-32 encodings.
//
// Ascii/Latin reference:
// 97  - a
// 98  - b
// 99  - c
// 100 - d
// 101 - e
// 63  - ?
// 235 - ë
// 'આ' is the Gujarati letter AA (Gujarati in Unicode is from 0A80 - 0AFF). The symbol is U+0A86.
// 'ë' is in Windows 1252, which is derived from ISO 8859-1 Latin Alphabet 1 (extended ascii to include accented
// symbols, special letters and symbols). Windows 1252 is a legacy codepage used by previous versions of Windows
// and isn't a Unicode standard. However, the first 256 Unicode code points match ISO 8859-1, so 'ë' has the
// same number in Unicode (U+00EB).
// 'ë' is Symbol #235.
//
// Both non-ascii characters are written as \u escapes so the literal survives
// being saved or copied in an encoding that cannot represent them.
string s = "abcde\u0A86\u00EB";

// Receives the output of each encoding below.
byte[] b;

// This will try to translate the Gujarati symbol to ascii, fail and convert
// it to ascii character 63 (a question mark symbol '?').
// It will do the same with 'ë' as this isn't an ascii character.
//
// Ascii is a 7 bit code, stored as 1 byte per character, so the array is 7 bytes.
b = Encoding.ASCII.GetBytes(s);

// On .NET Framework, Encoding.Default is the system ANSI codepage, which is Windows 1252 for most
// Western users. So this will successfully get the right number (235) for 'ë', however the
// Gujarati symbol will still fail and become 63 ('?').
// On .NET Core / .NET 5+ Encoding.Default is always UTF8, so there the result matches the UTF8 call below.
//
// Windows 1252 is 8 bit/1 byte per character.
b = Encoding.Default.GetBytes(s);

// UTF8 is the commonest Unicode encoding, and can encode any Unicode character.
// It uses 1 byte to represent ascii characters, but up to 4 for others.
// Here: 5 bytes for "abcde", 3 for the Gujarati symbol (E0 AA 86) and 2 for 'ë' (C3 AB) = 10 bytes.
b = Encoding.UTF8.GetBytes(s);

// Now it's 16 bit/2 bytes per char and will pickup the Gujarati symbol correctly.
// The byte array will now be 14 in length. This is because every character in this string has a
// numerical 2 byte value (16 bit, 65536 possibilities). Index 10+11 in the array hold the value of 0A86
// [10] 134 (86).
// [11] 10 (0A)
// (Little Endian format so LSB first).
//
// UTF16 can go up to 4 bytes per character for symbols higher than U+FFFF.
b = Encoding.Unicode.GetBytes(s);

// UTF32 translates it to 4 bytes per character, so the array is 28 bytes now (7 characters x 4).
b = Encoding.UTF32.GetBytes(s);
	// Demonstrates how the same 7-character string is turned into bytes by the
	// ASCII, system-default, UTF-8, UTF-16 and UTF-32 encodings.
	//
	// Ascii/Latin reference:
	// 97 - a
	// 98 - b
	// 99 - c
	// 100 - d
	// 101 - e
	// 63 - ?
	// 235 - ë
	// 'આ' is the Gujarati letter AA (Gujarati in Unicode is from 0A80 - 0AFF). The symbol is U+0A86.
	// 'ë' is in Windows 1252, which is derived from ISO 8859-1 Latin Alphabet 1 (extended ascii to include accented
	// symbols, special letters and symbols). Windows 1252 is a legacy codepage used by previous versions of Windows
	// and isn't a Unicode standard. However, the first 256 Unicode code points match ISO 8859-1, so 'ë' has the
	// same number in Unicode (U+00EB).
	// 'ë' is Symbol #235.
	//
	// Both non-ascii characters are written as \u escapes so the literal survives
	// being saved or copied in an encoding that cannot represent them.
	string s = "abcde\u0A86\u00EB";

	// Receives the output of each encoding below.
	byte[] b;

	// This will try to translate the Gujarati symbol to ascii, fail and convert
	// it to ascii character 63 (a question mark symbol '?').
	// It will do the same with 'ë' as this isn't an ascii character.
	//
	// Ascii is a 7 bit code, stored as 1 byte per character, so the array is 7 bytes.
	b = Encoding.ASCII.GetBytes(s);

	// On .NET Framework, Encoding.Default is the system ANSI codepage, which is Windows 1252 for most
	// Western users. So this will successfully get the right number (235) for 'ë', however the
	// Gujarati symbol will still fail and become 63 ('?').
	// On .NET Core / .NET 5+ Encoding.Default is always UTF8, so there the result matches the UTF8 call below.
	//
	// Windows 1252 is 8 bit/1 byte per character.
	b = Encoding.Default.GetBytes(s);

	// UTF8 is the commonest Unicode encoding, and can encode any Unicode character.
	// It uses 1 byte to represent ascii characters, but up to 4 for others.
	// Here: 5 bytes for "abcde", 3 for the Gujarati symbol (E0 AA 86) and 2 for 'ë' (C3 AB) = 10 bytes.
	b = Encoding.UTF8.GetBytes(s);

	// Now it's 16 bit/2 bytes per char and will pickup the Gujarati symbol correctly.
	// The byte array will now be 14 in length. This is because every character in this string has a
	// numerical 2 byte value (16 bit, 65536 possibilities). Index 10+11 in the array hold the value of 0A86
	// [10] 134 (86).
	// [11] 10 (0A)
	// (Little Endian format so LSB first).
	//
	// UTF16 can go up to 4 bytes per character for symbols higher than U+FFFF.
	b = Encoding.Unicode.GetBytes(s);

	// UTF32 translates it to 4 bytes per character, so the array is 28 bytes now (7 characters x 4).
	b = Encoding.UTF32.GetBytes(s);