Skip to content

Instantly share code, notes, and snippets.

@mpenkov
Last active December 12, 2015 05:19
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mpenkov/4721121 to your computer and use it in GitHub Desktop.
Save mpenkov/4721121 to your computer and use it in GitHub Desktop.
Character encoding
<!--
vim: shiftwidth=2
-->
<html>
<head><title>Clobber</title></head>
<body>
<form>
<textarea id="textArea" readonly="true" rows="10" cols="70"></textarea><br/>
<input id="inputText" type="text" text="Enter some text here" size="50" value="Enter some Unicode here...">
<label><input id="checkbox" type="checkbox">clobber the submitted text</label>
<input type="button" value="Msg" onclick="onSubmit();">
</form>
<script>
// Handler wired to the "Msg" button. Appends the contents of the inputText
// field to the textArea (each submission after the first goes on its own
// line) and then empties the field. When the checkbox is ticked, the text is
// first round-tripped through toAscii()/toUtf16() to demonstrate lossy
// encoding.
function onSubmit() {
  var output = document.getElementById("textArea");
  var input = document.getElementById("inputText");

  // Separate this submission from any earlier ones.
  if (output.value.length > 0) {
    output.value += "\n";
  }

  var clobber = document.getElementById("checkbox").checked;

  // Clobbering: squeezing the string through a limited character set
  // irrecoverably loses every character that set cannot represent. This is
  // not specific to ASCII; any encoding that covers fewer characters than
  // Unicode (KOI-8, for example) has the same problem.
  //
  // Not clobbering: the value is passed through untouched as a native
  // JavaScript (UTF-16) string, so nothing is lost.
  var message = clobber ? toUtf16(toAscii(input.value)) : input.value;

  output.value += message;
  input.value = "";
}
// Converts a JavaScript (UTF-16) string to an array of ASCII codes.
//
// The string is processed one UTF-16 code unit at a time. Code units in the
// ASCII range (0-127) are copied as-is; every other code unit is replaced by
// the code for "?", since it cannot be represented in ASCII. A character
// outside the Basic Multilingual Plane occupies two code units and therefore
// becomes two "?" entries.
//
// utf16:   the string to convert.
// Returns: a new array of numbers, one per code unit of the input, each in
//          the range 0-127.
function toAscii(utf16) {
  var replacement = "?".charCodeAt(0);
  // Declared locally: assigning to an undeclared name would create a global
  // (and throws a ReferenceError in strict mode / module code).
  var ascii = [];
  for (var i = 0; i < utf16.length; ++i) {
    var code = utf16.charCodeAt(i);
    // ASCII is a 7-bit character set, so 127 is the highest valid code.
    ascii[i] = code > 127 ? replacement : code;
  }
  return ascii;
}
// Converts an array of character codes (such as the ASCII codes produced by
// toAscii) into a normal JavaScript string, which is natively UTF-16.
//
// ascii:   array of numeric character codes.
// Returns: a string with one character per array element, in order; the empty
//          string for an empty array.
function toUtf16(ascii) {
  // Declared locally: assigning to an undeclared name would create a global
  // (and throws a ReferenceError in strict mode / module code).
  var chars = [];
  for (var i = 0; i < ascii.length; ++i) {
    chars[i] = String.fromCharCode(ascii[i]);
  }
  return chars.join("");
}
</script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment