Skip to content

Instantly share code, notes, and snippets.

@faisalman
Last active June 4, 2020 12:02
Show Gist options
  • Star 5 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
  • Save faisalman/4674323 to your computer and use it in GitHub Desktop.
Save faisalman/4674323 to your computer and use it in GitHub Desktop.
7-bit GSM 03.38 <-> Unicode Map in JS
// Unicode to 7-bit GSM
// Usage example: encodes every character of `write` as a two-digit, uppercase
// hexadecimal GSM 03.38 code followed by a single space, then logs the result.
var write = "Hello, Faisalman!";
var msgSubmit = "";
write.split('').forEach(function(ch) {
    // Unicode2GSM stores codes as decimal strings, hence the parseInt.
    // A character that is missing from the table used to yield the token "NAN";
    // it is now replaced by "?" (0x3F) so the output stays valid hex.
    var code = Object.prototype.hasOwnProperty.call(Unicode2GSM, ch)
        ? parseInt(Unicode2GSM[ch], 10)
        : 0x3F;
    var hex = code.toString(16).toUpperCase();
    // Left-pad so codes below 0x10 are written as e.g. "0A" instead of "A".
    msgSubmit += (hex.length < 2 ? "0" + hex : hex) + " ";
});
console.log(msgSubmit); // "48 65 6C 6C 6F 2C 20 46 61 69 73 61 6C 6D 61 6E 21 "
// 7-bit GSM to Unicode
// Usage example: decodes a space-separated list of hexadecimal GSM 03.38 codes
// back into a Unicode string, then logs the result.
var read = "48 65 6C 6C 6F 2C 20 46 61 69 73 61 6C 6D 61 6E 21 ";
var msgReceived = "";
read.split(" ").forEach(function(token) {
    // Splitting on " " leaves empty tokens (e.g. after the trailing space).
    if (token === "") {
        return;
    }
    var code = parseInt(token, 16);
    // Tokens that are not hex, or whose value is outside the table, used to
    // append the literal text "undefined"; emit U+FFFD REPLACEMENT CHARACTER instead.
    msgReceived += Object.prototype.hasOwnProperty.call(GSM2Unicode, code)
        ? GSM2Unicode[code]
        : "\uFFFD";
});
console.log(msgReceived); // "Hello, Faisalman!"
/**
* Convert From/To GSM03.38/Unicode in JavaScript
* Mapping source: ftp://ftp.unicode.org/Public/MAPPINGS/ETSI/GSM0338.TXT
* https://gist.github.com/faisalman
*
* Copyright 2013, Faisalman <fyzlman@gmail.com>
* Licensed under The MIT License
* http://www.opensource.org/licenses/mit-license
*/
/**
 * Lookup table from a 7-bit GSM 03.38 code (0x00-0x7F) to the corresponding
 * one-character Unicode string. Only the basic table is covered; 0x1B (the
 * escape to the extension table) is mapped to U+00A0 rather than expanded.
 *
 * The table is assembled from an ordered list: the position of each entry in
 * the list is its GSM code, so the resulting object has the keys 0..127.
 */
var GSM2Unicode = (function() {
    var charsByCode = [
        "\u0040", // 0x00 COMMERCIAL AT
        "\u00A3", // 0x01 POUND SIGN
        "\u0024", // 0x02 DOLLAR SIGN
        "\u00A5", // 0x03 YEN SIGN
        "\u00E8", // 0x04 LATIN SMALL LETTER E WITH GRAVE
        "\u00E9", // 0x05 LATIN SMALL LETTER E WITH ACUTE
        "\u00F9", // 0x06 LATIN SMALL LETTER U WITH GRAVE
        "\u00EC", // 0x07 LATIN SMALL LETTER I WITH GRAVE
        "\u00F2", // 0x08 LATIN SMALL LETTER O WITH GRAVE
        // NOTE(review): some GSM 03.38 tables show the capital C WITH CEDILLA
        // (U+00C7) at 0x09 - confirm which one is wanted here.
        "\u00E7", // 0x09 LATIN SMALL LETTER C WITH CEDILLA
        "\u000A", // 0x0A LINE FEED
        "\u00D8", // 0x0B LATIN CAPITAL LETTER O WITH STROKE
        "\u00F8", // 0x0C LATIN SMALL LETTER O WITH STROKE
        "\u000D", // 0x0D CARRIAGE RETURN
        "\u00C5", // 0x0E LATIN CAPITAL LETTER A WITH RING ABOVE
        "\u00E5", // 0x0F LATIN SMALL LETTER A WITH RING ABOVE
        "\u0394", // 0x10 GREEK CAPITAL LETTER DELTA
        "\u005F", // 0x11 LOW LINE
        "\u03A6", // 0x12 GREEK CAPITAL LETTER PHI
        "\u0393", // 0x13 GREEK CAPITAL LETTER GAMMA
        "\u039B", // 0x14 GREEK CAPITAL LETTER LAMDA
        "\u03A9", // 0x15 GREEK CAPITAL LETTER OMEGA
        "\u03A0", // 0x16 GREEK CAPITAL LETTER PI
        "\u03A8", // 0x17 GREEK CAPITAL LETTER PSI
        "\u03A3", // 0x18 GREEK CAPITAL LETTER SIGMA
        "\u0398", // 0x19 GREEK CAPITAL LETTER THETA
        "\u039E", // 0x1A GREEK CAPITAL LETTER XI
        "\u00A0", // 0x1B ESCAPE TO EXTENSION TABLE
        "\u00C6", // 0x1C LATIN CAPITAL LETTER AE
        "\u00E6", // 0x1D LATIN SMALL LETTER AE
        "\u00DF", // 0x1E LATIN SMALL LETTER SHARP S (German)
        "\u00C9", // 0x1F LATIN CAPITAL LETTER E WITH ACUTE
        "\u0020", // 0x20 SPACE
        "\u0021", // 0x21 EXCLAMATION MARK
        "\u0022", // 0x22 QUOTATION MARK
        "\u0023", // 0x23 NUMBER SIGN
        "\u00A4", // 0x24 CURRENCY SIGN
        "\u0025", // 0x25 PERCENT SIGN
        "\u0026", // 0x26 AMPERSAND
        "\u0027", // 0x27 APOSTROPHE
        "\u0028", // 0x28 LEFT PARENTHESIS
        "\u0029", // 0x29 RIGHT PARENTHESIS
        "\u002A", // 0x2A ASTERISK
        "\u002B", // 0x2B PLUS SIGN
        "\u002C", // 0x2C COMMA
        "\u002D", // 0x2D HYPHEN-MINUS
        "\u002E", // 0x2E FULL STOP
        "\u002F", // 0x2F SOLIDUS
        "\u0030", // 0x30 DIGIT ZERO
        "\u0031", // 0x31 DIGIT ONE
        "\u0032", // 0x32 DIGIT TWO
        "\u0033", // 0x33 DIGIT THREE
        "\u0034", // 0x34 DIGIT FOUR
        "\u0035", // 0x35 DIGIT FIVE
        "\u0036", // 0x36 DIGIT SIX
        "\u0037", // 0x37 DIGIT SEVEN
        "\u0038", // 0x38 DIGIT EIGHT
        "\u0039", // 0x39 DIGIT NINE
        "\u003A", // 0x3A COLON
        "\u003B", // 0x3B SEMICOLON
        "\u003C", // 0x3C LESS-THAN SIGN
        "\u003D", // 0x3D EQUALS SIGN
        "\u003E", // 0x3E GREATER-THAN SIGN
        "\u003F", // 0x3F QUESTION MARK
        "\u00A1", // 0x40 INVERTED EXCLAMATION MARK
        "\u0041", // 0x41 LATIN CAPITAL LETTER A
        "\u0042", // 0x42 LATIN CAPITAL LETTER B
        "\u0043", // 0x43 LATIN CAPITAL LETTER C
        "\u0044", // 0x44 LATIN CAPITAL LETTER D
        "\u0045", // 0x45 LATIN CAPITAL LETTER E
        "\u0046", // 0x46 LATIN CAPITAL LETTER F
        "\u0047", // 0x47 LATIN CAPITAL LETTER G
        "\u0048", // 0x48 LATIN CAPITAL LETTER H
        "\u0049", // 0x49 LATIN CAPITAL LETTER I
        "\u004A", // 0x4A LATIN CAPITAL LETTER J
        "\u004B", // 0x4B LATIN CAPITAL LETTER K
        "\u004C", // 0x4C LATIN CAPITAL LETTER L
        "\u004D", // 0x4D LATIN CAPITAL LETTER M
        "\u004E", // 0x4E LATIN CAPITAL LETTER N
        "\u004F", // 0x4F LATIN CAPITAL LETTER O
        "\u0050", // 0x50 LATIN CAPITAL LETTER P
        "\u0051", // 0x51 LATIN CAPITAL LETTER Q
        "\u0052", // 0x52 LATIN CAPITAL LETTER R
        "\u0053", // 0x53 LATIN CAPITAL LETTER S
        "\u0054", // 0x54 LATIN CAPITAL LETTER T
        "\u0055", // 0x55 LATIN CAPITAL LETTER U
        "\u0056", // 0x56 LATIN CAPITAL LETTER V
        "\u0057", // 0x57 LATIN CAPITAL LETTER W
        "\u0058", // 0x58 LATIN CAPITAL LETTER X
        "\u0059", // 0x59 LATIN CAPITAL LETTER Y
        "\u005A", // 0x5A LATIN CAPITAL LETTER Z
        "\u00C4", // 0x5B LATIN CAPITAL LETTER A WITH DIAERESIS
        "\u00D6", // 0x5C LATIN CAPITAL LETTER O WITH DIAERESIS
        "\u00D1", // 0x5D LATIN CAPITAL LETTER N WITH TILDE
        "\u00DC", // 0x5E LATIN CAPITAL LETTER U WITH DIAERESIS
        "\u00A7", // 0x5F SECTION SIGN
        "\u00BF", // 0x60 INVERTED QUESTION MARK
        "\u0061", // 0x61 LATIN SMALL LETTER A
        "\u0062", // 0x62 LATIN SMALL LETTER B
        "\u0063", // 0x63 LATIN SMALL LETTER C
        "\u0064", // 0x64 LATIN SMALL LETTER D
        "\u0065", // 0x65 LATIN SMALL LETTER E
        "\u0066", // 0x66 LATIN SMALL LETTER F
        "\u0067", // 0x67 LATIN SMALL LETTER G
        "\u0068", // 0x68 LATIN SMALL LETTER H
        "\u0069", // 0x69 LATIN SMALL LETTER I
        "\u006A", // 0x6A LATIN SMALL LETTER J
        "\u006B", // 0x6B LATIN SMALL LETTER K
        "\u006C", // 0x6C LATIN SMALL LETTER L
        "\u006D", // 0x6D LATIN SMALL LETTER M
        "\u006E", // 0x6E LATIN SMALL LETTER N
        "\u006F", // 0x6F LATIN SMALL LETTER O
        "\u0070", // 0x70 LATIN SMALL LETTER P
        "\u0071", // 0x71 LATIN SMALL LETTER Q
        "\u0072", // 0x72 LATIN SMALL LETTER R
        "\u0073", // 0x73 LATIN SMALL LETTER S
        "\u0074", // 0x74 LATIN SMALL LETTER T
        "\u0075", // 0x75 LATIN SMALL LETTER U
        "\u0076", // 0x76 LATIN SMALL LETTER V
        "\u0077", // 0x77 LATIN SMALL LETTER W
        "\u0078", // 0x78 LATIN SMALL LETTER X
        "\u0079", // 0x79 LATIN SMALL LETTER Y
        "\u007A", // 0x7A LATIN SMALL LETTER Z
        "\u00E4", // 0x7B LATIN SMALL LETTER A WITH DIAERESIS
        "\u00F6", // 0x7C LATIN SMALL LETTER O WITH DIAERESIS
        "\u00F1", // 0x7D LATIN SMALL LETTER N WITH TILDE
        "\u00FC", // 0x7E LATIN SMALL LETTER U WITH DIAERESIS
        "\u00E0"  // 0x7F LATIN SMALL LETTER A WITH GRAVE
    ];
    var table = {};
    for (var code = 0; code < charsByCode.length; code++) {
        table[code] = charsByCode[code];
    }
    return table;
})();
/**
 * Reverse lookup table derived from GSM2Unicode: maps a one-character Unicode
 * string to its GSM code.
 *
 * The codes are stored as the property names of GSM2Unicode, i.e. as decimal
 * strings such as "72", so callers convert them with parseInt(value, 10).
 */
var Unicode2GSM = (function() {
    var map = {};
    for (var code in GSM2Unicode) {
        // for...in also visits enumerable properties inherited through the
        // prototype chain; only the table's own entries belong in the map.
        if (Object.prototype.hasOwnProperty.call(GSM2Unicode, code)) {
            map[GSM2Unicode[code]] = code;
        }
    }
    return map;
})();
@kanazir
Copy link

kanazir commented Nov 12, 2015

Nice, but it doesn't cover the {} [] ~ € characters. In the GSM charset each of them is encoded as two codes (the 0x1B escape followed by a second code), not one.

@ashish200025
Copy link

// Maps a UTF-8 character to its GSM byte sequence. Only characters whose GSM
// code differs from their ASCII code are listed; the last row holds characters
// that are written as the 0x1B escape followed by a second byte.
$dict = array(
'@' => "\x00", '£' => "\x01", '$' => "\x02", '¥' => "\x03", 'è' => "\x04", 'é' => "\x05", 'ù' => "\x06", 'ì' => "\x07", 'ò' => "\x08", 'Ç' => "\x09", 'Ø' => "\x0B", 'ø' => "\x0C", 'Å' => "\x0E", 'å' => "\x0F",
'Δ' => "\x10", '_' => "\x11", 'Φ' => "\x12", 'Γ' => "\x13", 'Λ' => "\x14", 'Ω' => "\x15", 'Π' => "\x16", 'Ψ' => "\x17", 'Σ' => "\x18", 'Θ' => "\x19", 'Ξ' => "\x1A", 'Æ' => "\x1C", 'æ' => "\x1D", 'ß' => "\x1E", 'É' => "\x1F",
// all \x2? removed
// all \x3? removed
// all \x4? removed
'Ä' => "\x5B", 'Ö' => "\x5C", 'Ñ' => "\x5D", 'Ü' => "\x5E", '§' => "\x5F",
'¿' => "\x60",
'ä' => "\x7B", 'ö' => "\x7C", 'ñ' => "\x7D", 'ü' => "\x7E", 'à' => "\x7F",
// The backslash key must be written '\\': a lone '\' escapes the closing quote
// and is a parse error.
'^' => "\x1B\x14", '{' => "\x1B\x28", '}' => "\x1B\x29", '\\' => "\x1B\x2F", '[' => "\x1B\x3C", '~' => "\x1B\x3D", ']' => "\x1B\x3E", '|' => "\x1B\x40", '€' => "\x1B\x65"
);

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment