Last active
October 7, 2018 20:51
-
-
Save jackwillis/7b6158e27ef881962595158f3ff8531b to your computer and use it in GitHub Desktop.
GSM 03.38 basic character set (“GSM-7”) in Ruby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
require 'set'

# Helpers for checking whether text fits the GSM 03.38 basic character set
# ("GSM-7").
#
# Only the basic table is modelled here: the escape code and the characters
# reachable through the extension table (for example "[", "]" or "€") are not
# members of BASIC_CHARACTER_SET.
#
# Characters are compared exactly as they appear in the string; no Unicode
# normalization or case folding is applied before the lookup.
module GSM7
  # https://en.wikipedia.org/wiki/GSM_03.38#GSM_7-bit_default_alphabet_and_extension_table_of_3GPP_TS_23.038_/_GSM_03.38
  BASIC_CHARACTER_SET = Set.new([
    "\n", "\r", " ",
    "!", "\"", "#", "$", "%", "&", "'", "(", ")", "*", "+", ",", "-", ".", "/",
    "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
    ":", ";", "<", "=", ">", "?", "@",
    "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M",
    "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z",
    "_",
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
    "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z",
    "¡", "£", "¤", "¥", "§", "¿",
    "Ä", "Å", "Æ", "Ç", "É", "Ñ", "Ö", "Ø", "Ü", "ß",
    "à", "ä", "å", "æ", "è", "é", "ì", "ñ", "ò", "ö", "ø", "ù", "ü",
    "Γ", "Δ", "Θ", "Λ", "Ξ", "Π", "Σ", "Φ", "Ψ", "Ω"
  ]).freeze

  # Tells whether every character of +string+ belongs to the basic set.
  #
  # @param string [String] text to check
  # @return [Boolean] true when no character falls outside
  #   BASIC_CHARACTER_SET (an empty string therefore yields true)
  def self.basic_characters_only?(string)
    # Stops at the first offending character instead of building a set of
    # the whole input first.
    string.each_char.all? { |char| BASIC_CHARACTER_SET.include?(char) }
  end

  # Collects the distinct characters of +string+ that are not in the basic set.
  #
  # @param string [String] text to check
  # @return [Set<String>] the offending characters, each listed once, in
  #   order of first appearance; empty when the whole string is basic
  def self.non_basic_characters(string)
    Set.new(string.each_char.reject { |char| BASIC_CHARACTER_SET.include?(char) })
  end
end
# Usage examples; the value after each `#=>` is the result of the call.

# Emojis are not allowed
GSM7.basic_characters_only? 'Halloween party tonight 🎃👻' #=> false
GSM7.non_basic_characters 'Halloween party tonight 🎃👻' #=> #<Set: {"🎃", "👻"}>
GSM7.basic_characters_only? 'Halloween party tonight!' #=> true
GSM7.non_basic_characters 'Halloween party tonight!' #=> #<Set: {}>

# Smart quotes and dashes are not allowed
GSM7.basic_characters_only? 'Lorem “ipsum” dolor – sit amet' #=> false
GSM7.non_basic_characters 'Lorem “ipsum” dolor – sit amet' #=> #<Set: {"“", "”", "–"}>
GSM7.basic_characters_only? 'Lorem "ipsum" dolor -- sit amet' #=> true
GSM7.non_basic_characters 'Lorem "ipsum" dolor -- sit amet' #=> #<Set: {}>

# Square brackets are not allowed
GSM7.basic_characters_only? '7:30 show: VIEW FROM MY [SIC] BED' #=> false
GSM7.non_basic_characters '7:30 show: VIEW FROM MY [SIC] BED' #=> #<Set: {"[", "]"}>
GSM7.basic_characters_only? '7:30 show: VIEW FROM MY (SIC) BED' #=> true
GSM7.non_basic_characters '7:30 show: VIEW FROM MY (SIC) BED' #=> #<Set: {}>

# Only a limited set of Western European letters is supported
GSM7.basic_characters_only? 'école mañana könig værdi' #=> true
# Lowercase "ç" is not in the set (only uppercase "Ç" is)
GSM7.basic_characters_only? 'école mañana façade könig værdi' #=> false
GSM7.non_basic_characters 'école mañana façade könig værdi' #=> #<Set: {"ç"}>
GSM7.basic_characters_only? '道可道非常道,名可名非常名。' #=> false
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment