Created
December 2, 2015 21:36
-
-
Save cfilipov/c51258947a47fa80d973 to your computer and use it in GitHub Desktop.
Strings & Unicode in Swift
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*: | |
# Strings and Unicode | |
*/ | |
import Foundation | |
/*: Some things we can't do */ | |
//let ❤ = "❤" // error: Expected a pattern | |
//let ☆ = "☆" // error: Expected a pattern | |
/*: Some things we probably shouldn't be able to do */
let e⃝⃞⃟⃠⃣⃤⃥⃦⃪⃧꙰꙲꙱ = "e⃝⃞⃟⃠⃣⃤⃥⃦⃪⃧꙰꙲꙱" // Yes, this is valid: the combining marks are accepted as part of the identifier
/*: | |
## Some Extensions | |
*/ | |
extension String {
    /// The Unicode character names of the receiver's contents, produced by
    /// `CFStringTransform` with `kCFStringTransformToUnicodeName`.
    ///
    /// Results recorded in this playground: `"😄"` becomes
    /// `\N{SMILING FACE WITH OPEN MOUTH AND SMILING EYES}`, while `"e"` comes
    /// back unchanged.
    var unicodeName: String {
        // The transform rewrites its argument in place, so work on a mutable copy.
        let buffer = NSMutableString(string: self)
        // Cover the whole string; the length is measured in UTF-16 code units.
        var fullRange = CFRangeMake(0, buffer.length)
        // The success flag is not checked: whatever the buffer holds afterwards
        // is returned.
        CFStringTransform(buffer as CFMutableString, &fullRange, kCFStringTransformToUnicodeName, false)
        // Bridge explicitly instead of relying on `String(_:)` accepting a CF string.
        return buffer as String
    }
}
"😄".unicodeName // \N{SMILING FACE WITH OPEN MOUTH AND SMILING EYES}
/* | |
// This crashes the playground | |
extension UnicodeScalar : IntegerLiteralConvertible { | |
init(integerLiteral value: Self.IntegerLiteralType) { | |
self = UnicodeScalar(value) | |
} | |
} | |
*/ | |
extension UnicodeScalar {
    /// The scalar's value in `U+` notation, e.g. `U+61` for "a" and `U+73a9` for "玩".
    ///
    /// The digits are lowercase hexadecimal with no zero padding, so the
    /// result is `U+61` rather than the conventional `U+0061`.
    var codePointString: String {
        return "U+" + String(value, radix: 16)
    }
}
/*: ## Simple string, Latin character */ | |
// A single Latin letter: one Character, one scalar, one code unit in every encoding.
let a₁ = "a"
String("\u{61}") // "a"
a₁.characters.count // 1
a₁.unicodeScalars.count // 1
a₁.utf16.count // 1
a₁.utf8.count // 1
let a₂: UnicodeScalar = "a" // 97
a₂.codePointString // U+61
String(a₂.value, radix: 16) // 61
String(UnicodeScalar(0x61)) // "a" (0x61 is the lowercase letter; "A" would be 0x41)
/*: | |
## Some Chinese character
This is the Chinese character for "Toy" or "Toys" (according to Google Translate).
*/ | |
// One scalar that fits in a single UTF-16 code unit but needs three UTF-8 bytes.
let 玩₁ = "玩"
String("\u{73a9}") // 玩
玩₁.characters.count // 1
玩₁.unicodeScalars.count // 1
玩₁.utf16.count // 1
玩₁.utf8.count // 3
let 玩₂: UnicodeScalar = "玩" // 29609
String(玩₂.value, radix: 16) // 73a9
玩₁.unicodeName // \N{CJK UNIFIED IDEOGRAPH-73A9}
String(UnicodeScalar(0x73a9)) // "玩"
/*: | |
## Grapheme Cluster: Combining Characters | |
*/ | |
// One Character built from two scalars: a base letter plus a combining vowel sign.
let কী₁ = "কী" // U+0995 BENGALI LETTER KA + U+09C0 BENGALI VOWEL SIGN II
String("\u{0995}\u{09C0}") // "কী"
কী₁.characters.count // 1
কী₁.unicodeScalars.count // 2
কী₁.utf16.count // 2
কী₁.utf8.count // 6
কী₁.unicodeScalars.dropFirst() // ী // U+09C0 BENGALI VOWEL SIGN II
কী₁.unicodeScalars.dropLast() // ক // U+0995 BENGALI LETTER KA
//let কী₂: UnicodeScalar = "কী" // error: cannot convert value of type 'String' to specified type 'UnicodeScalar'
কী₁.unicodeName // \N{BENGALI LETTER KA}\N{BENGALI VOWEL SIGN II}
String(UnicodeScalar(0x0995)) // "ক"
String(UnicodeScalar(0x09C0)) // "ী"
/*: | |
## Grapheme Cluster: Emoji | |
*/ | |
// One Character made of two regional-indicator scalars.
let 🇺🇸 = "🇺🇸"
🇺🇸.characters.count // 1
🇺🇸.unicodeScalars.count // 2
🇺🇸.utf16.count // 4
🇺🇸.utf8.count // 8
🇺🇸.unicodeScalars.dropFirst() // 🇸
🇺🇸.unicodeScalars.dropLast() // 🇺
🇺🇸.unicodeName // \N{REGIONAL INDICATOR SYMBOL LETTER U}\N{REGIONAL INDICATOR SYMBOL LETTER S}
/*: | |
## Zalgo Text | |
*/ | |
let zalgo = "e⃝⃞⃟⃠⃣⃤⃥⃦⃪⃧꙰꙲꙱"
// Print one tab-separated row per scalar: the scalar itself, its code point, and its Unicode name.
for s in zalgo.unicodeScalars {
    print("\(s)\t\(s.codePointString)\t\(String(s).unicodeName)")
}
/* | |
e U+65 e | |
⃝ U+20dd \N{COMBINING ENCLOSING CIRCLE} | |
⃞ U+20de \N{COMBINING ENCLOSING SQUARE} | |
⃟ U+20df \N{COMBINING ENCLOSING DIAMOND} | |
⃠ U+20e0 \N{COMBINING ENCLOSING CIRCLE BACKSLASH} | |
⃣ U+20e3 \N{COMBINING ENCLOSING KEYCAP} | |
⃤ U+20e4 \N{COMBINING ENCLOSING UPWARD POINTING TRIANGLE} | |
⃥ U+20e5 \N{COMBINING REVERSE SOLIDUS OVERLAY} | |
⃦ U+20e6 \N{COMBINING DOUBLE VERTICAL STROKE OVERLAY} | |
⃧ U+20e7 \N{COMBINING ANNUITY SYMBOL} | |
⃪ U+20ea \N{COMBINING LEFTWARDS ARROW OVERLAY} | |
꙰ U+a670 \N{COMBINING CYRILLIC TEN MILLIONS SIGN} | |
꙲ U+a672 \N{COMBINING CYRILLIC THOUSAND MILLIONS SIGN} | |
꙱ U+a671 \N{COMBINING CYRILLIC HUNDRED MILLIONS SIGN} | |
*/ | |
// Print every scalar followed by a space, all on one line:
// e ⃝ ⃞ ⃟ ⃠ ⃣ ⃤ ⃥ ⃦ ⃧ ⃪ ꙰ ꙲ ꙱
for s in zalgo.unicodeScalars {
    print("\(s) ", terminator: "")
}
let combined = "e\u{20dd}\u{20e7}" // "e⃝⃧"
/*: | |
## See Also | |
* [Why is Swift's String API So Hard? - mikeash.com](https://www.mikeash.com/pyblog/friday-qa-2015-11-06-why-is-swifts-string-api-so-hard.html) | |
* [What is the difference between ‘combining characters’ and ‘grapheme extenders’ in Unicode? - Stack Overflow](http://stackoverflow.com/questions/21722729) | |
* [Strings in Swift - Ole Begemann](http://oleb.net/blog/2014/07/swift-strings) | |
* [CFStringTransform - NSHipster](http://nshipster.com/cfstringtransform/) | |
* [Re: Origin of the U+nnnn notation](http://unicode.org/mail-arch/unicode-ml/y2005-m11/0060.html) | |
*/ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment