Skip to content

Instantly share code, notes, and snippets.

@cfilipov
Created December 2, 2015 21:36
Show Gist options
  • Save cfilipov/c51258947a47fa80d973 to your computer and use it in GitHub Desktop.
Save cfilipov/c51258947a47fa80d973 to your computer and use it in GitHub Desktop.
Strings & Unicode in Swift
/*:
# Strings and Unicode
*/
import Foundation
/*: Some things we can't do */
//let ❤ = "❤" // error: Expected a pattern
//let ☆ = "☆" // error: Expected a pattern
/*: Some things we probably shouldn't be able to do */
let e⃝⃞⃟⃠⃣⃤⃥⃦⃪⃧꙰꙲꙱ = "e⃝⃞⃟⃠⃣⃤⃥⃦⃪⃧꙰꙲꙱" // Yes, this is valid: an "e" followed by a run of combining marks
/*:
## Some Extensions
*/
extension String {
    /// The string with its characters spelled out as Unicode names.
    ///
    /// Applies Core Foundation's `kCFStringTransformToUnicodeName` transform to a
    /// mutable copy of the whole string and returns the result, for example
    /// `\N{COMBINING ENCLOSING CIRCLE}` for U+20DD. The receiver is not modified.
    var unicodeName: String {
        // CFStringTransform edits its argument in place, so work on a mutable copy.
        let scratch = NSMutableString(string: self)
        // A nil range asks Core Foundation to transform the entire string.
        CFStringTransform(scratch as CFMutableString, nil, kCFStringTransformToUnicodeName, false)
        return scratch as String
    }
}
"😄".unicodeName // \N{SMILING FACE WITH OPEN MOUTH AND SMILING EYES} (one backslash; the playground may display it escaped as \\N)
/*
// This crashes the playground
extension UnicodeScalar : IntegerLiteralConvertible {
init(integerLiteral value: Self.IntegerLiteralType) {
self = UnicodeScalar(value)
}
}
*/
extension UnicodeScalar {
    /// The scalar's code point written as `U+` followed by its value in
    /// lowercase hexadecimal, without zero padding (e.g. `U+61`, `U+73a9`).
    var codePointString: String {
        let hexDigits = String(value, radix: 16)
        return "U+" + hexDigits
    }
}
/*: ## Simple string, Latin character */
// "a" is U+0061: one Character, one scalar, one UTF-16 code unit, one UTF-8 byte.
let a₁ = "a"
String("\u{61}") // "a"
a₁.characters.count // 1
a₁.unicodeScalars.count // 1
a₁.utf16.count // 1
a₁.utf8.count // 1
let a₂: UnicodeScalar = "a" // 97
a₂.codePointString // U+61
String(a₂.value, radix: 16) // 61
String(UnicodeScalar(0x61)) // "a"
/*:
## Some Chinese character
This is the Chinese character for "Toy" or "Toys" (according to Google Translate).
*/
// U+73A9 is a single scalar that fits in one UTF-16 code unit but needs three bytes in UTF-8.
let 玩₁ = "玩"
String("\u{73a9}") // 玩
玩₁.characters.count // 1
玩₁.unicodeScalars.count // 1
玩₁.utf16.count // 1
玩₁.utf8.count // 3
let 玩₂: UnicodeScalar = "玩" // 29609
String(玩₂.value, radix: 16) // 73a9
玩₁.unicodeName // \N{CJK UNIFIED IDEOGRAPH-73A9}
String(UnicodeScalar(0x73a9)) // "玩"
/*:
## Grapheme Cluster: Combining Characters
*/
// One Character (grapheme cluster) built from two scalars: a base letter plus a combining vowel sign.
let কী₁ = "কী" // U+0995 BENGALI LETTER KA + U+09C0 BENGALI VOWEL SIGN II
String("\u{0995}\u{09C0}") // "কী"
কী₁.characters.count // 1
কী₁.unicodeScalars.count // 2
কী₁.utf16.count // 2
কী₁.utf8.count // 6
কী₁.unicodeScalars.dropFirst() // ী // U+09C0 BENGALI VOWEL SIGN II
কী₁.unicodeScalars.dropLast() // ক // U+0995 BENGALI LETTER KA
//let কী₂: UnicodeScalar = "কী" // error: cannot convert value of type 'String' to specified type 'UnicodeScalar'
কী₁.unicodeName // \N{BENGALI LETTER KA}\N{BENGALI VOWEL SIGN II}
String(UnicodeScalar(0x0995)) // "ক"
String(UnicodeScalar(0x09C0)) // "ী"
/*:
## Grapheme Cluster: Emoji
*/
// One Character made of two regional-indicator scalars; together they take
// four UTF-16 code units and eight UTF-8 bytes.
let 🇺🇸 = "🇺🇸"
🇺🇸.characters.count // 1
🇺🇸.unicodeScalars.count // 2
🇺🇸.utf16.count // 4
🇺🇸.utf8.count // 8
🇺🇸.unicodeScalars.dropFirst() // 🇸
🇺🇸.unicodeScalars.dropLast() // 🇺
🇺🇸.unicodeName // \N{REGIONAL INDICATOR SYMBOL LETTER U}\N{REGIONAL INDICATOR SYMBOL LETTER S}
/*:
## Zalgo Text
*/
let zalgo = "e⃝⃞⃟⃠⃣⃤⃥⃦⃪⃧꙰꙲꙱"
// Print one tab-separated row per scalar: the scalar itself, its code point, and its Unicode name.
for s in zalgo.unicodeScalars {
print("\(s)\t\(s.codePointString)\t\(String(s).unicodeName)")
}
/*
e U+65 e
⃝ U+20dd \N{COMBINING ENCLOSING CIRCLE}
⃞ U+20de \N{COMBINING ENCLOSING SQUARE}
⃟ U+20df \N{COMBINING ENCLOSING DIAMOND}
⃠ U+20e0 \N{COMBINING ENCLOSING CIRCLE BACKSLASH}
⃣ U+20e3 \N{COMBINING ENCLOSING KEYCAP}
⃤ U+20e4 \N{COMBINING ENCLOSING UPWARD POINTING TRIANGLE}
⃥ U+20e5 \N{COMBINING REVERSE SOLIDUS OVERLAY}
⃦ U+20e6 \N{COMBINING DOUBLE VERTICAL STROKE OVERLAY}
⃧ U+20e7 \N{COMBINING ANNUITY SYMBOL}
⃪ U+20ea \N{COMBINING LEFTWARDS ARROW OVERLAY}
꙰ U+a670 \N{COMBINING CYRILLIC TEN MILLIONS SIGN}
꙲ U+a672 \N{COMBINING CYRILLIC THOUSAND MILLIONS SIGN}
꙱ U+a671 \N{COMBINING CYRILLIC HUNDRED MILLIONS SIGN}
*/
// Print every scalar on a single line, each followed by a space (no newline is emitted).
for s in zalgo.unicodeScalars { // e ⃝ ⃞ ⃟ ⃠ ⃣ ⃤ ⃥ ⃦ ⃧ ⃪ ꙰ ꙲ ꙱
print("\(s) ", terminator: "")
}
// "e" + U+20DD COMBINING ENCLOSING CIRCLE + U+20E7 COMBINING ANNUITY SYMBOL
let combined = "e\u{20dd}\u{20e7}" // "e⃝⃧"
/*:
## See Also
* [Why is Swift's String API So Hard? - mikeash.com](https://www.mikeash.com/pyblog/friday-qa-2015-11-06-why-is-swifts-string-api-so-hard.html)
* [What is the difference between ‘combining characters’ and ‘grapheme extenders’ in Unicode? - Stack Overflow](http://stackoverflow.com/questions/21722729)
* [Strings in Swift - Ole Begemann](http://oleb.net/blog/2014/07/swift-strings)
* [CFString​Transform - NSHipster](http://nshipster.com/cfstringtransform/)
* [Re: Origin of the U+nnnn notation](http://unicode.org/mail-arch/unicode-ml/y2005-m11/0060.html)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment