Skip to content

Instantly share code, notes, and snippets.

@norio-nomura
Last active February 10, 2017 00:30
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save norio-nomura/2a79822004e7c89228300cf19595ca99 to your computer and use it in GitHub Desktop.
Save norio-nomura/2a79822004e7c89228300cf19595ca99 to your computer and use it in GitHub Desktop.
extension String {
    /// A copy of the string with HTML character references replaced by the
    /// characters they denote.
    ///
    /// The string is copied into an array of UTF-16 code units and scanned from
    /// the end towards the start. Each candidate begins at an `&` and ends at the
    /// first `;` found before the previously examined `&`; text produced by a
    /// replacement is never examined again, so `&amp;lt;` becomes `&lt;`, not `<`.
    ///
    /// Recognised forms, each 4 to 10 code units long including `&` and `;`:
    /// - hexadecimal references such as `&#xa3;` or `&#x1F600;`
    /// - decimal references such as `&#163;`
    /// - named references present in `tableMap`, such as `&lt;`
    ///
    /// Numeric references may denote any Unicode scalar value; values above
    /// U+FFFF are written as a surrogate pair. References that are malformed,
    /// unknown, denote a surrogate code point, or exceed U+10FFFF are left in
    /// the output unchanged.
    public var unescapeHTMLUsingArrayOfUnichar: String {
        var buffer = [unichar](repeating: 0, count: utf16.count)
        NSString(string: self).getCharacters(&buffer)

        let ampersand = unichar(UInt8(ascii: "&"))
        let semicolon = unichar(UInt8(ascii: ";"))
        let sharp = unichar(UInt8(ascii: "#"))
        let hexPrefixes = ["X", "x"].map { unichar(UInt8(ascii: $0)) }

        // `end` is the position of the most recently examined `&` (initially the
        // end of the buffer); each pass looks for the last `&` before it.
        var end = buffer.endIndex
        while let begin = buffer.prefix(upTo: end).reversed().index(of: ampersand)
            .map({ buffer.index(before: $0.base) }) {
            defer { end = begin }
            // if we don't find a semicolon in the range, we don't have a sequence
            guard let semicolonIndex = buffer[begin..<end].index(of: semicolon) else { continue }
            let range = begin...semicolonIndex
            // a sequence must be longer than 3 (&lt;) and less than 11 (&thetasym;)
            // This also guarantees that `begin + 1` and `begin + 2` are valid
            // indices and that `begin + 3 <= semicolonIndex`.
            guard 4...10 ~= range.count else { continue }

            let replacement: [unichar]?
            if buffer[begin + 1] == sharp {
                if hexPrefixes.contains(buffer[begin + 2]) {
                    // Hex escape sequences &#xa3;
                    replacement = String.utf16CodeUnits(
                        forNumericReference: buffer[begin + 3..<semicolonIndex], radix: 16)
                } else {
                    // Decimal sequences &#123;
                    replacement = String.utf16CodeUnits(
                        forNumericReference: buffer[begin + 2..<semicolonIndex], radix: 10)
                }
            } else {
                // "standard" sequences, looked up by name length and then by name
                let escapedNameRange = begin + 1..<semicolonIndex
                let escapedName = String(utf16Storage: buffer[escapedNameRange])
                let unit = tableMap[escapedNameRange.count]?[escapedName]
                replacement = unit.map { [$0] }
            }
            if let replacement = replacement {
                // Only positions at or after `begin` move, and those are never
                // visited again.
                buffer.replaceSubrange(range, with: replacement)
            }
        }
        return String(utf16Storage: buffer)
    }

    /// Converts the digits of a numeric character reference into the UTF-16
    /// code units of the scalar they denote.
    ///
    /// - Parameters:
    ///   - digits: The code units between the `&#` / `&#x` prefix and the `;`.
    ///   - radix: `10` for decimal references, `16` for hexadecimal ones.
    /// - Returns: One code unit, or a surrogate pair for values above U+FFFF;
    ///   `nil` when `digits` is empty, contains anything but digits valid for
    ///   `radix` (signs are rejected), or does not denote a Unicode scalar value.
    private static func utf16CodeUnits(forNumericReference digits: ArraySlice<unichar>,
                                       radix: UInt32) -> [unichar]? {
        guard !digits.isEmpty else { return nil }
        var value: UInt32 = 0
        for unit in digits {
            let digit: UInt32
            switch unit {
            case 0x30...0x39: digit = UInt32(unit) - 0x30        // "0"..."9"
            case 0x41...0x46: digit = UInt32(unit) - 0x41 + 10   // "A"..."F"
            case 0x61...0x66: digit = UInt32(unit) - 0x61 + 10   // "a"..."f"
            default: return nil
            }
            guard digit < radix else { return nil }
            value = value * radix + digit
            // Bail out early; this also keeps the accumulator far from overflow.
            guard value <= 0x10FFFF else { return nil }
        }
        // Fails for surrogate code points, which are not scalar values.
        guard let scalar = UnicodeScalar(value) else { return nil }
        return UTF16.width(scalar) == 1
            ? [unichar(scalar.value)]
            : [UTF16.leadSurrogate(scalar), UTF16.trailSurrogate(scalar)]
    }

    /// Creates a string from contiguous UTF-16 code units.
    ///
    /// - Parameter utf16Storage: The code units to copy. Storage that exposes
    ///   no base address (possible only when it is empty) yields `""`.
    private init<T>(utf16Storage: T) where T: ContiguousStorage, T.Iterator.Element == unichar {
        self = utf16Storage.withUnsafeBufferPointer { units -> String in
            // `baseAddress` may be nil for an empty buffer; never force-unwrap it.
            guard let base = units.baseAddress else { return "" }
            return String(utf16CodeUnits: base, count: units.count)
        }
    }
}
/// A sequence that can hand its elements to a closure as one
/// `UnsafeBufferPointer`.
///
/// It lets `String.init(utf16Storage:)` accept `Array`, `ArraySlice` and
/// `ContiguousArray` through a single generic parameter.
private protocol ContiguousStorage: Sequence {
/// Calls `body` with a buffer pointer over the elements and returns whatever
/// `body` returns, rethrowing any error it throws.
func withUnsafeBufferPointer<R>(
_ body: (UnsafeBufferPointer<Iterator.Element>) throws -> R
) rethrows -> R
}
// The conformances are empty: each of these types already has a
// `withUnsafeBufferPointer` method that satisfies the requirement.
extension Array: ContiguousStorage {}
extension ArraySlice: ContiguousStorage {}
extension ContiguousArray: ContiguousStorage {}
/// Builds a name-to-code-unit lookup table from a list of entity descriptions.
///
/// - Parameter array: The entities to index. Each element's `name` becomes a
///   key and the first UTF-16 code unit of its `character` becomes the value.
/// - Returns: A dictionary from entity name to UTF-16 code unit. When a name
///   occurs more than once, the last occurrence wins.
private func escapeMap(from array: [HTMLEscapeMap]) -> [String: unichar] {
    var lookup = [String: unichar](minimumCapacity: array.count)
    for entry in array {
        // Deliberately force-unwrapped: an entry whose `character` has no
        // UTF-16 code units traps here.
        let firstUnit = entry.character.utf16.first!
        lookup[entry.name] = firstUnit
    }
    return lookup
}
/// Named-entity lookup tables, keyed by the length of the entity name (the
/// text between `&` and `;`).
///
/// Each inner dictionary is built by `escapeMap(from:)` from the matching
/// `unicodeHTMLEscapeMapNameLength_N` array, which is not defined in this part
/// of the file. Only name lengths 2 through 8 have a table.
private let tableMap: [Int:[String:unichar]] = [
2: escapeMap(from:unicodeHTMLEscapeMapNameLength_2),
3: escapeMap(from:unicodeHTMLEscapeMapNameLength_3),
4: escapeMap(from:unicodeHTMLEscapeMapNameLength_4),
5: escapeMap(from:unicodeHTMLEscapeMapNameLength_5),
6: escapeMap(from:unicodeHTMLEscapeMapNameLength_6),
7: escapeMap(from:unicodeHTMLEscapeMapNameLength_7),
8: escapeMap(from:unicodeHTMLEscapeMapNameLength_8),
]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment