Skip to content

Instantly share code, notes, and snippets.

@phillipcaudell
Created March 4, 2024 09:33
Show Gist options
  • Save phillipcaudell/3ee1b696a42337b57612a5814f940ddd to your computer and use it in GitHub Desktop.
Save phillipcaudell/3ee1b696a42337b57612a5814f940ddd to your computer and use it in GitHub Desktop.
An extension that adds the ability to initialise a String.Encoding from an IANA charset name.
import Foundation
extension String.Encoding {

    /// Creates a new instance using the specified IANA charset name.
    ///
    /// Matching ignores letter case as well as `-` and `_` separators, so
    /// `"UTF-8"`, `"utf_8"` and `"utf8"` all resolve to `.utf8`.
    ///
    /// - Parameter name: The charset name, for example `"iso-8859-1"`.
    /// - Returns: `nil` when `name` does not match any charset handled below.
    public init?(charsetName name: String) {
        // Normalise once so every spelling variant collapses onto a single key.
        let key = name.lowercased().filter { $0 != "-" && $0 != "_" }

        switch key {
        case "usascii", "ascii":
            self = .ascii
        case "xnextstep":
            self = .nextstep
        case "eucjp":
            self = .japaneseEUC
        case "utf8":
            self = .utf8
        case "iso88591", "latin1":
            self = .isoLatin1
        case "xmacsymbol":
            self = .symbol
        case "cp932", "shiftjis":
            // "Shift_JIS" is the IANA-registered spelling; "cp932" is the name
            // reported by `charsetName` for the same encoding.
            self = .shiftJIS
        case "iso88592", "latin2":
            self = .isoLatin2
        case "utf16":
            self = .utf16
        case "utf16be":
            self = .utf16BigEndian
        case "utf16le":
            self = .utf16LittleEndian
        case "utf32":
            self = .utf32
        case "utf32be":
            self = .utf32BigEndian
        case "utf32le":
            self = .utf32LittleEndian
        case "windows1250":
            self = .windowsCP1250
        case "windows1251":
            self = .windowsCP1251
        case "windows1252":
            self = .windowsCP1252
        case "windows1253":
            self = .windowsCP1253
        case "windows1254":
            self = .windowsCP1254
        case "iso2022jp":
            self = .iso2022JP
        case "macintosh":
            self = .macOSRoman
        default:
            return nil
        }
    }

    /// Returns the IANA charset name.
    ///
    /// The value is the lowercase, dash-separated spelling (for example
    /// `"utf-8"`), or `nil` when the encoding has no entry in the table below.
    public var charsetName: String? {
        switch self {
        case .ascii:
            return "us-ascii"
        case .nextstep:
            return "x-nextstep"
        case .japaneseEUC:
            return "euc-jp"
        case .utf8:
            return "utf-8"
        case .isoLatin1:
            return "iso-8859-1"
        case .symbol:
            return "x-mac-symbol"
        case .shiftJIS:
            return "cp932"
        case .isoLatin2:
            return "iso-8859-2"
        case .utf16:
            return "utf-16"
        case .utf16BigEndian:
            return "utf-16be"
        case .utf16LittleEndian:
            return "utf-16le"
        case .utf32:
            return "utf-32"
        case .utf32BigEndian:
            return "utf-32be"
        case .utf32LittleEndian:
            return "utf-32le"
        case .windowsCP1250:
            return "windows-1250"
        case .windowsCP1251:
            return "windows-1251"
        case .windowsCP1252:
            return "windows-1252"
        case .windowsCP1253:
            return "windows-1253"
        case .windowsCP1254:
            return "windows-1254"
        case .iso2022JP:
            return "iso-2022-jp"
        case .macOSRoman:
            return "macintosh"
        default:
            return nil
        }
    }
}
/// Optional `Codable` support for `String.Encoding`.
///
/// The encoded form is a single string value holding the IANA charset name.
extension String.Encoding: Codable {

    /// Encodes this encoding as its IANA charset name.
    ///
    /// - Throws: `EncodingError.invalidValue` when the encoding has no charset
    ///   name. Writing `null` in that case would produce a payload that
    ///   `init(from:)` cannot read back, because it decodes a non-optional string.
    public func encode(to encoder: Encoder) throws {
        guard let name = charsetName else {
            let context = EncodingError.Context(
                codingPath: encoder.codingPath,
                debugDescription: "String.Encoding(rawValue: \(rawValue)) has no known IANA charset name."
            )
            throw EncodingError.invalidValue(self, context)
        }
        var container = encoder.singleValueContainer()
        try container.encode(name)
    }

    /// Decodes an encoding from a single string holding a charset name.
    ///
    /// Unrecognised charset names fall back to `.ascii` instead of failing the decode.
    ///
    /// - Throws: Any error raised while reading the single string value.
    public init(from decoder: Decoder) throws {
        let container = try decoder.singleValueContainer()
        let charsetName = try container.decode(String.self)
        // NOTE(review): the ASCII fallback hides unknown charsets from the caller;
        // confirm this leniency is wanted before tightening it.
        self = String.Encoding(charsetName: charsetName) ?? .ascii
    }
}
#if canImport(Foundation)
extension String.Encoding {

    /// Prints out switch statements that can be transplanted into our charset functions.
    ///
    /// `CFStringConvertEncodingToIANACharSetName` and related APIs are unavailable on platforms
    /// where we may wish to run SwiftEmail. For this reason we need our own equivalents.
    /// This function prints two lists of `case` statements: one for the charset-name
    /// initialiser and one for the `charsetName` property.
    static func printIANASwitchStatements() {
        var decodeStatements = [String]()
        var encodeStatements = [String]()

        // As String.Encoding isn't CaseIterable, scan the low raw values and then add
        // the byte-order specific Unicode encodings explicitly: their raw values lie
        // far outside the scanned range.
        var candidates = (1...1000).map { String.Encoding(rawValue: UInt($0)) }
        candidates += [
            .utf16BigEndian,
            .utf16LittleEndian,
            .utf32,
            .utf32BigEndian,
            .utf32LittleEndian,
        ]

        for encoding in candidates {
            guard !encoding.description.isEmpty else {
                continue
            }
            let cfEncoding = CFStringConvertNSStringEncodingToEncoding(encoding.rawValue)
            guard let name = CFStringConvertEncodingToIANACharSetName(cfEncoding) as? String else {
                continue
            }

            // The initialiser lowercases its input and strips both "-" and "_" before
            // matching, so the generated case label has to be normalised the same way
            // or it could never match.
            let strippedName = name.lowercased().filter { $0 != "-" && $0 != "_" }

            // Initialiser statements
            let decode = """
            case "\(strippedName)":
            self = .init(rawValue: \(encoding.rawValue))
            """
            decodeStatements.append(decode)

            // Charset name statements
            let encode = """
            case \(encoding.rawValue):
            return "\(name)"
            """
            encodeStatements.append(encode)
        }

        print("Initialiser statements:")
        print(decodeStatements.joined(separator: "\n"))
        print("Charset name statements:")
        print(encodeStatements.joined(separator: "\n"))
    }
}
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment