Skip to content

Instantly share code, notes, and snippets.

@ha1f
Last active September 19, 2017 09:06
Show Gist options
  • Save ha1f/356e0ea9c27762873cbdc26f6cf691ce to your computer and use it in GitHub Desktop.
Save ha1f/356e0ea9c27762873cbdc26f6cf691ce to your computer and use it in GitHub Desktop.
//
// LanguageDetector(Swift3.x).swift
// Musubi
//
// Created by はるふ on 2017/07/12.
// Copyright ©ha1f 2017年 line. All rights reserved.
//
import Foundation
/// Identifies the language of a piece of text with `NSLinguisticTagger`
/// (Swift 3.x flavour of the API).
struct LanguageDetector {
    /// Code handed back whenever no language could be identified.
    static let notDetermined = "und"

    /// Tagger configured for the language scheme only; reused across calls.
    private let tagger = NSLinguisticTagger(tagSchemes: [NSLinguisticTagSchemeLanguage], options: 0)

    /// Detects the language of `text`.
    ///
    /// - Parameter text: The text to inspect.
    /// - Returns: A language code in BCP-47 format, or `notDetermined` when
    ///   `text` is empty or the tagger yields no tag.
    func detect(_ text: String) -> String {
        // An empty string has no character at index 0 to query, so bail out first.
        if text.isEmpty {
            return LanguageDetector.notDetermined
        }

        tagger.string = text
        let language = tagger.tag(at: 0,
                                  scheme: NSLinguisticTagSchemeLanguage,
                                  tokenRange: nil,
                                  sentenceRange: nil)
        return language ?? LanguageDetector.notDetermined
    }
}
//
// LanguageDetector(Swift4.x).swift
// Musubi
//
// Created by はるふ on 2017/07/12.
// Copyright ©ha1f 2017年 line. All rights reserved.
//
import Foundation
/// Identifies the language of a piece of text with `NSLinguisticTagger`
/// (Swift 4.x flavour of the API).
public struct LanguageDetector {
    /// Code handed back whenever no language could be identified.
    public static let undetermined = "und"

    /// Alias of `undetermined`, kept so callers written against the Swift 3.x
    /// version of this type (which named the constant `notDetermined`) still compile.
    public static let notDetermined = undetermined

    /// Tagger configured for the language scheme only; reused across calls.
    private let tagger = NSLinguisticTagger(tagSchemes: [.language], options: 0)

    /// Creates a detector.
    ///
    /// Declared explicitly because the implicit initializer of a `public`
    /// struct is only `internal`, which would make the type impossible to
    /// instantiate from another module.
    public init() {}

    /// Detects the language of `text`.
    ///
    /// - Parameter text: The text to inspect.
    /// - Returns: A language code in BCP-47 format, or `undetermined` when
    ///   `text` is empty or the tagger yields no tag.
    public func detect(_ text: String) -> String {
        // An empty string has no character at index 0 to query, so bail out first.
        guard !text.isEmpty else {
            return LanguageDetector.undetermined
        }

        tagger.string = text
        let tag = tagger.tag(at: 0, scheme: .language, tokenRange: nil, sentenceRange: nil)
        // The tagger returns an `NSLinguisticTag`; its raw value is the language code string.
        return tag?.rawValue ?? LanguageDetector.undetermined
    }
}
//
// LanguageDetectorTests.swift
// MusubiTests
//
// Created by はるふ on 2017/07/12.
// Copyright ©ha1f 2017年 line. All rights reserved.
//
import XCTest
@testable import Musubi
/// Unit tests for `LanguageDetector.detect(_:)`.
class LanguageDetectorTests: XCTestCase {
    let detector = LanguageDetector()

    /// Each sample sentence must be reported with the expected language code.
    func testDetect() {
        // (sample text, expected code) pairs, checked in order.
        let samples: [(text: String, expected: String)] = [
            ("Rom ist nicht an einem Tag erbaut worden", "de"),
            ("ローマは一日にして成らず", "ja"),
            ("Rome was not built in a day", "en"),
            ("Paris ne s'est pas fait en un jour", "fr"),
            ("Roma non fu fatta in un giorno, Roma non è stata costruita in un giorno", "it"),
            ("Η Ρώμη δεν χτίστηκε σε μια μέρα", "el"),
            ("Москва не сразу строилась", "ru"),
            ("羅馬非朝夕建成的", "zh-Hant"),
            ("罗马不是一日建成的", "zh-Hans"),
            ("로마는 하루아치메 이루어지지 아낟따", "ko"),
            ("และจะมีคุณค่า'มากขึ้น'เมื่อเราทำความดีนั้นอย่างสม่ำเสมอ", "th")
        ]

        for sample in samples {
            // The message names the offending sample, since every case fails on the same line.
            XCTAssertEqual(detector.detect(sample.text), sample.expected, "text: \(sample.text)")
        }
    }

    /// Empty and single-character inputs are expected to come back as undetermined.
    func testDetectUnd() {
        // Compared against the literal code rather than a named constant so the
        // test compiles against either variant of `LanguageDetector`
        // (`notDetermined` in Swift 3.x, `undetermined` in Swift 4.x).
        let undetermined = "und"

        for text in ["", "a", "大"] {
            XCTAssertEqual(detector.detect(text), undetermined, "text: \"\(text)\"")
        }
    }
}
@ha1f
Copy link
Author

ha1f commented Jul 14, 2017

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment