Created
December 2, 2012 20:35
-
-
Save torsten/4190944 to your computer and use it in GitHub Desktop.
iOS: Heuristic to check for non-Latin languages
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// clang -framework Foundation check-non-latin.m && ./a.out | |
#import <Foundation/Foundation.h> | |
/// Returns YES when `text` contains at least one character whose Unicode
/// script property is Latin, and NO otherwise (including for nil or empty
/// input, where the search finds nothing).
///
/// The check deliberately does not rely on
/// +[NSCharacterSet decomposableCharacterSet]: that set describes precomposed
/// characters of any script, so it misses unaccented Latin text ("hello") and
/// matches precomposed non-Latin characters (e.g. Cyrillic "й", Greek "ά",
/// kana with dakuten).
static BOOL TextContainsLatinScript(NSString *text)
{
    // NSRegularExpressionSearch evaluates the pattern with ICU, which
    // understands Unicode script properties. Combining marks belong to the
    // "Inherited" script, so decomposed input such as "a" + U+0308 still
    // matches through its Latin base letter.
    NSRange latinRange = [text rangeOfString:@"\\p{Script=Latin}"
                                     options:NSRegularExpressionSearch];
    return latinRange.location != NSNotFound;
}

/// Classifies a handful of sample strings as Latin or non-Latin script and
/// logs one line per language. Always returns 0.
int main (int argc, char const *argv[])
{
    @autoreleasepool
    {
        // Sample text per language; the keys are only used as log labels.
        NSDictionary *langToText = @{@"arabic": [NSString stringWithUTF8String:"العربي"],
                                     @"japanese": [NSString stringWithUTF8String:"日本"],
                                     @"german": [NSString stringWithUTF8String:"äö"],
                                     @"turkish": [NSString stringWithUTF8String:"üç"],
                                     @"hebrew": [NSString stringWithUTF8String:"עִבְרִית"],
                                     @"chinese": [NSString stringWithUTF8String:"汉语 /漢語"],
                                     @"czech": [NSString stringWithUTF8String:"Čé"]};

        // Dictionary enumeration order is unspecified, so the log lines may
        // appear in any order.
        for (NSString *lang in langToText)
        {
            if (TextContainsLatinScript(langToText[lang]))
            {
                NSLog(@"%@ is latin", lang);
            }
            else
            {
                NSLog(@"%@ is not latin", lang);
            }
        }
    }
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ clang -framework Foundation check-non-latin.m && ./a.out | |
2012-12-02 21:34:05.553 a.out[38045:707] arabic is not latin | |
2012-12-02 21:34:05.556 a.out[38045:707] japanese is not latin | |
2012-12-02 21:34:05.557 a.out[38045:707] german is latin | |
2012-12-02 21:34:05.559 a.out[38045:707] hebrew is not latin | |
2012-12-02 21:34:05.559 a.out[38045:707] chinese is not latin | |
2012-12-02 21:34:05.560 a.out[38045:707] czech is latin | |
2012-12-02 21:34:05.561 a.out[38045:707] turkish is latin |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment