Skip to content

Instantly share code, notes, and snippets.

@karpelcevs
Created December 10, 2015 08:37
Show Gist options
  • Save karpelcevs/1baa155b6c16a7acb0d4 to your computer and use it in GitHub Desktop.
Save karpelcevs/1baa155b6c16a7acb0d4 to your computer and use it in GitHub Desktop.
Emoji detection in NSString
@implementation NSString (Emoji)

/**
 * Reports whether the receiver contains at least one character that this
 * category treats as emoji.
 *
 * The string is walked by composed character sequence and only the first
 * code point of each sequence is inspected:
 *  - a code point encoded as a surrogate pair counts when it lies in
 *    U+1D000..U+1F77F;
 *  - a BMP code point counts when it lies in U+2300..U+23FF or
 *    U+2600..U+26FF, except for U+263B, U+2606, U+266A and U+2312.
 *
 * @return YES as soon as one matching sequence is found, otherwise NO
 *         (including for the empty string).
 */
- (BOOL)isContainingEmoji
{
    __block BOOL containsEmoji = NO;
    [self enumerateSubstringsInRange:NSMakeRange(0, [self length])
                             options:NSStringEnumerationByComposedCharacterSequences
                          usingBlock:^(NSString *substring, NSRange substringRange, NSRange enclosingRange, BOOL *stop) {
        if (substring.length == 0) {
            return;
        }

        const unichar hs = [substring characterAtIndex:0];
        /* Verdict for THIS sequence only; it must never clear a hit found earlier in the string. */
        BOOL isEmoji = NO;

        if (0xd800 <= hs && hs <= 0xdbff) {
            /*
             * High surrogate: decode the UTF-16 pair into a code point.
             * Only U+1D000..U+1F77F is flagged; most characters there are emoji.
             * A lone high surrogate (no following unit) is not flagged.
             */
            if (substring.length > 1) {
                const unichar ls = [substring characterAtIndex:1];
                const int uc = ((hs - 0xd800) * 0x400) + (ls - 0xdc00) + 0x10000;
                isEmoji = (0x1d000 <= uc && uc <= 0x1f77f);
            }
        } else if (hs == 0x263b || hs == 0x2606 || hs == 0x266a || hs == 0x2312) {
            /*
             * Exempt characters inside the otherwise flagged BMP ranges.
             * U+263B is ☻ and used in Japanese to toggle small hiragana and dakuten.
             * On keyboard it seems like different char and isn't actually added to the text when toggling.
             * Others are ☆♪⌒, and are part of the basic Japanese keyboard and are widely used there.
             * Allowing these doesn't break anything, character displays fine on all platforms.
             */
            isEmoji = NO;
        } else {
            /*
             * Previously, this included from U+2100 up to U+26FF, however, most of them are not
             * emoji but rather mere special symbols. The only suspicious ranges are U+23xx and U+26xx.
             */
            isEmoji = (0x2300 <= hs && hs <= 0x23ff) || (0x2600 <= hs && hs <= 0x26ff);
        }

        if (isEmoji) {
            containsEmoji = YES;
            /* One hit is enough to answer the question; skip the rest of the string. */
            *stop = YES;
        }
    }];
    return containsEmoji;
}

@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment