Created
November 6, 2012 04:22
-
-
Save seiji/4022535 to your computer and use it in GitHub Desktop.
utf8mb4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#import "NSString+Unicode.h" | |
/// Returns YES when `byte` is a UTF-8 continuation byte (bit pattern 10xxxxxx),
/// i.e. a byte that belongs to a multi-byte sequence but does not start one.
static inline BOOL IsUTF8ContinuationByte(uint8_t byte) {
    return (byte & 0xC0) == 0x80;
}

@implementation NSString (Unicode)

/// Counts the Unicode code points in the receiver.
///
/// Unlike `-length`, which counts UTF-16 code units, a character outside the
/// Basic Multilingual Plane (stored as a surrogate pair) counts as one here.
///
/// @return The number of code points, or 0 when the receiver is empty or
///         cannot be encoded as UTF-8.
- (NSUInteger)utf8mb4length {
    // Fetch bytes and length from the same object so they can never disagree;
    // a nil result (unencodable string) yields a zero length via nil messaging.
    NSData *utf8 = [self dataUsingEncoding:NSUTF8StringEncoding];
    const uint8_t *bytes = (const uint8_t *)utf8.bytes;
    NSUInteger byteCount = utf8.length;

    // Every code point has exactly one non-continuation (lead) byte.
    NSUInteger codePoints = 0;
    for (NSUInteger i = 0; i < byteCount; i++) {
        if (!IsUTF8ContinuationByte(bytes[i])) {
            codePoints++;
        }
    }
    return codePoints;
}

/// Converts an offset expressed in UTF-16 code units (the unit used by
/// `-length` and NSRange) into an offset expressed in code points.
///
/// @param offset Number of UTF-16 code units from the start of the receiver.
/// @return The number of whole code points that fit within the first `offset`
///         UTF-16 code units. An offset past the end yields the total number
///         of code points; an offset that falls in the middle of a surrogate
///         pair is rounded down to the start of that character.
- (NSUInteger)utf8mb4Offset:(NSUInteger)offset {
    NSData *utf8 = [self dataUsingEncoding:NSUTF8StringEncoding];
    const uint8_t *bytes = (const uint8_t *)utf8.bytes;
    NSUInteger byteCount = utf8.length;

    NSUInteger remainingUnits = offset;
    NSUInteger codePoints = 0;
    for (NSUInteger i = 0; i < byteCount && remainingUnits > 0; i++) {
        uint8_t byte = bytes[i];
        if (IsUTF8ContinuationByte(byte)) {
            continue;
        }
        // A lead byte >= 0xF0 starts a 4-byte sequence: a code point above
        // U+FFFF, which occupies two UTF-16 code units (a surrogate pair).
        NSUInteger units = (byte >= 0xF0) ? 2 : 1;
        if (units > remainingUnits) {
            // The offset points inside this character. Stop here instead of
            // letting the unsigned subtraction wrap around, which would make
            // the loop run to the end of the string.
            break;
        }
        remainingUnits -= units;
        codePoints++;
    }
    return codePoints;
}

@end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment