Created
December 15, 2010 23:05
-
-
Save samuraisam/742749 to your computer and use it in GitHub Desktop.
Simple, reliable word truncation for NSString, whether or not the text contains HTML.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Word-based truncation helpers for NSString.
 */
@interface NSString (WordTruncation)

/**
 * Truncates HTML-bearing text after a number of words.
 *
 * HTML tags and character entities (e.g. "&amp;") are not counted as words.
 *
 * @param to          Number of words to keep. Passing 0 returns an empty string.
 * @param append      Text added to the result when truncation happens (for example
 *                    an ellipsis). May be nil.
 * @param leSoftLimit Intended as a tolerance: a number of extra words beyond `to`
 *                    that the text may contain before it is cut back to `to` words.
 * @return The receiver's text, truncated when it is longer than the limits allow.
 */
- (NSString *)substringOfHTMLToWordIndex:(NSUInteger)to appendOnTruncate:(NSString *)append softLimit:(int)leSoftLimit;

/**
 * Returns at most the first `to` whitespace-separated words of the receiver,
 * joined by single spaces.
 *
 * @param to Maximum number of words to keep.
 */
- (NSString *)substringToWordIndex:(NSUInteger)to;

@end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@implementation NSString (WordTruncation)

/**
 * Truncates HTML-bearing text after `to` words.
 *
 * The receiver is scanned token by token; a token is a character entity
 * ("&...;"), a tag ("<...>") or a word. Only words are counted. While scanning,
 * the elements that are open at the cut point are tracked so that their closing
 * tags can be appended to the truncated text, innermost first.
 *
 * @param to          Number of words to keep. 0 returns an empty string.
 * @param append      Text appended after the closing tags when the text is
 *                    truncated. May be nil.
 * @param leSoftLimit Tolerance in words: text containing at most
 *                    `to + leSoftLimit` words is returned untruncated. Values
 *                    below zero are treated as zero.
 * @return The receiver's text when no truncation is needed; otherwise a new
 *         string holding the first `to` words, closing tags for every element
 *         still open at that point, and `append`.
 */
- (NSString *)substringOfHTMLToWordIndex:(NSUInteger)to appendOnTruncate:(NSString *)append softLimit:(int)leSoftLimit
{
    if (to == 0) return @"";

    // Compiled once and kept for the life of the process. alloc/init returns an
    // owned reference under both manual reference counting and ARC, so no explicit
    // retain is needed. As before, first-call initialisation is not synchronised.
    static NSRegularExpression *wordsRegex = nil;
    static NSRegularExpression *tagsRegex = nil;
    static NSSet *htmlSinglets = nil;
    if (!htmlSinglets) {
        // Alternatives: entity | tag | word. Capture 1 is set only for a word.
        wordsRegex = [[NSRegularExpression alloc] initWithPattern:@"&[^;]*;|<[^<>]*>|(\\w[\\w-]*)"
                                                          options:0
                                                            error:NULL];
        // Capture 1: closing slash, capture 2: tag name, capture 3: " /" self-closing slash.
        tagsRegex = [[NSRegularExpression alloc] initWithPattern:@"<(/)?([^ >]+?)(?: (/)| [^>]*?)?>"
                                                         options:0
                                                           error:NULL];
        // Elements that never take an end tag.
        htmlSinglets = [[NSSet alloc] initWithObjects:@"br", @"col", @"link", @"base", @"img",
                                                      @"param", @"area", @"hr", @"input", nil];
    }

    // Highest word count that is still returned untruncated (saturating add).
    NSUInteger slack = (leSoftLimit > 0) ? (NSUInteger)leSoftLimit : 0;
    NSUInteger limit = (slack > NSUIntegerMax - to) ? NSUIntegerMax : to + slack;

    NSUInteger length = [self length];
    NSUInteger pos = 0;          // scan cursor into the receiver
    NSUInteger words = 0;        // words seen so far
    NSUInteger endTextPos = 0;   // index just past word number `to`; 0 until reached
    NSMutableArray *openTags = [NSMutableArray array];  // innermost open element first

    while (words <= limit) {
        // Search from the cursor instead of copying the tail on every iteration.
        NSTextCheckingResult *token = [wordsRegex firstMatchInString:self
                                                             options:0
                                                               range:NSMakeRange(pos, length - pos)];
        if (!token) break;  // end of string

        NSRange tokenRange = [token range];
        pos = NSMaxRange(tokenRange);

        if ([token rangeAtIndex:1].location != NSNotFound) {  // real word
            words++;
            if (words == to) endTextPos = pos;
            continue;
        }

        // Tags behind the cut point must not change what has to be closed.
        if (endTextPos > 0) continue;

        if ([self characterAtIndex:tokenRange.location] == '&') continue;  // entity

        NSString *tag = [self substringWithRange:tokenRange];
        NSTextCheckingResult *parts = [tagsRegex firstMatchInString:tag
                                                            options:0
                                                              range:NSMakeRange(0, [tag length])];
        if (!parts) continue;  // "<>" and similar: not a usable tag

        NSRange nameRange = [parts rangeAtIndex:2];
        if (nameRange.location == NSNotFound) continue;

        NSString *tagName = [[tag substringWithRange:nameRange] lowercaseString];
        BOOL isClosing = ([parts rangeAtIndex:1].location != NSNotFound);
        // "<br/>" and "<img src=x/>" are self-closing too, although only the
        // "<br />" form fills capture 3.
        BOOL isSelfClosing = ([parts rangeAtIndex:3].location != NSNotFound) || [tag hasSuffix:@"/>"];
        if ([tagName hasSuffix:@"/"]) {
            tagName = [tagName substringToIndex:[tagName length] - 1];
        }

        // Comments, doctypes and processing instructions never need an end tag.
        if ([tagName length] == 0 || [tagName hasPrefix:@"!"] || [tagName hasPrefix:@"?"]) continue;

        if (isSelfClosing || [htmlSinglets containsObject:tagName]) continue;

        if (isClosing) {
            // SGML: an end tag closes, back to the matching start tag, all
            // intervening start tags with omitted end tags.
            NSUInteger idx = [openTags indexOfObject:tagName];
            if (idx != NSNotFound) {
                [openTags removeObjectsInRange:NSMakeRange(0, idx + 1)];
            }
        } else {
            [openTags insertObject:tagName atIndex:0];
        }
    }

    // The scan ran out of text before exceeding the (soft) limit: nothing to cut.
    if (words <= limit) return [NSString stringWithString:self];

    NSMutableString *result = [NSMutableString stringWithString:[self substringToIndex:endTextPos]];
    for (NSString *name in openTags) {  // close what is still open
        [result appendFormat:@"</%@>", name];
    }
    if (append) [result appendString:append];
    return result;
}

/**
 * Returns at most the first `to` whitespace-separated words of the receiver,
 * joined by single spaces.
 *
 * Runs of whitespace are collapsed; leading and trailing whitespace never
 * produce an empty "word".
 *
 * @param to Maximum number of words to keep. 0 returns an empty string.
 */
- (NSString *)substringToWordIndex:(NSUInteger)to
{
    static NSRegularExpression *wordRegex = nil;
    if (!wordRegex) {
        // A word is a maximal run of non-whitespace characters.
        wordRegex = [[NSRegularExpression alloc] initWithPattern:@"\\S+" options:0 error:NULL];
    }

    NSMutableArray *words = [NSMutableArray array];
    NSUInteger length = [self length];
    NSUInteger pos = 0;
    while ([words count] < to) {
        NSRange wordRange = [wordRegex rangeOfFirstMatchInString:self
                                                         options:0
                                                           range:NSMakeRange(pos, length - pos)];
        if (wordRange.location == NSNotFound) break;
        [words addObject:[self substringWithRange:wordRange]];
        pos = NSMaxRange(wordRange);
    }
    return [words componentsJoinedByString:@" "];
}

@end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment