Skip to content

Instantly share code, notes, and snippets.

@samuraisam
Created December 15, 2010 23:05
Show Gist options
  • Save samuraisam/742749 to your computer and use it in GitHub Desktop.
Save samuraisam/742749 to your computer and use it in GitHub Desktop.
Simple, reliable word truncation whether HTML exists in the text or not.
/**
 * Word-based truncation helpers for NSString, for text that may or may not
 * contain HTML markup.
 */
@interface NSString (WordTruncation)
/**
 * Truncates the receiver to its first `to` words, where HTML tags and
 * character entities are not counted as words.
 *
 * @param to           Maximum number of words to keep. 0 yields an empty string.
 * @param append       Text added to the end of the result, only when truncation
 *                     actually happened. May be nil.
 * @param leSoftLimit  When positive, a receiver containing at most this many
 *                     words is returned untruncated even if it has more than
 *                     `to` words.
 * @return The receiver itself when no truncation is needed; otherwise the text
 *         up to the end of word number `to`, followed by closing tags for any
 *         elements the implementation recorded as still open, then `append`.
 */
- (NSString *)substringOfHTMLToWordIndex:(NSUInteger)to appendOnTruncate:(NSString *)append softLimit:(int)leSoftLimit;
/**
 * Splits the receiver on runs of whitespace, keeps at most the first `to`
 * pieces and joins them back together with single spaces.
 */
- (NSString *)substringToWordIndex:(NSUInteger)to;
@end
@implementation NSString (WordTruncation)

/**
 * Truncates the receiver to its first `to` words while keeping the HTML
 * well-formed: tags and character entities are not counted as words, and any
 * element still open at the cut point gets a closing tag appended.
 *
 * The regex helpers used here (-rangeOfRegex:capture:) are not declared in
 * this file; presumably they come from RegexKitLite -- confirm against the
 * project's imports.
 *
 * @param to           Maximum number of words to keep. 0 yields @"".
 * @param append       Text appended after the closing tags, only when the
 *                     receiver was actually truncated. May be nil.
 * @param leSoftLimit  When positive, a receiver with at most this many words
 *                     is returned untruncated. Values <= 0 disable the soft
 *                     limit.
 * @return The receiver itself when no truncation is needed, otherwise a new
 *         string.
 */
- (NSString *)substringOfHTMLToWordIndex:(NSUInteger)to appendOnTruncate:(NSString *)append softLimit:(int)leSoftLimit
{
    if (to == 0) return @"";

    static NSArray *htmlSinglets = nil;
    // Token regex: an entity, a tag, or (capture 1) a real word.
    // Tag regex: capture 1 = closing slash, 2 = tag name, 3 = " /" self-closing slash.
    static NSString *wordsRegex = @"&[^;]*;|<[^<>]*>|(\\w[\\w-]*)",
                    *tagsRegex = @"<(/)?([^ >]+?)(?: (/)| [^>]*?)?>";
    if (!htmlSinglets) {
        // Void elements that never take a closing tag. Intentionally kept
        // alive for the lifetime of the process.
        htmlSinglets = [[NSArray alloc] initWithObjects:@"br", @"col", @"link", @"base", @"img",
                        @"param", @"area", @"hr", @"input", nil];
    }

    // A negative soft limit would wrap around in unsigned arithmetic and
    // silently disable truncation, so treat it as "no soft limit".
    NSUInteger softLimit = (leSoftLimit > 0) ? (NSUInteger)leSoftLimit : 0;
    NSUInteger scanLimit = to + softLimit;
    NSUInteger pos = 0, endTextPos = 0, words = 0;
    NSMutableArray *openTags = [NSMutableArray array]; // innermost element first

    while (words <= scanLimit) {
        NSString *tail = [self substringFromIndex:pos];
        NSRange tokenRange = [tail rangeOfRegex:wordsRegex capture:0];
        if (tokenRange.location == NSNotFound) break; // end of string
        NSRange wordRange = [tail rangeOfRegex:wordsRegex capture:1];
        pos += NSMaxRange(tokenRange);

        if (wordRange.location != NSNotFound) { // real word
            words++;
            if (words == to) endTextPos = pos; // remember where to cut
            continue;
        }

        // Markup after the cut point must not change which tags get closed.
        if (endTextPos != 0) continue;

        NSString *token = [tail substringWithRange:tokenRange];
        if (![token hasPrefix:@"<"]) continue; // character entity

        NSRange nameRange = [token rangeOfRegex:tagsRegex capture:2];
        if (nameRange.location == NSNotFound) continue; // "<>" and similar: not a tag

        NSString *tagName = [[token substringWithRange:nameRange] lowercaseString];
        BOOL isClosing = ([token rangeOfRegex:tagsRegex capture:1].location != NSNotFound);
        // "<br/>" (no space) leaves the slash glued to the captured name.
        BOOL isSelfClosing = ([token rangeOfRegex:tagsRegex capture:3].location != NSNotFound)
                             || [tagName hasSuffix:@"/"];

        if (isSelfClosing || [htmlSinglets containsObject:tagName]) {
            continue; // nothing to track
        }
        if (isClosing) {
            NSUInteger idx = [openTags indexOfObject:tagName];
            if (idx != NSNotFound) {
                // SGML: an end tag closes, back to the matching start tag,
                // all intervening start tags with omitted end tags.
                [openTags removeObjectsInRange:NSMakeRange(0, idx + 1)];
            }
        } else {
            [openTags insertObject:tagName atIndex:0];
        }
    }

    // NOTE(review): the soft limit is compared as an absolute word count here
    // while the scan above runs to `to + softLimit` words. This check is kept
    // exactly as it was; confirm with callers which meaning is intended.
    if (words <= to || (softLimit > 0 && words <= softLimit))
        return self; // no truncation needed

    NSMutableString *result = [NSMutableString stringWithString:[self substringToIndex:endTextPos]];
    for (NSString *openTag in openTags) { // close whatever is still open at the cut
        [result appendFormat:@"</%@>", openTag];
    }
    if (append)
        [result appendString:append];
    return result;
}

/**
 * Splits the receiver on runs of whitespace, keeps at most the first `to`
 * pieces and joins them back together with single spaces.
 *
 * @param to Maximum number of whitespace-separated pieces to keep.
 * @return The re-joined string.
 */
- (NSString *)substringToWordIndex:(NSUInteger)to
{
    NSArray *pieces = [self componentsSeparatedByRegex:@"\\s+"];
    if ([pieces count] > to) {
        pieces = [pieces subarrayWithRange:NSMakeRange(0, to)];
    }
    return [pieces componentsJoinedByString:@" "];
}

@end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment