Last active
July 12, 2023 21:29
-
-
Save jlevy/9a59fcfeddc0d507ddda25ed6e994e7c to your computer and use it in GitHub Desktop.
Human-friendly truncation of strings in TypeScript, by character, word, and paragraph, with Markdown compatibility
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { | |
MARKDOWN_FORMAT_CHARS, | |
truncate, | |
truncateCondensed, | |
truncateOnParagraphs, | |
truncateOnWords, | |
} from './friendlyTruncate'; | |
// Character-level truncation: pass-through behavior and the basic ellipsis case.
describe('truncate', () => {
  it('returns the input if it is null or shorter than the limit', () => {
    const fromNull = truncate(null);
    const fromShort = truncate('short', 10);

    expect(fromNull).toBeNull();
    expect(fromShort).toBe('short');
  });

  it('truncates the input and adds an ellipsis if it is longer than the limit', () => {
    // 9 characters of text plus the one-character ellipsis fill the limit of 10.
    const shortened = truncate('longer than limit', 10);

    expect(shortened).toBe('longer th…');
  });
});
// Whitespace condensing before truncation, with and without newline preservation.
describe('truncateCondensed', () => {
  // Mixed run of space, newline, and tab between the last two words.
  const messyInput = 'longer than \n\t limit';

  it('condenses whitespace and truncates the input', () => {
    const condensed = truncateCondensed(messyInput, 10);

    expect(condensed).toBe('longer th…');
  });

  it('preserves newlines if preserveNewlines is true', () => {
    // The result is 19 characters, so the limit of 20 leaves it untruncated.
    const condensed = truncateCondensed(messyInput, 20, true);

    expect(condensed).toBe('longer than \n limit');
  });
});
// Word-boundary truncation, including the "avoid ending on this pattern" option.
describe('truncateOnWords', () => {
  it('truncates on word boundaries', () => {
    const shortened = truncateOnWords('longer than limit', 10);

    expect(shortened).toBe('longer…');
  });

  it('avoids ending on a pattern (this avoids broken Markdown output)', () => {
    // "*than*" would fit in 15 characters but contains Markdown formatting
    // characters, so it is dropped from the end of the result.
    const markdownInput = 'longer *than* [limit](http://example.com)';
    const shortened = truncateOnWords(markdownInput, 15, MARKDOWN_FORMAT_CHARS);

    expect(shortened).toBe('longer…');
  });
});
// Paragraph-boundary truncation for plain text and for Markdown lists.
describe('truncateOnParagraphs', () => {
  // A paragraph followed by a two-item bulleted list, separated by single newlines.
  const markdownList = 'longer\n* than\n* limit';

  it('truncates on paragraph boundaries', () => {
    const shortened = truncateOnParagraphs('longer\n\nthan\n\nlimit', 10);

    expect(shortened).toBe('longer\n\n…');
  });

  it('treats bulleted list items as separate paragraphs if isMarkdown is true', () => {
    // Everything fits in 50 characters; list items come back blank-line separated.
    const normalized = truncateOnParagraphs(markdownList, 50, '…', true);

    expect(normalized).toBe('longer\n\n* than\n\n* limit');
  });

  it('treats list items as separate paragraphs if isMarkdown is true', () => {
    // Only the first paragraph fits in 12 characters.
    const shortened = truncateOnParagraphs(markdownList, 12, '…', true);

    expect(shortened).toBe('longer\n\n…');
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Truncate a string to a given length, adding an ellipsis if anything was cut.
 *
 * Lengths are measured in UTF-16 code units (the same unit as `String.length`).
 *
 * @param input - Text to shorten. `null` and the empty string are returned unchanged.
 * @param num - Maximum length of the result, including the ellipsis.
 * @param ellipsis - Marker appended when text is removed.
 * @returns `input` itself when it already fits, otherwise a prefix of `input`
 *   followed by `ellipsis`, never longer than `num`.
 */
export const truncate = (
  input: string | null,
  num: number = 80,
  ellipsis: string = '…',
): string | null => {
  if (!input || input.length <= num) {
    return input;
  }

  // No room for any text: return as much of the ellipsis as fits. Without this
  // guard a negative slice end would count from the END of the string and the
  // result could be longer than the limit.
  if (num <= ellipsis.length) {
    return ellipsis.slice(0, Math.max(0, num));
  }

  let end = num - ellipsis.length;

  // Do not cut between the two halves of a surrogate pair; that would leave a
  // lone high surrogate (an invalid character) in front of the ellipsis.
  const lastKeptUnit = input.charCodeAt(end - 1);
  if (lastKeptUnit >= 0xd800 && lastKeptUnit <= 0xdbff) {
    end -= 1;
  }

  return input.slice(0, end) + ellipsis;
};
/**
 * Condense runs of whitespace to a single space, then truncate to a given length,
 * adding an ellipsis if appropriate.
 *
 * @param input - Text to condense and shorten. `null` and the empty string are
 *   returned unchanged.
 * @param num - Maximum length passed on to `truncate`.
 * @param preserveNewlines - When true, newline characters are kept as they are and
 *   only the other whitespace around them is condensed.
 * @returns The condensed, possibly truncated text.
 */
export const truncateCondensed = (
  input: string | null,
  num: number = 80,
  preserveNewlines: boolean = false,
): string | null => {
  if (!input) {
    return input;
  }

  // `[^\S\n]` is "any whitespace character except newline", so both modes treat
  // the same set of characters (tabs, NBSP, form feeds, ...) as whitespace.
  const whitespaceRun = preserveNewlines ? /[^\S\n]+/g : /\s+/g;

  // Replace every whitespace run with a single space before measuring the length.
  return truncate(input.replace(whitespaceRun, ' '), num);
};
/**
 * Truncate a string but only on word boundaries if possible. Adds an ellipsis
 * ('…') only when text was actually removed.
 *
 * Words are the pieces of `input` between runs of whitespace; the result joins
 * them with single spaces.
 *
 * @param input - Text to shorten. `null` and the empty string are returned unchanged.
 * @param length - Maximum length of the result, including the ellipsis.
 * @param avoidEndingRe - Optional pattern; when the text is truncated, trailing
 *   words matching it are dropped (best effort, at most 4 words and never the
 *   first word). Useful to avoid ending in the middle of Markdown formatting.
 * @returns The whitespace-condensed input when it fits within `length`, otherwise
 *   the leading words that fit followed by '…'. If not even the first word fits,
 *   the result of the character-level `truncate(input, length)`.
 */
export const truncateOnWords = (
  input: string | null,
  length: number,
  avoidEndingRe: RegExp | null = null,
): string | null => {
  if (!input) {
    return input;
  }

  const words = input.split(/\s+/);

  // Nothing needs cutting: return the text with no ellipsis and without
  // dropping any trailing words.
  const condensed = words.join(' ');
  if (condensed.length <= length) {
    return condensed;
  }

  const outputWords: string[] = [];
  let currentLength = 0;
  for (const word of words) {
    // +1 accounts for the space after the word, or for the one-character
    // ellipsis after the last kept word.
    if (currentLength + word.length + 1 > length) {
      break;
    }
    outputWords.push(word);
    currentLength += word.length + 1;
  }

  // If no words were added (first word is too long), fall back to truncate.
  if (outputWords.length === 0) {
    return truncate(input, length);
  }

  // Avoid ending on certain patterns (useful to avoid Markdown formatting or ugly
  // truncations). Best effort only, not a guarantee.
  if (avoidEndingRe) {
    let removed = 0;
    while (outputWords.length > 1 && removed < 4) {
      const lastWord = outputWords[outputWords.length - 1];
      // String.prototype.match is used (rather than RegExp.prototype.test) so a
      // pattern carrying the `g` flag does not keep state between iterations.
      if (lastWord === undefined || !lastWord.match(avoidEndingRe)) {
        break;
      }
      outputWords.pop();
      removed++;
    }
  }

  // Reaching this point means at least one word was left out, so the ellipsis
  // is always appropriate.
  return outputWords.join(' ') + '…';
};
/**
 * Matches any single Markdown formatting character: `*`, `_`, backtick, `[`, `]`,
 * `(`, or `)`.
 *
 * NOTE: the pattern carries the global (`g`) flag, so `RegExp.prototype.test` and
 * `exec` are stateful on it (they advance `lastIndex`). Prefer
 * `String.prototype.match`, `search`, or `replace` when using it.
 */
export const MARKDOWN_FORMAT_CHARS = /[*_`\[\]()]/g;
/**
 * Truncate text or Markdown on paragraph boundaries if possible. Adds an ellipsis
 * on its own trailing paragraph only when text was actually removed.
 *
 * Paragraphs are separated by a blank line (`\n\n`, with optional `\r`). The
 * result joins the kept paragraphs with `\n\n`.
 *
 * @param input - Text to shorten. `null` and the empty string are returned unchanged.
 * @param length - Maximum length of the result, including the ellipsis paragraph.
 * @param ellipsis - Marker placed after the last kept paragraph when truncated.
 * @param isMarkdown - When true, each `-` or `*` list item that starts a line is
 *   treated as its own paragraph.
 * @returns The paragraph-normalized input when it fits within `length`, otherwise
 *   the leading paragraphs that fit followed by `\n\n` and `ellipsis`. If not even
 *   the first paragraph fits, the result of `truncateOnWords` on the normalized
 *   text (which appends its own '…').
 */
export const truncateOnParagraphs = (
  input: string | null,
  length: number,
  ellipsis: string = '…',
  isMarkdown: boolean = false,
): string | null => {
  if (!input) {
    return input;
  }

  // In Markdown, put a blank line in front of every list item so each item
  // splits off as a separate paragraph below.
  const normInput = isMarkdown ? input.replace(/(\n *[-*] )/g, '\n$1') : input;
  const paragraphs = normInput.split(/\r?\n\r?\n/);

  // Nothing needs cutting: return every paragraph, with no ellipsis.
  const rejoined = paragraphs.join('\n\n');
  if (rejoined.length <= length) {
    return rejoined;
  }

  const tail = ellipsis ?? '';
  const outputParagraphs: string[] = [];
  let currentLength = 0;
  for (const paragraph of paragraphs) {
    // +2 accounts for the blank line after the paragraph. The ellipsis is
    // budgeted as well so the final result never exceeds `length`.
    if (currentLength + paragraph.length + 2 + tail.length > length) {
      break;
    }
    outputParagraphs.push(paragraph);
    currentLength += paragraph.length + 2;
  }

  // If no paragraphs were added (first paragraph is too long), fall back to
  // truncateOnWords.
  if (outputParagraphs.length === 0) {
    return truncateOnWords(normInput, length, isMarkdown ? MARKDOWN_FORMAT_CHARS : null);
  }

  // Reaching this point means at least one paragraph was left out, so the
  // ellipsis paragraph is always appropriate.
  return outputParagraphs.join('\n\n') + '\n\n' + tail;
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment