Last active
March 5, 2025 14:51
-
-
Save dinhanhthi/b18e7cd090d2b70004cd3cba71256648 to your computer and use it in GitHub Desktop.
Convert markdown string to plain text using marked.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { marked, Marked } from "marked"; | |
/**
 * Render a marked list token as plain-text list lines.
 *
 * Each item gets a `- ` prefix (unordered) or an `N. ` prefix (ordered) on its
 * first non-empty line. Every further non-empty line of the item is indented
 * by two spaces. Nested lists need no explicit depth tracking: when the list
 * renderer delegates here, a nested list is rendered by a nested call during
 * `parser.parse`, and each enclosing item then adds its own two-space indent
 * to those lines.
 *
 * @param token  List token; only `items`, `ordered` and `start` are read.
 * @param parser Object whose `parse` renders an item's child tokens to a string.
 * @returns The rendered items joined by newlines, followed by one trailing newline.
 */
function listFactory(
  token: { items: { tokens: unknown[] }[]; ordered: boolean; start?: number | "" },
  parser: { parse(tokens: unknown[]): string },
): string {
  const { items, ordered, start } = token;

  // Only fall back to 1 when no usable number is given. A plain `start || 1`
  // would also rewrite a legitimate starting number of 0 to 1.
  const firstNumber =
    typeof start === "number" && Number.isFinite(start) ? start : 1;
  const continuationIndent = "  ";

  const renderedItems = items.map((item, index) => {
    const prefix = ordered ? `${firstNumber + index}. ` : "- ";

    // Render the item body, then drop blank lines so the list stays compact.
    const lines = parser
      .parse(item.tokens)
      .split("\n")
      .filter((line) => line.trim() !== "");

    // An item without content still shows its marker.
    if (lines.length === 0) return prefix;

    // First line carries the marker; the rest hang under it.
    return lines
      .map((line, i) => (i === 0 ? prefix : continuationIndent) + line)
      .join("\n");
  });

  return renderedItems.join("\n") + "\n";
}
/**
 * Strip every "\n" from an HTML string except inside `<pre>…</pre>` blocks.
 *
 * The input is split on complete `<pre …>…</pre>` spans; the capture group
 * keeps those spans in the split result. Segments that begin with a `<pre>`
 * opening tag are copied through unchanged, all other segments lose their
 * newline characters.
 *
 * @param htmlString HTML text to compact.
 * @returns The HTML with newlines removed outside `<pre>` segments.
 */
function removeNewlinesOutsidePre(htmlString: string) {
  const preBlock = /(<pre(?:\s[^>]*)?>[\s\S]*?<\/pre>)/g;
  const preOpening = /^<pre(?:\s[^>]*)?>/;

  let compacted = "";
  for (const segment of htmlString.split(preBlock)) {
    compacted += preOpening.test(segment)
      ? segment
      : segment.replace(/\n/g, "");
  }
  return compacted;
}
/**
 * Render a markdown string with marked using one of two custom renderer sets.
 *
 * - `asPlain` truthy: most block tokens (headings, rules, tables, code,
 *   blockquotes, code spans) are emitted as their raw markdown source, emphasis
 *   markers are dropped, links become their href, images become
 *   `[image: href]`, and lists go through `listFactory`. The result is returned
 *   as produced by marked.
 * - otherwise: headings become `<b>…</b><br>`, lists go through `listFactory`,
 *   every other token uses marked's own renderer, and finally all newlines
 *   outside `<pre>` blocks are stripped.
 *
 * In both modes a paragraph is rendered as its inline content plus "\n".
 *
 * @param markdownText Markdown source; may be undefined or empty.
 * @param asPlain      Select the plain renderer set instead of the default one.
 * @returns The rendered text, or "" when there is nothing to render.
 * @throws Error if marked returns a Promise (an async configuration), since
 *         this function is synchronous.
 */
function handleMarkdown(markdownText?: string, asPlain?: boolean): string {
  // Honour the declared `string` return type for undefined/empty input
  // instead of passing `undefined` through.
  if (!markdownText) return "";

  // Shared by both modes: paragraphs are just their inline content + newline.
  const baseRenderer = {
    paragraph({ tokens }) {
      const text = this.parser.parseInline(tokens);
      return text + "\n";
    },
  };

  // Plain mode: keep the original markdown source where it is already readable.
  const plainRenderer = {
    heading(token) {
      return token.raw;
    },
    hr(token) {
      return token.raw + '\n';
    },
    list(token) {
      return listFactory(token, this.parser);
    },
    link(token) {
      // Only the target is kept; the link label is discarded.
      return token.href;
    },
    image(token) {
      return `[image: ${token.href}]`;
    },
    strong(token) {
      // Drop the emphasis markers, keep the inner inline content.
      return this.parser.parseInline(token.tokens);
    },
    em(token) {
      return this.parser.parseInline(token.tokens);
    },
    table(token) {
      return token.raw;
    },
    code(token) {
      return token.raw + '\n';
    },
    codespan(token) {
      return token.raw;
    },
    blockquote(token) {
      return token.raw + '\n';
    },
  };

  // Default mode: only headings and lists are customised.
  const advancedRenderer = {
    heading({ tokens }) {
      const text = this.parser.parseInline(tokens);
      return `<b>${text}</b><br>`;
    },
    list(token) {
      return listFactory(token, this.parser);
    },
  };

  const renderer = asPlain
    ? { ...baseRenderer, ...plainRenderer }
    : { ...baseRenderer, ...advancedRenderer };

  const renderedText = new Marked().use({ renderer }).parse(markdownText);

  // `parse` is typed as `string | Promise<string>`. Nothing here enables async
  // mode, but guard explicitly so a Promise can never be returned as a string
  // or handed to the newline stripping below.
  if (typeof renderedText !== "string") {
    throw new Error("handleMarkdown: marked returned a Promise; async rendering is not supported");
  }

  return asPlain ? renderedText : removeNewlinesOutsidePre(renderedText);
}
// Sample markdown exercising headings, inline code, blockquotes, rules,
// ordered/unordered lists, links, fenced code, images and tables.
const text = `
## thu nghiem
Xem the nao \`code\` xem the nao.
> blockquote nay ne
> hang thu hai
---
1. Item _thu nghiem_ \`code\` 1
2. Item 2
- List item 1
- Nested item 1.1
- Nestted item 1.2
- List item 2 https://abc.com
- [] asdasdas
Link: [text link](http://xxx.com)
\`\`\`
\\begin{align}
x + y = 1
\\end{align}
\`\`\`

| Name | Age | City |
|-------|-----|------------|
| Alice | 25 | New York |
| Bob | 30 | London |
| Carol | 28 | Paris |
`;

// Demo: print both renderings. The return values were previously discarded,
// so only the separator line below ever reached the console.
console.log(handleMarkdown(text, false));
console.log("\n πππππ \n");
console.log(handleMarkdown(text, true));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment