Skip to content

Instantly share code, notes, and snippets.

@trevordevore
Created November 6, 2017 15:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save trevordevore/5eaa7333841d15f5bbbde490636dc143 to your computer and use it in GitHub Desktop.
Save trevordevore/5eaa7333841d15f5bbbde490636dc143 to your computer and use it in GitHub Desktop.
LiveCode htmlText to Markdown
## Converts LiveCode htmlText to Markdown.
##
## pText   : the htmlText to convert.
## pEscape : when anything other than false (including empty/omitted), Markdown
##           special characters are backslash-escaped before conversion.
##
## Returns the Markdown as UTF-8 encoded data. Runs of Courier text are wrapped
## in literal <pre>...</pre> tags.
##
## Side effects: temporarily creates (and deletes) a field named
## "htmlTextToMarkdownConverter" in order to turn the remaining htmlText into
## plain text.
##
## NOTE(review): escaping is applied to the raw htmlText, so characters inside
## tag attributes (e.g. "." in an href URL) and inside entities (e.g. "#" in a
## numeric entity) receive a backslash as well - confirm this is acceptable.
function htmlTextToMarkdown pText, pEscape
   local theFieldRef, theMessagesWereLocked

   ## Escaping is on unless the caller explicitly passes false
   put pEscape is not false into pEscape

   ## Normalise bold tags so only <strong> has to be handled below
   replace "<b>" with "<strong>" in pText
   replace "</b>" with "</strong>" in pText

   ## http://daringfireball.net/projects/markdown/syntax
   ## Doesn't support targets as far as I can tell
   _RemoveATagTarget pText

   ## http://daringfireball.net/projects/markdown/syntax
   ## Note that we strip color currently
   if pEscape then
      ## The backslash is listed first so that it is escaped before any
      ## backslashes added for the other characters.
      _EscapeCharacters pText, "\`*_{}[]()#+-.!", "\"
   end if

   ## Lists become plain paragraphs prefixed with "1. " or "* "
   _ListReplace pText, "ol", "<p></p>", "", "<p>1. ", "</p>"
   _ListReplace pText, "ul", "<p></p>", "", "<p>* ", "</p>"

   ## Locate instances of "Code" (Courier font). Placeholders are used instead
   ## of real <pre> tags so they survive the htmlText -> text conversion below.
   _ConvertFontToPreTag "Courier", pText, "___pre___", "---pre---"

   ## Inline styles. <b>/</b> no longer occur here: they were rewritten to
   ## <strong>/</strong> at the top of this handler.
   _OpeningTagReplace pText, "<i>", "*"
   _ClosingTagReplace pText, "</i>", "*"
   _OpeningTagReplace pText, "<strong>", "**"
   _ClosingTagReplace pText, "</strong>", "**"
   _OpeningTagReplace pText, "<u>", "*" ## markdown doesn't support underline, use italics
   _ClosingTagReplace pText, "</u>", "*"

   ## Move any spaces that sit directly before </a> to just after it
   _ClosingTagReplace pText, "</a>", "</a>"

   -- Converts <a href> tags to markdown style links: [link text](url)
   _ConvertAHrefToMarkup pText, "[", "]", "(", ")"

   ## Get plain UTF8 text by letting a temporary field render the htmlText.
   ## lockMessages is a plain boolean (not a nesting counter like lock screen),
   ## so remember the caller's setting and restore it afterwards instead of
   ## unconditionally unlocking.
   put the lockMessages into theMessagesWereLocked
   lock screen
   lock messages
   create field "htmlTextToMarkdownConverter"
   put it into theFieldRef ## 'create' leaves the new field's reference in 'it'
   set the HTMLText of theFieldRef to pText
   put textEncode(the text of theFieldRef, "utf8") into pText
   delete theFieldRef
   set the lockMessages to theMessagesWereLocked
   unlock screen

   ## Swap the placeholders for real <pre> tags
   replace "___pre___" with "<pre>" in pText
   replace "---pre---" with "</pre>" in pText

   return pText
end htmlTextToMarkdown
-- Removes the target="_blank" attribute from every <a href="..."> opening tag.
--
-- pText is modified in place. Only text between an opening <a href=" and the
-- next "> is touched, so the same attribute text elsewhere is left alone.
private command _RemoveATagTarget @pText
   local tOpenMarker, tCloseMarker, tTargetAttr
   local tSkip, tTagStart, tTagEnd, tRanges, tRange

   put "<a href=" & quote into tOpenMarker
   put quote & ">" into tCloseMarker
   put " target=" & quote & "_blank" & quote into tTargetAttr

   ## Pass 1: record the char range of every opening tag
   put 0 into tSkip
   repeat forever
      ## offset() reports positions relative to the skipped chars, so the
      ## skip count is added back to get an absolute char number.
      put offset(tOpenMarker, pText, tSkip) into tTagStart
      if tTagStart is 0 then exit repeat
      add tSkip to tTagStart

      put offset(tCloseMarker, pText, tTagStart) into tTagEnd
      if tTagEnd is 0 then exit repeat
      add tTagStart to tTagEnd

      ## Newest range goes first so the list ends up ordered last-to-first
      put tTagStart & comma & tTagEnd & cr before tRanges
      put tTagEnd + 1 into tSkip
   end repeat
   delete the last char of tRanges

   ## Pass 2: strip the attribute, working backwards through the text so the
   ## shrinking text never invalidates a range that is still to be processed.
   repeat for each line tRange in tRanges
      replace tTargetAttr with empty in char (item 1 of tRange) to (item 2 of tRange) of pText
   end repeat

   return empty
end _RemoveATagTarget
## Prefixes every occurrence of the characters in pCharsToEscape with
## pEscapeChar.
##
## pString        : text to escape; modified in place.
## pCharsToEscape : string whose individual chars are the ones to escape.
## pEscapeChar    : prefix to insert; defaults to "\" when empty.
##
## The text is walked once, so each original character is escaped at most
## once. A per-character 'replace' would also re-escape the escape characters
## it had just inserted whenever pEscapeChar was not the first entry in
## pCharsToEscape.
private command _EscapeCharacters @pString, pCharsToEscape, pEscapeChar
   local theChar, theResult

   if pEscapeChar is empty then put "\" into pEscapeChar

   repeat for each char theChar in pString
      if theChar is among the chars of pCharsToEscape then
         put pEscapeChar after theResult
      end if
      put theChar after theResult
   end repeat

   put theResult into pString
   return empty
end _EscapeCharacters
## Rewrites each <pTag>...</pTag> list found in pText.
##
## pText       : htmlText; modified in place.
## pTag        : list tag name without angle brackets (e.g. "ol" or "ul").
## pTagPrefix  : replacement for the opening list tag.
## pTagSuffix  : replacement for the closing list tag.
## pItemPrefix : replacement for "<li>" & cr & "<p>" at the start of an item.
## pItemSuffix : replacement for "</p>" & cr & "</li>" at the end of an item.
##
## The closing tag is searched for AFTER its opening tag, and scanning resumes
## after each converted list. A stray closing tag that precedes the first
## opening tag therefore cannot produce an inverted char range that would keep
## the loop from making progress.
private command _ListReplace @pText, pTag, pTagPrefix, pTagSuffix, pItemPrefix, pItemSuffix
   local theOpenTag, theCloseTag
   local theSkip, theCharNo, theEndCharNo, theReplacement

   put "<" & pTag & ">" into theOpenTag
   put "</" & pTag & ">" into theCloseTag

   put 0 into theSkip
   repeat forever
      ## offset() is relative to the skipped chars; add the skip back to get
      ## an absolute char number.
      put offset(theOpenTag, pText, theSkip) into theCharNo
      if theCharNo is 0 then exit repeat
      add theSkip to theCharNo

      put offset(theCloseTag, pText, theCharNo) into theEndCharNo
      if theEndCharNo is 0 then exit repeat
      ## Absolute position of the last char of the closing tag
      put theCharNo + theEndCharNo + length(theCloseTag) - 1 into theEndCharNo

      put char theCharNo to theEndCharNo of pText into theReplacement
      replace theOpenTag with pTagPrefix in theReplacement
      replace theCloseTag with pTagSuffix in theReplacement
      replace "<li>" & cr & "<p>" with pItemPrefix in theReplacement
      replace "</p>" & cr & "</li>" with pItemSuffix in theReplacement
      put theReplacement into char theCharNo to theEndCharNo of pText

      ## Continue after the text that was just written
      put theCharNo + length(theReplacement) - 1 into theSkip
   end repeat

   return empty
end _ListReplace
-- Wraps runs of text set in font pFont in pre tags.
--
-- pFont          : font face name to look for in <font face="..."> tags.
-- pText          : htmlText; modified in place.
-- pOpenPreTag    : text placed before each run; "<pre>" if fewer than 3 params are passed.
-- pClosingPreTag : text placed after each run; "</pre>" if fewer than 4 params are passed.
--
-- A run starts at a matching <font face="pFont" tag, absorbs any further matching
-- font tags that begin before the next </p>, and ends at the last char of the
-- </font> that follows the last absorbed font tag. Inside a run every "<br />",
-- "<p>" and "</p>" is removed; the <font> tags themselves are left in place.
command _ConvertFontToPreTag pFont, @pText, pOpenPreTag, pClosingPreTag
local theCharNo, theFirstStartCharNo, theLastStartCharNo
local theLastCharNo, theFoundA, theIndex
local theOffset, thePTagCharNo
if paramCount() < 3 then put "<pre>" into pOpenPreTag
if paramCount() < 4 then put "</pre>" into pClosingPreTag
-- Example input:
-- <p><font face="Courier">This is great</font><br />
-- <font face="Courier">&nbsp;&nbsp;&nbsp;&nbsp;What is this?</font><br />
-- <font face="Courier">&nbsp;&nbsp;&nbsp;&nbsp;I don't know?</font><br />
-- <font face="Courier">I like it</font></p>
-- Intended result:
-- <p><pre>This is great
-- &nbsp;&nbsp;&nbsp;&nbsp;What is this?
-- &nbsp;&nbsp;&nbsp;&nbsp;I don't know?
-- I like it</pre></p>
## Pass 1: collect the start/end char numbers of every run into theFoundA.
## theOffset always holds the absolute char number of the tag found last;
## offset() results are relative to it, so they are added to it each time.
put 0 into theOffset
repeat forever
## Seek out first instance of <font face="Courier"
if theFirstStartCharNo is empty then
put offset("<font face=" & quote & pFont & quote, pText, theOffset) into theCharNo
if theCharNo > 0 then
## Store the start character of first instance of courier
add theCharNo to theOffset
put theOffset into theFirstStartCharNo
## theLastStartCharNo is assigned here but never read afterwards
put theFirstStartCharNo into theLastStartCharNo
else
exit repeat # no font tags
end if
else
# p tags will terminate a found set (<br> will not).
put offset("</p>", pText, theOffset) into thePTagCharNo
if thePTagCharNo > 0 then add theOffset to thePTagCharNo
## Keep looking for more instances
put offset("<font face=" & quote & pFont & quote, pText, theOffset) into theCharNo
## If no </p> follows, thePTagCharNo stays 0, the comparison below fails
## and the run is closed at the next </font>.
if theCharNo > 0 AND (theCharNo + theOffset) < thePTagCharNo then
-- if theCharNo > 0 then
add theCharNo to theOffset
-- put theOffset into theLastStartCharNo
else
## No more found. Find closing </font> so we know where to put closing </pre> tag.
put offset("</font>", pText, theOffset) into theCharNo
if theCharNo > 0 then
## Found the closing tag. Store it
add theCharNo to theOffset
## + 6 moves from the "<" of </font> to its ">"
put theOffset + 6 into theLastCharNo
## Now log everything and start over
put the number of elements of theFoundA + 1 into theIndex
put theFirstStartCharNo into theFoundA[theIndex]["start char no"]
put theLastCharNo into theFoundA[theIndex]["end char no"]
## Emptying theFirstStartCharNo makes the next pass look for a new run
put empty into theFirstStartCharNo
-- put empty into theLastStartCharNo
put empty into theLastCharNo
else
exit repeat ## no closing font. Shouldn't happen.
end if
end if
end if
end repeat
## Pass 2: rewrite the runs from last to first, so that changing the length of
## pText never shifts the char numbers of runs still to be processed.
## NOTE(review): "the number elements of" omits the usual "of" after "number"
## (compare "the number of elements of" above) - confirm this spelling parses.
local theText
repeat with theIndex = the number elements of theFoundA down to 1
## 'start char no' = the start of <font face...
## 'end char no' is the last char in </font>
put char theFoundA[theIndex]["start char no"] to theFoundA[theIndex]["end char no"] of pText into theText
replace "<br />" with empty in theText
replace "<p>" with empty in theText
replace "</p>" with empty in theText
put pOpenPreTag & theText & pClosingPreTag into char theFoundA[theIndex]["start char no"] to theFoundA[theIndex]["end char no"] of pText
-- put pClosingPreTag after char theFoundA[theIndex]["end char no"] of pText
-- put pOpenPreTag before char theFoundA[theIndex]["start char no"] of pText
## Now replace <br>s that ScreenSteps inserted
-- replace "<br />" with empty in char theFoundA[theIndex]["start char no"] to theFoundA[theIndex]["end char no"] of pText
end repeat
return empty
end _ConvertFontToPreTag
## Replaces every opening tag pTag in pText with pReplacement.
##
## Where the tag is directly followed by spaces and then a non-space char,
## the spaces are moved in front of the replacement ("<i> word" becomes
## " *word") so the Markdown marker hugs the text it applies to.
## pText is modified in place; pTag is inserted into a regular expression as-is.
private command _OpeningTagReplace @pText, pTag, pReplacement
   local tPattern, tGap
   local tTagFrom, tTagTo, tGapFrom, tGapTo, tNextFrom, tNextTo

   -- <b>( +?)(\S)
   -- \1*\2
   put "(" & pTag & ")( +?)(\S)" into tPattern

   ## Handle one "tag + spaces" occurrence per pass until none are left
   repeat while matchChunk(pText, tPattern, tTagFrom, tTagTo, tGapFrom, tGapTo, tNextFrom, tNextTo)
      put char tGapFrom to tGapTo of pText into tGap
      ## Tag and spaces are adjacent, so swapping them for "spaces + marker"
      ## leaves the following non-space char exactly where it was.
      put tGap & pReplacement into char tTagFrom to tGapTo of pText
   end repeat

   ## Any stragglers that were not followed by spaces
   replace pTag with pReplacement in pText

   return empty
end _OpeningTagReplace
## Replaces every closing tag pTag in pText with pReplacement.
##
## Where spaces sit directly before the tag, they are moved behind the
## replacement ("word </i>" becomes "word* ") so the Markdown marker hugs
## the text it applies to.
## pText is modified in place; pTag is inserted into a regular expression as-is.
private command _ClosingTagReplace @pText, pTag, pReplacement
   local tPattern, tGap
   local tGapFrom, tGapTo, tTagFrom, tTagTo

   -- ( +?)(</b>)
   -- *\1
   put "( +?)(" & pTag & ")" into tPattern

   ## Handle one "spaces + tag" occurrence per pass until none are left
   repeat while matchChunk(pText, tPattern, tGapFrom, tGapTo, tTagFrom, tTagTo)
      put char tGapFrom to tGapTo of pText into tGap
      put pReplacement & tGap into char tGapFrom to tTagTo of pText
   end repeat

   ## Any stragglers that were not preceded by spaces
   replace pTag with pReplacement in pText

   return empty
end _ClosingTagReplace
-- Converts <a href> tags to markdown style links: [link text](url)
--
-- pText     : htmlText; modified in place.
-- pPreLink  : text placed before the link text (e.g. "[").
-- pPostLink : text placed after the link text (e.g. "]").
-- pPreURL   : text placed before the URL (e.g. "(").
-- pPostURL  : text placed after the URL (e.g. ")").
--
-- Each <a href="URL">TEXT</a> becomes pPreLink TEXT pPostLink pPreURL URL pPostURL.
-- Scanning stops at the first opening tag that lacks a closing "> or </a>.
-- Scanning resumes at the char directly after each generated link, so a link
-- that immediately follows another (</a><a href=...) is converted as well.
private command _ConvertAHrefToMarkup @pText, pPreLink, pPostLink, pPreURL, pPostURL
   local theOffset, theStartCharNo, theEndCharNo, theCloseCharNo
   local theLink, theLinkText, theMarkup

   put 0 into theOffset
   repeat forever
      ## Seek out <a href="  (offset() is relative to the skipped chars, so
      ## the skip count is added back to get an absolute char number)
      put offset("<a href=" & quote, pText, theOffset) into theStartCharNo
      if theStartCharNo is 0 then exit repeat
      add theOffset to theStartCharNo

      ## Seek out the "> that ends the opening tag; theEndCharNo is the quote
      put offset(quote & ">", pText, theStartCharNo) into theEndCharNo
      if theEndCharNo is 0 then exit repeat
      add theStartCharNo to theEndCharNo

      ## The URL sits between <a href=" (9 chars) and the closing quote
      put char (theStartCharNo + 9) to (theEndCharNo - 1) of pText into theLink

      ## Seek out </a>
      put offset("</a>", pText, theEndCharNo + 1) into theCloseCharNo
      if theCloseCharNo is 0 then exit repeat
      add theEndCharNo + 1 to theCloseCharNo

      ## Link text starts after the ">" (theEndCharNo + 1) of the opening tag
      put char (theEndCharNo + 2) to (theCloseCharNo - 1) of pText into theLinkText

      ## Swap the whole <a ...>...</a> span (</a> is 4 chars) for the markup
      put pPreLink & theLinkText & pPostLink & pPreURL & theLink & pPostURL into theMarkup
      put theMarkup into char theStartCharNo to (theCloseCharNo + 3) of pText

      ## Skip exactly up to the last char of the markup just written
      put theStartCharNo + length(theMarkup) - 1 into theOffset
   end repeat

   return empty
end _ConvertAHrefToMarkup
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment