Skip to content

Instantly share code, notes, and snippets.

@c-smile
Last active May 19, 2019 03:22
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save c-smile/0a9c4af0d530fadc0a71904ddbe86ff9 to your computer and use it in GitHub Desktop.
Save c-smile/0a9c4af0d530fadc0a71904ddbe86ff9 to your computer and use it in GitHub Desktop.
Sciter: HTML to Markdown
namespace MD {
// Creates a line-oriented text accumulator used to build the Markdown output.
// Returns an object with the following methods:
//   prefix(text, level = 0) - finishes the current line and starts a new one,
//                             returns the length of the freshly started line;
//   text(str)               - appends HTML text with whitespace collapsed;
//   out(str)                - appends str verbatim (no whitespace processing);
//   pos()                   - length of the current, unfinished line;
//   content()               - flushes the current line and returns all lines joined by "\n".
function makeTextStream() {
  var out = []; // completed lines
  var curl = ""; // current (unfinished) line
  var curo = 0; // length of the current line right after its prefix was written (a.k.a. level)

  // Starts a new line made of `level` padding characters followed by `text`.
  // text === null requests a blank separator line: the padding-only line is pushed
  // immediately, yet it also stays the current line so that following text keeps its indent.
  function outPrefix(text, level = 0) {
    if(curl) out.push(curl); // an empty current line is dropped rather than emitted
    curl = new String(level,' ') + (text || ""); // presumably `level` spaces (TIScript String constructor)
    if(text === null) out.push(curl);
    curo = curl.length;
    return curo;
  }

  // Appends text of an HTML text node to the current line.
  // Any run of spaces, tabs and line breaks becomes a single space - the same way
  // HTML collapses whitespace - so a lone "\n" or "\t" cannot break the current
  // Markdown line (heading, list item, table row) apart.
  function outText(text) {
    curl += text.replace(/[ \t\n\r]+/g," ");
    return curo;
  }

  // Flushes the current line and returns the accumulated document.
  function getContent() {
    out.push(curl);
    curl = "";
    return out.join("\n");
  }

  // Length of the current line; used by callers to measure what they have just emitted.
  function getPos() { return curl.length; }

  return {
    prefix : outPrefix,
    text : outText,
    content: getContent,
    pos : getPos,
    out : function(txt) { curl += txt; }
  };
}
// Emits a blank separator line in front of a block element.
// The line is written when the element has a preceding sibling (el.prior)
// or when the caller insists on it with force = true.
function gap(stream, el, level, force = false) {
  const wanted = force || el.prior;
  if(!wanted)
    return;
  stream.prefix(null,level);
}
// Backslash-escapes characters of `text` that Markdown would otherwise treat as markup.
// Rules are applied one after another, in table order; the backslash rule comes
// first so that backslashes added by later rules are not doubled.
// Patterns anchored with ^ have no multiline flag, so they match only at the very
// start of the given string.
function escape(text)
{
  const rules = [
    [/\\/g, "\\\\"],
    [/\*/g, "\\*"],
    [/^-/g, "\\-"],
    [/^\+ /g, "\\+ "],
    [/^(=+)/g, "\\$1"],
    [/^(#{1,6}) /g, "\\$1 "],
    [/`/g, "\\`"],
    [/^~~~/g, "\\~~~"],
    [/\[/g, "\\["],
    [/\]/g, "\\]"],
    [/^>/g, "\\>"],
    [/_/g, "\\_"],
    [/^(\d+)\. /g, "$1\\. "]
  ];
  var result = text;
  for(var n = 0; n < rules.length; ++n) {
    const pattern = rules[n][0];
    const replacement = rules[n][1];
    result = result.replace(pattern, replacement);
  }
  return result;
}
// Emitters for block-level elements. emitBlock() looks a function up here by the
// element's tag name and falls back to `thunk` for tags that have no entry.
// Every emitter takes (stream, el, level) where `level` is the indentation of the
// enclosing block content.
namespace blocks {
  // List item: "1. " inside <ol>, "* " otherwise. The content level of the item is
  // whatever stream.prefix() returns, i.e. the length of the line after the marker.
  function li(stream, el, level) {
    const prefix = el.parent.tag == "ol" ? "1. " : "* ";
    emitContent(stream,el,stream.prefix(prefix,level));
  }

  // List containers: a separating blank line, then the items at the same level.
  function ol(stream, el, level) { gap(stream,el,level); emitContent(stream, el, level); }
  function ul(stream, el, level) { gap(stream,el,level); emitContent(stream, el, level); }
  function dl(stream, el, level) { gap(stream,el,level); emitContent(stream, el, level); }

  // Definition list: the term on its own line, the definition on a ": " line.
  function dt(stream, el, level) { emitContent(stream,el,stream.prefix("",level)); }
  function dd(stream, el, level) { emitContent(stream,el,stream.prefix(": ",level)); }

  function blockquote(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("> ",level)); }

  // Paragraph: always preceded by a blank line (force = true), even as a first child.
  function p(stream, el, level) { gap(stream,el,level,true); emitContent(stream, el, level); }

  function div(stream, el, level){ gap(stream,el,level); emitContent(stream,el,stream.prefix("",level)); }

  // ATX headings.
  function h1(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("# ",level)); }
  function h2(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("## ",level)); }
  function h3(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("### ",level)); }
  function h4(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("#### ",level)); }
  function h5(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("##### ",level)); }
  function h6(stream, el, level) { gap(stream,el,level); emitContent(stream,el,stream.prefix("###### ",level)); }

  // Fenced code block; the element text is written verbatim (not escaped, whitespace kept).
  function pre(stream, el, level) {
    gap(stream,el,level);
    stream.prefix("```",level);
    stream.out("\n"+el.text);
    stream.prefix("```",level);
  }

  function img(stream, el, level) { //![GitHub Logo](/images/logo.png)
    stream.out("![");
    stream.text(el.attributes["alt"] || "");
    stream.out("]");
    // a missing src yields an empty target instead of the text "undefined"
    stream.out("(" + (el.attributes["src"] || "") + ")");
  }

  // <figure> with both an <img> and a <figcaption>: image with the caption as its alt text.
  // NOTE(review): a figure lacking either of them produces no output at all.
  function figure(stream, el, level) { //![GitHub Logo](/images/logo.png)
    const caption = el.$(figcaption);
    const picture = el.$(img);
    if(!picture || !caption) return;
    gap(stream,el,level);
    stream.out("![");
    stream.text(caption.text || ""); //emitContent(stream,caption,level);
    stream.out("]");
    stream.out("(" + (picture.attributes["src"] || "") + ")");
  }

  // Table: header rows, a delimiter row, then body and footer rows,
  // cells separated by " | ".
  function table(stream, el, level) {
    var thead = el.$(thead);
    var tbody = el.$(tbody);
    var tfoot = el.$(tfoot);

    // Writes each row of `section` as one line and returns the emitted widths of
    // the cells of the LAST row. Widths are restarted per row so the delimiter row
    // always has as many columns as a single header row.
    function emitRows(section) {
      var widths = [];
      for(var tr in section) {
        widths = [];
        stream.prefix("",level);
        for(var td in tr) {
          var pos = stream.pos();
          emitContent(stream,td,level);
          widths.push(stream.pos() - pos);
          if( td.next ) stream.out(" | ");
        }
      }
      return widths;
    }

    gap(stream,el,level);
    if( thead ) {
      var cellwidths = emitRows(thead);
      if( cellwidths.length ) {
        stream.prefix("",level);
        for(var (index,w) in cellwidths) {
          // at least three dashes: an empty header cell (width 0) must still produce
          // a delimiter cell, and the measured width can even be negative when the
          // cell content started a new line
          stream.out(new String(w < 3 ? 3 : w,'-'));
          if( index != cellwidths.length - 1 ) stream.out(" | ");
        }
      }
    }
    if( tbody ) emitRows(tbody);
    if( tfoot ) emitRows(tfoot); // footer rows are rendered as ordinary rows
  }

  // Fallback for block elements without a dedicated emitter: content only.
  function thunk(stream, el, level) { emitContent(stream, el, level); }
}
// Emitters for inline (span) elements. emitContent() looks a function up here by
// tag name and falls back to `thunk` for tags that have no entry.
namespace spans {
  function strong(stream,el) { stream.out("**"); emitContent(stream, el); stream.out("**"); }
  function em(stream,el) { stream.out("*"); emitContent(stream, el); stream.out("*"); }

  // Code span. The element text is written WITHOUT Markdown escaping: backslash
  // escapes are not interpreted inside code spans, so escaped output would show
  // the backslashes literally. Text containing a backtick is wrapped in double
  // backticks padded with spaces.
  function code(stream,el) {
    const text = el.text || "";
    if(text.indexOf("`") < 0) {
      stream.out("`"); stream.text(text); stream.out("`");
    } else {
      stream.out("`` "); stream.text(text); stream.out(" ``");
    }
  }

  // Hyperlink: [GitHub](http://github.com)
  // An <a> without href (e.g. a named anchor) is emitted as plain content rather
  // than as a link pointing to "undefined".
  function a(stream,el) {
    const href = el.attributes["href"];
    if(!href) {
      emitContent(stream, el);
      return;
    }
    stream.out("[");
    emitContent(stream, el);
    stream.out("]");
    stream.out("(" + href + ")");
  }

  // aliases
  const b = strong;
  const i = em;
  //const var = code;
  const kbd = code;

  // unknown span(?)
  function thunk(stream,el) { emitContent(stream, el); }
}
// Dispatches a block element to the emitter registered under its tag name in
// `blocks`; tags without an emitter are handled by blocks.thunk.
function emitBlock(stream, el, parentLevel = 0) {
  var handler = blocks[el.tag];
  if(!handler)
    handler = blocks.thunk;
  handler(stream,el,parentLevel);
}
// Emits the child nodes of `el` into `stream`.
//   stream            - text stream created by makeTextStream();
//   el                - element whose nodes are walked;
//   blockContentlevel - indentation passed on to nested block elements and used
//                       for continuation lines after <br>.
// Text nodes are escaped and appended; elements with a truthy state.flowType are
// treated as blocks (emitBlock), all other elements as spans.
function emitContent(stream, el, blockContentlevel = 0) {
  const isBlock = !!el.state.flowType;
  const nodes = el.nodes();
  for(var node in nodes) {
    if(node.isText) {
      var text = node.text;
      if(isBlock) {
        // whitespace at the edges of a block carries no meaning: trim the leading
        // side of the first node and the trailing side of the last one
        if(nodes.first === node)
          text = text.trim(#left);
        if(nodes.last === node)
          text = text.trim(#right);
      }
      if(text)
        stream.text(escape(text));
    }
    else if(node.isElement) {
      if(node.tag == "br") {
        // Markdown hard line break: two trailing spaces, then a new line that
        // continues at the indentation of the current block content
        stream.out("  ");
        stream.prefix("", blockContentlevel);
      }
      else if(node.state.flowType) { // block element
        emitBlock(stream, node, blockContentlevel);
      } else { // span element
        var emitter = spans[node.tag] || spans.thunk;
        emitter(stream, node);
      }
    }
  }
}
}
@c-smile
Copy link
Author

c-smile commented Nov 2, 2018

Initial version of HTML-to-Markdown function for https://html-notepad.com

@c-smile
Copy link
Author

c-smile commented Nov 3, 2018

Sample output:

Header header header FOO bar baz one header header header header headr header header header header header header header header header header header

Header2 ** red ` header2

Para1 with some code in it.

Para2

  1. Item 1
  2. Item 2
    • Item2.*
    • Item2.*
  3. Item 3 And para

Para3 with some hyperlink

Para4 with
inside

One
: First
Two
: Second

And pre of course:

function foo() {
  return "bar";
}

blockquote1
blockquote2

Some img

Some text

First Second
11111.1 22222.1
11111.2 22222.2

@c-smile
Copy link
Author

c-smile commented Nov 3, 2018

real document test

Sciter Architecture

Sciter is not using DOM model like W3C DOM as we have found it too complicated (76 different classes, sic!).

DOM and Window classes

Sciter provides following 8 DOM and Window specific classes accessible in script:

  • Element - DOM element. All HTML elements including document(root), frame, inputs, etc. are Element's in the Sciter.

    • Attributes - collection of named attributes of the html element.
    • Style - collection of style attributes applied to the element.
  • Image - image object. Represents bitmap image. You can draw on the image using Graphics methods.

  • Graphics - object wrapping drawing primitives. You can draw on surface of any Element and Image in the Sciter.

  • View - represents Sciter window. Main Sciter window and Dialog are views.

  • Event - represents current UI event.

  • Sciter - is a global object holding Sciter application specific methods.

View, document, frame and the root element.

Window that Sciter is attached to is represented by the View objects in script.

Each view has root property - reference of the document loaded into the view. This is a root element of the loaded document. Root element in the Sciter is <html> element of the loaded document and there is no dedicated Document class. Document is really root node of element tree - <html> element and its children.

Frames and framesets.

Frames and framesets are also ordinary DOM elements. element has single child element - element of the document loaded into it. parent property of the root element of the document loaded in the frame refer to the element this document loaded into. Simple as it is.

Frames ( elements) in the Sciter can appear in any part of the HTML (not only in ) so there is no difference between and <iframe> in the Sciter. Moreover any block element like

can be declared as a by declaring style="behavior:frame" for it.

Element object has method load that allows to (re)load content of any element and from external source - url or stream (including in-memory dynamic stream). So there is not too much difference between block element like

and frames in the Sciter. Use frames when you need to isolate different style systems or scripts on the same screen.

The same approach is used with <frameset>s - they are plain DOM elements and may appear at any place where block elements are acceptable. <frameset> can contain not only <frame> elements but any block elements, thus <frameset> in the Sciter is a convenient way to define a container with splitters. Moreover, any block element can be transformed into a frameset by declaring behavior:frameset in its style declaration.

Script evaluation.

Sciter knows and interprets only tiscript fragments and files. To include script block in the document use following elements:

<script type="text/tiscript" src="url-to-script-file" />

or for inline script inclusion:

<script type="text/tiscript">
  // script statements..
</script>

Global namespace, view and self objects.

Document establishes namespace for script execution. All classes and functions defined like this:

<script type="text/tiscript">
  function foo() { ... }
</script>

go to that global document namespace. self and view global variables are members of this namespace.

self
: is a reference to the document (<html> node)
view
: is a reference to the view object (usually it is a Sciter window)

Script execution

Sciter executes scripts as a last step of document loading - after tag is being parsed. So at the moment of any script execution DOM is established and scripts can refer to it.

There are three major steps of script execution in the engine:

  1. execution of the scripts per se including declaration of classes and behaviors;
  2. assignment of behaviors to the elements that have prototype:somebehavior; declared in CSS and
  3. invocation of self.ready() method (if it was declared in the script).

When a document needs to be unloaded from the view (e.g. the Sciter window has received a request to close from the user) the engine calls the self.closing() method (if it was declared). If that method returns exactly the false value then unloading stops. This way a document can cancel its own unloading.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment