Skip to content

Instantly share code, notes, and snippets.

@bojidar-bg
Created September 6, 2022 18:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save bojidar-bg/c63f2f139b0ebe48de9a9bc7cf448461 to your computer and use it in GitHub Desktop.
Save bojidar-bg/c63f2f139b0ebe48de9a9bc7cf448461 to your computer and use it in GitHub Desktop.
Trilium -> TiddlyWiki converter & Pandoc WikiText writer

Status: WIP

This is a quick and dirty script + pandoc lua filter I used to import notes from Trilium into TiddlyWiki.

Usage instructions:

  1. Export root note from Trilium as HTML files in a ZIP (enter protected mode to export protected notes too).
  2. Extract the ZIP file to a directory.
  3. Put process.sh and write-wikitext.lua in the root directory.
  4. Edit process.sh to match your exported root note name (change "root" and "transformed-root").
  5. Edit write-wikitext.lua (line 93, the "creator:" field, and perhaps line 95, the "modifier:" field) to match your TiddlyWiki user name.
  6. Run process.sh. It should print filenames as it processes them; it will automatically rename pages with overlapping titles.
  7. (Make sure your TiddlyWiki instance is backed up.)
  8. Drag all the .tid files from the tids/ folder into your TiddlyWiki instance.
  9. Profit! (Or rollback, eouch!)

Welcome improvements:

  1. Run multiple pandoc invocations at the same time. (embarrassingly parallel; didn't need that for just ~800 notes)
  2. Make sure write-wikitext.lua handles all inputs correctly. E.g. it currently makes links with <a .. title="">..</a> for no good reason.
  3. Cleanup write-wikitext.lua.
  4. Make sure process.sh doesn't break if you have a line starting with title: ... within a note.

Either way, it worked for me; hopefully it would help someone else not waste a few hours writing pandoc filters.

SPDX-License: GPL-2.0-or-later (due to using parts of pandoc's data/sample.lua (which apparently falls under GPL) in writing write-wikitext.lua.)

#!/bin/bash
# Converts a Trilium HTML export found in ./root into TiddlyWiki .tid files
# collected in ./tids. Expects write-wikitext.lua in the current directory.

# Mirror the directory tree of root/ under transformed-root/.
# -p keeps a re-run from failing on directories that already exist.
find root -type d -exec mkdir -p transformed-\{\} \;

# Convert every exported note; each file name is printed as it is processed.
find root -name '*.html' -type f -printf '%p\n' -exec pandoc \{\} -t write-wikitext.lua -o transformed-\{\}.tid \;

mkdir -p tids

# Copy every converted note into tids/, named after its title.
find transformed-root/ -type f -not -name '*.clone.*' -print0 |
while IFS= read -r -d '' f; do
    # Read the title from the header only: the header ends at the first blank
    # line, so a "title: ..." line inside the note body is never picked up.
    # Slashes are replaced and trailing spaces removed to get a usable file name.
    T="$(sed -n -r '/^$/q; s/^title: (.+)/\1/p' "$f" | head -n 1 | sed -r 's|/|_|g;s| +$||')"
    if [ -e "tids/$T.tid" ]; then
        T1="$T XX $RANDOM"
        echo CONFLICT "$f" "tids/$T.tid" "tids/$T1.tid"
        # Rewrite only the title field of the header. The new title travels
        # through the environment so that characters which are special to
        # sed or awk (|, &, \, regex operators) are taken literally.
        NEW_TITLE="$T1" awk '
            in_body     { print; next }
            /^$/        { in_body = 1; print; next }
            /^title: /  { print "title: " ENVIRON["NEW_TITLE"]; next }
                        { print }
        ' "$f" >"tids/$T1.tid"
    else
        cp "$f" "tids/$T.tid"
    fi
done
-- This is a custom writer for pandoc that produces TiddlyWiki WikiText:
-- a block of tiddler header fields (created, title, tags, ...), a blank
-- line, and then the note body.
-- It is derived from pandoc's sample custom writer (data/sample.lua).
-- A few elements (tables, captioned images, citations, small caps) are
-- still written as raw HTML, as in the sample writer.
--
-- Invoke with: pandoc -t write-wikitext.lua
--
-- Note: you need not have lua installed on your system to use this
-- custom writer. However, if you do have lua installed, you can
-- use it to test changes to the script. 'luac -p write-wikitext.lua'
-- reports syntax errors without running the script (running it with
-- plain 'lua' fails, because the 'pandoc' module only exists inside pandoc).
local pipe = pandoc.pipe
local stringify = (require 'pandoc.utils').stringify
-- Character escaping.
-- Turns typographic dashes into `--` / `---` and curly quotes into their
-- ASCII forms, then replaces `<`, `>` and `&` with HTML entities. When
-- `in_attribute` is truthy, `"` and `'` are replaced with entities as well.
--
-- The typographic pass runs first so that a curly quote inside an attribute
-- value ends up as an entity instead of a raw quote that would terminate
-- the attribute.
--
-- s: string to escape. in_attribute: optional flag, see above.
-- Returns exactly one value, the escaped string (the substitution count
-- that gsub also produces is not passed on to the caller).
local function escape(s, in_attribute)
  local typographic = {
    { '–', '--' }, { '—', '---' },
    { '“', '"' }, { '”', '"' },
    { '‘', '\'' }, { '’', '\'' },
  }
  local text = s
  for _, pair in ipairs(typographic) do
    -- Assigning to a single variable keeps only gsub's first result.
    text = text:gsub(pair[1], pair[2])
  end

  local always = { ['<'] = '&lt;', ['>'] = '&gt;', ['&'] = '&amp;' }
  local in_attr_only = { ['"'] = '&quot;', ["'"] = '&#39;' }
  local escaped = text:gsub('[<>&"\']', function(x)
    return always[x] or (in_attribute and in_attr_only[x]) or x
  end)
  return escaped
end
-- Helper function to convert an attributes table into a string.
--
-- attr: table of attribute name -> string value; nil is treated as empty.
-- mode: when it is the string 'class', only attr.class is used and the
--       result is the dotted shorthand `.first.second` (or '' when there
--       are no classes). For any other value the result is a run of
--       ` name="value"` pairs that can be put into an HTML tag; attributes
--       with an empty value are skipped.
--
-- The pairs are emitted in sorted name order so that the same input always
-- produces the same output (pairs() makes no promise about order).
local function attributes(attr, mode)
  attr = attr or {}
  if mode == 'class' then
    -- Single-variable assignment drops gsub's substitution count.
    local classes = string.gsub(attr.class or '', ' ', '.')
    if classes == '' then
      return ''
    end
    return '.' .. classes
  end

  local names = {}
  for name, value in pairs(attr) do
    if value and value ~= '' then
      names[#names + 1] = name
    end
  end
  table.sort(names)

  local rendered = {}
  for i, name in ipairs(names) do
    rendered[i] = ' ' .. name .. '="' .. escape(attr[name], true) .. '"'
  end
  return table.concat(rendered)
end
-- Title of the note being converted. Header() stores the text of a
-- level-1 heading here, and Doc() writes it out as the `title:` field
-- (Doc() may replace it with a title derived from the file path).
local title = ''
-- Blocksep returns the text placed between two block elements:
-- two newlines, i.e. one blank line.
function Blocksep()
  local blank_line = '\n\n'
  return blank_line
end
-- This function is called once for the whole document.
-- body is the rendered document (a string); metadata and variables are
-- tables and are not used here.
--
-- Returns the text of a .tid file: the tiddler header fields, blank lines,
-- and then the body with leading newlines removed and runs of three or
-- more newlines reduced to two.
--
-- The header is built from the first input file:
--   * created/modified: the file's modification time, obtained by running
--     `stat` and `date` through pandoc.pipe;
--   * tags: the name of the directory that contains the file;
--   * title: the file-level `title` variable, unless the path matches one
--     of the two patterns below (numeric file name, or a Journal entry).
-- The creator and modifier fields are fixed strings.
function Doc(body, metadata, variables)
  -- Runs a command with no input and drops the last character of its
  -- output (presumably the trailing newline the command prints).
  local function run(command, args)
    return pandoc.pipe(command, args, ''):sub(1, -2)
  end

  local mtime = run('stat', {'-c', '%Y', PANDOC_STATE.input_files[1]})
  local mtimestamp = run('date', {'--date=@' .. mtime, '+%Y%m%d%H%M%S000'})
  local realpath = run('realpath', {PANDOC_STATE.input_files[1]})

  local tags = realpath:match("^.-/?([^/]*)/[^/]*$") or ''

  -- A file whose name is only digits is titled "<parent directory> <number>".
  local context, nn = realpath:match("^.-/?([^/]-)/(%d+).html$")
  if context and nn then
    title = context .. ' ' .. nn
  end

  -- Journal entries are titled by date and tagged "Journal".
  local year, month, day, extra =
    realpath:match("Journal/(%d+)/(%d+)[^/]*/(%d+)(.*)%.html$")
  if year and month and day then
    local suffix = extra:gsub('^ %- ', ' ')
    title = year .. '-' .. month .. '-' .. day .. suffix
    tags = 'Journal'
  end

  local trimmed = body:gsub('^\n+', '')
  local compact = trimmed:gsub('\n\n\n+', '\n\n')

  local fields = {
    'created: ' .. mtimestamp,
    'creator: bojidar',
    'modified: ' .. mtimestamp,
    'modifier: trilium-import',
    'title: ' .. title,
    'tags: [[' .. tags .. ']]',
    'type: text/vnd.tiddlywiki',
  }
  return table.concat(fields, '\n') .. '\n' .. '\n\n' .. compact
end
-- The functions that follow render corresponding pandoc elements.
-- s is always a string, attr is always a table of attributes, and
-- items is always an array of strings (the items in a list).
-- Comments indicate the types of other variables.
-- Renders a run of plain text by passing it through escape() in
-- body-text mode (no attribute flag).
function Str(s)
  local in_attribute = nil
  return escape(s, in_attribute)
end
-- Renders an inter-word space as a single space character.
function Space()
  local single_space = ' '
  return single_space
end
-- Renders a soft line break as a newline character.
function SoftBreak()
  local newline = '\n'
  return newline
end
-- Renders a hard line break as a NUL byte. The byte is a placeholder:
-- Para() looks for it and turns it into a real newline.
function LineBreak()
  local placeholder = '\0'
  return placeholder
end
-- Renders emphasised text by wrapping the rendered content in `//`.
function Emph(s)
  local marker = '//'
  return marker .. s .. marker
end
-- Renders strong text by wrapping the rendered content in two
-- apostrophes on each side.
function Strong(s)
  local marker = "''"
  return marker .. s .. marker
end
-- Renders underlined text by wrapping the rendered content in `__`.
function Underline(s)
  local marker = '__'
  return marker .. s .. marker
end
-- Renders subscript text by wrapping the rendered content in `,,`.
function Subscript(s)
  local marker = ',,'
  return marker .. s .. marker
end
-- Renders superscript text by wrapping the rendered content in `^^`.
function Superscript(s)
  local marker = '^^'
  return marker .. s .. marker
end
-- Renders small caps as an HTML span with a font-variant style.
function SmallCaps(s)
  local opening = '<span style="font-variant: small-caps;">'
  local closing = '</span>'
  return opening .. s .. closing
end
-- Renders struck-out text by wrapping the rendered content in `~~`.
function Strikeout(s)
  local marker = '~~'
  return marker .. s .. marker
end
-- Renders a link.
-- s: rendered link text; tgt: link target; tit: link title (may be empty
-- or nil); attr: table of attributes (may be nil).
--
-- A target that starts with '.' is a relative link to another exported
-- note; it is replaced by the last path component of the link text, which
-- is used as the name of the tiddler to link to.
--
-- The link is written as WikiText (`[[target]]` or `[[text|target]]`)
-- unless it carries a non-empty title or at least one non-empty attribute;
-- only then is an HTML <a> element needed to keep that information.
-- (An empty string and an empty table are both truthy in Lua, so the test
-- has to look at the contents, not just at the values themselves.)
function Link(s, tgt, tit, attr)
  if string.find(tgt, '^%.') then
    tgt = string.match(s, "^.-/?([^/]*)$")
  end

  tit = tit or ''
  local has_attributes = false
  for _, value in pairs(attr or {}) do
    if value and value ~= '' then
      has_attributes = true
      break
    end
  end

  if tit ~= '' or has_attributes then
    local title_attribute = ''
    if tit ~= '' then
      title_attribute = ' title="' .. escape(tit, true) .. '"'
    end
    return '<a href="' .. escape(tgt, true) .. '"' .. title_attribute ..
      attributes(attr or {}) .. '>' .. s .. '</a>'
  elseif s == tgt or s == '' then
    return '[[' .. tgt .. ']]'
  else
    return '[[' .. s .. '|' .. tgt .. ']]'
  end
end
-- Renders an inline image as `[img[title|source]]`. Both the title and
-- the source go through escape() in attribute mode; the rendered alt
-- text (s) and attr are not used.
function Image(s, src, tit, attr)
  local tooltip = escape(tit, true)
  local source = escape(src, true)
  return '[img[' .. tooltip .. '|' .. source .. ']]'
end
-- Renders inline code between backticks.
-- s: the code text; attr: attribute table (not used).
--
-- The text is written verbatim: TiddlyWiki does not interpret the content
-- of a backtick span, so HTML entities placed there would be displayed
-- literally. When the code itself contains a backtick, the span is
-- delimited with double backticks instead, so that the embedded backtick
-- does not end it.
function Code(s, attr)
  local fence = '`'
  if string.find(s, '`', 1, true) then
    fence = '``'
  end
  return fence .. s .. fence
end
-- Renders inline math as the escaped formula between `\(` and `\)`.
function InlineMath(s)
  local formula = escape(s)
  return '\\(' .. formula .. '\\)'
end
-- Renders display math as the escaped formula between `\[` and `\]`.
function DisplayMath(s)
  local formula = escape(s)
  return '\\[' .. formula .. '\\]'
end
-- Renders single-quoted text with plain ASCII apostrophes around it.
function SingleQuoted(s)
  local quote = "'"
  return quote .. s .. quote
end
-- Renders double-quoted text with plain ASCII double quotes around it.
function DoubleQuoted(s)
  local quote = '"'
  return quote .. s .. quote
end
-- Renders a footnote by returning its rendered content unchanged, so the
-- note text appears inline at the place where the note was referenced.
function Note(s)
  local content = s
  return content
end
-- Renders a span as its content only; the attributes are dropped.
-- (An HTML form was considered and left disabled:
--  '<span' .. attributes(attr) .. '>' .. s .. '</span>')
function Span(s, attr)
  local content = s
  return content
end
-- Renders raw inline content: HTML is passed through unchanged,
-- content in any other format is dropped.
function RawInline(format, str)
  if format ~= 'html' then
    return ''
  end
  return str
end
-- Renders a citation as an HTML span of class "cite".
-- s: rendered citation text; cs: array of citation tables, each read for
-- its citationId field. The ids are joined with commas into the
-- data-citation-ids attribute.
function Cite(s, cs)
  local citation_ids = {}
  for _, citation in ipairs(cs) do
    citation_ids[#citation_ids + 1] = citation.citationId
  end
  local id_list = table.concat(citation_ids, ',')
  return '<span class="cite" data-citation-ids="' .. id_list .. '">' .. s .. '</span>'
end
-- Renders plain (non-paragraph) block content unchanged.
function Plain(s)
  local content = s
  return content
end
-- Renders a paragraph.
-- A paragraph without hard line breaks is returned unchanged. A paragraph
-- that contains the NUL placeholder written by LineBreak() is wrapped in
-- `"""` lines; inside it, existing newlines become spaces and every
-- placeholder becomes a newline.
function Para(s)
  if not s:find("\0") then
    return s
  end
  local single_line = s:gsub("\n", " ")
  local with_breaks = single_line:gsub("\0", "\n")
  return '"""\n' .. with_breaks .. '\n"""'
end
-- Renders a heading.
-- lev is an integer, the header level; s is the rendered heading text;
-- attr is the attribute table.
-- A level-1 heading is not written to the body: its text is stored in the
-- file-level `title` variable and an empty string is returned. Any deeper
-- heading becomes (lev - 1) exclamation marks, the class shorthand from
-- attributes(), a space, and the text.
function Header(lev, s, attr)
  if lev <= 1 then
    title = s
    return ''
  end
  local marks = string.rep('!', lev-1)
  return marks .. attributes(attr, 'class') .. ' ' .. s
end
-- Renders a block quote by putting a `<<<` line before and after the
-- rendered content.
function BlockQuote(s)
  local fence = '<<<'
  return fence .. '\n' .. s .. '\n' .. fence
end
-- Renders a horizontal rule as three hyphens.
function HorizontalRule()
  local rule = "---"
  return rule
end
-- Renders a line block as an HTML div styled with `white-space: pre-line`.
-- ls is an array of rendered lines; they are joined with newlines.
function LineBlock(ls)
  local joined = table.concat(ls, '\n')
  return '<div style="white-space: pre-line;">' .. joined .. '</div>'
end
-- Renders a code block as a TiddlyWiki fenced block:
--
--   ```language
--   code
--   ```
--
-- s: the code text; attr: attribute table (may be nil).
-- * Only a language name may follow the opening fence, so the first class
--   in attr.class is used for it (when it consists of word characters and
--   hyphens) and other attributes are dropped.
-- * The code is written verbatim: nothing inside the fences is interpreted
--   by TiddlyWiki, so HTML entities would be displayed literally.
-- * The closing fence is put on a line of its own, which is where
--   TiddlyWiki looks for it.
-- The leading newline makes the opening fence start a line even when the
-- block directly follows other text (for example a list marker).
function CodeBlock(s, attr)
  local classes = (attr and attr.class) or ''
  local language = string.match(classes, '^[%w_-]+') or ''
  return '\n```' .. language .. '\n' .. s .. '\n```'
end
-- Renders a bullet list, one `* item` line per item.
-- items is an array of rendered items. An item may contain an already
-- rendered nested list, whose lines start with a run of `*` / `#` markers
-- followed by a space. Each such line is pushed one level deeper by
-- putting `*` in front of its markers (`* x` -> `** x`, `## x` -> `*## x`).
-- The whole run of markers is matched and the space after it is kept, so
-- nesting works at any depth. Blank lines in front of a nested list line
-- are removed, because a blank line would end the enclosing list.
function BulletList(items)
  local buffer = {}
  for _, item in ipairs(items) do
    local nested = string.gsub(item, '\n+([%*#]+) ', '\n*%1 ')
    buffer[#buffer + 1] = '* ' .. nested
  end
  return table.concat(buffer, '\n')
end
-- Renders an ordered list, one `# item` line per item.
-- items is an array of rendered items. An item may contain an already
-- rendered nested list, whose lines start with a run of `*` / `#` markers
-- followed by a space. Each such line is pushed one level deeper by
-- putting `#` in front of its markers (`# x` -> `## x`, `* x` -> `#* x`).
-- The `#` goes after the newline, directly before the existing markers;
-- placing it before the newline would create an empty list item. Blank
-- lines in front of a nested list line are removed, because a blank line
-- would end the enclosing list.
function OrderedList(items)
  local buffer = {}
  for _, item in ipairs(items) do
    local nested = string.gsub(item, '\n+([%*#]+) ', '\n#%1 ')
    buffer[#buffer + 1] = '# ' .. nested
  end
  return table.concat(buffer, '\n')
end
-- Renders a definition list.
-- items is an array of tables, each holding one entry whose key is the
-- rendered term and whose value is an array of rendered definitions.
-- Every term is written as `;term` and every definition as `:definition`,
-- each on its own line; entries are separated by a blank line.
function DefinitionList(items)
  local entries = {}
  for _, item in pairs(items) do
    local term, definitions = next(item)
    local definition_lines = table.concat(definitions, '\n:')
    entries[#entries + 1] = table.concat({ ';', term, '\n:', definition_lines, '\n' })
  end
  return table.concat(entries, '\n') .. '\n'
end
-- Convert a pandoc alignment name into the value of an HTML align
-- attribute. align is AlignLeft, AlignRight, AlignCenter, or AlignDefault;
-- AlignDefault and any unrecognised value give 'left'.
local function html_align(align)
  local names = {
    AlignLeft = 'left',
    AlignRight = 'right',
    AlignCenter = 'center',
  }
  return names[align] or 'left'
end
-- Renders an image that stands alone in a paragraph, as HTML.
-- src: image source; tit: title (not used); caption: caption text;
-- attr: attribute table, of which only attr.id is written.
-- Without a caption the result is an <img> inside a <p>; with a caption
-- it is a <figure> whose <img> carries the caption as alt text, followed
-- by a <figcaption>.
function CaptionedImage(src, tit, caption, attr)
  if #caption ~= 0 then
    local ecaption = escape(caption)
    local image = '<img src="' .. escape(src,true) ..
      '" id="' .. attr.id .. '" alt="' .. ecaption .. '"/>'
    return '<figure>\n' .. image ..
      '<figcaption>' .. ecaption .. '</figcaption>\n</figure>'
  end
  return '<p><img src="' .. escape(src,true) .. '" id="' .. attr.id .. '"/></p>'
end
-- Renders a table as HTML, one tag per output line.
-- caption is a string, aligns is an array of alignment names, widths is
-- an array of floats (fractions of the total width), headers is an array
-- of rendered header cells, rows is an array of arrays of rendered cells.
--
-- The <caption> is written only for a non-empty caption, the <col> tags
-- only when the first width is not 0, and the header row only when at
-- least one header cell is non-empty. Body rows alternate between the
-- classes "odd" and "even", starting with "odd".
function Table(caption, aligns, widths, headers, rows)
  local out = {}
  local function emit(line)
    out[#out + 1] = line
  end

  emit('<table>')
  if caption ~= '' then
    emit('<caption>' .. escape(caption) .. '</caption>')
  end

  if widths and widths[1] ~= 0 then
    for _, width in pairs(widths) do
      local percent = string.format('%.0f%%', width * 100)
      emit('<col width="' .. percent .. '" />')
    end
  end

  local header_cells = {}
  local all_blank = true
  for column, text in pairs(headers) do
    local align = html_align(aligns[column])
    header_cells[#header_cells + 1] = '<th align="' .. align .. '">' .. text .. '</th>'
    if text ~= '' then
      all_blank = false
    end
  end
  if not all_blank then
    emit('<tr class="header">')
    for _, cell in pairs(header_cells) do
      emit(cell)
    end
    emit('</tr>')
  end

  local parity = 'even'
  for _, row in pairs(rows) do
    if parity == 'even' then
      parity = 'odd'
    else
      parity = 'even'
    end
    emit('<tr class="' .. parity .. '">')
    for column, cell in pairs(row) do
      emit('<td align="' .. html_align(aligns[column]) .. '">' .. cell .. '</td>')
    end
    emit('</tr>')
  end

  emit('</table>')
  return table.concat(out, '\n')
end
-- Renders a raw block: HTML is passed through unchanged, content in
-- any other format is dropped.
function RawBlock(format, str)
  if format ~= 'html' then
    return ''
  end
  return str
end
-- Renders a div as an `@@` block: an opening `@@` line that carries the
-- class shorthand from attributes(), the rendered content, and a closing
-- `@@` line.
function Div(s, attr)
  local opening = '@@' .. attributes(attr, 'class')
  return opening .. '\n' .. s .. '\n@@'
end
-- The following code produces a runtime warning whenever a global that
-- was never defined is read, which happens when pandoc asks for a writer
-- function this file does not provide. The warning goes to stderr and a
-- stand-in function that returns an empty string is handed back, so the
-- conversion carries on. Useful while working on a writer.
setmetatable(_G, {
  __index = function(_, key)
    io.stderr:write(("WARNING: Undefined function '%s'\n"):format(key))
    return function()
      return ''
    end
  end,
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment