#!/usr/bin/env ruby
# frozen_string_literal: true
# |
# # Archive linkding bookmarks to DEVONthink |
# |
# This script will archive bookmarks from a linkding server |
# to DEVONthink. |
# |
# This script is designed to run once initially, and then be |
# set to run in the background at intervals using cron or |
# launchd. |
# |
# You can specify a certain tag to only archive bookmarks |
# containing that tag. This is useful because if you're just |
# linking a tool or interesting app, you probably don't need |
# an archive of it. Tagging allows selective and intentional |
# archiving. But if you leave it blank, the script will just |
# archive all bookmarks. |
# |
# You can also specify tags for different types of archives, |
# including webloc, webarchive, and pdf. |
# |
# |
# To use the script, you'll need Ruby available. Install |
# with a package manager or version manager if you don't |
# have it. |
# |
# You'll also need to install the HTTParty gem. Run `gem |
# install httparty` to add it. |
# |
# If you run multiple versions, be sure to install the gem |
# to the version of Ruby that the script will run under. You |
# may want to use a wrapper or execute command from your |
# version manager when setting up an automated job so that |
# the correct gems are available. |
# |
# ## USAGE |
# |
# Save the script in your PATH and make it executable with |
# `chmod a+x linkding.rb`. Modify the options section at the |
# top with your server name, API key |
# (https://example.com/settings/integrations), and set the |
# various options. Run it once from the command line to test |
# and to archive initial existing bookmarks with the |
# specified tags. Once run successfully, you can then set up |
# a cron job or launchd agent to run in the background at |
# intervals (I recommend at least a 5 minute interval). I |
# recommend the app |
# [LaunchControl](https://www.soma-zone.com/LaunchControl/) |
# for editing launchd jobs. |
# |
# The script will create a file at `~/.last_linkding_id.txt` |
# to keep track of the last bookmark ID it archived. If you |
# want to re-archive all bookmarks, you can delete this file |
# or run the script with the argument `reset` to clear it. |
# If a numeric argument is provided to the reset, it will |
# set the last ID to that number. |
# |
# |
# See the commented options at top for configuration |
# |
# ## LICENSE |
# |
# MIT license, copyright Brett Terpstra 2025 |
%w[time cgi fileutils json erb shellwords English httparty].each do |filename| |
require filename |
end |
# Configuration
options = {
  # your linkding install URL
  server: 'https://bookmarks.example.com',
  # API key https://your_install/settings/integrations
  # Taken from the LINKDING_API_KEY environment variable when it is set;
  # otherwise replace the empty fallback below with your key.
  api_key: ENV.fetch('LINKDING_API_KEY', ''),
  # DEVONthink group to which to save bookmarks
  # Use 'inbox' to save to the inbox
  # Or specify a group with POSIX path, e.g. '/Bookmarks'
  group: '/LinkDing',
  # DEVONthink database that contains the group
  # Use 'global' to save to the global inbox
  # Or specify a database name, e.g. 'Primary'
  database: 'global',
  # Use Readability (no clutter) when creating markdown,
  # web archive, or PDF
  readability: {
    archive: true,
    markdown: true,
    pdf: true
  },
  # Use Marky the Markdownifier v2 for markdownifying
  # (otherwise use DEVONthink's built-in markdownifier)
  #
  # FIXME: Using Marky for this is currently unpredictable,
  # I'll be digging in to find out why soon.
  marky: false,
  # This tag must be present in addition to the type tags below
  # Leave blank to act on all bookmarks (very slow)
  primary_tag: '.archive',
  # If any tag below is empty, primary_tag will save
  # that type if no other type tag is present. This allows
  # you to set a default type(s) for bookmarks without
  # a specific type tag.
  #
  # For example, if you set primary_tag to '.archive' and
  # leave markdown_tag empty, any bookmark tagged with
  # '.archive' will be saved as markdown if it doesn't have
  # an archive, bookmark, or PDF tag.
  #
  # Tag to save Markdown version
  markdown_tag: '',
  # Tag to save web archive
  archive_tag: '.webarchive',
  # Tag to save webloc bookmark
  bookmark_tag: '.bookmark',
  # Tag to save PDF
  pdf_tag: '.pdf'
}

# Archive types the script can produce; each has a matching "<type>_tag" option
ARCHIVE_TYPES = %i[markdown archive bookmark pdf].freeze
# File that stores the ID of the last bookmark archived
LAST_ID_FILE = File.expand_path('~/.last_linkding_id.txt')
# Utility functions
module Util
  # Maximum number of HTTP or meta-refresh hops followed before giving up,
  # so a redirect loop cannot hang the script
  MAX_REDIRECTS = 10

  # Matches an HTML meta refresh tag and captures its target URL.
  # Case-insensitive because HTML attribute names and values such as
  # "Refresh"/"URL=" are not case-sensitive.
  META_REFRESH = /meta http-equiv=["']refresh["'].*?url=(.*?)["']/i.freeze

  class << self
    # Use HTTParty GET requests (automatic redirect handling disabled) to
    # follow Location headers one hop at a time.
    #
    # Stops when a response has no Location header, when a URL repeats
    # (redirect cycle), or after MAX_REDIRECTS hops.
    #
    # @param original_url [String] URL to resolve
    #
    # @return [String, nil] last URL reached, or nil if any request raised
    #
    def follow_redirects(original_url)
      current = original_url
      visited = [current]
      MAX_REDIRECTS.times do
        location = HTTParty.get(current, follow_redirects: false).headers['location']
        break if location.nil? || location.empty? || visited.include?(location)

        visited << location
        current = location
      end
      current
    rescue StandardError => e
      warn e
      nil
    end

    # Test if URL result is meta redirect
    #
    # Despite the predicate-style name, this returns a URL, not a boolean.
    #
    # @param url [String] URL to fetch and inspect
    # @param depth [Integer] number of meta refreshes already followed
    #                        (internal recursion counter)
    #
    # @return [String] final url after following redirects
    #
    def redirect?(url, depth = 0)
      return url if depth >= MAX_REDIRECTS

      target = fetch_body(url).scrub[META_REFRESH, 1]
      # An empty or self-referencing target gives nothing new to follow
      return url if target.nil? || target.empty? || target == url

      redirect?(target, depth + 1)
    end

    private

    # Download a page body with curl.
    #
    # The URL is passed as a separate argv entry (no shell), so quotes or
    # `$(...)` inside a bookmark URL cannot be executed.
    #
    # @param url [String] URL to download
    #
    # @return [String] response body, or '' if curl could not be started
    #
    def fetch_body(url)
      IO.popen(['curl', '-SsL', '--url', url.to_s], &:read).to_s
    rescue SystemCallError => e
      warn e
      ''
    end
  end
end
# String extensions
class ::String
  # Escape special characters for AppleScript
  #
  # @return [String] copy with a backslash inserted before every
  #                  double quote and backslash
  def e_as
    gsub(/["\\]/) { |char| "\\#{char}" }
  end

  # Convert to Markdown blockquote
  #
  # @return [String] Markdown blockquote
  def block_quote
    split(/\n/).map { |line| "> #{line}" }.join("\n")
  end

  # Convert to Markdown italicized text, line by line
  #
  # Whitespace-only lines become empty lines instead of `__`.
  #
  # @return [String] Markdown italicized text
  def italicize
    split(/\n/).map { |line| line.strip.empty? ? '' : "_#{line}_" }.join("\n")
  end

  #
  # Some fixes for Marky output
  #
  # - Removes metadata to be replaced by this script's metadata
  #
  # @return [String] content without date/title/tags/source/description lines
  def marky_fix
    strip.scrub.gsub(/^(date|title|tags|source|description):.*?\n/, '').strip
  end

  #
  # Discard invalid characters and output a UTF-8 String
  #
  # NOTE: this replaces core String#scrub for the whole process, so it keeps
  # the core method's optional replacement argument to avoid breaking other
  # callers. The core block form is not supported.
  #
  # The round trip through UTF-16 forces a real conversion even when the
  # string is already tagged UTF-8. `undef: :replace` additionally covers
  # bytes that have no mapping (e.g. high bytes in a binary string), which
  # would otherwise raise Encoding::UndefinedConversionError.
  #
  # @param replacement [String, nil] text substituted for bad bytes
  #                                  (encoder default when nil)
  #
  # @return [String] UTF-8 encoded string
  #
  def scrub(replacement = nil)
    conversion = { invalid: :replace, undef: :replace }
    conversion[:replace] = replacement if replacement
    encode('utf-16', **conversion).encode('utf-8')
  end

  #
  # Destructive version of #scrub
  #
  # @param replacement [String, nil] text substituted for bad bytes
  #
  # @return [String] UTF-8 encoded string, in place
  #
  def scrub!(replacement = nil)
    replace scrub(replacement)
  end
end
# A single linkding bookmark, normalized for archiving
class Bookmark
  attr_reader :url, :title, :description, :notes, :tags, :date, :id
  attr_writer :options

  # Initialize a new Bookmark instance
  #
  # The URL is resolved through Util.redirect? and Util.follow_redirects;
  # when that yields nothing usable the bookmark's own URL is kept.
  #
  # @param bookmark [Hash] bookmark record; reads the 'url', 'title',
  #                        'description', 'notes', 'tag_names',
  #                        'date_added' and 'id' keys
  # @param options [Hash] script options (primary_tag and the *_tag keys)
  def initialize(bookmark, options = {})
    original_url = bookmark['url']
    resolved = Util.follow_redirects(Util.redirect?(original_url))
    @url = resolved.nil? || resolved.empty? ? original_url : resolved
    # to_s / Array() tolerate fields that come back as null
    @title = bookmark['title'].to_s.strip
    @description = bookmark['description'].to_s.strip
    @notes = bookmark['notes'].to_s.strip.block_quote
    @tags = Array(bookmark['tag_names'])
    @date = bookmark['date_added']
    @id = bookmark['id']
    @options = options
  end

  # Remove special tags from tag array
  #
  # @return [Array<String>] copy of the tags without the primary tag and
  #                         without any configured archive-type tag
  def clean_tags
    special = ARCHIVE_TYPES.map { |type| tag_for(type) }.compact << @options[:primary_tag]
    @tags - special
  end

  # Determine if bookmark should be saved as a specific type
  #
  # A type with its own tag is saved only when the bookmark carries that
  # tag. A type whose tag option is blank acts as the default: it is saved
  # when no other configured type tag is present and the bookmark has the
  # primary tag. A blank primary tag means every bookmark qualifies.
  #
  # @param type [Symbol] one of ARCHIVE_TYPES
  #
  # @return [Boolean] whether to archive the bookmark as this type
  def should_save?(type)
    own_tag = tag_for(type)
    return @tags.include?(own_tag) if own_tag

    explicit_other = ARCHIVE_TYPES.any? do |other|
      other_tag = tag_for(other)
      other != type && other_tag && @tags.include?(other_tag)
    end
    return false if explicit_other

    primary = @options[:primary_tag].to_s
    primary.empty? || @tags.include?(primary)
  end

  private

  # Configured tag for an archive type, or nil when the option is unset/blank
  def tag_for(type)
    tag = @options[:"#{type}_tag"]
    tag unless tag.nil? || tag.empty?
  end
end
# Client for a linkding server plus the DEVONthink import logic
class Linkding
  # Number of bookmarks requested per API page; also the step by which the
  # `offset` query parameter advances between pages
  PAGE_SIZE = 1000

  # Initialize a new instance
  #
  # @param options [Hash] hash of options
  #
  # @option options [String] :server Linkding server
  # @option options [String] :api_key API key
  # @option options [String] :markdown_tag Tag for markdown
  # @option options [String] :bookmark_tag Tag for webloc
  # @option options [String] :archive_tag Tag for webarchive
  # @option options [String] :pdf_tag Tag for PDF
  def initialize(options = {})
    # Always keep a Hash so later @options[...] lookups never hit nil
    @options = options || {}
  end

  # retrieves the JSON output from the Linkding API
  #
  # @param api_call [String] the API path to call
  #
  # @return [Hash] Converted hash of output
  #
  # @raise [JSON::ParserError] if the response is not valid JSON
  def get_json(api_call)
    JSON.parse(curl('-H', "Authorization: Token #{@options[:api_key]}", "#{@options[:server]}#{api_call}"))
  end

  # Fetches bookmarks (optionally limited to a tag) and keeps those whose ID
  # is greater than the one stored in LAST_ID_FILE
  #
  # @param tag [String, nil] only fetch bookmarks with this tag; nil or
  #                          empty fetches everything
  #
  # @return [Array] array of unsaved bookmarks, sorted by ascending ID
  #
  def new_bookmarks(tag = nil)
    search = tag.nil? || tag.empty? ? '' : "&q=%23#{CGI.escape(tag)}"
    call = "/api/bookmarks/?limit=#{PAGE_SIZE}&format=json#{search}"
    json = get_json(call)
    bookmarks = Array(json['results'])
    offset = 0
    while json['next']
      # offset is an index into the result list, so step by a full page
      offset += PAGE_SIZE
      json = get_json("#{call}&offset=#{offset}")
      page = Array(json['results'])
      break if page.empty?

      bookmarks.concat(page)
    end

    last_linkding_id = File.exist?(LAST_ID_FILE) ? File.read(LAST_ID_FILE).strip.to_i : 0
    puts "Starting at bookmark ##{last_linkding_id}"

    # uniq guards against a record appearing on two pages when the list
    # shifts between requests
    bookmarks.uniq { |b| b['id'] }
             .reject { |b| b['id'].to_i <= last_linkding_id }
             .sort_by { |b| b['id'].to_i }
  end

  # Filter bookmarks based on tag array
  #
  # The passed array is modified in place.
  #
  # @param bookmarks [Array] array of bookmarks
  # @param tags [Array] array of tags to filter on.
  #                     Bookmarks must contain all tags
  #                     in array to pass.
  #
  # @return [Array] filtered array of bookmarks
  def filter_bookmarks(bookmarks, tags)
    required = tags.is_a?(Array) ? tags : [tags]
    bookmarks.keep_if { |bm| required.all? { |tag| bm['tag_names'].include?(tag) } }
  end

  # markdownify url with Marky the Markdownifier
  #
  # @param url [String] URL to markdownify
  # @param readability [Boolean] ask Marky to apply readability
  #
  # @return [String] markdown content
  #
  def marky(url, readability = true)
    r = readability ? '1' : '0'
    curl(%(https://heckyesmarkdown.com/api/2/?url=#{CGI.escape(url)}&format=mmd&readability=#{r}))
  end

  # Save a bookmark to DEVONthink
  #
  # The generated AppleScript is piped to `osascript` on stdin.
  #
  # @param type [Symbol] type of archive to save
  # @param bookmark [Bookmark] bookmark to save
  #
  # @return [Boolean] success status
  def save_to_dt(type = :markdown, bookmark)
    script = build_script(type, bookmark)
    return false unless script

    # stdout is discarded, matching the previous backtick capture
    IO.popen(['osascript'], 'w', out: File::NULL) { |io| io.write(script) }
    $CHILD_STATUS.success?
  rescue SystemCallError => e
    # osascript missing or not executable
    warn e
    false
  end

  # Save the last bookmark ID to a file
  #
  # @param bookmark [Bookmark] bookmark whose ID is written to LAST_ID_FILE
  def save_last_id(bookmark)
    File.write(LAST_ID_FILE, bookmark.id.to_s)
  end

  private

  # Run curl without a shell and return its standard output
  def curl(*args)
    IO.popen(['curl', '-SsL', *args], &:read)
  end

  # Build the full AppleScript for one archive; nil for an unknown type
  def build_script(type, bookmark)
    create = create_command(type, bookmark)
    return nil unless create

    tags = bookmark.clean_tags.join(',').e_as
    annotation = "#{bookmark.description.to_s.e_as}\n\n#{bookmark.notes.to_s.e_as}"
    <<~APPLESCRIPT
      tell application id "DNtp"
      #{group_command}
      #{create}
      set AppleScript's text item delimiters to ","
      set theList to every text item of "#{tags}"
      set tags of theRecord to theList
      set theAnnotation to annotation of theRecord
      if theAnnotation is not missing value then
      update record theAnnotation with text "#{annotation}" mode appending
      else
      set annotation of theRecord to create record with {name:((name of theRecord) as string) & " (Annotation)", type:markdown, content:"#{annotation}"} in (annotations group of database of theRecord)
      end if
      end tell
    APPLESCRIPT
  end

  # AppleScript statement that creates theRecord for the given archive type
  def create_command(type, bookmark)
    name = bookmark.title.to_s.e_as
    url = bookmark.url.to_s.e_as
    readability = @options[:readability] || {}
    case type
    when :markdown
      if @options[:marky]
        content = marky(bookmark.url, readability[:markdown]).e_as
        %(set theRecord to create record with {name:"#{name}", type:markdown, content:"#{content}", URL:"#{url}"} in theGroup)
      else
        %(set theRecord to create Markdown from "#{url}" readability #{readability[:markdown] ? true : false} name "#{name}" in theGroup)
      end
    when :bookmark
      %(set theRecord to create record with {name:"#{name}", type:bookmark, URL:"#{url}"} in theGroup)
    when :archive
      %(set theRecord to create web document from "#{url}" readability #{readability[:archive] ? true : false} name "#{name}" in theGroup )
    when :pdf
      %(set theRecord to create PDF document from "#{url}" pagination true readability #{readability[:pdf] ? true : false} name "#{name}" in theGroup)
    end
  end

  # AppleScript statements that set theGroup, creating the group if missing
  def group_command
    group = @options[:group].to_s
    return %(set theGroup to inbox) if group.match?(/^inbox$/i)

    database = if @options[:database].to_s.match?(/^global$/i)
                 'inbox'
               else
                 %(database "#{@options[:database].to_s.e_as}")
               end
    path = group.e_as
    [
      %(set theGroup to get record at "#{path}" in #{database}),
      'if theGroup is missing value or type of theGroup is not group then',
      %(set theGroup to create location "#{path}" in #{database}),
      'end if'
    ].join("\n")
  end
end
# Reset last bookmark ID
#
# Runs when the first command-line argument starts with "r" (e.g. `reset`),
# then exits without archiving anything.
if ARGV.first&.match?(/^r/)
  requested_id = ARGV[1]
  if requested_id&.match?(/^\d+$/)
    # A numeric second argument becomes the new starting point
    File.write(LAST_ID_FILE, requested_id)
    puts "Reset last bookmark ID to #{requested_id}"
  else
    # Otherwise forget the stored ID so every bookmark is considered again
    File.delete(LAST_ID_FILE) if File.exist?(LAST_ID_FILE)
    puts 'Reset last bookmark ID'
  end
  exit 0
end
# Main run: fetch bookmarks newer than the stored ID and archive each one in
# every type selected by its tags
ld = Linkding.new(options)
puts "#{Time.now.strftime('%Y-%m-%d %H:%M')}: Starting"
new_bookmarks = ld.new_bookmarks(options[:primary_tag])
puts "#{Time.now.strftime('%Y-%m-%d %H:%M')}: Fetched archive bookmarks"
archived = 0
# archive content and merge new bookmarks into main database
new_bookmarks.each do |mark|
  bookmark = Bookmark.new(mark, options)
  ARCHIVE_TYPES.each do |type|
    next unless bookmark.should_save?(type)

    begin
      if ld.save_to_dt(type, bookmark)
        puts "✅ Saved ##{bookmark.id} #{bookmark.url} as #{type} to DEVONthink"
        # Record progress only for saves that succeeded, so a failed
        # bookmark is not marked as archived
        ld.save_last_id(bookmark)
        archived += 1
      else
        puts "😥 Failed to save #{bookmark.url} as #{type}"
      end
    rescue StandardError => e
      puts "😥 Failed to save #{bookmark.url} as #{type}"
      warn e
      warn e.backtrace
    end
  end
rescue StandardError => e
  # Bookmark.new itself may have raised, so report the raw record's URL
  puts "😥 Failed to save #{mark['url']}"
  warn e
  warn e.backtrace
end
puts "🗃️ Archived #{archived} new bookmarks"