|
#!/usr/bin/env ruby |
|
# frozen_string_literal: true |
|
|
|
# |
|
# # Archive linkding bookmarks to DEVONthink |
|
# |
|
# This script will archive bookmarks from a linkding server |
|
# to DEVONthink. |
|
# |
|
# This script is designed to run once initially, and then be |
|
# set to run in the background at intervals using cron or |
|
# launchd. |
|
# |
|
# You can specify a certain tag to only archive bookmarks |
|
# containing that tag. This is useful because if you're just |
|
# linking a tool or interesting app, you probably don't need |
|
# an archive of it. Tagging allows selective and intentional |
|
# archiving. But if you leave it blank, the script will just |
|
# archive all bookmarks. |
|
# |
|
# You can also specify tags for different types of archives, |
|
# including webloc, webarchive, and pdf. |
|
# |
|
# ## REQUIREMENTS
|
# |
|
# To use the script, you'll need Ruby available. Install |
|
# with a package manager or version manager if you don't |
|
# have it. |
|
# |
|
# You'll also need to install the HTTParty gem. Run `gem |
|
# install httparty` to add it. |
|
# |
|
# If you run multiple versions, be sure to install the gem |
|
# to the version of Ruby that the script will run under. You |
|
# may want to use a wrapper or execute command from your |
|
# version manager when setting up an automated job so that |
|
# the correct gems are available. |
|
# |
|
# ## USAGE |
|
# |
|
# Save the script in your PATH and make it executable with |
|
# `chmod a+x linkding.rb`. Modify the options section at the |
|
# top with your server name, API key |
|
# (https://example.com/settings/integrations), and set the |
|
# various options. Run it once from the command line to test |
|
# and to archive initial existing bookmarks with the |
|
# specified tags. Once run successfully, you can then set up |
|
# a cron job or launchd agent to run in the background at |
|
# intervals (I recommend at least a 5 minute interval). I |
|
# recommend the app |
|
# [LaunchControl](https://www.soma-zone.com/LaunchControl/) |
|
# for editing launchd jobs. |
|
# |
|
# The script will create a file at `~/.last_linkding_id.txt` |
|
# to keep track of the last bookmark ID it archived. If you |
|
# want to re-archive all bookmarks, you can delete this file |
|
# or run the script with the argument `reset` to clear it. |
|
# If a numeric argument is provided to the reset, it will |
|
# set the last ID to that number. |
|
# |
|
# ## CONFIGURATION |
|
# |
|
# See the commented options at top for configuration |
|
# |
|
# ## LICENSE |
|
# |
|
# MIT license, copyright Brett Terpstra 2025 |
|
|
|
%w[time cgi fileutils json erb shellwords English uri httparty].each do |filename|
  require filename
end
|
|
|
# Configuration
#
# The server URL and API key may also be supplied through the
# LINKDING_SERVER and LINKDING_API_KEY environment variables, which
# take precedence over the values written here. This keeps the secret
# out of the script when it runs from cron or launchd.
options = {
  # your linkding install URL
  server: ENV.fetch('LINKDING_SERVER', 'https://bookmarks.example.com'),

  # API key https://your_install/settings/integrations
  api_key: ENV.fetch('LINKDING_API_KEY', 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'),

  # DEVONthink group to which to save bookmarks
  # Use 'inbox' to save to the inbox
  # Or specify a group with POSIX path, e.g. '/Bookmarks'
  group: '/LinkDing',

  # DEVONthink database containing the group
  # Use 'global' to save to the global inbox
  # Or specify a database name, e.g. 'Primary'
  database: 'global',

  # Use Readability (no clutter) when creating markdown,
  # web archive, or PDF
  readability: {
    archive: true,
    markdown: true,
    pdf: true
  },

  # Use Marky the Markdownifier v2 for markdownifying
  # (otherwise use DEVONthink's built-in markdownifier)
  #
  # FIXME: Using Marky for this is currently unpredictable,
  # I'll be digging in to find out why soon.
  marky: false,

  # This tag must be present in addition to the type tags below
  # Leave blank to act on all bookmarks (very slow)
  primary_tag: '.archive',

  # If any tag below is empty, primary_tag will save
  # that type if no other type tag is present. This allows
  # you to set a default type(s) for bookmarks without
  # a specific type tag.
  #
  # For example, if you set primary_tag to '.archive' and
  # leave markdown_tag empty, any bookmark tagged with
  # '.archive' will be saved as markdown if it doesn't have
  # an archive, bookmark, or PDF tag.
  #
  # Tag to save Markdown version
  markdown_tag: '',
  # Tag to save web archive
  archive_tag: '.webarchive',
  # Tag to save webloc bookmark
  bookmark_tag: '.bookmark',
  # Tag to save PDF
  pdf_tag: '.pdf'
}

# Archive types the script knows how to create; each one has a
# matching "<type>_tag" entry in the options hash
ARCHIVE_TYPES = %i[markdown archive bookmark pdf].freeze

# File recording the ID of the last bookmark that was archived
LAST_ID_FILE = File.expand_path('~/.last_linkding_id.txt')
|
|
|
# Utility functions
module Util
  # Maximum number of hops to chase, so that a redirect loop or a page
  # that refreshes to itself cannot hang the script
  MAX_REDIRECTS = 10

  # Captures the target URL of an HTML meta refresh tag. Case-insensitive
  # because pages commonly write "URL=", and tolerant of a quoted target.
  META_REFRESH = /meta http-equiv=["']refresh["'].*?url=\s*["']?([^"'>\s]+)/i.freeze

  class << self
    # Follow HTTP redirects (Location headers) with HTTParty GET requests
    #
    # @param original_url [String] URL to resolve
    #
    # @return [String, nil] the last URL reached (the original URL when
    #   there is no redirect), or nil if a request raised an error
    #
    def follow_redirects(original_url)
      current = original_url

      MAX_REDIRECTS.times do
        location = HTTParty.get(current, follow_redirects: false).headers['location'].to_s.strip
        break if location.empty?

        # Location may be relative to the URL that was just requested
        current = absolutize(current, location)
      end

      current
    rescue StandardError => e
      warn e
      nil
    end

    # Follow HTML meta refresh redirects
    #
    # The page body is fetched with curl, which follows HTTP redirects
    # itself (-L).
    #
    # @param url [String] URL to test
    #
    # @return [String] final url after following meta redirects
    #
    def redirect?(url)
      MAX_REDIRECTS.times do
        target = meta_refresh_target(fetch_body(url))
        break if target.nil?

        resolved = absolutize(url, target)
        break if resolved == url # page refreshes to itself

        url = resolved
      end

      url
    end

    private

    # Download the body of url with curl. The argument list is passed
    # directly to the process (no shell), so characters in the URL can
    # never be interpreted as shell syntax.
    #
    # @return [String] page body, empty if curl could not be started
    def fetch_body(url)
      IO.popen(['curl', '-SsL', '--url', url.to_s], &:read).to_s.scrub
    rescue SystemCallError => e
      warn e
      ''
    end

    # @return [String, nil] meta refresh target found in content, if any
    def meta_refresh_target(content)
      found = META_REFRESH.match(content)
      found && found[1]
    end

    # Resolve target against base; falls back to target unchanged when
    # either one cannot be parsed as a URI
    def absolutize(base, target)
      URI.join(base.to_s, target.to_s).to_s
    rescue URI::Error, ArgumentError
      target
    end
  end
end
|
|
|
# String extensions |
|
class ::String |
|
# Escape special characters for AppleScript |
|
def e_as |
|
to_s.gsub(/(?=["\\])/, '\\') |
|
end |
|
|
|
# Convert to Markdown blockquote |
|
# |
|
# @return [String] Markdown blockquote |
|
def block_quote |
|
split(/\n/).map { |s| "> #{s}" }.join("\n") |
|
end |
|
|
|
# Convert to Markdown italicized text, line by line |
|
# |
|
# @return [String] Markdown italicized text |
|
def italicize |
|
split(/\n/).map { |s| s.strip.empty? ? '' : "_#{s}_" }.join("\n") |
|
end |
|
|
|
# |
|
# Some fixes for Marky output |
|
# |
|
# - Removes metadata to be replaced by this script's metadata |
|
def marky_fix |
|
content = strip.scrub |
|
content.gsub!(/^(date|title|tags|source|description):.*?\n/, '') |
|
content.strip |
|
end |
|
|
|
# |
|
# Discard invalid characters and output a UTF-8 String |
|
# |
|
# @return [String] UTF-8 encoded string |
|
# |
|
def scrub |
|
encode('utf-16', invalid: :replace).encode('utf-8') |
|
end |
|
|
|
# |
|
# Destructive version of #utf8 |
|
# |
|
# @return [String] UTF-8 encoded string, in place |
|
# |
|
def scrub! |
|
replace scrub |
|
end |
|
end |
|
|
|
# A single linkding bookmark, with its URL resolved through redirects
class Bookmark
  attr_reader :url, :title, :description, :notes, :tags, :date, :id
  attr_writer :options

  # Initialize a new Bookmark instance
  #
  # Resolves the bookmark URL through meta refresh and HTTP redirects
  # (network requests via Util), falling back to the stored URL when
  # resolution fails.
  #
  # @param bookmark [Hash] bookmark as returned by the linkding API
  # @param options [Hash] script options (primary_tag and *_tag keys)
  def initialize(bookmark, options = {})
    stored_url = bookmark['url']
    resolved = Util.follow_redirects(Util.redirect?(stored_url))
    @url = resolved || stored_url

    # to_s guards against null fields in the API response
    @title = bookmark['title'].to_s.strip
    @description = bookmark['description'].to_s.strip
    @notes = bookmark['notes'].to_s.strip.block_quote
    @tags = Array(bookmark['tag_names'])
    @date = bookmark['date_added']
    @id = bookmark['id']
    @options = options
  end

  # Remove special tags from tag array
  #
  # @return [Array<String>] tags without the primary tag and without
  #   any configured archive type tag
  def clean_tags
    special = [@options[:primary_tag], *ARCHIVE_TYPES.map { |type| type_tag(type) }].compact
    @tags - special
  end

  # Determine if bookmark should be saved as a specific type
  #
  # A type with a configured tag is saved when the bookmark carries that
  # tag. A type whose tag is blank acts as the default: it is saved when
  # no other type tag is present and the bookmark carries the primary
  # tag, or when no primary tag is configured, in which case every
  # bookmark qualifies.
  #
  # @param type [Symbol] one of ARCHIVE_TYPES
  #
  # @return [Boolean] true if the bookmark should be saved as type
  def should_save?(type)
    own_tag = type_tag(type)
    return @tags.include?(own_tag) if own_tag

    return false if ARCHIVE_TYPES.any? { |other| other != type && tagged_as?(other) }

    primary = @options[:primary_tag].to_s
    primary.empty? || @tags.include?(primary)
  end

  private

  # @return [String, nil] configured tag for type, nil when blank
  def type_tag(type)
    tag = @options["#{type}_tag".to_sym]
    tag unless tag.nil? || tag.empty?
  end

  # @return [Boolean] true if the bookmark carries the tag configured for type
  def tagged_as?(type)
    tag = type_tag(type)
    !tag.nil? && @tags.include?(tag)
  end
end
|
|
|
# Fetches bookmarks from a linkding server and saves them to DEVONthink
class Linkding
  # Number of bookmarks requested per API page; also the amount the
  # offset advances between pages
  PAGE_SIZE = 1000

  # Initialize a new instance
  #
  # @param options [Hash] hash of options
  #
  # @option options [String] :server Linkding server
  # @option options [String] :api_key API key
  # @option options [String] :markdown_tag Tag for markdown
  # @option options [String] :bookmark_tag Tag for webloc
  # @option options [String] :archive_tag Tag for webarchive
  # @option options [String] :pdf_tag Tag for PDF
  def initialize(options = {})
    # Always assign, so an empty hash does not leave @options nil
    @options = options || {}
  end

  # retrieves the JSON output from the Linkding API
  #
  # @param api_call [String] the API path to call
  #
  # @return [Hash] Converted hash of output
  def get_json(api_call)
    url = "#{@options[:server].to_s.chomp('/')}#{api_call}"
    JSON.parse(curl('-H', "Authorization: Token #{@options[:api_key]}", '--url', url))
  end

  # Fetches all bookmarks (optionally limited to a tag) and returns the
  # ones with an ID greater than the one stored in LAST_ID_FILE
  #
  # @param tag [String, nil] only fetch bookmarks with this tag
  #
  # @return [Array] array of unsaved bookmarks, sorted by ID
  #
  def new_bookmarks(tag = nil)
    search = tag && !tag.empty? ? "&q=#{CGI.escape("##{tag}")}" : ''
    call = "/api/bookmarks/?limit=#{PAGE_SIZE}&format=json#{search}"

    json = get_json(call)
    bookmarks = results_from(json).dup
    offset = 0
    while json['next']
      # Advance by a whole page; a smaller step re-fetches overlapping
      # results and duplicates bookmarks
      offset += PAGE_SIZE
      json = get_json("#{call}&offset=#{offset}")
      bookmarks.concat(results_from(json))
    end

    last_linkding_id = File.exist?(LAST_ID_FILE) ? File.read(LAST_ID_FILE).strip.to_i : 0
    puts "Starting at bookmark ##{last_linkding_id}"

    bookmarks.uniq { |b| b['id'] }
             .select { |b| b['id'].to_i > last_linkding_id }
             .sort_by { |b| b['id'].to_i }
  end

  # Filter bookmarks based on tag array
  #
  # @param bookmarks [Array] array of bookmarks (modified in place)
  # @param tags [Array] array of tags to filter on.
  #                     Bookmarks must contain all tags
  #                     in array to pass.
  #
  # @return [Array] filtered array of bookmarks
  def filter_bookmarks(bookmarks, tags)
    required = Array(tags)
    bookmarks.keep_if { |bm| required.all? { |tag| bm['tag_names'].include?(tag) } }
  end

  # markdownify url with Marky the Markdownifier
  #
  # @param url [String] URL to markdownify
  # @param readability [Boolean] request readability processing
  #
  # @return [String] markdown content
  #
  def marky(url, readability = true)
    flag = readability ? '1' : '0'
    curl('--url', "https://heckyesmarkdown.com/api/2/?url=#{CGI.escape(url)}&format=mmd&readability=#{flag}")
  end

  # Save a bookmark to DEVONthink
  #
  # @param type [Symbol] type of archive to save
  # @param bookmark [Bookmark] bookmark to save
  #
  # @return [Boolean] success status
  def save_to_dt(type = :markdown, bookmark)
    create = create_command(type, bookmark)
    return false if create.nil? # unknown archive type

    run_applescript(build_script(create, bookmark))
  end

  # Save the last bookmark ID to a file
  #
  # @param bookmark [Bookmark] bookmark whose ID is recorded
  def save_last_id(bookmark)
    File.write(LAST_ID_FILE, bookmark.id.to_s)
  end

  private

  # Run curl without going through a shell, so no argument can be
  # interpreted as shell syntax
  #
  # @return [String] standard output of curl
  def curl(*args)
    IO.popen(['curl', '-SsL', *args], &:read)
  end

  # Extract the result list from an API response, failing with the
  # response itself (e.g. an authentication error) when it has none
  def results_from(json)
    results = json.is_a?(Hash) ? json['results'] : nil
    raise "Unexpected response from linkding API: #{json.inspect}" unless results.is_a?(Array)

    results
  end

  # Readability setting for an archive type, as an AppleScript boolean
  def readability_for(type)
    (@options[:readability] || {})[type] ? true : false
  end

  # Build the AppleScript statement that creates the record
  #
  # @return [String, nil] statement assigning theRecord, nil for an
  #   unknown type
  def create_command(type, bookmark)
    name = bookmark.title.e_as
    url = bookmark.url.to_s.e_as # URLs may contain quotes or backslashes

    case type
    when :markdown
      if @options[:marky]
        content = marky(bookmark.url, readability_for(:markdown)).e_as
        %(set theRecord to create record with {name:"#{name}", type:markdown, content:"#{content}", URL:"#{url}"} in theGroup)
      else
        %(set theRecord to create Markdown from "#{url}" readability #{readability_for(:markdown)} name "#{name}" in theGroup)
      end
    when :bookmark
      %(set theRecord to create record with {name:"#{name}", type:bookmark, URL:"#{url}"} in theGroup)
    when :archive
      %(set theRecord to create web document from "#{url}" readability #{readability_for(:archive)} name "#{name}" in theGroup)
    when :pdf
      %(set theRecord to create PDF document from "#{url}" pagination true readability #{readability_for(:pdf)} name "#{name}" in theGroup)
    end
  end

  # Build the AppleScript statements that select (or create) the
  # destination group
  def group_command
    return 'set theGroup to inbox' if @options[:group].to_s.match?(/\Ainbox\z/i)

    database = if @options[:database].to_s.match?(/\Aglobal\z/i)
                 'inbox'
               else
                 %(database "#{@options[:database].to_s.e_as}")
               end
    path = @options[:group].to_s.e_as

    [
      %(set theGroup to get record at "#{path}" in #{database}),
      'if theGroup is missing value or type of theGroup is not group then',
      %(set theGroup to create location "#{path}" in #{database}),
      'end if'
    ].join("\n")
  end

  # Assemble the complete script: create the record, tag it, and attach
  # description and notes as its annotation
  def build_script(create, bookmark)
    tags = bookmark.clean_tags.join(',').e_as
    annotation = "#{bookmark.description.e_as}\n\n#{bookmark.notes.e_as}"

    [
      'tell application id "DNtp"',
      group_command,
      '',
      create,
      '',
      %(set AppleScript's text item delimiters to ","),
      %(set theList to every text item of "#{tags}"),
      'set tags of theRecord to theList',
      '',
      'set theAnnotation to annotation of theRecord',
      'if theAnnotation is not missing value then',
      %(update record theAnnotation with text "#{annotation}" mode appending),
      'else',
      %(set annotation of theRecord to create record with {name:((name of theRecord) as string) & " (Annotation)", type:markdown, content:"#{annotation}"} in (annotations group of database of theRecord)),
      'end if',
      'end tell'
    ].join("\n")
  end

  # Feed the script to osascript on standard input. No shell or heredoc
  # is involved, so bookmark text cannot terminate the script early and
  # be executed as shell commands.
  #
  # @return [Boolean] true if osascript exited successfully
  def run_applescript(script)
    IO.popen(['osascript'], 'r+') do |io|
      io.write(script)
      io.close_write
      io.read
    end
    $CHILD_STATUS.success?
  rescue SystemCallError => e
    # osascript could not be started
    warn e
    false
  end
end
|
|
|
# Handle the reset argument: with a numeric second argument the stored
# last bookmark ID is set to that number, otherwise the ID file is
# removed. The script exits afterwards without archiving anything.
if ARGV.first =~ /^r/
  requested_id = ARGV[1]

  case requested_id
  when /^\d+$/
    File.write(LAST_ID_FILE, requested_id)
    puts "Reset last bookmark ID to #{requested_id}"
  else
    File.delete(LAST_ID_FILE) if File.exist?(LAST_ID_FILE)
    puts 'Reset last bookmark ID'
  end

  exit 0
end
|
|
|
ld = Linkding.new(options)

puts "#{Time.now.strftime('%Y-%m-%d %H:%M')}: Starting"

new_bookmarks = ld.new_bookmarks(options[:primary_tag])

puts "#{Time.now.strftime('%Y-%m-%d %H:%M')}: Fetched archive bookmarks"

# Number of archive records successfully created
archived = 0

# Archive each new bookmark in every type it qualifies for
new_bookmarks.each do |mark|
  bookmark = Bookmark.new(mark, options)

  ARCHIVE_TYPES.each do |type|
    next unless bookmark.should_save?(type)

    begin
      if ld.save_to_dt(type, bookmark)
        puts "✅ Saved ##{bookmark.id} #{bookmark.url} as #{type} to DEVONthink"
        # Only a successful save advances the stored ID and the count,
        # so a failed run does not mark bookmarks as archived
        ld.save_last_id(bookmark)
        archived += 1
      else
        puts "😥 Failed to save #{bookmark.url} as #{type}"
      end
    rescue StandardError => e
      puts "😥 Failed to save #{bookmark.url} as #{type}"
      warn e
      warn e.backtrace
    end
  end
rescue StandardError => e
  # bookmark is still nil when Bookmark.new itself raised, so report
  # the URL from the raw API record instead
  puts "😥 Failed to save #{mark['url']}"
  warn e
  warn e.backtrace
end

puts "🗃️ Archived #{archived} new bookmarks"