-
-
Save jeffvautin/b7ec779c85a6a608976ccd91da6f89bc to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(* Import and update your Pinboard bookmarks to DEVONthink
Based on work done by Christian Grunenberg on Mon Jan 23 2006,
Rafael Bugajewski to support Pinboard instead of Delicious on Sun Dec 19 2010 and
Andreas Zeitler on Sun Mar 03 2011 to display user feedback when finished.
By Sascha A. Carlin <https://about.me/sascha.a.carlin> on 2018-03-07 to set the creation date of new records, show a progress bar, use the Pinboard Auth Token, and use the modification date of the folder to fetch only recent items
Copyright (c) 2018. All rights reserved. *)
use framework "Foundation" | |
use scripting additions | |
-- We use your Pinboard Auth Token ("username:TOKEN"). See https://pinboard.in/settings/password
property pAuthToken : ""
-- We support different import strategies. See below for details.
property importStrategy : "decluttered"
-- 1…3. The higher, the more we send to DEVONthink's log
property logVerbosity : 1
-- If true, sends all log info to AppleScript's logging system
property scriptDebugging : false
-- Extract the username from the Auth token; we'll display it in the progress bar.
-- The username is the text before the first colon. We stop with a readable
-- message when the token is missing or malformed instead of failing with
-- "Can't get item 1 of {}".
set colonOffset to offset of ":" in pAuthToken
if colonOffset < 2 then
	display alert "Pinboard importer" message "Please set pAuthToken to your Pinboard API token (username:TOKEN). See https://pinboard.in/settings/password" as warning
	error number -128 -- "user cancelled": ends the script quietly, the alert already explained why
end if
set pUser to text 1 thru (colonOffset - 1) of pAuthToken
-- We create the group in case it does not exist in the database.
-- We check whether to import all bookmarks from Pinboard or just the ones younger than a certain date.
-- Results: theGroup (the "/Pinboard" group), theUTCDateTime (RFC 3339 string, or "" for a full import)
-- and logUpdateFrom (human-readable explanation for the log).
set theUTCDateTime to ""
set logUpdateFrom to ""
set theGroupComment to ""
tell application id "DNtp"
	set theGroup to get record at "/Pinboard"
	-- Only the group's existence is tested here. The comment cannot be tested yet:
	-- it has not been read from the group at this point.
	if (theGroup is missing value or type of theGroup is not group) then
		-- If the group does not exist, we'll pull all bookmarks from Pinboard
		set theGroup to create location "/Pinboard"
		set thumbnail of theGroup to "https://imgur.com/download/UfP65so" -- PNG of a macOS folder with embossed Pinboard pin. Created using Lucas Garron's Folderify: https://github.com/lgarron/folderify
		set logUpdateFrom to "Group not found or has no comment, assuming we need to start from scratch!"
	else
		set theGroupComment to theGroup's comment as text
		if theGroupComment is not "" then
			-- We use the group's comment to store the datetime of the last Pinboard post to be imported.
			-- Create date string as per RFC 3339 for Pinboard's API
			try
				set theDate to (my dateFromRfc3339String:theGroupComment)
				set theUTCDateTime to (my rfc3339FromDate:theDate)
				set logUpdateFrom to "Group has comment, we start from " & theUTCDateTime
			on error error_message number error_number
				-- The comment is not a parseable date: fall back to a full import
				my logMessage(error_number, error_message, 4)
				set theUTCDateTime to ""
				set logUpdateFrom to "Group comment is not a valid date, assuming we need to start from scratch!"
			end try
		else
			set logUpdateFrom to "Group has no comment, assuming we need to start from scratch!"
		end if
	end if
end tell
-- Report the effective settings before any network traffic happens
logMessage("Import strategy:", importStrategy, 1)
logMessage("Update strategy:", logUpdateFrom, 1)
logMessage("Import posts created on or after:", theUTCDateTime, 1)
-- Build the Pinboard request: always posts/all, narrowed with fromdt only when a start date is known.
-- Debug Switch: Be nice to Pinboard and use recent while debugging.
-- set thePinboardURL to "https://api.pinboard.in/v1/posts/recent?auth_token=" & pAuthToken
set thePinboardURL to "https://api.pinboard.in/v1/posts/all?auth_token=" & pAuthToken
if (theUTCDateTime is not "") then
	set thePinboardURL to thePinboardURL & "&fromdt=" & theUTCDateTime
end if
logMessage("thePinboardURL:", thePinboardURL, 2)
-- Get the data and process the entries.
-- Downloads the XML from thePinboardURL, walks the <post> elements back to front and creates one
-- DEVONthink record per URL that is not yet in the database, using the configured importStrategy.
tell application id "DNtp"
	-- Initialised up front so the summary alert can always be built, even when the import loop fails before it starts counting
	set importedPosts to 0
	try
		set theXML to download markup from thePinboardURL encoding "UTF-8"
		if (theXML is missing value or theXML is "") then
			error "Download failed."
		else if theXML contains "503 Service Temporarily Unavailable" then
			-- Log the response before raising: statements after `error` never run
			my logMessage("theXML:", theXML, 3)
			error "503 Service Temporarily Unavailable"
		else
			tell application "System Events"
				set x to make new XML data with data theXML
				set theElements to XML elements of (XML element 1 of x) -- <posts>
			end tell
			set theElementCount to length of theElements
			my logMessage("Elements to process from XML:", theElementCount as string, 1)
			if (theElementCount < 1) then
				error "Pinboard says it found no new bookmarks."
			end if
			try
				show progress indicator "Importing " & theElementCount & " bookmarks from Pinboard (" & pUser & ") …" steps (theElementCount) with cancel button
				-- Import loop
				repeat with theSteps from theElementCount to 1 by -1
					-- Get URL first. We check whether it's unique.
					tell application "System Events"
						set theElement to item theSteps of theElements
						set theUrl to (value of XML attribute named "href" of theElement) as string
					end tell
					-- Show what we're working on. Stepping for every post (duplicates included) lets the bar reach the end.
					step progress indicator theUrl
					if not (exists record with URL theUrl) then
						-- URL is unique, let's go!
						my logMessage(theUrl, "…", 3)
						-- Extract data from XML
						tell application "System Events"
							set theName to (value of XML attribute named "description" of theElement) as string
							set theTag to (value of XML attribute named "tag" of theElement) as string
							set theComment to (value of XML attribute named "extended" of theElement) as string
							set theDate to (value of XML attribute named "time" of theElement) as string
						end tell
						-- Lower-cased path extension; we use it to handle binary files.
						set theExtension to my lowercaseExtensionOfUrl(theUrl)
						set theTags to my tagsFromString(theTag)
						try
							-- Judging by theExtension, some files should be processed as they come.
							-- We differentiate between "document" and binary formats.
							-- The two lists are disjoint: "zip" and "dmg" used to be in both, which made the bookmark branch unreachable for them.
							if (theExtension is in {"pdf", "jpg", "jpeg", "png", "rtf", "csv", "doc", "docx", "ppt", "pptx", "xls", "xlsx", "key", "pages", "numbers"}) then
								set theRecord to create web document from theUrl in theGroup
							else if (theExtension is in {"zip", "dmg", "exe", "tar", "gz"}) then
								set theRecord to create record with {type:bookmark, rich text:theComment} in theGroup
							else
								(* Choose an IMPORT STRATEGY.
								See also DEVONthink's blog: http://blog.devontechnologies.com/2010/05/tuesday-tip-capturing-web-content/ *)
								if (importStrategy is "create web document") then
									(* Slow. As good as opening the URL in your favorite browser, includes all media & scripts.
									Great web-like format, until Apple breaks backwards compatibility. *)
									-- set theRecord to create web document from "http://instapaper.com/text?u=" & (my encodeText:theUrl) in theGroup
									set theRecord to create web document from theUrl in theGroup
								else if (importStrategy is "create PDF document") then
									(* Slow. PDF version of the site's print layout.
									PDFs preserve most of the original style, most links can still be used.
									Probably the best for serious archiving.
									Todo: How does DEVONthink determine page width? *)
									set theRecord to create PDF document from theUrl in theGroup with pagination
								else if (importStrategy is "create formatted note") then
									(* Slow. If you really can do without the original formatting and remote images.
									Really, I don't see the point. Can as well use Markdown and save some execution time. *)
									set theRecord to create formatted note from theUrl in theGroup
								else if (importStrategy is "decluttered") then
									(* Great for properly marked up HTML. Pretty bad for everything else.
									Want to use the same declutter functionality as in the DEVONthink Web Clipper?
									Uses an external service! *)
									-- Alternatively you can use Instapaper.
									-- See the comment above at "create web document from"
									set theMarkDownUrl to "http://heckyesmarkdown.com/go/?read=1&md=1&u=" & (my encodeText:theUrl)
									set theContent to download markup from theMarkDownUrl
									-- If the Markdown download failed or seems to be too short, grab a PDF instead.
									if (theContent is missing value or length of theContent < 400) then
										set theRecord to create PDF document from theUrl in theGroup with pagination
									else
										set theRecord to create record with {type:markdown, content:theContent} in theGroup
									end if
								else if (importStrategy is "create Markdown") then
									(* Rather not use an external service? *)
									set theRecord to create Markdown from theUrl in theGroup
								else if (importStrategy is "create record") then
									(* Fastest method. Doesn't store anything local.
									If you do not need any content but just the URLs, then choose this. *)
									set theRecord to create record with {type:bookmark, rich text:theComment} in theGroup
								else
									-- Fail with a readable reason instead of an undefined theRecord further down
									error "Unknown importStrategy: " & importStrategy
								end if
							end if
							-- Populate the record's fields with your Pinboard data
							set the name of theRecord to theName
							set the tags of theRecord to theTags
							set the URL of theRecord to theUrl
							set the comment of theRecord to theComment
							-- Properly format theDate! For some functions, DEVONthink needs an AppleScript date type as theDate
							set the date of theRecord to (my dateFromRfc3339String:theDate)
							-- Store the datetime of the last successfully imported post as a comment of the group. We'll use it to determine from which date onwards to ask for new posts the next time the script runs.
							-- Doing so on every post ensures that we'll never miss a post in case the import process breaks.
							try
								set theGroup's comment to theDate
								set importedPosts to importedPosts + 1
							end try
						on error error_message number error_number
							-- Something went wrong with this one post; log it and carry on with the next.
							my logMessage(theUrl, error_message, 0)
						end try
					else
						my logMessage(theUrl, "Skipped as duplicate", 3)
					end if
					-- I like percentages in my logs. theSteps counts down, so the completed share is measured from the top.
					set thePercentage to (theElementCount - theSteps + 1) / theElementCount
					set thePercentage to round thePercentage * 100 rounding down
					my logMessage(theUrl, (thePercentage as string) & "% of " & theElementCount, 4) -- Log level 4 is a magic number. When you set scriptDebugging to true, this info will be logged to AppleScript's log but will never show up in DT.
					if cancelled progress then exit repeat
				end repeat
			on error error_message number error_number
				hide progress indicator
				display alert "Pinboard importer" message error_message as warning
			end try
		end if
		-- When done, play a sound
		hide progress indicator
		tell application "Finder"
			if exists POSIX file "/System/Library/Sounds/Glass.aiff" then
				do shell script "afplay /System/Library/Sounds/Glass.aiff"
			end if
		end tell
		display alert "Done importing." & return & return & "Processed bookmarks: " & theElementCount & return & "Skipped:" & (theElementCount - importedPosts) & return & "Imported: " & importedPosts
	on error error_message number error_number
		hide progress indicator
		if the error_number is not -128 then display alert "Pinboard importer" message error_message as warning
		my logMessage(error_number, error_message, 4)
	end try
end tell

-- Returns the lower-cased path extension of theUrl, or "" when NSURL cannot parse the URL
-- (URLs containing non-ASCII characters make URLWithString: return nothing).
on lowercaseExtensionOfUrl(theUrl)
	try
		set parsedUrl to current application's |NSURL|'s URLWithString:theUrl
		if parsedUrl is missing value then return ""
		set rawExtension to parsedUrl's pathExtension()
		if rawExtension is missing value then return ""
		return (rawExtension's lowercaseString()) as text
	on error
		return ""
	end try
end lowercaseExtensionOfUrl

-- Splits Pinboard's space-separated tag string into a list, restoring the text item delimiters afterwards.
on tagsFromString(theTag)
	set {od, AppleScript's text item delimiters} to {AppleScript's text item delimiters, " "}
	set theTags to {}
	try
		set theTags to (text items of theTag)
	end try
	set AppleScript's text item delimiters to od
	return theTags
end tagsFromString
-- Stolen from Mark Alldritt: http://forum.latenightsw.com/t/formatting-dates/841
on rfc3339FromDate:aDate
	-- Formats aDate as an RFC 3339 timestamp string.
	-- The formatter is pinned to the en_US_POSIX locale and the GMT time zone.
	set posixLocale to current application's NSLocale's localeWithLocaleIdentifier:"en_US_POSIX"
	set gmtZone to current application's NSTimeZone's timeZoneWithAbbreviation:"GMT" -- skip for local time
	set rfcFormatter to current application's NSDateFormatter's alloc()'s init()
	rfcFormatter's setLocale:posixLocale
	rfcFormatter's setTimeZone:gmtZone
	rfcFormatter's setDateFormat:"yyyy'-'MM'-'dd'T'HH':'mm':'ssXXX"
	set formattedDate to rfcFormatter's stringFromDate:aDate
	return formattedDate as text
end rfc3339FromDate:
-- Stolen from Mark Alldritt: http://forum.latenightsw.com/t/formatting-dates/841
on dateFromRfc3339String:theString
	-- Parses an RFC 3339 timestamp string and returns it as an AppleScript date.
	-- The formatter is pinned to the en_US_POSIX locale and the GMT time zone.
	-- A string the formatter cannot parse makes the final coercion raise an error.
	set posixLocale to current application's NSLocale's localeWithLocaleIdentifier:"en_US_POSIX"
	set gmtZone to current application's NSTimeZone's timeZoneWithAbbreviation:"GMT" -- skip for local time
	set rfcParser to current application's NSDateFormatter's alloc()'s init()
	rfcParser's setLocale:posixLocale
	rfcParser's setTimeZone:gmtZone
	rfcParser's setDateFormat:"yyyy'-'MM'-'dd'T'HH':'mm':'ssXXX"
	set parsedDate to rfcParser's dateFromString:theString
	return parsedDate as date
end dateFromRfc3339String:
-- Stolen from Apple: https://developer.apple.com/library/content/documentation/LanguagesUtilities/Conceptual/MacAutomationScriptingGuide/EncodeandDecodeText.html | |
on encodeText:theText
	-- Percent-encodes theText so it can be embedded as the VALUE of a URL query parameter.
	-- Only the RFC 3986 "unreserved" characters are left untouched. Everything else, including
	-- "&", "=", "?", "/" and ":", is escaped, so a bookmarked URL that carries its own query
	-- string cannot spill into the parameters of the URL it is embedded in.
	-- Replaces the deprecated stringByAddingPercentEscapesUsingEncoding:, which left those
	-- reserved characters unescaped.
	-- Returns the encoded text, or theText unchanged if Foundation cannot encode it.
	set unreservedCharacters to current application's NSCharacterSet's characterSetWithCharactersInString:"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~"
	set theString to current application's NSString's stringWithString:theText
	set theAdjustedString to theString's stringByAddingPercentEncodingWithAllowedCharacters:unreservedCharacters
	if theAdjustedString is missing value then return (theText as string)
	return (theAdjustedString as string)
end encodeText:
-- Writes one entry to DEVONthink's log and, optionally, to AppleScript's own log.
--   logResource: what the entry is about (a URL, a label, an error number)
--   logInfo:     the detail text
--   logLevel:    0 = errors, 1…3 = increasingly detailed information,
--                4 = debugging only (never sent to DEVONthink)
-- An entry goes to DEVONthink when its level does not exceed logVerbosity, so raising
-- logVerbosity logs more and level-0 errors are always logged.
-- With scriptDebugging on, every entry is additionally sent to AppleScript's log.
on logMessage(logResource, logInfo, logLevel)
	-- Callers pass numbers (error codes, counts) as well as text; coerce once so both log targets get text
	set resourceText to logResource as text
	set infoText to logInfo as text
	if ((logLevel ≤ logVerbosity) and (logLevel < 4)) then
		tell application id "DNtp"
			log message resourceText info infoText
		end tell
	end if
	if (scriptDebugging) then
		log infoText & ": " & resourceText
	end if
end logMessage
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment