Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Obsidian Web Clipper Bookmarklet to save articles and pages from the web (for Safari, Chrome, Firefox, and mobile browsers)

By @kepano

🎉 Support my work at buymeacoffee.com/kepano

Demo

You can find a demo of this bookmarklet on YouTube

Installation

Create a new bookmark in your browser, then copy/paste the minified code below into the URL field.

You can customize the output using the optional variables at the top, and the template at the bottom. The default template is designed for use with the Dataview plugin. If you make changes I recommend using Bookmarklet Maker to minify and URI encode the bookmarklet.

Usage

By default, clicking the bookmarklet creates a new Obsidian file from the main body of the article (similar to Readability view). Alternatively you can choose to create a file from a selection, by either selecting all (CMD+A), or just a portion of the page.

Any images in the content will be embedded as external references. If you want to download images locally you can use the Local Images plugin which allows you to download images for a note.

Troubleshooting

This bookmarklet may not work on all websites. If you run into issues, you can also try the MarkDownload browser extension which provides similar functionality. You can troubleshoot issues by opening the Developer Console in your browser and checking if any errors appear when you click the bookmarklet. The most common error is that a website or the browser itself is blocking third-party code execution. Unfortunately, there is no good solution for that yet.

javascript: Promise.all([import('https://unpkg.com/turndown@6.0.0?module'), import('https://unpkg.com/@tehshrike/readability@0.2.0'), ]).then(async ([{
default: Turndown
}, {
default: Readability
}]) => {
/* Optional vault name */
const vault = "";
/* Optional folder name such as "Clippings/" */
const folder = "";
/* Optional tags */
const tags = "#clippings";
/**
 * Returns the HTML markup of whatever the user currently has selected on the page.
 *
 * @returns {string} Markup of the selection, or "" when nothing is selected.
 */
function getSelectionHtml() {
    /* Standard Selection API: clone every selected range into a scratch <div> and read back its markup. */
    if (typeof window.getSelection !== "undefined") {
        const picked = window.getSelection();
        if (!picked.rangeCount) {
            return "";
        }
        const scratch = document.createElement("div");
        const total = picked.rangeCount;
        for (let index = 0; index < total; index += 1) {
            scratch.appendChild(picked.getRangeAt(index).cloneContents());
        }
        return scratch.innerHTML;
    }
    /* Fallback for browsers that only expose document.selection. */
    if (typeof document.selection !== "undefined" && document.selection.type == "Text") {
        return document.selection.createRange().htmlText;
    }
    return "";
}
const selection = getSelectionHtml();
/* Readability's parse() can return null when it finds no article on the page.
   Destructuring that null directly is what raises
   "TypeError: (new Readability(...)).parse() is null", so read the fields defensively. */
const article = new Readability(document.cloneNode(true)).parse();
/* Fall back to the page's own title and body so a note is still produced. */
const title = (article && article.title) || document.title;
/* A missing byline would otherwise be written into the note as the text "null". */
const byline = (article && article.byline) || "";
const content = article ? article.content : document.body.innerHTML;
/**
 * Turns a page title into a string that can be used as a note file name.
 *
 * Every colon is removed. On Windows platforms the characters / \ ? % * | " < >
 * are replaced with "-"; elsewhere only / and \ are replaced.
 *
 * @param {string} fileName - Raw title to sanitize.
 * @returns {string} The sanitized name.
 */
function getFileName(fileName) {
    const windowsPlatforms = ['Win32', 'Win64', 'Windows', 'WinCE'];
    const isWindows = windowsPlatforms.includes(window.navigator.platform);
    /* replace() with a string pattern only touches the first match, so a title with
       several colons kept all but one of them; the global regex removes every colon. */
    const withoutColons = fileName.replace(/:/g, '');
    const forbidden = isWindows ? /[/\\?%*|"<>]/g : /[/\\]/g;
    return withoutColons.replace(forbidden, '-');
}
const fileName = getFileName(title);
/* A non-empty selection takes priority over the article extracted by Readability. */
const markdownify = selection ? selection : content;
/* The vault parameter is only added when a vault name was configured. */
const vaultName = vault ? '&vault=' + encodeURIComponent(`${vault}`) : '';
const turndownOptions = {
    headingStyle: 'atx',
    hr: '---',
    bulletListMarker: '-',
    codeBlockStyle: 'fenced',
    emDelimiter: '*',
};
/* Convert the chosen HTML to Markdown. */
const markdownBody = new Turndown(turndownOptions).turndown(markdownify);
const date = new Date();
/**
 * Formats a Date as YYYY-MM-DD, zero-padding the month and the day.
 *
 * @param {Date} date - Date to format.
 * @returns {string} The formatted date.
 */
function convertDate(date) {
    const year = date.getFullYear().toString();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    return [year, month, day].join('-');
}
const today = convertDate(date);
/* Note template: inline metadata fields, the tags, then the converted article. */
const fileContent =
    `author:: ${byline}\n`
    + `source:: [${title}](${document.URL})\n`
    + `clipped:: [[${today}]]\n`
    + `published:: \n\n`
    + `${tags}\n\n`
    + `${markdownBody}`;
const query = "file=" + encodeURIComponent(folder + fileName)
    + "&content=" + encodeURIComponent(fileContent)
    + vaultName;
/* Navigating to the obsidian://new URI passes the file name and content along. */
document.location.href = "obsidian://new?" + query;
})
javascript:(function()%7Bjavascript%3A%20Promise.all(%5Bimport('https%3A%2F%2Funpkg.com%2Fturndown%406.0.0%3Fmodule')%2C%20import('https%3A%2F%2Funpkg.com%2F%40tehshrike%2Freadability%400.2.0')%2C%20%5D).then(async%20(%5B%7B%0A%20%20%20%20default%3A%20Turndown%0A%7D%2C%20%7B%0A%20%20%20%20default%3A%20Readability%0A%7D%5D)%20%3D%3E%20%7B%0A%0A%20%20%2F*%20Optional%20vault%20name%20*%2F%0A%20%20const%20vault%20%3D%20%22%22%3B%0A%0A%20%20%2F*%20Optional%20folder%20name%20such%20as%20%22Clippings%2F%22%20*%2F%0A%20%20const%20folder%20%3D%20%22%22%3B%0A%0A%20%20%2F*%20Optional%20tags%20%20*%2F%0A%20%20const%20tags%20%3D%20%22%23clippings%22%3B%0A%0A%20%20function%20getSelectionHtml()%20%7B%0A%20%20%20%20var%20html%20%3D%20%22%22%3B%0A%20%20%20%20if%20(typeof%20window.getSelection%20!%3D%20%22undefined%22)%20%7B%0A%20%20%20%20%20%20%20%20var%20sel%20%3D%20window.getSelection()%3B%0A%20%20%20%20%20%20%20%20if%20(sel.rangeCount)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20var%20container%20%3D%20document.createElement(%22div%22)%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20for%20(var%20i%20%3D%200%2C%20len%20%3D%20sel.rangeCount%3B%20i%20%3C%20len%3B%20%2B%2Bi)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20%20container.appendChild(sel.getRangeAt(i).cloneContents())%3B%0A%20%20%20%20%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%20%20%20%20%20%20%20%20html%20%3D%20container.innerHTML%3B%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%20else%20if%20(typeof%20document.selection%20!%3D%20%22undefined%22)%20%7B%0A%20%20%20%20%20%20%20%20if%20(document.selection.type%20%3D%3D%20%22Text%22)%20%7B%0A%20%20%20%20%20%20%20%20%20%20%20%20html%20%3D%20document.selection.createRange().htmlText%3B%0A%20%20%20%20%20%20%20%20%7D%0A%20%20%20%20%7D%0A%20%20%20%20return%20html%3B%0A%20%20%7D%0A%0A%20%20const%20selection%20%3D%20getSelectionHtml()%3B%0A%0A%20%20const%20%7B%0A%20%20%20%20%20%20title%2C%0A%20%20%20%20%20%20byline%2C%0A%20%20%20%20%20%20content%0A%20%20%7D%20%3D%20new%20Readabili
ty(document.cloneNode(true)).parse()%3B%0A%0A%20%20function%20getFileName(fileName)%20%7B%0A%20%20%20%20var%20userAgent%20%3D%20window.navigator.userAgent%2C%0A%20%20%20%20%20%20%20%20platform%20%3D%20window.navigator.platform%2C%0A%20%20%20%20%20%20%20%20windowsPlatforms%20%3D%20%5B'Win32'%2C%20'Win64'%2C%20'Windows'%2C%20'WinCE'%5D%3B%0A%0A%20%20%20%20if%20(windowsPlatforms.indexOf(platform)%20!%3D%3D%20-1)%20%7B%0A%20%20%20%20%20%20fileName%20%3D%20fileName.replace('%3A'%2C%20'').replace(%2F%5B%2F%5C%5C%3F%25*%7C%22%3C%3E%5D%2Fg%2C%20'-')%3B%0A%20%20%20%20%7D%20else%20%7B%0A%20%20%20%20%20%20fileName%20%3D%20fileName.replace('%3A'%2C%20'').replace(%2F%5C%2F%2Fg%2C%20'-').replace(%2F%5C%5C%2Fg%2C%20'-')%3B%0A%20%20%20%20%7D%0A%20%20%20%20return%20fileName%3B%0A%20%20%7D%0A%20%20const%20fileName%20%3D%20getFileName(title)%3B%0A%0A%20%20if%20(selection)%20%7B%0A%20%20%20%20%20%20var%20markdownify%20%3D%20selection%3B%0A%20%20%7D%20else%20%7B%0A%20%20%20%20%20%20var%20markdownify%20%3D%20content%3B%0A%20%20%7D%0A%0A%20%20if%20(vault)%20%7B%0A%20%20%20%20%20%20var%20vaultName%20%3D%20'%26vault%3D'%20%2B%20encodeURIComponent(%60%24%7Bvault%7D%60)%3B%0A%20%20%7D%20else%20%7B%0A%20%20%20%20%20%20var%20vaultName%20%3D%20''%3B%0A%20%20%7D%0A%0A%20%20const%20markdownBody%20%3D%20new%20Turndown(%7B%0A%20%20%20%20%20%20headingStyle%3A%20'atx'%2C%0A%20%20%20%20%20%20hr%3A%20'---'%2C%0A%20%20%20%20%20%20bulletListMarker%3A%20'-'%2C%0A%20%20%20%20%20%20codeBlockStyle%3A%20'fenced'%2C%0A%20%20%20%20%20%20emDelimiter%3A%20'*'%2C%0A%20%20%7D).turndown(markdownify)%3B%0A%0A%20%20var%20date%20%3D%20new%20Date()%3B%0A%0A%20%20function%20convertDate(date)%20%7B%0A%20%20%20%20var%20yyyy%20%3D%20date.getFullYear().toString()%3B%0A%20%20%20%20var%20mm%20%3D%20(date.getMonth()%2B1).toString()%3B%0A%20%20%20%20var%20dd%20%20%3D%20date.getDate().toString()%3B%0A%20%20%20%20var%20mmChars%20%3D%20mm.split('')%3B%0A%20%20%20%20var%20ddChars%20%3D%20dd.split('')%3B%0A%20%20%20%20return%20yyyy%20
%2B%20'-'%20%2B%20(mmChars%5B1%5D%3Fmm%3A%220%22%2BmmChars%5B0%5D)%20%2B%20'-'%20%2B%20(ddChars%5B1%5D%3Fdd%3A%220%22%2BddChars%5B0%5D)%3B%0A%20%20%7D%0A%0A%20%20const%20today%20%3D%20convertDate(date)%3B%0A%0A%20%20const%20fileContent%20%3D%20%0A%20%20%20%20%20%20%22author%3A%3A%20%22%20%2B%20byline%20%2B%20%22%5Cn%22%0A%20%20%20%20%20%20%2B%20%22source%3A%3A%20%5B%22%20%2B%20title%20%2B%20%22%5D(%22%20%2B%20document.URL%20%2B%20%22)%5Cn%22%0A%20%20%20%20%20%20%2B%20%22clipped%3A%3A%20%5B%5B%22%20%2B%20today%20%2B%20%22%5D%5D%5Cn%22%0A%20%20%20%20%20%20%2B%20%22published%3A%3A%20%5Cn%5Cn%22%20%0A%20%20%20%20%20%20%2B%20tags%20%2B%20%22%5Cn%5Cn%22%0A%20%20%20%20%20%20%2B%20markdownBody%20%3B%0A%20%20%0A%20%20document.location.href%20%3D%20%22obsidian%3A%2F%2Fnew%3F%22%0A%20%20%20%20%2B%20%22file%3D%22%20%2B%20encodeURIComponent(folder%20%2B%20fileName)%0A%20%20%20%20%2B%20%22%26content%3D%22%20%2B%20encodeURIComponent(fileContent)%0A%20%20%20%20%2B%20vaultName%20%3B%0A%7D)%7D)()%3B
@kepano
Copy link
Author

kepano commented Aug 9, 2021

Use file instead of name in the url scheme and this is fixed.

Thanks, this change is made

Is it maybe possible to get the date of publication as well? That would be super useful when you want to clip newspaper articles.

I'm using Mozilla's readability library which unfortunately doesn't provide published date as an output. Would love to see this too, but don't have the time to explore adding this feature right now. Might be a good suggestion for the readability library overall as I could imagine other use cases for it.

@nimerino
Copy link

nimerino commented Aug 15, 2021

@kepano: Here’s my slightly modified version for us YAML aficionados, would be glad if you’d include that in your collection:

Thank you so much @Moonbase59 for the YAML extension.

And @kepano, I take my hat off to you as always.

@bblais
Copy link

bblais commented Aug 18, 2021

How hard would it be to export/save all images as well, kind of like the "Save Entire Website" option in Chrome? I say this because, when I want to save a website in Obsidian I don't want to count on the website keeping all the images, and I want to be able to look at it offline. It's a pretty cool tool!

@kepano
Copy link
Author

kepano commented Aug 19, 2021

How hard would it be to export/save all images as well, kind of like the "Save Entire Website" option in Chrome?

You can do this using the MarkDownload extension

@chrisgrieser
Copy link

chrisgrieser commented Aug 22, 2021

I'm using Mozilla's readability library which unfortunately doesn't provide published date as an output. Would love to see this too, but don't have the time to explore adding this feature right now.

Unfortunately, my knowledge of javascript is very basic, but I found a way via shell script to get the publication date, as it usually simply follows the datePublished in the source code of a webpage.

curl -s "https://www.wired.com/story/unbearable-cuteness-instagram-hedgehog-influencers/" | grep -Eo "datePublished[^,]*" | grep -Eo "[[:digit:]]{4}" | head -n 1

Does this maybe help you to quickly implement it via javascript?

@projectfaktory
Copy link

projectfaktory commented Sep 10, 2021

Thank you for this!

@Keitaro-AH
Copy link

Keitaro-AH commented Sep 15, 2021

Hi, to preface, i am pretty new to Obsidian and these types of codings.

I got the following problem. If i mark a small part of a website, the snippet works. If i use Ctrl+a or send the complete Site, it doesnt. It shows a obsidian:\new?.... link in the dev console, however nothing happens in Obsidian. (happens with your code and my changed code)

Any tips what could be happening here?

@Asseel-Naji
Copy link

Asseel-Naji commented Sep 26, 2021

A small fork that prompts the user for folder and extra tags : https://gist.github.com/Asseel-Naji/604de43495538e04f0448da394bc456a

javascript: Promise.all([import('https://unpkg.com/turndown@6.0.0?module'), import('https://unpkg.com/@tehshrike/readability@0.2.0'), ]).then(async ([{
    default: Turndown
}, {
    default: Readability
}]) => {

  /* Optional vault name */
  const vault = "Brain";

  /* Folder for the new note, asked on every clip.
     prompt() returns null when the dialog is cancelled; concatenating that into the
     path would produce the text "null", so the default folder is used instead. */
  const defaultFolder = "Resources/0_INBOX/";
  const folderAnswer = prompt("Folder:", defaultFolder);
  const folder = folderAnswer === null ? defaultFolder : folderAnswer;

  /* Tags: a fixed base tag followed by whatever the user types.
     A cancelled prompt adds no extra tags rather than the text "null". */
  let baseTag = "#resources ";
  const tagsAnswer = prompt("additional tags:", "#to_read");
  let extraTags = tagsAnswer === null ? "" : tagsAnswer;
  const tags = baseTag + extraTags;

  


  /**
   * Returns the HTML markup of whatever the user currently has selected on the page.
   *
   * @returns {string} Markup of the selection, or "" when nothing is selected.
   */
  function getSelectionHtml() {
    /* Standard Selection API: clone every selected range into a scratch <div> and read back its markup. */
    if (typeof window.getSelection !== "undefined") {
      const picked = window.getSelection();
      if (!picked.rangeCount) {
        return "";
      }
      const scratch = document.createElement("div");
      const total = picked.rangeCount;
      for (let index = 0; index < total; index += 1) {
        scratch.appendChild(picked.getRangeAt(index).cloneContents());
      }
      return scratch.innerHTML;
    }
    /* Fallback for browsers that only expose document.selection. */
    if (typeof document.selection !== "undefined" && document.selection.type == "Text") {
      return document.selection.createRange().htmlText;
    }
    return "";
  }

  const selection = getSelectionHtml();

  /* Readability's parse() can return null when it finds no article on the page.
     Destructuring that null directly is what raises
     "TypeError: (new Readability(...)).parse() is null", so read the fields defensively. */
  const article = new Readability(document.cloneNode(true)).parse();
  /* Fall back to the page's own title and body so a note is still produced. */
  let title = (article && article.title) || document.title;
  /* A missing byline would otherwise be written into the note as the text "null". */
  let byline = (article && article.byline) || "";
  let content = article ? article.content : document.body.innerHTML;

  /**
   * Turns a page title into a string that can be used as a note file name.
   *
   * Every colon is removed. On Windows platforms the characters / \ ? % * | " < >
   * are replaced with "-"; elsewhere only / and \ are replaced.
   *
   * @param {string} fileName - Raw title to sanitize.
   * @returns {string} The sanitized name.
   */
  function getFileName(fileName) {
    const windowsPlatforms = ['Win32', 'Win64', 'Windows', 'WinCE'];
    const isWindows = windowsPlatforms.includes(window.navigator.platform);
    /* replace() with a string pattern only touches the first match, so a title with
       several colons kept all but one of them; the global regex removes every colon. */
    const withoutColons = fileName.replace(/:/g, '');
    const forbidden = isWindows ? /[/\\?%*|"<>]/g : /[/\\]/g;
    return withoutColons.replace(forbidden, '-');
  }
  /* Let the user adjust the suggested file name. prompt() returns null when the dialog
     is cancelled, which would otherwise create a note literally named "null", so the
     suggested name is kept in that case. */
  const suggestedName = getFileName(title);
  const typedName = prompt("File Name", suggestedName);
  let fileName = typedName === null ? suggestedName : typedName;

  /* A non-empty selection takes priority over the article extracted by Readability. */
  const markdownify = selection ? selection : content;

  /* The vault parameter is only added when a vault name was configured. */
  const vaultName = vault ? '&vault=' + encodeURIComponent(`${vault}`) : '';

  /* Convert the chosen HTML to Markdown. */
  const markdownBody = new Turndown({
      headingStyle: 'atx',
      hr: '---',
      bulletListMarker: '-',
      codeBlockStyle: 'fenced',
      emDelimiter: '*',
  }).turndown(markdownify);

  const date = new Date();

  /**
   * Formats a Date as YYYY-MM-DD, zero-padding the month and the day.
   *
   * @param {Date} date - Date to format.
   * @returns {string} The formatted date.
   */
  function convertDate(date) {
    const year = date.getFullYear().toString();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    return [year, month, day].join('-');
  }

  const today = convertDate(date);

  /* Note template: inline metadata fields, the tags, then the converted article. */
  const fileContent =
      `author:: ${byline}\n`
      + `source:: [${title}](${document.URL})\n`
      + `clipped:: [[${today}]]\n`
      + `published:: \n\n`
      + `${tags}\n\n`
      + `${markdownBody}`;

  const query = "file=" + encodeURIComponent(folder + fileName)
      + "&content=" + encodeURIComponent(fileContent)
      + vaultName;

  /* Navigating to the obsidian://new URI passes the file name and content along. */
  document.location.href = "obsidian://new?" + query;
})

@yfr
Copy link

yfr commented Jan 3, 2022

Is there a way to automatically download the images of a clipped page as attachments?

@kepano
Copy link
Author

kepano commented Jan 3, 2022

@yfr you can use the Local Images plugin

@norton-11
Copy link

norton-11 commented Jan 5, 2022

I am a user who is using the Obsidian Web Clipper well. However, from one day on, the clipper stopped working in Windows 10. It works well when running the same page on mobile or safari. May I know what the problem is?

@a-toms
Copy link

a-toms commented Jan 6, 2022

Works excellently for me. Thanks kepano!

@norton-11
Copy link

norton-11 commented Jan 6, 2022

Hello. I am a clipper user. When using Clipper in Windows 10 Chrome, it is impossible to work on sites that worked well on mobile and Safari. I have attached the log of the problem and would appreciate it if you check it out.
} = new Readability(document.cloneNode(true)).parse();
TypeError: (new Readability(...)).parse() is null
Uncaught (in promise) TypeError: (new Readability(...)).parse() is null

@ysakols
Copy link

ysakols commented Feb 1, 2022

+1 to Norton-11. Same issue on Windows

@imloualvaro
Copy link

imloualvaro commented Feb 8, 2022

It would be good to have an iOS Safari extension of this. I usually do research on iPhone too.

BTW thanks. It's working properly on chrome build 98

@pioneerskies
Copy link

pioneerskies commented Feb 12, 2022

@kepano @Moonbase59 I'm experimenting a personal solution, starting from your code, at https://github.com/pioneerskies/downmark.

Maybe you and the community could find something interesting there.

Thank you very much for your contributions.

@ganesshkumar
Copy link

ganesshkumar commented Feb 13, 2022

@kepano, I have been using this and I have couple of bookmarklets to clip the contents into different folders in my vault. Since I created bookmarklets for various folders, I created a simple UI to fill in the vault name, folder name, tags and get the bookmarklet.

http://obsidian-clipper-maker.ganesshkumar.com

Let me know if this is useful for you guys!

@jordanwiseman
Copy link

jordanwiseman commented Mar 5, 2022

Hey, @kepano, fantastic work! FYI, I forked a version and added auto-tagging based on a site's <meta name="keywords" .... tag, and using YAML front matter (a la @Moonbase59) as it seems to handle less predictable alphanumeric-only tags better.

Code:

javascript: Promise.all([import('https://unpkg.com/turndown@6.0.0?module'), import('https://unpkg.com/@tehshrike/readability@0.2.0'), ]).then(async ([{
    default: Turndown
}, {
    default: Readability
}]) => {

  /* Optional vault name */
  const vault = "";

  /* Optional folder name such as "Clippings/" */
  const folder = "";

  /* Optional tags  */
  let tags = "clippings";

  /* Append the page's <meta name="keywords"> entries to the tags, with the spaces
     inside each keyword removed. The element is looked up once. Its content attribute
     may be absent, in which case getAttribute() returns null and calling split() on
     it would throw. */
  const keywordsMeta = document.querySelector('meta[name="keywords" i]');
  const keywordList = keywordsMeta ? keywordsMeta.getAttribute('content') : null;
  if (keywordList) {
      keywordList.split(',').forEach(function(keyword) {
          const tag = keyword.split(' ').join('');
          /* Skip blank entries (e.g. after a trailing comma) so no stray spaces are added. */
          if (tag) {
              tags += ' ' + tag;
          }
      });
  }

  /**
   * Returns the HTML markup of whatever the user currently has selected on the page.
   *
   * @returns {string} Markup of the selection, or "" when nothing is selected.
   */
  function getSelectionHtml() {
    /* Standard Selection API: clone every selected range into a scratch <div> and read back its markup. */
    if (typeof window.getSelection !== "undefined") {
      const picked = window.getSelection();
      if (!picked.rangeCount) {
        return "";
      }
      const scratch = document.createElement("div");
      const total = picked.rangeCount;
      for (let index = 0; index < total; index += 1) {
        scratch.appendChild(picked.getRangeAt(index).cloneContents());
      }
      return scratch.innerHTML;
    }
    /* Fallback for browsers that only expose document.selection. */
    if (typeof document.selection !== "undefined" && document.selection.type == "Text") {
      return document.selection.createRange().htmlText;
    }
    return "";
  }

  const selection = getSelectionHtml();

  /* Readability's parse() can return null when it finds no article on the page.
     Destructuring that null directly is what raises
     "TypeError: (new Readability(...)).parse() is null", so read the fields defensively. */
  const article = new Readability(document.cloneNode(true)).parse();
  /* Fall back to the page's own title and body so a note is still produced. */
  const title = (article && article.title) || document.title;
  /* A missing byline would otherwise be written into the note as the text "null". */
  const byline = (article && article.byline) || "";
  const content = article ? article.content : document.body.innerHTML;

  /**
   * Turns a page title into a string that can be used as a note file name.
   *
   * Every colon is removed. On Windows platforms the characters / \ ? % * | " < >
   * are replaced with "-"; elsewhere only / and \ are replaced.
   *
   * @param {string} fileName - Raw title to sanitize.
   * @returns {string} The sanitized name.
   */
  function getFileName(fileName) {
    const windowsPlatforms = ['Win32', 'Win64', 'Windows', 'WinCE'];
    const isWindows = windowsPlatforms.includes(window.navigator.platform);
    /* replace() with a string pattern only touches the first match, so a title with
       several colons kept all but one of them; the global regex removes every colon. */
    const withoutColons = fileName.replace(/:/g, '');
    const forbidden = isWindows ? /[/\\?%*|"<>]/g : /[/\\]/g;
    return withoutColons.replace(forbidden, '-');
  }
  const fileName = getFileName(title);

  /* A non-empty selection takes priority over the article extracted by Readability. */
  const markdownify = selection ? selection : content;

  /* The vault parameter is only added when a vault name was configured. */
  const vaultName = vault ? '&vault=' + encodeURIComponent(`${vault}`) : '';

  /* Convert the chosen HTML to Markdown.
     The horizontal rule is '***' rather than '~~~': three tildes open a fenced code
     block in Markdown, so every converted <hr> would turn the text after it into code.
     '***' is a thematic break and cannot be confused with the '---' lines that
     delimit the YAML front matter. */
  const markdownBody = new Turndown({
      headingStyle: 'atx',
      hr: '***',
      bulletListMarker: '-',
      codeBlockStyle: 'fenced',
      emDelimiter: '*',
  }).turndown(markdownify);

  const date = new Date();

  /**
   * Formats a Date as YYYY-MM-DD, zero-padding the month and the day.
   *
   * @param {Date} date - Date to format.
   * @returns {string} The formatted date.
   */
  function convertDate(date) {
    const year = date.getFullYear().toString();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    return [year, month, day].join('-');
  }

  const today = convertDate(date);

  /* YAML front matter (tags render cleaner there with special chars), then the article. */
  const fileContent =
      `---\n`
      + `author:    ${byline}\n`
      + `title:     [${title}]\n`
      + `source:    ${document.URL}\n`
      + `clipped:   ${today}\n`
      + `published: \n\n`
      + `tags:      [${tags}]\n`
      + `---\n\n`
      + `${markdownBody}`;

  const query = "file=" + encodeURIComponent(folder + fileName)
      + "&content=" + encodeURIComponent(fileContent)
      + vaultName;

  /* Navigating to the obsidian://new URI passes the file name and content along. */
  document.location.href = "obsidian://new?" + query;
})

@skelly-larry
Copy link

skelly-larry commented Mar 5, 2022

Hey @jordanwiseman , unfortunately none of the variations work very well for me. In Chrome, I've regressed to all extensions disabled, security set to 'no protection', AV disabled, local Defender firewall disabled... still, a particular page will work once for me, then fail for awhile, then work again. For example:

https://xkcd.com/327/

I see the obsidian launch in the console, Sometimes with an error, but often with no error and still does not clip.

Sometimes see: see unknown origin/cancelled. However when I retry I do not see this, but see: Not allowed to launch 'obsidian://new?file=Exploits%20of%20a%20Mom&content=---...' because a user gesture is required.

There is no CORS error in the console. Of course not, the page origin is not making the request.

Some pages never work, always get the '... user gesture required'.
Like this page: https://sportsmedicine-open.springeropen.com/articles/10.1186/s40798-021-00390-y#author-information

Some pages work again, randomly. I could not find a pattern for awhile. Opening another page in the browser makes no difference. However, if I type into another page, like this post, or gmail... or even notepad, it almost always makes it work again. And it fails until I use the keyboard on a page again.

No joy in Edge either. Have only tried a personal machine so far.

I've wasted most of my morning on this, need to get some things done today!

@pioneerskies
Copy link

pioneerskies commented Mar 5, 2022

@skelly-larry just FYI, with my variation the XKCD URL technically works, but the wrong portion of the page is extracted by Readability, so the resulting content is not meaningful (you can try it by https://downmark.herokuapp.com/obsidian?u=https://xkcd.com/327/). The second URL AFAIK doesn't work because the generated obsidian://... is too long to be handled by the browser; this could happen when you're clipping a URL with very long content. One example symptom I collected:

 curl -L "https://downmark.herokuapp.com/obsidian?u=https://sportsmedicine-open.springeropen.com/articles/10.1186/s40798-021-00390-y"
curl: (27) Out of memory

Since my variation has a server-side component, I can report - for curiosity sake - that the server is correctly producing the URL, but the client (curl or the browser) is not able to handle it.

Cheers

Disclaimer: I'm really not interested into bringing someone to use my personal experiment variation, just bringing some more data to the table to eventually contribute to the discussion. ;)

@jordanwiseman
Copy link

jordanwiseman commented Mar 5, 2022

Yeah, for the gesture error, this is a security feature, inconsistently applied, across browsers. Since the bookmarklet calls an external protocol handler ("obsidian://") , the usual policy is not to let this happen automatically, i.e., without the user's knowledge. If you interact with the page you want to clip after it's loaded, but before you click on the bookmarklet, it should work. Most tries, anyway. It worked for XKCD (although the actual comic image isn't grabbed, but that's an issue with Readability not understanding the regions on the xkcd pages).

Interaction can be as simple as scrolling the page, btw.

For the SpringerOpen link, it's as @pioneerskies said: the generated obsidian link is too large to be handled in this way (see https://stackoverflow.com/questions/417142/what-is-the-maximum-length-of-a-url-in-different-browsers), although the entire constructed value does get logged to the console without error, ironically. There's no real way around this when using the obsidian protocol handler since you can't POST to a protocol handler (which is basically just a command line).

death_au's MarkDownload addon doesn't have these issues because addons don't have the same limitations as a bookmarklet running javascript "in" the page.

@skelly-larry
Copy link

skelly-larry commented Mar 5, 2022

Thanks @jordanwiseman . Yes the fact it logged the entire command properly threw me... but I was already late taking one of my kids to the airport so I had to abandon it. Never wrote javascript, might need to get into it, and look around for useful extensions. I and my kids have hundreds of recipes marooned in Evernote so I appreciate the local, replicable markdown. I'll mainly use this for annotated links, and I'm curious why the source tag is not stored as a navigable URL, but easy to enhance; maybe I'm misunderstanding. In a perfect world it would popup a dialog with meta tags I could prune, and the standard metadata, editable, before it commits. If the text selection is null, then grab the metadata and a URL link. Honestly, Obsidian is so flexible, I have to figure out how I plan to use it. The graph is cute, looks like a project I know Vasturiano's force graph that we have used before in the lab. Will have to play with it more to see if it is really useful.

@Write
Copy link

Write commented Mar 6, 2022

This script fail on almost every website for me due to CORS.
The only solution for me is to disable CORS entirely in Firefox
content.cors.disable to true

@skelly-larry
Copy link

skelly-larry commented Mar 6, 2022

After scanning some documentation, I think Obsidian has great potential but has ignored their most common use case. Maybe I am missing something? I hope I am.

The documentation describes a note-first approach, and a link-first approach. If you are taking notes independent of the rest of the world this is ok. But I read web pages that Google and new services push to me constantly, or search, and almost all of my notes are web page-first. I used to share these to Pocket (terrible organization of pages) or other things. I resort to sharing them to an email message to myself to read the next day. What I want to do is push them to Obsidian, refine the metadata, and use existing or new tags to fit them into the tagging scheme.

I rarely start with notes. I am absorbing info that I want to tag, develop insight from, and link into more complex structures later.

A lot of potential because it is so flexible, but it is so horribly awkward to use. It can be morphed into something highly usable, but I have a day job.

@skelly-larry
Copy link

skelly-larry commented Mar 7, 2022

@pioneerskies @jordanwiseman

Many things I want to clip are too long and it fails. I want to grab the leading part of a page and edit from there; or a selection.
I also want a navigable link to the page at the bottom of what I clip.

This refinement of Jordan's fits as much of the page as possible, assuming a safe URL length for most browsers is 2048; i trimmed it back to 2030 due to errors, which can URI encode 1200-1300 characters of source content. It also adds the link to source and a diagnostic of the number of characters of page content clipped.

I acknowledge that tags may be fractured and the last character of the page may be broken, if URI encoding it escapes it and I truncate in the middle. But it captures pages pretty well so far and there will always be some massaging on the Obsidian page.

Decrementing by 1 char each time seems a little wasteful, I used to use 100, but it's just browser cycles and is instantaneous.

Still no CSP or CORS solution.

Code

javascript: Promise.all([import('https://unpkg.com/turndown@6.0.0?module'), import('https://unpkg.com/@tehshrike/readability@0.2.0'), ]).then(async ([{
    default: Turndown
}, {
    default: Readability
}]) => {

  /* Optional vault name */
  const vault = "";

  /* Optional folder name such as "Clippings/" */
  const folder = "";

  /* Optional tags  */
  let tags = "clippings";

  /* Append the page's <meta name="keywords"> entries to the tags, with the spaces
     inside each keyword removed. The element is looked up once. Its content attribute
     may be absent, in which case getAttribute() returns null and calling split() on
     it would throw. */
  const keywordsMeta = document.querySelector('meta[name="keywords" i]');
  const keywordList = keywordsMeta ? keywordsMeta.getAttribute('content') : null;
  if (keywordList) {
      keywordList.split(',').forEach(function(keyword) {
          const tag = keyword.split(' ').join('');
          /* Skip blank entries (e.g. after a trailing comma) so no stray spaces are added. */
          if (tag) {
              tags += ' ' + tag;
          }
      });
  }

  /**
   * Returns the HTML markup of whatever the user currently has selected on the page.
   *
   * @returns {string} Markup of the selection, or "" when nothing is selected.
   */
  function getSelectionHtml() {
    /* Standard Selection API: clone every selected range into a scratch <div> and read back its markup. */
    if (typeof window.getSelection !== "undefined") {
      const picked = window.getSelection();
      if (!picked.rangeCount) {
        return "";
      }
      const scratch = document.createElement("div");
      const total = picked.rangeCount;
      for (let index = 0; index < total; index += 1) {
        scratch.appendChild(picked.getRangeAt(index).cloneContents());
      }
      return scratch.innerHTML;
    }
    /* Fallback for browsers that only expose document.selection. */
    if (typeof document.selection !== "undefined" && document.selection.type == "Text") {
      return document.selection.createRange().htmlText;
    }
    return "";
  }

  const selection = getSelectionHtml();

  /* Readability's parse() can return null when it finds no article on the page.
     Destructuring that null directly is what raises
     "TypeError: (new Readability(...)).parse() is null", so read the fields defensively. */
  const article = new Readability(document.cloneNode(true)).parse();
  /* Fall back to the page's own title and body so a note is still produced. */
  const title = (article && article.title) || document.title;
  /* A missing byline would otherwise be written into the note as the text "null". */
  const byline = (article && article.byline) || "";
  const content = article ? article.content : document.body.innerHTML;

  /**
   * Turns a page title into a string that can be used as a note file name.
   *
   * Every colon is removed. On Windows platforms the characters / \ ? % * | " < >
   * are replaced with "-"; elsewhere only / and \ are replaced.
   *
   * @param {string} fileName - Raw title to sanitize.
   * @returns {string} The sanitized name.
   */
  function getFileName(fileName) {
    const windowsPlatforms = ['Win32', 'Win64', 'Windows', 'WinCE'];
    const isWindows = windowsPlatforms.includes(window.navigator.platform);
    /* replace() with a string pattern only touches the first match, so a title with
       several colons kept all but one of them; the global regex removes every colon. */
    const withoutColons = fileName.replace(/:/g, '');
    const forbidden = isWindows ? /[/\\?%*|"<>]/g : /[/\\]/g;
    return withoutColons.replace(forbidden, '-');
  }
  const fileName = getFileName(title);

  /* A non-empty selection takes priority over the article extracted by Readability. */
  const markdownify = selection ? selection : content;

  /* The vault parameter is only added when a vault name was configured. */
  const vaultName = vault ? '&vault=' + encodeURIComponent(`${vault}`) : '';

  /* Convert the chosen HTML to Markdown.
     The horizontal rule is '***' rather than '~~~': three tildes open a fenced code
     block in Markdown, so every converted <hr> would turn the text after it into code.
     '***' is a thematic break and cannot be confused with the '---' lines that
     delimit the YAML front matter. */
  const markdownBody = new Turndown({
      headingStyle: 'atx',
      hr: '***',
      bulletListMarker: '-',
      codeBlockStyle: 'fenced',
      emDelimiter: '*',
  }).turndown(markdownify);

  const date = new Date();

  /**
   * Formats a Date as YYYY-MM-DD, zero-padding the month and the day.
   *
   * @param {Date} date - Date to format.
   * @returns {string} The formatted date.
   */
  function convertDate(date) {
    const year = date.getFullYear().toString();
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    return [year, month, day].join('-');
  }

  const today = convertDate(date);

  /* YAML front matter (tags render cleaner there with special chars), then the article. */
  const fileContent =
      `---\n`
      + `author:    ${byline}\n`
      + `title:     [${title}]\n`
      + `source:    ${document.URL}\n`
      + `clipped:   ${today}\n`
      + `published: \n\n`
      + `tags:      [${tags}]\n`
      + `---\n\n`
      + `${markdownBody}`;
  
  
  /* add a link to the source at the end */
  const linkref =
      "\n\n---\nsource: [" + title + "](" + document.URL + ")\n";

  /* Keep the generated URL within a length most browsers accept: 2048 is the
     commonly quoted limit, 2030 leaves a small margin. */
  const maxURLLength = 2030;

  const urlStart = "obsidian://new?"
      + "file=" + encodeURIComponent(folder + fileName)
      + "&content=";
  /* Parts of the URL whose length does not depend on how much content is kept. */
  const fixedLength = urlStart.length + vaultName.length;

  /* Footer placed after the clipped text: the source link and a note of how many
     characters were kept. It is URI-encoded together with the content, so its
     newlines and spaces are not placed raw into the URL. */
  const footerFor = (count) => linkref + "\n\nclipped " + String(count) + " chars ";

  /* Take characters from the start of the note until the next one would push the
     encoded URL past the limit. Iterating with for...of walks the string by code
     point, so a surrogate pair (such as an emoji) is never cut in half; a lone
     surrogate makes encodeURIComponent throw URIError. The loop is bounded by the
     note's length, so it always ends, even when the file name and footer alone
     already exceed the limit (the URL is then built with no clipped content). */
  let clipped = "";
  let clippedEncodedLength = 0;
  for (const symbol of fileContent) {
      const symbolEncodedLength = encodeURIComponent(symbol).length;
      const footerLength = encodeURIComponent(footerFor(clipped.length + symbol.length)).length;
      if (fixedLength + clippedEncodedLength + symbolEncodedLength + footerLength > maxURLLength) {
          break;
      }
      clipped += symbol;
      clippedEncodedLength += symbolEncodedLength;
  }

  const hrefString = urlStart
      + encodeURIComponent(clipped + footerFor(clipped.length))
      + vaultName;

  document.location.href = hrefString;

})

@pioneerskies
Copy link

pioneerskies commented Mar 8, 2022

@skelly-larry nice one. I'll think about implementing a simple "selection" function on my script too. With something like https://jsfiddle.net/Y4BBq/

Still no CSP or CORS solution.

That's not a script's problem, but a security enforcement on some websites. I wrote a webservice for me only to be able to overcome that thing (I often do webclips from GitHub for example).

@wealthychef1
Copy link

wealthychef1 commented Mar 12, 2022

Another workaround I've found for certain sites that block web clipping: use "Reader View" on Safari and copy and paste from there... defeats a lot of things

@vii33
Copy link

vii33 commented Mar 12, 2022

@jordanwiseman: Works like a charm, thank you. 👍

Small improvement idea: Add a level one headline (# The Headline) at the beginning of the document.
E.g.:

var headline = title;
const fileContent = ....... +
'# ' + headline + 
markdownBody;

@Write
Copy link

Write commented Mar 12, 2022

If this is turned into an extension, couldn't it bypass CORS restriction ?

@ScootRay
Copy link

ScootRay commented Apr 18, 2022

@kepano, I have been using this and I have couple of bookmarklets to clip the contents into different folders in my vault. Since I created bookmarklets for various folders, I created a simple UI to fill in the vault name, folder name, tags and get the bookmarklet.

http://obsidian-clipper-maker.ganesshkumar.com

Let me know if this is useful for you guys!

Thanks for setting that up, makes it so much easier : )

Ray

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment