Created
July 12, 2017 20:28
-
-
Save ayumi/21688471b4cf1c3bc0f77ef72a6e6b3a to your computer and use it in GitHub Desktop.
Compare/Diff bookmark export HTML files in netscape formats
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Take 2 bookmark HTML files in Netscape format, and look for differences.
# Doesn't compare hierarchy.
# Requirements: Ruby 2.x; oga ruby gem (gem install oga)
# License: MPL 2.0
HELP_MSG = "Usage: compare-bookmarks.rb [Bookmarks HTML 1] [Bookmarks HTML 2]".freeze

# HTML parser
require "oga"

# Both bookmark file paths are required positional arguments.
file_1 = ARGV[0]
file_2 = ARGV[1]

# Print the usage line and exit with a non-zero status when either path is missing.
if file_1.nil? || file_2.nil?
  puts HELP_MSG
  exit(1)
end

# Read each export in full and parse it as HTML.
html_1 = Oga.parse_html(File.read(file_1))
html_2 = Oga.parse_html(File.read(file_2))
# Reduce each node to one comparable string: the stripped value of its "HREF"
# attribute when it has one, otherwise the node's stripped text.
#
# @param nodes [Enumerable] Oga nodes (anything responding to #attributes and #text)
# @return [Array<String>] one string per node, in the same order
def nodes_to_strings(nodes)
  nodes.map do |node|
    href_attr = node.attributes.find { |attr| attr.name == "HREF" }
    # A missing attribute or a nil value falls back to the node text.
    href_attr&.value&.strip || node.text.strip
  end
end
# Folder names come from <h3> elements, bookmarks from <a> elements.
folders_1 = nodes_to_strings(html_1.css("h3"))
folders_2 = nodes_to_strings(html_2.css("h3"))
bookmarks_1 = nodes_to_strings(html_1.css("a"))
bookmarks_2 = nodes_to_strings(html_2.css("a"))

# Array difference keeps every entry of the left side that is absent from the
# right side, so each direction is computed separately.
folders_in_1_not_in_2 = folders_1 - folders_2
folders_in_2_not_in_1 = folders_2 - folders_1
bookmarks_in_1_not_in_2 = bookmarks_1 - bookmarks_2
bookmarks_in_2_not_in_1 = bookmarks_2 - bookmarks_1
# Print each string on its own line to standard output, preceded by +prefix+.
# Prints nothing when +strings+ is empty.
#
# @param strings [Array<String>] lines to print
# @param prefix [String] text written before every line
def log_strings(strings, prefix = " ")
  strings.each { |s| puts "#{prefix}#{s}" }
end
# Report both directions of the comparison. Each header names the file the
# listed entries come from first, and shows the count of missing entries out
# of that same file's total.
puts "Folders in #{file_1} not in #{file_2} (#{folders_in_1_not_in_2.size} of #{folders_1.size}):"
log_strings(folders_in_1_not_in_2)
puts "Bookmarks in #{file_1} not in #{file_2} (#{bookmarks_in_1_not_in_2.size} of #{bookmarks_1.size}):"
log_strings(bookmarks_in_1_not_in_2)
puts "Folders in #{file_2} not in #{file_1} (#{folders_in_2_not_in_1.size} of #{folders_2.size}):"
log_strings(folders_in_2_not_in_1)
puts "Bookmarks in #{file_2} not in #{file_1} (#{bookmarks_in_2_not_in_1.size} of #{bookmarks_2.size}):"
log_strings(bookmarks_in_2_not_in_1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment