Skip to content

Instantly share code, notes, and snippets.

@naupaka
Forked from bcdavasconcelos/Tinderbox-Ruby.rb
Created April 4, 2022 04:35
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save naupaka/b9622a4527ea3f3508c4bae277e330ab to your computer and use it in GitHub Desktop.
Save naupaka/b9622a4527ea3f3508c4bae277e330ab to your computer and use it in GitHub Desktop.
This ruby class is aimed at the XML files of the macOS app Tinderbox with methods to parse attributes, links and notes.
#!/Users/bcdav/.rbenv/shims/ruby
# frozen_string_literal: false
Encoding.default_external = Encoding::UTF_8
# Bernardo C. D. A. Vasconcelos #
# 2022-01-06-10-02 #
# A Ruby class for Tinderbox Documents with methods to parse attributes, links and notes.
# It deals directly with the XML file and does not rely on the application's OAS interface.
# At the moment one can only retrieve information, not alter the document in any way.
# Use example can be found at the bottom.
# To do: add methods for colors, menu, link types, macros, preferences, windows, searches, filters, gallery, badges
# A complete description of Tinderbox's XML is available at [aTBref 9.0](https://www.acrobatfaq.com/atbref9/index/SyntaxLibrary/TheXMLTBXformat.html)
# Begin Class
# Read-only view over a Tinderbox (.tbx) XML document.
#
# The document is parsed with Nokogiri and queried with XPath; nothing is ever
# written back to the file. Requires Ruby 3.0+ because of Hash#except.
#
# Every public reader returns nil when no document was loaded (for example after
# the constructor rejected the path).
class TinderboxDocument
  require 'nokogiri'

  # Keys removed from #tbx_all_attributes to build #tbx_attributes: the deprecated
  # attributes listed at
  # https://acrobatfaq.com/atbref8/index/Attributes/Attributesgroupedbypurpo/Deprecatedattributes.html
  # plus :anything, :System and :User (presumably grouping entries rather than real
  # attributes - TODO confirm).
  EXCLUDED_ATTRIBUTES = %i[
    anything System User AutomaticIndent Color2 HideKeyAttributes HTMLMarkDown
    HTMLOverwriteImages KeyAttributeDateFormat KeyAttributeFont KeyAttributeFontSize
    KeyAttributes LeafBase LeafBend LeafDirection LeafTip MapBackgroundColor2
    MapPrototypeColor MapTextSize mt_allow_comments mt_allow_pings mt_convert_breaks
    mt_keywords OutlineTextSize RSSChannelTemplate RSSItemLimit RSSItemTemplate ShowTitle
    TextAlign TextExportTemplate TextPaneRatio TextPaneWidth TextSidebar
    TitleBackgroundColor TitleFont TitleForegroundColor WeblogPostID
  ].freeze

  # Text links whose anchor is shorter than this many characters are not wrapped.
  MIN_LINK_LENGTH = 4

  # Characters escaped inside TSV/CSV cells, mapped to their two-character escapes.
  DELIMITED_ESCAPES = { "\n" => '\n', "\t" => '\t', '"' => '\"' }.freeze

  # The parsed Nokogiri::XML::Document (nil when loading was refused).
  attr_accessor :content

  # Writers are kept so existing callers still work; the readers below always
  # recompute from #content and overwrite whatever was assigned.
  attr_writer :tbx_name, :tbx_links, :tbx_attributes, :tbx_all_attributes, :tbx_notes

  # @param tbxfile [String] path to a Tinderbox document; must contain ".tbx"
  def initialize(tbxfile)
    if tbxfile.include?('.tbx')
      # Block form closes the file handle as soon as parsing is done.
      @content = File.open(tbxfile) { |io| Nokogiri::XML(io) }
    else
      puts 'ERROR: Please select a Tinderbox file'
    end
  end

  # @return [String, nil] the Name of the first <item> in the document (the root note)
  def tbx_name
    return unless document?

    @tbx_name = root_item&.at('.//attribute[@name="Name"]')&.text
  end

  # @return [Array<Hash>, nil] one hash per <link> element with the keys
  #   :name, :sourceid, :destid, :sstart and :slen (raw XML attribute strings or nil)
  def tbx_links
    return unless document?

    @tbx_links = content.xpath('//link').map do |node|
      {
        name: node.attr('name'),
        sourceid: node.attr('sourceid'),
        destid: node.attr('destid'),
        sstart: node.attr('sstart'),
        slen: node.attr('slen')
      }
    end
  end

  # @return [Hash{Symbol => Hash}, nil] #tbx_all_attributes without EXCLUDED_ATTRIBUTES
  def tbx_attributes
    return unless document?

    @tbx_attributes = tbx_all_attributes.except(*EXCLUDED_ATTRIBUTES)
  end

  # @return [Hash{Symbol => Hash}, nil] every <attrib> definition keyed by its Name,
  #   each value holding :parent, :editable, :visible, :type and :default as strings
  def tbx_all_attributes
    return unless document?

    @tbx_all_attributes = content.xpath('//attrib').each_with_object({}) do |node, definitions|
      definitions[node.attr('Name').to_s.to_sym] = {
        parent: node.attr('parent').to_s,
        editable: node.attr('editable').to_s,
        visible: node.attr('visibleInEditor').to_s,
        type: node.attr('type').to_s,
        default: node.attr('default').to_s
      }
    end
  end

  # Main entry point: every note with every (non-excluded) attribute.
  #
  # @param options [Array] options[0] is the output format - :Array, :TSV, :CSV or
  #   anything else for a Hash keyed by note ID; options[1] may be :Links to wrap
  #   linked text ranges in [[...]]
  # @return [Hash, Array, String, nil] Hash of per-note hashes, Array of per-note
  #   arrays, or a String with a quoted header line followed by one line per note
  def tbx_notes(*options)
    return unless document?

    format, extra = options
    notes = content.xpath('//tinderbox//item')
    attributes = tbx_attributes.keys
    @tbx_notes =
      case format
      when :Array
        notes.map { |note| get_note_attributes(attributes, note, :Array, extra) }
      when :TSV
        rows = notes.map { |note| get_note_attributes(attributes, note, :TSV, extra) }
        [header_row(attributes, "\t"), *rows].join("\n")
      when :CSV
        rows = notes.map { |note| get_note_attributes(attributes, note, :CSV, extra) }
        [header_row(attributes, ', '), *rows].join("\n")
      else
        notes.each_with_object({}) do |note, by_id|
          # Pass an explicit format so that `extra` stays in the second option slot.
          row = get_note_attributes(attributes, note, :Hash, extra)
          by_id[row[:ID]] = row
        end
      end
  end

  # Collects the requested attributes of a single note.
  #
  # @param attributes [Array<Symbol>] attribute names to read
  # @param note [Nokogiri::XML::Element] an <item> element
  # @param options [Array] options[0] is the format (:Array, :TSV, :CSV, else Hash);
  #   options[1] may be :Links
  # @return [Hash, Array, String, nil] nil when note is not an element. Hash output
  #   omits empty values; TSV cells are each followed by a tab and CSV cells are
  #   quoted and each followed by ", " (so rows carry a trailing separator).
  def get_note_attributes(attributes, note, *options)
    return unless note.instance_of?(Nokogiri::XML::Element)

    format, extra = options
    values = attributes.map { |attribute| note_attribute_value(note, attribute, extra) }
    case format
    when :Array then values
    when :TSV then values.map { |value| "#{escape_delimited(value)}\t" }.join
    when :CSV then values.map { |value| "\"#{escape_delimited(value)}\", " }.join
    else attributes.zip(values).reject { |_, value| value == '' }.to_h
    end
  end

  # Builds the value of $Container, which is not stored in the XML: the names of the
  # enclosing <item> elements joined with "/", outermost first, excluding the root item.
  #
  # @param note [Nokogiri::XML::Element]
  # @return [String, nil] e.g. "/Projects/2022"; "" for the root item and its direct
  #   children's container; nil when note is not an element
  def get_note_attribute_container(note)
    return unless note.instance_of?(Nokogiri::XML::Element)

    root = root_item
    container = ''
    ancestor = note.parent
    # Stop on node identity rather than on a name match, so that a container which
    # happens to share the document's name does not cut the path short.
    while ancestor.is_a?(Nokogiri::XML::Element) && ancestor.name == 'item' && ancestor != root
      container = "/#{note_name(ancestor)}#{container}"
      ancestor = ancestor.parent
    end
    container
  end

  private

  # True when #content holds a parsed XML document.
  def document?
    content.instance_of?(Nokogiri::XML::Document)
  end

  # The first <item> under <tinderbox>, or nil.
  def root_item
    content.at_xpath('//tinderbox//item') if document?
  end

  # Header line for TSV/CSV output: every attribute name in double quotes.
  def header_row(attributes, separator)
    attributes.map { |attribute| "\"#{attribute}\"" }.join(separator)
  end

  # Value of one attribute of a note; always a String ("" when absent).
  def note_attribute_value(note, attribute, extra)
    case attribute
    when :ID then note.attr('ID') || ''
    when :Prototype then note.attr('proto') || ''
    when :Creator then note.attr('Creator') || ''
    when :Container then get_note_attribute_container(note)
    when :Path then "#{get_note_attribute_container(note)}/#{note_name(note)}"
    when :Name then note_name(note)
    when :Text then note_text(note, with_links: extra == :Links)
    else note.at("./attribute[@name=\"#{attribute.to_s.delete(':')}\"]")&.text.to_s
    end
  end

  # The note's own Name attribute ("" when it has none).
  def note_name(note)
    note.at('./attribute[@name="Name"]')&.text.to_s
  end

  # The note's <text> content, optionally with linked ranges wrapped in [[...]].
  def note_text(note, with_links:)
    text = note.at('./text')&.text.to_s
    with_links ? wrap_text_links(text, note.attr('ID')) : text
  end

  # Wraps each text-link anchor of the note in [[...]] at its exact sstart/slen
  # offsets. Ranges that are too short, out of bounds, duplicated or overlapping
  # an earlier range are skipped.
  def wrap_text_links(text, note_id)
    candidates = tbx_links
                 .select { |link| link[:sourceid] == note_id }
                 .map { |link| [link[:sstart].to_i, link[:slen].to_i] }
                 .select { |start, length| start >= 0 && length >= MIN_LINK_LENGTH && start + length <= text.length }
                 .uniq
                 .sort

    spans = []
    candidates.each do |start, length|
      previous_end = spans.empty? ? 0 : spans.last.sum
      spans << [start, length] if start >= previous_end
    end

    # Insert from right to left so earlier offsets are still valid.
    spans.reverse_each.with_object(text.dup) do |(start, length), linked|
      linked.insert(start + length, ']]')
      linked.insert(start, '[[')
    end
  end

  # Escapes newlines, tabs and double quotes so a value fits in one TSV/CSV cell.
  def escape_delimited(value)
    value.to_s.gsub(/[\n\t"]/, DELIMITED_ESCAPES)
  end
end
# End Class
#### Use case examples ####
# Only run the example when this file is executed directly, not when it is required.
if __FILE__ == $PROGRAM_NAME
  tbx_file = '/Users/bcdav/Dropbox/Github/Tindergit/Docu.tbx'
  ### First we need to create an instance of TinderboxDocument class
  doc = TinderboxDocument.new(tbx_file)
  ## Parse all of the notes with all their attributes
  notes = doc.tbx_notes(:TSV) # To Tab-separated value
  # notes = doc.tbx_notes(:TSV, :Links) # With wiki links added
  # notes = doc.tbx_notes # Hash
  # Spreadsheet path: swap only the trailing ".tbx" for ".tsv", so the result can
  # never be the document itself.
  spreadsheet_file = "#{tbx_file.delete_suffix('.tbx')}.tsv"
  # Write to file (File.write creates it, so no separate `touch` is needed)
  File.write(spreadsheet_file, notes)
  # Argument-list form bypasses the shell, so unusual characters in the path are safe.
  system('open', spreadsheet_file)
end
# Other methods
# p doc.tbx_name
# p doc.tbx_links
# p doc.tbx_attributes
# p doc.tbx_all_attributes
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment