lpar/reader2evernote.rb

## reader2evernote.rb
#!/usr/bin/env ruby
# encoding: UTF-8

# Google Reader to Evernote

# Quick hack together of a Ruby script which will pull all your Google Reader
# starred items into an Evernote notebook in ENML (Evernote export format).
# Requires Ruby 2.0 and the 'builder' gem (gem install builder). Should work
# on 1.9 but I haven't tested it.
#
# Get your Google Reader data from
#   https://www.google.com/takeout/#custom:reader
# Unpack the zip file, find starred.json, feed it to this supplying an output
# filename.
#
# Should preserve item title, the canonical URL, the date it was starred,
# and the clipped content in HTML format. Worked for my 986 starred items,
# hope it works for yours.
#
# I didn't use tags or categories with starred items, so I haven't
# attempted to handle those. Improvements and bug fixes welcomed.
#
# — mathew <meta@pobox.com> 2013-03-14
#   No copyright. No warranty.
#
# Links:
#  * ENML: http://dev.evernote.com/documentation/cloud/chapters/note_export.php
#  * Ruby: http://www.ruby-lang.org/
#  * My article about Google Reader alternatives:
#      http://meta.ath0.com/2013/03/14/google-reader-alternatives/

require 'json'
require 'builder'

# Evernote timestamp format
ETSF='%Y%m%dT%H%M%SZ'

# Both the input path and the output path are required; without them, print
# a usage summary and exit with a non-zero status.
if ARGV.length < 2
  puts "Usage: reader2evernote starred.json outfile.enex"
  puts " starred.json is the file of the same name from your Google Reader data"
  puts " outfile.enex is wherever you want to put the exported data"
  puts " After running, import outfile.enex into Evernote."
  exit 1
end

# Builds the ENML document used as the body of one exported note.
#
# The clipped HTML is read from item['content']['content'], falling back to
# item['summary']['content'] when the former is absent. It is inserted
# verbatim (not escaped). When neither holds any HTML, a short placeholder
# paragraph is used instead, so the result is always a complete document:
# XML declaration, en-note DOCTYPE and a single <en-note> root element.
#
# The markup is assembled directly as a string rather than through Builder,
# because Builder's declare! wraps String arguments in double quotes and so
# produced a malformed <!DOCTYPE "en-note" ...> declaration.
#
# item - Hash for one entry of the "items" array in starred.json.
#
# Returns the ENML document as a String.
def xmlize(item)
  # First clipping that actually carries HTML wins: full content, then summary.
  html = %w[content summary].map { |key|
    clip = item[key]
    clip && clip['content']
  }.compact.first
  html ||= '<p>No content was clipped for this URL.</p>'

  header = '<?xml version="1.0" encoding="UTF-8"?>' +
           '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">'
  "#{header}<en-note>#{html}</en-note>"
end

# Extracts the URL from a Reader link list.
#
# href - the value of an item's 'canonical' or 'alternate' key: a list of
#        link hashes, or nil when the key is absent.
#
# Returns the 'href' value of the first entry, or nil when the list is
# missing or has no first entry.
def geturl(href)
  return nil unless href

  link = href.first
  return nil unless link

  link['href']
end

# Normalizes an item title for use as a note title.
#
# Angle brackets and line breaks are removed, surrounding whitespace is
# trimmed, and the result is limited to 200 characters. Trimming happens
# after the character removal and again after truncation, so the result
# never starts or ends with whitespace.
#
# title - the item's title String, or nil.
#
# Returns the cleaned title, or "[Missing title]" when the title is nil or
# nothing is left after cleaning.
def gettitle(title)
  fallback = "[Missing title]"
  return fallback unless title

  cleaned = title.gsub(/[<>\n\r]/, '').strip[0, 200].rstrip
  cleaned.empty? ? fallback : cleaned
end

# Main script: read the Reader export named by ARGV[0] and write one <note>
# per starred item to the ENEX file named by ARGV[1].

# File.read closes the input handle once the contents have been read.
jsontxt = File.read(ARGV[0], :encoding => 'UTF-8')
obj = JSON.parse(jsontxt)

# An export without an "items" key is treated as having no items.
items = obj["items"] || []

# The block form closes the output file even if an item raises part-way.
File.open(ARGV[1], "w:UTF-8") do |xmlout|
  xml = Builder::XmlMarkup.new(:indent => 2, :target => xmlout)
  xml.instruct! :xml, :version => "1.0", :encoding => "UTF-8"
  # The DOCTYPE name must match the root element (en-export). It is passed as
  # a Symbol because declare! wraps String arguments in double quotes.
  xml.declare! :DOCTYPE, :"en-export", :SYSTEM, "http://xml.evernote.com/pub/evernote-export.dtd"

  now = Time.now.getgm.strftime(ETSF)
  export_attrs = {"export-date" => now, "application" => "Google Reader", "version" => "2013"}

  xml.tag!("en-export", export_attrs) do
    items.each do |item|
      # Prefer the canonical link; fall back to the alternate one.
      url = geturl(item['canonical']) || geturl(item['alternate'])
      content = xmlize(item)
      title = gettitle(item["title"])
      puts title # progress output, one line per exported item

      xml.note do
        # NOTE(review): the title is inserted unescaped; presumably titles in
        # the Reader export are already entity-encoded - confirm.
        xml.title { |x| x << title }
        xml.tag!("note-attributes") do
          xml.source 'web.clip'
          # Leave the element out rather than emit an empty <source-url/>.
          xml.tag!("source-url", url) if url
        end
        # ETSF ends in a literal 'Z', so the time must be converted to UTC
        # before formatting. Items without a 'published' value get no
        # <created> element.
        published = item['published']
        xml.created(Time.at(published).getgm.strftime(ETSF)) if published
        xml.content { xml.cdata!(content) }
      end
    end
  end
end
	#!/usr/bin/env ruby
	# encoding: UTF-8

	# Google Reader to Evernote

	# Quick hack together of a Ruby script which will pull all your Google Reader
	# starred items into an Evernote notebook in ENML (Evernote export format).
	# Requires Ruby 2.0 and the 'builder' gem (gem install builder). Should work
	# on 1.9 but I haven't tested it.
	#
	# Get your Google Reader data from
	# https://www.google.com/takeout/#custom:reader
	# Unpack the zip file, find starred.json, feed it to this supplying an output
	# filename.
	#
	# Should preserve item title, the canonical URL, the date it was starred,
	# and the clipped content in HTML format. Worked for my 986 starred items,
	# hope it works for yours.
	#
	# I didn't use tags or categories with starred items, so I haven't
	# attempted to handle those. Improvements and bug fixes welcomed.
	#
	# — mathew <meta@pobox.com> 2013-03-14
	# No copyright. No warranty.
	#
	# Links:
	# * ENML: http://dev.evernote.com/documentation/cloud/chapters/note_export.php
	# * Ruby: http://www.ruby-lang.org/
	# * My article about Google Reader alternatives:
	# http://meta.ath0.com/2013/03/14/google-reader-alternatives/

	require 'json'
	require 'builder'

	# Evernote timestamp format
	ETSF='%Y%m%dT%H%M%SZ'

	# Both the input path and the output path are required; without them, print
	# a usage summary and exit with a non-zero status.
	if ARGV.length < 2
	  puts "Usage: reader2evernote starred.json outfile.enex"
	  puts " starred.json is the file of the same name from your Google Reader data"
	  puts " outfile.enex is wherever you want to put the exported data"
	  puts " After running, import outfile.enex into Evernote."
	  exit 1
	end

	# Builds the ENML document used as the body of one exported note.
	#
	# The clipped HTML is read from item['content']['content'], falling back to
	# item['summary']['content'] when the former is absent. It is inserted
	# verbatim (not escaped). When neither holds any HTML, a short placeholder
	# paragraph is used instead, so the result is always a complete document:
	# XML declaration, en-note DOCTYPE and a single <en-note> root element.
	#
	# The markup is assembled directly as a string rather than through Builder,
	# because Builder's declare! wraps String arguments in double quotes and so
	# produced a malformed <!DOCTYPE "en-note" ...> declaration.
	#
	# item - Hash for one entry of the "items" array in starred.json.
	#
	# Returns the ENML document as a String.
	def xmlize(item)
	  # First clipping that actually carries HTML wins: full content, then summary.
	  html = %w[content summary].map { |key|
	    clip = item[key]
	    clip && clip['content']
	  }.compact.first
	  html ||= '<p>No content was clipped for this URL.</p>'

	  header = '<?xml version="1.0" encoding="UTF-8"?>' +
	           '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">'
	  "#{header}<en-note>#{html}</en-note>"
	end

	# Extracts the URL from a Reader link list.
	#
	# href - the value of an item's 'canonical' or 'alternate' key: a list of
	#        link hashes, or nil when the key is absent.
	#
	# Returns the 'href' value of the first entry, or nil when the list is
	# missing or has no first entry.
	def geturl(href)
	  return nil unless href

	  link = href.first
	  return nil unless link

	  link['href']
	end

	# Normalizes an item title for use as a note title.
	#
	# Angle brackets and line breaks are removed, surrounding whitespace is
	# trimmed, and the result is limited to 200 characters. Trimming happens
	# after the character removal and again after truncation, so the result
	# never starts or ends with whitespace.
	#
	# title - the item's title String, or nil.
	#
	# Returns the cleaned title, or "[Missing title]" when the title is nil or
	# nothing is left after cleaning.
	def gettitle(title)
	  fallback = "[Missing title]"
	  return fallback unless title

	  cleaned = title.gsub(/[<>\n\r]/, '').strip[0, 200].rstrip
	  cleaned.empty? ? fallback : cleaned
	end

	# Main script: read the Reader export named by ARGV[0] and write one <note>
	# per starred item to the ENEX file named by ARGV[1].

	# File.read closes the input handle once the contents have been read.
	jsontxt = File.read(ARGV[0], :encoding => 'UTF-8')
	obj = JSON.parse(jsontxt)

	# An export without an "items" key is treated as having no items.
	items = obj["items"] || []

	# The block form closes the output file even if an item raises part-way.
	File.open(ARGV[1], "w:UTF-8") do |xmlout|
	  xml = Builder::XmlMarkup.new(:indent => 2, :target => xmlout)
	  xml.instruct! :xml, :version => "1.0", :encoding => "UTF-8"
	  # The DOCTYPE name must match the root element (en-export). It is passed as
	  # a Symbol because declare! wraps String arguments in double quotes.
	  xml.declare! :DOCTYPE, :"en-export", :SYSTEM, "http://xml.evernote.com/pub/evernote-export.dtd"

	  now = Time.now.getgm.strftime(ETSF)
	  export_attrs = {"export-date" => now, "application" => "Google Reader", "version" => "2013"}

	  xml.tag!("en-export", export_attrs) do
	    items.each do |item|
	      # Prefer the canonical link; fall back to the alternate one.
	      url = geturl(item['canonical']) || geturl(item['alternate'])
	      content = xmlize(item)
	      title = gettitle(item["title"])
	      puts title # progress output, one line per exported item

	      xml.note do
	        # NOTE(review): the title is inserted unescaped; presumably titles in
	        # the Reader export are already entity-encoded - confirm.
	        xml.title { |x| x << title }
	        xml.tag!("note-attributes") do
	          xml.source 'web.clip'
	          # Leave the element out rather than emit an empty <source-url/>.
	          xml.tag!("source-url", url) if url
	        end
	        # ETSF ends in a literal 'Z', so the time must be converted to UTC
	        # before formatting. Items without a 'published' value get no
	        # <created> element.
	        published = item['published']
	        xml.created(Time.at(published).getgm.strftime(ETSF)) if published
	        xml.content { xml.cdata!(content) }
	      end
	    end
	  end
	end