Created
December 22, 2016 21:27
-
-
Save hassaku/8a2445a5257d8b8c1448e6241e227a47 to your computer and use it in GitHub Desktop.
enex形式でエクスポートされたEvernoteの名刺データをパースしてCSV化
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# coding: utf-8
# USAGE: ruby parse_enex.rb XX.enex > XX.csv
require 'nokogiri' | |
require 'date' | |
require 'ostruct' | |
# A single exported note. OpenStruct supplies ad-hoc attributes; this script
# assigns `title` and `content` on each instance.
class Note < OpenStruct; end

# Ordered collection of Note objects: a plain Array subclass with no added
# behavior.
class Notes < Array; end
# Read the ENEX file named by the first command-line argument and collect one
# Note (title + plain-text body) per <note> element into `notes`.

# Fail with the usage line instead of an opaque TypeError from File.open(nil).
abort "USAGE: ruby parse_enex.rb XX.enex > XX.csv" if ARGV.empty?

notes = Notes.new

# The block form closes the file handle as soon as Nokogiri has parsed it.
xml = File.open(ARGV[0]) { |file| Nokogiri::XML(file) }

xml.xpath("//note").each do |n|
  note = Note.new

  # at_xpath returns nil when the element is absent; fall back to an empty
  # string so a note without <title> or <content> does not raise.
  title_node = n.at_xpath('title')
  note.title = title_node ? title_node.content : ""

  # The text of <content> is itself an XML document: parse it a second time
  # and keep only its text content.
  content_node = n.at_xpath('content')
  content_xml = content_node ? content_node.content : ""
  note.content = Nokogiri::XML(content_xml).content.to_s

  notes << note
end
# Print a CSV header followed by one "name, email, address" row per note.
#
# Each note body is scanned line by line:
# * a line that is exactly an email label marks the NEXT line as the email;
# * a line that is exactly an address label turns on address mode, which is
#   never switched off: every later non-blank line is appended to the address;
# * a line that merely starts with "メール" / "住所" carries its value inline.

# NOTE(review): "勤務先" is treated as an email label, presumably because it
# appears as a sub-label between "メール" and the value; confirm.
email_labels = ["email", "メール", "勤務先"]

# The original list also contained "email", but that entry was unreachable:
# the email check runs first and skips the line.
# NOTE(review): an English "address" label was presumably intended; confirm.
address_labels = ["住所"]

# Commas would split a value across CSV columns, so they become spaces.
strip_commas = lambda { |text| text.tr(",", " ") }

puts "名前, メール, 住所"

notes.each do |note|
  # The name is the part of the title before the first " - ". An empty or
  # missing title yields an empty name instead of raising on nil.
  name = strip_commas.call(note.title.to_s.split(" - ").first.to_s)

  expecting_email = false
  in_address = false
  email = "".dup
  address = "".dup

  # Collapse whitespace-only lines, then strip leading whitespace from every
  # line so labels can be matched exactly. [[:space:]] also covers non-ASCII
  # spaces (e.g. full-width or no-break space), which \s does not match.
  body = note.content.to_s.gsub(/\n[[:space:]]*\n/, "\n").gsub(/^\s+/, '')

  body.split("\n").each do |line|
    if email_labels.include?(line)
      expecting_email = true
      next
    end

    if address_labels.include?(line)
      in_address = true
      next
    end

    if expecting_email
      email << "#{strip_commas.call(line)} "
      expecting_email = false
    end

    # Skip lines that consist solely of whitespace.
    address << "#{strip_commas.call(line)} " if in_address && line =~ /\S/

    # Inline form: label and value share one line. Only the leading label is
    # removed. The previous split(label).last returned nil for input such as
    # "住所住所" (raising TypeError) and dropped text before a repeated label.
    # Commas are stripped here too, matching the labelled paths above.
    email << strip_commas.call(line.sub(/\Aメール/, "")) if line.start_with?("メール")
    address << strip_commas.call(line.sub(/\A住所/, "")) if line.start_with?("住所")
  end

  puts "#{name}, #{email}, #{address}"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment