Created
November 27, 2014 16:53
-
-
Save rafapolo/8123944da8101ea36c80 to your computer and use it in GitHub Desktop.
Convert Jeff's .XLS into a .DOT Graph file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
#encoding: utf-8 | |
#comment: Convert Jeff's .XLS into a .DOT Graph file | |
# configurations | |
file = "kenya.xls" | |
# columns to export as graph nodes | |
columns = ["Project", "Platform", "Developer", "Organization", "Funder", "Category", "Type"] | |
# how columns connect to each other | |
relations = { | |
["Organization", "Funder", "Platform", "Category"] => "Project", | |
["Developer", "Type"] => "Platform" | |
} | |
# generate graph node ID from string | |
require 'digest/md5' | |
def to_ID(type, data) | |
data = data.first if data.class == Array | |
md5 = Digest::MD5.hexdigest(data.to_s) | |
"#{type.downcase}_#{md5}" | |
end | |
# remove exceeding spaces and marks | |
def clean(data) | |
data.to_s.strip.gsub('"', '\'') | |
end | |
# print fancy info bar | |
start = Time.now | |
require 'colored' | |
bar = ("="*55).yellow | |
puts bar | |
puts "\t Parsing #{file} spreadsheet...".yellow | |
puts bar | |
# read spreadsheet file | |
require 'pry' | |
require 'simple-spreadsheet' | |
xls = SimpleSpreadsheet::Workbook.read(file) | |
xls.selected_sheet = xls.sheets.first | |
# map all header's columns positions | |
header = {} | |
0.upto xls.last_column do |column| | |
type = xls.cell(1, column) | |
if type | |
data = "#{column} => #{clean(type)}" | |
puts columns.index(type) ? data.green : data.red | |
header[column] = type | |
end | |
end | |
puts bar | |
# iterate over all data lines and rows | |
entities = [] | |
2.upto(xls.last_row) do |line| | |
relation = {} | |
1.upto xls.last_column do |column| | |
data = xls.cell(line, column) | |
if data | |
type = nil | |
unless type = header[column] | |
# if this column doesn't have an associated header | |
# than it's header type is the previous existing one | |
current = column | |
while !type | |
type = header[current-=1] | |
end | |
end | |
if columns.index(type) | |
# instanciate a hash of array types as a relation structure, if empty | |
relation[type] = [] unless relation[type] | |
# and associate it | |
relation[type] << clean(data) | |
# ex: "Developer"=>["Social Impact Lab", "CartONG (France)", "International Media Support (IMS)"] | |
end | |
end | |
printf("\r\tInterpreting messy structure: %d%", 100*(line)/(xls.last_row-1)) | |
end | |
entities << relation | |
end | |
puts | |
# now we have structured data to play with! let's generate the .DOT file | |
puts bar | |
output = file.split('.')[0] + Time.now.strftime("_%Y%m%d") + ".dot" | |
puts "\t\tGenerating graph file...".yellow | |
puts bar | |
ids = [] | |
File.open(output, 'w') do |dot_file| | |
dot_file.write("digraph G {\n") | |
entities.each do |rows| | |
rows.keys.each do |type| | |
rows[type].each do |data| | |
unless data.class == Array | |
data = [data] | |
end | |
data.each do |value| | |
id = to_ID(type, value) # generate ID from string | |
# only create node entity if not previously done, avoiding repetition. | |
unless ids.index(id) | |
ids << id | |
append = "\"#{id}\" [label=\"#{value}\" type=\"#{type}\"];\n" | |
dot_file.write(append) | |
end | |
# connect nodes! | |
to_node = nil | |
relations.keys.each{|key| to_node = relations[key] if key.index(type)} | |
if to_node && rows[to_node] # how about nodes with no related nodes? | |
to_id = to_ID(to_node, rows[to_node]) | |
connection = "\"#{id}\" -> \"#{to_id}\";\n" | |
dot_file.write(connection) | |
end | |
end | |
end | |
end | |
end | |
dot_file.write("}") | |
puts ("\t-> #{output} done in %3.2f secs" % [Time.now - start]).green | |
end | |
puts bar |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment