Last active
October 27, 2020 16:53
-
-
Save sgardn/20ea6b79363ffe9e6bad5801eab38c2a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# Reads a survey-export CSV and writes one PDF of results per simulation.
require 'csv'
require 'prawn'
# require 'pry'
# require 'prawn/table'

# Turn off Prawn's m17n warning for its built-in AFM fonts.
Prawn::Font::AFM.hide_m17n_warning = true
# Leading "N. " prefix of a numbered answer such as "3. Agree"; capture 1 is
# the digits. The dot is escaped and at least one digit is required, so
# "10 apples" or ". x" no longer count as numbered answers.
NUMERIC_FORMAT = /^(\d+)\. /

# A numbered simulation title ("3. Cold call") anywhere inside a cell;
# capture 1 is the title from the number to the end of the line.
SIMULATION_FORMAT = /(\d+\. .*)/

# NOTE(review): not referenced anywhere else in the visible script; kept so
# any outside user of the constant keeps working.
ParseSimulation = Struct.new(:title, :start, :end)
# Debug-only puts: writes +string+ to stdout when the global $DEBUG flag is
# truthy and does nothing otherwise. Always returns nil.
def putsd(string)
  puts string if $DEBUG
end
# One survey question and everything answered for it.
#
# title     - the question's header text
# responses - Array of raw response cells, in the order they were added
# type      - Symbol classifying the answers (:numeric, :yesno, :minutes or :text)
#
# Assigned directly instead of subclassing an anonymous Struct, which would
# only add an extra, nameless class to the ancestor chain.
QuestionColumn = Struct.new(:title, :responses, :type)
# A simulation and the question columns answered for it.
#
# title   - the simulation's title
# results - Array of QuestionColumn, in the order questions were first seen
Simulation = Struct.new(:title, :results) do
  # Adds +value+ to the column titled +question_title+, creating the column
  # when this is the first response for that question.
  #
  # The column's type is decided once, from the first value, via
  # parse_answer_type; later values do not change it.
  #
  # Returns the Array that was pushed onto.
  def upsert_response(question_title, value)
    column = results.find { |r| r.title == question_title }
    if column
      column.responses.push(value)
    else
      results.push(QuestionColumn.new(question_title, [value], parse_answer_type(value)))
    end
  end

  # Writes this simulation's report into +pdf+ (anything responding to
  # #text and #move_down): a heading, then summarised numeric / yes-no
  # columns, then summarised time columns, then every free-text response.
  #
  # Raises RuntimeError when a column has a type other than :numeric,
  # :yesno, :minutes or :text.
  def print(pdf)
    table_responses = []
    time_responses = []
    free_text = []

    results.each do |column|
      case column[:type]
      when :numeric, :yesno then table_responses.push(column)
      when :minutes then time_responses.push(column)
      when :text then free_text.push(column)
      else
        puts column[:responses]
        raise "Unknown column response type for cell in column with title: #{column[:title]}!"
      end
    end

    pdf.text("Results for #{title}", size: 24)
    pdf.move_down 20

    print_summaries(pdf, table_responses, 16)
    print_summaries(pdf, time_responses, 12)

    free_text.each do |column|
      pdf.text(column[:title], size: 18)
      pdf.move_down 20
      column[:responses].each do |resp|
        pdf.text resp
        pdf.move_down 10
      end
    end
  end

  # Writes each column's title followed by its generate_summary line,
  # the latter at +summary_size+ points.
  def print_summaries(pdf, columns, summary_size)
    columns.each do |column|
      pdf.text(column[:title], size: 18)
      pdf.move_down 5
      pdf.text(generate_summary(column), size: summary_size)
      pdf.move_down 15
    end
  end
  private :print_summaries
end
# Builds the one-line summary shown under a question title.
#
# response - anything indexable with [:type] and [:responses]
#            (a QuestionColumn, or a Hash with those keys)
#
# Returns a String:
#   :numeric / :yesno - whatever decimal_to_percent returns for the average /
#                       "Yes" fraction
#   :minutes          - the most frequent response and how often it occurred
#
# Raises RuntimeError for any other type.
def generate_summary(response)
  values = response[:responses]

  case response[:type]
  when :numeric
    decimal_to_percent(summarize_numeric(values))
  when :yesno
    decimal_to_percent(summarize_yesno(values))
  when :minutes
    freq = values.each_with_object(Hash.new(0)) { |v, counts| counts[v] += 1 }
    peak = values.max_by { |v| freq[v] }
    # Interpolation (not String#+) so a nil peak cannot raise TypeError.
    "Most common response: #{peak} (#{freq[peak]} / #{values.length})"
  else
    raise "Should not be coercing text / time into table format"
  end
end
# Formats an average as a whole-number percentage string such as "75%".
#
# d         - Numeric. Values up to and including 1.0 are treated as a
#             fraction (0.5 -> "50%"). Values above 1.0 are treated as an
#             average on a 1..scale_max rating scale (3.0 of 4 -> "75%").
# scale_max - top of the rating scale used when d > 1.0. The default of 4
#             gives the previous hard-coded factor of 25.0.
#
# NOTE(review): an average of exactly 1.0 on a rating scale cannot be told
# apart from the fraction 1.0 and is reported as "100%".
def decimal_to_percent(d, scale_max: 4)
  factor = d > 1.0 ? 100.0 / scale_max : 100
  "#{(d * factor).round}%"
end
# Fraction of responses that are exactly "Yes".
#
# vals - Array of response strings
#
# Returns a Float between 0.0 and 1.0. An empty array gives 0.0 instead of
# the NaN that 0 / 0.0 would produce.
def summarize_yesno(vals)
  return 0.0 if vals.empty?

  vals.count("Yes") / vals.length.to_f
end
# puts summarize_yesno(["Yes", "Yes", "No"]) | |
# Average of the leading numbers of numbered responses such as "3. Agree".
#
# vals - Array of response strings; each must match NUMERIC_FORMAT
#
# Returns a Float. An empty array gives 0.0 instead of NaN.
# Raises RuntimeError when a response does not match NUMERIC_FORMAT.
def summarize_numeric(vals)
  return 0.0 if vals.empty?

  total = vals.sum do |v|
    captures = v.match(NUMERIC_FORMAT)&.captures
    raise "Failed to parse one numeric value from number response" unless captures&.length == 1

    captures.first.to_i
  end
  total / vals.length.to_f
end
# puts summarize_numeric(["1. Hi", "5. There", "10. Buddy boy 1."]) # puts summarize_numeric(["LOL 1. Hi", "5. There", "10. Buddy boy 1."]) # puts summarize_numeric(["1. Hi"]) | |
# Maps a role cell to a role symbol.
#
# value - the cell text, e.g. "1. Trainee". Surrounding whitespace is
#         ignored, so a padded cell does not abort the whole run.
#
# Returns :trainee, :instructor or :volunteer.
# Raises RuntimeError (after printing the offending value) for anything
# else, including nil.
def role_type(value)
  case value.to_s.strip
  when "1. Trainee" then :trainee
  when "2. Instructor" then :instructor
  when "3. Volunteer" then :volunteer
  else
    puts "Saw '#{value}', expected something like '1. Trainee'"
    raise "Expected to see a role of Trainee, Instructor, or Volunteer. Something is probably badly formatted about the source CSV."
  end
end
# Guesses a column's answer type from one response cell.
#
# Returns:
#   :minutes - short cells (< 23 chars) mentioning "minutes", e.g.
#              "3-5 minutes", "longer than 10 minutes". Longer cells are
#              treated as free text that merely mentions minutes.
#   :numeric - the WHOLE cell is digits, a period, a space, then only word
#              characters, "+" and spaces (possibly none), e.g. "3. Agree".
#   :yesno   - exactly "Yes" or "No".
#   :text    - everything else.
#
# The numeric pattern is anchored with \A / \z rather than ^ / $, so one
# line shaped like "2. ok" inside a multi-line comment no longer turns the
# cell into :numeric. Free text that starts with a number still ends up as
# :text when it contains punctuation, e.g.
#   1. The creating was good but could be a little more louder and energetic.
def parse_answer_type(value)
  if /minutes/.match?(value) && value.length < 23
    :minutes
  elsif /\A\d+\. [\w+ ]*\z/.match?(value)
    :numeric
  elsif value == "Yes" || value == "No"
    :yesno
  else
    :text
  end
end
# ---------------------------------------------------------------------------
# Script entry point.
#
# Usage: ./parser.rb target.csv
#
# Reads the CSV, collects the volunteers' responses per simulation, and
# writes one PDF per simulation into a directory named after the CSV
# (without its ".csv" suffix).
#
# CSV layout relied on below:
#   row 0     - ignored
#   row 1     - question headers, matched to response cells by column index
#   rows 2+   - one response row each
#   column 1  - role, decoded by role_type; only :volunteer rows are used
#   column 12 - simulation title, extracted with SIMULATION_FORMAT
#   column 13 - volunteer's name (ignored)
#   column 14+ - responses
# ---------------------------------------------------------------------------
if ARGV.length != 1
  puts 'need an argument to run!'
  puts 'EXAMPLE: ./parser.rb {target.csv}'
  puts 'Will create a folder "target" and put PDF results in that folder'
  exit 1 # a usage error should not report success
end

ROLE_COLUMN = 1
SIMULATION_COLUMN = 12
VOLUNTEER_NAME_COLUMN = 13

csv_path = ARGV[0]
output_dir = csv_path.delete_suffix('.csv')

if Dir.exist?(output_dir)
  puts "Directory '#{output_dir}' detected"
else
  puts "Creating '#{output_dir}' destination directory"
  Dir.mkdir(output_dir)
end

# Flip to true to get the putsd trace of every processed cell.
$DEBUG = false

headers = {} # column index => stripped header text
sims = {}    # simulation title => Simulation, in first-seen order

CSV.foreach(csv_path).with_index do |row, row_number|
  next if row_number.zero?

  if row_number == 1
    row.each_with_index do |cell, idx|
      stripped = cell&.strip
      headers[idx] = stripped unless stripped.nil? || stripped.empty?
    end
    next
  end

  next unless role_type(row[ROLE_COLUMN]) == :volunteer

  putsd "processing row #{row_number}"
  current_sim = nil

  row.each_with_index do |cell, idx|
    next if cell.nil? || cell.empty?

    putsd "#{idx} -> #{cell}"

    if idx == SIMULATION_COLUMN
      # Find the simulation this row belongs to, creating it on first sight.
      matched = cell.match(SIMULATION_FORMAT)
      current_sim = (sims[matched[1]] ||= Simulation.new(matched[1], [])) if matched
    elsif idx > VOLUNTEER_NAME_COLUMN
      # A response cell: fail with a description of the problem rather than
      # a NoMethodError on nil when the row or header is malformed.
      if current_sim.nil?
        raise "Row #{row_number} has a response in column #{idx} but no simulation title in column #{SIMULATION_COLUMN}. Something is probably badly formatted about the source CSV."
      end

      header = headers[idx]
      if header.nil?
        raise "Row #{row_number} has a response in column #{idx} but the header row has no title for that column. Something is probably badly formatted about the source CSV."
      end

      current_sim.upsert_response(header, cell)
    end
  end
end

sims.each_value do |sim|
  next if sim.results.empty?

  # Drop whitespace, colons and path separators so the title is a single
  # file name inside output_dir.
  safe_title = (sim.title.delete_suffix('.') + ".pdf").gsub(/\s+/, "").gsub(%r{[:/\\]}, "")
  puts "generating #{safe_title} in #{output_dir}/"
  Prawn::Document.generate(File.join(output_dir, safe_title)) do |pdf|
    # The font file is looked up relative to the current working directory.
    pdf.font("./Roboto-Regular.ttf")
    sim.print(pdf)
  end
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment