Navigation Menu

Skip to content

Instantly share code, notes, and snippets.

@slhck
Created November 25, 2016 10:13
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save slhck/a5f6c412051b628493ef5bbabf5a8ae5 to your computer and use it in GitHub Desktop.
Save slhck/a5f6c412051b628493ef5bbabf5a8ae5 to your computer and use it in GitHub Desktop.
Converts logs from AVRate into a parsable, tidy format
#!/usr/bin/env ruby
#
# Author: Werner Robitza
# Synopsis: Converts AVRate logs into a parsable, tidy format with one row per rating
# Requirements: gem install trollop awesome_print
require 'trollop'
require 'ap'
require 'csv'
# Command-line options, parsed once at start-up and kept in a global so that
# the functions below can read them.
$opts = Trollop.options do
  opt(:input, "Input files", type: :strings, required: true)
  opt(:output, "Output CSV file", type: :string, default: "output.csv")
  opt(:append, "Append to existing output file", type: :boolean)
  opt(:verbose, "Verbose mode", type: :boolean)
end

# Subject IDs whose ratings are left out of the CSV output (none by default).
$rejected = []
# ================================================================
# Everything collected for one subject: the subject ID, the playlist name
# and the list of per-PVS rating records.
class SubjectData
  attr_accessor :id
  attr_accessor :playlist
  attr_accessor :pvs_data

  # Starts out with an empty list of rating records; id and playlist are
  # left unset until assigned.
  def initialize
    self.pvs_data = Array.new
  end
end
# One rating record: which PVS was rated, how it was rated, when, and in
# which phase. All fields are plain read/write attributes.
class PvsData
  # identification of the rated sequence
  attr_accessor :pvs_id, :pvs_name
  attr_accessor :src_id, :hrc_id
  # the rating itself and its timing
  attr_accessor :rating, :rating_timestamp, :rating_duration
  # phase label assigned by the caller
  attr_accessor :type
end
#================================================================
# Parses a single line from the results section of a log into a PvsData.
#
# A rating line is expected to hold five whitespace-separated fields:
# date, time, rating duration, PVS path and rating.
#
# line - one line of the results section (String)
# type - label stored on the record as-is (callers in this file pass
#        "training" or "test")
#
# Returns a populated PvsData, or nil when the line is blank or is the
# ":PP" marker (both are ignored, including when padded with whitespace).
# Raises ArgumentError when the line has fewer than five fields.
def parsePvsLine(line, type)
  line = line.strip
  return nil if line.empty? || line == ":PP"

  date, time, ratingtime, pvsname, rating = line.split(" ")
  if rating.nil?
    raise ArgumentError, "malformed rating line (expected 5 fields): #{line.inspect}"
  end

  # The PVS path uses backslashes as separators, so split it by hand
  # instead of relying on the platform's File separator.
  file_name = pvsname.split("\\").last

  # The last SRC<n> / HRC<n> occurrence anywhere in the path wins; a path
  # without such a tag yields "N/A".
  src_match = pvsname.scan(/SRC(\d+)/).last
  hrc_match = pvsname.scan(/HRC(\d+)/).last

  pvs_data = PvsData.new
  pvs_data.pvs_id = File.basename(file_name, '.*')
  pvs_data.pvs_name = file_name
  pvs_data.src_id = src_match ? src_match.first : "N/A"
  pvs_data.hrc_id = hrc_match ? hrc_match.first : "N/A"
  pvs_data.rating_timestamp = DateTime.parse(date + " " + time)
  # Numbers may use a decimal comma; the duration carries a "sec" suffix.
  pvs_data.rating_duration = ratingtime.gsub("sec", "").gsub(",", ".").to_f
  pvs_data.rating = rating.gsub(",", ".").to_f + 1 # offset for 0-based values
  pvs_data.type = type
  pvs_data
end
# Parses one log file into a SubjectData record.
#
# The file is split at the "--- Results ---" marker into a general-info
# header and the results section. Everything from the "--- Rating
# Histogram ---" marker onwards is discarded. Within the results, lines
# before the ":TE" marker are handed to parsePvsLine as "training" and
# lines after it as "test". A phase with no lines (including a log that
# has no ":TE" marker at all) simply contributes no ratings.
#
# file - path of the log file to read
#
# Returns a SubjectData whose pvs_data holds every non-nil parsePvsLine
# result.
# Raises ArgumentError when the results section, the "Playlist File:" line
# or the numeric subject ID in the playlist name is missing; errors from
# reading the file or from parsePvsLine propagate unchanged.
def parseFile(file)
  puts "INFO: parsing file #{file}"

  # File.read rather than Kernel#open: the path comes from the command line
  # and Kernel#open would treat a leading "|" as a command to run.
  contents = File.read(file).gsub(/\r\n?/, "\n")

  general_info, raw_results_data = contents.split("--- Results ---------------------------")
  raise ArgumentError, "no results section found" if raw_results_data.nil?

  # get overall info
  subj_data = SubjectData.new
  playlist_match = general_info.scan(/Playlist File: (.*)\n/).last
  playlist = playlist_match && playlist_match.first.split("\\").last
  raise ArgumentError, "no playlist file name found" if playlist.nil?
  subj_data.playlist = playlist

  # The "ID:" field of the header is not used; the subject ID is taken
  # from the playlist file name (..._<digits>.lst) instead.
  id_match = playlist.scan(/_(\d+)\.lst/).last
  if id_match.nil?
    raise ArgumentError, "cannot derive subject ID from playlist name #{playlist.inspect}"
  end
  subj_data.id = id_match.first

  # remove histogram and collapse runs of blank lines
  raw_results_data = raw_results_data.split("--- Rating Histogram ------------------").first.to_s
  raw_results_data = raw_results_data.gsub(/\n+/, "\n")

  training_phase_data, test_phase_data = raw_results_data.split(":TE")
  [["training", training_phase_data], ["test", test_phase_data]].each do |type, phase_data|
    # to_s: a phase that is absent from the log is nil here
    phase_data.to_s.split("\n").each do |line|
      pvs_data = parsePvsLine(line, type)
      subj_data.pvs_data << pvs_data unless pvs_data.nil?
    end
  end

  subj_data
end
# Writes one CSV row per rating to the file named by $opts[:output].
#
# With $opts[:append] set, rows are appended to the file; otherwise the
# file is overwritten. The header row is written unless rows are being
# appended to a file that already has content, so appending to a missing
# or empty file still produces a CSV with a header.
#
# data - list of subject records; each responds to id, playlist and
#        pvs_data. Subjects whose id is listed in $rejected are skipped.
def writeCsvFile(data)
  output_path = $opts[:output]
  appending = $opts[:append]

  # File.size? is nil for a missing or zero-length file. Check before
  # CSV.open, which creates the file.
  write_header = !(appending && File.size?(output_path))

  CSV.open(output_path, appending ? "ab" : "wb") do |csv|
    if write_header
      csv << %w(pvs hrc src subject playlist rating type rating_timestamp rating_time pvs_name)
    end

    data.each do |subj_data|
      next if $rejected.include?(subj_data.id)

      # column order must match the header row above
      subj_data.pvs_data.each do |pvs_data|
        csv << [
          pvs_data.pvs_id, pvs_data.hrc_id, pvs_data.src_id,
          subj_data.id, subj_data.playlist,
          pvs_data.rating, pvs_data.type,
          pvs_data.rating_timestamp, pvs_data.rating_duration,
          pvs_data.pvs_name
        ]
      end
    end
  end
end
# ================================================================
# Parse every input file, collecting the results. A file that fails to
# parse is reported and skipped so that the remaining files are still
# processed.
allData = []
$opts[:input].each do |file|
  begin
    allData << parseFile(file)
  rescue StandardError => e
    # StandardError rather than Exception: Interrupt (Ctrl-C), SystemExit
    # and similar must still be able to stop the script.
    puts "Error in parsing file #{file}: #{e.message}"
  end
end

puts "Writing output file to #{$opts[:output]}"
writeCsvFile(allData)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment