Created
November 25, 2016 10:13
-
-
Save slhck/a5f6c412051b628493ef5bbabf5a8ae5 to your computer and use it in GitHub Desktop.
Converts logs from AVRate into a parsable, tidy format
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# | |
# Author: Werner Robitza | |
# Synopsis: Converts AVRate logs into a parsable, tidy format with one row per rating | |
# Requirements: gem install trollop awesome_print | |
require 'trollop' | |
require 'ap' | |
require 'csv' | |
# Command-line options, parsed once at startup and read by the rest of the
# script through the $opts global.
$opts = Trollop.options do
  opt :input,   "Input files",                    :type => :strings, :required => true
  opt :output,  "Output CSV file",                :type => :string,  :default => "output.csv"
  opt :append,  "Append to existing output file", :type => :boolean
  opt :verbose, "Verbose mode",                   :type => :boolean
end

# Rejected subject IDs: subjects listed here are left out of the CSV output.
$rejected = []
# ================================================================ | |
# Everything extracted from one subject's log file.
class SubjectData
  # Identifier of the subject.
  attr_accessor :id

  # Name of the playlist file the subject was shown.
  attr_accessor :playlist

  # Rating records collected for this subject; starts out as an empty Array.
  attr_accessor :pvs_data

  def initialize
    @pvs_data = []
  end
end
# A single rating record: one rated PVS file plus the rating given to it.
# All attributes are plain read/write accessors and start out as nil.
class PvsData
  # Identification of the rated file.
  attr_accessor :pvs_id, :pvs_name
  attr_accessor :src_id, :hrc_id

  # The rating itself, when it was given and how long it took.
  attr_accessor :rating, :rating_timestamp, :rating_duration

  # Label of the session phase the rating belongs to.
  attr_accessor :type
end
#================================================================ | |
# Parses one line of the results section into a PvsData record.
#
# A rating line consists of five whitespace-separated fields: date, time,
# rating duration (with a "sec" suffix), the backslash-separated path of the
# rated file, and the rating. Decimal commas are accepted in both numbers.
#
# line - the raw line; surrounding whitespace is ignored
# type - label stored unchanged on the record
#
# Returns a PvsData, or nil for a blank line or the ":PP" marker line.
# Raises ArgumentError when the line has fewer than five fields; an
# unparsable date/time raises whatever DateTime.parse raises.
def parsePvsLine(line, type)
  line = line.strip
  return nil if line.empty? || line == ":PP"

  date, time, ratingtime, pvsname, rating = line.split(" ")
  # rating is the last field, so it is nil whenever any field is missing
  raise ArgumentError, "Malformed rating line: #{line.inspect}" if rating.nil?

  file_name = pvsname.split("\\").last

  pvs_data = PvsData.new
  pvs_data.pvs_name = file_name
  pvs_data.pvs_id = File.basename(file_name, '.*')
  # the last SRCnn / HRCnn occurrence anywhere in the path wins
  pvs_data.src_id = pvsname.scan(/SRC(\d+)/).flatten.last || "N/A"
  pvs_data.hrc_id = pvsname.scan(/HRC(\d+)/).flatten.last || "N/A"
  pvs_data.rating_timestamp = DateTime.parse("#{date} #{time}")
  pvs_data.rating_duration = ratingtime.gsub("sec", "").gsub(",", ".").to_f
  pvs_data.rating = rating.gsub(",", ".").to_f + 1 # offset for 0-based values
  pvs_data.type = type
  pvs_data
end
# Parses one AVRate log file into a SubjectData record.
#
# The text in front of the "--- Results ---" separator supplies the playlist
# file name (last "Playlist File:" entry, directory part removed); the subject
# ID is the run of digits between an underscore and ".lst" in that name. The
# results section, cut off at the rating histogram, is split at the ":TE"
# marker: lines before it are parsed as "training" ratings, lines after it as
# "test" ratings, each through parsePvsLine.
#
# file - path of the log file to read
#
# Returns the populated SubjectData.
# Raises ArgumentError when the results separator, the playlist entry, the
# subject ID or the ":TE" marker cannot be found. Errors raised while reading
# the file or by parsePvsLine propagate unchanged.
def parseFile(file)
  puts "INFO: parsing file #{file}"

  # File.read only ever opens a file; Kernel#open would run a command when the
  # path starts with "|".
  text = File.read(file).gsub(/\r\n?/, "\n")

  general_info, raw_results_data = text.split("--- Results ---------------------------")
  raise ArgumentError, "no '--- Results ---' section found" if raw_results_data.nil?

  # get overall info
  subj_data = SubjectData.new
  playlist = general_info.scan(/Playlist File: (.*)\n/).flatten.last.to_s.split("\\").last
  raise ArgumentError, "no 'Playlist File:' entry found" if playlist.nil?
  subj_data.playlist = playlist

  # the "ID:" field of the log isn't used; the ID comes from the playlist name
  subj_data.id = playlist.scan(/_(\d+)\.lst/).flatten.last
  raise ArgumentError, "cannot derive subject ID from playlist #{playlist.inspect}" if subj_data.id.nil?

  # remove histogram and collapse runs of blank lines
  results = raw_results_data.split("--- Rating Histogram ------------------").first.to_s
  results = results.gsub(/\n+/, "\n")

  training_phase_data, test_phase_data = results.split(":TE")
  raise ArgumentError, "no ':TE' marker followed by test ratings found" if test_phase_data.nil?

  [["training", training_phase_data], ["test", test_phase_data]].each do |phase, phase_text|
    phase_text.split("\n").each do |line|
      pvs_data = parsePvsLine(line, phase)
      subj_data.pvs_data << pvs_data unless pvs_data.nil?
    end
  end

  subj_data
end
# Writes one CSV row per rating to the file named by $opts[:output].
#
# The file is overwritten, or appended to when $opts[:append] is set. The
# header row is written whenever the target has no content yet, so appending
# to a missing or empty file still yields a CSV with a header. Subjects whose
# id is listed in $rejected are skipped.
#
# data - collection of subject records; each responds to id, playlist and
#        pvs_data, and each pvs_data entry supplies the per-rating columns
def writeCsvFile(data)
  header = %w(pvs hrc src subject playlist rating type rating_timestamp rating_time pvs_name)
  appending = $opts[:append]

  # File.size? is nil for a missing or zero-length file; this has to be
  # checked before CSV.open creates the file
  needs_header = !(appending && File.size?($opts[:output]))

  CSV.open($opts[:output], appending ? "ab" : "wb") do |csv|
    csv << header if needs_header

    data.each do |subj_data|
      next if $rejected.include?(subj_data.id)

      subj_data.pvs_data.each do |pvs_data|
        csv << [
          pvs_data.pvs_id, pvs_data.hrc_id, pvs_data.src_id,
          subj_data.id, subj_data.playlist,
          pvs_data.rating, pvs_data.type,
          pvs_data.rating_timestamp, pvs_data.rating_duration,
          pvs_data.pvs_name
        ]
      end
    end
  end
end
# ================================================================ | |
# Parse every input log. A file that fails to parse is reported and skipped so
# that the remaining files still make it into the output.
allData = []
$opts[:input].each do |file|
  begin
    allData << parseFile(file)
  rescue StandardError => e
    # StandardError rather than Exception: Ctrl-C (Interrupt) and exit
    # (SystemExit) must still terminate the script instead of being reported
    # as a parse error
    puts "Error in parsing file #{file}: #{e.message}"
  end
end

puts "Writing output file to #{$opts[:output]}"
writeCsvFile(allData)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment