Skip to content

Instantly share code, notes, and snippets.

@chadkouse
Created February 20, 2013 04:15
Show Gist options
  • Save chadkouse/4992831 to your computer and use it in GitHub Desktop.
Save chadkouse/4992831 to your computer and use it in GitHub Desktop.
# Collapse runs of consecutive identical rows in 'foo_sorted.tab' into a
# single row per run, written to 'foo_sorted_reduced.tab'.
#
# Input columns (tab-separated):  song_id, artist, title, date, country
# Output columns (tab-separated): song_id, artist, title, day, count, country
#   day   - the last two characters of the date field
#   count - number of consecutive input rows in the run
#
# Only adjacent rows are merged, so the result is a full aggregation only if
# the input is already sorted such that equal rows are adjacent (presumably
# what the "_sorted" file name implies -- confirm against the producer).
# song_id is compared exactly; the other fields are compared
# case-insensitively, and the first row of a run decides the casing that is
# written out.
File.open('foo_sorted_reduced.tab', 'w:UTF-8') { |f|
  # State of the run currently being accumulated. Declared here, in the
  # outer block, so that both the per-line block and the final flush after
  # the loop see the same variables.
  have_group = false
  last_song_id = nil
  last_artist = nil
  last_title = nil
  last_date = nil
  last_country = nil
  song_id_country_count = 0

  # Writes the accumulated run as one output row.
  emit_group = lambda do
    day = last_date[-2, 2]
    f.printf("%s\t%s\t%s\t%s\t%d\t%s\n", last_song_id,
             last_artist,
             last_title,
             day,
             song_id_country_count,
             last_country)
  end

  # Block form so the input handle is closed even if processing raises.
  File.open('foo_sorted.tab', 'r:UTF-8') do |input|
    input.each_line do |line|
      parts = line.strip.split("\t")
      # A blank line (e.g. a trailing newline at end of file) carries no row.
      next if parts.empty?

      # Missing trailing fields become "" so the comparisons below never
      # call casecmp on nil.
      song_id, artist, title, date, country =
        parts.values_at(0, 1, 2, 3, 4).map(&:to_s)

      if have_group &&
         last_song_id == song_id &&
         last_artist.casecmp(artist) == 0 &&
         last_title.casecmp(title) == 0 &&
         last_date.casecmp(date) == 0 &&
         last_country.casecmp(country) == 0
        song_id_country_count += 1
        next
      end

      # The row starts a new run: output the counts for the previous group.
      emit_group.call if have_group

      last_song_id = song_id
      last_artist = artist
      last_title = title
      last_date = date
      last_country = country
      song_id_country_count = 1
      have_group = true
    end
  end

  # Output the counts for the last group; nothing to write for empty input.
  emit_group.call if have_group
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment