Convert SSHRC Awards results (in tab separated values) to annual files of titles
* A very simple script to parse SSHRC awards titles from Excel results into annual files.
* It assumes you've downloaded results into Excel from here:
* and then saved the worksheet as tab separated values in a file called sshrc.txt
* Usage (from the command line): php sshrcTsvToAnnualTitles.php
* Versions:
* 1.0 (April 8, 2014): initial release
* License: CC-BY (don't blame me if this reformats your drive)
* Author: Stéfan Sinclair
// Read the tab separated export that sits next to this script. The bytes are never
// re-encoded, so the output files carry the same character encoding as the input.
$inputPath = dirname(__FILE__) . "/sshrc.txt";
$contents = file_get_contents($inputPath);
if ($contents === false) {
	file_put_contents("php://stderr", "Unable to read $inputPath\n");
	exit(1);
}

// Strip the top section and the header row: everything up to and including the
// last column header is discarded. Without that header we cannot tell where the
// data starts, so stop instead of cutting the file at an arbitrary offset.
$lastHeader = "Program Code";
$lastHeaderPosition = strpos($contents, $lastHeader);
if ($lastHeaderPosition === false) {
	file_put_contents("php://stderr", "Header \"$lastHeader\" not found in $inputPath\n");
	exit(1);
}
$contents = (string) substr($contents, $lastHeaderPosition + strlen($lastHeader));

// Strip the bottom section. If the footer marker is absent, keep everything
// rather than trimming a byte off the end of the last row.
$bottomSection = "Source: SSHRC";
$bottomSectionPosition = strpos($contents, $bottomSection);
if ($bottomSectionPosition !== false) {
	$contents = (string) substr($contents, 0, $bottomSectionPosition);
}

// Normalise CRLF / CR line endings to LF first. Otherwise the "\n" of a valid
// "...<digit>\r\n" row ending is preceded by "\r" (not a digit) and would be
// mistaken for a misplaced newline by the join below.
$contents = preg_replace("/\r\n|\r/", "\n", $contents);

// Drop the newline left over after the header row and any trailing newlines
// that preceded the footer.
$contents = trim($contents, "\n");

// Lines that don't end with a number (program code) are probably wrong
// (misplaced newline inside a cell), so glue them to the following line.
$contents = preg_replace("/(?<!\d)\n/", " ", $contents);

// Group titles by year. Column order in each row:
// applicant, organization, title, amount, year, program code.
$years = array();
foreach (explode("\n", $contents) as $line) {
	$cells = explode("\t", $line);
	if (count($cells) < 6) {
		// blank or malformed row: skip it instead of filing it under an empty year
		continue;
	}
	$title = $cells[2];
	$year = trim($cells[4]);
	if ($year === "") {
		// no year means no sensible output filename for this row
		continue;
	}
	$years[$year][] = $title;
}

/* OUTPUT */
// One file per year, one title per line, written next to this script.
foreach ($years as $year => $titles) {
	$filename = dirname(__FILE__) . "/$year.sshrc.titles.txt";
	if (file_put_contents($filename, implode("\n", $titles)) === false) {
		file_put_contents("php://stderr", "Unable to write $filename\n");
		exit(1);
	}
}
