cincodenada/slice_ngrams.sh

## slice_ngrams.sh
# Build one anchored PCRE alternation, ^(word1|word2|...)\t, from every
# comma-separated word in reddit_words.csv (each "'s" is removed first), so
# that only rows whose first tab-delimited field is one of those words match.
# The substitution is quoted: a word containing whitespace or glob characters
# can no longer be split into extra grep arguments or expanded to filenames.
pattern="$(perl -e "while(<>) { s/\'s//g; chomp; push @words, split ','; } printf '^(%s)\\t', join '|', @words;" reddit_words.csv)"

# Decompress the 1-gram shards, pass the stream through pv, and keep only the
# rows that match the pattern.
gunzip -c googlebooks-eng-all-1gram-*-[a-z].gz | pv | grep -P -e "$pattern" > collision_stats.tsv

## total.pl
# total.pl: for each comma-separated word group in word_pairs.csv, print the
# group's combined total followed by every word with its own total, using the
# per-word sums accumulated from collision_stats.tsv.
#
# Input rows of collision_stats.tsv are whitespace-separated and are read as
# word, year, total, books (named after the variables they are stored in).
#
# The bare block keeps the pragmas and lexical variables confined to this
# script, so nothing here leaks into code that follows it in the same file.
{
    use strict;
    use warnings;

    my $stats_file = 'collision_stats.tsv';
    my $pairs_file = 'word_pairs.csv';

    # word => { year => [total, books], 'total' => [total sum, books sum] }
    my %table;

    open(my $stats_fh, '<', $stats_file)
        or die "Cannot open $stats_file: $!\n";
    while (my $line = <$stats_fh>) {
        my ($word, $year, $total, $books) = split ' ', $line;

        # A blank or truncated row has no word/year pair to record.
        next unless defined $year;

        # Missing numeric fields contribute nothing to the sums.
        $total = 0 unless defined $total;
        $books = 0 unless defined $books;

        # Fold the possessive into its base word ("dog's" is counted as "dog").
        $word =~ s/'s$//;

        # Accumulate per year as well as overall: once the possessive is
        # folded, two input rows can land on the same word/year key, and a
        # plain assignment would discard the earlier row.
        $table{$word}{$year}[0]    += $total;
        $table{$word}{$year}[1]    += $books;
        $table{$word}{'total'}[0]  += $total;
        $table{$word}{'total'}[1]  += $books;
    }
    close($stats_fh);

    open(my $pairs_fh, '<', $pairs_file)
        or die "Cannot open $pairs_file: $!\n";
    while (my $line = <$pairs_fh>) {
        chomp $line;

        my $group_total = 0;
        my @labels;
        for my $word (split /,/, $line) {
            $word =~ s/'s$//;

            # Words absent from the stats file count as zero; exists() avoids
            # autovivifying an empty table entry for them.
            my $count = exists $table{$word} ? $table{$word}{'total'}[0] : 0;

            $group_total += $count;
            push @labels, sprintf('%s (%d)', $word, $count);
        }

        # Output line: "<group total> <word (total)>,<word (total)>,..."
        printf "%d %s\n", $group_total, join(',', @labels);
    }
    close($pairs_fh);
}
	# Sums the per-word totals found in collision_stats.tsv and prints, for each
	# comma-separated word group in word_pairs.csv, the group's combined total
	# followed by every word with its own total.
	#
	# NOTE(review): this tab-indented section repeats the total.pl logic that
	# precedes it in this file; if both run, the report is printed twice.
	# Confirm whether the duplication is intentional.
	#
	# The bare block keeps the pragmas and lexical variables local to this
	# section so it cannot interfere with the code around it.
	{
		use strict;
		use warnings;

		my $stats_file = 'collision_stats.tsv';
		my $pairs_file = 'word_pairs.csv';

		# word => { year => [total, books], 'total' => [total sum, books sum] }
		my %table;

		open(my $stats_fh, '<', $stats_file)
			or die "Cannot open $stats_file: $!\n";
		while (my $line = <$stats_fh>) {
			# Rows are whitespace-separated: word, year, total, books.
			my ($word, $year, $total, $books) = split ' ', $line;

			# A blank or truncated row has no word/year pair to record.
			next unless defined $year;

			# Missing numeric fields contribute nothing to the sums.
			$total = 0 unless defined $total;
			$books = 0 unless defined $books;

			# Fold the possessive into its base word ("dog's" becomes "dog").
			$word =~ s/'s$//;

			# Accumulate per year too: after folding, two rows can share a
			# word/year key and assignment would drop the earlier one.
			$table{$word}{$year}[0]   += $total;
			$table{$word}{$year}[1]   += $books;
			$table{$word}{'total'}[0] += $total;
			$table{$word}{'total'}[1] += $books;
		}
		close($stats_fh);

		open(my $pairs_fh, '<', $pairs_file)
			or die "Cannot open $pairs_file: $!\n";
		while (my $line = <$pairs_fh>) {
			chomp $line;

			my $group_total = 0;
			my @labels;
			for my $word (split /,/, $line) {
				$word =~ s/'s$//;

				# Unknown words count as zero; exists() keeps the lookup from
				# creating an empty table entry.
				my $count = exists $table{$word} ? $table{$word}{'total'}[0] : 0;

				$group_total += $count;
				push @labels, sprintf('%s (%d)', $word, $count);
			}

			# Output line: "<group total> <word (total)>,<word (total)>,..."
			printf "%d %s\n", $group_total, join(',', @labels);
		}
		close($pairs_fh);
	}