Skip to content

Instantly share code, notes, and snippets.

@peterkeen
Created June 3, 2010 02:20
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterkeen/423346 to your computer and use it in GitHub Desktop.
Save peterkeen/423346 to your computer and use it in GitHub Desktop.
#!/usr/bin/env perl
use strict;
use warnings;
use Try::Tiny;
use IO::File;
use JSON::XS qw/ decode_json /;
use File::Slurp qw/ read_file /;
use constant READ_TIMEOUT_SECONDS => 5;
# see http://apiwiki.twitter.com/Streaming-API-Documentation
# Command-line arguments, in order:
#   capture_hours - how long to capture, in hours
#   max_mb        - approximate cap on captured data, in megabytes
#   output_file   - file the raw stream is appended to
#   keywords      - value of the "track" parameter sent to the Streaming API
my $capture_hours = shift;
my $max_mb = shift;
my $output_file = shift;
my $keywords = shift;

# Fail fast with a usage message instead of running on undefined values.
die "usage: $0 <capture_hours> <max_mb> <output_file> <keywords>\n"
    unless defined $capture_hours
        && defined $max_mb
        && defined $output_file
        && defined $keywords
        && length $keywords;

my $capture_seconds = $capture_hours * 3600;
my $end_time = time() + $capture_seconds;

# The size cap is enforced by counting reads of at most 4096 bytes each.
my $max_num_rounds = int(($max_mb * 1024 * 1024) / 4096);
my $current_round = 0;

my $output_fh = IO::File->new($output_file, 'a+') or die "Cannot open $output_file for append: $!";

# Credentials come from a JSON file shaped like
#   { "default_username": "...", "passwords": { "<username>": "..." } }
my $config_path = "$ENV{HOME}/.twpass";
my $config_contents = read_file($config_path);
my $config = decode_json($config_contents);
my $username = $config->{default_username};
die "No default_username found in $config_path\n" unless defined $username;
my $password = $config->{passwords}->{$username};
die "No password found for '$username' in $config_path\n" unless defined $password;

print STDERR "\nstarted at " . time() . " ending at $end_time, max rounds $max_num_rounds, tracking keywords $keywords\n";

# Wrap a value in single quotes for the shell so that spaces, '?', '&', ';'
# and friends in keywords or credentials cannot be interpreted by it.
my $shell_quote = sub {
    my ($value) = @_;
    $value =~ s/'/'\\''/g;
    return "'$value'";
};

# NOTE(review): the credentials still appear on curl's command line and are
# therefore visible in the process list while the capture runs.
my $command = join ' ',
    'curl', '-q',
    $shell_quote->("http://stream.twitter.com/1/statuses/filter.json?track=$keywords"),
    '-u', $shell_quote->("$username:$password"),
    '2>/dev/null';
# Read one chunk of at most 4096 bytes from $fh, giving up after
# READ_TIMEOUT_SECONDS seconds.
#
#   $fh - handle to read from
#
# Returns the bytes read; an empty string means end of stream.
# Dies with "read timeout at <epoch>\n" when the alarm fires (callers match on
# the "read timeout" prefix) and with "read error: ...\n" when sysread fails,
# so that a failed read is not mistaken for a clean end of stream.
sub capture_sample
{
    my $fh = shift;
    my $buf = '';
    my $bytes_read;
    my $read_error;

    # The trailing "1" lets success be judged by eval's return value rather
    # than by the contents of $@ alone.
    my $ok = eval {
        local $SIG{ALRM} = sub { die "read timeout at " . time() . "\n" };
        alarm(READ_TIMEOUT_SECONDS);
        $bytes_read = sysread($fh, $buf, 4096);
        # Save $! immediately, before any later call can overwrite it.
        $read_error = "$!" unless defined $bytes_read;
        alarm 0;
        1;
    };
    my $failure = $@;

    # Make sure no alarm is left pending, whichever way the eval ended.
    alarm 0;

    die $failure unless $ok;
    die "read error: $read_error\n" unless defined $bytes_read;
    return $buf;
}
# Run $command and append everything it prints to $output_fh, chunk by chunk.
#
# Stops at end of stream, once $end_time has passed, or once $max_num_rounds
# chunks have been written. $current_round is a file-level counter, so the
# size budget carries over when the caller retries after a timeout.
#
# Dies if the command cannot be started or the output file cannot be written;
# exceptions from capture_sample() (e.g. "read timeout ...") propagate.
sub capture_twitter_feed
{
    # Explicit pipe mode: the command string can no longer be mistaken for a
    # file name with a mode prefix. The command itself is left out of the
    # error message because it contains credentials.
    open(my $fh, '-|', $command)
        or die "Cannot start capture command: $!\n";

    while (1) {
        my $buf = capture_sample($fh);

        # Test for "no data" explicitly; a chunk consisting of the single
        # character "0" is false in boolean context but is still data.
        last unless defined $buf && length $buf;

        $output_fh->print($buf)
            or die "Cannot write to output file: $!\n";

        # Pre-increment and >= so that exactly $max_num_rounds chunks are
        # allowed (the post-increment/> form let two extra chunks through).
        last if $end_time < time() || ++$current_round >= $max_num_rounds;
    }
    close $fh;
}
# Keep capturing until capture_twitter_feed() returns normally. A read timeout
# restarts the capture; any other error is fatal.
while (1) {
    try {
        capture_twitter_feed();
        print STDERR "finished\n";
        exit 0;
    }
    catch {
        my $error = $_;
        die $error unless $error =~ /read timeout/;

        # $end_time is otherwise only checked after a successful read, so a
        # silent stream would keep timing out and retrying forever.
        if (time() > $end_time) {
            print STDERR "read timeout. Capture window elapsed, finished\n";
            exit 0;
        }
        print STDERR "read timeout. Retrying...\n";
    };
}
#!/usr/bin/env perl
use strict;
use warnings;
use JSON::XS qw/ decode_json /;
use Try::Tiny;
# Mapper: read carriage-return separated JSON records and emit one
# "<n-gram>\t1" line for every 1-, 2- and 3-word sequence in each record's
# "text" field.
$/ = "\r";

# decode_json() expects raw UTF-8 bytes and does the character decoding
# itself. Reading through a ':utf8' layer hands it already-decoded characters,
# which makes every record containing non-ASCII text fail to decode (and the
# empty catch below then drops it silently).
binmode(STDIN, ':raw');
binmode(STDOUT, ':utf8');

while (my $record = <>) {
    my $obj;
    # Best effort: fragments that are not valid JSON are skipped.
    try {
        $obj = decode_json($record);
    } catch { };

    # Only JSON objects can carry a "text" field; a top-level array would
    # otherwise die on the hash dereference below.
    next unless ref $obj eq 'HASH';

    my $text = $obj->{text};
    # Check definedness and length rather than truth so a text of "0" counts.
    next unless defined $text && length $text;

    # Keep only word characters, '#' (hashtags) and whitespace.
    $text =~ s/[^\w\d#\s]//g;

    # split ' ' skips leading whitespace, so no empty first word is produced.
    my @w = split ' ', lc $text;
    for my $i ( 0 .. $#w ) {
        # Slices running past the end of @w yield undef entries, which
        # print_if_all() discards, so incomplete n-grams are not printed.
        print_if_all(1, $w[$i]);
        print_if_all(2, @w[$i..$i+1]);
        print_if_all(3, @w[$i..$i+2]);
    }
}
# Print one n-gram line, "w1 w2 ... wn\t1\n", to the currently selected output
# handle, but only when exactly $n usable words were supplied.
#
#   $n    - required number of words
#   @rest - candidate words; undef and empty-string entries are discarded
#           before counting
#
# Words are kept or dropped by definedness and length, not by truth, so the
# token "0" is treated as a real word.
sub print_if_all
{
    my ($n, @candidates) = @_;
    my @words = grep { defined($_) && length($_) } @candidates;
    return unless @words == $n;
    return print join(' ', @words) . "\t1\n";
}
#!/usr/bin/env perl
use strict;
use warnings;
# Reducer: read "<key>\t<count>" lines and print one "<key>\t<total>" line per
# distinct key. Output order is whatever order keys() returns.
my %sum;
binmode(STDIN, ':utf8');
binmode(STDOUT, ':utf8');
while (my $line = <>) {
    chomp $line;
    my ($key, $num) = split(/\t/, $line);

    # Skip blank lines and lines without a tab-separated count; they would
    # otherwise raise "uninitialized" warnings and add a bogus "\t0" row.
    next unless defined $key && length $key && defined $num;

    $sum{$key} += $num;
}
print join("\t", $_, $sum{$_}) . "\n" for keys %sum;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment