public
Created — forked from attractivechaos/get_votes.pl

Plot HackerNews polls on favorite and disliked programming languages

  • Download Gist
get_votes.pl
Perl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
#!/usr/bin/env perl
 
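# This script collects the votes from two HackerNews polls and writes them to
# votes.txt and votes-alt.txt (tab-separated); it does not plot anything itself.
# Use a separate tool (e.g. gnuplot or R) to plot the resulting tables.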
 
use strict;
use warnings;
use IO::Socket::INET;
 
# Open a TCP connection and send a minimal HTTP/1.0 GET request over it.
#
# Arguments:
#   $host  - value sent in the Host header; also the connect target when no
#            proxy is given
#   $path  - request path (including any query string)
#   $proxy - optional "host" or "host:port" of a proxy to connect to instead
#
# Returns the connected socket with the request already written. The caller
# reads the raw response from it (status line and headers included) and is
# responsible for closing it. Dies if the connection cannot be established.
sub http_get {
    my ($host, $path, $proxy) = @_;

    my ($peer, $target);
    if ($proxy) {
        # Through a proxy the request line carries the absolute URL.
        $peer   = $proxy;
        $target = "http://$host$path";
    }
    else {
        $peer   = $host;
        $target = $path;
    }
    $peer .= ":80" if $peer !~ /:\d+$/;    # port 80 by default

    my $sock = IO::Socket::INET->new($peer);
    die("Fail to connect server: $peer.\n") unless $sock;

    # Request line, Host header, then an empty line terminating the headers.
    my @request = ("GET $target HTTP/1.0", "Host: $host", "", "");
    print $sock join("\015\012", @request);
    return $sock;
}
 
# Fetch one HackerNews poll page and collect the score of every poll option.
#
# Arguments:
#   $host - server to fetch the page from
#   $path - path of the poll page on that server
#   $hash - hash ref updated in place: option name => array ref of scores.
#           Each call pushes one score per option it finds, so calling this
#           for several polls accumulates one entry per poll.
#
# The option labelled 'Other' is skipped. Every whitespace character in an
# option name is replaced by '-' so that names are a single token.
# Returns the result of closing the connection.
sub get_poll {
    my ($host, $path, $hash) = @_;

    my $fh = http_get($host, $path);
    while (my $line = <$fh>) {
        # A line may hold several options, hence the /g match in a loop.
        while ($line =~ /<font color=#000000>([^<]+)<\/font><\/div><\/td><\/tr><tr><td><\/td><td class="default"><span class="comhead"><span id=score_\d+>(\d+) points</g) {
            # Copy the captures right away; later regex operations reset them.
            my ($lang, $cnt) = ($1, $2);
            next if $lang eq 'Other';

            # /g is required: without it only the first blank of a multi-word
            # name was replaced (e.g. "A B C" became "A-B C").
            $lang =~ s/\s/-/g;
            push @{ $hash->{$lang} }, $cnt;
        }
    }
    return close($fh);
}
 
# Download both polls, keep the options that occur in both of them, and write
# two tab-separated tables (columns: name, score in the first poll, score in
# the second poll):
#   votes.txt     - ordered by total score, highest first
#   votes-alt.txt - ordered by the first poll's share of the total, highest
#                   first
# Dies with the file name and system error if a file cannot be written.
sub main {
    my %votes_of;
    get_poll('news.ycombinator.com', '/item?id=3746692', \%votes_of);
    get_poll('news.ycombinator.com', '/item?id=3748961', \%votes_of);

    # Only options with exactly two collected scores can be compared.
    my @rows;
    for my $lang (keys %votes_of) {
        my @counts = @{ $votes_of{$lang} };
        push @rows, [$lang, @counts] if @counts == 2;
    }

    @rows = sort { ($b->[1] + $b->[2]) <=> ($a->[1] + $a->[2]) } @rows;
    _write_rows('votes.txt', \@rows);

    # Share of the first score in the total; an option with no votes at all
    # gets 0 instead of triggering a division by zero.
    my $share = sub {
        my ($row) = @_;
        my $total = $row->[1] + $row->[2];
        return $total ? $row->[1] / $total : 0;
    };
    @rows = sort { $share->($b) <=> $share->($a) } @rows;
    _write_rows('votes-alt.txt', \@rows);

    return;
}

# Write each row (an array ref) as one tab-separated line to $file,
# replacing any existing content.
sub _write_rows {
    my ($file, $rows) = @_;

    open(my $out, '>', $file) or die "Cannot open $file for writing: $!\n";
    for my $row (@{$rows}) {
        print {$out} join("\t", @{$row}), "\n";
    }
    # Buffered write errors only surface at close time.
    close($out) or die "Cannot close $file: $!\n";
    return;
}
 
# Script entry point: run the whole download-and-write pipeline.
main();
plot.R
R
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
# Scatter plot of the two HackerNews polls: for every language, its "liked"
# count against its "disliked" count, each scaled by the column maximum and
# drawn on log10 axes. Reads votes.txt and writes plot.png in the working
# directory.

# NOTE(review): machine-specific path; adjust to the directory holding votes.txt.
setwd("/Users/robingower/cshop/2206278")
library(ggplot2)

# votes.txt is tab-separated with no header row.
d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language","liked","disliked")
d$pliked <- d$liked/max(d$liked)
d$pdisliked <- d$disliked/max(d$disliked)

g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Diagonal reference line (intercept 0, slope 1).
  geom_abline(intercept=0,slope=1,alpha=0.25) +
  geom_point(colour="blue", size=1) +
  # Language name above each point, raw "liked : disliked" counts below it.
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked,":",disliked,sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # opts(title=...) is defunct in ggplot2; ggtitle() is its replacement.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png",g)
 
 
# summary(lm(pliked ~ pdisliked, data=d))
# no correlation!

I updated it a little to show the response size:

# Variant of the poll scatter plot in which the size of each point shows the
# total number of responses (liked + disliked) for that language.
# Reads votes.txt and writes plot.png in the working directory.
library(ggplot2)

# votes.txt is tab-separated with no header row.
d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language","liked","disliked")

d$pliked <- d$liked/max(d$liked)
d$pdisliked <- d$disliked/max(d$disliked)
# The column was previously created as "respons" but read back as
# "response", which is NULL, so the size mapping had no data behind it.
d$response <- d$liked + d$disliked

g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Diagonal reference line (intercept 0, slope 1).
  geom_abline(intercept=0,slope=1,alpha=0.25) +
  # Refer to the column by name inside aes() so it is looked up in the plot data.
  geom_point(colour="#43A2CA", aes(size=response)) +
  # Language name above each point, raw "liked : disliked" counts below it.
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked,":",disliked,sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # opts(title=...) is defunct in ggplot2; ggtitle() is its replacement.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png",g)

Please sign in to comment on this gist.

Something went wrong with that request. Please try again.