Created — forked from attractivechaos/get_votes.pl

Embed URL

HTTPS clone URL

SSH clone URL

You can clone with HTTPS or SSH.

Download Gist

Plot HackerNews polls on favorite and disliked programming languages

View get_votes.pl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
#!/usr/bin/env perl
 
# This script collects poll votes from HackerNews and writes them, tab-separated,
# to votes.txt and votes-alt.txt. No plot is produced here; plotting is a separate step.
 
use strict;
use warnings;
use IO::Socket::INET;
 
# Open a TCP connection and send a minimal HTTP/1.0 GET request.
#
#   $host  - value sent in the Host: header (and the URL host when proxied)
#   $path  - request path, e.g. '/item?id=1'
#   $proxy - optional "host[:port]" of an HTTP proxy to connect through
#
# Returns the connected socket handle. The caller reads the raw response
# from it (status line and headers included) and is responsible for closing it.
# Dies if the connection cannot be established.
sub http_get {
    my ($host, $path, $proxy) = @_;

    my ($server, $url);
    if ($proxy) {
        # Through a proxy the request line carries the absolute URL.
        $server = $proxy;
        $url    = "http://$host$path";
    }
    else {
        $server = $host;
        $url    = $path;
    }
    $server = $server . ":80" if $server !~ /:\d+$/;    # port 80 by default

    my $sock = IO::Socket::INET->new($server);
    die("Fail to connect server: $server.\n") unless $sock;

    # The two trailing empty strings produce the blank line ending the headers.
    my @request_lines = ("GET $url HTTP/1.0", "Host: $host", "", "");
    print $sock join("\015\012", @request_lines);
    return $sock;
}
 
# Fetch one HackerNews poll page and accumulate its per-option vote counts.
#
#   $host - server to query
#   $path - path of the poll item page
#   $hash - hash ref updated in place: option name => array ref of counts;
#           each call pushes one count for every option it finds
#
# Options named 'Other' are skipped. Every whitespace character in an option
# name is replaced with '-' so that the name is a single whitespace-free token
# (previously only the first whitespace character was replaced).
sub get_poll {
    my ($host, $path, $hash) = @_;
    my $fh = http_get($host, $path);

    # Use a lexical line variable instead of clobbering the global $_.
    while (my $line = <$fh>) {
        # A single line of the page may contain several poll options, hence /g.
        while ($line =~ /<font color=#000000>([^<]+)<\/font><\/div><\/td><\/tr><tr><td><\/td><td class="default"><span class="comhead"><span id=score_\d+>(\d+) points</g) {
            # Copy the captures right away; later regex operations reset $1/$2.
            my ($lang, $cnt) = ($1, $2);
            next if $lang eq 'Other';
            $lang =~ s/\s/-/g;
            push @{ $hash->{$lang} }, $cnt;
        }
    }
    close($fh);
}
 
# Write rows to $file, one row per line, fields separated by tabs.
#   $file - output path (truncated if it already exists)
#   $rows - array ref of array refs
# Dies with the system error if the file cannot be opened or fully written.
sub _write_rows {
    my ($file, $rows) = @_;
    open(my $out, '>', $file) or die "Cannot open $file for writing: $!\n";
    for my $row (@{$rows}) {
        print {$out} join("\t", @{$row}), "\n";
    }
    # Buffered write errors only surface at close, so check it.
    close($out) or die "Cannot close $file: $!\n";
    return;
}

# Fraction of a row's combined votes that came from the first poll.
# Returns 0 when the row has no votes at all, avoiding a division by zero.
sub _first_share {
    my ($row) = @_;
    my $total = $row->[1] + $row->[2];
    return $total ? $row->[1] / $total : 0;
}

# Collect both polls and write two tab-separated tables of
# (name, count in first poll, count in second poll):
#   votes.txt     - sorted by combined vote count, largest first
#   votes-alt.txt - sorted by the first poll's share of the combined count,
#                   largest first
# Only options that received a count from both polls are kept.
sub main {
    my %hash;
    get_poll('news.ycombinator.com', '/item?id=3746692', \%hash);
    get_poll('news.ycombinator.com', '/item?id=3748961', \%hash);

    my @rows;
    for my $lang (keys %hash) {
        my @counts = @{ $hash{$lang} };
        push @rows, [$lang, @counts] if @counts == 2;
    }

    @rows = sort { ($b->[1] + $b->[2]) <=> ($a->[1] + $a->[2]) } @rows;
    _write_rows('votes.txt', \@rows);

    @rows = sort { _first_share($b) <=> _first_share($a) } @rows;
    _write_rows('votes-alt.txt', \@rows);

    return;
}

main();
View get_votes.pl
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
# Scatter-plot the HackerNews language polls: each language's "liked" count
# against its "disliked" count, both scaled to the respective maximum and
# drawn on log10 axes. Reads votes.txt and writes plot.png.

# Switch to the original author's data directory only when it exists, so the
# script also runs elsewhere from a directory that contains votes.txt.
data_dir <- "/Users/robingower/cshop/2206278"
if (file.exists(data_dir)) setwd(data_dir)
library(ggplot2)

# votes.txt is tab-separated with no header row: language, liked, disliked.
d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language","liked","disliked")
# Scale each count to a proportion of the largest count in its column.
d$pliked <- d$liked/max(d$liked)
d$pdisliked <- d$disliked/max(d$disliked)

g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Reference line of slope 1 through the origin of the log-log plot.
  geom_abline(intercept=0,slope=1,alpha=0.25) +
  geom_point(colour="blue", size=1) +
  # Language name above each point, raw "liked : disliked" counts below it.
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked,":",disliked,sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # ggtitle() replaces opts(title=...), which current ggplot2 no longer provides.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png",g)
 
 
# summary(lm(pliked ~ pdisliked, data=d))
# no correlation!

I updated it a little to show the response size:

# Variant of the poll plot in which each point's size shows the total number
# of responses (liked + disliked) for that language.
# Reads votes.txt from the working directory and writes plot.png.
library(ggplot2)
# votes.txt is tab-separated with no header row: language, liked, disliked.
d <- read.csv("votes.txt", sep="\t", header=FALSE)
colnames(d) <- c("language","liked","disliked")

# Scale each count to a proportion of the largest count in its column.
d$pliked <- d$liked/max(d$liked)
d$pdisliked <- d$disliked/max(d$disliked)
# Total responses per language. The column was previously created as
# "respons" but read as "response", so the size mapping received NULL.
d$response <- d$liked + d$disliked

g <- ggplot(d, aes(pdisliked, pliked)) +
  scale_x_log10(name="Count of Dislikes as proportion of max (log base 10)") +
  scale_y_log10(name="Count of Favourite as proportion of max (log base 10)") +
  # Reference line of slope 1 through the origin of the log-log plot.
  geom_abline(intercept=0,slope=1,alpha=0.25) +
  # Refer to the column by bare name inside aes() so it is looked up in d.
  geom_point(colour="#43A2CA", aes(size=response)) +
  # Language name above each point, raw "liked : disliked" counts below it.
  geom_text(aes(label=language), size=2, vjust=-1) +
  geom_text(aes(label=paste(liked,":",disliked,sep=" ")), size=1.75, vjust=2, alpha=0.5) +
  # ggtitle() replaces opts(title=...), which current ggplot2 no longer provides.
  ggtitle("HackerNews polls on favourite/ disliked programming languages")

ggsave("plot.png",g)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Something went wrong with that request. Please try again.