Skip to content

Instantly share code, notes, and snippets.

hoehrmann /
Created Jan 13, 2012
Searches YouTube for artist + title of some musical composition and prints out details about videos related to the first result (in other words, it finds related music and other content)
View gist:2286936
#!perl -w
use strict;
use warnings;
use IO::Unread 'unread';
use Data::Dumper;
sub read_line_group {
my ($handle, $key_regex) = @_;
my $previous_key;
my @lines;
hoehrmann / gist:2313504
Created Apr 5, 2012
Extract tabular data from PDFs (after using pdftohtml -c -xml)
View gist:2313504
#!perl -w
use strict;
use warnings;
use XML::LibXML;
use List::Util qw/max/;
use Math::Trig qw/:pi deg2rad rad2deg/;
my $d = XML::LibXML->load_xml(location => 'wurdlist.xml');
my $m = deg2rad( 3 );
hoehrmann / gist:2340570
Created Apr 9, 2012
Download plain text versions of public domain books from EXAMPLE Books.
View gist:2340570
#!perl -w
use strict;
use warnings;
use LWP::UserAgent;
use HTML::FormatText;
die "Usage: $0 bookid > example.txt\n" unless @ARGV == 1;
my $book = shift @ARGV;
my %seen;
hoehrmann / gist:2393552
Created Apr 15, 2012
Quick and dirty run lengths
View gist:2393552
sub run_lengths {
return unless @_ > 0;
my $prev = shift @_;
my @runs = [$prev, 1];
for (@_) {
if ($_ eq $prev) {
push @runs, [$_, 1];
hoehrmann / gist:2395307
Created Apr 15, 2012
Merge Internet Archive OCR data with Google Books plain text into JSON
View gist:2395307
#!perl -w
use strict;
use warnings;
use XML::Parser;
use PerlIO::via::gzip;
use feature 'say';
use Archive::Zip;
use Image::Magick;
use Algorithm::Diff 'sdiff';
use List::Util qw/min max first/;
hoehrmann / gist:2439564
Created Apr 21, 2012
Extract German noun inflections from Wiktionary (quick and dirty)
View gist:2439564
#!perl -w
use strict;
use warnings;
use encoding 'utf-8';
use MediaWiki::DumpFile::Pages;
use YAML::XS;
my $pages = MediaWiki::DumpFile::Pages
#!perl -w
use strict;
use warnings;
use XML::LibXML;
use XML::LibXSLT;
use autodie;
my ($html_path, $xslt_path) = @ARGV;
my $html_doc = XML::LibXML->load_html( location => $html_path, recover => 2, load_ext_dtd => 0 );
my $xslt_doc = XML::LibXML->load_xml( location => $xslt_path );
View ngrams-plain.php
# Ad-hoc oblique service for ngram databases with compatible interface
header('Content-Type: text/plain;charset=UtF-8');
if (@!isset($_REQUEST['q']) || @!isset($_REQUEST['nick'])) {
printf("I need a q! I need a nick! Sonst functioneren ik niet.");
View gist:4893226
var ordered = [][], 0), function(a, b) {
return (a.p1 - b.p1) || (a.p2 - b.p2);
You can’t perform that action at this time.