Skip to content

Instantly share code, notes, and snippets.

Björn Höhrmann hoehrmann

Block or report user

Report or block hoehrmann

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
hoehrmann /
Created Jan 13, 2012
Searches YouTube for the artist and title of a musical composition and prints details about the videos related to the first result (in other words, it finds related music and other content).
View gist:2286936
#!perl -w
use strict;
use warnings;
use IO::Unread 'unread';
use Data::Dumper;
sub read_line_group {
my ($handle, $key_regex) = @_;
my $previous_key;
my @lines;
hoehrmann / gist:2313504
Created Apr 5, 2012
Extract tabular data from PDFs (after using pdftohtml -c -xml)
View gist:2313504
#!perl -w
use strict;
use warnings;
use XML::LibXML;
use List::Util qw/max/;
use Math::Trig qw/:pi deg2rad rad2deg/;
my $d = XML::LibXML->load_xml(location => 'wurdlist.xml');
my $m = deg2rad( 3 );
hoehrmann / gist:2340570
Created Apr 9, 2012
Download plain text versions of public domain books from EXAMPLE Books.
View gist:2340570
#!perl -w
use strict;
use warnings;
use LWP::UserAgent;
use HTML::FormatText;
die "Usage: $0 bookid > example.txt\n" unless @ARGV == 1;
my $book = shift @ARGV;
my %seen;
hoehrmann / gist:2393552
Created Apr 15, 2012
Quick and dirty run lengths
View gist:2393552
sub run_lengths {
return unless @_ > 0;
my $prev = shift @_;
my @runs = [$prev, 1];
for (@_) {
if ($_ eq $prev) {
push @runs, [$_, 1];
hoehrmann / gist:2395307
Created Apr 15, 2012
Merge Internet Archive OCR data with Google Books plain text into JSON
View gist:2395307
#!perl -w
use strict;
use warnings;
use XML::Parser;
use PerlIO::via::gzip;
use feature 'say';
use Archive::Zip;
use Image::Magick;
use Algorithm::Diff 'sdiff';
use List::Util qw/min max first/;
hoehrmann / gist:2439564
Created Apr 21, 2012
Extract German noun inflections from Wiktionary (quick and dirty)
View gist:2439564
#!perl -w
use strict;
use warnings;
use encoding 'utf-8';
use MediaWiki::DumpFile::Pages;
use YAML::XS;
my $pages = MediaWiki::DumpFile::Pages
#!perl -w
use strict;
use warnings;
use XML::LibXML;
use XML::LibXSLT;
use autodie;
my ($html_path, $xslt_path) = @ARGV;
my $html_doc = XML::LibXML->load_html( location => $html_path, recover => 2, load_ext_dtd => 0 );
my $xslt_doc = XML::LibXML->load_xml( location => $xslt_path );
View ngrams-plain.php
# Ad-hoc oblique service for ngram databases with compatible interface
header('Content-Type: text/plain;charset=UtF-8');
if (@!isset($_REQUEST['q']) || @!isset($_REQUEST['nick'])) {
printf("I need a q! I need a nick! Sonst functioneren ik niet.");
View gist:4893226
var ordered = [][], 0), function(a, b) {
return (a.p1 - b.p1) || (a.p2 - b.p2);
You can’t perform that action at this time.