Skip to content

Instantly share code, notes, and snippets.

@hoehrmann
hoehrmann / music_related_youtube_videos.pl
Created January 13, 2012 18:30
Searches YouTube for artist + title of some musical composition and prints out details about videos related to the first result (in other words, it finds related music and other content)
@hoehrmann
hoehrmann / gist:2286936
Created April 2, 2012 20:18
read_line_group
#!perl -w
use strict;
use warnings;
use IO::Unread 'unread';
use Data::Dumper;
sub read_line_group {
my ($handle, $key_regex) = @_;
my $previous_key;
my @lines;
@hoehrmann
hoehrmann / gist:2313504
Created April 5, 2012 19:40
Extract tabular data from PDFs (after using pdftohtml -c -xml)
#!perl -w
use strict;
use warnings;
use XML::LibXML;
use List::Util qw/max/;
use Math::Trig qw/:pi deg2rad rad2deg/;
my $d = XML::LibXML->load_xml(location => 'wurdlist.xml');
my $m = deg2rad( 3 );
@hoehrmann
hoehrmann / gist:2340570
Created April 9, 2012 00:44
Download plain-text versions of public-domain books from EXAMPLE Books.
#!perl -w
use strict;
use warnings;
use LWP::UserAgent;
use HTML::FormatText;
die "Usage: $0 bookid > example.txt\n" unless @ARGV == 1;
my $book = shift @ARGV;
my %seen;
@hoehrmann
hoehrmann / gist:2393552
Created April 15, 2012 15:56
Quick and dirty run lengths
sub run_lengths {
return unless @_ > 0;
my $prev = shift @_;
my @runs = [$prev, 1];
for (@_) {
if ($_ eq $prev) {
$runs[-1]->[1]++;
next;
}
push @runs, [$_, 1];
@hoehrmann
hoehrmann / gist:2395307
Created April 15, 2012 23:27
Merge Internet Archive OCR data with Google Books plain text into JSON
#!perl -w
use strict;
use warnings;
use XML::Parser;
use PerlIO::via::gzip;
use feature 'say';
use Archive::Zip;
use Image::Magick;
use Algorithm::Diff 'sdiff';
use List::Util qw/min max first/;
@hoehrmann
hoehrmann / gist:2439564
Created April 21, 2012 20:59
Extract German noun inflections from Wiktionary (quick and dirty)
#!perl -w
use strict;
use warnings;
use encoding 'utf-8';
use MediaWiki::DumpFile::Pages;
use YAML::XS;
my $pages = MediaWiki::DumpFile::Pages
->new('dewiktionary-20120416-pages-meta-current.xml');
#!perl -w
use strict;
use warnings;
use XML::LibXML;
use XML::LibXSLT;
use autodie;
my ($html_path, $xslt_path) = @ARGV;
my $html_doc = XML::LibXML->load_html( location => $html_path, recover => 2, load_ext_dtd => 0 );
my $xslt_doc = XML::LibXML->load_xml( location => $xslt_path );
<?php
#
# Ad-hoc oblique service for ngram databases with compatible interface
#
header('Content-Type: text/plain;charset=UtF-8');
if (@!isset($_REQUEST['q']) || @!isset($_REQUEST['nick'])) {
printf("I need a q! I need a nick! Sonst functioneren ik niet.");
exit();
// Build a sorted copy of `list`: slice(…, 0) copies its elements into a new
// array, and sort is then applied to that copy rather than to `list` itself.
// The comparator orders by the difference of `p1`; when that difference is
// falsy (e.g. 0, equal p1 values) it falls back to the difference of `p2`.
var ordered = [].sort.call([].slice.call(list, 0), function(a, b) {
return (a.p1 - b.p1) || (a.p2 - b.p2);
});