Skip to content

Instantly share code, notes, and snippets.

@kimmel
kimmel / accept_http_compression.pl
Last active October 11, 2015 21:38
Show decodable HTTP compression formats
#!/usr/bin/perl
use v5.16;
use warnings;
use autodie qw( :all );
use utf8::all;
use LWP::UserAgent;
# Create the user agent object; nothing in this excerpt uses it yet.
my $ua = LWP::UserAgent->new;
# Ask HTTP::Message for the content encodings it is able to decode; the
# call is made in scalar context.
# NOTE(review): HTTP::Message is never loaded explicitly here; this
# presumably relies on LWP::UserAgent pulling it in -- confirm.
my $can_accept = HTTP::Message::decodable;
@kimmel
kimmel / method_bench.pl
Created September 19, 2012 12:22
A Perl benchmark of foreach loops
#!/usr/bin/perl
use v5.16;
use warnings;
use autodie qw( :all );
use utf8::all;
use List::MoreUtils qw( uniq any );
use Benchmark qw( cmpthese :hireswallclock );
# Starts out empty; the code that fills and reads it is outside this excerpt.
my %file_names = ();
@kimmel
kimmel / gist:3689276
Created September 10, 2012 06:42
simple text matching with index
#!/usr/bin/perl
use v5.14;
use warnings;
use utf8::all;
use File::Slurp qw( read_file );
# Load the pattern table by evaluating fw.pl from the current directory.
# The explicit "./" is needed because perl 5.26+ no longer has "." in @INC,
# so a bare `do 'fw.pl'` would not find the file there.
# fw.pl is expected to return a hash reference whose keys are the patterns.
my $pattern_list = do './fw.pl';
if ( !$pattern_list ) {
    # `do FILE` reports compile errors in $@ and read errors in $!.
    die "Could not parse ./fw.pl: $@" if $@;
    die "Could not read ./fw.pl: $!\n" if !defined $pattern_list;
    die "./fw.pl did not return a true value\n";
}

# Dereference explicitly: `keys $hashref` was an experimental 5.14 feature
# that was removed again in perl 5.24.
my @patterns = keys %{$pattern_list};

# Read the whole text to be searched into a single scalar.
my $content = read_file( 'dracula.txt' );
@kimmel
kimmel / gist:3689246
Created September 10, 2012 06:32
scan dracula for 4k patterns
#!/usr/bin/perl
use v5.14;
use warnings;
use utf8::all;
use File::Slurp qw( read_file );
# Load the pattern table by evaluating fw.pl from the current directory.
# The explicit "./" is needed because perl 5.26+ no longer has "." in @INC,
# so a bare `do 'fw.pl'` would not find the file there.
# fw.pl is expected to return a hash reference whose keys are the patterns.
my $pattern_list = do './fw.pl';
if ( !$pattern_list ) {
    # `do FILE` reports compile errors in $@ and read errors in $!.
    die "Could not parse ./fw.pl: $@" if $@;
    die "Could not read ./fw.pl: $!\n" if !defined $pattern_list;
    die "./fw.pl did not return a true value\n";
}

# Dereference explicitly: `keys $hashref` was an experimental 5.14 feature
# that was removed again in perl 5.24.
my @patterns = keys %{$pattern_list};

# Read the whole text to be searched into a single scalar.
my $content = read_file( 'dracula.txt' );
@kimmel
kimmel / gist:3688579
Created September 10, 2012 02:57
brute force all match patterns
#!/usr/bin/perl
use v5.14;
use warnings;
use utf8::all;
use File::Slurp qw( read_file );
...
# Compile every key of the pattern table into a case-insensitive regex
# anchored on word boundaries at both ends.
# The hash reference is dereferenced explicitly: `keys $hashref` was an
# experimental 5.14 feature that was removed again in perl 5.24.
# NOTE(review): each key is interpolated as regex source under /x, so
# metacharacters are live and whitespace inside a key is ignored; if the
# keys are meant as literal text, wrap them in \Q...\E -- confirm.
my @patterns = map { qr/\b$_\b/ixms } keys %{$pattern_list};
@kimmel
kimmel / gist:3688004
Created September 9, 2012 23:55
text normalization and token splitting
#!/usr/bin/perl
use v5.14;
use warnings;
use utf8::all;
use List::Util qw( reduce );
use List::MoreUtils qw( uniq any );
use Path::Class::Rule;
use File::Slurp qw( read_file );
@kimmel
kimmel / gist:3681026
Created September 8, 2012 23:28
decode_json() takes a binary encoded string
#!/usr/bin/perl
use v5.14;
use warnings;
use utf8::all;
use Encode;
use Data::Dumper;
use JSON::XS qw( decode_json );
# Sample JSON object text whose keys and values include accented Latin
# letters, currency symbols and Cyrillic.
# NOTE(review): with `use utf8::all` in effect this literal is presumably a
# decoded character string, whereas decode_json() is described as taking a
# binary encoded string -- confirm it is encoded before being decoded.
my $wl = '{"creche":"crèche", "¥":"£", "₡":"волн"}';
@kimmel
kimmel / gist:3482211
Created August 26, 2012 18:08
python Beautiful Soup regexp 1
from datetime import datetime
import BeautifulSoup as soup
import requests
...
# Fetch the page over HTTP; `host` and `page` are defined outside this excerpt.
r = requests.get(host + page)
# Parse the raw response body with BeautifulSoup.
doc = soup.BeautifulSoup(r.content)
# Collect every tag (any tag name) carrying class "title" under the
# document's first <table> element.
titles = doc.table.findAll(True, {'class': 'title'})
@kimmel
kimmel / gist:3482190
Created August 26, 2012 18:06
ruby regexp html parsing 2
# The following regexp will break
# Each variable below holds a piece of regexp source as a plain string; the
# pieces are concatenated into one pattern on the last line. Capture numbers
# in the comments refer to that combined pattern.

# Capture 1: the digits of the width attribute on the s.gif image tag.
# NOTE(review): the dots in the host and file name are unescaped, so they
# match any character, not just a literal ".".
indentation = '<img src="http:\/\/ycombinator.com\/images\/s.gif" height=1 width=(\d+)><\/td>'
# Captures 2 and 3: the numeric suffix of the span id, then the point count.
score = '<span id=score_([0-9]+)>([0-9]+) point'
# Capture 4: the id query value of the user link (everything up to the closing quote).
user_id = '<a href="user\\?id=([^"]+)">'
# Capture 5: the text between a closing </a> and the next "|".
time_ago = '<\/a>([^\|]+)\|'
# Capture 6: the contents of the comment's <font> element, matched non-greedily.
comment_body = '<span class=\\"comment\\"><font color=#000000>(.*?)<\\/font>'
# Join the pieces in order, allowing any non-greedy run of characters between them.
regexp_str = "#{indentation}.*?#{score}.*?#{user_id}.*?#{time_ago}.*?#{comment_body}"
@kimmel
kimmel / gist:3482179
Created August 26, 2012 18:05
ruby regexp html parsing 1
# Excerpt from the body of an iteration whose header is outside this view;
# `content`, `post` and `username` are defined there.
# NOTE(review): username is interpolated into the regexp unescaped, so any
# regexp metacharacters in it are live -- confirm that is acceptable.
next if content =~ />#{username}</ # skip if we posted it
next unless post.inner_html =~ /vote/ # skip if it's not a post
# First run of digits that follows an underscore in content.
id = content[/\_(\d+)/,1]
# Inner text of the first node found by searching post for '.comment'
# (presumably an Hpricot-style `/` search -- confirm).
comment_text = (post/'.comment').first.inner_text
# Word characters that follow "user?id=" in content.
commenter = content[/user\?id=(\w+)/,1]