Skip to content

Instantly share code, notes, and snippets.

View takdavid's full-sized avatar

Dávid Takács takdavid

  • Scania
  • Stockholm, Sweden
View GitHub Profile
@takdavid
takdavid / wordcount.pl
Created May 10, 2018 19:52
Word count, keeping all-caps word forms, with better ordering
#!/usr/bin/perl
# Decode STDIN and encode STDOUT as UTF-8. ':encoding(UTF-8)' validates the
# byte stream, whereas the bare 'utf8' layer only flags the data as UTF-8
# without checking it.
binmode(STDIN,  ':encoding(UTF-8)');
binmode(STDOUT, ':encoding(UTF-8)');
sub smart_lowercase($)
{
my $word = shift;
return $word if (length($word) > 1 and uc($word) eq $word);
return lc $word;
#!/usr/bin/perl
use Unicode::UCD 'charinfo';
use Unicode::Normalize 'isNonStDecomp', 'NFD', 'NFC', 'isComp2nd', 'isNonStDecomp';
use charnames ':full';
use Encode;
use HTML::Entities;
#use Getopt::Std;
#our ($opt_f, $opt_r);
@takdavid
takdavid / wordcount.pl
Created December 6, 2016 16:09
Word count
#!/usr/bin/perl
# Count how often each word occurs on STDIN and print one
# "count<TAB>word" line per distinct word, most frequent word first.
use strict;
use warnings;

# Tally lower-cased words. A word is a maximal run of word characters
# and hyphens; everything else acts as a separator.
my %count_of;
while (my $line = <STDIN>) {
    chomp $line;
    for my $word (split /[^-\w]+/, $line) {
        # A line that starts with a separator yields an empty first field.
        next if $word eq '';
        $count_of{ lc $word }++;
    }
}

# Highest count first; ties are broken alphabetically so that the
# output order is deterministic.
my @words = sort { $count_of{$b} <=> $count_of{$a} or $a cmp $b }
            keys %count_of;

# Separate count and word with a tab so the two fields stay distinguishable
# even when a word begins with a digit.
for my $word (@words) {
    print $count_of{$word}, "\t", $word, "\n";
}
@takdavid
takdavid / wordcount.pl
Last active December 6, 2016 16:09
Word count, excluding English stop words.
#!/usr/bin/perl
# Word count that skips stop words. Each line read from the DATA handle
# is treated as one stop word.
my @w;          # every non-stop word seen on STDIN, lower-cased
my $sw = {};    # stop-word lookup: word => 1

# Load the complete stop-word list. The loop must consume every DATA line
# and must not print anything, otherwise the stop words would leak into
# the report on STDOUT.
while (my $stop_word = <DATA>) {
    chomp $stop_word;
    $sw->{$stop_word} = 1;
}
close DATA;

# Collect the words of the input. A word is a maximal run of word
# characters and hyphens; everything else acts as a separator.
while (my $line = <STDIN>) {
    chomp $line;
    for my $word (split /[^-\w]+/, $line) {
        # A line that starts with a separator yields an empty first field.
        next if $word eq '';
        $word = lc $word;
        push @w, $word unless $sw->{$word};
    }
}
my $c = {};
@takdavid
takdavid / setup-django-dynamic-scraper.sh
Last active May 24, 2016 16:20
How to f* install django-dynamic-scraper
#!/bin/bash
# Checks out django-dynamic-scraper and prepares a Python 2.7 virtualenv
# with a pinned celery release inside the checkout.
set -e  # stop at the first command that fails
set -x  # trace every command as it is executed
# Clone the upstream repository via its SSH URL.
git clone git@github.com:holgerd77/django-dynamic-scraper.git
cd django-dynamic-scraper
# Create the virtualenv in ./env using the python2.7 interpreter.
virtualenv -p python2.7 env
# Activate it so the following pip command installs into ./env.
source env/bin/activate
# Pin celery to exactly version 2.4.6.
pip install celery==2.4.6
@takdavid
takdavid / MemcacheHelper.py
Created September 14, 2014 16:14
memoizer decorator with memcache
import memcache
import hashlib
client = None  # shared memcache client, created lazily by getClient()


def getClient():
    """Return the shared memcache client, creating it on the first call.

    The client is configured for the server at 127.0.0.1:11211 (debug
    disabled) and stored in the module-level ``client`` variable, so every
    later call returns the same object.
    """
    global client
    # Test against None explicitly: only "not created yet" should trigger
    # construction, independent of how the client object evaluates as a bool.
    if client is None:
        client = memcache.Client(['127.0.0.1:11211'], debug=0)
    return client
@takdavid
takdavid / HUNMARC.pm
Created December 17, 2013 13:52
MARC::File::HUNMARC, HUNMARC-specific file handling for the marcpm project
package MARC::File::HUNMARC;

=head1 NAME

MARC::File::HUNMARC - HUNMARC-specific file handling

=cut

use strict;
use integer;