Skip to content

Instantly share code, notes, and snippets.

@ribomation
Created January 23, 2024 12:38
Show Gist options
  • Save ribomation/7863536c12aacdf09f95b4d944a14820 to your computer and use it in GitHub Desktop.
Save ribomation/7863536c12aacdf09f95b4d944a14820 to your computer and use it in GitHub Desktop.
1BRC implementations in Python and Perl
use strict;
use warnings;
use Time::HiRes qw(gettimeofday tv_interval);
# 1BRC-style aggregation: read "station;temperature" lines from a CSV file and
# print, per station (sorted by name), the average, min, max and sample count.
# Usage: script.pl [FILE]   (FILE defaults to the bundled 100K sample path)

# Take the start timestamp before any I/O so the elapsed time covers the whole run.
my $start_time = [ gettimeofday ];

# Default input; the first command-line argument, if given, overrides it.
my $filename = '../../data/weather-data-100K.csv';
$filename = $ARGV[0] if (@ARGV);
print "filename: $filename\n----\n";

# Per-station aggregates: $data{$station} = { count, sum, min, max }.
my %data;
open my $file, '<', $filename or die "cannot open $filename: $!";
LINE:
while (my $line = <$file>) {
    chomp $line;
    my ($station, $temperature) = split ';', $line;

    # A blank line or a line without ';' yields undef fields; skip it instead
    # of aggregating undef (which would warn and pollute the results).
    next LINE if !defined $station || !defined $temperature;

    # One hash lookup per line; the inner hashref is created on first sight.
    my $stats = $data{$station} ||= {};
    $stats->{count}++;
    $stats->{sum} += $temperature;
    if (!defined $stats->{min} || $temperature < $stats->{min}) {
        $stats->{min} = $temperature;
    }
    if (!defined $stats->{max} || $temperature > $stats->{max}) {
        $stats->{max} = $temperature;
    }
}
close $file;

# Report: "<station>: <avg>, <min>/<max> (<count>)", stations in sorted order.
foreach my $station (sort keys %data) {
    my $stats = $data{$station};
    my $cnt   = $stats->{count};
    my $avg   = $stats->{sum} / $cnt;
    printf "%s: %.1f, %.1f/%.1f (%d)\n",
        $station, $avg, $stats->{min}, $stats->{max}, $cnt;
}

# Double quotes are required here: in single quotes "\n" is printed literally.
print "----\n";

# Timing goes to STDERR so it does not mix with the report on STDOUT.
my $end_time = [ gettimeofday ];
my $elapsed_time = tv_interval($start_time, $end_time);
print STDERR "[perl] elapsed $elapsed_time seconds, $filename\n";
import time, sys
# 1BRC-style aggregation: read "station;temperature" lines from a CSV file and
# print, per station (sorted by name), the average, min, max and sample count.
# Usage: script.py [FILE]   (FILE defaults to the bundled 100K sample path)

# Take the start timestamp before any I/O so the elapsed time covers the whole run.
start_time = time.time()

# Default input; the first command-line argument, if given, overrides it.
filename = '../../data/weather-data-100K.csv'
if len(sys.argv) > 1:
    filename = sys.argv[1]
print('filename:', filename, '\n----')

# Per-station aggregates: data[station] = {'count', 'sum', 'min', 'max'}.
data = {}
with open(filename, encoding='utf-8') as file:
    for line in file:
        line = line.strip()
        if not line:
            # A blank line (e.g. trailing newline at EOF) has no ';' to split on.
            continue
        station, temperature = line.split(';')
        temperature = float(temperature)

        entry = data.get(station)
        if entry is None:
            # First reading for this station seeds every aggregate.
            data[station] = {'count': 1, 'sum': temperature,
                             'min': temperature, 'max': temperature}
        else:
            # Update in place rather than building a new dict per line.
            entry['count'] += 1
            entry['sum'] += temperature
            entry['min'] = min(temperature, entry['min'])
            entry['max'] = max(temperature, entry['max'])

# Station names are unique dict keys, so sorting the items orders by name only.
for station, a in sorted(data.items()):
    print('{}: {:+.1f} C, {:+.1f}/{:+.1f} ({})'.format(
        station, a['sum'] / a['count'], a['min'], a['max'], a['count']))
print('------')

# Timing goes to stderr so it does not mix with the report on stdout.
end_time = time.time()
elapsed_time = end_time - start_time
print('[python] elapsed {:.2f} seconds, {}'.format(elapsed_time, filename), file=sys.stderr)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment