Skip to content

Instantly share code, notes, and snippets.

@iso2022jp
Created November 12, 2012 06:52
Show Gist options
  • Save iso2022jp/4057868 to your computer and use it in GitHub Desktop.
Save iso2022jp/4057868 to your computer and use it in GitHub Desktop.
Perl: Pure BMP to JPEG converter
#!/usr/bin/perl
use strict;
use warnings;
use integer;
use bmp_io;
sub main ();
sub build_huffman_table ($$);
sub get_code_length ($);
my $ENABLE_RESTART = 1;
#-----------------------------------------------------------------------------
# 既定のハフマンテーブル定義子
#-----------------------------------------------------------------------------
my @HT_DC_L_COUNTS = (0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0);
my @HT_DC_L_VALUES = (0 .. 11);
my @HT_DC_C_COUNTS = (0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0);
my @HT_DC_C_VALUES = (0 .. 11);
my @HT_AC_L_COUNTS = (0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125);
my @HT_AC_L_VALUES = (0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31,
0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32,
0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52,
0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a,
0x16..0x1a, 0x25..0x2a, 0x34..0x3a, 0x43..0x4a, 0x53..0x5a,
0x63..0x6a, 0x73..0x7a, 0x83..0x8a, 0x92..0x9a, 0xa2..0xaa,
0xb2..0xba, 0xc2..0xca, 0xd2..0xda, 0xe1..0xea, 0xf1..0xfa);
my @HT_AC_C_COUNTS = (0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119);
my @HT_AC_C_VALUES = (0x00..0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41,
0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14,
0x42, 0x91, 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25,
0xf1, 0x17..0x1a, 0x26..0x2a, 0x35..0x3a, 0x43..0x4a, 0x53..0x5a,
0x63..0x6a, 0x73..0x7a, 0x82..0x8a, 0x92..0x9a, 0xa2..0xaa,
0xb2..0xba, 0xc2..0xca, 0xd2..0xda, 0xe2..0xea, 0xf2..0xfa);
# 定義子より、符号化用テーブルを作成。
my %HT_DC_MAPPING = (
'y' => build_huffman_table(\@HT_DC_L_COUNTS, \@HT_DC_L_VALUES),
'cb' => build_huffman_table(\@HT_DC_C_COUNTS, \@HT_DC_C_VALUES),
);
$HT_DC_MAPPING{'cr'} = $HT_DC_MAPPING{'cb'};
my %HT_AC_MAPPING = (
'y' => build_huffman_table(\@HT_AC_L_COUNTS, \@HT_AC_L_VALUES),
'cb' => build_huffman_table(\@HT_AC_C_COUNTS, \@HT_AC_C_VALUES),
);
$HT_AC_MAPPING{'cr'} = $HT_AC_MAPPING{'cb'};
#-----------------------------------------------------------------------------
# 高速演算用のテーブル
#-----------------------------------------------------------------------------
use integer;
# 整数演算用の精度
my $INTDCT_PRECISION = 0x7fffffff / (2048 * 6);
no integer;
# DCT 用コサインテーブル: $DCTCOS[v][u] = cos( (2 * u + 1) * v * PI / 16)
my @DCT_COS = map
{
my $vv = $_;
[ map {
my $uu = $_;
cos((2 * $uu + 1) * $vv * 3.14159265258979 / 16);
} (0 .. 7) ]; # u
} (0 .. 7); # v
# DCT 用計算済み整数テーブル: $INTDCT_COS2[q][p][v][u] = $DCTCOS[q][p] * $DCTCOS[v][u];
my @INTDCT_COS2 = map {
my $vv = $_;
[ map {
my $uu = $_;
[ map {
my $qq = $_;
[ map {
my $pp = $_;
int($DCT_COS[$qq][$pp] * $DCT_COS[$vv][$uu] * $INTDCT_PRECISION);
} (0 .. 7) ]; # p
} (0 .. 7) ]; # q
} (0 .. 7) ]; # u
} (0 .. 7); # v
undef @DCT_COS; # 不要
# DCT 乗数テーブル
my @DCT_CUCV = map
{
my $vv = $_;
[ map {
my $uu = $_;
($uu == 0 ? sqrt(0.5) : 1) * ($vv == 0 ? sqrt(0.5) : 1);
} (0 .. 7) ]; # u
} (0 .. 7); # v
use integer;
#-----------------------------------------------------------------------------
# ジグザグテーブル
#-----------------------------------------------------------------------------
my @MCU_ZIGZAG = (
0, 1, 5, 6, 14, 15, 27, 28,
2, 4, 7, 13, 16, 26, 29, 42,
3, 8, 12, 17, 25, 30, 41, 43,
9, 11, 18, 24, 31, 40, 44, 53,
10, 19, 23, 32, 39, 45, 52, 54,
20, 22, 33, 38, 46, 51, 55, 60,
21, 34, 37, 47, 50, 56, 59, 61,
35, 36, 48, 49, 57, 58, 62, 63,
);
main;
exit;
sub main () {
my $data = read_bmp;
my ($width, $height) = (scalar(@{$data->[0]}), scalar(@$data));
binmode(select);
#print "Content-Type: image/jpeg\x0d\x0a\x0d\x0a";
# イメージ開始 (SOI)
print pack("n", 0xffd8);
# JFIF 1.2 情報 (APP0)
print pack("nna5CCx7", 0xffe0, 16, "JFIF\0", 1, 2);
# リスタートインターバル定義 (DRI)
if ($ENABLE_RESTART) {
print pack("nnn", 0xffdd, 4, ($width + 7) >> 3);
}
# 最高品質の 8 ビット量子化テーブル #0 定義 (DQT)
print pack("nnCC64", 0xffdb, 67, 0, (1) x 64);
# 輝度 DC 成分用、既定のハフマンテーブル定義 (DHT)
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_DC_L_COUNTS) + scalar(@HT_DC_L_VALUES),
0b_0000_0000, @HT_DC_L_COUNTS, @HT_DC_L_VALUES);
# 色差 DC 成分用、既定のハフマンテーブル定義 (DHT)
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_DC_C_COUNTS) + scalar(@HT_DC_C_VALUES),
0b_0000_0001, @HT_DC_C_COUNTS, @HT_DC_C_VALUES);
# 輝度 AC 成分用、既定のハフマンテーブル定義 (DHT)
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_AC_L_COUNTS) + scalar(@HT_AC_L_VALUES),
0b_0001_0000, @HT_AC_L_COUNTS, @HT_AC_L_VALUES);
# 色差 AC 成分用、既定のハフマンテーブル定義 (DHT)
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_AC_C_COUNTS) + scalar(@HT_AC_C_VALUES),
0b_0001_0001, @HT_AC_C_COUNTS, @HT_AC_C_VALUES);
# Baseline DCT フレーム開始 (SOF0)
print pack("nnCnnCC9", 0xffc0, 17, 8, $height, $width, 3,
(0, 0b_0001_0001, 0), (1, 0b_0001_0001, 0), (2, 0b_0001_0001, 0));
# スキャン開始 (SOS)
print pack("nnCC6CCC", 0xffda, 12, 3,
(0, 0b_0000_0000), (1, 0b_0001_0001), (2, 0b_0001_0001),
0, 63, 0b_0000_0000);
# データ
# 幅と高さを 8 の倍数長に拡張。
push @{$_}, (($_->[-1]) x ((-$width) & 0x7)) foreach @$data;
push @$data, (($data->[-1]) x ((-$height) & 0x7));
# DC の差分記憶用領域とリスタート採番
my %dc_cache = ('y' => 0, 'cb' => 0, 'cr' => 0);
my $interval = 0;
# クロージャを使った、簡易ビット出力クラス。
sub create_writer () {
my $bit_buffer = '';
my $write = sub ($$) {
my ($value, $length) = @_;
$bit_buffer .= substr(unpack('B*', pack('N', $value)), -$length);
while (length $bit_buffer >= 8) {
my $ch = pack("B8", substr($bit_buffer, 0, 8));
print $ch;
if ($ch eq "\xff") { print "\0" };
$bit_buffer = substr($bit_buffer, 8);
}
# print STDERR sprintf("value: %7d, length: %3d, bit: %s\n", $value, $length, substr(unpack('B*', pack('N', $value)), -$length));
};
my $flush = sub () {
if (length $bit_buffer) {
my $ch = pack('B*', $bit_buffer);
print $ch;
if ($ch eq "\xff") { print "\0" };
$bit_buffer = '';
}
};
($write, $flush);
};
my ($write, $flush) = create_writer;
# 画像の処理。
for (my $yy = 0; $yy < $height; $yy += 8) {
# RSTx [Restart Interval]
if ($ENABLE_RESTART) {
if ($yy) { # 2 行目以降
$flush->();
print pack("n", 0xffd0 + ($interval & 0x7));
++$interval;
# Restart
%dc_cache = ('y' => 0, 'cb' => 0, 'cr' => 0);
}
}
for (my $xx = 0; $xx < $width; $xx += 8) {
# 今回はサンプリングファクタ 1 固定なので、MCU = 8x8
use integer;
# YCbCr として、8x8 ブロックのデータを取り出す
my %ycbcr = ('y' => [], 'cb' => [], 'cr' => []);
foreach (0 .. 7) {
my $rgb_line = $data->[$yy + $_];
my $y_line = $ycbcr{'y'}[$_] = [];
my $cb_line = $ycbcr{'cb'}[$_] = [];
my $cr_line = $ycbcr{'cr'}[$_] = [];
foreach my $rgb (@$rgb_line[$xx .. $xx + 7]) {
my ($r1, $g1, $b1) = (($rgb >> 16) & 0xff, ($rgb >> 8) & 0xff, $rgb & 0xff);
push @$y_line, ($r1 * 299 + $g1 * 587 + $b1 * 114 + 500) / 1000 - 128; # -128-127
push @$cb_line, ($r1 * -16874 + $g1 * -33126 + $b1 * 50000 + 12850000) / 100000 - 128; # -128-128
push @$cr_line, ($r1 * 50000 + $g1 * -41869 + $b1 * -8131 + 12850000) / 100000 - 128; # -128-128
=begin
CCIR 601-1
Y = 0.2990*R + 0.5870*G + 0.1140*B
Cb = -0.1687*R - 0.3313*G + 0.5000*B + CENTER
Cr = 0.5000*R - 0.4187*G - 0.0813*B + CENTER
R = Y + 0.0000*(Cb-CENTER) + 1.4020*(Cr-CENTER)
G = Y - 0.3441*(Cb-CENTER) - 0.7141*(Cr-CENTER)
B = Y + 1.7720*(Cb-CENTER) + 0.0000*(Cr-CENTER)
JPEGLIB
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
* Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE
* Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE
* R = Y + 1.40200 * Cr
* G = Y - 0.34414 * Cb - 0.71414 * Cr
* B = Y + 1.77200 * Cb
=cut
}
}
foreach my $component ('y', 'cb', 'cr') {
# 各種参照を記憶しておく
my $block = $ycbcr{$component};
my $dc_table = $HT_DC_MAPPING{$component};
my $ac_table = $HT_AC_MAPPING{$component};
my $prev_dc = \$dc_cache{$component};
# DCT をかます
my @coefficients = ();
foreach my $vv (0 .. 7) {
foreach my $uu (0 .. 7) {
my $total = 0;
foreach my $qq (0 .. 7) {
foreach my $pp (0 .. 7) {
$total += $block->[$qq][$pp] * $INTDCT_COS2[$uu][$pp][$vv][$qq];
}
}
no integer;
push @coefficients, int($total * $DCT_CUCV[$vv][$uu] / ($INTDCT_PRECISION * 4) + 0.5);
}
}
# 最高品質で書き出すため量子化なし
# ジグザグに並び替える
@coefficients[@MCU_ZIGZAG] = @coefficients;
# DC 成分の差分符号化
my $value = $coefficients[0] - $$prev_dc;
$$prev_dc = $coefficients[0];
my $length = get_code_length($value);
$write->(@{$dc_table->[$length]});
if ($value) {
--$value if $value < 0;
$write->($value, $length);
}
# AC 成分の差分符号化
shift @coefficients;
my $EOB = 0;
my $ZRL = 240;
my $zero_runs = 0; # start of 0
foreach $value (@coefficients) {
if ($value == 0) { # zero
++$zero_runs;
} else {
while ($zero_runs >= 16) { # ZRL
$write->(@{$ac_table->[$ZRL]});
$zero_runs -= 16;
}
$length = get_code_length($value);
$write->(@{$ac_table->[$zero_runs << 4 | $length]});
--$value if $value < 0;
$write->($value, $length);
$zero_runs = 0; # clear runs
}
}
if ($zero_runs) {
$write->(@{$ac_table->[$EOB]});
}
}
}
}
$flush->();
# イメージ終了 (SOI)
print pack("n", 0xffd9);
}
# [ [ $code, $length ] x $count ]
sub build_huffman_table ($$) {
my ($counts, $values) = @_;
my @table = ();
my $code = 0;
my $index = 0; # index of values
foreach my $length (1 .. 16) {
$table[$values->[$index++]] = [ $code++, $length ] foreach 1 .. $counts->[$length - 1];
$code <<= 1;
}
\@table;
}
sub get_code_length ($) {
my $value = ($_[0] >= 0 ? $_[0] : -$_[0]);
if ($value == 0) { return 0 };
if ($value < 2) { return 1 };
if ($value < 4) { return 2 };
if ($value < 8) { return 3 };
if ($value < 16) { return 4 };
if ($value < 32) { return 5 };
if ($value < 64) { return 6 };
if ($value < 128) { return 7 };
if ($value < 256) { return 8 };
if ($value < 512) { return 9 };
if ($value < 1024) { return 10 };
if ($value < 2048) { return 11 };
if ($value < 4096) { return 12 };
die;
}
package bmp_io;
require 5.6.0;
use strict;
use warnings;
use integer;
BEGIN {
use Exporter;
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS);
$VERSION = '1.00';
@ISA = qw/ Exporter /;
@EXPORT = qw/ read_bmp write_bmp write_bmp_data /;
@EXPORT_OK = ();
%EXPORT_TAGS = ();
}
# 標準入力から BMP を読み込む
sub read_bmp () {
use integer;
# 必ずバイナリで。
binmode(STDIN);
# 読み取りバッファ。
my $chunk;
# 読み取り位置。
my $pos = 0;
# $chunk に指定バイト標準入力から読み込むクロージャ。
# 読んだ分だけ $pos を加算。指定長読めないと die する。
# $chunk, $pos を束縛。
my $read_exact = sub ($) {
my $size = shift;
my $read = read(STDIN, $chunk, $size);
defined $read or die $!;
$read == $size or die 'Data truncated.';
$pos += $read;
};
# BITMAPFILEHEADER 読み込み。
$read_exact->(14);
my ($signature, $data_offset) = unpack('a2x8V', $chunk);
# 'BM' チェック。
$signature eq 'BM' or die 'Invalid signature.';
# BITMAPINFOHEADER 読み込み。
$read_exact->(40);
my ($bih_size, $width, $height, $planes,
$bit_count, $compression, $color_count)
= unpack('VVVvvVx12Vx4', $chunk);
# 40 未満のサイズは古い形式なので対応しない。
$bih_size >= 40 or die 'OS/2 bitmaps not supported.';
# BMP は必ず 1 プレーン(面)。
$planes == 1 or die 'Invalid bitmap.';
# ビット数は 1, 4, 8, 24, 32 のどれか。
grep { $bit_count == $_ } (1, 4, 8, 24, 32)
or die 'Not supported bit count.';
# 圧縮は今回扱わない。
$compression == 0 or die 'Not supported compression.';
# 8 ビット以下の BMP はカラーテーブルを持つ。
if ($color_count == 0 and $bit_count <= 8) {
$color_count = 2 ** $bit_count;
}
# ヘッダ全体を読み飛ばす。
if ($bih_size > 40) {
$read_exact->(40);
}
# カラーテーブル。
my $colors;
# カラーテーブルがあれば読み込む。
if ($color_count) {
$read_exact->($color_count * 4);
# "V256" などのテンプレートで配列化。
$colors = [ unpack("V$color_count", $chunk) ];
}
# 標準入力は seek できないのでデータ部まで読み捨て。
if ($data_offset - $pos) {
$read_exact->($data_offset - $pos);
}
# 行のデータ部まで読み飛ばす。
my $line_size = int(($width * $bit_count + 31) / 32) * 4;
# 行リファレンスの配列。ここに結果が入る。
my @data = ();
# トップダウンの BMP の場合高さが負の値。
# 順番に処理した場合、配列に追加する場所が異なる。
# そこで @data を束縛する行追加用のクロージャを作る。
my $insert = $height < 0
? sub { push(@data, $_[0]) }
: sub { unshift(@data, $_[0]) }
;
# 行データ処理用のクロージャを作る。
# $chunk, $width, $insert を束縛。
my $process_line =
$bit_count == 32 ?
sub {
$insert->([ unpack('V*', $chunk) ]);
}
: $bit_count == 24 ?
sub {
my $line = [
map { unpack('V', $_ . "\0") }
$chunk =~ /.{3}/sg
];
$#$line = $width - 1;
$insert->($line);
}
: $bit_count == 16 ?
sub {
my $line = [ unpack('v*', $chunk) ];
$#$line = $width - 1;
$insert->($line);
}
: # $bit_count <= 8
sub {
my $bits = unpack('B*', $chunk);
my $line = [
map { ord pack('b8', scalar reverse $_) }
$bits =~ /.{$bit_count}/sg
];
$#$line = $width - 1;
$insert->($line);
}
;
# データを読み込む。
$height = -$height if $height < 0;
foreach (1 .. $height) {
$read_exact->($line_size);
$process_line->();
}
wantarray ? (\@data, $bit_count, $colors) : \@data;
}
sub write_bmp_data ($$;$) {
my ($data, $bit_count) = @_;
use integer;
my ($width, $height) = (scalar(@{$data->[0]}), scalar(@$data));
my $line_size = int(($width * $bit_count + 31) / 32) * 4;
my $ctable_size = $bit_count > 8 ? 0 : 2 ** $bit_count * 4;
foreach my $line (reverse @$data) {
if ($bit_count == 32) {
print pack("V*", @$line);
} elsif ($bit_count == 24) {
my @bytes = map { substr(pack("V", $_), 0, int($bit_count / 8)) } @$line;
print pack("a*\@$line_size", join('', @bytes));
} elsif ($bit_count == 16) {
my @bytes = ();
foreach (@$line) {
my ($red, $green, $blue) = unpack('xCCC', pack('N', $_));
push @bytes, pack("v", $red >> 3 << 10 | $green >> 3 << 5 | $blue >> 3);
}
print pack("a*\@$line_size", join('', @bytes));
} else {
my @bits = map { substr(unpack("B8", pack("C", $_)), -$bit_count) } @$line;
print pack("B*\@$line_size", join('', @bits));
}
}
}
sub write_bmp ($$;$) {
my ($data, $bit_count, $colors) = @_;
use integer;
my ($width, $height) = (scalar(@{$data->[0]}), scalar(@$data));
my $line_size = int(($width * $bit_count + 31) / 32) * 4;
my $ctable_size = $bit_count > 8 ? 0 : 2 ** $bit_count * 4;
binmode(select);
# BITMAPFILEHEADER
print pack("a2Vx4V", "BM", 54 + $ctable_size + $line_size * $height, 54 + $ctable_size);
# BITMAPINFOHEADER
print pack("VVVvvx24", 40, $width, $height, 1, $bit_count);
# RGBQUAD (カラーテーブル)
if ($ctable_size > 0) {
print pack("V", $_) foreach @$colors;
}
write_bmp_data($data, $bit_count);
}
1;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment