Created
November 12, 2012 06:52
-
-
Save iso2022jp/4057868 to your computer and use it in GitHub Desktop.
Perl: Pure BMP to JPEG converter
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl | |
use strict; | |
use warnings; | |
use integer; | |
use bmp_io; | |
sub main (); | |
sub build_huffman_table ($$); | |
sub get_code_length ($); | |
my $ENABLE_RESTART = 1; | |
#----------------------------------------------------------------------------- | |
# 既定のハフマンテーブル定義子 | |
#----------------------------------------------------------------------------- | |
my @HT_DC_L_COUNTS = (0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0); | |
my @HT_DC_L_VALUES = (0 .. 11); | |
my @HT_DC_C_COUNTS = (0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0); | |
my @HT_DC_C_VALUES = (0 .. 11); | |
my @HT_AC_L_COUNTS = (0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 125); | |
my @HT_AC_L_VALUES = (0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12, 0x21, 0x31, | |
0x41, 0x06, 0x13, 0x51, 0x61, 0x07, 0x22, 0x71, 0x14, 0x32, | |
0x81, 0x91, 0xa1, 0x08, 0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, | |
0xd1, 0xf0, 0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, | |
0x16..0x1a, 0x25..0x2a, 0x34..0x3a, 0x43..0x4a, 0x53..0x5a, | |
0x63..0x6a, 0x73..0x7a, 0x83..0x8a, 0x92..0x9a, 0xa2..0xaa, | |
0xb2..0xba, 0xc2..0xca, 0xd2..0xda, 0xe1..0xea, 0xf1..0xfa); | |
my @HT_AC_C_COUNTS = (0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 119); | |
my @HT_AC_C_VALUES = (0x00..0x03, 0x11, 0x04, 0x05, 0x21, 0x31, 0x06, 0x12, 0x41, | |
0x51, 0x07, 0x61, 0x71, 0x13, 0x22, 0x32, 0x81, 0x08, 0x14, | |
0x42, 0x91, 0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0, | |
0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34, 0xe1, 0x25, | |
0xf1, 0x17..0x1a, 0x26..0x2a, 0x35..0x3a, 0x43..0x4a, 0x53..0x5a, | |
0x63..0x6a, 0x73..0x7a, 0x82..0x8a, 0x92..0x9a, 0xa2..0xaa, | |
0xb2..0xba, 0xc2..0xca, 0xd2..0xda, 0xe2..0xea, 0xf2..0xfa); | |
# 定義子より、符号化用テーブルを作成。 | |
my %HT_DC_MAPPING = ( | |
'y' => build_huffman_table(\@HT_DC_L_COUNTS, \@HT_DC_L_VALUES), | |
'cb' => build_huffman_table(\@HT_DC_C_COUNTS, \@HT_DC_C_VALUES), | |
); | |
$HT_DC_MAPPING{'cr'} = $HT_DC_MAPPING{'cb'}; | |
my %HT_AC_MAPPING = ( | |
'y' => build_huffman_table(\@HT_AC_L_COUNTS, \@HT_AC_L_VALUES), | |
'cb' => build_huffman_table(\@HT_AC_C_COUNTS, \@HT_AC_C_VALUES), | |
); | |
$HT_AC_MAPPING{'cr'} = $HT_AC_MAPPING{'cb'}; | |
#----------------------------------------------------------------------------- | |
# 高速演算用のテーブル | |
#----------------------------------------------------------------------------- | |
use integer; | |
# 整数演算用の精度 | |
my $INTDCT_PRECISION = 0x7fffffff / (2048 * 6); | |
no integer; | |
# DCT 用コサインテーブル: $DCTCOS[v][u] = cos( (2 * u + 1) * v * PI / 16) | |
my @DCT_COS = map | |
{ | |
my $vv = $_; | |
[ map { | |
my $uu = $_; | |
cos((2 * $uu + 1) * $vv * 3.14159265258979 / 16); | |
} (0 .. 7) ]; # u | |
} (0 .. 7); # v | |
# DCT 用計算済み整数テーブル: $INTDCT_COS2[q][p][v][u] = $DCTCOS[q][p] * $DCTCOS[v][u]; | |
my @INTDCT_COS2 = map { | |
my $vv = $_; | |
[ map { | |
my $uu = $_; | |
[ map { | |
my $qq = $_; | |
[ map { | |
my $pp = $_; | |
int($DCT_COS[$qq][$pp] * $DCT_COS[$vv][$uu] * $INTDCT_PRECISION); | |
} (0 .. 7) ]; # p | |
} (0 .. 7) ]; # q | |
} (0 .. 7) ]; # u | |
} (0 .. 7); # v | |
undef @DCT_COS; # 不要 | |
# DCT 乗数テーブル | |
my @DCT_CUCV = map | |
{ | |
my $vv = $_; | |
[ map { | |
my $uu = $_; | |
($uu == 0 ? sqrt(0.5) : 1) * ($vv == 0 ? sqrt(0.5) : 1); | |
} (0 .. 7) ]; # u | |
} (0 .. 7); # v | |
use integer; | |
#----------------------------------------------------------------------------- | |
# ジグザグテーブル | |
#----------------------------------------------------------------------------- | |
my @MCU_ZIGZAG = ( | |
0, 1, 5, 6, 14, 15, 27, 28, | |
2, 4, 7, 13, 16, 26, 29, 42, | |
3, 8, 12, 17, 25, 30, 41, 43, | |
9, 11, 18, 24, 31, 40, 44, 53, | |
10, 19, 23, 32, 39, 45, 52, 54, | |
20, 22, 33, 38, 46, 51, 55, 60, | |
21, 34, 37, 47, 50, 56, 59, 61, | |
35, 36, 48, 49, 57, 58, 62, 63, | |
); | |
main; | |
exit; | |
sub main () { | |
my $data = read_bmp; | |
my ($width, $height) = (scalar(@{$data->[0]}), scalar(@$data)); | |
binmode(select); | |
#print "Content-Type: image/jpeg\x0d\x0a\x0d\x0a"; | |
# イメージ開始 (SOI) | |
print pack("n", 0xffd8); | |
# JFIF 1.2 情報 (APP0) | |
print pack("nna5CCx7", 0xffe0, 16, "JFIF\0", 1, 2); | |
# リスタートインターバル定義 (DRI) | |
if ($ENABLE_RESTART) { | |
print pack("nnn", 0xffdd, 4, ($width + 7) >> 3); | |
} | |
# 最高品質の 8 ビット量子化テーブル #0 定義 (DQT) | |
print pack("nnCC64", 0xffdb, 67, 0, (1) x 64); | |
# 輝度 DC 成分用、既定のハフマンテーブル定義 (DHT) | |
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_DC_L_COUNTS) + scalar(@HT_DC_L_VALUES), | |
0b_0000_0000, @HT_DC_L_COUNTS, @HT_DC_L_VALUES); | |
# 色差 DC 成分用、既定のハフマンテーブル定義 (DHT) | |
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_DC_C_COUNTS) + scalar(@HT_DC_C_VALUES), | |
0b_0000_0001, @HT_DC_C_COUNTS, @HT_DC_C_VALUES); | |
# 輝度 AC 成分用、既定のハフマンテーブル定義 (DHT) | |
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_AC_L_COUNTS) + scalar(@HT_AC_L_VALUES), | |
0b_0001_0000, @HT_AC_L_COUNTS, @HT_AC_L_VALUES); | |
# 色差 AC 成分用、既定のハフマンテーブル定義 (DHT) | |
print pack("nnCC*", 0xffc4, 3 + scalar(@HT_AC_C_COUNTS) + scalar(@HT_AC_C_VALUES), | |
0b_0001_0001, @HT_AC_C_COUNTS, @HT_AC_C_VALUES); | |
# Baseline DCT フレーム開始 (SOF0) | |
print pack("nnCnnCC9", 0xffc0, 17, 8, $height, $width, 3, | |
(0, 0b_0001_0001, 0), (1, 0b_0001_0001, 0), (2, 0b_0001_0001, 0)); | |
# スキャン開始 (SOS) | |
print pack("nnCC6CCC", 0xffda, 12, 3, | |
(0, 0b_0000_0000), (1, 0b_0001_0001), (2, 0b_0001_0001), | |
0, 63, 0b_0000_0000); | |
# データ | |
# 幅と高さを 8 の倍数長に拡張。 | |
push @{$_}, (($_->[-1]) x ((-$width) & 0x7)) foreach @$data; | |
push @$data, (($data->[-1]) x ((-$height) & 0x7)); | |
# DC の差分記憶用領域とリスタート採番 | |
my %dc_cache = ('y' => 0, 'cb' => 0, 'cr' => 0); | |
my $interval = 0; | |
# クロージャを使った、簡易ビット出力クラス。 | |
sub create_writer () { | |
my $bit_buffer = ''; | |
my $write = sub ($$) { | |
my ($value, $length) = @_; | |
$bit_buffer .= substr(unpack('B*', pack('N', $value)), -$length); | |
while (length $bit_buffer >= 8) { | |
my $ch = pack("B8", substr($bit_buffer, 0, 8)); | |
print $ch; | |
if ($ch eq "\xff") { print "\0" }; | |
$bit_buffer = substr($bit_buffer, 8); | |
} | |
# print STDERR sprintf("value: %7d, length: %3d, bit: %s\n", $value, $length, substr(unpack('B*', pack('N', $value)), -$length)); | |
}; | |
my $flush = sub () { | |
if (length $bit_buffer) { | |
my $ch = pack('B*', $bit_buffer); | |
print $ch; | |
if ($ch eq "\xff") { print "\0" }; | |
$bit_buffer = ''; | |
} | |
}; | |
($write, $flush); | |
}; | |
my ($write, $flush) = create_writer; | |
# 画像の処理。 | |
for (my $yy = 0; $yy < $height; $yy += 8) { | |
# RSTx [Restart Interval] | |
if ($ENABLE_RESTART) { | |
if ($yy) { # 2 行目以降 | |
$flush->(); | |
print pack("n", 0xffd0 + ($interval & 0x7)); | |
++$interval; | |
# Restart | |
%dc_cache = ('y' => 0, 'cb' => 0, 'cr' => 0); | |
} | |
} | |
for (my $xx = 0; $xx < $width; $xx += 8) { | |
# 今回はサンプリングファクタ 1 固定なので、MCU = 8x8 | |
use integer; | |
# YCbCr として、8x8 ブロックのデータを取り出す | |
my %ycbcr = ('y' => [], 'cb' => [], 'cr' => []); | |
foreach (0 .. 7) { | |
my $rgb_line = $data->[$yy + $_]; | |
my $y_line = $ycbcr{'y'}[$_] = []; | |
my $cb_line = $ycbcr{'cb'}[$_] = []; | |
my $cr_line = $ycbcr{'cr'}[$_] = []; | |
foreach my $rgb (@$rgb_line[$xx .. $xx + 7]) { | |
my ($r1, $g1, $b1) = (($rgb >> 16) & 0xff, ($rgb >> 8) & 0xff, $rgb & 0xff); | |
push @$y_line, ($r1 * 299 + $g1 * 587 + $b1 * 114 + 500) / 1000 - 128; # -128-127 | |
push @$cb_line, ($r1 * -16874 + $g1 * -33126 + $b1 * 50000 + 12850000) / 100000 - 128; # -128-128 | |
push @$cr_line, ($r1 * 50000 + $g1 * -41869 + $b1 * -8131 + 12850000) / 100000 - 128; # -128-128 | |
=begin | |
CCIR 601-1 | |
Y = 0.2990*R + 0.5870*G + 0.1140*B | |
Cb = -0.1687*R - 0.3313*G + 0.5000*B + CENTER | |
Cr = 0.5000*R - 0.4187*G - 0.0813*B + CENTER | |
R = Y + 0.0000*(Cb-CENTER) + 1.4020*(Cr-CENTER) | |
G = Y - 0.3441*(Cb-CENTER) - 0.7141*(Cr-CENTER) | |
B = Y + 1.7720*(Cb-CENTER) + 0.0000*(Cr-CENTER) | |
JPEGLIB | |
* Y = 0.29900 * R + 0.58700 * G + 0.11400 * B | |
* Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B + CENTERJSAMPLE | |
* Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B + CENTERJSAMPLE | |
* R = Y + 1.40200 * Cr | |
* G = Y - 0.34414 * Cb - 0.71414 * Cr | |
* B = Y + 1.77200 * Cb | |
=cut | |
} | |
} | |
foreach my $component ('y', 'cb', 'cr') { | |
# 各種参照を記憶しておく | |
my $block = $ycbcr{$component}; | |
my $dc_table = $HT_DC_MAPPING{$component}; | |
my $ac_table = $HT_AC_MAPPING{$component}; | |
my $prev_dc = \$dc_cache{$component}; | |
# DCT をかます | |
my @coefficients = (); | |
foreach my $vv (0 .. 7) { | |
foreach my $uu (0 .. 7) { | |
my $total = 0; | |
foreach my $qq (0 .. 7) { | |
foreach my $pp (0 .. 7) { | |
$total += $block->[$qq][$pp] * $INTDCT_COS2[$uu][$pp][$vv][$qq]; | |
} | |
} | |
no integer; | |
push @coefficients, int($total * $DCT_CUCV[$vv][$uu] / ($INTDCT_PRECISION * 4) + 0.5); | |
} | |
} | |
# 最高品質で書き出すため量子化なし | |
# ジグザグに並び替える | |
@coefficients[@MCU_ZIGZAG] = @coefficients; | |
# DC 成分の差分符号化 | |
my $value = $coefficients[0] - $$prev_dc; | |
$$prev_dc = $coefficients[0]; | |
my $length = get_code_length($value); | |
$write->(@{$dc_table->[$length]}); | |
if ($value) { | |
--$value if $value < 0; | |
$write->($value, $length); | |
} | |
# AC 成分の差分符号化 | |
shift @coefficients; | |
my $EOB = 0; | |
my $ZRL = 240; | |
my $zero_runs = 0; # start of 0 | |
foreach $value (@coefficients) { | |
if ($value == 0) { # zero | |
++$zero_runs; | |
} else { | |
while ($zero_runs >= 16) { # ZRL | |
$write->(@{$ac_table->[$ZRL]}); | |
$zero_runs -= 16; | |
} | |
$length = get_code_length($value); | |
$write->(@{$ac_table->[$zero_runs << 4 | $length]}); | |
--$value if $value < 0; | |
$write->($value, $length); | |
$zero_runs = 0; # clear runs | |
} | |
} | |
if ($zero_runs) { | |
$write->(@{$ac_table->[$EOB]}); | |
} | |
} | |
} | |
} | |
$flush->(); | |
# イメージ終了 (SOI) | |
print pack("n", 0xffd9); | |
} | |
# [ [ $code, $length ] x $count ] | |
sub build_huffman_table ($$) { | |
my ($counts, $values) = @_; | |
my @table = (); | |
my $code = 0; | |
my $index = 0; # index of values | |
foreach my $length (1 .. 16) { | |
$table[$values->[$index++]] = [ $code++, $length ] foreach 1 .. $counts->[$length - 1]; | |
$code <<= 1; | |
} | |
\@table; | |
} | |
sub get_code_length ($) { | |
my $value = ($_[0] >= 0 ? $_[0] : -$_[0]); | |
if ($value == 0) { return 0 }; | |
if ($value < 2) { return 1 }; | |
if ($value < 4) { return 2 }; | |
if ($value < 8) { return 3 }; | |
if ($value < 16) { return 4 }; | |
if ($value < 32) { return 5 }; | |
if ($value < 64) { return 6 }; | |
if ($value < 128) { return 7 }; | |
if ($value < 256) { return 8 }; | |
if ($value < 512) { return 9 }; | |
if ($value < 1024) { return 10 }; | |
if ($value < 2048) { return 11 }; | |
if ($value < 4096) { return 12 }; | |
die; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package bmp_io; | |
require 5.6.0; | |
use strict; | |
use warnings; | |
use integer; | |
BEGIN { | |
use Exporter; | |
our ($VERSION, @ISA, @EXPORT, @EXPORT_OK, %EXPORT_TAGS); | |
$VERSION = '1.00'; | |
@ISA = qw/ Exporter /; | |
@EXPORT = qw/ read_bmp write_bmp write_bmp_data /; | |
@EXPORT_OK = (); | |
%EXPORT_TAGS = (); | |
} | |
# 標準入力から BMP を読み込む | |
sub read_bmp () { | |
use integer; | |
# 必ずバイナリで。 | |
binmode(STDIN); | |
# 読み取りバッファ。 | |
my $chunk; | |
# 読み取り位置。 | |
my $pos = 0; | |
# $chunk に指定バイト標準入力から読み込むクロージャ。 | |
# 読んだ分だけ $pos を加算。指定長読めないと die する。 | |
# $chunk, $pos を束縛。 | |
my $read_exact = sub ($) { | |
my $size = shift; | |
my $read = read(STDIN, $chunk, $size); | |
defined $read or die $!; | |
$read == $size or die 'Data truncated.'; | |
$pos += $read; | |
}; | |
# BITMAPFILEHEADER 読み込み。 | |
$read_exact->(14); | |
my ($signature, $data_offset) = unpack('a2x8V', $chunk); | |
# 'BM' チェック。 | |
$signature eq 'BM' or die 'Invalid signature.'; | |
# BITMAPINFOHEADER 読み込み。 | |
$read_exact->(40); | |
my ($bih_size, $width, $height, $planes, | |
$bit_count, $compression, $color_count) | |
= unpack('VVVvvVx12Vx4', $chunk); | |
# 40 未満のサイズは古い形式なので対応しない。 | |
$bih_size >= 40 or die 'OS/2 bitmaps not supported.'; | |
# BMP は必ず 1 プレーン(面)。 | |
$planes == 1 or die 'Invalid bitmap.'; | |
# ビット数は 1, 4, 8, 24, 32 のどれか。 | |
grep { $bit_count == $_ } (1, 4, 8, 24, 32) | |
or die 'Not supported bit count.'; | |
# 圧縮は今回扱わない。 | |
$compression == 0 or die 'Not supported compression.'; | |
# 8 ビット以下の BMP はカラーテーブルを持つ。 | |
if ($color_count == 0 and $bit_count <= 8) { | |
$color_count = 2 ** $bit_count; | |
} | |
# ヘッダ全体を読み飛ばす。 | |
if ($bih_size > 40) { | |
$read_exact->(40); | |
} | |
# カラーテーブル。 | |
my $colors; | |
# カラーテーブルがあれば読み込む。 | |
if ($color_count) { | |
$read_exact->($color_count * 4); | |
# "V256" などのテンプレートで配列化。 | |
$colors = [ unpack("V$color_count", $chunk) ]; | |
} | |
# 標準入力は seek できないのでデータ部まで読み捨て。 | |
if ($data_offset - $pos) { | |
$read_exact->($data_offset - $pos); | |
} | |
# 行のデータ部まで読み飛ばす。 | |
my $line_size = int(($width * $bit_count + 31) / 32) * 4; | |
# 行リファレンスの配列。ここに結果が入る。 | |
my @data = (); | |
# トップダウンの BMP の場合高さが負の値。 | |
# 順番に処理した場合、配列に追加する場所が異なる。 | |
# そこで @data を束縛する行追加用のクロージャを作る。 | |
my $insert = $height < 0 | |
? sub { push(@data, $_[0]) } | |
: sub { unshift(@data, $_[0]) } | |
; | |
# 行データ処理用のクロージャを作る。 | |
# $chunk, $width, $insert を束縛。 | |
my $process_line = | |
$bit_count == 32 ? | |
sub { | |
$insert->([ unpack('V*', $chunk) ]); | |
} | |
: $bit_count == 24 ? | |
sub { | |
my $line = [ | |
map { unpack('V', $_ . "\0") } | |
$chunk =~ /.{3}/sg | |
]; | |
$#$line = $width - 1; | |
$insert->($line); | |
} | |
: $bit_count == 16 ? | |
sub { | |
my $line = [ unpack('v*', $chunk) ]; | |
$#$line = $width - 1; | |
$insert->($line); | |
} | |
: # $bit_count <= 8 | |
sub { | |
my $bits = unpack('B*', $chunk); | |
my $line = [ | |
map { ord pack('b8', scalar reverse $_) } | |
$bits =~ /.{$bit_count}/sg | |
]; | |
$#$line = $width - 1; | |
$insert->($line); | |
} | |
; | |
# データを読み込む。 | |
$height = -$height if $height < 0; | |
foreach (1 .. $height) { | |
$read_exact->($line_size); | |
$process_line->(); | |
} | |
wantarray ? (\@data, $bit_count, $colors) : \@data; | |
} | |
sub write_bmp_data ($$;$) { | |
my ($data, $bit_count) = @_; | |
use integer; | |
my ($width, $height) = (scalar(@{$data->[0]}), scalar(@$data)); | |
my $line_size = int(($width * $bit_count + 31) / 32) * 4; | |
my $ctable_size = $bit_count > 8 ? 0 : 2 ** $bit_count * 4; | |
foreach my $line (reverse @$data) { | |
if ($bit_count == 32) { | |
print pack("V*", @$line); | |
} elsif ($bit_count == 24) { | |
my @bytes = map { substr(pack("V", $_), 0, int($bit_count / 8)) } @$line; | |
print pack("a*\@$line_size", join('', @bytes)); | |
} elsif ($bit_count == 16) { | |
my @bytes = (); | |
foreach (@$line) { | |
my ($red, $green, $blue) = unpack('xCCC', pack('N', $_)); | |
push @bytes, pack("v", $red >> 3 << 10 | $green >> 3 << 5 | $blue >> 3); | |
} | |
print pack("a*\@$line_size", join('', @bytes)); | |
} else { | |
my @bits = map { substr(unpack("B8", pack("C", $_)), -$bit_count) } @$line; | |
print pack("B*\@$line_size", join('', @bits)); | |
} | |
} | |
} | |
sub write_bmp ($$;$) { | |
my ($data, $bit_count, $colors) = @_; | |
use integer; | |
my ($width, $height) = (scalar(@{$data->[0]}), scalar(@$data)); | |
my $line_size = int(($width * $bit_count + 31) / 32) * 4; | |
my $ctable_size = $bit_count > 8 ? 0 : 2 ** $bit_count * 4; | |
binmode(select); | |
# BITMAPFILEHEADER | |
print pack("a2Vx4V", "BM", 54 + $ctable_size + $line_size * $height, 54 + $ctable_size); | |
# BITMAPINFOHEADER | |
print pack("VVVvvx24", 40, $width, $height, 1, $bit_count); | |
# RGBQUAD (カラーテーブル) | |
if ($ctable_size > 0) { | |
print pack("V", $_) foreach @$colors; | |
} | |
write_bmp_data($data, $bit_count); | |
} | |
1; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment