Last active
August 29, 2015 13:57
-
-
Save nebuta/9754976 to your computer and use it in GitHub Desktop.
Parsing TIFF files in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// tiff.rs | |
// Compiles on rustc 0.10-pre (6eae7df 2014-03-20 15:01:47 -0700) | |
#[crate_id = "tiff"]; | |
#[crate_type="bin"]; | |
#[license = "MIT"]; | |
#[desc = "TIFF reading example"]; | |
//! TIFF reading library. | |
//! Supports multipage tiff files. | |
//! Limitations: only supports 16-bit grayscale images.
extern crate std; | |
extern crate num; | |
extern crate time; | |
use std::io::File; | |
// | |
// Type definitions, ordered top-down.
// | |
#[deriving(Eq,Show,Clone)] | |
struct TiffFile { | |
bytes: ~[u8], | |
endian: Endian, | |
ifds: ~[IFD], | |
} | |
#[deriving(Eq,Show,Clone)] | |
enum Endian { | |
BigEndian, | |
LittleEndian | |
} | |
#[deriving(Eq,Show,Clone)] | |
struct IFD { | |
tags: ~[IFDEntry] | |
} | |
#[deriving(Eq,Show,Clone)] | |
struct IFDEntry { | |
tag: TiffTag, | |
ftype: FieldType, | |
count: u32, | |
value: ValueOrOffset | |
} | |
#[deriving(Eq,Show,Clone)] | |
pub enum TiffTag { | |
TagNumber(u16), | |
NewSubfileType, | |
SubfileType, | |
ImageWidth, | |
ImageLength, | |
BitsPerSample, | |
Compression, | |
PhotometricInterpretation, | |
Threshholding, | |
CellWidth, | |
CellLength, | |
FillOrder, | |
ImageDescription, | |
Make, | |
Model, | |
StripOffsets, | |
Orientation, | |
SamplesPerPixel, | |
RowsPerStrip, | |
StripByteCounts, | |
MinSampleValue, | |
MaxSampleValue, | |
XResolution, | |
YResolution, | |
PlanarConfiguration, | |
FreeOffsets, | |
FreeByteCounts, | |
GrayResponseUnit, | |
GrayResponseCurve, | |
ResolutionUnit, | |
Software, | |
DateTime, | |
Artist, | |
HostComputer, | |
ColorMap, | |
ExtraSamples, | |
Copyright, | |
} | |
#[deriving(Eq,Show,Clone)] | |
enum ValueOrOffset { | |
IfdByte(~[u8]), | |
IfdAscii(~[u8]), | |
IfdShort(~[u16]), | |
IfdLong(u32), | |
IfdOffset(u32) | |
} | |
#[deriving(Eq,Show,Clone)] | |
enum FieldType { | |
Byte, | |
Ascii, | |
Short, | |
Long, | |
Rational, | |
Invalid | |
} | |
// | |
// Methods and helper functions for the types above.
// | |
impl TiffFile { | |
pub fn num_pages(&self) -> uint { | |
self.ifds.len() | |
} | |
pub fn read_file(name: &str) -> Option<TiffFile> { | |
let bytes: ~[u8] = File::open(&Path::new(name)).read_to_end().unwrap(); | |
let ret = TiffFile::read_bytes(bytes); | |
match ret { | |
Some(r) => Some(TiffFile{bytes: bytes, endian: r.endian, ifds: r.ifds}), | |
_ => None | |
} | |
} | |
// Read a TIFF file from the byte content of the file. | |
pub fn read_bytes(bytes: &[u8]) -> Option<TiffFile> { | |
let h = bytes.slice(0,2); | |
let cs: ~[char] = h.iter().map(|&a| a as char).collect(); | |
let end = if cs[0] == 'M' {BigEndian} else {LittleEndian}; | |
assert!(p16(bytes.slice(2,4),end) == Some(42)); | |
match p32(bytes.slice(4,8), end) { | |
Some(offset) => { | |
let mut mnos = Some(offset); | |
let mut count = 0; | |
let mut ifds: ~[IFD] = ~[]; | |
while mnos.is_some() { | |
let t = readIFD(bytes,mnos.unwrap(),end); | |
ifds.push(*t.clone().val0()); | |
mnos = t.val1(); | |
count += 1 | |
} | |
match count { | |
1 => println!("One IFD was found."), | |
n => println!("{} IFDs were found.", n), | |
} | |
//stub | |
Some(TiffFile{bytes: ~[], endian: end, ifds: ifds}) | |
}, | |
_ => { | |
println!("Parse failed."); | |
None | |
} | |
} | |
} | |
pub fn get_pixels(&self, page: u32) -> ~[u16] { | |
let tags: &[IFDEntry] = self.ifds[page].tags; | |
let ot = tags.iter().find(|t| t.tag == StripOffsets); | |
let obc = tags.iter().find(|t| t.tag == StripByteCounts); | |
match (ot,obc) { | |
(Some(ref t),Some(ref bc)) => match (&t.value,&bc.value) { | |
(&IfdLong(v),&IfdLong(bc2)) => { | |
let len: uint = bc2 as uint/2; | |
range(0,len).map(|i| { | |
let from = v as uint+i*2; | |
let bs = self.bytes.slice(from,from+2); | |
p16(bs,BigEndian).unwrap() | |
}).collect() | |
} | |
_ => ~[], | |
}, | |
_ => ~[] | |
} | |
} | |
pub fn get_pixels_from_bytes(&self, bytes: &[u8], page: u32) -> ~[u16] { | |
let tags: &[IFDEntry] = self.ifds[page].tags; | |
let ot = tags.iter().find(|t| t.tag == StripOffsets); | |
let obc = tags.iter().find(|t| t.tag == StripByteCounts); | |
match (ot,obc) { | |
(Some(ref t),Some(ref bc)) => match (&t.value,&bc.value) { | |
(&IfdLong(v),&IfdLong(bc2)) => { | |
let len: uint = bc2 as uint/2; | |
range(0,len).map(|i| { | |
let from = v as uint+i*2; | |
let bs = bytes.slice(from,from+2); | |
p16(bs,BigEndian).unwrap() | |
}).collect() | |
} | |
_ => ~[], | |
}, | |
_ => ~[] | |
} | |
} | |
} | |
/// Reads the IFD located at `offset`, together with the offset of the next
/// IFD, if any.
///
/// Prints the offset and the number of entries. Entries that cannot be
/// parsed are dropped. When the IFD does not fit into `bytes`, prints
/// "Parse error" and returns an empty IFD with no successor.
fn readIFD(bytes: &[u8], offset: u32, end: Endian) -> (~IFD, Option<u32>) {
    println!("Offset of the IFD: {}.",offset);
    // Bounds are checked in u64 so that the arithmetic itself cannot wrap.
    let total = bytes.len() as u64;
    let base = offset as u64;
    if base + 2 > total {
        println!("Parse error");
        return (~IFD{tags: ~[]},None);
    }
    let start = offset as uint;
    let num_entries: Option<u16> = p16(bytes.slice(start, start + 2),end);
    match num_entries {
        Some(num_entries) => {
            println!("{} IFD entries.",num_entries);
            // Layout: entry count (2 bytes), 12 bytes per entry, next IFD offset (4 bytes).
            let entries_end = base + 2 + (num_entries as u64) * 12;
            if entries_end + 4 > total || entries_end + 4 > 0xFFFFFFFF {
                println!("Parse error");
                return (~IFD{tags: ~[]},None);
            }
            let ifds2: ~[Option<IFDEntry>] = range(0, num_entries).map(|i| {
                let os: u32 = offset + i as u32 * 12 + 2;
                readIFDEntry(bytes, os, end)
            }).collect();
            let ifds = my_collect(ifds2);
            let next_at = entries_end as uint;
            let next = p32(bytes.slice(next_at, next_at + 4), end);
            match next {
                // An offset of zero marks the last IFD.
                Some(0) => (~IFD{tags: ifds},None),
                Some(nos) => (~IFD{tags: ifds},Some(nos)),
                _ => {
                    println!("Parse error");
                    (~IFD{tags: ~[]},None)
                }
            }
        }
        None => {
            println!("Parse error");
            (~IFD{tags: ~[]},None)
        }
    }
}
// Read each entry in an IFD. | |
fn readIFDEntry(bytes: &[u8], offs: u32, end: Endian) -> Option<IFDEntry> { | |
let bs = bytes.slice(offs as uint, offs as uint + 12); | |
if bs.len() == 12 { | |
let t = p16(bs.slice(0, 2), end).map(|a| parse_tag(a)); | |
let ft = p16(bs.slice(2,4), end).map(|a| read_ft(a)); | |
let c = p32(bs.slice(4,8), end); | |
let os = p32(bs.slice(8,12), end); | |
match (t, ft,c,os) { | |
(Some(t), Some(f),Some(c),Some(v)) => { | |
if is_offset(f,c) { | |
Some(IFDEntry{tag: t, ftype: f, count: c, value: IfdOffset(v)}) | |
}else{ | |
match get_val(f, c, bs.slice(8,12), end) { | |
Some(v) => Some(IFDEntry{tag: t, ftype: f, count: c, value: v}), | |
_ => None | |
} | |
} | |
}, | |
_ => None | |
} | |
} else { | |
None | |
} | |
} | |
fn parse_tag(n: u16) -> TiffTag { | |
match n { | |
254 => NewSubfileType, | |
255 => SubfileType, | |
256 => ImageWidth, | |
257 => ImageLength, | |
258 => BitsPerSample, | |
259 => Compression, | |
262 => PhotometricInterpretation, | |
263 => Threshholding, | |
264 => CellWidth, | |
265 => CellLength, | |
266 => FillOrder, | |
270 => ImageDescription, | |
271 => Make, | |
272 => Model, | |
273 => StripOffsets, | |
274 => Orientation, | |
277 => SamplesPerPixel, | |
278 => RowsPerStrip, | |
279 => StripByteCounts, | |
280 => MinSampleValue, | |
281 => MaxSampleValue, | |
282 => XResolution, | |
283 => YResolution, | |
284 => PlanarConfiguration, | |
288 => FreeOffsets, | |
289 => FreeByteCounts, | |
290 => GrayResponseUnit, | |
291 => GrayResponseCurve, | |
296 => ResolutionUnit, | |
305 => Software, | |
306 => DateTime, | |
315 => Artist, | |
316 => HostComputer, | |
320 => ColorMap, | |
338 => ExtraSamples, | |
33432 => Copyright, | |
n => TagNumber(n) | |
} | |
} | |
fn read_ft(n: u16) -> FieldType { | |
match n { | |
1 => Byte, | |
2 => Ascii, | |
3 => Short, | |
4 => Long, | |
5 => Rational, | |
_ => Invalid | |
} | |
} | |
// See if the value is stored outside (with the offset) the IFD or in place. | |
fn is_offset(f: FieldType, c: u32) -> bool { | |
let len = match f { | |
Byte => 1, | |
Ascii => 1, | |
Short => 2, | |
Long => 4, | |
Rational => 8, | |
Invalid => 0 | |
}; | |
len * c > 4 | |
} | |
//Obtain the value(s) that are stored in place. | |
fn get_val(f: FieldType, c: u32, bs: &[u8], end: Endian) -> Option<ValueOrOffset> { | |
match f { | |
Byte => Some(IfdByte(bs.slice(0,c as uint).to_owned())), | |
Ascii => Some(IfdAscii(bs.slice(0,c as uint).to_owned())), | |
Short => read_shorts(bs.slice(0,2*c as uint),c,end).map(|v| IfdShort(v)), | |
Long => p32(bs,end).map(|v| IfdLong(v)), | |
Rational => None, // Rational is 8 bytes, so that the value is not directly stored. | |
Invalid => None | |
} | |
} | |
// Read the short value(s) stored in place. | |
fn read_shorts(bs: &[u8],count: u32, end: Endian) -> Option<~[u16]>{ | |
match count { | |
1 => p16(bs.slice(0,2),end).map(|v| ~[v]), | |
2 => match (p16(bs.slice(0,2),end),p16(bs.slice(3,5),end)) { | |
(Some(a),Some(b)) => Some(~[a,b]), | |
_ => None | |
}, | |
_ => None | |
} | |
} | |
// | |
// Utility functions that may already exist somewhere in the standard library.
// | |
/// Returns clones of the values held by the `Some` elements of `vs`, in
/// order; `None` elements are skipped.
fn my_collect<T: Clone>(vs: &[Option<T>]) -> ~[T] {
    vs.iter().filter_map(|item| item.clone()).collect()
}
/// Arithmetic mean of `vs`, computed as a running average.
/// Returns NaN for an empty slice.
fn mean(vs: &[u16]) -> f32 {
    if vs.len() == 0 {
        return std::f32::NAN;
    }
    let mut running = 0.0f32;
    for (idx, &sample) in vs.iter().enumerate() {
        // Move the average towards the new sample by 1/n of their distance.
        let n = (idx + 1) as f32;
        running = running + (sample as f32 - running) / n;
    }
    running
}
fn p32(bs: &[u8], end: Endian) -> Option<u32> { | |
if bs.len() == 4 { | |
let b: ~[u32] = bs.iter().map(|&n| n as u32).collect(); | |
match end { | |
BigEndian => Some(b[0] * 16777216 + b[1] * 65536 + b[2] * 256 + b[3]), | |
LittleEndian => Some(b[3] * 16777216 + b[2] * 65536 + b[1] * 256 + b[0]) | |
} | |
} else { | |
None | |
} | |
} | |
fn p16(bs: &[u8], end: Endian) -> Option<u16> { | |
if bs.len() == 2 { | |
let b: ~[u16] = bs.iter().map(|&n| n as u16).collect(); | |
match end { | |
BigEndian => Some(b[0] * 256 + b[1]), | |
LittleEndian => Some(b[1] * 256 + b[0]) | |
} | |
} else { | |
None | |
} | |
} | |
fn timed<T>(func: || -> T) -> (T,i64) { | |
let b = time::get_time(); | |
let r = func(); | |
let e = time::get_time(); | |
let ee = (e.sec * 1000000000 + e.nsec as i64) as i64; | |
let bb = (b.sec * 1000000000 + b.nsec as i64) as i64; | |
let t = (ee - bb) / 1000000; | |
println!("Timed: {}",t); | |
(r,t) | |
} | |
// | |
// Testing | |
// | |
fn opt1() { | |
let name = "test.tiff"; | |
let bytes: &[u8] = File::open(&Path::new(name)).read_to_end().unwrap(); | |
let of = TiffFile::read_bytes(bytes); | |
match of { | |
Some(f) => { | |
let ps: ~[u16] = timed(|| f.get_pixels_from_bytes(bytes,0)).val0(); | |
println!("{}", timed(|| mean(ps))); | |
}, | |
_ => () | |
} | |
} | |
fn opt2() { | |
let name = "test.tiff"; | |
let of = TiffFile::read_file(name); | |
match of { | |
Some(f) => { | |
let ps: ~[u16] = f.get_pixels(0); | |
println!("{}", mean(ps)); | |
}, | |
_ => () | |
} | |
} | |
fn main(){ | |
println!("{} msec for opt1().", timed(|| opt1())); | |
println!("{} msec for opt2().", timed(|| opt2())); | |
} | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment