Skip to content

Instantly share code, notes, and snippets.

@nviennot
Created January 22, 2020 02:20
Show Gist options
  • Save nviennot/396fce1ceaa4b00fe838cab7527072a7 to your computer and use it in GitHub Desktop.
Save nviennot/396fce1ceaa4b00fe838cab7527072a7 to your computer and use it in GitHub Desktop.
#![cfg_attr(debug_assertions, allow(dead_code, unused_imports, unused_variables))]
use packed_simd;
use std::env;
use std::error;
use std::fs::File;
use std::io::Seek;
use std::io::SeekFrom;
use std::io;
use std::io::Write;
use std::cmp::min;
use memmap::MmapOptions;
use pbr;
use std::time;
/// Number of bytes in one kibibyte (2^10).
const ONE_KB: usize = 1 << 10;
/// Number of bytes in one mebibyte (2^20).
const ONE_MB: usize = 1 << 20;
// SIMD vector type used by `scan_jpg_header`: `packed_simd`'s 32-lane `u8` vector.
type ScanT = packed_simd::u8x32;
/// Returns the index of the first occurrence of the byte sequence
/// `ff d8 ff` in `data`, or `None` when the sequence is absent.
///
/// The bulk of the buffer is searched one `ScanT` vector at a time: three
/// unaligned loads, shifted by one byte each, are compared lane-wise against
/// the three pattern bytes. Whatever does not fill a whole vector at the end
/// of the buffer is then searched with a plain byte-wise scan.
fn scan_jpg_header(data: &[u8]) -> Option<usize> {
    const VLEN: usize = ScanT::lanes();
    const PATTERN: [u8; 3] = [0xff, 0xd8, 0xff];
    const PATTERN_LEN: usize = 3;

    // A buffer shorter than the pattern cannot contain it. Bailing out here
    // also keeps the subtraction below from underflowing.
    if data.len() < PATTERN_LEN {
        return None;
    }

    let p1 = ScanT::splat(PATTERN[0]);
    let p2 = ScanT::splat(PATTERN[1]);
    let p3 = ScanT::splat(PATTERN[2]);

    // Number of full vectors for which the load shifted by two bytes still
    // stays inside `data`.
    let num_chunks = (data.len() - PATTERN_LEN + 1) / VLEN;
    for chunk_index in 0..num_chunks {
        let base = chunk_index * VLEN;
        let c1 = ScanT::from_slice_unaligned(&data[base..]);
        let c2 = ScanT::from_slice_unaligned(&data[base + 1..]);
        let c3 = ScanT::from_slice_unaligned(&data[base + 2..]);
        // Lane `i` is set when data[base + i..base + i + 3] equals the pattern.
        let matches = c1.eq(p1) & c2.eq(p2) & c3.eq(p3);
        if matches.any() {
            if let Some(i) = (0..VLEN).find(|&i| matches.extract(i)) {
                return Some(base + i);
            }
        }
    }

    // Scalar scan of the start positions the vector loop could not cover.
    let tail_start = num_chunks * VLEN;
    data[tail_start..]
        .windows(PATTERN_LEN)
        .position(|window| window == &PATTERN[..])
        .map(|i| tail_start + i)
}
/// The kinds of JPEG marker that `Marker::scan` recognises.
#[derive(Debug, PartialEq)]
enum MarkerKind {
    /// Start of image (`ff d8`).
    SOI,
    /// Application segment (`ff e0` ..= `ff ef`).
    APPn,
    /// Quantization table definition (`ff db`).
    DQT,
    /// Start of frame 0 (`ff c0`).
    SOF0,
    /// Huffman table definition (`ff c4`).
    DHT,
    /// Start of frame 2 (`ff c2`).
    SOF2,
    /// Restart interval definition (`ff dd`).
    DRI,
    /// Restart marker (`ff d0` ..= `ff d7`).
    RSTn,
    /// Comment (`ff fe`).
    COM,
    /// Start of scan (`ff da`).
    SOS,
    /// End of image (`ff d9`).
    EOI,
}
/// A JPEG marker recognised at the very start of a byte buffer.
#[derive(Debug)]
struct Marker {
    /// Which marker was recognised.
    kind: MarkerKind,
    /// Number of bytes the marker occupies: the two marker bytes plus
    /// whatever payload length was handed to `Marker::build`.
    len: usize,
}

impl Marker {
    /// Creates a marker of `kind` whose length is `payload_len` plus the two
    /// marker bytes.
    ///
    /// Returns `None` when that length exceeds `buffer_len`, i.e. when the
    /// segment would run past the end of the buffer it was read from.
    fn build(kind: MarkerKind, payload_len: usize, buffer_len: usize) -> Option<Self> {
        let marker_len = payload_len + 2;
        if marker_len > buffer_len {
            None
        } else {
            Some(Self { kind, len: marker_len })
        }
    }

    /// Recognises the marker located at the start of `buf`.
    ///
    /// Returns `None` when `buf` is too short, when the first two bytes are
    /// not one of the supported marker codes, when the length field is
    /// invalid, or when the segment does not fit in `buf`.
    fn scan(buf: &[u8]) -> Option<Self> {
        let size = buf.len();
        if size < 2 {
            return None;
        }
        let header = u16::from_be_bytes([buf[0], buf[1]]);

        // Markers handled without reading a length field. SOS is reported as
        // the bare two-byte marker; what follows it is left to the caller.
        // RSTn markers are standalone in JPEG and never carry a length.
        match header {
            0xffd8 => return Self::build(MarkerKind::SOI, 0, size),
            0xffda => return Self::build(MarkerKind::SOS, 0, size),
            0xffd9 => return Self::build(MarkerKind::EOI, 0, size),
            0xffd0..=0xffd7 => return Self::build(MarkerKind::RSTn, 0, size),
            _ => (),
        }

        // Every remaining marker is followed by a big-endian 16-bit length.
        if size < 4 {
            return None;
        }
        let var_len = usize::from(u16::from_be_bytes([buf[2], buf[3]]));

        let kind = match header {
            0xfffe => MarkerKind::COM,
            0xffc0 => MarkerKind::SOF0,
            0xffc4 => MarkerKind::DHT,
            0xffc2 => MarkerKind::SOF2,
            0xffdb => MarkerKind::DQT,
            // DRI always uses a fixed length of 4, whatever the field says.
            0xffdd => return Self::build(MarkerKind::DRI, 4, size),
            0xffe0..=0xffef => MarkerKind::APPn,
            _ => return None,
        };

        // The length field counts its own two bytes, so anything below 2
        // cannot belong to a well-formed segment.
        if var_len < 2 {
            return None;
        }
        Self::build(kind, var_len, size)
    }
}
fn get_end_of_jpg(buf: &[u8]) -> Option<usize> {
let size = buf.len();
let mut offset = 0;
// Step 1: Make sure the SOI marker is present
let marker: Marker = Marker::scan(&buf[offset..])?;
if marker.kind != MarkerKind::SOI {
return None;
}
offset += marker.len;
// Step 2: Go through all markers until MARKER_SOS
while offset < size {
let marker = Marker::scan(&buf[offset..])?;
offset += marker.len;
if marker.kind == MarkerKind::SOS {
break;
}
}
// After start of scan, the raw data of the image
// is present. We don't know where it ends. There's
// no length indicator.
// Step 3: Search for MARKER_EOI
// if we looked through 50MB of data, give up
while offset < size && offset < 50*ONE_MB {
match Marker::scan(&buf[offset..]) {
Some(Marker{kind: MarkerKind::EOI, len}) =>
return Some(offset + len),
_ => ()
}
// Note: We do byte by byte because the EOI marker
// can be anywhere
offset += 1;
}
return None;
}
/// Creates the next output file in the current directory.
///
/// Files are named from a process-wide counter that starts at 1 and is
/// zero-padded to three digits: `001.jpg`, `002.jpg`, and so on. An existing
/// file with the same name is truncated by `File::create`.
///
/// # Errors
///
/// Returns the error reported by `File::create`.
fn create_empty_jpg() -> io::Result<File> {
    use std::sync::atomic::{AtomicU32, Ordering};

    // An atomic counter gives the same numbering as a mutable static without
    // requiring `unsafe`.
    static IMG_NUM: AtomicU32 = AtomicU32::new(0);

    // `fetch_add` returns the previous value; the file number is the new one.
    let img_num = IMG_NUM.fetch_add(1, Ordering::Relaxed).wrapping_add(1);
    File::create(format!("{:03}.jpg", img_num))
}
/// Writes `buf` to a freshly created output file.
///
/// # Errors
///
/// Returns any error from creating the file or from writing to it.
fn recover_jpg(buf: &[u8]) -> io::Result<()> {
    create_empty_jpg().and_then(|mut output| output.write_all(buf))
}
/// Saves the JPEG image starting at `buf[0]` if its end can be determined.
///
/// When `get_end_of_jpg` finds no end, nothing is written and `Ok(())` is
/// returned.
///
/// # Errors
///
/// Returns any error from writing the recovered image.
fn maybe_recover_jpg(buf: &[u8]) -> io::Result<()> {
    get_end_of_jpg(buf).map_or(Ok(()), |end| recover_jpg(&buf[..end]))
}
fn undelete_jpg(buf: &[u8]) -> io::Result<usize> {
let mut img_count = 0;
let size = buf.len();
let mut offset = 0;
let mut pbar = pbr::ProgressBar::new(size as u64);
pbar.set_units(pbr::Units::Bytes);
pbar.set_max_refresh_rate(Some(time::Duration::from_millis(500)));
while offset < size {
let scan_for = min(10*ONE_KB, size-offset);
let inc_by = match scan_jpg_header(&buf[offset..offset+scan_for]) {
Some(offset_header) => {
maybe_recover_jpg(&buf[offset+offset_header..])?;
img_count += 1;
offset_header+1
},
None => scan_for,
};
pbar.add(inc_by as u64);
offset += inc_by;
}
pbar.finish();
return Ok(img_count);
}
fn main() -> Result<(), Box<dyn error::Error>> {
let argv: Vec<String> = env::args().collect();
if argv.len() != 2 {
panic!("usage: undelete input_file");
}
let path = &argv[1];
let mut file = File::open(path)
.expect(&format!("Failed to open {}", path));
// block devices need special care for getting their sizes
let fsize = file.seek(SeekFrom::End(0))?;
let mfile = unsafe { MmapOptions::new().len(fsize as usize).map(&file)? };
let img_count = undelete_jpg(&mfile)?;
println!("Recovered {} images", img_count);
Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment