Skip to content

Instantly share code, notes, and snippets.

@shikanime
Last active February 15, 2022 10:52
Show Gist options
  • Save shikanime/32c824ab2a024b104532d65b1057b64a to your computer and use it in GitHub Desktop.
Save shikanime/32c824ab2a024b104532d65b1057b64a to your computer and use it in GitHub Desktop.
List broken or truncated JPEG images
import glob
from struct import unpack
# Human-readable names for the two-byte JPEG markers this script knows about,
# keyed by the marker value as read with a big-endian unsigned short.
marker_mapping = dict(
    (
        (0xFFD8, "Start of Image"),
        (0xFFE0, "Application Default Header"),
        (0xFFDB, "Quantization Table"),
        (0xFFC0, "Start of Frame"),
        (0xFFC4, "Define Huffman Table"),
        (0xFFDA, "Start of Scan"),
        (0xFFD9, "End of Image"),
    )
)
class JPEG:
    """Minimal JPEG marker walker used to spot broken or truncated files.

    The whole file is read into memory on construction. ``decode`` then walks
    the marker segments; it never decodes any pixel data.
    """

    def __init__(self, image_file):
        """Read the raw bytes of *image_file* into ``self.img_data``."""
        with open(image_file, 'rb') as f:
            self.img_data = f.read()

    def decode(self):
        """Walk the marker segments until the End of Image marker is found.

        Returns:
            None once an End of Image marker (0xFFD9) is reached.

        Raises:
            ValueError: if the data ends before an End of Image marker is
                seen, if a segment header is cut short, or if the bytes after
                a Start of Scan marker do not end with End of Image.
        """
        data = self.img_data
        size = len(data)
        # Walk with an offset instead of re-slicing the remaining bytes, which
        # would copy the tail of the file once per segment.
        pos = 0
        while True:
            if pos + 2 > size:
                # Ran out of bytes (empty file, header-only file, or a segment
                # length pointing past the end) without ever seeing 0xFFD9.
                raise ValueError(
                    "truncated JPEG: data ended before End of Image marker"
                )
            marker, = unpack(">H", data[pos:pos + 2])
            if marker == 0xffd8:
                # Start of Image carries no length field.
                pos += 2
            elif marker == 0xffd9:
                return
            elif marker == 0xffda:
                # Start of Scan: the scan data that follows is not parsed.
                # The file is accepted only if its final two bytes are the
                # End of Image marker. Checking the tail directly (rather than
                # looping on it) also avoids spinning forever when the Start
                # of Scan marker itself is the last thing in the file.
                if data[-2:] != b"\xff\xd9":
                    raise ValueError(
                        "truncated JPEG: scan data does not end with "
                        "End of Image marker"
                    )
                return
            else:
                if pos + 4 > size:
                    raise ValueError(
                        "truncated JPEG: segment header is incomplete"
                    )
                # Skip the two marker bytes plus the segment length read from
                # the two bytes that follow the marker.
                lenchunk, = unpack(">H", data[pos + 2:pos + 4])
                pos += 2 + lenchunk
# Collect every .jpg under data/ at any depth. Without recursive=True the
# "**" component behaves like a plain "*" and only one directory level is
# searched.
images = glob.glob("data/**/*.jpg", recursive=True)
for path in images:
    image = JPEG(path)
    try:
        image.decode()
    except Exception:
        # Any failure while walking the markers means the file is broken or
        # truncated: report its path. Catching Exception (not a bare except)
        # lets Ctrl-C and SystemExit still stop the scan.
        print(path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment