Skip to content

Instantly share code, notes, and snippets.

@KokoseiJ
Last active July 8, 2022 02:08
Show Gist options
  • Save KokoseiJ/04f575758eb6dec569018f6080ebea35 to your computer and use it in GitHub Desktop.
Save KokoseiJ/04f575758eb6dec569018f6080ebea35 to your computer and use it in GitHub Desktop.
Check the MD5 hashes of files batch-downloaded from archive.org
import os
import re
import sys
import requests
from hashlib import md5
from xml.etree.ElementTree import fromstring
# Matches an archive.org item download page, e.g.
# https://archive.org/download/<identifier>[/]. Identifiers may contain
# letters, digits, underscores, periods and dashes.
ITEM_URL_RE = re.compile(
    r"https?://(?:www\.)?archive\.org/download/([A-Za-z0-9._-]+)/?"
)


def resolve_files_xml_url(url):
    """Return the URL of the ``*_files.xml`` listing for *url*.

    If *url* is an archive.org item download page, the URL of that item's
    ``<identifier>_files.xml`` is returned. Any other URL is assumed to
    point at the XML listing already and is returned unchanged.
    """
    item_match = ITEM_URL_RE.fullmatch(url)
    if item_match is None:
        return url
    identifier = item_match.group(1)
    # Strip a trailing slash so the joined URL has no empty path segment.
    return f"{url.rstrip('/')}/{identifier}_files.xml"


def parse_expected_hashes(xml_data, is_present=os.path.isfile):
    """Map locally present file names to their expected MD5 hex digests.

    *xml_data* is the ``*_files.xml`` document (``bytes`` or ``str``).
    *is_present* decides whether a listed name exists locally; by default
    the name is checked as a path relative to the current directory, so
    files inside subdirectories are found as well.

    ``<file>`` entries without a ``name`` attribute or without an ``<md5>``
    child are skipped instead of raising.
    """
    expected = {}
    for entry in fromstring(xml_data).findall("file"):
        file_name = entry.get("name")
        digest = entry.findtext("md5")
        if file_name is None or digest is None:
            continue
        if is_present(file_name):
            # hexdigest() is lowercase; normalise so the comparison is
            # case-insensitive.
            expected[file_name] = digest.strip().lower()
    return expected


def md5_of_file(path, chunk_size=1 << 20):
    """Return the MD5 hex digest of the file at *path*.

    The file is read in *chunk_size*-byte pieces so large archives are not
    loaded into memory at once, and the handle is always closed.
    """
    digest = md5()
    with open(path, "rb") as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()


def verify_files(expected):
    """Check every file in *expected* and return the names that mismatch.

    Progress is printed as ``<name>... OK!`` or ``<name>... FAILED``.
    """
    failed = []
    for file_name, hashval in expected.items():
        # Flush so the name is visible while a large file is being hashed.
        print(file_name, end="... ", flush=True)
        if hashval == md5_of_file(file_name):
            print("OK!")
        else:
            print("FAILED")
            failed.append(file_name)
    return failed


def main():
    """Download the file listing named on the command line and verify files.

    Returns the process exit status: 1 when the URL argument is missing,
    0 otherwise.
    """
    if len(sys.argv) < 2:
        print(f"Usage: {sys.executable} {sys.argv[0]} xml_url")
        return 1

    response = requests.get(resolve_files_xml_url(sys.argv[1]), timeout=60)
    # Fail loudly on 404/5xx instead of trying to parse an error page.
    response.raise_for_status()
    # Pass raw bytes so the XML parser honours the declared encoding.
    failed = verify_files(parse_expected_hashes(response.content))

    print("\n -================================- \n")
    if failed:
        print("Files with mismatching hashes:", *failed, sep="\n")
    else:
        print("No hash mismatches!")
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment