Skip to content

Instantly share code, notes, and snippets.

@evjeny
Created July 18, 2022 19:46
Show Gist options
  • Save evjeny/925ae4f2369c8bf7771755ddda33b4aa to your computer and use it in GitHub Desktop.
Save evjeny/925ae4f2369c8bf7771755ddda33b4aa to your computer and use it in GitHub Desktop.
Скрипт для спасения задачек Родиона
import os
import shutil
from charset_normalizer import from_bytes
import xmltodict
def get_xml_dict(path: str):
    """Parse the XML file at *path* into a dict, guessing its text encoding.

    The raw bytes are passed to charset_normalizer's ``from_bytes``; each
    candidate match is then tried in the order returned until one of them
    parses successfully with ``xmltodict.parse``.

    Args:
        path: Path of the XML file to read.

    Returns:
        The result of ``xmltodict.parse`` for the first candidate that parses.

    Raises:
        OSError: If the file cannot be opened or read.
        Exception: With the message "unknown encoding" when no candidate
            could be parsed; the last parse failure (if any) is chained as
            the cause.
    """
    # Read-only binary mode: the file is never written to, and "rb+" would
    # needlessly fail on read-only files.
    with open(path, "rb") as f:
        content = f.read()

    results = from_bytes(
        content,
        steps=10,  # Number of steps/blocks to extract from the byte string
        chunk_size=512,  # Size of each extracted block
        threshold=0.2,  # Maximum amount of chaos allowed on first pass
        cp_isolation=None,  # Finite list of encodings to restrict the search to
        cp_exclusion=None,  # Finite list of encodings to avoid during the search
        preemptive_behaviour=True,  # Look inside the bytes (ASCII mode) for a declared encoding
        explain=False,  # Print what is happening while searching for a match
    )

    last_error = None
    for match in results:
        try:
            return xmltodict.parse(match.output().decode("utf-8"))
        except Exception as error:
            # This candidate did not yield parseable XML; try the next one.
            # (Exception rather than a bare except, so Ctrl+C still works.)
            last_error = error
    raise Exception("unknown encoding") from last_error
def main(folder: str) -> None:
    """Copy the task XML files in *folder* whose author matches into ``rod_tasks``.

    Every ``*.xml`` file directly inside *folder* is parsed with
    ``get_xml_dict``; when the ``Task/RegistrationInfo/Author`` value contains
    ``"r.shchebentovsk"`` the file is copied to the ``rod_tasks`` directory
    (relative to the current working directory) and reported on stdout.
    Files that cannot be processed are reported and skipped.

    Args:
        folder: Directory to scan (not recursive).
    """
    target_dir = "rod_tasks"

    for name in os.listdir(folder):
        if not name.endswith(".xml"):
            continue

        xml_path = os.path.join(folder, name)
        try:
            xml = get_xml_dict(xml_path)
            author = xml["Task"]["RegistrationInfo"]["Author"]
            if "r.shchebentovsk" in author:
                # shutil.copyfile does not create missing directories; without
                # this every copy would fail and be reported as unreadable.
                os.makedirs(target_dir, exist_ok=True)
                shutil.copyfile(xml_path, os.path.join(target_dir, name))
                print(f"{name}, author: {author}")
        except Exception:
            # Best effort: report the file and carry on with the rest.
            # Exception (not a bare except) keeps Ctrl+C able to abort the scan.
            print("can't read", name)
# Script entry point: scan the hard-coded task-info folder when run directly.
if __name__ == "__main__":
    main(r"D:\Work\_taskinfo")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment