Created
July 18, 2022 19:46
-
-
Save evjeny/925ae4f2369c8bf7771755ddda33b4aa to your computer and use it in GitHub Desktop.
Скрипт для спасения задачек Родиона
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import shutil
import sys

import xmltodict
from charset_normalizer import from_bytes
def get_xml_dict(path: str):
    """Parse the XML file at *path* into a dict, guessing its text encoding.

    The raw bytes are passed to ``charset_normalizer.from_bytes``; each
    candidate match it yields is tried in order and the first one that
    ``xmltodict.parse`` accepts is returned.

    Args:
        path: Location of the XML file to read.

    Returns:
        The mapping produced by ``xmltodict.parse``.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: If no candidate encoding yields parseable XML. The last
            parse error, if any, is attached as the cause.
    """
    # Read-only binary mode: the file is never written, so "rb+" would only
    # make read-only files fail for no reason.
    with open(path, "rb") as f:
        content = f.read()

    results = from_bytes(
        content,
        steps=10,  # Number of steps/blocks to extract from the content
        chunk_size=512,  # Block size of each extraction
        threshold=0.2,  # Maximum amount of chaos allowed on first pass
        cp_isolation=None,  # Finite list of encodings to use when searching for a match
        cp_exclusion=None,  # Finite list of encodings to avoid when searching for a match
        preemptive_behaviour=True,  # Look into the content (ASCII mode) for a pre-defined encoding
        explain=False,  # Print on screen what is happening when searching for a match
    )

    last_error = None
    for match in results:
        try:
            return xmltodict.parse(match.output().decode("utf-8"))
        except Exception as err:  # this candidate did not produce valid XML; try the next
            last_error = err

    raise ValueError("unknown encoding") from last_error
def main(folder: str, dest: str = "rod_tasks", author_marker: str = "r.shchebentovsk"):
    """Copy the task XML files in *folder* whose author matches into *dest*.

    Every ``*.xml`` entry of *folder* is parsed with ``get_xml_dict``; when
    its ``Task/RegistrationInfo/Author`` value contains *author_marker* the
    file is copied to *dest* under the same name and reported on stdout.
    Files that cannot be read or parsed are reported and skipped, so one bad
    file does not stop the run.

    Args:
        folder: Directory to scan (not recursive).
        dest: Directory that receives the matching files; created if missing.
        author_marker: Substring looked for in the author field.

    Raises:
        OSError: If *folder* cannot be listed or *dest* cannot be created.
    """
    xml_names = (name for name in os.listdir(folder) if name.endswith(".xml"))

    # copyfile does not create missing directories; without this every copy
    # would fail when the destination does not exist yet.
    os.makedirs(dest, exist_ok=True)

    for name in xml_names:
        xml_path = os.path.join(folder, name)
        try:
            xml = get_xml_dict(xml_path)
            author = xml["Task"]["RegistrationInfo"]["Author"]
        except Exception:  # best-effort scan: report the file and move on
            print("can't read", name)
            continue

        # An empty <Author/> element comes back as a non-string; treat it as
        # "no match" rather than an error.
        if not isinstance(author, str) or author_marker not in author:
            continue

        try:
            shutil.copyfile(xml_path, os.path.join(dest, name))
        except OSError as err:
            print(f"can't copy {name}: {err}")
            continue

        # NOTE(review): the original indentation was lost; reporting is
        # assumed to apply only to files that were copied — confirm.
        print(f"{name}, author: {author}")
if __name__ == "__main__":
    # The folder to scan may be given as the first command-line argument;
    # without one, fall back to the original hard-coded location.
    main(sys.argv[1] if len(sys.argv) > 1 else "D:\\Work\\_taskinfo")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment