# evjeny/check_tasks.py

## check_tasks.py
import os
import shutil

from charset_normalizer import from_bytes
import xmltodict


def get_xml_dict(path: str):
    """Parse the XML file at *path* into a dict, guessing its text encoding.

    The raw bytes are passed to ``charset_normalizer.from_bytes``. Each
    candidate match it yields is tried in order: its ``output()`` bytes are
    decoded as UTF-8 and handed to ``xmltodict.parse``. The first candidate
    that parses is returned.

    Args:
        path: Path of the XML file to read.

    Returns:
        Whatever ``xmltodict.parse`` returns for the first candidate that
        parses successfully.

    Raises:
        OSError: If the file cannot be opened or read.
        ValueError: With message ``"unknown encoding"`` when no candidate
            could be parsed; chained to the last parse error, if any.
    """
    # Read-only binary mode: the file is never written, and "rb+" would
    # needlessly fail on read-only files.
    with open(path, "rb") as f:
        content = f.read()

    results = from_bytes(
        content,
        steps=10,  # Number of chunks to sample from content
        chunk_size=512,  # Size of each sampled chunk
        threshold=0.2,  # Maximum amount of chaos allowed on first pass
        cp_isolation=None,  # Finite list of encodings to restrict the search to
        cp_exclusion=None,  # Finite list of encodings to leave out of the search
        preemptive_behaviour=True,  # Look inside content for a declared encoding first
        explain=False  # Do not print the detection steps
    )

    last_error = None
    for match in results:
        try:
            return xmltodict.parse(match.output().decode("utf-8"))
        except Exception as err:
            # This candidate did not give parseable XML; remember why and
            # fall through to the next one.
            last_error = err
    raise ValueError("unknown encoding") from last_error


def main(
    folder: str,
    dest: str = "rod_tasks",
    author_substring: str = "r.shchebentovsk",
):
    """Copy XML files from *folder* whose author matches into *dest*.

    Every entry of *folder* whose name ends with ``.xml`` is parsed with
    ``get_xml_dict``. When the value at
    ``["Task"]["RegistrationInfo"]["Author"]`` contains *author_substring*,
    the file is copied into *dest* under the same name and a line with the
    file name and author is printed.

    Any failure while handling one file (parsing, missing keys, copying) is
    reported as ``can't read <name>`` and the loop moves on to the next file.

    Args:
        folder: Directory to scan (not recursive).
        dest: Directory that receives the matching files. It is created on
            the first match if it does not exist yet.
        author_substring: Text that must occur in the author value.

    Raises:
        OSError: If *folder* itself cannot be listed.
    """
    for name in os.listdir(folder):
        if not name.endswith(".xml"):
            continue
        xml_path = os.path.join(folder, name)
        try:
            xml = get_xml_dict(xml_path)
            author = xml["Task"]["RegistrationInfo"]["Author"]
            if author_substring in author:
                # Create the target lazily so a run without matches leaves
                # no empty directory behind, and a missing directory no
                # longer turns every match into a "can't read" line.
                os.makedirs(dest, exist_ok=True)
                shutil.copyfile(xml_path, os.path.join(dest, name))
                print(f"{name}, author: {author}")
        except Exception:
            # Best effort per file; Ctrl+C and SystemExit still propagate.
            print("can't read", name)


if __name__ == "__main__":
    # Script entry point: run the scan on the hard-coded folder.
    main(folder="D:\\Work\\_taskinfo")
	import os
	import shutil

	from charset_normalizer import from_bytes
	import xmltodict


	def get_xml_dict(path: str):
	    """Parse the XML file at *path* into a dict, guessing its text encoding.

	    The raw bytes are passed to ``charset_normalizer.from_bytes``. Each
	    candidate match it yields is tried in order: its ``output()`` bytes are
	    decoded as UTF-8 and handed to ``xmltodict.parse``. The first candidate
	    that parses is returned.

	    Args:
	        path: Path of the XML file to read.

	    Returns:
	        Whatever ``xmltodict.parse`` returns for the first candidate that
	        parses successfully.

	    Raises:
	        OSError: If the file cannot be opened or read.
	        ValueError: With message ``"unknown encoding"`` when no candidate
	            could be parsed; chained to the last parse error, if any.
	    """
	    # Read-only binary mode: the file is never written, and "rb+" would
	    # needlessly fail on read-only files.
	    with open(path, "rb") as f:
	        content = f.read()

	    results = from_bytes(
	        content,
	        steps=10,  # Number of chunks to sample from content
	        chunk_size=512,  # Size of each sampled chunk
	        threshold=0.2,  # Maximum amount of chaos allowed on first pass
	        cp_isolation=None,  # Finite list of encodings to restrict the search to
	        cp_exclusion=None,  # Finite list of encodings to leave out of the search
	        preemptive_behaviour=True,  # Look inside content for a declared encoding first
	        explain=False  # Do not print the detection steps
	    )

	    last_error = None
	    for match in results:
	        try:
	            return xmltodict.parse(match.output().decode("utf-8"))
	        except Exception as err:
	            # This candidate did not give parseable XML; remember why and
	            # fall through to the next one.
	            last_error = err
	    raise ValueError("unknown encoding") from last_error


	def main(
	    folder: str,
	    dest: str = "rod_tasks",
	    author_substring: str = "r.shchebentovsk",
	):
	    """Copy XML files from *folder* whose author matches into *dest*.

	    Every entry of *folder* whose name ends with ``.xml`` is parsed with
	    ``get_xml_dict``. When the value at
	    ``["Task"]["RegistrationInfo"]["Author"]`` contains *author_substring*,
	    the file is copied into *dest* under the same name and a line with the
	    file name and author is printed.

	    Any failure while handling one file (parsing, missing keys, copying) is
	    reported as ``can't read <name>`` and the loop moves on to the next file.

	    Args:
	        folder: Directory to scan (not recursive).
	        dest: Directory that receives the matching files. It is created on
	            the first match if it does not exist yet.
	        author_substring: Text that must occur in the author value.

	    Raises:
	        OSError: If *folder* itself cannot be listed.
	    """
	    for name in os.listdir(folder):
	        if not name.endswith(".xml"):
	            continue
	        xml_path = os.path.join(folder, name)
	        try:
	            xml = get_xml_dict(xml_path)
	            author = xml["Task"]["RegistrationInfo"]["Author"]
	            if author_substring in author:
	                # Create the target lazily so a run without matches leaves
	                # no empty directory behind, and a missing directory no
	                # longer turns every match into a "can't read" line.
	                os.makedirs(dest, exist_ok=True)
	                shutil.copyfile(xml_path, os.path.join(dest, name))
	                print(f"{name}, author: {author}")
	        except Exception:
	            # Best effort per file; Ctrl+C and SystemExit still propagate.
	            print("can't read", name)


	if __name__ == "__main__":
	    # Script entry point: run the scan on the hard-coded folder.
	    main("D:\\Work\\_taskinfo")