b4tman/office_unlocker.pyw

## office_unlocker.pyw
import os
import shutil
import sys
import tempfile
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
from xml.sax import make_parser
from xml.sax.saxutils import XMLFilterBase, XMLGenerator
from zipfile import ZipFile, is_zipfile


class XMLTagsFilter(XMLFilterBase):
    """SAX filter that drops selected elements together with their content.

    Events are forwarded to the ContentHandler only while the parser is
    outside every element named in the 'tag_names_to_exclude' parameter.
    Names are compared case-insensitively, both as reported by the parser
    and with the namespace prefix stripped, so 'documentProtection' also
    matches '<w:documentProtection>'.

    Only the non-namespace events (startElement/endElement/characters) are
    filtered; everything else is forwarded by XMLFilterBase unchanged.

    from: https://stackoverflow.com/a/42411493
    """

    def __init__(self, tag_names_to_exclude, parent=None):
        """Create the filter.

        :param tag_names_to_exclude: iterable of element names to drop
        :param parent: upstream XMLReader the events are read from
        """
        super().__init__(parent)

        # lower-cased names of the elements to exclude (set: O(1) lookups)
        self._tag_names_to_exclude = frozenset(tag.lower() for tag in tag_names_to_exclude)

        # _excluded_tags_count keeps track of opened elements to exclude,
        # so nested excluded elements are handled correctly
        self._excluded_tags_count = 0

    def _is_excluded(self, name):
        """Return True when the element *name* must be dropped."""
        lowered = name.lower()
        if lowered in self._tag_names_to_exclude:
            return True
        # Without namespace processing the parser reports qualified names
        # such as 'w:documentProtection'; compare the local part as well.
        return lowered.rpartition(':')[2] in self._tag_names_to_exclude

    def _forward_events(self):
        # will return True when we are not inside excluded element
        return self._excluded_tags_count == 0

    def startElement(self, name, attrs):
        # count first, so the excluded element's own start tag is dropped
        if self._is_excluded(name):
            self._excluded_tags_count += 1

        if self._forward_events():
            super().startElement(name, attrs)

    def endElement(self, name):
        # forward first, so the excluded element's own end tag is dropped
        if self._forward_events():
            super().endElement(name)

        if self._is_excluded(name):
            self._excluded_tags_count -= 1

    def characters(self, content):
        if self._forward_events():
            super().characters(content)


def xml_remove_protection(input_filename, output_filename):
    """Copy an XML file, leaving out the Office protection elements.

    The input is parsed through an XMLTagsFilter configured with the names
    'sheetProtection', 'workbookProtection' and 'documentProtection'; the
    remaining events are serialized as UTF-8 into *output_filename*.

    :param input_filename: path of the XML file to read
    :param output_filename: path of the XML file to create (overwritten)
    """
    protection_tags = ['sheetProtection', 'workbookProtection', 'documentProtection']
    xml_filter = XMLTagsFilter(protection_tags, make_parser())

    with open(output_filename, 'wb') as out_stream:
        # the generator re-emits every event that the filter lets through
        xml_filter.setContentHandler(
            XMLGenerator(out_stream, encoding='utf-8', short_empty_elements=True)
        )
        xml_filter.parse(input_filename)


def unlock_office_file(input_filename, output_filename):
    """Write a copy of an Office zip container with protection tags removed.

    Every member of the input archive is copied into a new archive at
    *output_filename*. The members 'xl/workbook.xml', 'word/settings.xml'
    and every 'xl/worksheets/*.xml' are passed through
    xml_remove_protection() first; all other members are copied unchanged.
    Directory entries are skipped.

    :param input_filename: path of the source file
    :param output_filename: path of the archive to create (overwritten)
    :raises NotImplementedError: if the input is not a zip archive or
        contains none of the members listed above
    """
    if not is_zipfile(input_filename):
        raise NotImplementedError

    with ZipFile(input_filename, 'r') as zin:
        content = zin.namelist()

        # members that are present in the archive and need filtering
        files_to_process = {
            name for name in content
            if name in ('xl/workbook.xml', 'word/settings.xml')
            or (name.startswith('xl/worksheets/') and name.endswith('.xml'))
        }

        # nothing recognizable inside: not a supported document
        if not files_to_process:
            raise NotImplementedError

        # TemporaryDirectory removes the scratch directory even when
        # processing fails half-way; the output archive is closed first.
        with tempfile.TemporaryDirectory(suffix='-unlock') as temp_dir, \
                ZipFile(output_filename, 'w') as zout:
            for z_filename in content:
                info = zin.getinfo(z_filename)
                if info.is_dir():
                    continue  # skip directories

                temp_filename = zin.extract(z_filename, temp_dir)
                if z_filename in files_to_process:
                    # filter into a sibling 'unlock-<name>' file and drop
                    # the extracted original
                    tempfile_orig = temp_filename
                    temp_filename = os.path.join(
                        os.path.dirname(tempfile_orig),
                        f'unlock-{os.path.basename(tempfile_orig)}',
                    )
                    xml_remove_protection(tempfile_orig, temp_filename)
                    os.remove(tempfile_orig)

                # add file to output archive, keeping its compression method
                zout.write(temp_filename, z_filename, compress_type=info.compress_type)
                os.remove(temp_filename)


def unlock(source_filename):
    """Unlock an Office file and report the outcome in a message box.

    The result is written next to the source file as
    '<name>_unlocked<extension>'. When unlock_office_file() raises
    NotImplementedError an error dialog is shown and the process is asked
    to exit with status 1; otherwise an information dialog is shown.

    :param source_filename: path of the file to unlock
    :raises SystemExit: with code 1 when the file is not supported
    """
    file_path, file_base = os.path.split(source_filename)
    file_name, file_extension = os.path.splitext(file_base)
    target_name = f'{file_name}_unlocked{file_extension}'
    target_file = os.path.join(file_path, target_name)

    try:
        unlock_office_file(source_filename, target_file)
    except NotImplementedError:
        messagebox.showerror('Неподдерживаемый тип', f'Формат файла "{file_base}" не поддерживается\nДанный скрипт '
                                                     'поддерживает снятие блокировки только с файлов MS Word (.docx)'
                                                     ' и MS Excel (.xlsx)')
        # sys.exit instead of the site-provided exit(), which is meant for
        # the interactive shell and is not guaranteed to exist
        sys.exit(1)

    messagebox.showinfo('Операция завершена', f'Файл "{target_name}" в "{file_path}" готов к использованию')


def sanitize_filename(filename):
    """Return *filename* with all double and single quote characters removed."""
    quote_chars = '"\''
    # one translate pass deletes every listed character
    return filename.translate(str.maketrans('', '', quote_chars))


def main():
    """Script entry point: choose the source file and unlock it.

    The file is taken from the first command-line argument or, when none is
    given, from a file-open dialog (cancelling the dialog exits with
    status 0). Quotes are stripped from the path and it is made absolute
    before unlock() is called. Unexpected errors are shown in an error
    dialog; a SystemExit raised by unlock() is left to propagate so that
    its exit status is kept.
    """
    root = tk.Tk()
    root.withdraw()  # only dialogs are used; hide the empty main window

    if len(sys.argv) > 1:
        source_file = sys.argv[1]
    else:
        source_file = filedialog.askopenfilename()

        # file dialog closed (the result is empty: '' or an empty tuple)
        if not source_file:
            sys.exit(0)

    source_file = sanitize_filename(source_file)
    source_file = os.path.abspath(source_file)

    try:
        unlock(source_file)
    except Exception as err:
        # Exception (not a bare except) keeps SystemExit/KeyboardInterrupt
        # out of the dialog; show the message, not just the class
        messagebox.showerror('Непредвиденная ошибка', f'{type(err).__name__}: {err}')


# Run the workflow only when the file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
	import os
	import shutil
	import sys
	import tempfile
	import tkinter as tk
	from tkinter import filedialog
	from tkinter import messagebox
	from xml.sax import make_parser
	from xml.sax.saxutils import XMLFilterBase, XMLGenerator
	from zipfile import ZipFile, is_zipfile


	class XMLTagsFilter(XMLFilterBase):
	    """SAX filter that drops selected elements together with their content.

	    Events are forwarded to the ContentHandler only while the parser is
	    outside every element named in the 'tag_names_to_exclude' parameter.
	    Names are compared case-insensitively, both as reported by the parser
	    and with the namespace prefix stripped, so 'documentProtection' also
	    matches '<w:documentProtection>'.

	    Only the non-namespace events (startElement/endElement/characters) are
	    filtered; everything else is forwarded by XMLFilterBase unchanged.

	    from: https://stackoverflow.com/a/42411493
	    """

	    def __init__(self, tag_names_to_exclude, parent=None):
	        """Create the filter.

	        :param tag_names_to_exclude: iterable of element names to drop
	        :param parent: upstream XMLReader the events are read from
	        """
	        super().__init__(parent)

	        # lower-cased names of the elements to exclude (set: O(1) lookups)
	        self._tag_names_to_exclude = frozenset(tag.lower() for tag in tag_names_to_exclude)

	        # _excluded_tags_count keeps track of opened elements to exclude,
	        # so nested excluded elements are handled correctly
	        self._excluded_tags_count = 0

	    def _is_excluded(self, name):
	        """Return True when the element *name* must be dropped."""
	        lowered = name.lower()
	        if lowered in self._tag_names_to_exclude:
	            return True
	        # Without namespace processing the parser reports qualified names
	        # such as 'w:documentProtection'; compare the local part as well.
	        return lowered.rpartition(':')[2] in self._tag_names_to_exclude

	    def _forward_events(self):
	        # will return True when we are not inside excluded element
	        return self._excluded_tags_count == 0

	    def startElement(self, name, attrs):
	        # count first, so the excluded element's own start tag is dropped
	        if self._is_excluded(name):
	            self._excluded_tags_count += 1

	        if self._forward_events():
	            super().startElement(name, attrs)

	    def endElement(self, name):
	        # forward first, so the excluded element's own end tag is dropped
	        if self._forward_events():
	            super().endElement(name)

	        if self._is_excluded(name):
	            self._excluded_tags_count -= 1

	    def characters(self, content):
	        if self._forward_events():
	            super().characters(content)


	def xml_remove_protection(input_filename, output_filename):
	    """Copy an XML file, leaving out the Office protection elements.

	    The input is parsed through an XMLTagsFilter configured with the names
	    'sheetProtection', 'workbookProtection' and 'documentProtection'; the
	    remaining events are serialized as UTF-8 into *output_filename*.

	    :param input_filename: path of the XML file to read
	    :param output_filename: path of the XML file to create (overwritten)
	    """
	    protection_tags = ['sheetProtection', 'workbookProtection', 'documentProtection']
	    xml_filter = XMLTagsFilter(protection_tags, make_parser())

	    with open(output_filename, 'wb') as out_stream:
	        # the generator re-emits every event that the filter lets through
	        xml_filter.setContentHandler(
	            XMLGenerator(out_stream, encoding='utf-8', short_empty_elements=True)
	        )
	        xml_filter.parse(input_filename)


	def unlock_office_file(input_filename, output_filename):
	    """Write a copy of an Office zip container with protection tags removed.

	    Every member of the input archive is copied into a new archive at
	    *output_filename*. The members 'xl/workbook.xml', 'word/settings.xml'
	    and every 'xl/worksheets/*.xml' are passed through
	    xml_remove_protection() first; all other members are copied unchanged.
	    Directory entries are skipped.

	    :param input_filename: path of the source file
	    :param output_filename: path of the archive to create (overwritten)
	    :raises NotImplementedError: if the input is not a zip archive or
	        contains none of the members listed above
	    """
	    if not is_zipfile(input_filename):
	        raise NotImplementedError

	    with ZipFile(input_filename, 'r') as zin:
	        content = zin.namelist()

	        # members that are present in the archive and need filtering
	        files_to_process = {
	            name for name in content
	            if name in ('xl/workbook.xml', 'word/settings.xml')
	            or (name.startswith('xl/worksheets/') and name.endswith('.xml'))
	        }

	        # nothing recognizable inside: not a supported document
	        if not files_to_process:
	            raise NotImplementedError

	        # TemporaryDirectory removes the scratch directory even when
	        # processing fails half-way; the output archive is closed first.
	        with tempfile.TemporaryDirectory(suffix='-unlock') as temp_dir, \
	                ZipFile(output_filename, 'w') as zout:
	            for z_filename in content:
	                info = zin.getinfo(z_filename)
	                if info.is_dir():
	                    continue  # skip directories

	                temp_filename = zin.extract(z_filename, temp_dir)
	                if z_filename in files_to_process:
	                    # filter into a sibling 'unlock-<name>' file and drop
	                    # the extracted original
	                    tempfile_orig = temp_filename
	                    temp_filename = os.path.join(
	                        os.path.dirname(tempfile_orig),
	                        f'unlock-{os.path.basename(tempfile_orig)}',
	                    )
	                    xml_remove_protection(tempfile_orig, temp_filename)
	                    os.remove(tempfile_orig)

	                # add file to output archive, keeping its compression method
	                zout.write(temp_filename, z_filename, compress_type=info.compress_type)
	                os.remove(temp_filename)


	def unlock(source_filename):
	    """Unlock an Office file and report the outcome in a message box.

	    The result is written next to the source file as
	    '<name>_unlocked<extension>'. When unlock_office_file() raises
	    NotImplementedError an error dialog is shown and the process is asked
	    to exit with status 1; otherwise an information dialog is shown.

	    :param source_filename: path of the file to unlock
	    :raises SystemExit: with code 1 when the file is not supported
	    """
	    file_path, file_base = os.path.split(source_filename)
	    file_name, file_extension = os.path.splitext(file_base)
	    target_name = f'{file_name}_unlocked{file_extension}'
	    target_file = os.path.join(file_path, target_name)

	    try:
	        unlock_office_file(source_filename, target_file)
	    except NotImplementedError:
	        messagebox.showerror('Неподдерживаемый тип', f'Формат файла "{file_base}" не поддерживается\nДанный скрипт '
	                                                     'поддерживает снятие блокировки только с файлов MS Word (.docx)'
	                                                     ' и MS Excel (.xlsx)')
	        # sys.exit instead of the site-provided exit(), which is meant for
	        # the interactive shell and is not guaranteed to exist
	        sys.exit(1)

	    messagebox.showinfo('Операция завершена', f'Файл "{target_name}" в "{file_path}" готов к использованию')


	def sanitize_filename(filename):
	    """Return *filename* with all double and single quote characters removed."""
	    quote_chars = '"\''
	    # one translate pass deletes every listed character
	    return filename.translate(str.maketrans('', '', quote_chars))


	def main():
	    """Script entry point: choose the source file and unlock it.

	    The file is taken from the first command-line argument or, when none is
	    given, from a file-open dialog (cancelling the dialog exits with
	    status 0). Quotes are stripped from the path and it is made absolute
	    before unlock() is called. Unexpected errors are shown in an error
	    dialog; a SystemExit raised by unlock() is left to propagate so that
	    its exit status is kept.
	    """
	    root = tk.Tk()
	    root.withdraw()  # only dialogs are used; hide the empty main window

	    if len(sys.argv) > 1:
	        source_file = sys.argv[1]
	    else:
	        source_file = filedialog.askopenfilename()

	        # file dialog closed (the result is empty: '' or an empty tuple)
	        if not source_file:
	            sys.exit(0)

	    source_file = sanitize_filename(source_file)
	    source_file = os.path.abspath(source_file)

	    try:
	        unlock(source_file)
	    except Exception as err:
	        # Exception (not a bare except) keeps SystemExit/KeyboardInterrupt
	        # out of the dialog; show the message, not just the class
	        messagebox.showerror('Непредвиденная ошибка', f'{type(err).__name__}: {err}')


	# Run the workflow only when the file is executed as a script,
	# not when it is imported as a module.
	if __name__ == "__main__":
	    main()