Skip to content

Instantly share code, notes, and snippets.

@b4tman
Forked from arrowinaknee/office_unlocker.pyw
Last active August 29, 2020 20:06
Show Gist options
  • Save b4tman/9cabde75cb2f4c9afb550575ebc5b75e to your computer and use it in GitHub Desktop.
Save b4tman/9cabde75cb2f4c9afb550575ebc5b75e to your computer and use it in GitHub Desktop.
Снимает защиту от редактирования с файлов MS Word (.docx) и MS Excel (.xlsx)
import os
import shutil
import sys
import tempfile
import tkinter as tk
from tkinter import filedialog
from tkinter import messagebox
from xml.sax import make_parser
from xml.sax.saxutils import XMLFilterBase, XMLGenerator
from zipfile import ZipFile, is_zipfile
class XMLTagsFilter(XMLFilterBase):
    """SAX filter that drops selected elements together with their content.

    Events are forwarded to the downstream content handler unless the parser
    is currently inside an element listed in the ``tag_names_to_exclude``
    parameter.  Names are compared case-insensitively.  A listed name matches
    either the full qualified name reported by the parser or just its local
    part, so ``documentProtection`` also matches ``w:documentProtection``.

    Based on: https://stackoverflow.com/a/42411493
    """

    def __init__(self, tag_names_to_exclude, parent=None):
        super().__init__(parent)
        # lower-cased names of the elements to drop
        self._tag_names_to_exclude = frozenset(
            tag.lower() for tag in tag_names_to_exclude)
        # number of currently open excluded elements (supports nesting)
        self._excluded_tags_count = 0

    def _is_excluded(self, name):
        # Without namespace processing the parser reports qualified names
        # such as "w:documentProtection"; compare the local part as well so
        # prefixed elements are recognised.
        lowered = name.lower()
        if lowered in self._tag_names_to_exclude:
            return True
        return lowered.rpartition(':')[2] in self._tag_names_to_exclude

    def _forward_events(self):
        # True while we are not inside any excluded element
        return self._excluded_tags_count == 0

    def startElement(self, name, attrs):
        # count first, so the excluded element's own start tag is dropped
        if self._is_excluded(name):
            self._excluded_tags_count += 1
        if self._forward_events():
            super().startElement(name, attrs)

    def endElement(self, name):
        # forward first, so the excluded element's own end tag is dropped
        if self._forward_events():
            super().endElement(name)
        if self._is_excluded(name):
            self._excluded_tags_count -= 1

    def characters(self, content):
        if self._forward_events():
            super().characters(content)
def xml_remove_protection(input_filename, output_filename):
    """Copy an XML file, leaving out the Office protection elements.

    The file at ``input_filename`` is parsed through an XMLTagsFilter and
    re-serialised as UTF-8 into ``output_filename``.
    """
    protection_tags = ('sheetProtection', 'workbookProtection', 'documentProtection')
    filtered_reader = XMLTagsFilter(protection_tags, make_parser())
    with open(output_filename, 'wb') as out_stream:
        # the generator writes every event the filter lets through
        filtered_reader.setContentHandler(
            XMLGenerator(out_stream, encoding='utf-8', short_empty_elements=True))
        filtered_reader.parse(input_filename)
def unlock_office_file(input_filename, output_filename):
    """Write a copy of an Office Open XML file with protection tags removed.

    Every non-directory member of the input archive is copied to the output
    archive under the same name and compression type.  The members
    ``xl/workbook.xml``, ``word/settings.xml`` and ``xl/worksheets/*.xml``
    are passed through ``xml_remove_protection`` on the way.

    Raises:
        NotImplementedError: if the input is not a zip archive or contains
            none of the members listed above.
    """
    if not is_zipfile(input_filename):
        raise NotImplementedError

    with ZipFile(input_filename, 'r') as zin:
        content = zin.namelist()
        # members of this archive that have to be filtered
        files_to_process = {
            name for name in content
            if name in ('xl/workbook.xml', 'word/settings.xml')
            or (name.startswith('xl/worksheets/') and name.endswith('.xml'))
        }
        if not files_to_process:
            raise NotImplementedError

        # The context manager removes the scratch directory even when
        # extraction, XML parsing or writing the archive fails.
        with tempfile.TemporaryDirectory(suffix='-unlock') as temp_dir:
            with ZipFile(output_filename, 'w') as zout:
                for z_filename in content:
                    info = zin.getinfo(z_filename)
                    if info.is_dir():
                        continue  # skip directories

                    extracted = zin.extract(z_filename, temp_dir)
                    if z_filename in files_to_process:
                        # write the filtered XML next to the extracted file
                        out_dir, out_base = os.path.split(extracted)
                        temp_filename = os.path.join(out_dir, f'unlock-{out_base}')
                        xml_remove_protection(extracted, temp_filename)
                        os.remove(extracted)
                    else:
                        temp_filename = extracted

                    # add file to output archive under its original name
                    zout.write(temp_filename, z_filename, compress_type=info.compress_type)
                    # delete right away to keep the scratch directory small
                    os.remove(temp_filename)
def unlock(source_filename):
    """Create an unlocked copy of ``source_filename`` next to the original.

    The copy is written to the same directory as
    ``<name>_unlocked<extension>``.  A message box reports the outcome.

    Raises:
        SystemExit: with status 1, after an error dialog has been shown,
            when ``unlock_office_file`` raises NotImplementedError.
    """
    file_path, file_base = os.path.split(source_filename)
    file_name, file_extension = os.path.splitext(file_base)
    target_name = f'{file_name}_unlocked{file_extension}'
    target_file = os.path.join(file_path, target_name)
    try:
        unlock_office_file(source_filename, target_file)
    except NotImplementedError:
        messagebox.showerror('Неподдерживаемый тип', f'Формат файла "{file_base}" не поддерживается\nДанный скрипт '
                                                     'поддерживает снятие блокировки только с файлов MS Word (.docx)'
                                                     ' и MS Excel (.xlsx)')
        # sys.exit rather than the site-provided exit(), which is missing
        # when the site module is not loaded
        sys.exit(1)
    messagebox.showinfo('Операция завершена', f'Файл "{target_name}" в "{file_path}" готов к использованию')
def sanitize_filename(filename):
    """Return ``filename`` without the quote characters wrapped around it.

    Only leading and trailing ``"`` and ``'`` characters are removed.
    Quotes inside the path (for example the apostrophe in ``O'Brien``) are
    kept, because removing them would make the path point at a different,
    usually non-existent, file.
    """
    return filename.strip('"\'')
def main():
    """Pick the source file and run ``unlock`` on it.

    The file is taken from the first command-line argument; when no argument
    is given a file-open dialog is shown instead.  Unexpected errors are
    reported in a message box.
    """
    # hidden root window, needed only as the owner of the dialogs
    root = tk.Tk()
    root.withdraw()

    if len(sys.argv) <= 1:
        source_file = filedialog.askopenfilename()
        # A cancelled dialog yields an empty value; depending on the Tk
        # build this is '' or an empty tuple, so test truthiness.
        if not source_file:
            sys.exit(0)
    else:
        source_file = sys.argv[1]

    source_file = sanitize_filename(source_file)
    source_file = os.path.abspath(source_file)
    try:
        unlock(source_file)
    except SystemExit:
        # unlock() has already shown its own error dialog before exiting
        pass
    except Exception as err:
        # show the exception type and message instead of the class object
        messagebox.showerror('Непредвиденная ошибка', f'{type(err).__name__}: {err}')
# Start the tool only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()
@b4tman
Copy link
Author

b4tman commented Aug 27, 2020

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment