Last active
November 7, 2022 18:32
-
-
Save mxwell/e253548692820cdce778631165090080 to your computer and use it in GitHub Desktop.
AutoCAD SheetSet *.dst files use a simple encoding. This script helps to manipulate them.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""
An AutoCAD SheetSet file with the *.dst extension is just an XML stream encoded with a substitution cipher.
The XML data is first encoded as UTF-8. This means characters may have different lengths (from one to four bytes).
It looks like the substitution needs to be done at the per-character level rather than the per-byte level.
== Restore conversion dictionary ==
Given two files "Crack.xml" and "Crack.dst", we can restore the conversion dictionary (limited to the charset used in these files) as follows:
python dst_format_ctl.py restore_dictionary --xml Crack.xml --dst Crack.dst --dictionary dictionary.json
The dictionary is stored in "dictionary.json". It includes mappings for both directions - XML to DST and vice versa.
== Convert *.dst to *.xml ==
Given "sheet.dst" and "dictionary.json", we can produce "sheet.xml":
python dst_format_ctl.py convert --direction dst_to_xml --dst sheet.dst --xml sheet.xml --dictionary dictionary.json
== Convert *.xml to *.dst ==
Given "sheet.xml" and "dictionary.json", we can produce "sheet.dst":
python dst_format_ctl.py convert --direction xml_to_dst --dst sheet.dst --xml sheet.xml --dictionary dictionary.json
"""
import argparse | |
import json | |
import logging | |
import sys | |
def load_sequence(filename):
    """Read a file and return its contents as a list of byte values.

    Args:
        filename: Path of the file to read (opened in binary mode).

    Returns:
        A list of ints in the range 0..255, one per byte of the file, in
        file order. An empty file yields an empty list.
    """
    # One buffered read instead of a read(1) call per byte; iterating a
    # bytes object already yields ints.
    with open(filename, "rb") as f:
        result = list(f.read())
    logging.info("Loaded sequence of %d byte(s) from %s", len(result), filename)
    return result
def save_sequence(sequence, filename):
    """Write a sequence of byte values to a file.

    Args:
        sequence: Iterable of ints in the range 0..255.
        filename: Path of the file to create or overwrite (binary mode).

    Raises:
        ValueError: If any value is outside 0..255 (raised by ``bytes``
            before the output file is opened).
    """
    # Materialise once: the data is then written with a single call, and the
    # logged length is available even when `sequence` is a one-shot iterator.
    data = bytes(sequence)
    with open(filename, "wb") as output:
        output.write(data)
    logging.info("Sequence of %d byte(s) is saved to %s", len(data), filename)
def join_bytes(sequence, start, end):
    """Combine the byte values sequence[start:end] into one integer.

    The first byte becomes the most significant one (big-endian), so
    ``join_bytes([0xD0, 0x90], 0, 2) == 0xD090``.

    Args:
        sequence: Indexable collection of ints (byte values).
        start: Index of the first byte to include.
        end: Index one past the last byte to include.

    Returns:
        The combined integer; 0 when the range is empty.

    Raises:
        ValueError: If ``start`` is negative or ``end`` lies beyond the end
            of ``sequence``.
    """
    if start < 0:
        # A negative index would silently wrap around to the tail of the
        # sequence instead of failing.
        raise ValueError("negative start offset {} is not allowed".format(start))
    if end > len(sequence):
        raise ValueError("sequence is too short to extract {} bytes of unicode character at {}".format(end - start, start))
    result = 0
    for i in range(start, end):
        result = (result << 8) | sequence[i]
    return result
def split_bytes(encoding, span):
    """Split an integer into ``span`` byte values, most significant first.

    This is the inverse of ``join_bytes`` for values that fit in ``span``
    bytes. Bits above the lowest ``span`` bytes are discarded.

    Args:
        encoding: Integer to split.
        span: Number of bytes to produce.

    Returns:
        A list of ``span`` ints in the range 0..255 (empty when ``span`` is
        zero or negative). A real list is returned rather than a one-shot
        iterator so the result can be compared, measured and reused.
    """
    return [(encoding >> (8 * shift)) & 0xFF for shift in range(span - 1, -1, -1)]
def _utf8_span(lead_byte):
    """Return how many bytes a character occupies, judged by its first byte."""
    if lead_byte >= 0xF0:  # top four bits set
        return 4
    if lead_byte >= 0xE0:  # top three bits set
        return 3
    if lead_byte >= 0xC0:  # top two bits set
        return 2
    return 1


def restore_dictionary_command(args):
    """Derive the substitution tables from a matching XML/DST file pair.

    Both files are walked in lockstep, one character at a time. The width of
    each character is taken from the leading byte of the XML side, and the
    same number of bytes is read at the same offset of the DST side. The
    resulting mappings are written as JSON with the keys "dst_to_xml" and
    "xml_to_dst" (integer keys are serialised as strings by ``json``).

    Args:
        args: Parsed command-line namespace providing ``xml`` and ``dst``
            (input paths) and ``dictionary`` (output path).

    Raises:
        ValueError: If the two files differ in length, if a character is
            truncated at the end of the input, or if the same character maps
            to two different values in either direction.
    """
    xml = load_sequence(args.xml)
    dst = load_sequence(args.dst)
    # Explicit check instead of `assert`, which is removed under `python -O`.
    if len(xml) != len(dst):
        raise ValueError("both *.xml and *.dst files must be of the same length")
    cipher = {}
    decipher = {}
    n = len(xml)
    i = 0
    while i < n:
        b = xml[i]
        span = _utf8_span(b)
        logging.debug("i %d, n %d, b %d, span %d", i, n, b, span)
        if i + span > n:
            raise ValueError("sequence is too short to extract {} bytes of unicode character at {}".format(span, i))
        cur = join_bytes(xml, i, i + span)
        encoding = join_bytes(dst, i, i + span)
        # setdefault records the first mapping seen and returns the stored
        # one, so any later disagreement is detected.
        known_encoding = cipher.setdefault(cur, encoding)
        if known_encoding != encoding:
            raise ValueError("inconsistent encoding: {} was encoded as {} firstly and as {} afterwards".format(cur, known_encoding, encoding))
        known_decoding = decipher.setdefault(encoding, cur)
        if known_decoding != cur:
            raise ValueError("inconsistent decoding: {} was decoded as {} firstly and as {} afterwards".format(encoding, known_decoding, cur))
        i += span
    logging.info("Detected encoding of %d characters", len(cipher))
    logging.info("Detected decoding of %d characters", len(decipher))
    logging.info("Saving dictionary data to %s", args.dictionary)
    with open(args.dictionary, "w") as a_file:
        json.dump({
            "dst_to_xml": decipher,
            "xml_to_dst": cipher,
        }, a_file)
def load_dictionary(path, direction):
    """Load one direction of the substitution dictionary from a JSON file.

    Args:
        path: Path of the JSON dictionary file.
        direction: Top-level key to select from the file; the command-line
            interface offers "dst_to_xml" and "xml_to_dst".

    Returns:
        The mapping stored under ``direction``.

    Raises:
        KeyError: If the file has no entry for ``direction``.
    """
    # JSON is read as UTF-8 explicitly so the result does not depend on the
    # platform's default locale encoding.
    with open(path, encoding="utf-8") as a_file:
        data = json.load(a_file)
    result = data[direction]
    logging.info("Loaded dictionary of %d character(s) from %s for direction %s", len(result), path, direction)
    return result
def convert_command(args):
    """Translate a file between the XML and DST forms using a dictionary.

    At each input position, spans of one to four bytes are tried in turn and
    the first one whose joined value is a key of the dictionary is replaced
    by the mapped value, split back into the same number of bytes. If no span
    matches, an error is logged, conversion stops, and whatever was converted
    so far is still written to the output file.

    Args:
        args: Parsed command-line namespace providing ``direction``
            ("dst_to_xml" selects DST input and XML output; any other value
            selects XML input and DST output), ``xml``, ``dst`` and
            ``dictionary`` (paths).
    """
    if args.direction == "dst_to_xml":
        input_path, output_path = args.dst, args.xml
    else:
        input_path, output_path = args.xml, args.dst
    sequence = load_sequence(input_path)
    dictionary = load_dictionary(args.dictionary, args.direction)
    n = len(sequence)
    i = 0
    result = []
    while i < n:
        # Cap the span at the bytes that remain, so an unknown character near
        # the end of the input reaches the error path below instead of making
        # join_bytes raise on an out-of-range read.
        for span in range(1, min(4, n - i) + 1):
            # Dictionary keys are strings because JSON object keys always are.
            cur = str(join_bytes(sequence, i, i + span))
            logging.debug("trying %s with span %d", cur, span)
            if cur in dictionary:
                result.extend(split_bytes(dictionary[cur], span))
                i += span
                break
        else:
            # No span matched at this position.
            logging.error("failed to decode character sequence at %d, stopping here", i)
            break
    save_sequence(result, output_path)
def main(argv=None):
    """Parse the command line and run the selected sub-command.

    Args:
        argv: Optional list of argument strings; when None, argparse reads
            the process arguments.

    Returns:
        0 after the sub-command finishes.
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help="Modes of operation", dest="command")
    # Without a mandatory sub-command, `args.func` is missing and the call
    # below fails with AttributeError instead of a usage message.
    subparsers.required = True

    restore_dictionary = subparsers.add_parser("restore_dictionary")
    restore_dictionary.set_defaults(func=restore_dictionary_command)
    restore_dictionary.add_argument("--xml", help="Path to original *.xml file", required=True)
    restore_dictionary.add_argument("--dst", help="Path to encoded *.dst file", required=True)
    restore_dictionary.add_argument("--dictionary", help="Path to save dictionary data", default="dictionary.json")

    convert = subparsers.add_parser("convert")
    convert.set_defaults(func=convert_command)
    # The direction selects both the input file and the dictionary table, so
    # it has no meaningful default and must be given.
    convert.add_argument("--direction", help="Direction of conversion", choices=["xml_to_dst", "dst_to_xml"], required=True)
    convert.add_argument("--xml", help="Path to original *.xml file", required=True)
    convert.add_argument("--dst", help="Path to encoded *.dst file", required=True)
    convert.add_argument("--dictionary", help="Path to load dictionary data from", default="dictionary.json")

    args = parser.parse_args(argv)
    args.func(args)
    return 0
if __name__ == "__main__":
    # Script entry point: hand main()'s return value to the shell as the
    # process exit status.
    exit_status = main()
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Here is an example of "dictionary.json" that includes all Russian letters.