Skip to content

Instantly share code, notes, and snippets.

@mxwell
Last active November 7, 2022 18:32
Show Gist options
  • Save mxwell/e253548692820cdce778631165090080 to your computer and use it in GitHub Desktop.
Save mxwell/e253548692820cdce778631165090080 to your computer and use it in GitHub Desktop.
AutoCAD SheetSet *.dst files use a simple encoding. This script helps to manipulate them.
#! /usr/bin/env python3
"""
An AutoCAD SheetSet file with the *.dst extension is just an XML stream encoded with a substitution cipher.
The XML data is first encoded as UTF-8, which means characters may have different lengths (from one to four bytes).
It looks like the substitution needs to be done at the per-character level rather than at the per-byte level.
== Restore conversion dictionary ==
Given two files "Crack.xml" and "Crack.dst", we can restore conversion dictionary (limited to charset used in these files) as follows:
python dst_format_ctl.py restore_dictionary --xml Crack.xml --dst Crack.dst --dictionary dictionary.json
The dictionary is stored into "dictionary.json". It includes mappings for both directions - XML to DST and vice versa.
== Convert *.dst to *.xml ==
Given "sheet.dst" and "dictionary.json", we can produce "sheet.xml":
python dst_format_ctl.py convert --direction dst_to_xml --dst sheet.dst --xml sheet.xml --dictionary dictionary.json
== Convert *.xml to *.dst ==
Given "sheet.xml" and "dictionary.json", we can produce "sheet.dst":
python dst_format_ctl.py convert --direction xml_to_dst --dst sheet.dst --xml sheet.xml --dictionary dictionary.json
"""
import argparse
import json
import logging
import sys
def load_sequence(filename):
    """Read a file and return its content as a list of byte values.

    Args:
        filename: Path of the file to read; it is opened in binary mode.

    Returns:
        A list of integers in the range 0..255, one per byte of the file
        (an empty list for an empty file).
    """
    # Read the whole file with one call instead of one byte at a time;
    # iterating a bytes object yields the integer value of every byte.
    with open(filename, "rb") as f:
        result = list(f.read())
    logging.info("Loaded sequence of %d byte(s) from %s", len(result), filename)
    return result
def save_sequence(sequence, filename):
    """Write a sequence of byte values to a file.

    Args:
        sequence: Iterable of integers in the range 0..255.
        filename: Path of the file to write; it is opened in binary mode and
            any previous content is replaced.

    Raises:
        ValueError: If a value is outside the range 0..255.
        TypeError: If a value is not an integer.
    """
    # Build the payload before opening the file, so that an invalid value
    # does not leave a truncated output file behind.
    payload = bytes(sequence)
    with open(filename, "wb") as output:
        # One write for the whole payload instead of one write per byte.
        output.write(payload)
    logging.info("Sequence of %d byte(s) is saved to %s", len(payload), filename)
def join_bytes(sequence, start, end):
    """Combine the bytes ``sequence[start:end]`` into one big-endian integer.

    Args:
        sequence: Indexable sequence of byte values (integers 0..255).
        start: Index of the first (most significant) byte.
        end: Index one past the last (least significant) byte.

    Returns:
        The integer whose big-endian representation is the selected bytes;
        0 when ``start == end``.

    Raises:
        Exception: If ``end`` lies beyond the end of ``sequence``.
    """
    if end > len(sequence):
        raise Exception("sequence is too short to extract {} bytes of unicode character at {}".format(end - start, start))
    result = 0
    for i in range(start, end):
        # Shift what was collected so far and append the next byte.
        result = (result << 8) | sequence[i]
    return result
def split_bytes(encoding, span):
    """Split an integer into ``span`` byte values, most significant first.

    This is the inverse of ``join_bytes`` for a character of ``span`` bytes.

    Args:
        encoding: Integer holding the big-endian value of a character.
        span: Number of bytes to produce.

    Returns:
        A list of ``span`` integers in the range 0..255. Only the lowest
        ``span`` bytes of ``encoding`` are used; missing high bytes come out
        as zeros.
    """
    # Return a real list rather than a one-shot ``reversed`` iterator, so the
    # result can be measured, indexed and iterated more than once.
    return [(encoding >> (8 * shift)) & 0xFF for shift in range(span - 1, -1, -1)]
def _utf8_char_length(lead_byte):
    """Return the length in bytes of a UTF-8 character given its first byte."""
    if (lead_byte >> 4) == 0xF:
        return 4
    if (lead_byte >> 5) == 0x7:
        return 3
    if (lead_byte >> 6) == 0x3:
        return 2
    return 1


def restore_dictionary_command(args):
    """Rebuild the substitution dictionary from a matching XML/DST file pair.

    Both files are walked in parallel, one UTF-8 character of the XML file at
    a time. The bytes found at the same position in the DST file are taken as
    the encoded form of that character. The resulting mappings for both
    directions are written as JSON to ``args.dictionary``.

    Args:
        args: Parsed command line arguments providing ``xml``, ``dst`` and
            ``dictionary`` paths.

    Raises:
        ValueError: If the two input files differ in length.
        Exception: If one character is mapped inconsistently, or a multi-byte
            character is cut off by the end of the file.
    """
    xml = load_sequence(args.xml)
    dst = load_sequence(args.dst)
    # An explicit check instead of ``assert``: assertions are stripped when
    # Python runs with -O, and the walk below relies on equal lengths.
    if len(xml) != len(dst):
        raise ValueError("both *.xml and *.dst files must be of the same length")
    cipher = {}
    decipher = {}
    n = len(xml)
    i = 0
    while i < n:
        b = xml[i]
        span = _utf8_char_length(b)
        logging.debug("i %d, n %d, b %d, span %d", i, n, b, span)
        cur = join_bytes(xml, i, i + span)
        encoding = join_bytes(dst, i, i + span)
        # Every character must always map to the same code, in both directions.
        if cur in cipher:
            if cipher[cur] != encoding:
                raise Exception("inconsistent encoding: {} was encoded as {} firstly and as {} afterwards".format(cur, cipher[cur], encoding))
        else:
            cipher[cur] = encoding
        if encoding in decipher:
            if decipher[encoding] != cur:
                raise Exception("inconsistent decoding: {} was decoded as {} firstly and as {} afterwards".format(encoding, decipher[encoding], cur))
        else:
            decipher[encoding] = cur
        i += span
    logging.info("Detected encoding of %d characters", len(cipher))
    logging.info("Detected decoding of %d characters", len(decipher))
    logging.info("Saving dictionary data to %s", args.dictionary)
    with open(args.dictionary, "w") as a_file:
        # JSON object keys are always strings, so the integer keys are stored
        # in their decimal string form.
        json.dump({
            "dst_to_xml": decipher,
            "xml_to_dst": cipher,
        }, a_file)
def load_dictionary(path, direction):
    """Load the mapping for one conversion direction from a dictionary file.

    Args:
        path: Path to the JSON dictionary file.
        direction: Top-level key of the mapping to return, "dst_to_xml" or
            "xml_to_dst".

    Returns:
        The mapping stored under ``direction``. As it comes from JSON, its
        keys are strings.

    Raises:
        KeyError: If the file has no mapping for ``direction``.
    """
    with open(path, encoding="utf-8") as a_file:
        data = json.load(a_file)
    result = data[direction]
    logging.info("Loaded dictionary of %d character(s) from %s for direction %s", len(result), path, direction)
    return result
def convert_command(args):
    """Convert a file between the XML and DST forms using a dictionary.

    The input is consumed character by character. At every position the
    shortest run of one to four bytes that is present in the dictionary is
    replaced with its counterpart of the same length. If no run matches, an
    error is logged, conversion stops, and what was converted so far is still
    written to the output file.

    Args:
        args: Parsed command line arguments providing ``direction``, ``xml``,
            ``dst`` and ``dictionary``.
    """
    if args.direction == "dst_to_xml":
        input_path, output_path = args.dst, args.xml
    else:
        input_path, output_path = args.xml, args.dst
    sequence = load_sequence(input_path)
    dictionary = load_dictionary(args.dictionary, args.direction)
    n = len(sequence)
    i = 0
    result = []
    while i < n:
        # Never probe past the end of the input: a longer span there cannot
        # match, and asking join_bytes for it would raise instead of letting
        # the failure be reported below.
        longest = min(4, n - i)
        for span in range(1, longest + 1):
            # Dictionary keys come from JSON, hence the string form.
            cur = str(join_bytes(sequence, i, i + span))
            logging.debug("trying %s with span %d", cur, span)
            if cur in dictionary:
                result.extend(split_bytes(dictionary[cur], span))
                break
        else:
            logging.error("failed to decode character sequence at %d, stopping here", i)
            break
        i += span
    save_sequence(result, output_path)
def main(argv=None):
    """Parse the command line and run the selected mode of operation.

    Args:
        argv: Optional list of command line arguments (without the program
            name). When omitted, the arguments of the running process are
            used.

    Returns:
        0 once the selected command has finished. Invalid command lines make
        argparse exit the process with status 2.
    """
    logging.basicConfig(level=logging.INFO)
    parser = argparse.ArgumentParser()
    subparsers = parser.add_subparsers(help="Modes of operation")

    restore_dictionary = subparsers.add_parser("restore_dictionary")
    restore_dictionary.set_defaults(func=restore_dictionary_command)
    restore_dictionary.add_argument("--xml", help="Path to original *.xml file", required=True)
    restore_dictionary.add_argument("--dst", help="Path to encoded *.dst file", required=True)
    restore_dictionary.add_argument("--dictionary", help="Path to save dictionary data", default="dictionary.json")

    convert = subparsers.add_parser("convert")
    convert.set_defaults(func=convert_command)
    # The direction selects both the input file and the dictionary section,
    # so conversion cannot proceed without it.
    convert.add_argument("--direction", help="Direction of conversion", choices=["xml_to_dst", "dst_to_xml"], required=True)
    convert.add_argument("--xml", help="Path to original *.xml file", required=True)
    convert.add_argument("--dst", help="Path to encoded *.dst file", required=True)
    convert.add_argument("--dictionary", help="Path to load dictionary data from", default="dictionary.json")

    args = parser.parse_args(argv)
    # Without a subcommand argparse sets no "func"; report a usage error
    # instead of failing with AttributeError.
    if not hasattr(args, "func"):
        parser.error("a mode of operation is required: restore_dictionary or convert")
    args.func(args)
    return 0
# Run the command line interface only when executed as a script, and hand
# main()'s return value to the shell as the exit status.
if __name__ == "__main__":
    sys.exit(main())
@mxwell
Copy link
Author

mxwell commented Apr 25, 2020

Here is an example of "dictionary.json" that includes all Russian letters.

{
  "dst_to_xml": {
    "10309965": 15711167,
    "131": 13,
    "134": 10,
    "161": 43,
    "162": 46,
    "163": 45,
    "164": 40,
    "167": 41,
    "168": 36,
    "17": 123,
    "170": 38,
    "171": 37,
    "172": 32,
    "174": 34,
    "18": 126,
    "19": 125,
    "192": 76,
    "193": 75,
    "194": 78,
    "195": 77,
    "196": 72,
    "197": 71,
    "198": 74,
    "199": 73,
    "20": 120,
    "200": 68,
    "201": 67,
    "202": 70,
    "203": 69,
    "204": 64,
    "205": 63,
    "206": 66,
    "207": 65,
    "208": 60,
    "209": 59,
    "21": 119,
    "210": 62,
    "211": 61,
    "212": 56,
    "213": 55,
    "214": 58,
    "215": 57,
    "216": 52,
    "217": 51,
    "218": 54,
    "219": 53,
    "22": 122,
    "220": 48,
    "221": 47,
    "222": 50,
    "223": 49,
    "224": 108,
    "225": 107,
    "226": 110,
    "227": 109,
    "228": 104,
    "229": 103,
    "23": 121,
    "230": 106,
    "231": 105,
    "232": 100,
    "233": 99,
    "234": 102,
    "235": 101,
    "237": 95,
    "238": 98,
    "239": 97,
    "24": 116,
    "240": 92,
    "241": 91,
    "243": 93,
    "244": 88,
    "245": 87,
    "246": 90,
    "247": 89,
    "248": 84,
    "249": 83,
    "25": 115,
    "250": 86,
    "251": 85,
    "252": 80,
    "253": 79,
    "254": 82,
    "255": 81,
    "26": 118,
    "27": 117,
    "28": 112,
    "29": 111,
    "30": 114,
    "31": 113,
    "31747": 53437,
    "31776": 53420,
    "31777": 53419,
    "31778": 53422,
    "31779": 53421,
    "31780": 53416,
    "31781": 53415,
    "31782": 53418,
    "31783": 53417,
    "31784": 53412,
    "31785": 53411,
    "31786": 53414,
    "31787": 53413,
    "31788": 53408,
    "31789": 53407,
    "31790": 53410,
    "31791": 53409,
    "31792": 53404,
    "31793": 53403,
    "31794": 53406,
    "31795": 53405,
    "31796": 53400,
    "31797": 53399,
    "31798": 53402,
    "31799": 53401,
    "31800": 53396,
    "31801": 53395,
    "31802": 53398,
    "31803": 53397,
    "31804": 53392,
    "31806": 53394,
    "31807": 53393,
    "31821": 53439,
    "31824": 53436,
    "31825": 53435,
    "31826": 53438,
    "31828": 53432,
    "31829": 53431,
    "31830": 53434,
    "31831": 53433,
    "31832": 53428,
    "31833": 53427,
    "31834": 53430,
    "31835": 53429,
    "31836": 53424,
    "31837": 53423,
    "31838": 53426,
    "31839": 53425,
    "32512": 53644,
    "32513": 53643,
    "32514": 53646,
    "32515": 53645,
    "32516": 53640,
    "32517": 53639,
    "32518": 53642,
    "32519": 53641,
    "32520": 53636,
    "32521": 53635,
    "32522": 53638,
    "32523": 53637,
    "32524": 53632,
    "32526": 53634,
    "32527": 53633,
    "32573": 53647
  },
  "xml_to_dst": {
    "10": 134,
    "100": 232,
    "101": 235,
    "102": 234,
    "103": 229,
    "104": 228,
    "105": 231,
    "106": 230,
    "107": 225,
    "108": 224,
    "109": 227,
    "110": 226,
    "111": 29,
    "112": 28,
    "113": 31,
    "114": 30,
    "115": 25,
    "116": 24,
    "117": 27,
    "118": 26,
    "119": 21,
    "120": 20,
    "121": 23,
    "122": 22,
    "123": 17,
    "125": 19,
    "126": 18,
    "13": 131,
    "15711167": 10309965,
    "32": 172,
    "34": 174,
    "36": 168,
    "37": 171,
    "38": 170,
    "40": 164,
    "41": 167,
    "43": 161,
    "45": 163,
    "46": 162,
    "47": 221,
    "48": 220,
    "49": 223,
    "50": 222,
    "51": 217,
    "52": 216,
    "53": 219,
    "53392": 31804,
    "53393": 31807,
    "53394": 31806,
    "53395": 31801,
    "53396": 31800,
    "53397": 31803,
    "53398": 31802,
    "53399": 31797,
    "53400": 31796,
    "53401": 31799,
    "53402": 31798,
    "53403": 31793,
    "53404": 31792,
    "53405": 31795,
    "53406": 31794,
    "53407": 31789,
    "53408": 31788,
    "53409": 31791,
    "53410": 31790,
    "53411": 31785,
    "53412": 31784,
    "53413": 31787,
    "53414": 31786,
    "53415": 31781,
    "53416": 31780,
    "53417": 31783,
    "53418": 31782,
    "53419": 31777,
    "53420": 31776,
    "53421": 31779,
    "53422": 31778,
    "53423": 31837,
    "53424": 31836,
    "53425": 31839,
    "53426": 31838,
    "53427": 31833,
    "53428": 31832,
    "53429": 31835,
    "53430": 31834,
    "53431": 31829,
    "53432": 31828,
    "53433": 31831,
    "53434": 31830,
    "53435": 31825,
    "53436": 31824,
    "53437": 31747,
    "53438": 31826,
    "53439": 31821,
    "53632": 32524,
    "53633": 32527,
    "53634": 32526,
    "53635": 32521,
    "53636": 32520,
    "53637": 32523,
    "53638": 32522,
    "53639": 32517,
    "53640": 32516,
    "53641": 32519,
    "53642": 32518,
    "53643": 32513,
    "53644": 32512,
    "53645": 32515,
    "53646": 32514,
    "53647": 32573,
    "54": 218,
    "55": 213,
    "56": 212,
    "57": 215,
    "58": 214,
    "59": 209,
    "60": 208,
    "61": 211,
    "62": 210,
    "63": 205,
    "64": 204,
    "65": 207,
    "66": 206,
    "67": 201,
    "68": 200,
    "69": 203,
    "70": 202,
    "71": 197,
    "72": 196,
    "73": 199,
    "74": 198,
    "75": 193,
    "76": 192,
    "77": 195,
    "78": 194,
    "79": 253,
    "80": 252,
    "81": 255,
    "82": 254,
    "83": 249,
    "84": 248,
    "85": 251,
    "86": 250,
    "87": 245,
    "88": 244,
    "89": 247,
    "90": 246,
    "91": 241,
    "92": 240,
    "93": 243,
    "95": 237,
    "97": 239,
    "98": 238,
    "99": 233
  }
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment