-
-
Save aquasync/a7a78a694933797344831be80683f0aa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime | |
# type codes:
# these are the codes that are used to determine the rudimentary type of numbers
PROPERTY_CODE = 0
TYPE_CODE = 3
STRING_CODE = 2
NUMBER_CODE = 1
SEQUENCE_CODE = 7

# constant codes
NULL = 0  # p
FALSE = 3  # s
TRUE = 4  # t
# The numeric token code for "undefined". It used to be bound to the name
# UNDEFINED, but that binding was immediately overwritten by the sentinel
# object defined further down, so the code was unreachable by name.
UNDEFINED_CODE = 5  # u

# these are the starting codes (structures also use this space) for complete types
DEFAULT_TYPE = 6
ARRAY_TYPE = 7
REFERENCING_TYPE = 8
NUMBER_TYPE = 9
EXTENSIONS = 10
METADATA_TYPE = 11
COPY_PROPERTY = 12  # for defining a typed object without returning the value
REFERENCING_POSITION = 13
TYPE_DEFINITION = 14  # for defining a typed object without returning the value
ERROR_METADATA = 500

# sequence codes
OPEN_SEQUENCE = 12  # <
PARTIAL_DEFERRED_REFERENCE = 12  # <
END_SEQUENCE = 14  # >
DEFERRED_REFERENCE = 15  # ?

MAX_LENGTH = 1024 * 1024 * 16

# Sentinel objects, compared by identity (``is``) only.
# PAUSED: think this is just a sentinel object.
PAUSED = {}
# UNDEFINED: stands in for JavaScript's ``undefined`` so that it can be told
# apart from a decoded null (``None``).
UNDEFINED = {}
# using these in various places where the original code was using arrays/dicts and then assigning properties to them | |
class proplist(list):
    """A ``list`` whose instances can carry extra attributes.

    The parser attaches attributes such as ``key``, ``code``, ``parent`` and
    ``values`` to property lists; a plain ``list`` instance would reject them.
    """
class propdict(dict):
    """A ``dict`` whose instances can carry extra attributes.

    The parser sets attributes such as ``key`` and ``code`` on these objects;
    a plain ``dict`` instance would reject them.
    """
class Parser:
    """Incremental parser for DPack-encoded text."""

    def __init__(self, options=None):
        """Create a parser.

        ``options`` is an optional dict. This constructor reads the
        ``classByName`` entry (a mapping from extended type name to reader);
        the built-in ``Map``, ``Set`` and ``Date`` readers are always
        registered on top of it.
        """
        # A fresh dict per instance: a ``{}`` default would be shared by every
        # parser created without options.
        if options is None:
            options = {}
        self.options = options
        self.offset = None
        self.source = None
        self.isPartial = None
        self.disposedChars = 0
        # Copy before registering the built-ins so the caller's mapping is not
        # modified as a side effect of constructing a parser.
        self.classByName = dict(options.get('classByName') or {})
        self.classByName.update({'Map': readMap, 'Set': readSet, 'Date': readDate})
        self.pausedState = None
        self.deferredReads = None
        self.nonParsingError = None
@classmethod
def createParser(cls, *args, **kwargs):
    """Alternate constructor: forward every argument to ``cls`` and return the new instance."""
    parser = cls(*args, **kwargs)
    return parser
def setSource(self, string, startOffset=0, isPartialString=False):
    """Point the parser at a new source string and reset its position.

    ``startOffset`` is the index reading starts from; ``isPartialString``
    is stored as ``isPartial``. The count of disposed characters is reset
    to zero. Returns ``self`` so calls can be chained.
    """
    self.source, self.offset = string, startOffset
    self.disposedChars, self.isPartial = 0, isPartialString
    return self
def hasMoreData(self):
    """Return True while the current source has characters beyond ``self.offset``."""
    # Python strings have no ``.length`` attribute (a leftover from the JS
    # original); ``len()`` is the equivalent.
    return len(self.source) > self.offset
def isPaused(self):
    """Return the stored paused state; callers use its truthiness as "is paused"."""
    state = self.pausedState
    return state
def hasUnfulfilledReferences(self):
    """Return True when queued deferred reads remain beyond the read cursor.

    The cursor is the ``index_`` attribute of ``self.deferredReads``. It is
    absent until a deferred read has been processed, so it defaults to 0 here
    (the same default ``read`` uses) instead of raising AttributeError.
    """
    pending = self.deferredReads
    if not pending:
        return False
    return len(pending) > getattr(pending, 'index_', 0)
def getOffset(self):
    """Return the read position counted from the start of all input seen so far.

    This is the offset into the current source plus the number of characters
    already disposed of.
    """
    consumed_before = self.disposedChars
    return consumed_before + self.offset
# everything below here (except the `read` function) is not returned in the createParser object, | |
# so is internal api I guess. could perhaps prefix the names with _... | |
def pause(self, state, lastRead):
    """Suspend parsing so it can continue when more input arrives.

    Not ported yet: the method raises ``NotImplementedError`` immediately.
    Everything after the ``raise`` is unreachable and still contains
    JavaScript-style calls (``.length``, ``.slice``) from the original.

    ``state`` is a dict describing where the reader stopped; ``lastRead`` is
    the source offset from which the remaining input would be kept.
    """
    raise NotImplementedError
    # --- unreachable draft of the intended behaviour below ---
    # push this state on top of the stack of paused states
    state['previous'] = self.pausedState
    state['resume'] = True
    self.pausedState = state
    if not self.isPartial:
        raise Exception('Unexpected end of dpack stream')
    if not hasattr(self, 'onResume'): # only if not already defined, otherwise we just want to use the one from the top of the call stack
        def onResume(nextString, isPartialString, rebuildString):
            # top of the resume stack
            resumeState = self.pausedState
            self.pausedState = None
            self.onResume = None
            # we slice and add these together to generate an entirely new string
            # so we don't cause a memory leak with redundant strings that retain a slice of their parents
            if lastRead < self.source.length:
                self.source = self.source.slice(lastRead) + nextString
            else:
                if rebuildString: # if we have read all of source, we want to slice and re-concatenate to eliminate the slice reference to the parent, we really don't want that retained in memory
                    self.source = nextString.slice(0, 1) + nextString.slice(1)
                else:
                    self.source = nextString
            self.isPartial = isPartialString
            self.disposedChars += lastRead
            self.offset = 0
            # continue with the state's own reader if it has one, else re-enter readSequence
            return resumeState['reader'](resumeState) if 'reader' in resumeState else self.readSequence(resumeState['length'], resumeState)
        self.onResume = onResume
    return state['object']
def readSequence(self, length, thisProperty=None):
    """Decode up to ``length`` values from ``self.source`` and return the object built.

    Reading starts at ``self.offset``, which is advanced as tokens are
    consumed. ``thisProperty`` is the container of property slots describing
    the object being read (a fresh ``proplist`` when omitted). Slots are
    created and modified in place as property definitions are read.

    Returns the constructed object: a ``proplist`` when ``thisProperty.code``
    is ``ARRAY_TYPE``, the result of ``thisProperty.constructs()`` when that
    attribute exists, otherwise a ``propdict``. Returns early when an
    END_SEQUENCE token is read.

    Raises ``NotImplementedError`` for resume states (not ported), and
    ``Exception`` for malformed references, unknown property states, a value
    without a property slot, or an unknown class when the
    ``errorOnUnknownClass`` option is set. Running off the end of the source
    surfaces as ``IndexError`` from the token reads.

    propertyStates:
    0 - starting next property slot
    1 - property created, succeeding value should be value of property
    2 - property creation in progress, next value should define key
    11+ - modifying property, next value modifies property (adds metadata, position, type, etc.)

    NOTE: the locals ``type``, ``object`` and ``property`` shadow builtins;
    the names are kept to stay close to the JavaScript original.
    """
    propertyState = 0
    if thisProperty is None:
        thisProperty = proplist()
    property = None
    isArray = None
    object = None
    value = UNDEFINED
    i = 0
    propertyIndex = 0
    if hasattr(thisProperty, 'resume'): # resuming, thisProperty is the resume state.
        raise NotImplementedError
        # unreachable: JavaScript reference for the unported resume path
        """
        property = thisProperty.previous
        if (property) {
            // do the previous/upper stack frame first
            var value = property.reader ? property.reader(property) : readSequence(property.length, property)
            var values = property.values
            if (values) {
                // we have an assignment to referencing values that is waiting for pausing to complete
                if (pausedState) {
                    // if still paused, pass on to next pausedState
                    pausedState.values = values
                } else {
                    if (value.nextPosition > -1) {
                        // we use this path for fulfilling forward references, and when the position is reset
                        values[values.nextPosition++] = value
                    } else {
                        values.push(value)
                    }
                }
            }
        }
        if (thisProperty.code && thisProperty.code !== thisProperty.thisProperty.code) {
            thisProperty.resume = false // if the resume property has been reassigned to an incompatible type, can't switch
        }
        else {
            i = thisProperty.i || 0
            object = thisProperty.object
            propertyState = thisProperty.propertyState || 0
            propertyIndex = thisProperty.propertyIndex || 0
            thisProperty = thisProperty.thisProperty
        }
        """
    isArray = hasattr(thisProperty, 'code') and thisProperty.code == ARRAY_TYPE # ARRAY_TYPE
    if object is None:
        object = thisProperty.constructs() if hasattr(thisProperty, 'constructs') else (proplist() if isArray else propdict()) # TODO: we could probably construct a new reader that does this a little faster

    def slot(index):
        # JS arrays/objects grow on demand; make ``index`` addressable in
        # ``thisProperty`` (padding lists with None, adding a None entry to
        # dict-like containers) and return the current entry.
        if isinstance(thisProperty, list):
            if index >= len(thisProperty):
                thisProperty.extend([None] * (index + 1 - len(thisProperty)))
        elif index not in thisProperty:
            thisProperty[index] = None
        return thisProperty[index]

    while i < length:
        type = None
        number = None
        lastRead = self.offset
        token = ord(self.source[self.offset]); self.offset += 1
        if token >= 0x30: # fast path for one byte with stop bit
            if token > 0x3000: # long-token handling
                type = (token >> 12) ^ 4
                number = token & 0xfff
            else:
                type = (token >> 4) ^ 4
                number = token & 0xf
        else:
            type = (token >> 4) & 11 # shift and omit the stop bit (bit 3)
            number = token & 0xf
            token = ord(self.source[self.offset]); self.offset += 1
            number = (number << 6) + (token & 0x3f) # 10 bit number
            if not (token >= 0x40):
                token = ord(self.source[self.offset]); self.offset += 1
                number = (number << 6) + (token & 0x3f) # 16 bit number
                if not (token >= 0x40):
                    token = ord(self.source[self.offset]); self.offset += 1
                    number = (number << 6) + (token & 0x3f) # 22 bit number
                    if not (token >= 0x40):
                        token = ord(self.source[self.offset]); self.offset += 1
                        number = (number << 6) + (token & 0x3f) # 28 bit number
                        if not (token >= 0x40):
                            token = ord(self.source[self.offset]); self.offset += 1
                            number = (number * 0x40) + (token & 0x3f) # 34 bit number (we can't use 32-bit shifting operators anymore)
                            if not (token >= 0x40):
                                token = ord(self.source[self.offset]); self.offset += 1
                                number = (number * 0x40) + (token & 0x3f) # 40 bit number
                                if not (token >= 0x40):
                                    token = ord(self.source[self.offset]); self.offset += 1
                                    number = (number * 0x40) + (token & 0x3f) # 46 bit number, we don't go beyond this
                                    # In JS a read past the end yields NaN and lands here; in
                                    # Python ord() never returns a negative value, so this
                                    # branch is kept only for parity with the original.
                                    if not (token >= 0):
                                        if self.offset > len(self.source):
                                            return self.pause({
                                                'length': length,
                                                'thisProperty': thisProperty,
                                                'i': i,
                                                'object': object,
                                                'propertyIndex': propertyIndex,
                                                'propertyState': propertyState,
                                            }, lastRead)
        if type == 0: # /*PROPERTY_CODE*/
            propertyIndex = number
            propertyState = 0
            continue
        property = slot(propertyIndex)
        if type == 3: # /*TYPE_CODE*/
            # we store the previous property state in token, so we can assign the next one
            if number < 6:
                # special values (constants, deferreds)
                if number < 3:
                    if number == 0:
                        value = None
                    else:
                        value = f'Unknown token, type: {type} number: {number}'
                else:
                    if number == TRUE:
                        value = True
                    elif number == FALSE:
                        value = False
                    else:
                        # is this reachable? maybe the reserved ones?
                        value = UNDEFINED
            else:
                if number <= NUMBER_TYPE:
                    # property definition
                    if propertyState == 1:
                        # we were in property value position, but no value, so we increment position here
                        propertyIndex += 1
                        i += 1
                        property = slot(propertyIndex)
                    # what is this for? no mention of resetTo in this file - maybe comes from the block stuff...
                    if hasattr(thisProperty, 'resetTo') and propertyIndex < thisProperty.resetTo:
                        raise Exception('Overwriting frozen property')
                    if property is not None:
                        if not hasattr(property, 'resume'):
                            # should we fallback to undefined or none?
                            value = getattr(property, 'key', UNDEFINED) # inherit the existing key
                            property = thisProperty[propertyIndex] = proplist()
                            property.key = value
                    else: # no property existed in slot, start with default key of null
                        property = thisProperty[propertyIndex] = proplist()
                        property.key = None
                    property.code = number
                    property.parent = thisProperty
                    propertyState = 2 # read next value as the key
                    if number == REFERENCING_TYPE: # REFERENCING_TYPE
                        property.values = proplist()
                    elif number == ARRAY_TYPE:
                        # arrays get a single default-typed element slot
                        if len(property) == 0:
                            property.append(None)
                        property[0] = proplist()
                        property[0].key = None
                        property[0].code = DEFAULT_TYPE
                        property[0].parent = property
                else:
                    # property modification
                    propertyState = number
                continue
        else:
            if type == 2: # STRING_CODE
                value = self.source[self.offset:self.offset + number]
                self.offset += number
                if self.offset > len(self.source):
                    return self.pause({
                        'length': length,
                        'thisProperty': thisProperty,
                        'i': i,
                        'object': object,
                        'propertyIndex': propertyIndex,
                        'propertyState': propertyState
                    }, lastRead)
                if propertyState < 2:
                    # a missing slot is reported by the explicit check further down
                    if property is not None and property.code == NUMBER_TYPE: # NUMBER_TYPE
                        #value = +value
                        value = float(value)
            elif type == 1: # NUMBER_CODE
                value = number
            else: # if type == 7 SEQUENCE_CODE
                if number > 13:
                    if number == END_SEQUENCE:
                        return object
                    elif number == DEFERRED_REFERENCE:
                        value = self.readSequence(0, property)
                        propertyState = 0
                        if 'forDeferred' in self.options:
                            value = self.options['forDeferred'](value, property)
                        else:
                            if self.deferredReads is None:
                                self.deferredReads = proplist()
                            self.deferredReads.append({
                                'property': property,
                                'value': value
                            })
                else:
                    if number >= OPEN_SEQUENCE:
                        # open-ended sequence: read until END_SEQUENCE
                        number = 2000000000
                    if propertyState > 1:
                        if propertyState == 2:
                            propertyState = 0 # if the property key was skipped, go directly into value mode
                            value = self.readSequence(number, property)
                        elif propertyState == METADATA_TYPE:
                            temp = propdict()
                            temp.key = None
                            temp.code = 6
                            value = self.readSequence(number, proplist([temp]))
                        # JS: property.resume && (property.code || DEFAULT_TYPE) === property.thisProperty.code
                        elif getattr(property, 'resume', False) and (getattr(property, 'code', None) or DEFAULT_TYPE) == property.thisProperty.code:
                            value = self.readSequence(number, property.thisProperty)
                        else:
                            value = self.readSequence(number, property)
                    else:
                        value = self.readSequence(number, property)
                    if self.pausedState:
                        # the check in the js is actually for undefined... not sure if that matters?
                        if value is None: # or a PAUSED object
                            self.pausedState = None # erase anything further up in the stack, as this means the sequence couldn't start, so we need to re-parse from here
                            self.onResume = None
                            return self.pause({
                                'length': length,
                                'thisProperty': thisProperty,
                                'i': i,
                                'object': object,
                                'property': property,
                                'propertyIndex': propertyIndex,
                                # nothing in this function defines a previous property; the
                                # key is kept with an explicit None instead of an undefined name
                                'previousProperty': None,
                                'propertyState': propertyState,
                            }, lastRead)
                        else:
                            # need to assign the values *after* the completion of the sequence if it is a forward reference
                            pendingValues = getattr(property, 'values', None)
                            # paused states are dicts (see the state dicts built above)
                            self.pausedState['values'] = pendingValues if isinstance(pendingValues, list) else None
            if property is None:
                slotOwner = getattr(thisProperty, 'key', None)
                raise Exception('No property defined for slot' + (f' in {slotOwner}' if slotOwner else ''))
            if propertyState < 2 and property.code == REFERENCING_TYPE: # REFERENCING_TYPE
                values = property.values
                if isinstance(value, (int, float)):
                    # a number in a referencing property is an index into the values read so far
                    if values is None or number >= len(values):
                        raise Exception('Referencing value that has not been read yet')
                    value = values[number]
                elif (type == 2 or type == 7) and values is not None: # only strings and sequences
                    if not hasattr(values, 'nextPosition'):
                        values.nextPosition = 0
                    if values.nextPosition > -1:
                        # we use this path for fulfilling forward references
                        if hasattr(property, 'recordValueReference'):
                            property.recordValueReference(values)
                        if values.nextPosition >= len(values):
                            values.append(None)
                        values[values.nextPosition] = value
                        values.nextPosition += 1
                    else:
                        values.append(value)
        if propertyState > 1:
            # 0, 1: values are the value of the property
            if propertyState == 2: # // property key
                property.key = value
            elif propertyState == METADATA_TYPE: # 3: metadata parameter
                if isinstance(value, str):
                    if extendedType := self.classByName.get(value):
                        if hasattr(extendedType, 'fromValue'):
                            property.fromValue = extendedType.fromValue
                        else:
                            property.constructs = extendedType
                    elif self.options.get('errorOnUnknownClass'):
                        raise Exception(f'Attempt to deserialize to unknown class {value}')
                    else:
                        # if no warning, just parse as a plain value/object
                        pass
                    property.extendedType = extendedType # if this gets used for serialization, this is needed
                else:
                    # extended metadata assignment (used by shared structures to mark objects as unstructured)
                    property.metadata = value
                    if value == ERROR_METADATA:
                        property.fromValue = self.onError
            elif propertyState == REFERENCING_POSITION:
                if not hasattr(property, 'values'):
                    property.values = proplist()
                values = property.values
                #values.splice(value, values.length)
                values.nextPosition = value
            elif propertyState == TYPE_DEFINITION: # for type-only state
                # throw away this value and read next value as the real value
                pass
            else:
                raise Exception(f'Unknown property type {propertyState}')
            propertyState = 1 # property value after definition position
            continue # read next value as the property value
        else:
            propertyState = 0
        if hasattr(property, 'fromValue'):
            value = property.fromValue(value)
        if isArray and property.key is None:
            object.append(value)
        # The JS code tests ``value !== undefined``; the UNDEFINED sentinel plays
        # that role here so that decoded nulls (None) are still stored.
        elif value is not UNDEFINED:
            object[property.key] = value
        i += 1
        if not isArray:
            propertyIndex += 1
    return object
def unknownType(self, number):
    """Raise an ``Exception`` reporting an unknown type code ``number``."""
    # Format instead of concatenating: ``'...' + number`` raises TypeError
    # when ``number`` is an int, masking the intended message.
    raise Exception(f'Unknown type {number}')
def onError(self, error):
    """Error hook for values decoded with error metadata; not ported yet.

    Always raises ``NotImplementedError``.
    """
    # JavaScript reference for the eventual port:
    #
    #   var g = typeof global != 'undefined' ? global : window
    #   if (error && error.name && g[error.name])
    #       error = new g[error.name](error.message)
    #   else if (typeof error == 'string')
    #       error = new Error(error)
    #   if (options.onError)
    #       options.onError(error)
    #   else {
    #       nonParsingError = true
    #       throw error
    #   }
    raise NotImplementedError
def read(self, property=None):
    """Read one top-level value from the source and drain any deferred reads.

    ``property`` is either a resume state (a mapping containing ``'resume'``)
    or the root property list. When omitted, a root list is built from
    ``options['shared']`` or a default slot with ``key=None`` and ``code=6``.

    Returns the decoded value. If the parser is paused, the result of
    ``self.pause`` is returned instead. Deferred reads queued on
    ``self.deferredReads`` are processed depth-first: each one gets a child
    queue whose ``parent`` is the queue it came from, and the loop walks back
    up once a queue is exhausted.

    NOTE(review): deferred reads hand ``readSequence`` a resume-style slot;
    ``readSequence`` does not implement resuming yet, so this path is
    presumably not functional end-to-end — confirm once resume is ported.
    """
    try:
        if property and 'resume' in property:
            previous = property['previous']
            value = self.readSequence(previous['length'], previous)
            # once we get the value, we don't know which point in the stack could still be resolving
            value = property['object'] or value
            property = property['property']
        else:
            temp = propdict()
            temp.key = None
            temp.code = 6
            property = property or proplist([self.options.get('shared', temp)])
            value = self.readSequence(1, property)[property[0].key]
        while True:
            if self.pausedState:
                # ``pause`` requires a lastRead argument; the JS original passes
                # none here, which None mirrors.
                return self.pause({
                    'reader': self.read,
                    'object': value,
                    'property': property
                }, None)
            # Test for None rather than truthiness: an empty child queue must
            # fall through so the walk returns to its parent queue.
            if self.deferredReads is None:
                return value
            pending = self.deferredReads
            index = getattr(pending, 'index_', 0)
            if index >= len(pending):
                # queue exhausted: continue with the queue that spawned it (if any)
                self.deferredReads = getattr(pending, 'parent', None)
                continue
            deferredRead = pending[index]
            # advance the same ``index_`` cursor that is read above
            # (``index`` itself is the list method)
            pending.index_ = index + 1
            if not deferredRead:
                self.deferredReads = getattr(pending, 'parent', None)
                continue
            target = deferredRead['value']
            # reads triggered while resolving this entry go to a child queue
            self.deferredReads = proplist()
            self.deferredReads.parent = pending
            targetProperty = deferredRead['property']
            property = proplist([{
                'resume': True,
                'key': None,
                'thisProperty': targetProperty,
                'object': target
            }])
            result = self.readSequence(1, property)
            # Blocks should use null as the root key, but could have the target property key as well
            result = result.get(None) or result.get(getattr(targetProperty, 'key', None))
            if result is not target: # This should only occur with a promise that resolves to a different type, an array
                # object was replaced with something else (an array, presumably)
                if result and isinstance(result, list):
                    # the JS original reshapes the target into an array here ("do our
                    # best to make it array like"); there is no in-place equivalent
                    raise NotImplementedError
                target.update(result)
                if self.pausedState and self.pausedState['object'] is result:
                    self.pausedState['object'] = target
    except Exception as error:
        # FIXME: the JS original decorates parsing errors before rethrowing:
        #if (!nonParsingError)
        #    error.message = 'DPack parsing error: ' + error.message + ' at position: ' + (offset + disposedChars) + ' near: ' + source.slice(offset - 10, offset + 10)
        #throw error
        raise
class readMap:
    """Reader registered for the ``Map`` extended type."""

    @staticmethod
    def fromValue(entries):
        """Build a dict from an iterable of key/value entries.

        Decoded entries are mappings with ``'key'`` and ``'value'`` items, so
        those are read by subscription; objects exposing ``key`` / ``value``
        attributes are still accepted. Keys must be hashable.
        """
        def pair(entry):
            try:
                return entry['key'], entry['value']
            except (TypeError, KeyError, IndexError):
                # not a mapping with those items: fall back to attribute access
                return entry.key, entry.value

        return dict(pair(entry) for entry in entries)
class readSet:
    """Reader registered for the ``Set`` extended type."""

    @staticmethod
    def fromValue(values):
        """Return a ``set`` holding the elements of ``values``."""
        return {*values}
class readDate:
    """Reader registered for the ``Date`` extended type."""

    @staticmethod
    def fromValue(time):
        """Convert an epoch time in milliseconds to a naive local ``datetime``.

        DPack carries dates as JavaScript ``Date.getTime()`` values, i.e.
        milliseconds since the Unix epoch, while ``fromtimestamp`` expects
        seconds — hence the division. The result is a naive datetime in
        local time, as before.
        """
        return datetime.datetime.fromtimestamp(time / 1000)
def parse(stringOrBuffer, options=None):
    """Parse DPack data given as ``str`` or ``bytes`` and return the decoded value.

    ``options`` is an optional dict handed to the parser. This function reads
    ``encoding`` (used to decode ``bytes`` input, default ``'utf-8'``) and
    ``shared`` (a shared structure used as the root property).

    Any input that is neither ``str`` nor ``bytes`` is returned unchanged.
    """
    # fresh dict per call instead of a shared mutable default
    if options is None:
        options = {}
    # we process the input as a string...
    if isinstance(stringOrBuffer, str):
        source = stringOrBuffer
    elif isinstance(stringOrBuffer, bytes):
        source = stringOrBuffer.decode(options.get('encoding', 'utf-8'))
    else: # whatever (undefined or null or whatever), just return it
        return stringOrBuffer
    parser = Parser.createParser(options).setSource(source)
    if 'shared' in options:
        # wrap in a proplist (not a plain list) so the reader can grow it and
        # attach attributes to it like any other property list
        return parser.read(proplist([options['shared']]))
    return parser.read()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment