Skip to content

Instantly share code, notes, and snippets.

@aquasync
Created September 12, 2021 07:18
Show Gist options
  • Save aquasync/a7a78a694933797344831be80683f0aa to your computer and use it in GitHub Desktop.
Save aquasync/a7a78a694933797344831be80683f0aa to your computer and use it in GitHub Desktop.
import datetime
# type codes:
# these are the codes that are used to determine the rudimentary type of numbers
# (readSequence extracts this code from every token and compares it against
# these values, written there as literals with the name in a comment)
PROPERTY_CODE = 0
TYPE_CODE = 3
STRING_CODE = 2
NUMBER_CODE = 1
SEQUENCE_CODE = 7
# constant codes
# (readSequence maps the number of a TYPE_CODE token below 6 onto these constants)
NULL = 0 # p
FALSE = 3 # s
TRUE = 4 # t
# NOTE(review): this numeric binding is replaced by the sentinel assignment
# `UNDEFINED = {}` at the end of this block, so the value 5 is never visible
# to code that runs after the module has loaded.
UNDEFINED = 5 # u
# these are the starting codes (structures also use this space) for complete types
# (in readSequence a TYPE_CODE number up to NUMBER_TYPE defines a property;
# larger numbers become the propertyState that modifies the current property)
DEFAULT_TYPE = 6
ARRAY_TYPE = 7
REFERENCING_TYPE = 8
NUMBER_TYPE = 9
EXTENSIONS = 10
METADATA_TYPE = 11
# NOTE(review): the trailing comment below is identical to the one on
# TYPE_DEFINITION and may be a copy/paste leftover -- confirm.
COPY_PROPERTY = 12 # for defining a typed object without returning the value
REFERENCING_POSITION = 13
TYPE_DEFINITION = 14 # for defining a typed object without returning the value
# metadata value that makes readSequence install Parser.onError as the property's fromValue
ERROR_METADATA = 500
# sequence codes
# (OPEN_SEQUENCE and PARTIAL_DEFERRED_REFERENCE intentionally or not share the value 12)
OPEN_SEQUENCE = 12 # <
PARTIAL_DEFERRED_REFERENCE = 12 # <
END_SEQUENCE = 14 # >
DEFERRED_REFERENCE = 15 # ?
MAX_LENGTH = 1024*1024*16
# think this is just a sentinel object
# (two distinct empty dicts: they compare equal with ==, so they must be told
# apart with `is`, which is how readSequence tests for UNDEFINED)
PAUSED = {}
UNDEFINED = {}
# using these in various places where the original code was using arrays/dicts and then assigning properties to them
class proplist(list):
    """A ``list`` whose instances also accept arbitrary attributes.

    The parser attaches values such as ``key``, ``code``, ``parent`` and
    ``values`` to these lists, which a plain ``list`` would reject.
    """
class propdict(dict):
    """A ``dict`` whose instances also accept arbitrary attributes.

    Used where the parser needs a mapping that can additionally carry
    attribute-style annotations such as ``key`` and ``code``.
    """
class Parser:
def __init__(self, options=None):
    """Create a parser.

    Args:
        options: optional mapping of parser options. Keys read in this
            class are ``classByName``, ``shared``, ``forDeferred`` and
            ``errorOnUnknownClass``. ``None`` (the default) means no options.
    """
    # A fresh dict per instance instead of a shared mutable default argument.
    if options is None:
        options = {}
    self.options = options
    self.offset = None
    self.source = None
    self.isPartial = None
    self.disposedChars = 0
    # Copy the caller's registry so that registering the built-in readers
    # below does not silently mutate a dict owned by the caller.
    self.classByName = dict(options.get('classByName') or {})
    self.classByName.update({'Map': readMap, 'Set': readSet, 'Date': readDate})
    self.pausedState = None
    self.deferredReads = None
    self.nonParsingError = None
@classmethod
def createParser(cls, *args, **kwargs):
return cls(*args, **kwargs)
def setSource(self, string, startOffset=0, isPartialString=False):
self.source = string
self.offset = startOffset
self.disposedChars = 0
self.isPartial = isPartialString
return self
def hasMoreData(self):
return self.source.length > self.offset
def isPaused(self):
return self.pausedState
def hasUnfulfilledReferences(self):
return self.deferredReads and len(self.deferredReads) > self.deferredReads.index_
def getOffset(self):
return self.offset + self.disposedChars
# everything below here (except the `read` function) is not returned in the createParser object,
# so is internal api I guess. could perhaps prefix the names with _...
def pause(self, state, lastRead):
    """Suspend parsing so it can continue when more input arrives (NOT IMPLEMENTED).

    The method currently raises ``NotImplementedError`` on its first
    statement, so everything below that line is unreachable and serves
    only as a partially translated reference.

    Args:
        state: dict describing the interrupted read; the unreachable code
            stores ``previous`` and ``resume`` in it and returns
            ``state['object']``.
        lastRead: source offset at which the interrupted token started.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
    # Push this frame onto the stack of paused states.
    state['previous'] = self.pausedState
    state['resume'] = True
    self.pausedState = state
    if not self.isPartial:
        raise Exception('Unexpected end of dpack stream')
    # NOTE(review): onResume assigns ``self.onResume = None`` rather than deleting
    # the attribute, so this hasattr() test would stay true after the first resume
    # and the handler would not be re-installed -- confirm before enabling.
    if not hasattr(self, 'onResume'): # only if not already defined, otherwise we just want to use the one from the top of the call stack
        def onResume(nextString, isPartialString, rebuildString):
            # top of the resume stack
            resumeState = self.pausedState
            self.pausedState = None
            self.onResume = None
            # we slice and add these together to generate an entirely new string
            # so we don't cause a memory leak with redundant strings that retain a slice of their parent
            # NOTE(review): ``.length`` and ``.slice`` are JavaScript string members;
            # a Python str has neither, so these lines would raise AttributeError.
            if lastRead < self.source.length:
                self.source = self.source.slice(lastRead) + nextString
            else:
                if rebuildString: # if we have read all of source, we want to slice and re-concatenate to eliminate the slice reference to the parent, we really don't want that retained in memory
                    self.source = nextString.slice(0, 1) + nextString.slice(1)
                else:
                    self.source = nextString
            self.isPartial = isPartialString
            self.disposedChars += lastRead
            self.offset = 0
            # Re-enter through the frame's own reader when it has one, otherwise through readSequence.
            return resumeState['reader'](resumeState) if 'reader' in resumeState else self.readSequence(resumeState['length'], resumeState)
        self.onResume = onResume
    return state['object']
@staticmethod
def _getSlot(container, index):
    """Return ``container[index]``, or ``None`` when that slot has not been created yet."""
    if isinstance(container, list):
        return container[index] if 0 <= index < len(container) else None
    return container.get(index)

@staticmethod
def _setSlot(container, index, item):
    """Store ``item`` at ``container[index]``, padding a list with ``None`` up to ``index`` first."""
    if isinstance(container, list) and index >= len(container):
        container.extend([None] * (index + 1 - len(container)))
    container[index] = item

def _readToken(self):
    """Decode the token at ``self.offset``; return ``(type, number)``, or ``None`` if the input ends mid-token."""
    source = self.source
    try:
        token = ord(source[self.offset])
        self.offset += 1
        if token >= 0x30:  # fast path for one byte with stop bit
            if token > 0x3000:  # long-token handling
                return (token >> 12) ^ 4, token & 0xfff
            return (token >> 4) ^ 4, token & 0xf
        kind = (token >> 4) & 11  # shift and omit the stop bit (bit 3)
        number = token & 0xf
        # Up to seven continuation characters add 6 bits each (10, 16, ... 46 bit
        # numbers); a character >= 0x40 carries the stop bit. We don't go beyond 46 bits.
        for _ in range(7):
            token = ord(source[self.offset])
            self.offset += 1
            number = (number << 6) + (token & 0x3f)
            if token >= 0x40:
                break
        return kind, number
    except IndexError:
        # Indexing past the end of a Python str raises instead of yielding NaN as
        # JavaScript's charCodeAt does, so "ran out of input" is reported here.
        return None

def readSequence(self, length, thisProperty=None):
    """Read up to ``length`` values from the source into a new container.

    propertyStates:
    0 - starting next property slot
    1 - property created, succeeding value should be value of property
    2 - property creation in progress, next value should define key
    11+ - modifying property, next value modifies property (adds metadata, position, type, etc.)

    Args:
        length: maximum number of values to read; an open sequence is read
            with a very large length and stops at END_SEQUENCE instead.
        thisProperty: container of property definitions for the slots of
            this sequence (a ``proplist`` or ``propdict``, possibly carrying
            ``code``/``constructs`` attributes). A new empty ``proplist`` is
            used when omitted.

    Returns:
        The populated container: ``thisProperty.constructs()`` when that
        attribute exists, otherwise a ``proplist`` for ARRAY_TYPE properties
        and a ``propdict`` for everything else. When the input ends early
        the result of ``self.pause(...)`` is returned instead.

    Raises:
        NotImplementedError: when asked to resume a paused read.
        Exception: for frozen-property overwrites, unread references,
            unknown classes (with ``errorOnUnknownClass``) and unknown
            property states.
    """
    propertyState = 0
    if thisProperty is None:
        thisProperty = proplist()
    property = None
    object = None
    value = UNDEFINED
    i = 0
    propertyIndex = 0
    if hasattr(thisProperty, 'resume'):  # resuming, thisProperty is the resume state.
        # Resuming has not been ported. The JavaScript being translated is kept
        # here for reference:
        #
        # property = thisProperty.previous
        # if (property) {
        #     // do the previous/upper stack frame first
        #     var value = property.reader ? property.reader(property) : readSequence(property.length, property)
        #     var values = property.values
        #     if (values) {
        #         // we have an assignment to referencing values that is waiting for pausing to complete
        #         if (pausedState) {
        #             // if still paused, pass on to next pausedState
        #             pausedState.values = values
        #         } else {
        #             if (value.nextPosition > -1) {
        #                 // we use this path for fulfilling forward references, and when the position is reset
        #                 values[values.nextPosition++] = value
        #             } else {
        #                 values.push(value)
        #             }
        #         }
        #     }
        # }
        # if (thisProperty.code && thisProperty.code !== thisProperty.thisProperty.code) {
        #     thisProperty.resume = false // if the resume property has been reassigned to an incompatible type, can't switch
        # }
        # else {
        #     i = thisProperty.i || 0
        #     object = thisProperty.object
        #     propertyState = thisProperty.propertyState || 0
        #     propertyIndex = thisProperty.propertyIndex || 0
        #     thisProperty = thisProperty.thisProperty
        # }
        raise NotImplementedError
    isArray = hasattr(thisProperty, 'code') and thisProperty.code == ARRAY_TYPE
    if object is None:
        # TODO: we could probably construct a new reader that does this a little faster
        if hasattr(thisProperty, 'constructs'):
            object = thisProperty.constructs()
        else:
            object = proplist() if isArray else propdict()

    def frame(**extra):
        # Snapshot of this call's progress, in the shape pause() expects.
        state = {
            'length': length,
            'thisProperty': thisProperty,
            'i': i,
            'object': object,
            'propertyIndex': propertyIndex,
            'propertyState': propertyState,
        }
        state.update(extra)
        return state

    while i < length:
        lastRead = self.offset
        decoded = self._readToken()
        if decoded is None:
            # Input ended inside a token. The original end-of-input test could
            # never fire because ord() raised IndexError first.
            return self.pause(frame(), lastRead)
        type, number = decoded
        if type == 0:  # PROPERTY_CODE
            propertyIndex = number
            propertyState = 0
            continue
        property = self._getSlot(thisProperty, propertyIndex)
        if type == 3:  # TYPE_CODE
            # we store the previous property state in token, so we can assign the next one
            if number < 6:
                # special values (constants, deferreds)
                if number < 3:
                    if number == 0:
                        value = None
                    else:
                        value = f'Unknown token, type: {type} number: {number}'
                else:
                    if number == TRUE:
                        value = True
                    elif number == FALSE:
                        value = False
                    else:
                        # is this reachable? maybe the reserved ones?
                        value = UNDEFINED
            else:
                if number <= NUMBER_TYPE:
                    # property definition
                    if propertyState == 1:
                        # we were in property value position, but no value, so we increment position here
                        propertyIndex += 1
                        i += 1
                        # The next slot may not exist yet; that must not raise.
                        property = self._getSlot(thisProperty, propertyIndex)
                    # what is this for? no mention of resetTo in this file - maybe comes from the block stuff...
                    if hasattr(thisProperty, 'resetTo') and propertyIndex < thisProperty.resetTo:
                        raise Exception('Overwriting frozen property')
                    if property is not None:
                        if not hasattr(property, 'resume'):
                            # should we fallback to undefined or none?
                            value = getattr(property, 'key', UNDEFINED)  # inherit the existing key
                            property = proplist()
                            self._setSlot(thisProperty, propertyIndex, property)
                            property.key = value
                    else:  # no property existed in slot, start with default key of null
                        property = proplist()
                        self._setSlot(thisProperty, propertyIndex, property)
                        property.key = None
                    property.code = number
                    property.parent = thisProperty
                    propertyState = 2  # read next value as the key
                    if number == REFERENCING_TYPE:
                        property.values = proplist()
                    elif number == ARRAY_TYPE:
                        # Arrays get a default element property in slot 0.
                        element = proplist()
                        element.key = None
                        element.code = DEFAULT_TYPE
                        element.parent = property
                        self._setSlot(property, 0, element)
                else:
                    # property modification
                    propertyState = number
                continue
        else:
            if type == 2:  # STRING_CODE
                value = self.source[self.offset:self.offset + number]
                self.offset += number
                if self.offset > len(self.source):
                    return self.pause(frame(), lastRead)
                if propertyState < 2:
                    if property.code == NUMBER_TYPE:
                        value = float(value)
            elif type == 1:  # NUMBER_CODE
                value = number
            else:  # type == 7, SEQUENCE_CODE
                if number > 13:
                    if number == END_SEQUENCE:
                        return object
                    elif number == DEFERRED_REFERENCE:
                        value = self.readSequence(0, property)
                        propertyState = 0
                        if 'forDeferred' in self.options:
                            value = self.options['forDeferred'](value, property)
                        else:
                            if self.deferredReads is None:
                                self.deferredReads = proplist()
                            self.deferredReads.append({
                                'property': property,
                                'value': value,
                            })
                else:
                    if number >= OPEN_SEQUENCE:
                        number = 2000000000
                    if propertyState > 1:
                        if propertyState == 2:
                            propertyState = 0  # if the property key was skipped, go directly into value mode
                            value = self.readSequence(number, property)
                        elif propertyState == METADATA_TYPE:
                            temp = propdict()
                            temp.key = None
                            temp.code = DEFAULT_TYPE
                            value = self.readSequence(number, proplist([temp]))
                        elif (getattr(property, 'resume', False)
                                and (getattr(property, 'code', None) or DEFAULT_TYPE) == property.thisProperty.code):
                            # Was getattr(property.code, DEFAULT_TYPE), which raises TypeError;
                            # the intent is "property.code, defaulting to DEFAULT_TYPE".
                            value = self.readSequence(number, property.thisProperty)
                        else:
                            value = self.readSequence(number, property)
                    else:
                        value = self.readSequence(number, property)
                    if self.pausedState:
                        # the check in the js is actually for undefined... not sure if that matters?
                        if value is None:  # or a PAUSED object
                            # erase anything further up in the stack, as this means the sequence
                            # couldn't start, so we need to re-parse from here
                            self.pausedState = None
                            self.onResume = None
                            # 'previousProperty' named a variable that is never defined in
                            # this function (NameError); the key is kept with no value.
                            return self.pause(frame(property=property, previousProperty=None), lastRead)
                        else:
                            # need to assign the values *after* the completion of the sequence if it is a forward reference
                            # (pause states are dicts, so this is an item assignment)
                            pendingValues = getattr(property, 'values', None)
                            self.pausedState['values'] = pendingValues if isinstance(pendingValues, list) else None
        # NOTE(review): the JS raised 'No property defined for slot' here when the
        # slot was empty; that check is still disabled in this port.
        if propertyState < 2 and property is not None and property.code == REFERENCING_TYPE:
            values = property.values
            # bool is a subclass of int in Python; True/False are values, not indexes.
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                value = values[number] if 0 <= number < len(values) else None
                if value is None:
                    raise Exception('Referencing value that has not been read yet')
            elif (type == 2 or type == 7) and values is not None:  # only strings and sequences
                if not hasattr(values, 'nextPosition'):
                    values.nextPosition = 0
                if values.nextPosition > -1:
                    # we use this path for fulfilling forward references
                    if hasattr(property, 'recordValueReference'):
                        property.recordValueReference(values)
                    self._setSlot(values, values.nextPosition, value)
                    values.nextPosition += 1
                else:
                    values.append(value)
        if propertyState > 1:
            # 0, 1: values are the value of the property
            if propertyState == 2:  # property key
                property.key = value
            elif propertyState == METADATA_TYPE:  # metadata parameter
                if isinstance(value, str):
                    if extendedType := self.classByName.get(value):
                        if hasattr(extendedType, 'fromValue'):
                            property.fromValue = extendedType.fromValue
                        else:
                            property.constructs = extendedType
                    elif self.options.get('errorOnUnknownClass'):
                        # The message referenced an undefined name `parameter`; the
                        # class name being looked up is `value`.
                        raise Exception(f'Attempt to deserialize to unknown class {value}')
                    # otherwise: no warning, just parse as a plain value/object
                    property.extendedType = extendedType  # if this gets used for serialization, this is needed
                else:
                    # extended metadata assignment (used by shared structures to mark objects as unstructured)
                    property.metadata = value
                    if value == ERROR_METADATA:
                        property.fromValue = self.onError
            elif propertyState == REFERENCING_POSITION:
                if not hasattr(property, 'values'):
                    property.values = proplist()
                values = property.values
                values.nextPosition = value
            elif propertyState == TYPE_DEFINITION:  # for type-only state
                # throw away this value and read next value as the real value
                pass
            else:
                raise Exception(f'Unknown property type {propertyState}')
            propertyState = 1  # property value after definition position
            continue  # read next value as the property value
        else:
            propertyState = 0
        if hasattr(property, 'fromValue'):
            value = property.fromValue(value)
        if isArray and property.key is None:
            object.append(value)
        # FIXME: the js code used value is not undefined, but we're using a sentinel.
        # i guess that means we can't write out nulls without it
        elif value is not UNDEFINED:
            object[property.key] = value
        i += 1
        if not isArray:
            propertyIndex += 1
    return object
def unknownType(self, number):
    """Raise an ``Exception`` reporting an unrecognised type code.

    The message is built with an f-string so that a numeric ``number``
    no longer fails with ``TypeError`` during string concatenation.
    """
    raise Exception(f'Unknown type {number}')
def onError(self, error):
    """Handle an error value found in the stream (NOT IMPLEMENTED).

    readSequence installs this method as a property's ``fromValue`` when
    the property's metadata equals ERROR_METADATA. It currently always
    raises ``NotImplementedError``; the string that follows is the
    untranslated JavaScript, kept as a reference and never executed.
    """
    raise NotImplementedError
    """
var g = typeof global != 'undefined' ? global : window
if (error && error.name && g[error.name])
error = new g[error.name](error.message)
else if (typeof error == 'string')
error = new Error(error)
if (options.onError)
options.onError(error)
else {
nonParsingError = true
throw error
}
"""
def read(self, property=None):
    """Read one complete value from the source, then process deferred reads.

    Args:
        property: optional property list describing the root slot. A
            mapping containing ``'resume'`` is treated as a paused state to
            continue from. When omitted, the root property is taken from
            ``options['shared']`` or defaults to a plain DEFAULT_TYPE
            property with a ``None`` key.

    Returns:
        The decoded root value, or the result of ``self.pause(...)`` when
        the parser is paused.
    """
    try:
        if property and 'resume' in property:
            previous = property['previous']
            value = self.readSequence(previous['length'], previous)
            # once we get the value, we don't know which point in the stack could still be resolving
            value = property['object'] or value
            property = property['property']
        else:
            if not property:
                root = propdict()
                root.key = None
                root.code = DEFAULT_TYPE
                property = proplist([self.options.get('shared', root)])
            value = self.readSequence(1, property)[property[0].key]
        while True:
            if self.pausedState:
                # pause() requires the offset of the interrupted read; the call
                # used to omit it (TypeError). NOTE(review): the current offset is
                # assumed to be the right resume point -- confirm once pause() is ported.
                return self.pause({
                    'reader': self.read,
                    'object': value,
                    'property': property,
                }, self.offset)
            pending = self.deferredReads
            # Only "no queue at all" ends the loop. An empty child queue must fall
            # through so that control pops back to its parent queue.
            if pending is None:
                return value
            index = getattr(pending, 'index_', 0)
            deferredRead = pending[index] if index < len(pending) else None
            # Advance the same attribute that is read above (it was written as
            # `.index`, so the cursor never moved).
            pending.index_ = index + 1
            if not deferredRead:
                # Queue exhausted: continue with the enclosing queue, if any.
                self.deferredReads = getattr(pending, 'parent', None)
                continue
            target = deferredRead['value']
            # Deferred reads discovered while reading this block go into a child queue.
            self.deferredReads = proplist()
            self.deferredReads.parent = pending
            targetProperty = deferredRead['property']
            property = proplist([{
                'resume': True,
                'key': None,
                'thisProperty': targetProperty,
                'object': target,
            }])
            result = self.readSequence(1, property)
            # Blocks should use null as the root key, but could have the target property key as well
            rootValue = result.get(None)
            if rootValue is None:
                rootValue = result.get(getattr(targetProperty, 'key', None))
            result = rootValue
            if result is not target:
                # object was replaced with something else (an array, presumably)
                if isinstance(result, list):
                    # Making the existing target "array like" has not been ported.
                    raise NotImplementedError
                if result:
                    target.update(result)
                if self.pausedState and self.pausedState['object'] is result:
                    self.pausedState['object'] = target
    except Exception:
        # FIXME: the JS adds position context to the message unless the error was
        # flagged as a non-parsing error:
        # if (!nonParsingError)
        #     error.message = 'DPack parsing error: ' + error.message + ' at position: ' + (offset + disposedChars) + ' near: ' + source.slice(offset - 10, offset + 10)
        # throw error
        raise
class readMap:
    """Reader registered under the class name ``'Map'``."""

    @staticmethod
    def _field(entry, name):
        """Return ``entry[name]`` for mapping entries, else the attribute ``entry.name``."""
        if isinstance(entry, dict):
            return entry[name]
        return getattr(entry, name)

    @staticmethod
    def fromValue(entries):
        """Build a dict from an iterable of key/value entries.

        Objects produced by the parser are dicts, so their ``key`` and
        ``value`` are looked up as items; objects exposing them as
        attributes are still accepted.
        """
        field = readMap._field
        return {field(entry, 'key'): field(entry, 'value') for entry in entries}
class readSet:
    """Reader registered under the class name ``'Set'``."""

    @staticmethod
    def fromValue(values):
        """Collect the given iterable into a new ``set``."""
        unique = set()
        unique.update(values)
        return unique
class readDate:
    """Reader registered under the class name ``'Date'``."""

    @staticmethod
    def fromValue(time):
        """Convert an epoch timestamp in milliseconds to a naive local ``datetime``.

        The format being ported stores JavaScript ``Date`` values, which
        count milliseconds since the epoch, whereas
        ``datetime.fromtimestamp`` expects seconds. Passing milliseconds
        straight through yields an out-of-range year.
        NOTE(review): assumes the serializer writes ``Date.getTime()``
        milliseconds -- confirm against the writer in use.
        """
        return datetime.datetime.fromtimestamp(time / 1000)
def parse(stringOrBuffer, options=None):
    """Parse a complete dpack document.

    Args:
        stringOrBuffer: the document as ``str``, or as ``bytes`` /
            ``bytearray``, which is decoded with ``options['encoding']``
            (default ``'utf-8'``). Any other value is returned unchanged.
        options: optional mapping of parser options; ``None`` means none.

    Returns:
        The decoded value, or ``stringOrBuffer`` itself when it is neither
        text nor bytes.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if options is None:
        options = {}
    # we process the input as a string...
    # isinstance (not type() ==) so that str/bytes subclasses are parsed too
    # rather than silently handed back unparsed.
    if isinstance(stringOrBuffer, str):
        source = stringOrBuffer
    elif isinstance(stringOrBuffer, (bytes, bytearray)):
        source = stringOrBuffer.decode(options.get('encoding', 'utf-8'))
    else:  # whatever (undefined or null or whatever), just return it
        return stringOrBuffer
    parser = Parser.createParser(options).setSource(source)
    if 'shared' in options:
        return parser.read([options['shared']])
    return parser.read()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment