alexcrichton/a.rs

## a.rs
// Parser for a module which is fed its input in chunks (see `Parser::parse`).
//
// This is a design sketch: `_x` is only a placeholder field, and the comments
// inside the struct describe the state that a real implementation would hold.
pub struct Parser {
    // internally has a state machine which looks like:
    //
    // * parsing a section - in this state we're looking for the section header
    //   which indicates the section code and how big the section is. Depending
    //   on the section code this will indicate what form of chunk is returned.
    //   This also waits for some sections to be entirely resident before
    //   proceeding.
    //
    // * parsing functions - this has a count of how many functions are
    //   remaining and expects the next item to be a function chunk.
    //
    // * parsing modules - similar to functions, indicates how many nested
    //   modules remain.
    //
    // additionally there's a field which configures the maximum amount of data
    // which can be parsed. This is used to limit the module/code sections, for
    // example. Additionally it's used when nested `Parser` structures are
    // returned for nested modules to ensure they consume a precise amount of
    // data.
    //
    // While this is a state machine which is an avenue for complication, it's
    // hoped that this is a relatively simple state machine since the main
    // complexity is dealing with the streamed sections, but even then that's
    // relatively simple.
    _x: (),
}

impl Send for Parser {}
impl Sync for Parser {}

impl Parser {
    // Creates a new module parser.
    //
    // Errors are reported relative to the `offset` provided, where `offset`
    // is some logical offset within the input stream that we're parsing.
    pub fn new(offset: usize) -> Parser {
        // ...
    }

    // Attempts to parse a chunk of data.
    //
    // If a parse error happens, then `Err` is returned. Otherwise a `Chunk`
    // is returned which either asks for more input (`Chunk::NeedMoreData`)
    // or carries the number of bytes consumed together with the parsed
    // payload (`Chunk::Parsed`). See more docs on `Chunk` for what can be
    // successfully parsed.
    //
    // If `eof` is `true` then it indicates that `data` is all the remaining
    // data and no more data will be available for parsing. If `eof` is
    // `false` then it means that more data may be coming in the future.
    //
    // It's expected that you parse until `Payload::End` is reached. For the
    // top-level module that won't get returned until `data` is empty and `eof`
    // is `true`. For sub-modules, however, `data` may have bytes in it or `eof`
    // may be `false` when `End` is returned.
    //
    // TODO: maybe `&[IoVec]` as input? probably too fancy
    pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
        // ...
    }
}

/// Outcome of a successful call to `Parser::parse`.
///
/// This is `pub` because it appears in the return type of the public
/// `Parser::parse` method, so callers have to be able to name it and match on
/// its variants.
pub enum Chunk<'a> {
    /// This can be returned at any time and indicates that more data is needed
    /// to proceed with parsing. Zero bytes were consumed from the input to
    /// `parse`, and the `usize` is how many more bytes are needed before
    /// progress can be made.
    NeedMoreData(usize),

    /// A chunk was successfully parsed.
    Parsed {
        /// This many bytes of the `data` input to `parse` were consumed to
        /// produce `payload`.
        consumed: usize,
        /// What was parsed out of the consumed bytes.
        payload: Payload<'a>,
    },
}

/// A single parsed item, as carried by `Chunk::Parsed`.
///
/// This is `pub` because it is part of the public API surface: it is the
/// argument type of the public `Validator::validate_header` and
/// `Validator::validate_body` methods, so callers have to be able to name it
/// and match on its variants.
pub enum Payload<'a> {
    // Sections which are received in their entirety and then available for
    // parsing.
    //
    // These sections are required to be entirely resident in memory
    // before they're parsed. The payload of each of these variants is a slice
    // into the original `data` passed to `parse` which outlines the entire
    // section.
    //
    // Note that the presence of this chunk does not imply that this chunk is
    // valid or will parse correctly. You'll need to at least iterate over this
    // chunk or validate it to figure that out.
    TypeSection(TypeSectionReader<'a>),
    ImportSection(ImportSectionReader<'a>),
    AliasSection(AliasSectionReader<'a>),
    InstanceSection(InstanceSectionReader<'a>),
    ModuleSection(ModuleSectionReader<'a>),
    FunctionSection(FunctionSectionReader<'a>),
    TableSection(TableSectionReader<'a>),
    MemorySection(MemorySectionReader<'a>),
    GlobalSection(GlobalSectionReader<'a>),
    ExportSection(ExportSectionReader<'a>),
    StartSection(u32),
    ElementSection(ElementSectionReader<'a>),
    DataCount(u32),

    // The code section is a little more interesting since it's intended to be
    // streamed.
    //
    // The purpose of this is so that `parse` doesn't require the entirety of
    // all functions to be resident in memory before we return a chunk. This
    // way we can parse functions as they come off the wire or modules as
    // they're downloaded. Note, however, that functions are not internally
    // incrementally parsed, they're required to be fully resident in memory
    // before a chunk is returned.
    //
    // Note that `CodeSectionStart(u32)` means that the section has started and
    // that `u32` `CodeSectionEntry` payloads will be returned. You do not need
    // to validate that there are `u32` items present, that will be validated
    // internally. You're guaranteed the next `u32` successful chunks will be
    // returned as `CodeSectionEntry`. If that doesn't happen then an error is
    // returned instead.
    CodeSectionStart(u32),
    CodeSectionEntry(FunctionBody<'a>),

    // This is similar to the code section where we want to allow streaming
    // processing and don't want to require modules are fully resident in
    // memory.
    //
    // Similar to the code section the first entry means that `u32` more
    // `ModuleCodeEntry` entries will be returned. Unlike the code section this
    // is a bit different. What happens here is that once you receive a
    // `Parser`, then future data should *not* be fed into this parser. Instead
    // data should be fed into the `Parser` given until it reports `End`. Once
    // `End` is successfully seen then you should switch back to the original
    // `Parser`.
    ModuleCodeSectionStart(u32),
    ModuleCodeEntry(Parser),

    // Like the code section, but allows streaming each individual data section
    // entry, in case they're large. Again you're guaranteed that after seeing
    // `DataSectionStart` you'll see that many `DataSectionEntry` payloads next.
    // Or an error happens.
    DataSectionStart(u32),
    DataSectionEntry(&'a [u8]),

    /// The end was successfully reached, and the module is entirely parsed.
    End,
}

// Validator which is fed the `Payload`s of a module (see `validate_header`
// and `validate_body`). Its fields are elided in this sketch.
pub struct Validator {
    // ...
}

impl Send for Validator() {}
impl Sync for Validator() {}

impl Validator {
    // Creates a new validator.
    pub fn new() -> Validator {
        // ...
    }

    // Validates the "header" of a wasm module.
    //
    // This is intended to be used where you feed in parsed payloads up to the
    // point where one of the streaming sections above starts. Once those are
    // reached you switch to the `validate_body` entry below.
    //
    // The purpose of this function is to ingest all of the "wait for it to be
    // resident in memory" sections. This includes sections like the function
    // section, table section, memory section, globals, etc. This incrementally
    // builds the state of the module so we know what its internals will be.
    //
    // Note that mutable access is required here which means that this can't
    // happen in parallel. Later, however, you'll call `validate_body` which
    // only requires `&self` which means it can be called in parallel for each
    // module/function.
    pub fn validate_header(&mut self, payload: Payload<'_>) -> Result<()> {
        // ...
    }

    // Validates a payload belonging to one of the streaming sections, once
    // the sections before them have been fed through `validate_header`.
    //
    // Only `&self` is required, so this can be called in parallel for each
    // module/function.
    pub fn validate_body(&self, payload: Payload<'_>) -> Result<()> {
        // ...
    }

    // This would also include various accessors to figure out the type of each
    // function, element, global, etc. Internally everything about aliasing with
    // modules would be handled and you'd only have to deal with concrete types
    // and such.
}
	// Parser for a module which is fed its input in chunks (see `Parser::parse`).
	//
	// This is a design sketch: `_x` is only a placeholder field, and the comments
	// inside the struct describe the state that a real implementation would hold.
	pub struct Parser {
	// internally has a state machine which looks like:
	//
	// * parsing a section - in this state we're looking for the section header
	// which indicates the section code and how big the section is. Depending
	// on the section code this will indicate what form of chunk is returned.
	// This also waits for some sections to be entirely resident before
	// proceeding.
	//
	// * parsing functions - this has a count of how many functions are
	// remaining and expects the next item to be a function chunk.
	//
	// * parsing modules - similar to functions, indicates how many nested
	// modules remain.
	//
	// additionally there's a field which configures the maximum amount of data
	// which can be parsed. This is used to limit the module/code sections, for
	// example. Additionally it's used when nested `Parser` structures are
	// returned for nested modules to ensure they consume a precise amount of
	// data.
	//
	// While this is a state machine which is an avenue for complication, it's
	// hoped that this is a relatively simple state machine since the main
	// complexity is dealing with the streamed sections, but even then that's
	// relatively simple.
	_x: (),
	}

	impl Send for Parser {}
	impl Sync for Parser {}

	impl Parser {
	// Creates a new module parser.
	//
	// Errors are reported relative to the `offset` provided, where `offset`
	// is some logical offset within the input stream that we're parsing.
	pub fn new(offset: usize) -> Parser {
	// ...
	}

	// Attempts to parse a chunk of data.
	//
	// If a parse error happens, then `Err` is returned. Otherwise a `Chunk`
	// is returned which either asks for more input (`Chunk::NeedMoreData`)
	// or carries the number of bytes consumed together with the parsed
	// payload (`Chunk::Parsed`). See more docs on `Chunk` for what can be
	// successfully parsed.
	//
	// If `eof` is `true` then it indicates that `data` is all the remaining
	// data and no more data will be available for parsing. If `eof` is
	// `false` then it means that more data may be coming in the future.
	//
	// It's expected that you parse until `Payload::End` is reached. For the
	// top-level module that won't get returned until `data` is empty and `eof`
	// is `true`. For sub-modules, however, `data` may have bytes in it or `eof`
	// may be `false` when `End` is returned.
	//
	// TODO: maybe `&[IoVec]` as input? probably too fancy
	pub fn parse<'a>(&mut self, data: &'a [u8], eof: bool) -> Result<Chunk<'a>> {
	// ...
	}
	}

	/// Outcome of a successful call to `Parser::parse`.
	///
	/// This is `pub` because it appears in the return type of the public
	/// `Parser::parse` method, so callers have to be able to name it and match
	/// on its variants.
	pub enum Chunk<'a> {
	    /// This can be returned at any time and indicates that more data is
	    /// needed to proceed with parsing. Zero bytes were consumed from the
	    /// input to `parse`, and the `usize` is how many more bytes are needed
	    /// before progress can be made.
	    NeedMoreData(usize),

	    /// A chunk was successfully parsed.
	    Parsed {
	        /// This many bytes of the `data` input to `parse` were consumed to
	        /// produce `payload`.
	        consumed: usize,
	        /// What was parsed out of the consumed bytes.
	        payload: Payload<'a>,
	    },
	}

	/// A single parsed item, as carried by `Chunk::Parsed`.
	///
	/// This is `pub` because it is part of the public API surface: it is the
	/// argument type of the public `Validator::validate_header` and
	/// `Validator::validate_body` methods, so callers have to be able to name it
	/// and match on its variants.
	pub enum Payload<'a> {
	    // Sections which are received in their entirety and then available for
	    // parsing.
	    //
	    // These sections are required to be entirely resident in memory
	    // before they're parsed. The payload of each of these variants is a
	    // slice into the original `data` passed to `parse` which outlines the
	    // entire section.
	    //
	    // Note that the presence of this chunk does not imply that this chunk
	    // is valid or will parse correctly. You'll need to at least iterate over
	    // this chunk or validate it to figure that out.
	    TypeSection(TypeSectionReader<'a>),
	    ImportSection(ImportSectionReader<'a>),
	    AliasSection(AliasSectionReader<'a>),
	    InstanceSection(InstanceSectionReader<'a>),
	    ModuleSection(ModuleSectionReader<'a>),
	    FunctionSection(FunctionSectionReader<'a>),
	    TableSection(TableSectionReader<'a>),
	    MemorySection(MemorySectionReader<'a>),
	    GlobalSection(GlobalSectionReader<'a>),
	    ExportSection(ExportSectionReader<'a>),
	    StartSection(u32),
	    ElementSection(ElementSectionReader<'a>),
	    DataCount(u32),

	    // The code section is a little more interesting since it's intended to
	    // be streamed.
	    //
	    // The purpose of this is so that `parse` doesn't require the entirety of
	    // all functions to be resident in memory before we return a chunk. This
	    // way we can parse functions as they come off the wire or modules as
	    // they're downloaded. Note, however, that functions are not internally
	    // incrementally parsed, they're required to be fully resident in memory
	    // before a chunk is returned.
	    //
	    // Note that `CodeSectionStart(u32)` means that the section has started
	    // and that `u32` `CodeSectionEntry` payloads will be returned. You do
	    // not need to validate that there are `u32` items present, that will be
	    // validated internally. You're guaranteed the next `u32` successful
	    // chunks will be returned as `CodeSectionEntry`. If that doesn't happen
	    // then an error is returned instead.
	    CodeSectionStart(u32),
	    CodeSectionEntry(FunctionBody<'a>),

	    // This is similar to the code section where we want to allow streaming
	    // processing and don't want to require modules are fully resident in
	    // memory.
	    //
	    // Similar to the code section the first entry means that `u32` more
	    // `ModuleCodeEntry` entries will be returned. Unlike the code section
	    // this is a bit different. What happens here is that once you receive a
	    // `Parser`, then future data should not be fed into this parser. Instead
	    // data should be fed into the `Parser` given until it reports `End`.
	    // Once `End` is successfully seen then you should switch back to the
	    // original `Parser`.
	    ModuleCodeSectionStart(u32),
	    ModuleCodeEntry(Parser),

	    // Like the code section, but allows streaming each individual data
	    // section entry, in case they're large. Again you're guaranteed that
	    // after seeing `DataSectionStart` you'll see that many
	    // `DataSectionEntry` payloads next. Or an error happens.
	    DataSectionStart(u32),
	    DataSectionEntry(&'a [u8]),

	    /// The end was successfully reached, and the module is entirely parsed.
	    End,
	}

	// Validator which is fed the `Payload`s of a module (see `validate_header`
	// and `validate_body`). Its fields are elided in this sketch.
	pub struct Validator {
	// ...
	}

	impl Send for Validator() {}
	impl Sync for Validator() {}

	impl Validator {
	// Creates a new validator.
	pub fn new() -> Validator {
	// ...
	}

	// Validates the "header" of a wasm module.
	//
	// This is intended to be used where you feed in parsed payloads up to the
	// point where one of the streaming sections above starts. Once those are
	// reached you switch to the `validate_body` entry below.
	//
	// The purpose of this function is to ingest all of the "wait for it to be
	// resident in memory" sections. This includes sections like the function
	// section, table section, memory section, globals, etc. This incrementally
	// builds the state of the module so we know what its internals will be.
	//
	// Note that mutable access is required here which means that this can't
	// happen in parallel. Later, however, you'll call `validate_body` which
	// only requires `&self` which means it can be called in parallel for each
	// module/function.
	pub fn validate_header(&mut self, payload: Payload<'_>) -> Result<()> {
	// ...
	}

	// Validates a payload belonging to one of the streaming sections, once
	// the sections before them have been fed through `validate_header`.
	//
	// Only `&self` is required, so this can be called in parallel for each
	// module/function.
	pub fn validate_body(&self, payload: Payload<'_>) -> Result<()> {
	// ...
	}

	// This would also include various accessors to figure out the type of each
	// function, element, global, etc. Internally everything about aliasing with
	// modules would be handled and you'd only have to deal with concrete types
	// and such.
	}