Skip to content

Instantly share code, notes, and snippets.

@Gozala
Created January 31, 2022 19:59
Show Gist options
  • Save Gozala/2176b953d04c0f58a460b7c0178e3f0f to your computer and use it in GitHub Desktop.
Save Gozala/2176b953d04c0f58a460b7c0178e3f0f to your computer and use it in GitHub Desktop.
An attempt to define the UnixFS encoding more clearly
import type { CID } from 'multiformats'
/**
 * Logical representation of a raw chunk of a file.
 *
 * TODO: Clarify when this representation is used instead of the `FileChunk`
 * representation.
 */
export interface Raw extends PBNode {
  Data: ByteView<{
    Type: DataType.Raw
    /** Raw bytes of the chunk. */
    Data: Bytes
  }>
  /** A raw node has no links, but the (empty) list is still present. */
  Links: never[]
}
/**
 * Logical representation of a file chunk. When a large file is added to IPFS
 * it is split into smaller pieces; each piece is encoded in this
 * representation and linked from the file DAG.
 */
export interface FileChunk extends PBNode {
  Data: ByteView<{
    Type: DataType.File
    /** Raw bytes of the chunk. */
    Data: Bytes
    /** Number of bytes in the `Data` field. */
    filesize: uint64
  }>
  /** A chunk has no links, but the (empty) list is still present. */
  Links: never[]
}
/**
 * Logical representation of a file shard. When large files are chunked,
 * larger file slices (spanning several chunks) may be represented via a
 * shard like this one, depending on the chosen DAG layout.
 */
export interface FileShard extends PBNode {
  Data: ByteView<{
    Type: DataType.File
    /** Total number of bytes of the file slice represented by this shard. */
    filesize: uint64
    /** `filesize` of each linked node, in exactly the same order as `Links`. */
    blocksizes: uint64[]
  }>
  /** Links to the file slices this shard is comprised of. */
  Links: Array<
    FileLink<
      | Bytes // Raw block, when the --raw-leaves option was used
      | Raw // TODO: unclear when this variant actually occurs
      | FileChunk // File chunk
      | FileShard // Balanced & trickle DAGs may have multiple shard layers
    >
  >
}
/**
 * Logical representation of a file that fits into a single chunk. It is
 * semantically different from `FileChunk` even though the representation is
 * the same, except that it may additionally carry the optional `mode` and
 * `mtime` metadata.
 */
export interface SimpleFileLayout extends PBNode {
  Data: ByteView<{
    Type: DataType.File
    /** Raw bytes of the file. */
    Data: Bytes
    /** Number of bytes in the `Data` field. */
    filesize: uint64
    mode?: Mode
    mtime?: UnixTime
  }>
  /** A single-chunk file links to nothing. */
  Links: never[]
}
/**
 * Logical representation of the root of a file that is spread over several
 * linked nodes. Its shape mirrors `FileShard`, with the addition of the
 * optional `mode` and `mtime` metadata.
 */
export interface AdvancedFileLayout extends PBNode {
  Data: ByteView<{
    Type: DataType.File
    /** Total number of bytes in the file (not the graph structure). */
    filesize: uint64
    /** `filesize` of each linked node, in exactly the same order as `Links`. */
    blocksizes: uint64[]
    mode?: Mode
    mtime?: UnixTime
  }>
  /** Links to the file slices (in the same order as in `blocksizes`). */
  Links: Array<
    FileLink<
      | Bytes // Raw block, when the --raw-leaves option was used
      | Raw // TODO: unclear when this variant actually occurs
      | FileChunk // File chunk
      | FileShard // Balanced & trickle DAGs may have multiple shard layers
    >
  >
}
/**
 * In IPFS large files are chunked into several blocks for more effective
 * replication. Such files are represented in UnixFS via `AdvancedFileLayout`,
 * while files that fit into a single block are represented via
 * `SimpleFileLayout`.
 *
 * Please note: in some configurations files that fit into a single block are
 * not encoded as UnixFS at all but rather as raw blocks. This type describes
 * the UnixFS encoding only, which is why that variant is not present.
 */
export type FileLayout = SimpleFileLayout | AdvancedFileLayout
/**
 * Logical representation of a directory whose links point directly at its
 * entries (files or nested directories), each link carrying the entry name.
 */
export interface FlatDirectoryLayout extends PBNode {
  Data: ByteView<{
    Type: DataType.Directory
    // NOTE(review): pinned to the literal `0` — confirm whether encoders
    // write an explicit zero or omit the field for directories.
    filesize: 0
    mode?: Mode
    mtime?: UnixTime
  }>
  /** One named link per directory entry. */
  Links: Array<DirectoryLink<FileLayout | DirectoryLayout>>
}
/**
 * Logical representation of a node inside a HAMT-sharded directory.
 */
export interface DirectoryShard extends PBNode {
  Data: ByteView<{
    Type: DataType.HAMTShard
    /** Encoded bitfield (see `Bitfield`). */
    Data: ByteView<Bitfield>
    // NOTE(review): presumably the number of slots per HAMT node — confirm
    // against the UnixFS specification.
    fanout: uint64
    // NOTE(review): presumably identifies the hash function used to place
    // entries — confirm against the UnixFS specification.
    hashType: uint64
  }>
  /**
   * Links to other shards and/or to actual directory entries. A single shard
   * can hold both kinds side by side, so this is an array of the union
   * rather than a union of two homogeneous arrays.
   */
  Links: (ShardLink | DirectoryLink<FileLayout | DirectoryLayout>)[]
}
/**
 * Link used by sharded directories. Its target is either a directory entry
 * (file or directory layout) or a further `DirectoryShard`. Unlike a plain
 * `PBLink`, both the name and the size are mandatory.
 */
export interface ShardLink
  extends PBLink<FileLayout | DirectoryLayout | DirectoryShard> {
  Name: string
  Tsize: number
}
/**
 * Logical representation of the root node of a sharded directory. It carries
 * the same HAMT fields as `DirectoryShard` plus the optional `mode` and
 * `mtime` metadata, and links to its content through `ShardLink`s.
 *
 * Extends `PBNode` like every other node representation in this file.
 */
export interface AdvancedDirectoryLayout extends PBNode {
  Data: ByteView<{
    // UnixFS tags every node of a sharded directory, the root included, as
    // `HAMTShard`; `Directory` is reserved for the flat layout, which has no
    // bitfield / fanout / hashType.
    Type: DataType.HAMTShard
    /** Encoded bitfield (see `Bitfield`). */
    Data: ByteView<Bitfield>
    fanout: uint64
    hashType: uint64
    mode?: Mode
    mtime?: UnixTime
  }>
  Links: ShardLink[]
}
/**
 * A directory is represented either flat (`FlatDirectoryLayout`) or sharded
 * (`AdvancedDirectoryLayout`).
 */
export type DirectoryLayout = FlatDirectoryLayout | AdvancedDirectoryLayout
/**
 * Any node that represents file content: either a single chunk or one of the
 * file layouts.
 */
export type FileNode = FileChunk | FileLayout
/**
 * Any node that represents a directory root, flat or sharded.
 */
export type DirectoryNode = FlatDirectoryLayout | AdvancedDirectoryLayout
/**
 * Placeholder for the representation of a `DataType.Metadata` node.
 *
 * @TODO Not defined yet; typed as `never` so it adds nothing to unions.
 */
export type MetadataNode = never
/**
 * Placeholder for the representation of a `DataType.Symlink` node.
 *
 * @TODO Not defined yet; typed as `never` so it adds nothing to unions.
 */
export type SymlinkNode = never
/**
 * Union of the UnixFS node representations. `MetadataNode` and `SymlinkNode`
 * are listed for completeness even though they are not defined yet.
 */
export type UnixFS =
  | Raw
  | DirectoryNode
  | FileNode
  | MetadataNode
  | SymlinkNode
  | DirectoryShard
/**
 * Discriminator stored in the `Type` field of a node's `Data`. The numeric
 * values are spelled out explicitly because they are part of the encoding.
 */
export enum DataType {
  Raw = 0,
  Directory = 1,
  File = 2,
  /**
   * TODO: Have not come across this one. It would be nice to either type
   * its representation or mark it deprecated.
   */
  Metadata = 3,
  /**
   * TODO: Have not come across this one either, and it is unclear how it is
   * supposed to be represented. If it is not used in practice, maybe it
   * should be marked deprecated.
   */
  Symlink = 4,
  HAMTShard = 5,
}
/**
 * Modification time, expressed in seconds relative to the unix epoch
 * 1970-01-01T00:00:00Z.
 */
export interface UnixTime {
  /**
   * Signed 64-bit integer: the number of seconds after (or, when negative,
   * before) the epoch.
   */
  readonly Seconds: int64
  /**
   * Optional unsigned 32-bit integer: when specified, the fractional part of
   * the mtime as a number of nanoseconds. Valid values are the integers in
   * the range [1, 999999999].
   */
  readonly FractionalNanoseconds?: fixed32
}
/**
 * File permissions persisted in [numeric notation]. When unspecified the
 * mode defaults to
 * - `0755` for directories / HAMT shards
 * - `0644` for all other types where applicable
 *
 * Bit layout:
 * - the nine least significant bits represent `ugo-rwx`;
 * - the next three least significant bits represent setuid, setgid and the
 *   sticky bit;
 * - the remaining 20 bits are reserved for future use and subject to change.
 *
 * Spec implementations MUST handle bits they do not expect as follows:
 * - For future-proofing, the (de)serialization layer must preserve the
 *   entire `uint32` value during clone/copy operations, modifying only bit
 *   values that have a well defined meaning:
 *   `clonedValue = ( modifiedBits & 07777 ) | ( originalValue & 0xFFFFF000 )`
 * - Implementations of this spec MUST proactively mask off bits without a
 *   defined meaning in the implemented version of the spec:
 *   `interpretedValue = originalValue & 07777`
 *
 * [numeric notation]:https://en.wikipedia.org/wiki/File-system_permissions#Numeric_notation
 *
 * @see https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/sys_stat.h.html
 */
export type Mode = uint32
/**
 * Link from a file node to one of its slices. Such links always carry a
 * size and are never named.
 */
export interface FileLink<Data> extends PBLink<Data> {
  Hash: CID
  Tsize: number
  /** File links have no name; the field must be absent. */
  Name?: never
}
/**
 * Link from a directory node to one of its entries. The link is always named
 * (the entry name) and always carries a size.
 */
export interface DirectoryLink<Data> extends PBLink<Data> {
  Hash: CID
  /**
   * Size reported for the linked entry. Typed as `number` (as in `FileLink`
   * and `ShardLink`) because dag-pb stores the cumulative size of the target
   * here, which is not zero for real entries.
   */
  Tsize: number
  /** Name of the directory entry. */
  Name: string
}
/**
 * Generic link as found in a `PBNode`. The `Data` type parameter records
 * what the link points at; it is retained purely at the type level through
 * `Phantom`.
 */
export interface PBLink<Data> extends Phantom<Data> {
  /** Content identifier of the link target. */
  Hash: CID
  Tsize?: number
  Name?: string
}
/**
 * Generic node shape shared by the representations in this file: an encoded
 * `Data` payload plus a list of links.
 */
export interface PBNode {
  Data: ByteView<unknown>
  Links: Array<PBLink<unknown>>
}
/**
 * Byte-encoded representation of `Data`. The interface declares no members
 * of its own; the `Data` type parameter is retained through `Phantom`.
 */
export interface ByteView<Data> extends Phantom<Data> {}
/**
 * Type representing raw bytes; in JS that is usually a `Uint8Array`. A type
 * alias is used so the definitions are less JS specific.
 */
export type Bytes = Uint8Array
/**
 * Bitfield carried in the `Data` field of sharded directory nodes,
 * held as bytes.
 *
 * @see https://github.com/ipfs/go-bitfield
 */
export type Bitfield = Uint8Array
// TypeScript has no fixed-width integer types. These aliases exist only so
// that field declarations read closer to the protobuf spec; each of them is
// a plain `number`.

// Unsigned
export type uint32 = number
export type uint64 = number
export type fixed32 = number

// Signed
export type int64 = number
/**
 * Key of the phantom property. It is only declared, never defined, so it
 * exists purely at the type level.
 */
declare const PhantomKey: unique symbol

/**
 * Utility type that lets a derived type retain an otherwise unused type
 * parameter, by mentioning it in an optional, read-only property.
 */
export interface Phantom<T> {
  readonly [PhantomKey]?: T
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment