amallia/dense-v1.proto

## dense-v1.proto
syntax = "proto3";

// dense-v1.proto
// This file defines the structure for a Dense Index in the Common Index File Format (CIFF).
// A Dense Index is used to store high-dimensional embeddings of documents,
// typically used in vector space models for information retrieval.

// Embedding holds the dense vector for a single document.
// Every component of the vector is stored as a float.
message Embedding {
  // Vector components. The number of entries should equal `dimensions`
  // in DenseHeader.
  repeated float values = 1;
}

// DenseHeader carries metadata describing a DenseIndex: how many
// embeddings (points) it holds and the dimensionality of each vector.
message DenseHeader {
  // Total number of embeddings (points) in the DenseIndex.
  int32 num_points = 1;

  // Number of dimensions per embedding vector. Every vector in the
  // DenseIndex shares this dimensionality.
  int32 dimensions = 2;
}

// DenseIndex is the collection of embeddings that make up the index.
//
// NOTE(review): this message declares no DenseHeader field, so the header is
// not part of DenseIndex itself. Presumably the DenseHeader is written
// separately, ahead of the embeddings -- confirm against the reader/writer.
message DenseIndex {
  // All embeddings in the index. The number of entries should equal
  // `num_points` in DenseHeader.
  repeated Embedding embeddings = 1;
}

## graph.proto
syntax = "proto3";

// graph.proto
// This file defines the structure for a Graph in the context of search engines or similar applications.
// A Graph is used to represent relationships between various entities, like documents or terms,
// through nodes and neighbors.

// Neighbor is a scored link from one node to another node.
// The score measures the relationship or relevance between the two nodes.
message Neighbor {
  // ID of the neighboring node.
  int32 node_id = 1;

  // Score quantifying the relationship or relevance to the neighboring node.
  float score = 2;
}

// Node is one entity in the graph, such as a document, term, or concept.
// Each node carries a unique identifier and a list of scored neighbors.
// (No label field is declared on this message.)
message Node {
  // Unique identifier for the node.
  int32 id = 1;

  // Neighbors of this node, each with an associated score.
  repeated Neighbor neighbors = 2;
}

// Graph is the complete graph structure: a collection of nodes, where
// every node brings its own list of neighbors and their scores.
message Graph {
  // Nodes of the graph, each with its own list of neighbors.
  repeated Node nodes = 1;
}
	syntax = "proto3";

	// dense-v1.proto
	// This file defines the structure for a Dense Index in the Common Index File Format (CIFF).
	// A Dense Index is used to store high-dimensional embeddings of documents,
	// typically used in vector space models for information retrieval.

	// Embedding holds the dense vector for a single document.
	// Every component of the vector is stored as a float.
	message Embedding {
	  // Vector components. The number of entries should equal `dimensions`
	  // in DenseHeader.
	  repeated float values = 1;
	}

	// DenseHeader carries metadata describing a DenseIndex: how many
	// embeddings (points) it holds and the dimensionality of each vector.
	message DenseHeader {
	  // Total number of embeddings (points) in the DenseIndex.
	  int32 num_points = 1;

	  // Number of dimensions per embedding vector. Every vector in the
	  // DenseIndex shares this dimensionality.
	  int32 dimensions = 2;
	}

	// DenseIndex is the collection of embeddings that make up the index.
	//
	// NOTE(review): this message declares no DenseHeader field, so the header
	// is not part of DenseIndex itself. Presumably the DenseHeader is written
	// separately, ahead of the embeddings -- confirm against the reader/writer.
	message DenseIndex {
	  // All embeddings in the index. The number of entries should equal
	  // `num_points` in DenseHeader.
	  repeated Embedding embeddings = 1;
	}
	syntax = "proto3";

	// graph.proto
	// This file defines the structure for a Graph in the context of search engines or similar applications.
	// A Graph is used to represent relationships between various entities, like documents or terms,
	// through nodes and neighbors.

	// Neighbor is a scored link from one node to another node.
	// The score measures the relationship or relevance between the two nodes.
	message Neighbor {
	  // ID of the neighboring node.
	  int32 node_id = 1;

	  // Score quantifying the relationship or relevance to the neighboring node.
	  float score = 2;
	}

	// Node is one entity in the graph, such as a document, term, or concept.
	// Each node carries a unique identifier and a list of scored neighbors.
	// (No label field is declared on this message.)
	message Node {
	  // Unique identifier for the node.
	  int32 id = 1;

	  // Neighbors of this node, each with an associated score.
	  repeated Neighbor neighbors = 2;
	}

	// Graph is the complete graph structure: a collection of nodes, where
	// every node brings its own list of neighbors and their scores.
	message Graph {
	  // Nodes of the graph, each with its own list of neighbors.
	  repeated Node nodes = 1;
	}