Last active
December 19, 2023 13:16
-
-
Save amallia/1363e53d923a3caa0013355db8cf0e01 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
syntax = "proto3";

// dense.proto
//
// This file defines the structure for a Dense Index in the Common Index File
// Format (CIFF). A Dense Index stores high-dimensional embeddings of
// documents, typically used in vector space models for information retrieval.

// An Embedding is the dense vector stored for a document.
// Each dimension of the vector is a float.
message Embedding {
  // The vector values. The length of this array should match `dimensions`
  // in DenseHeader.
  repeated float values = 1;
}
// DenseHeader carries metadata about a DenseIndex: the total number of
// points (embeddings) and the number of dimensions of each embedding vector.
message DenseHeader {
  // Total number of embeddings (points) in the DenseIndex.
  int32 num_points = 1;

  // Number of dimensions of each embedding vector. All vectors in the
  // DenseIndex have the same dimension.
  int32 dimensions = 2;
}
// DenseIndex is the collection of embeddings.
//
// NOTE(review): this message was described as "starting with a DenseHeader
// followed by all the embeddings", but it declares no DenseHeader field.
// Presumably the DenseHeader is written as a separate message ahead of the
// embeddings -- confirm against the writer/reader before relying on it.
message DenseIndex {
  // The list of embeddings. The number of embeddings should match
  // `num_points` in the DenseHeader.
  repeated Embedding embeddings = 1;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
syntax = "proto3";

// Graph.proto
//
// This file defines the structure for a Graph in the context of search
// engines or similar applications. A Graph represents relationships between
// entities, such as documents or terms, through nodes and their neighbors.

// A Neighbor is a connection to another node, together with a score that
// quantifies the relationship or relevance between the two nodes.
message Neighbor {
  // ID of the neighboring node.
  int32 node_id = 1;

  // Score quantifying the relationship or relevance to the neighboring node.
  float score = 2;
}
// A Node in the graph represents an entity, such as a document, term, or
// concept. Each node has a unique identifier and a list of neighbors.
//
// NOTE(review): an earlier description also mentioned "an optional label",
// but no label field is declared on this message.
message Node {
  // Unique identifier for the node.
  int32 id = 1;

  // List of neighbors for this node, each with an associated score.
  repeated Neighbor neighbors = 2;
}
// Graph is the entire graph structure: a collection of nodes, each of which
// carries its own list of neighbors and their scores.
message Graph {
  // The list of nodes in the graph, each with its own list of neighbors.
  repeated Node nodes = 1;
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment