Skip to content

Instantly share code, notes, and snippets.

@dfeldman
Created June 5, 2024 15:48
Show Gist options
  • Save dfeldman/5a5630d28b8336f403123c071cfdac9e to your computer and use it in GitHub Desktop.
Save dfeldman/5a5630d28b8336f403123c071cfdac9e to your computer and use it in GitHub Desktop.
Database Schema for Microsoft's Copilot+Recall feature
****** SemanticTextStore.db :
CREATE TABLE si_db_info (
schema_version INTEGER
);
CREATE TABLE si_items (
id BLOB(16) PRIMARY KEY NOT NULL
);
CREATE TABLE si_diskann_graph (
id INTEGER PRIMARY KEY,
embedding BLOB NULL,
outbound_ids BLOB NOT NULL
);
CREATE TABLE si_diskann_info (
graph_table_name STRING NOT NULL PRIMARY KEY,
dimension INTEGER,
vector_space_id BLOB(16),
element_type INTEGER,
content_type INTEGER,
start_node_id INTEGER NULL ,
last_id_processed_in_consolidation INTEGER NULL,
final_id_to_process_in_consolidation INTEGER NULL);
CREATE TABLE si_embedding_metadata (
embedding_id INTEGER PRIMARY KEY,
item_id BLOB(16) NOT NULL,
region_id TEXT NOT NULL,
metadata_json TEXT NULL,
FOREIGN KEY(item_id) REFERENCES si_items(id) ON DELETE CASCADE
);
CREATE INDEX si_embedding_metadata__item_id ON si_embedding_metadata (item_id, region_id);
CREATE TABLE si_diskann_purging (
node_id INTEGER PRIMARY KEY,
in_progress INTEGER NOT NULL DEFAULT(0));
CREATE TABLE si_diskann_config (
graph_table_name STRING NOT NULL PRIMARY KEY,
max_degree INTEGER,
max_degree_slack_margin INTEGER,
min_query_candidate_queue_size INTEGER,
insert_candidate_queue_size INTEGER,
alpha INTEGER,
max_pruning_candidate_size INTEGER,
saturate_graph INTEGER,
minimal_pruning_on_reverse_linking INTEGER);
****** SemanticImageStore.db :
CREATE TABLE si_db_info (
schema_version INTEGER
);
CREATE TABLE si_items (
id BLOB(16) PRIMARY KEY NOT NULL
);
CREATE TABLE si_diskann_graph (
id INTEGER PRIMARY KEY,
embedding BLOB NULL,
outbound_ids BLOB NOT NULL
);
CREATE TABLE si_diskann_info (
graph_table_name STRING NOT NULL PRIMARY KEY,
dimension INTEGER,
vector_space_id BLOB(16),
element_type INTEGER,
content_type INTEGER,
start_node_id INTEGER NULL ,
last_id_processed_in_consolidation INTEGER NULL,
final_id_to_process_in_consolidation INTEGER NULL);
CREATE TABLE si_embedding_metadata (
embedding_id INTEGER PRIMARY KEY,
item_id BLOB(16) NOT NULL,
region_id TEXT NOT NULL,
metadata_json TEXT NULL,
FOREIGN KEY(item_id) REFERENCES si_items(id) ON DELETE CASCADE
);
CREATE INDEX si_embedding_metadata__item_id ON si_embedding_metadata (item_id, region_id);
CREATE TABLE si_diskann_purging (
node_id INTEGER PRIMARY KEY,
in_progress INTEGER NOT NULL DEFAULT(0));
CREATE TABLE si_diskann_config (
graph_table_name STRING NOT NULL PRIMARY KEY,
max_degree INTEGER,
max_degree_slack_margin INTEGER,
min_query_candidate_queue_size INTEGER,
insert_candidate_queue_size INTEGER,
alpha INTEGER,
max_pruning_candidate_size INTEGER,
saturate_graph INTEGER,
minimal_pruning_on_reverse_linking INTEGER);
****** UKG.DB
CREATE TABLE IF NOT EXISTS "_MigrationMetadata" (
"Id" INTEGER NOT NULL UNIQUE,
"Version" INTEGER NOT NULL,
PRIMARY KEY("Id")
);
CREATE TABLE IF NOT EXISTS "IdTable" (
"NextId" INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS "WindowCapture" (
"Id" INTEGER PRIMARY KEY,
"Name" TEXT NOT NULL,
"ImageToken" TEXT,
"IsForeground" INTEGER,
"WindowId" INTEGER,
"WindowBounds" TEXT,
"WindowTitle" TEXT,
"Properties" BLOB,
"TimeStamp" INTEGER NOT NULL
, "IsProcessed" INTEGER NOT NULL DEFAULT 1, "ActivationUri" TEXT, "ActivityId" TEXT, "FallbackUri" TEXT);
CREATE TABLE IF NOT EXISTS "App" (
"Id" INTEGER PRIMARY KEY,
"WindowsAppId" TEXT UNIQUE NOT NULL COLLATE NOCASE,
"IconUri" TEXT,
"Name" TEXT NOT NULL COLLATE NOCASE,
"Path" TEXT NOT NULL COLLATE NOCASE,
"Properties" TEXT
);
CREATE TABLE IF NOT EXISTS "WindowCaptureAppRelation" (
"WindowCaptureId" INTEGER NOT NULL,
"AppId" INTEGER NOT NULL,
PRIMARY KEY(WindowCaptureId, AppId),
FOREIGN KEY(WindowCaptureId) REFERENCES WindowCapture(Id) ON DELETE CASCADE,
FOREIGN KEY(AppId) REFERENCES App(Id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS "File" (
"Id" INTEGER PRIMARY KEY,
"Path" TEXT UNIQUE NOT NULL COLLATE NOCASE,
"Name" TEXT NOT NULL COLLATE NOCASE,
"Extension" TEXT COLLATE NOCASE,
"Kind" TEXT COLLATE NOCASE,
"Type" TEXT COLLATE NOCASE,
"Properties" TEXT
, "ObjectId" TEXT, "VolumeId" TEXT);
CREATE TABLE IF NOT EXISTS "WindowCaptureFileRelation" (
"WindowCaptureId" INTEGER NOT NULL,
"FileId" INTEGER NOT NULL,
PRIMARY KEY(WindowCaptureId, FileId),
FOREIGN KEY(WindowCaptureId) REFERENCES WindowCapture(Id) ON DELETE CASCADE,
FOREIGN KEY(FileId) REFERENCES File(Id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS "Web" (
"Id" INTEGER PRIMARY KEY,
"Domain" TEXT NOT NULL COLLATE NOCASE,
"Uri" TEXT UNIQUE NOT NULL,
"IconUri" TEXT,
"Properties" TEXT
);
CREATE TABLE IF NOT EXISTS "WindowCaptureWebRelation" (
"WindowCaptureId" INTEGER NOT NULL,
"WebId" INTEGER NOT NULL,
PRIMARY KEY(WindowCaptureId, WebId),
FOREIGN KEY(WindowCaptureId) REFERENCES WindowCapture(Id) ON DELETE CASCADE,
FOREIGN KEY(WebId) REFERENCES Web(Id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS "ScreenRegion" (
"Id" INTEGER PRIMARY KEY,
"WindowCaptureId" INTEGER NOT NULL,
"RegionKind" INTEGER NOT NULL,
"OcrText" TEXT,
"Bounds" TEXT,
FOREIGN KEY(WindowCaptureId) REFERENCES WindowCapture(Id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS "Topic" (
"Id" INTEGER PRIMARY KEY,
"Title" TEXT UNIQUE NOT NULL COLLATE NOCASE,
"Properties" TEXT
);
CREATE TABLE IF NOT EXISTS "WindowCaptureTopicRelation" (
"WindowCaptureId" INTEGER NOT NULL,
"TopicId" INTEGER NOT NULL,
"Score" FLOAT,
PRIMARY KEY(WindowCaptureId, TopicId),
FOREIGN KEY(WindowCaptureId) REFERENCES WindowCapture(Id) ON DELETE CASCADE,
FOREIGN KEY(TopicId) REFERENCES Topic(Id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS "AppDwellTime" (
"Id" INTEGER PRIMARY KEY,
"WindowsAppId" TEXT NOT NULL COLLATE NOCASE,
"HourOfDay" INTEGER NOT NULL,
"DayOfWeek" INTEGER NOT NULL,
"HourStartTimestamp" INTEGER NOT NULL,
"DwellTime" INTEGER NOT NULL,
FOREIGN KEY(WindowsAppId) REFERENCES App(WindowsAppId) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS "WebDomainDwellTime" (
"Id" INTEGER PRIMARY KEY,
"Domain" TEXT,
"HourOfDay" INTEGER NOT NULL,
"DayOfWeek" INTEGER NOT NULL,
"HourStartTimestamp" INTEGER NOT NULL,
"DwellTime" INTEGER NOT NULL
);
CREATE TABLE IF NOT EXISTS 'WindowCaptureTextIndex_data'(id INTEGER PRIMARY KEY, block BLOB);
CREATE TABLE IF NOT EXISTS 'WindowCaptureTextIndex_idx'(segid, term, pgno, PRIMARY KEY(segid, term)) WITHOUT ROWID;
CREATE TABLE IF NOT EXISTS 'WindowCaptureTextIndex_content'(id INTEGER PRIMARY KEY, c0, c1, c2);
CREATE TABLE IF NOT EXISTS 'WindowCaptureTextIndex_docsize'(id INTEGER PRIMARY KEY, sz BLOB);
CREATE TABLE IF NOT EXISTS 'WindowCaptureTextIndex_config'(k PRIMARY KEY, v) WITHOUT ROWID;
CREATE INDEX idx_windowcapture_name_timestamp ON WindowCapture(Name,TimeStamp);
CREATE INDEX idx_windowcapture_timestamp ON WindowCapture(TimeStamp);
CREATE INDEX idx_app_windowsappid ON App(WindowsAppId);
CREATE INDEX idx_app_name ON App(name);
CREATE INDEX idx_app_path ON App(Path);
CREATE INDEX idx_windowcaptureapprelation_rel ON WindowCaptureAppRelation(AppId,WindowCaptureId);
CREATE INDEX idx_file_path ON File(Path);
CREATE INDEX idx_file_name ON File(Name);
CREATE INDEX idx_file_extension ON File(Extension);
CREATE INDEX idx_file_kind ON File(Kind);
CREATE INDEX idx_file_type ON File(Type);
CREATE INDEX idx_windowcapturefilerelation_rel ON WindowCaptureFileRelation(FileId,WindowCaptureId);
CREATE INDEX idx_web_domain ON Web(Domain);
CREATE INDEX idx_web_uri ON Web(Uri);
CREATE INDEX idx_windowcapturewebrelation_rel ON WindowCaptureWebRelation(WebId,WindowCaptureId);
CREATE INDEX idx_screenregion_windowcaptureid ON ScreenRegion(WindowCaptureId);
CREATE INDEX idx_screenregion_kind ON ScreenRegion(RegionKind);
CREATE INDEX idx_topic_title ON Topic(Title);
CREATE INDEX idx_windowcapturetopicrelation_rel ON WindowCaptureTopicRelation(TopicId,WindowCaptureId);
CREATE INDEX idx_windowcapture_isprocessed ON WindowCapture(IsProcessed);
CREATE INDEX idx_appdwelltime_windowsappid_hourstarttimestamp ON AppDwellTime(WindowsAppId,HourStartTimestamp);
CREATE INDEX idx_webdomaindwelltime_domain_hourstarttimestamp ON WebDomainDwellTime(Domain,HourStartTimestamp);
CREATE TRIGGER trigger_windowcapture_before_delete BEFORE DELETE ON "WindowCapture"
BEGIN
SELECT WindowCaptureDeleteImages(OLD.ImageToken) WHERE OLD.ImageToken IS NOT NULL;
DELETE FROM WindowCaptureTextIndex WHERE WindowCaptureId=OLD.Id;
END;
CREATE VIRTUAL TABLE "WindowCaptureTextIndex" USING fts5(
"WindowCaptureId" UNINDEXED,
"WindowTitle",
"OcrText",
tokenize = 'msft_icu'
);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment