Created
June 21, 2023 14:16
-
-
Save Kerollmops/46111a12b28bded8fa022ee731fbe331 to your computer and use it in GitHub Desktop.
A small program that computes the stats of an LMDB Meilisearch index.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//! A small program that computes the stats of an LMDB Meilisearch index.
//!
//! ```cargo
//! [dependencies]
//! anyhow = "1.0.71"
//! clap = { version = "4.3.5", features = ["derive"] }
//! heed = "0.20.0-alpha.1"
//! ```
use std::path::PathBuf; | |
use clap::Parser; | |
use heed::{types::ByteSlice, EnvOpenOptions, PolyDatabase, RoTxn}; | |
// Names of the named LMDB databases that `main` opens inside the index
// environment. Each constant is passed verbatim as the database name, so the
// string values must not be altered.
pub const MAIN: &str = "main";
pub const WORD_DOCIDS: &str = "word-docids";
pub const EXACT_WORD_DOCIDS: &str = "exact-word-docids";
pub const WORD_PREFIX_DOCIDS: &str = "word-prefix-docids";
pub const EXACT_WORD_PREFIX_DOCIDS: &str = "exact-word-prefix-docids";
pub const WORD_PAIR_PROXIMITY_DOCIDS: &str = "word-pair-proximity-docids";
pub const WORD_PREFIX_PAIR_PROXIMITY_DOCIDS: &str = "word-prefix-pair-proximity-docids";
pub const PREFIX_WORD_PAIR_PROXIMITY_DOCIDS: &str = "prefix-word-pair-proximity-docids";
pub const WORD_POSITION_DOCIDS: &str = "word-position-docids";
pub const WORD_FIELD_ID_DOCIDS: &str = "word-field-id-docids";
pub const WORD_PREFIX_POSITION_DOCIDS: &str = "word-prefix-position-docids";
pub const WORD_PREFIX_FIELD_ID_DOCIDS: &str = "word-prefix-field-id-docids";
pub const FIELD_ID_WORD_COUNT_DOCIDS: &str = "field-id-word-count-docids";
pub const FACET_ID_F64_DOCIDS: &str = "facet-id-f64-docids";
pub const FACET_ID_EXISTS_DOCIDS: &str = "facet-id-exists-docids";
pub const FACET_ID_IS_NULL_DOCIDS: &str = "facet-id-is-null-docids";
pub const FACET_ID_IS_EMPTY_DOCIDS: &str = "facet-id-is-empty-docids";
pub const FACET_ID_STRING_DOCIDS: &str = "facet-id-string-docids";
pub const FIELD_ID_DOCID_FACET_F64S: &str = "field-id-docid-facet-f64s";
pub const FIELD_ID_DOCID_FACET_STRINGS: &str = "field-id-docid-facet-strings";
// NOTE: the identifier is singular but the database name ends in "docids".
pub const VECTOR_ID_DOCID: &str = "vector-id-docids";
pub const DOCUMENTS: &str = "documents";
// NOTE: unlike every other name here, this one uses underscores, not hyphens.
pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
/// Simple program to greet a person | |
#[derive(Parser, Debug)] | |
#[command(author, version, about, long_about = None)] | |
struct Args { | |
/// The path to the LMDB database. | |
path: PathBuf, | |
} | |
fn main() -> anyhow::Result<()> { | |
let Args { path } = Args::parse(); | |
let env = EnvOpenOptions::new().max_dbs(24).open(path)?; | |
let mut wtxn = env.write_txn()?; | |
let main = env.create_poly_database(&mut wtxn, Some(MAIN))?; | |
let word_docids = env.create_poly_database(&mut wtxn, Some(WORD_DOCIDS))?; | |
let exact_word_docids = env.create_poly_database(&mut wtxn, Some(EXACT_WORD_DOCIDS))?; | |
let word_prefix_docids = env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_DOCIDS))?; | |
let exact_word_prefix_docids = | |
env.create_poly_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?; | |
let word_pair_proximity_docids = | |
env.create_poly_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?; | |
let script_language_docids = | |
env.create_poly_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?; | |
let word_prefix_pair_proximity_docids = | |
env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_PAIR_PROXIMITY_DOCIDS))?; | |
let prefix_word_pair_proximity_docids = | |
env.create_poly_database(&mut wtxn, Some(PREFIX_WORD_PAIR_PROXIMITY_DOCIDS))?; | |
let word_position_docids = env.create_poly_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?; | |
let word_fid_docids = env.create_poly_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?; | |
let field_id_word_count_docids = | |
env.create_poly_database(&mut wtxn, Some(FIELD_ID_WORD_COUNT_DOCIDS))?; | |
let word_prefix_position_docids = | |
env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_POSITION_DOCIDS))?; | |
let word_prefix_fid_docids = | |
env.create_poly_database(&mut wtxn, Some(WORD_PREFIX_FIELD_ID_DOCIDS))?; | |
let facet_id_f64_docids = env.create_poly_database(&mut wtxn, Some(FACET_ID_F64_DOCIDS))?; | |
let facet_id_string_docids = | |
env.create_poly_database(&mut wtxn, Some(FACET_ID_STRING_DOCIDS))?; | |
let facet_id_exists_docids = | |
env.create_poly_database(&mut wtxn, Some(FACET_ID_EXISTS_DOCIDS))?; | |
let facet_id_is_null_docids = | |
env.create_poly_database(&mut wtxn, Some(FACET_ID_IS_NULL_DOCIDS))?; | |
let facet_id_is_empty_docids = | |
env.create_poly_database(&mut wtxn, Some(FACET_ID_IS_EMPTY_DOCIDS))?; | |
let field_id_docid_facet_f64s = | |
env.create_poly_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_F64S))?; | |
let field_id_docid_facet_strings = | |
env.create_poly_database(&mut wtxn, Some(FIELD_ID_DOCID_FACET_STRINGS))?; | |
let vector_id_docid = env.create_poly_database(&mut wtxn, Some(VECTOR_ID_DOCID))?; | |
let documents = env.create_poly_database(&mut wtxn, Some(DOCUMENTS))?; | |
wtxn.commit()?; | |
let list = [ | |
(main, MAIN), | |
(word_docids, WORD_DOCIDS), | |
(exact_word_docids, EXACT_WORD_DOCIDS), | |
(word_prefix_docids, WORD_PREFIX_DOCIDS), | |
(exact_word_prefix_docids, EXACT_WORD_PREFIX_DOCIDS), | |
(word_pair_proximity_docids, WORD_PAIR_PROXIMITY_DOCIDS), | |
(script_language_docids, SCRIPT_LANGUAGE_DOCIDS), | |
( | |
word_prefix_pair_proximity_docids, | |
WORD_PREFIX_PAIR_PROXIMITY_DOCIDS, | |
), | |
( | |
prefix_word_pair_proximity_docids, | |
PREFIX_WORD_PAIR_PROXIMITY_DOCIDS, | |
), | |
(word_position_docids, WORD_POSITION_DOCIDS), | |
(word_fid_docids, WORD_FIELD_ID_DOCIDS), | |
(field_id_word_count_docids, FIELD_ID_WORD_COUNT_DOCIDS), | |
(word_prefix_position_docids, WORD_PREFIX_POSITION_DOCIDS), | |
(word_prefix_fid_docids, WORD_PREFIX_FIELD_ID_DOCIDS), | |
(facet_id_f64_docids, FACET_ID_F64_DOCIDS), | |
(facet_id_string_docids, FACET_ID_STRING_DOCIDS), | |
(facet_id_exists_docids, FACET_ID_EXISTS_DOCIDS), | |
(facet_id_is_null_docids, FACET_ID_IS_NULL_DOCIDS), | |
(facet_id_is_empty_docids, FACET_ID_IS_EMPTY_DOCIDS), | |
(field_id_docid_facet_f64s, FIELD_ID_DOCID_FACET_F64S), | |
(field_id_docid_facet_strings, FIELD_ID_DOCID_FACET_STRINGS), | |
(vector_id_docid, VECTOR_ID_DOCID), | |
(documents, DOCUMENTS), | |
]; | |
let rtxn = env.read_txn()?; | |
for (db, name) in list { | |
let stats = compute_stats(&rtxn, db)?; | |
println!("{name}"); | |
println!("{stats:?}"); | |
println!("---") | |
} | |
Ok(()) | |
} | |
/// Aggregate size figures for one database, as produced by `compute_stats`.
#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
pub struct Stats {
    /// Number of key/value entries that were iterated.
    pub number_of_entries: u64,
    /// Sum of the byte lengths of all keys.
    pub size_of_keys: u64,
    /// Sum of the byte lengths of all values.
    pub size_of_data: u64,
    /// `size_of_keys + size_of_data`.
    pub size_of_entries: u64,
}
fn compute_stats(rtxn: &RoTxn, db: PolyDatabase) -> anyhow::Result<Stats> { | |
let mut number_of_entries = 0; | |
let mut size_of_keys = 0; | |
let mut size_of_data = 0; | |
for result in db.iter::<ByteSlice, ByteSlice>(rtxn)? { | |
let (key, data) = result?; | |
number_of_entries += 1; | |
size_of_keys += key.len() as u64; | |
size_of_data += data.len() as u64; | |
} | |
Ok(Stats { | |
number_of_entries, | |
size_of_keys, | |
size_of_data, | |
size_of_entries: size_of_keys + size_of_data, | |
}) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment