Skip to content

Instantly share code, notes, and snippets.

View mocobeta's full-sized avatar

Tomoko Uchida mocobeta

View GitHub Profile
@mocobeta
mocobeta / checks.sh
Last active December 18, 2021 04:28
sanity checks on jms2
# check
lucene $ ./gradlew check
BUILD SUCCESSFUL in 3m 59s
# packaging
lucene $ ./gradlew clean
lucene $ ./gradlew assembleRelease
BUILD SUCCESSFUL in 49s
# luke
@State(Scope.Benchmark)
public class SearchBenchmark {
private static final String dirPath = System.getProperty("index.dir");
private static final String[] terms1 = new String[]{"電車", "列車", "鉄道"};
private Directory dir;
private IndexReader reader;
private Query query1;
@mocobeta
mocobeta / main.rs
Last active December 13, 2020 06:52
compression algorithms performance comparison
use rand::prelude::*;
fn main() {
let mut rng = thread_rng();
let p: f32 = 0.00001;
let max_doc: usize = 1_000_000;
let mut postings: Vec<usize> = vec![rng.gen_range(1, 1000) as usize];
loop {
let next = postings.last().unwrap() + geo_random(p);
if next > max_doc {
@mocobeta
mocobeta / vbyte.rs
Created December 13, 2020 04:08
vByte code in Rust
pub fn encode_vbyte(li: &[usize]) -> Vec<u8> {
fn encode(k: usize) -> Vec<u8> {
let mut vbytes = Vec::new();
let mut tmp = k;
while tmp >= 128 {
vbytes.push(128 + (tmp & 127) as u8);
tmp >>= 7;
}
vbytes.push(tmp as u8);
vbytes
@mocobeta
mocobeta / rice.rs
Created December 13, 2020 04:07
Rice code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_rice(li: &[usize], m: u32) -> BitVec {
fn encode_quotient(k: usize, m: u32) -> BitVec {
let q: usize = (((k - 1) / m as usize) as f64).floor() as usize;
// encode (quotient + 1) in unary code
let mut bv = BitVec::from_elem(q + 1, false);
bv.set(q, true);
bv
@mocobeta
mocobeta / golomb.rs
Created December 13, 2020 04:06
Golomb code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_golomb(li: &[usize], m: u32) -> BitVec {
fn encode_quotient(k: usize, m: u32) -> BitVec {
let q: usize = (((k - 1) / m as usize) as f64).floor() as usize;
// encode (quotient + 1) in unary code
let mut bv = BitVec::from_elem(q + 1, false);
bv.set(q, true);
bv
@mocobeta
mocobeta / gamma.rs
Created December 13, 2020 04:06
Elias gamma code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_gamma(li: &[usize]) -> BitVec {
fn encode(k: usize) -> BitVec {
let body_len: usize = ((k as f64).log2().floor() as usize) + 1;
let body = BitVec::from_bytes(&k.to_be_bytes());
let mut bv = BitVec::from_elem(body_len * 2 - 1, false);
// set selector bit
@mocobeta
mocobeta / delta.rs
Created December 13, 2020 04:05
Elias delta code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_delta(li: &[usize]) -> BitVec {
fn encode(k: usize) -> BitVec {
let body_len: usize = ((k as f64).log2().floor() as usize) + 1;
let body = BitVec::from_bytes(&k.to_be_bytes());
// set gamma encoded selector
let mut bv = encode_gamma(&[body_len]);
@mocobeta
mocobeta / Dockerfile-0.1.0
Last active May 25, 2022 16:39
Configuring Elasticsearch 7 cluster on GKE
# Elasticsearch 7.7.0 image with the analysis-kuromoji plugin installed
FROM docker.elastic.co/elasticsearch/elasticsearch:7.7.0
# put the Elasticsearch command-line tools on PATH
# (key=value form; the whitespace-separated `ENV key value` form is legacy)
ENV PATH=/usr/share/elasticsearch/bin:$PATH
# switch user to elasticsearch
USER elasticsearch
# install plugins
RUN elasticsearch-plugin install analysis-kuromoji
# base image
FROM docker.elastic.co/elasticsearch/elasticsearch:7.7.0
# PATH: put the Elasticsearch command-line tools on it
# (key=value form; the whitespace-separated `ENV key value` form is legacy)
ENV PATH=/usr/share/elasticsearch/bin:$PATH
# switch user to elasticsearch
USER elasticsearch
# copy configuration file
COPY elasticsearch.yml /usr/share/elasticsearch/config/