Skip to content

Instantly share code, notes, and snippets.

View mocobeta's full-sized avatar

Tomoko Uchida mocobeta

View GitHub Profile
@mocobeta
mocobeta / vbyte.rs
Created December 13, 2020 04:08
vByte code in Rust
pub fn encode_vbyte(li: &[usize]) -> Vec<u8> {
fn encode(k: usize) -> Vec<u8> {
let mut vbytes = Vec::new();
let mut tmp = k;
while tmp >= 128 {
vbytes.push(128 + (tmp & 127) as u8);
tmp >>= 7;
}
vbytes.push(tmp as u8);
vbytes
@mocobeta
mocobeta / rice.rs
Created December 13, 2020 04:07
Rice code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_rice(li: &[usize], m: u32) -> BitVec {
fn encode_quotient(k: usize, m: u32) -> BitVec {
let q: usize = (((k - 1) / m as usize) as f64).floor() as usize;
// encode (quotient + 1) in unary code
let mut bv = BitVec::from_elem(q + 1, false);
bv.set(q, true);
bv
@mocobeta
mocobeta / golomb.rs
Created December 13, 2020 04:06
Golomb code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_golomb(li: &[usize], m: u32) -> BitVec {
fn encode_quotient(k: usize, m: u32) -> BitVec {
let q: usize = (((k - 1) / m as usize) as f64).floor() as usize;
// encode (quotient + 1) in unary code
let mut bv = BitVec::from_elem(q + 1, false);
bv.set(q, true);
bv
@mocobeta
mocobeta / gamma.rs
Created December 13, 2020 04:06
gamma code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_gamma(li: &[usize]) -> BitVec {
fn encode(k: usize) -> BitVec {
let body_len: usize = ((k as f64).log2().floor() as usize) + 1;
let body = BitVec::from_bytes(&k.to_be_bytes());
let mut bv = BitVec::from_elem(body_len * 2 - 1, false);
// set selector bit
@mocobeta
mocobeta / delta.rs
Created December 13, 2020 04:05
delta code in Rust
// https://crates.io/crates/bit-vec
use bit_vec::BitVec;
pub fn encode_delta(li: &[usize]) -> BitVec {
fn encode(k: usize) -> BitVec {
let body_len: usize = ((k as f64).log2().floor() as usize) + 1;
let body = BitVec::from_bytes(&k.to_be_bytes());
// set gamma encoded selector
let mut bv = encode_gamma(&[body_len]);
# Base image: official Elasticsearch 7.7.0 distribution.
FROM docker.elastic.co/elasticsearch/elasticsearch:7.7.0
# Put the Elasticsearch command-line tools on PATH.
# Uses the `ENV key=value` form; the whitespace-separated `ENV key value`
# form is legacy syntax that Docker build checks warn about.
ENV PATH=/usr/share/elasticsearch/bin:$PATH
# Subsequent instructions (and the container process) run as this user.
USER elasticsearch
# Copy the configuration file from the build context into the config directory.
COPY elasticsearch.yml /usr/share/elasticsearch/config/
# Base image: official Elasticsearch 7.7.0 distribution.
FROM docker.elastic.co/elasticsearch/elasticsearch:7.7.0
# Put the Elasticsearch command-line tools on PATH.
# Uses the `ENV key=value` form; the whitespace-separated `ENV key value`
# form is legacy syntax that Docker build checks warn about.
ENV PATH=/usr/share/elasticsearch/bin:$PATH
# Working directory for the instructions that follow.
WORKDIR /usr/share/elasticsearch
# Subsequent instructions (and the container process) run as this user.
USER elasticsearch
@mocobeta
mocobeta / IndexingExample.java
Last active April 28, 2020 02:35
Hello Lucene! (6.0.0)
package example;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@mocobeta
mocobeta / VectorFieldTest.java
Created December 1, 2019 12:32
POC example for approximate kNN vector search (LUCENE-9004)
import java.io.IOException;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Arrays;
import java.util.Random;
@mocobeta
mocobeta / wikidata2pgsql.py
Created December 13, 2014 15:02
Python script for importing Wikipedia XML dump data into PostgreSQL
# -*- coding: utf-8 -*-
import psycopg2
import xml.sax
from xml.sax.handler import ContentHandler
from dicttoxml import dicttoxml
# SQL template for inserting one row into the `pages` table (columns: id, page).
# NOTE(review): the quoted '%s' placeholders suggest the values are interpolated
# with Python string formatting rather than bound as driver parameters — confirm
# at the call site and check that the values are escaped.
INSERT_STMT = "INSERT INTO pages (id, page) VALUES('%s', '%s')"
# NOTE(review): presumably the number of inserts between commits — confirm against usage.
COMMIT_WINDOW = 10000