Skip to content

Instantly share code, notes, and snippets.

@mooreniemi
mooreniemi / Cargo.toml
Created May 7, 2024 14:18
This is a minimal example of reading a Delta Lake table from S3 with Rust, based on [this discussion](https://delta-users.slack.com/archives/C013LCAEB98/p1714949120391979).
[package]
name = "read-table-example"
version = "0.1.0"
edition = "2021"
# Note: this dependency list has not been double-checked, and yours may vary slightly (some of my own dependencies were trimmed from it).
[dependencies]
deltalake = { version = "*", features = ["s3"] }
tokio = { version = "1", features = ["full"] }
quick-xml = { version = "0.23.0-alpha3" }
@mooreniemi
mooreniemi / Cargo.toml
Last active May 6, 2024 04:46
streaming parquet read from s3
[package]
name = "stream-pq"
version = "0.1.0"
edition = "2021"
[dependencies]
deltalake = { version = "*", features = ["s3"] }
tokio = { version = "1", features = ["full"] }
quick-xml = { version = "0.23.0-alpha3" }
aws-config = { version = "*", features = ["rustls"] }
use arrow::array::ArrayRef;
use arrow::record_batch::RecordBatch;
use parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
use parquet::arrow::ArrowWriter;
use std::env;
use std::fs::File;
use std::path::Path;
use std::time::Instant;
fn main() -> Result<(), Box<dyn std::error::Error>> {
[package]
name = "poodah"
version = "0.1.0"
edition = "2021"
[dependencies]
arrow = "25.0.0"
parquet = "25.0.0"
rusoto_core = "0.48.0"
rusoto_mock = "0.48.0"
@mooreniemi
mooreniemi / Cargo.toml
Created April 29, 2024 03:10
local (non-prod) shard assignment via etcd
[package]
name = "cete_node"
version = "0.1.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
actix-web = "4"
aws-config = "*"
aws-sdk-codecommit = "*"
@mooreniemi
mooreniemi / increment_counter.rs
Created April 29, 2024 01:51
A basic compare-and-swap (CAS) operation for etcd
/// Increments a counter stored in a JSON object within etcd under the specified key.
/// Assumes the JSON structure is {"count": int}. Will try 5 times before failing.
async fn increment_counter(
etcd: &AsyncMutex<Client>,
key: &str,
) -> Result<(), Box<dyn std::error::Error>> {
let key = key.to_string();
let mut client = etcd.lock().await;
// FIXME: actually we don't want to retry - we want to bail out and try to find another assignment
@mooreniemi
mooreniemi / ia_select.rs
Created January 29, 2024 03:54
Example of the IA-Select algorithm
use std::collections::HashMap;
use std::time::Instant;
/**
The IA-Select algorithm translated into Rust from
[Diversifying Search Results](https://www.microsoft.com/en-us/research/wp-content/uploads/2009/02/diversifying-wsdm09.pdf)
which is a "(1 − 1/`e`)-approximation algorithm for `Diversify(k)`." `Diversify(k)` is the NP-hard problem of choosing
a subset of `k` documents, drawn from multiple categories, that maximizes the probability that the subset "satisfies the average user."
IA stands for "intent aware" but intents are then mapped to categories.
@mooreniemi
mooreniemi / ray_worker_progress.py
Created July 20, 2023 16:38
ray get worker progress and result
import ray
import time
import logging
from threading import Thread
log = logging.getLogger(__name__)
@ray.remote
class Worker:
extern crate blas_src;
use hnsw::{Hnsw, Params, Searcher};
use ndarray_npy::{ViewNpyExt, WriteNpyExt};
use memmap2::{Mmap, MmapMut};
use rand_pcg::Pcg64;
use space::Metric;
use std::{fs::OpenOptions, io::BufWriter, time::Instant};
extern crate blas_src;
use hnsw::{Hnsw, Params, Searcher};
use ndarray_npy::{ViewNpyExt, WriteNpyExt};
use memmap2::{Mmap, MmapMut};
use rand_pcg::Pcg64;
use space::Metric;
use std::{fs::OpenOptions, io::BufWriter, time::Instant};