Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env bash
set -euo pipefail
# Usage: ./unpack_snapshots.sh <shard_id> <output_dir> <chunk1.gz> <chunk2.gz> ...
# Example: ./unpack_snapshots.sh 1 /data/db shard-1-part1.tar.gz shard-1-part2.tar.gz
# Require a shard id, an output directory, and at least one chunk archive.
if (( $# < 3 )); then
  # Usage errors are diagnostics: send them to stderr so they never end up
  # in captured or piped stdout.
  printf 'Usage: %s <shard_id> <output_dir> <chunk1.gz> <chunk2.gz> ...\n' "$0" >&2
  exit 1
fi
{"level":"info","time":1741623147008,"pid":4418,"hostname":"i-0a9ae0f900bf0651d.ec2.internal","class":"SnapchainBackfillService","fid":4,"storeType":2,"hubUsage":292,"snapchainUsage":291,"difference":1,"msg":"USAGE MISMATCH: fid: 4 store_type: LINKS, hub usage: 292, snapchain usage 291"}
{"level":"info","time":1741623147033,"pid":4418,"hostname":"i-0a9ae0f900bf0651d.ec2.internal","class":"SnapchainBackfillService","fid":5,"storeType":4,"hubUsage":5,"snapchainUsage":4,"difference":1,"msg":"USAGE MISMATCH: fid: 5 store_type: USER_DATA, hub usage: 5, snapchain usage 4"}
{"level":"info","time":1741623147034,"pid":4418,"hostname":"i-0a9ae0f900bf0651d.ec2.internal","class":"SnapchainBackfillService","fid":5,"storeType":6,"hubUsage":1,"snapchainUsage":0,"difference":1,"msg":"USAGE MISMATCH: fid: 5 store_type: USERNAME_PROOFS, hub usage: 1, snapchain usage 0"}
{"level":"info","time":1741623147034,"pid":4418,"hostname":"i-0a9ae0f900bf0651d.ec2.internal","class":"SnapchainBackfillService","fid":5,"storeType":5,"hubUsag
#![feature(iter_array_chunks)]
use std::collections::HashSet;
use std::fs::File;
use std::io::{self, BufRead};
/// Maps a letter to its numeric score: `'a'..='z'` yield 1 through 26 and
/// `'A'..='Z'` yield 27 through 52.
///
/// Any character that is not an ASCII lowercase letter is measured relative
/// to `'A'`, the same way an uppercase letter is.
fn score(c: char) -> u32 {
    // Pick the reference letter and the score assigned to that letter.
    let (anchor, first_score) = if c.is_ascii_lowercase() {
        ('a', 1)
    } else {
        ('A', 27)
    };
    first_score + c as u32 - anchor as u32
}
public void doScan(AmazonDynamoDb client, String tableName, int numSegments) {
RateLimiter rateLimiter = RateLimiter.create(getScanRateLimit(client,
tableName, numSegments))
while (!done) {
ScanResult result = client.scan(/* feed scan request in */);
// do processing ...
rateLimiter.acquire(result.getConsumedCapacity().getCapacityUnits());
}
}
/**
 * Computes the read rate that one scan segment may consume.
 *
 * @param client      DynamoDB client used to describe the table
 * @param tableName   table whose provisioned read capacity is looked up
 * @param numSegments number of segments the read budget is divided across
 * @return {@code desiredRcuUsage / numSegments}
 */
public int getScanRateLimit(AmazonDynamoDB client, String tableName,
        int numSegments) {
    TableDescription tableDesc = client.describeTable(tableName).getTable();
    // Note: this will return 0 if the table has RCU autoscaling enabled
    final long tableRcu = tableDesc.getProvisionedThroughput().getReadCapacityUnits();
    // The numSegments parameter is used as-is. Declaring a second local named
    // numSegments (previously read from config) clashes with the parameter
    // and does not compile.
    // NOTE(review): desiredRcuUsage is not defined in this block; presumably
    // it is derived from tableRcu in code that is not shown -- confirm.
    return desiredRcuUsage / numSegments;
}
/**
 * Computes the read rate that one scan segment may consume.
 *
 * @param client      DynamoDB client used to describe the table
 * @param tableName   table whose provisioned read capacity is looked up
 * @param numSegments number of segments the read budget is divided across
 * @return {@code desiredRcuUsage / numSegments}
 */
public int getScanRateLimit(AmazonDynamoDB client, String tableName,
        int numSegments) {
    TableDescription tableDesc = client.describeTable(tableName).getTable();
    // Note: this will return 0 if the table has RCU autoscaling enabled
    final long tableRcu = tableDesc.getProvisionedThroughput().getReadCapacityUnits();
    // The numSegments parameter is used as-is. Declaring a second local named
    // numSegments (previously read from config) clashes with the parameter
    // and does not compile.
    // NOTE(review): desiredRcuUsage is not defined in this block; presumably
    // it is derived from tableRcu in code that is not shown -- confirm.
    return desiredRcuUsage / numSegments;
}
+---------------------+-------------+
| AcquisitionSource | TotalSale |
|---------------------+-------------|
| AffiliateReferral | 45779.0 |
| PaidSearch | 42668.0 |
| OrganicSearch | 41467.0 |
| Email | 37040.0 |
| SocialMedia | 36509.0 |
| Display | 34516.0 |
+---------------------+-------------+
-- Summed UnitPrice per customer acquisition source, rounded up to a whole
-- number and listed from the largest total to the smallest.
SELECT
    cust.AcquisitionSource,
    CEIL(SUM(ord.UnitPrice)) AS TotalSale
FROM customers AS cust
JOIN orders AS ord
    -- The customer-side id is cast to an integer before the comparison.
    ON ord.CustomerID = CAST(cust.CustomerID AS integer)
GROUP BY cust.AcquisitionSource
ORDER BY TotalSale DESC
+------------------+-------------+
| Minute | TotalSale |
|------------------+-------------|
| 2019-01-08 11:52 | 40261.0 |
| 2019-01-08 11:53 | 66759.0 |
| 2019-01-08 11:54 | 72043.0 |
| 2019-01-08 11:55 | 56221.0 |
+------------------+-------------+
Time: 0.451s
-- Step 1: one row per (InvoiceNo, _event_time) pair, carrying the summed
-- UnitPrice and the event time rendered as an hour:minute label.
-- NOTE(review): the '%H:%M' label has no date component, while the sample
-- result listed before this query shows values such as '2019-01-08 11:52';
-- confirm which format is intended.
WITH invoice_totals AS (
    SELECT
        InvoiceNo,
        FORMAT_TIMESTAMP('%H:%M', DATETIME(_event_time)) AS Minute,
        SUM(UnitPrice) AS OrderValue
    FROM "orders"
    GROUP BY InvoiceNo, _event_time
)
-- Step 2: add the per-invoice values up per minute label, round the total
-- up, and list the labels in ascending order.
SELECT
    Minute,
    CEIL(SUM(OrderValue)) AS TotalSale
FROM invoice_totals
GROUP BY Minute
ORDER BY Minute;