Skip to content

Instantly share code, notes, and snippets.

View Ugbot's full-sized avatar
🏠
Working from home

Ben Gamble Ugbot

🏠
Working from home
View GitHub Profile
-- Step 1: Set up environment to access the Paimon catalog
SET 'execution.runtime-mode' = 'batch';
SET 'table.dynamic-table-options.enabled' = 'true';
-- Step 2: Define the source table to read LAION Parquet files
CREATE TEMPORARY TABLE source_laion (
id STRING,
url STRING,
text STRING,
height INT,
@Ugbot
Ugbot / uv_read_file.c
Created May 23, 2024 17:10 — forked from inlife/uv_read_file.c
Sample for reading a file asynchronously using libuv
// Sample for reading a file asynchronously using libuv
// taken from https://www.snip2code.com/Snippet/247423/Sample-for-reading-a-file-asynchronously
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <uv.h>
static uv_fs_t openReq;
static uv_fs_t readReq;
@Ugbot
Ugbot / loader.py
Last active May 13, 2024 10:31
uploading from S3 into clickhouse
import json
import boto3
import csv
import io
from botocore.vendored import requests
s3Client = boto3.client('s3')
demo_query = """SELECT *
FROM s3('https://datasets-documentation.s3.eu-west-3.amazonaws.com/nyc-taxi/trips_*.gz', 'TabSeparatedWithNames')
import ast
import boto3
from botocore.exceptions import ClientError
def stream_s3_file(bucket: str, key: str, file_size: int, chunk_bytes=5000) -> tuple[dict]:
"""Streams a S3 file via a generator.
Args:
bucket (str): S3 bucket
key (str): S3 object path
@Ugbot
Ugbot / basic.js
Created December 17, 2023 22:57
Transactions in Kafka JS
const { Kafka } = require('kafkajs');
const axios = require('axios');
const fs = require('fs');
const kafka = new Kafka({
clientId: 'my-app',
brokers: ['BROKERS'], // Broker list
ssl: {
rejectUnauthorized: true,
@Ugbot
Ugbot / direct_load.py
Last active November 6, 2023 21:39
s3 to clickhouse
import os
from clickhouse_driver import Client
# Initialize ClickHouse client
clickhouse_host = os.environ['CLICKHOUSE_HOST']
clickhouse_port = os.environ['CLICKHOUSE_PORT']
clickhouse_user = os.environ['CLICKHOUSE_USER']
clickhouse_password = os.environ['CLICKHOUSE_PASSWORD']
clickhouse_database = os.environ['CLICKHOUSE_DATABASE']
clickhouse_table = os.environ['CLICKHOUSE_TABLE']
@Ugbot
Ugbot / wfc.hpp
Created August 14, 2023 14:50 — forked from jdah/wfc.hpp
Wave Function Collapse
#pragma once
#include "util/types.hpp"
#include "util/std.hpp"
#include "util/ndarray.hpp"
#include "util/collections.hpp"
#include "util/rand.hpp"
#include "util/hash.hpp"
#include "util/assert.hpp"
#include "util/bitset.hpp"
@Ugbot
Ugbot / ordering.sql
Last active August 3, 2023 20:33
Flink SQL for sorting events in time windows and dead lettering the late stuff
CREATE TABLE kafka_input (
id INT,
event_timestamp STRING,
message STRING,
proctime AS PROCTIME(),
eventtime AS TO_TIMESTAMP(event_timestamp),
WATERMARK FOR eventtime AS eventtime - INTERVAL '1' SECOND
) WITH (
'connector' = 'kafka',