I hereby claim:
- I am youssef-harby on github.
- I am yharby (https://keybase.io/yharby) on keybase.
- I have a public key ASBmkyd87Ju7hwN_Uej1lBn8r2SbYZsl12bXghbTDcxPNwo
To claim this, I am signing this object:
import pyarrow as pa | |
import pyarrow.parquet as pq | |
from pathlib import Path | |
import json | |
import pandas as pd | |
def process_parquet_file(parquet_path): | |
# Read the Parquet file into a PyArrow Table | |
table = pq.read_table(parquet_path) |
import httpx | |
import datetime | |
import hashlib | |
import hmac | |
# AWS credentials | |
access_key = "secret" | |
secret_key = "secret" | |
region = "eu-central-1" # e.g. 'us-west-1' | |
bucket = "bucket-name" |
# dataset ref : https://www.kaggle.com/datasets/max-mind/world-cities-database/code | |
# pip install duckdb | |
import duckdb | |
con = duckdb.connect() | |
data = """ | |
-- Environment setup | |
SET enable_progress_bar = true; |
import duckdb | |
con = duckdb.connect('./duckdb.duckdb') | |
data = """ | |
SET memory_limit = '32GB'; | |
SET threads TO 16; | |
SET enable_progress_bar = true; | |
SET enable_progress_bar_print = true; | |
INSTALL httpfs; | |
INSTALL spatial; |
import csv | |
import re | |
def extract_lat_long(csv_file, url_column): | |
pattern = r".+!3d(-?\d+\.\d+)!4d(-?\d+\.\d+).+" | |
data = [] | |
# Read the CSV file and extract latitude and longitude values | |
with open(csv_file, 'r') as f: |
I hereby claim:
To claim this, I am signing this object: