Skip to content

Instantly share code, notes, and snippets.

View alex-kolmakov's full-sized avatar

Alexander Kolmakov alex-kolmakov

  • Dubai, UAE
View GitHub Profile
import dlt
import duckdb
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import PageNumberPaginator
import pandas as pd
from typing import Any, Dict, List, Optional, Tuple
@dlt.source
def padi():
@alex-kolmakov
alex-kolmakov / load_padi_divesites.py
Last active May 3, 2025 14:19
Async loading divesites from PADI
import os
import asyncio
import aiohttp
import pandas as pd
import nest_asyncio
BASE_PADI_GUIDE_URL = "url_for_loading_divesite_data"
BASE_PADI_MAP_URL = "url_for_loading_divesite_location"
nest_asyncio.apply()
@alex-kolmakov
alex-kolmakov / ducking_obis.py
Last active May 2, 2025 14:58
Using DuckDB to open the OBIS parquet export in a memory-safe way
import duckdb
import zipfile
import os
import tempfile
input_zip_filename = 'absolute_filepath_where_your_zipped_file_is'
with tempfile.TemporaryDirectory() as temp_dir:
print(f"Extracting {input_zip_filename} to {temp_dir}...")
import requests
from tqdm import tqdm
def download(
url: str,
filename: str,
auth: tuple = None,
chunk_size: int = 1024,
update_threshold = 1024*1024*128
@alex-kolmakov
alex-kolmakov / async_mage_loader.py
Last active May 4, 2024 11:03
Mage async loader
import os
import nest_asyncio
import asyncio
import aiohttp
import pandas as pd
if 'custom' not in globals():
from mage_ai.data_preparation.decorators import custom
nest_asyncio.apply()
@alex-kolmakov
alex-kolmakov / fetch_map_data.py
Created May 1, 2024 14:15
fetching map data from PADI
async def fetch_map_data(session, top_right, bottom_left):
    """Request the map data for a single map segment.

    The segment is described by its ``top_right`` and ``bottom_left``
    values, which are interpolated verbatim into the query string appended
    to ``BASE_MAP_URL``. The request itself is delegated to ``fetch_data``
    and whatever that coroutine returns is handed back unchanged.
    """
    # NOTE(review): the corner values are not URL-encoded here; presumably
    # callers pass pre-formatted coordinate strings -- confirm.
    return await fetch_data(
        session,
        f"{BASE_MAP_URL}?top_right={top_right}&bottom_left={bottom_left}",
    )
async def get_divesites():
"""Orchestrates the main data collection process."""
lat_size = 20
lon_size = 20
@alex-kolmakov
alex-kolmakov / load_data.py
Created May 1, 2024 14:12
Fetching data from the PADI API
async def fetch_data(session, url, datakey=None):
    """Fetch JSON from ``url`` and optionally extract one top-level key.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with a ``status`` attribute and an awaitable
            ``json()`` (presumably an ``aiohttp.ClientSession`` -- confirm
            against callers).
        url: Address to request.
        datakey: Optional key to pull out of the decoded payload.

    Returns:
        A ``(status, payload)`` tuple. ``payload`` is ``data[datakey]`` when
        ``datakey`` is truthy and the decoded body is a dict containing that
        key; otherwise it is the whole decoded body.
    """
    async with session.get(url) as response:
        data = await response.json()
        # Only index into dicts: a list or string body can satisfy
        # ``datakey in data`` and then raise TypeError on ``data[datakey]``.
        if datakey and isinstance(data, dict) and datakey in data:
            return response.status, data[datakey]
        return response.status, data
async def fetch_all_guide_data(session):
"""Fetches dive guide data paginated across multiple requests."""
from dwca.read import DwCAReader
# Location of the Darwin Core Archive to read (placeholder value).
downloaded_archive = 'absolute_path_to_archive'

# The reader is used as a context manager, so the core file is read into a
# DataFrame while the archive is still open.
with DwCAReader(downloaded_archive) as dwca:
    core_file = dwca.descriptor.core.file_location
    print("Core data file is: {}".format(core_file))
    dataframe = dwca.pd_read(core_file, parse_dates=True)
import requests
def download_zip(url, save_path):
    """Stream the resource at ``url`` into the file at ``save_path``.

    Args:
        url: Address of the file to download.
        save_path: Destination path; opened in binary write mode, so an
            existing file at that path is overwritten.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Using the response as a context manager releases the streamed
    # connection even if writing to disk fails part-way through.
    with requests.get(url, stream=True) as response:
        # Check the status before creating the file so an HTTP error page
        # is never saved to disk in place of the archive.
        response.raise_for_status()
        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)