Alexander Kolmakov alex-kolmakov

## load_padi_divesites_with_dlt.py
import dlt
import duckdb
from dlt.sources.helpers.rest_client import RESTClient
from dlt.sources.helpers.rest_client.paginators import PageNumberPaginator
import pandas as pd
from typing import Any, Dict, List, Optional, Tuple


@dlt.source
def padi():

## load_padi_divesites.py
import os
import asyncio
import aiohttp
import pandas as pd
import nest_asyncio

BASE_PADI_GUIDE_URL = "url_for_loading_divesite_data"
BASE_PADI_MAP_URL = "url_for_loading_divesite_location"

nest_asyncio.apply()

## ducking_obis.py
import duckdb
import zipfile
import os
import tempfile


input_zip_filename = 'absolute_filepath_where_your_zipped_file_is'

with tempfile.TemporaryDirectory() as temp_dir:
    print(f"Extracting {input_zip_filename} to {temp_dir}...")

## downloading_large_file.py
import requests
from tqdm import tqdm


def download(
    url: str,
    filename: str,
    auth: tuple = None,
    chunk_size: int = 1024,
    update_threshold = 1024*1024*128

## async_mage_loader.py
import os
import nest_asyncio
import asyncio
import aiohttp
import pandas as pd

if 'custom' not in globals():
    from mage_ai.data_preparation.decorators import custom

nest_asyncio.apply()

## fetch_map_data.py
async def fetch_map_data(session, top_right, bottom_left):
    """Request the map data for one segment bounded by two corners.

    Args:
        session: HTTP session object handed straight to ``fetch_data``.
        top_right: Value interpolated into the ``top_right`` query parameter.
        bottom_left: Value interpolated into the ``bottom_left`` query
            parameter.

    Returns:
        Whatever ``fetch_data`` returns for the segment URL.
    """
    # NOTE(review): the corner values are interpolated as-is (no URL
    # encoding); BASE_MAP_URL and fetch_data are defined outside this snippet.
    segment_url = f"{BASE_MAP_URL}?top_right={top_right}&bottom_left={bottom_left}"
    segment_result = await fetch_data(session, segment_url)
    return segment_result


async def get_divesites():
    """Orchestrates the main data collection process."""
    lat_size = 20
    lon_size = 20

## load_data.py

async def fetch_data(session, url, datakey=None):
    """Fetch JSON from ``url`` and optionally extract one top-level key.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding the response (presumably an ``aiohttp.ClientSession``;
            confirm against the caller).
        url: Address to request.
        datakey: Optional key to pull out of the decoded payload.

    Returns:
        A ``(status, payload)`` tuple. ``payload`` is ``data[datakey]`` when
        ``datakey`` is given and the decoded body is a dict containing it;
        otherwise it is the whole decoded body.
    """
    async with session.get(url) as response:
        data = await response.json()
        # Only index into JSON objects: a list or string payload can pass an
        # ``in`` test yet fail on ``data[datakey]``, and a ``null`` payload
        # fails the ``in`` test itself.
        if datakey and isinstance(data, dict) and datakey in data:
            return response.status, data[datakey]
        return response.status, data


async def fetch_all_guide_data(session):
    """Fetches dive guide data paginated across multiple requests."""

## parse_dwca.py
from dwca.read import DwCAReader

# Placeholder: replace with the path of the downloaded archive.
downloaded_archive = 'absolute_path_to_archive'

with DwCAReader(downloaded_archive) as dwca:
    # The archive descriptor names the core data file; look it up once and
    # reuse it for both the log line and the read.
    core_file = dwca.descriptor.core.file_location
    print("Core data file is: {}".format(core_file))

    # Read the core file through the reader's pandas helper.
    dataframe = dwca.pd_read(core_file, parse_dates=True)

## download_zip.py
import requests


def download_zip(url, save_path, chunk_size=1024):
    """Stream the resource at ``url`` into the file at ``save_path``.

    The body is written chunk by chunk, so the whole download never has to
    be held in memory at once.

    Args:
        url: Address of the file to download.
        save_path: Destination path. It is opened in binary write mode, so
            an existing file is overwritten.
        chunk_size: Number of bytes requested per chunk from the response
            stream.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    # Use the response as a context manager so the streamed connection is
    # released even if writing fails part-way through.
    with requests.get(url, stream=True) as response:
        # Fail before touching the disk instead of saving an error page
        # under the archive's name.
        response.raise_for_status()
        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=chunk_size):
                if chunk:  # skip empty chunks
                    f.write(chunk)
	import dlt
	import duckdb
	from dlt.sources.helpers.rest_client import RESTClient
	from dlt.sources.helpers.rest_client.paginators import PageNumberPaginator
	import pandas as pd
	from typing import Any, Dict, List, Optional, Tuple


	@dlt.source
	def padi():
	import os
	import asyncio
	import aiohttp
	import pandas as pd
	import nest_asyncio

	BASE_PADI_GUIDE_URL = "url_for_loading_divesite_data"
	BASE_PADI_MAP_URL = "url_for_loading_divesite_location"

	nest_asyncio.apply()
	import duckdb
	import zipfile
	import os
	import tempfile


	input_zip_filename = 'absolute_filepath_where_your_zipped_file_is'

	with tempfile.TemporaryDirectory() as temp_dir:
	print(f"Extracting {input_zip_filename} to {temp_dir}...")
	import requests
	from tqdm import tqdm


	def download(
	url: str,
	filename: str,
	auth: tuple = None,
	chunk_size: int = 1024,
	update_threshold = 1024*1024*128
	async def fetch_map_data(session, top_right, bottom_left):
	    """Request the map data for one segment bounded by two corners.

	    Args:
	        session: HTTP session object handed straight to ``fetch_data``.
	        top_right: Value interpolated into the ``top_right`` query
	            parameter.
	        bottom_left: Value interpolated into the ``bottom_left`` query
	            parameter.

	    Returns:
	        Whatever ``fetch_data`` returns for the segment URL.
	    """
	    # NOTE(review): the corner values are interpolated as-is (no URL
	    # encoding); BASE_MAP_URL is defined outside this snippet.
	    url = f"{BASE_MAP_URL}?top_right={top_right}&bottom_left={bottom_left}"
	    return await fetch_data(session, url)


	async def get_divesites():
	"""Orchestrates the main data collection process."""
	lat_size = 20
	lon_size = 20

	async def fetch_data(session, url, datakey=None):
	    """Fetch JSON from ``url`` and optionally extract one top-level key.

	    Args:
	        session: Object whose ``get(url)`` returns an async context manager
	            yielding the response (presumably an ``aiohttp.ClientSession``;
	            confirm against the caller).
	        url: Address to request.
	        datakey: Optional key to pull out of the decoded payload.

	    Returns:
	        A ``(status, payload)`` tuple. ``payload`` is ``data[datakey]`` when
	        ``datakey`` is given and the decoded body is a dict containing it;
	        otherwise it is the whole decoded body.
	    """
	    async with session.get(url) as response:
	        data = await response.json()
	        # Only index into JSON objects: a list or string payload can pass
	        # an ``in`` test yet fail on ``data[datakey]``, and a ``null``
	        # payload fails the ``in`` test itself.
	        if datakey and isinstance(data, dict) and datakey in data:
	            return response.status, data[datakey]
	        return response.status, data


	async def fetch_all_guide_data(session):
	"""Fetches dive guide data paginated across multiple requests."""
	from dwca.read import DwCAReader

	# Placeholder: replace with the path of the downloaded archive.
	downloaded_archive = 'absolute_path_to_archive'

	with DwCAReader(downloaded_archive) as dwca:
	    # The archive descriptor names the core data file.
	    print("Core data file is: {}".format(dwca.descriptor.core.file_location))

	    # Read the core file through the reader's pandas helper.
	    dataframe = dwca.pd_read(dwca.descriptor.core.file_location, parse_dates=True)
	import requests


	def download_zip(url, save_path, chunk_size=1024):
	    """Stream the resource at ``url`` into the file at ``save_path``.

	    The body is written chunk by chunk, so the whole download never has
	    to be held in memory at once.

	    Args:
	        url: Address of the file to download.
	        save_path: Destination path. It is opened in binary write mode,
	            so an existing file is overwritten.
	        chunk_size: Number of bytes requested per chunk from the response
	            stream.

	    Raises:
	        requests.HTTPError: If the server answers with a 4xx/5xx status.
	    """
	    # Use the response as a context manager so the streamed connection is
	    # released even if writing fails part-way through.
	    with requests.get(url, stream=True) as response:
	        # Fail before touching the disk instead of saving an error page
	        # under the archive's name.
	        response.raise_for_status()
	        with open(save_path, 'wb') as f:
	            for chunk in response.iter_content(chunk_size=chunk_size):
	                if chunk:  # skip empty chunks
	                    f.write(chunk)