This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import dlt | |
import duckdb | |
from dlt.sources.helpers.rest_client import RESTClient | |
from dlt.sources.helpers.rest_client.paginators import PageNumberPaginator | |
import pandas as pd | |
from typing import Any, Dict, List, Optional, Tuple | |
@dlt.source | |
def padi(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import asyncio | |
import aiohttp | |
import pandas as pd | |
import nest_asyncio | |
# Endpoint URLs used when loading dive-site data and dive-site locations.
# NOTE(review): the values read like placeholders -- replace them with the
# real endpoints before running.
BASE_PADI_GUIDE_URL = "url_for_loading_divesite_data"
BASE_PADI_MAP_URL = "url_for_loading_divesite_location"

# Patch asyncio so an event loop can be re-entered (needed when this code is
# run from an environment that already has a running loop, e.g. a notebook).
nest_asyncio.apply()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import duckdb | |
import zipfile | |
import os | |
import tempfile | |
input_zip_filename = 'absolute_filepath_where_your_zipped_file_is' | |
with tempfile.TemporaryDirectory() as temp_dir: | |
print(f"Extracting {input_zip_filename} to {temp_dir}...") |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from tqdm import tqdm | |
def download( | |
url: str, | |
filename: str, | |
auth: tuple = None, | |
chunk_size: int = 1024, | |
update_threshold = 1024*1024*128 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os | |
import nest_asyncio | |
import asyncio | |
import aiohttp | |
import pandas as pd | |
# Import the `custom` decorator only when it is not already present in the
# module globals.
if 'custom' not in globals():
    from mage_ai.data_preparation.decorators import custom

# Patch asyncio so an event loop can be re-entered from within a running loop.
nest_asyncio.apply()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async def fetch_map_data(session, top_right, bottom_left):
    """Fetch map data for a single map segment.

    Args:
        session: Client session, passed through unchanged to ``fetch_data``.
        top_right: Value placed in the ``top_right`` query parameter.
        bottom_left: Value placed in the ``bottom_left`` query parameter.

    Returns:
        Whatever ``fetch_data`` returns for the constructed URL.
    """
    # The corner values are interpolated verbatim (no URL-encoding), so the
    # caller must supply strings that are already safe inside a query string.
    url = f"{BASE_MAP_URL}?top_right={top_right}&bottom_left={bottom_left}"
    return await fetch_data(session, url)
async def get_divesites(): | |
"""Orchestrates the main data collection process.""" | |
lat_size = 20 | |
lon_size = 20 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async def fetch_data(session, url, datakey=None):
    """Fetch JSON from ``url`` and optionally extract one top-level key.

    Args:
        session: Object whose ``get(url)`` returns an async context manager
            yielding a response with a ``status`` attribute and an awaitable
            ``json()`` method (presumably an ``aiohttp.ClientSession`` --
            confirm against the caller).
        url: Address to request.
        datakey: Optional top-level key to pull out of the decoded payload.

    Returns:
        A ``(status, payload)`` tuple. ``payload`` is ``data[datakey]`` when
        ``datakey`` is truthy and present in a dict payload; otherwise it is
        the whole decoded payload.
    """
    async with session.get(url) as response:
        data = await response.json()
        # Only index into the payload when it is a JSON object. A ``null``
        # body would make ``datakey in data`` raise TypeError, and a string
        # body could pass the membership test and then fail on indexing.
        if datakey and isinstance(data, dict) and datakey in data:
            return response.status, data[datakey]
        return response.status, data
async def fetch_all_guide_data(session): | |
"""Fetches dive guide data paginated across multiple requests.""" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from dwca.read import DwCAReader

# Path of the Darwin Core Archive to open.
# NOTE(review): the value reads like a placeholder -- set it to a real path.
downloaded_archive = 'absolute_path_to_archive'

with DwCAReader(downloaded_archive) as dwca:
    # Location of the archive's core data file, as reported by its descriptor.
    core_file = dwca.descriptor.core.file_location
    print(f"Core data file is: {core_file}")

    # Load the core data file through the reader's pandas helper.
    dataframe = dwca.pd_read(core_file, parse_dates=True)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def download_zip(url, save_path):
    """Stream the resource at ``url`` to the file ``save_path``.

    Args:
        url: Address to download from.
        save_path: Destination file path; opened in binary write mode.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
            Without this check an error page would be saved as if it were
            the requested archive.
    """
    # Use the response as a context manager so the underlying connection is
    # released even if writing to disk fails part-way through.
    with requests.get(url, stream=True) as response:
        # Check the status before opening the file so a failed request does
        # not create or truncate ``save_path``.
        response.raise_for_status()
        with open(save_path, 'wb') as f:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:  # skip empty chunks
                    f.write(chunk)