Skip to content

Instantly share code, notes, and snippets.

@saumalya75
Last active April 22, 2021 11:31
Show Gist options
  • Save saumalya75/426b0e03738e7a966286c7f091dfad9d to your computer and use it in GitHub Desktop.
Save saumalya75/426b0e03738e7a966286c7f091dfad9d to your computer and use it in GitHub Desktop.
MINIO file extraction
import os
import boto3
import logging
import tempfile
import geopandas
# Emit INFO-level (and above) records through the root logger.
logging.basicConfig(level=logging.INFO)

# Placeholders: replace with the real bucket name and the object keys of the
# shape file (.shp) and its companion index file (.shx) inside that bucket.
MINIO_BUCKET = '<minio_bucket_name>'
SHAPE_FILE_NAME, SHX_FILE_NAME = '<shape_file_path_name>', '<shx_file_path_name>'
def read_geo_shape_data(local_file_pathname: str):
    """Read a shape file from the local file system with geopandas.

    Args:
        local_file_pathname: Path of the local shape (``.shp``) file to read.

    Returns:
        The object produced by ``geopandas.read_file`` for that path.
    """
    # The original message carried a stray, unbalanced ']' after the path.
    logging.info(f"Reading shape file - {local_file_pathname}:")
    return geopandas.read_file(local_file_pathname)
# Connection settings for the MinIO server; every value is a placeholder that
# has to be replaced before the script can run.
_minio_connection_settings = {
    'endpoint_url': '<minio_server_url>:<minio_port_usually_9000>',
    'aws_access_key_id': '<minio_access_key>',
    'aws_secret_access_key': '<minio_secret_key>',
}

# boto3 S3 resource pointed at the MinIO endpoint instead of AWS.
s3_resource = boto3.resource('s3', **_minio_connection_settings)
# Create a temporary file using the `tempfile.NamedTemporaryFile` context manager,
# download the shape file from minio into that temporary file, then pass the
# temporary file name to `read_geo_shape_data` to read the content.
# NOTE(review): the download targets the path of a still-open NamedTemporaryFile;
# re-opening such a file by name is platform dependent - confirm target platforms.
with tempfile.NamedTemporaryFile('w+b', suffix='.shp') as temp_shape_file:
    # Look the bucket up once and reuse it for both downloads.
    bucket = s3_resource.Bucket(MINIO_BUCKET)

    logging.info(f"Downloading shape file - {SHAPE_FILE_NAME} to local temporary file - {temp_shape_file.name}:")
    bucket.download_file(SHAPE_FILE_NAME, temp_shape_file.name)

    # The .shx file goes next to the .shp file under the same base name.
    # Swap the extension explicitly instead of slicing off the last two characters.
    local_shx_file_name = os.path.splitext(temp_shape_file.name)[0] + '.shx'
    try:
        logging.info(f"Downloading shx file - {SHX_FILE_NAME} to local file - {local_shx_file_name}:")
        # `download_file` writes to the given path itself, so no separate
        # `open(...)` handle is needed around it.
        bucket.download_file(SHX_FILE_NAME, local_shx_file_name)
        geo_shape_df = read_geo_shape_data(temp_shape_file.name)
    finally:
        # Clean up even when the download or the read fails; otherwise the
        # .shx file would be left behind on disk.
        logging.info(f"Removing local shx file - {local_shx_file_name}. It will not be cleaned up automatically,"
                     f" because it is not a temporary file.")
        try:
            os.remove(local_shx_file_name)
        except FileNotFoundError:
            # The download failed before the file was created; nothing to remove.
            pass

# The `geo_shape_df` dataframe is available outside the context manager, use it based on requirement
logging.info(geo_shape_df)
@saumalya75
Copy link
Author

If the shape file comes with supporting files other than the .shx file (for example .dbf or .prj), download each of them into the same directory, with the same base name and the proper extension, before running read_file on the .shp file.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment