Skip to content

Instantly share code, notes, and snippets.

@sixy6e
Last active January 21, 2022 04:22
Show Gist options
  • Save sixy6e/bbb2746dff43cd1d72e9d54e18a38628 to your computer and use it in GitHub Desktop.
Save sixy6e/bbb2746dff43cd1d72e9d54e18a38628 to your computer and use it in GitHub Desktop.
Toy script to scrape a specific collection of imagery
This toy script was put together to assist in trawling some imagery and building a small catalog.
"""
A basic script to trawl an imagery archive and build a custom catalog.
"""
from pathlib import Path
import click
import rasterio # type: ignore
import pandas # type: ignore
import geopandas # type: ignore
from shapely.geometry import Polygon # type: ignore
import structlog
# Module-level structured logger used for progress messages in this script.
_LOG = structlog.get_logger()
def image_info(pathname) -> geopandas.GeoDataFrame:
    """
    Open an image and describe its footprint as a single-row GeoDataFrame.

    The footprint is built by mapping the four outer corners of the pixel
    grid (UL, UR, LR, LL) through the dataset's own row/col -> x/y lookup
    instead of using axis-aligned bounds, as this data is not northup
    (has x and y rotations).

    :param pathname:
        Location of the image to open; a ``pathlib.Path`` or anything
        ``pathlib.Path`` can be constructed from (e.g. a ``str``).

    :return:
        A GeoDataFrame with one row and the columns ``pathname`` (the path
        as a string), ``vessel_side`` (last character of the file stem once
        surrounding underscores are stripped) and ``geometry`` (the
        footprint polygon), carrying the CRS reported by the dataset.
    """
    # Accept plain strings as well as Path objects; `.stem` is needed below.
    pathname = Path(pathname)

    with rasterio.open(pathname) as src:
        n_rows, n_cols = src.shape

        # Pixel indices of the UL, UR, LR, LL corners. Index n_rows / n_cols
        # is one past the last pixel, so its upper-left corner is the outer
        # edge of the image.
        corner_indices = [
            (0, 0),
            (0, n_cols),
            (n_rows, n_cols),
            (n_rows, 0),
        ]

        # offset="ul" returns the upper-left corner of each addressed pixel.
        # The default ("center") would shift the whole footprint by half a
        # pixel along both axes.
        polygon = Polygon(
            [src.xy(row, col, offset="ul") for row, col in corner_indices]
        )

        data = {
            "pathname": [str(pathname)],
            "vessel_side": [pathname.stem.strip("_")[-1]],  # ends in P or S
            "geometry": [polygon],
        }

        # Read the CRS while the dataset is still open.
        gdf = geopandas.GeoDataFrame(data, crs=src.crs)

    return gdf
# Both directory options are mandatory and must name existing directories;
# without them Path(None) would fail with an unhelpful TypeError.
@click.command()
@click.option(
    "--rootdir",
    type=click.Path(exists=True, file_okay=False, readable=True),
    required=True,
    help="The root input directory",
)
@click.option(
    "--outdir",
    type=click.Path(exists=True, file_okay=False, writable=True),
    required=True,
    help="The base output directory to contain the output file",
)
@click.option("--pattern", default="*.tif", help="The filename pattern to search for.")
def main(rootdir: str, outdir: str, pattern: str = "*.tif") -> None:
    """Find the imagery, and build a mini catalog as a vector file."""
    # NOTE: the docstring above doubles as the command's --help text, so the
    # implementation notes live in comments instead.
    #
    # rootdir: directory searched recursively for files matching `pattern`.
    # outdir:  directory that receives "data-catalog.gpkg".
    # Raises click.ClickException when nothing matches, since there would be
    # nothing to concatenate or write.
    catalog = []

    # sorted() makes the row order of the catalog independent of the order in
    # which the filesystem happens to list the files.
    for file in sorted(Path(rootdir).rglob(pattern)):
        # we'll store the full pathname, not relative, nor basename
        pname = file.absolute()
        _LOG.info("processing datafile", pathname=str(pname))
        catalog.append(image_info(pname))

    if not catalog:
        # pandas.concat raises a bare ValueError on an empty list; report the
        # actual problem and exit non-zero instead.
        raise click.ClickException(
            f"no files matching {pattern!r} found under {rootdir}"
        )

    _LOG.info("concatenating individual dataframes")
    # Every per-image frame has index 0; ignore_index gives the combined
    # frame a unique 0..n-1 index.
    gdf = pandas.concat(catalog, ignore_index=True)

    out_pathname = Path(outdir).joinpath("data-catalog.gpkg")
    _LOG.info("writing catalog as a GeoPackage", out_pathname=str(out_pathname))
    gdf.to_file(str(out_pathname), driver="GPKG")

    _LOG.info("finished")
# Invoke the command-line entry point only when run as a script.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment