{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T06:32:18.690266Z",
"start_time": "2020-12-08T06:32:18.667229Z"
},
"trusted": false
},
"cell_type": "code",
"source": "import geopandas as gpd\nfrom shapely.geometry import Polygon\nfrom pathlib import Path\nimport os\nimport pandas as pd\nimport owslib.wms as wms\nimport owslib.wcs as wcs\nimport rasterio as rio\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom rasterio.warp import calculate_default_transform, reproject\nfrom rasterio.enums import Resampling\nimport rasterio.mask\nimport xarray as xr\nfrom typing import List, Tuple\nfrom pyproj.transformer import Transformer",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.951543Z",
"start_time": "2020-12-07T08:33:47.947957Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def open_geotiff(fn, bands:int=None) -> np.ndarray:\n \"\"\"Open geotiff image from path, cast it to float and scale it to 0-1 range, optionally with only `bands` input bands.\"\n Returns numpy array of shape (C,W,H)\n \"\"\"\n with rio.open(str(fn)) as f:\n data = f.read()\n data = data.astype(np.float32)\n if bands is not None: data = data[bands]\n return data",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:08.777059Z",
"start_time": "2020-12-08T08:28:08.771948Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def reproject_geotiff(infile:str, outfile:str, target_crs:str='EPSG:3857'):\n \"Reproject geotiff to some other coordinate system\"\n with rio.open(infile) as src:\n src_crs = src.crs\n tfm, w, h = calculate_default_transform(src_crs, target_crs, src.width, src.height, *src.bounds)\n kwargs = src.meta.copy()\n kwargs.update({\n 'crs':target_crs,\n 'transform': tfm,\n 'width': w,\n 'height': h\n })\n \n with rio.open(outfile, 'w', **kwargs) as dst:\n for i in range(1, src.count + 1):\n reproject(source=rio.band(src,i),\n destination=rio.band(dst,i),\n src_transform=src.transform,\n src_crs=src.crs,\n dst_transform=tfm,\n dst_crs=target_crs,\n resampling=Resampling.cubic)\n bounds = dst.bounds\n return bounds ",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:09.329442Z",
"start_time": "2020-12-08T08:28:09.322849Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def download_and_preprocess(outpath, bounds:List[float], wms_url:str, layer:str):\n \"Download data from WCS and project it to EPSG:3857\"\n size = (int((bounds[2] - bounds[0])/10), int((bounds[3] - bounds[1])/10))\n wcs_url = 'https://data.nsdc.fmi.fi/geoserver/wcs'\n w = wcs.WebCoverageService(wcs_url, version='1.0.0')\n c = w[layer]\n t = c.timepositions\n for ts in t:\n if not os.path.exists(outpath/ts[:4]): os.makedirs(outpath/ts[:4])\n img = w.getCoverage(layer,\n crs='EPSG:32634',\n bbox=bounds, \n WIDTH=size[0],\n HEIGHT=size[1],\n format='GeoTIFF',\n time=[ts])\n #interpolations= 'bicubic')\n out = open(outpath/ts[:4]/f'{ts}.tif', 'wb')\n out.write(img.read())\n out.close()\n\n if not os.path.exists(outpath/f'{ts[:4]}_repro'): os.makedirs(outpath/f'{ts[:4]}_repro')\n reproject_geotiff(outpath/ts[:4]/f'{ts}.tif', outpath/f'{ts[:4]}_repro'/f'{ts}.tif', 'EPSG:3857')",
"execution_count": 26,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.975498Z",
"start_time": "2020-12-07T08:33:47.971748Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def calc_yearly_median(path, year):\n images = [open_geotiff(path/str(year)/f)[0] for f in os.listdir(path/str(year))]\n masked_images = np.array([np.ma.masked_where(i == 0, i) for i in images])\n return np.ma.getdata(np.ma.median(masked_images, axis=0)).astype('uint8')",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.982345Z",
"start_time": "2020-12-07T08:33:47.977130Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def make_yearly_medians(data_path):\n if not os.path.exists(data_path/'yearly_repro'): os.makedirs(data_path/'yearly_repro')\n for y in [2016,2017,2018,2019,2020]:\n files = os.listdir(data_path/str(y))\n with rio.open(data_path/f'{y}_repro'/files[0]) as src:\n prof = src.profile.copy()\n with rio.open(data_path/f'yearly_repro/{y}.tif', 'w', **prof) as dst:\n newvals = calc_yearly_median(data_path, y)\n dst.write(newvals, 1)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.989076Z",
"start_time": "2020-12-07T08:33:47.984097Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def mask_beaches(data_path:Path, beach_mask_path:str):\n \"Mask only `Hietikko` areas\"\n files = os.listdir(data_path)\n beach_mask = gpd.read_file(beach_mask_path)\n for f in files:\n with rio.open(data_path/f) as src:\n temp_beach_mask = beach_mask.to_crs(src.crs)\n temp_beach_mask = temp_beach_mask.cx[src.bounds[0]:src.bounds[2], src.bounds[1]:src.bounds[3]]\n out_im, out_tfm = rasterio.mask.mask(src, temp_beach_mask.geometry, crop=False, all_touched=True)\n out_meta = src.meta\n out_meta.update({'transform':out_tfm})\n with rio.open(data_path/f'{f[:-4]}_beach.tif', 'w', **out_meta) as dst:\n dst.write(out_im)\n",
"execution_count": 39,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.995627Z",
"start_time": "2020-12-07T08:33:47.991133Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def make_graph_data(data_path:Path, outfile:str):\n \"Calculate means for each time step\"\n files = [f for f in os.listdir(data_path) if 'beach' in f]\n means = []\n for f in files:\n with rio.open(data_path/f) as src:\n vals = src.read(1)\n vals = np.ma.masked_where(vals==0, vals)\n stepmean = (np.ma.mean(vals)/100) - 1\n if np.isfinite(stepmean): \n means.append([f[:10]+'T00:00:00.000Z', stepmean])\n return means ",
"execution_count": 8,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Monitoring sandy beaches in the Baltic sea with yearly median NDVI"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Data used:\n* `Maastotietokanta` polygon data from the year 2016\n* [Sentinel 2 NDVI mosaics](https://ckan.ymparisto.fi/dataset/sentinel-2-image-index-mosaics-s2ind-sentinel-2-kuvamosaiikit-s2ind)"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Filter Baltic sea beaches from Maastotietokanta data"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Sandy beaches are defined here to be class `Hietikko`, id `34300` with maximum distance of 100 meters from `Meri`, id `36211` or `Järvi`, id `36200`"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:35:35.030411Z",
"start_time": "2020-12-07T08:33:50.007824Z"
},
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016 = gpd.read_file('../sandy-beaches-data/MTKkoosteet_2016.gdb', layer='kivikalliohiekka')\nbeaches_2016 = beaches_2016[beaches_2016.LUOKKA == 34300.]\nbeaches_2016 = beaches_2016.to_crs('EPSG:32634')",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:35:55.882976Z",
"start_time": "2020-12-07T08:35:35.032314Z"
},
"trusted": false
},
"cell_type": "code",
"source": "waters = gpd.read_file('../sandy-beaches-data/waters.shp')\nwaters = waters[(waters.LUOKKA == 36211.) | (waters.LUOKKA == 36200.)]",
"execution_count": 10,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-04T13:00:53.059016Z",
"start_time": "2020-12-04T13:00:53.044495Z"
},
"trusted": false
},
"cell_type": "code",
"source": "#waters.LUOKKA.value_counts()",
"execution_count": 12,
"outputs": [
{
"data": {
"text/plain": "36200.0 167596\n44300.0 44985\n36313.0 18599\n36211.0 680\nName: LUOKKA, dtype: int64"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:35:55.888964Z",
"start_time": "2020-12-07T08:35:55.885337Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def dist_to_water(row, waters):\n \"Function to use with df.apply. Calculates minimum distance to water polygon\"\n nearest_idx = list(waters.sindex.nearest(row.geometry.bounds))[0]\n return waters.iloc[nearest_idx].geometry.distance(row.geometry)",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:37:42.966271Z",
"start_time": "2020-12-07T08:35:55.890923Z"
},
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016['dist_to_water'] = beaches_2016.apply(lambda row: dist_to_water(row, waters), axis=1)\nbeaches_2016 = beaches_2016[beaches_2016.dist_to_water <= 100]",
"execution_count": 12,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Reproject shapefile to `EPSG:3857` for folium/Leaflet maps."
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:37:45.042488Z",
"start_time": "2020-12-07T08:37:42.969956Z"
},
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016.to_crs('EPSG:3857').to_file('../streamlit_data/beaches_sea_lake_2016.shp')",
"execution_count": 13,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Download and preprocess data"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "NDVI mosaics are be downloaded from National Satellite Data Centre."
},
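{
"metadata": {},
"cell_type": "markdown",
"source": "Before downloading, the service can be inspected with `owslib` to see the available coverages and time steps (a minimal sketch; the printed slices are just illustrative):"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Minimal sketch: inspect the NSDC WCS service before downloading.\n# Lists a few available coverage ids and the time positions of the NDVI mosaic layer.\nw = wcs.WebCoverageService('https://data.nsdc.fmi.fi/geoserver/wcs', version='1.0.0')\nprint(list(w.contents)[:5])\nc = w['PTA:s2m_ndvi']\nprint(c.timepositions[:3])",
"execution_count": null,
"outputs": []
},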
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016 = gpd.read_file('../data/beaches_sea_lake_2016.shp').to_crs('EPSG:3857')",
"execution_count": 24,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:19:53.581919Z",
"start_time": "2020-12-08T08:19:53.579372Z"
},
"trusted": false
},
"cell_type": "code",
"source": "wcs_url = 'https://data.nsdc.fmi.fi/geoserver/wcs'\nlayer = 'PTA:s2m_ndvi'",
"execution_count": 10,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Data is downloaded as `EPSG:32634` because it's easiest to control size. Basemaps for folium/Leaflet are in `EPSG:3857` **but** require coordinates as `EPSG:4326`."
},
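{
"metadata": {},
"cell_type": "markdown",
"source": "For example, the `[[lat, lon], [lat, lon]]` bounds folium expects can be derived from `EPSG:3857` raster bounds with `pyproj` (a hedged sketch; the numeric bounds below are placeholders):"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Hypothetical example: convert EPSG:3857 raster bounds into the\n# EPSG:4326 corner pairs that folium expects.\ntfm = Transformer.from_crs('EPSG:3857', 'EPSG:4326', always_xy=True)\nxmin, ymin, xmax, ymax = 2424000., 9565000., 2426000., 9570000.  # placeholder bounds\nlon_min, lat_min = tfm.transform(xmin, ymin)\nlon_max, lat_max = tfm.transform(xmax, ymax)\nfolium_bounds = [[lat_min, lon_min], [lat_max, lon_max]]\nprint(folium_bounds)",
"execution_count": null,
"outputs": []
},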
{
"metadata": {},
"cell_type": "markdown",
"source": "## Vattaja"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:11.789773Z",
"start_time": "2020-12-08T08:28:11.787127Z"
},
"trusted": false
},
"cell_type": "code",
"source": "vattaja_path = Path('../streamlit_data/vattaja/')",
"execution_count": 11,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Area bounds as `EPSG:32634`"
},
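{
"metadata": {},
"cell_type": "markdown",
"source": "Since the mosaics have 10 m pixels, the request size used by `download_and_preprocess` follows directly from the extent; a quick check with the Vattaja bounds:"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Pixel count = extent in meters / 10 m resolution, as in download_and_preprocess\nb = [615760., 7098940., 620880., 7109180.]\nprint(int((b[2] - b[0]) / 10), int((b[3] - b[1]) / 10))  # -> 512 1024",
"execution_count": null,
"outputs": []
},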
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:12.272039Z",
"start_time": "2020-12-08T08:28:12.268903Z"
},
"trusted": false
},
"cell_type": "code",
"source": "vattaja_bounds = [615760., 7098940., 620880., 7109180.]",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:29:35.457558Z",
"start_time": "2020-12-08T08:28:25.396788Z"
},
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(vattaja_path, vattaja_bounds, wcs_url, layer)",
"execution_count": 27,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Make yearly medians, excluding zeroes as they are nodata-values"
},
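{
"metadata": {},
"cell_type": "markdown",
"source": "The nodata handling in `calc_yearly_median` on a tiny synthetic stack: zeros are masked out and the median is taken per pixel over the remaining observations."
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Tiny illustration of the masking logic in calc_yearly_median:\n# zeros are treated as nodata and excluded from the per-pixel median.\narr = np.array([[[0., 120.], [110., 0.]],\n                [[100., 130.], [0., 140.]],\n                [[90., 0.], [120., 150.]]])\nmasked = np.ma.masked_where(arr == 0, arr)\nprint(np.ma.median(masked, axis=0))  # -> [[95. 125.] [115. 145.]]",
"execution_count": null,
"outputs": []
},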
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:39:57.878868Z",
"start_time": "2020-12-07T08:39:57.155194Z"
},
"trusted": false
},
"cell_type": "code",
"source": "make_yearly_medians(vattaja_path)",
"execution_count": 28,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Mask images to only contain data in beaches."
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:12.930899Z",
"start_time": "2020-12-07T08:39:58.739727Z"
},
"trusted": false
},
"cell_type": "code",
"source": "paths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(vattaja_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 40,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Make csv data for mean median NDVIs in beach for each timestep:"
},
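{
"metadata": {},
"cell_type": "markdown",
"source": "Judging by the inverse transform in `make_graph_data`, the mosaics encode NDVI as `(ndvi + 1) * 100`, so stored values 0..200 map to NDVI -1..1:"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Sanity check of the scaling assumed in make_graph_data:\n# stored value / 100 - 1 recovers NDVI in the -1..1 range.\nscaled = np.array([0., 100., 150., 200.])\nprint(scaled / 100 - 1)  # -> [-1. 0. 0.5 1.]",
"execution_count": null,
"outputs": []
},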
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:17.759686Z",
"start_time": "2020-12-07T08:40:17.314379Z"
},
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(vattaja_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(vattaja_path/'median_data.csv', index=False)",
"execution_count": 41,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Hankoniemi"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:18.612638Z",
"start_time": "2020-12-07T08:40:18.610225Z"
},
"trusted": false
},
"cell_type": "code",
"source": "hankoniemi_path = Path('../streamlit_data/hankoniemi/')",
"execution_count": 42,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:19.116234Z",
"start_time": "2020-12-07T08:40:19.113612Z"
},
"trusted": false
},
"cell_type": "code",
"source": "hankoniemi_bounds = [605520., 6631740., 609360., 6634300.]",
"execution_count": 43,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.081458Z",
"start_time": "2020-12-07T08:40:19.520972Z"
},
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(hankoniemi_path, hankoniemi_bounds, wcs_url, layer)\n\nmake_yearly_medians(hankoniemi_path)\n\npaths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(hankoniemi_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 45,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.367034Z",
"start_time": "2020-12-07T08:41:07.083653Z"
},
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(hankoniemi_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(hankoniemi_path/'median_data.csv', index=False)",
"execution_count": 46,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Kalajoki"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.372935Z",
"start_time": "2020-12-07T08:41:07.370027Z"
},
"trusted": false
},
"cell_type": "code",
"source": "kalajoki_path = Path('../streamlit_data/kalajoki/')",
"execution_count": 47,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.381081Z",
"start_time": "2020-12-07T08:41:07.375638Z"
},
"trusted": false
},
"cell_type": "code",
"source": "kalajoki_bounds = [ 633680., 7124540., 641360., 7133500.]",
"execution_count": 48,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:42:59.070236Z",
"start_time": "2020-12-07T08:41:07.384858Z"
},
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(kalajoki_path, kalajoki_bounds, wcs_url, layer)\n\nmake_yearly_medians(kalajoki_path)\n\npaths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(kalajoki_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 49,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:42:59.597704Z",
"start_time": "2020-12-07T08:42:59.071668Z"
},
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(kalajoki_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(kalajoki_path/'median_data.csv', index=False)",
"execution_count": 50,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Livojärvi\n"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "livojarvi_path = Path('../streamlit_data/livojärvi')\nlivojarvi_bounds = [817220.,7334340.,826150,7339450]",
"execution_count": 53,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(livojarvi_path, livojarvi_bounds, wcs_url, layer)\n\nmake_yearly_medians(livojarvi_path)\n\npaths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(livojarvi_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 54,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(livojarvi_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(livojarvi_path/'median_data.csv', index=False)",
"execution_count": 55,
"outputs": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/19bb57586e2c62053a5a762c902a3089"
},
"gist": {
"id": "19bb57586e2c62053a5a762c902a3089",
"data": {
"description": "Hiekkarantadata",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "319px"
},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 4
}