{
"cells": [
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T06:32:18.690266Z",
"start_time": "2020-12-08T06:32:18.667229Z"
},
"trusted": false
},
"cell_type": "code",
"source": "import geopandas as gpd\nfrom shapely.geometry import Polygon\nfrom pathlib import Path\nimport os\nimport pandas as pd\nimport owslib.wms as wms\nimport owslib.wcs as wcs\nimport rasterio as rio\nimport matplotlib.pyplot as plt\nimport numpy as np\nfrom rasterio.warp import calculate_default_transform, reproject\nfrom rasterio.enums import Resampling\nimport rasterio.mask\nimport xarray as xr\nfrom typing import List, Tuple\nfrom pyproj.transformer import Transformer",
"execution_count": 1,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.951543Z",
"start_time": "2020-12-07T08:33:47.947957Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def open_geotiff(fn, bands:int=None) -> np.ndarray:\n \"\"\"Open geotiff image from path, cast it to float and scale it to 0-1 range, optionally with only `bands` input bands.\"\n Returns numpy array of shape (C,W,H)\n \"\"\"\n with rio.open(str(fn)) as f:\n data = f.read()\n data = data.astype(np.float32)\n if bands is not None: data = data[bands]\n return data",
"execution_count": 2,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:08.777059Z",
"start_time": "2020-12-08T08:28:08.771948Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def reproject_geotiff(infile:str, outfile:str, target_crs:str='EPSG:3857'):\n \"Reproject geotiff to some other coordinate system\"\n with rio.open(infile) as src:\n src_crs = src.crs\n tfm, w, h = calculate_default_transform(src_crs, target_crs, src.width, src.height, *src.bounds)\n kwargs = src.meta.copy()\n kwargs.update({\n 'crs':target_crs,\n 'transform': tfm,\n 'width': w,\n 'height': h\n })\n \n with rio.open(outfile, 'w', **kwargs) as dst:\n for i in range(1, src.count + 1):\n reproject(source=rio.band(src,i),\n destination=rio.band(dst,i),\n src_transform=src.transform,\n src_crs=src.crs,\n dst_transform=tfm,\n dst_crs=target_crs,\n resampling=Resampling.cubic)\n bounds = dst.bounds\n return bounds ",
"execution_count": 3,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:09.329442Z",
"start_time": "2020-12-08T08:28:09.322849Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def download_and_preprocess(outpath, bounds:List[float], wms_url:str, layer:str):\n \"Download data from WCS and project it to EPSG:3857\"\n size = (int((bounds[2] - bounds[0])/10), int((bounds[3] - bounds[1])/10))\n wcs_url = 'https://data.nsdc.fmi.fi/geoserver/wcs'\n w = wcs.WebCoverageService(wcs_url, version='1.0.0')\n c = w[layer]\n t = c.timepositions\n for ts in t:\n if not os.path.exists(outpath/ts[:4]): os.makedirs(outpath/ts[:4])\n img = w.getCoverage(layer,\n crs='EPSG:32634',\n bbox=bounds, \n WIDTH=size[0],\n HEIGHT=size[1],\n format='GeoTIFF',\n time=[ts])\n #interpolations= 'bicubic')\n out = open(outpath/ts[:4]/f'{ts}.tif', 'wb')\n out.write(img.read())\n out.close()\n\n if not os.path.exists(outpath/f'{ts[:4]}_repro'): os.makedirs(outpath/f'{ts[:4]}_repro')\n reproject_geotiff(outpath/ts[:4]/f'{ts}.tif', outpath/f'{ts[:4]}_repro'/f'{ts}.tif', 'EPSG:3857')",
"execution_count": 26,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.975498Z",
"start_time": "2020-12-07T08:33:47.971748Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def calc_yearly_median(path, year):\n images = [open_geotiff(path/str(year)/f)[0] for f in os.listdir(path/str(year))]\n masked_images = np.array([np.ma.masked_where(i == 0, i) for i in images])\n return np.ma.getdata(np.ma.median(masked_images, axis=0)).astype('uint8')",
"execution_count": 5,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.982345Z",
"start_time": "2020-12-07T08:33:47.977130Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def make_yearly_medians(data_path):\n if not os.path.exists(data_path/'yearly_repro'): os.makedirs(data_path/'yearly_repro')\n for y in [2016,2017,2018,2019,2020]:\n files = os.listdir(data_path/str(y))\n with rio.open(data_path/f'{y}_repro'/files[0]) as src:\n prof = src.profile.copy()\n with rio.open(data_path/f'yearly_repro/{y}.tif', 'w', **prof) as dst:\n newvals = calc_yearly_median(data_path, y)\n dst.write(newvals, 1)",
"execution_count": 6,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.989076Z",
"start_time": "2020-12-07T08:33:47.984097Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def mask_beaches(data_path:Path, beach_mask_path:str):\n \"Mask only `Hietikko` areas\"\n files = os.listdir(data_path)\n beach_mask = gpd.read_file(beach_mask_path)\n for f in files:\n with rio.open(data_path/f) as src:\n temp_beach_mask = beach_mask.to_crs(src.crs)\n temp_beach_mask = temp_beach_mask.cx[src.bounds[0]:src.bounds[2], src.bounds[1]:src.bounds[3]]\n out_im, out_tfm = rasterio.mask.mask(src, temp_beach_mask.geometry, crop=False, all_touched=True)\n out_meta = src.meta\n out_meta.update({'transform':out_tfm})\n with rio.open(data_path/f'{f[:-4]}_beach.tif', 'w', **out_meta) as dst:\n dst.write(out_im)\n",
"execution_count": 39,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:33:47.995627Z",
"start_time": "2020-12-07T08:33:47.991133Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def make_graph_data(data_path:Path, outfile:str):\n \"Calculate means for each time step\"\n files = [f for f in os.listdir(data_path) if 'beach' in f]\n means = []\n for f in files:\n with rio.open(data_path/f) as src:\n vals = src.read(1)\n vals = np.ma.masked_where(vals==0, vals)\n stepmean = (np.ma.mean(vals)/100) - 1\n if np.isfinite(stepmean): \n means.append([f[:10]+'T00:00:00.000Z', stepmean])\n return means ",
"execution_count": 8,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Monitoring sandy beaches in the Baltic sea with yearly median NDVI"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Data used:\n* `Maastotietokanta` polygon data from the year 2016\n* [Sentinel 2 NDVI mosaics](https://ckan.ymparisto.fi/dataset/sentinel-2-image-index-mosaics-s2ind-sentinel-2-kuvamosaiikit-s2ind)"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Filter Baltic sea beaches from Maastotietokanta data"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Sandy beaches are defined here to be class `Hietikko`, id `34300` with maximum distance of 100 meters from `Meri`, id `36211` or `Järvi`, id `36200`"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:35:35.030411Z",
"start_time": "2020-12-07T08:33:50.007824Z"
},
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016 = gpd.read_file('../sandy-beaches-data/MTKkoosteet_2016.gdb', layer='kivikalliohiekka')\nbeaches_2016 = beaches_2016[beaches_2016.LUOKKA == 34300.]\nbeaches_2016 = beaches_2016.to_crs('EPSG:32634')",
"execution_count": 9,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:35:55.882976Z",
"start_time": "2020-12-07T08:35:35.032314Z"
},
"trusted": false
},
"cell_type": "code",
"source": "waters = gpd.read_file('../sandy-beaches-data/waters.shp')\nwaters = waters[(waters.LUOKKA == 36211.) | (waters.LUOKKA == 36200.)]",
"execution_count": 10,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-04T13:00:53.059016Z",
"start_time": "2020-12-04T13:00:53.044495Z"
},
"trusted": false
},
"cell_type": "code",
"source": "#waters.LUOKKA.value_counts()",
"execution_count": 12,
"outputs": [
{
"data": {
"text/plain": "36200.0 167596\n44300.0 44985\n36313.0 18599\n36211.0 680\nName: LUOKKA, dtype: int64"
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
]
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:35:55.888964Z",
"start_time": "2020-12-07T08:35:55.885337Z"
},
"trusted": false
},
"cell_type": "code",
"source": "def dist_to_water(row, waters):\n \"Function to use with df.apply. Calculates minimum distance to water polygon\"\n nearest_idx = list(waters.sindex.nearest(row.geometry.bounds))[0]\n return waters.iloc[nearest_idx].geometry.distance(row.geometry)",
"execution_count": 11,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:37:42.966271Z",
"start_time": "2020-12-07T08:35:55.890923Z"
},
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016['dist_to_water'] = beaches_2016.apply(lambda row: dist_to_water(row, waters), axis=1)\nbeaches_2016 = beaches_2016[beaches_2016.dist_to_water <= 100]",
"execution_count": 12,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Reproject shapefile to `EPSG:3857` for folium/Leaflet maps."
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:37:45.042488Z",
"start_time": "2020-12-07T08:37:42.969956Z"
},
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016.to_crs('EPSG:3857').to_file('../streamlit_data/beaches_sea_lake_2016.shp')",
"execution_count": 13,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "# Download and preprocess data"
},
{
"metadata": {},
"cell_type": "markdown",
"source": "NDVI mosaics are be downloaded from National Satellite Data Centre."
},
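{
"metadata": {},
"cell_type": "markdown",
"source": "Before downloading, the service can be inspected with `owslib` to see the available coverages and time steps (a minimal sketch; the printed slices are just illustrative):"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Minimal sketch: inspect the NSDC WCS service before downloading.\n# Lists a few available coverage ids and the time positions of the NDVI mosaic layer.\nw = wcs.WebCoverageService('https://data.nsdc.fmi.fi/geoserver/wcs', version='1.0.0')\nprint(list(w.contents)[:5])\nc = w['PTA:s2m_ndvi']\nprint(c.timepositions[:3])",
"execution_count": null,
"outputs": []
},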
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "beaches_2016 = gpd.read_file('../data/beaches_sea_lake_2016.shp').to_crs('EPSG:3857')",
"execution_count": 24,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:19:53.581919Z",
"start_time": "2020-12-08T08:19:53.579372Z"
},
"trusted": false
},
"cell_type": "code",
"source": "wcs_url = 'https://data.nsdc.fmi.fi/geoserver/wcs'\nlayer = 'PTA:s2m_ndvi'",
"execution_count": 10,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Data is downloaded as `EPSG:32634` because it's easiest to control size. Basemaps for folium/Leaflet are in `EPSG:3857` **but** require coordinates as `EPSG:4326`."
},
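{
"metadata": {},
"cell_type": "markdown",
"source": "For example, the `[[lat, lon], [lat, lon]]` bounds folium expects can be derived from `EPSG:3857` raster bounds with `pyproj` (a hedged sketch; the numeric bounds below are placeholders):"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Hypothetical example: convert EPSG:3857 raster bounds into the\n# EPSG:4326 corner pairs that folium expects.\ntfm = Transformer.from_crs('EPSG:3857', 'EPSG:4326', always_xy=True)\nxmin, ymin, xmax, ymax = 2424000., 9565000., 2426000., 9570000.  # placeholder bounds\nlon_min, lat_min = tfm.transform(xmin, ymin)\nlon_max, lat_max = tfm.transform(xmax, ymax)\nfolium_bounds = [[lat_min, lon_min], [lat_max, lon_max]]\nprint(folium_bounds)",
"execution_count": null,
"outputs": []
},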
{
"metadata": {},
"cell_type": "markdown",
"source": "## Vattaja"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:11.789773Z",
"start_time": "2020-12-08T08:28:11.787127Z"
},
"trusted": false
},
"cell_type": "code",
"source": "vattaja_path = Path('../streamlit_data/vattaja/')",
"execution_count": 11,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Area bounds as `EPSG:32634`"
},
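{
"metadata": {},
"cell_type": "markdown",
"source": "Since the mosaics have 10 m pixels, the request size used by `download_and_preprocess` follows directly from the extent; a quick check with the Vattaja bounds:"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Pixel count = extent in meters / 10 m resolution, as in download_and_preprocess\nb = [615760., 7098940., 620880., 7109180.]\nprint(int((b[2] - b[0]) / 10), int((b[3] - b[1]) / 10))  # -> 512 1024",
"execution_count": null,
"outputs": []
},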
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:28:12.272039Z",
"start_time": "2020-12-08T08:28:12.268903Z"
},
"trusted": false
},
"cell_type": "code",
"source": "vattaja_bounds = [615760., 7098940., 620880., 7109180.]",
"execution_count": 12,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-08T08:29:35.457558Z",
"start_time": "2020-12-08T08:28:25.396788Z"
},
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(vattaja_path, vattaja_bounds, wcs_url, layer)",
"execution_count": 27,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Make yearly medians, excluding zeroes as they are nodata-values"
},
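{
"metadata": {},
"cell_type": "markdown",
"source": "The nodata handling in `calc_yearly_median` on a tiny synthetic stack: zeros are masked out and the median is taken per pixel over the remaining observations."
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Tiny illustration of the masking logic in calc_yearly_median:\n# zeros are treated as nodata and excluded from the per-pixel median.\narr = np.array([[[0., 120.], [110., 0.]],\n                [[100., 130.], [0., 140.]],\n                [[90., 0.], [120., 150.]]])\nmasked = np.ma.masked_where(arr == 0, arr)\nprint(np.ma.median(masked, axis=0))  # -> [[95. 125.] [115. 145.]]",
"execution_count": null,
"outputs": []
},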
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:39:57.878868Z",
"start_time": "2020-12-07T08:39:57.155194Z"
},
"trusted": false
},
"cell_type": "code",
"source": "make_yearly_medians(vattaja_path)",
"execution_count": 28,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Mask images to only contain data in beaches."
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:12.930899Z",
"start_time": "2020-12-07T08:39:58.739727Z"
},
"trusted": false
},
"cell_type": "code",
"source": "paths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(vattaja_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 40,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "Make csv data for mean median NDVIs in beach for each timestep:"
},
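{
"metadata": {},
"cell_type": "markdown",
"source": "Judging by the inverse transform in `make_graph_data`, the mosaics encode NDVI as `(ndvi + 1) * 100`, so stored values 0..200 map to NDVI -1..1:"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "# Sanity check of the scaling assumed in make_graph_data:\n# stored value / 100 - 1 recovers NDVI in the -1..1 range.\nscaled = np.array([0., 100., 150., 200.])\nprint(scaled / 100 - 1)  # -> [-1. 0. 0.5 1.]",
"execution_count": null,
"outputs": []
},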
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:17.759686Z",
"start_time": "2020-12-07T08:40:17.314379Z"
},
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(vattaja_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(vattaja_path/'median_data.csv', index=False)",
"execution_count": 41,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Hankoniemi"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:18.612638Z",
"start_time": "2020-12-07T08:40:18.610225Z"
},
"trusted": false
},
"cell_type": "code",
"source": "hankoniemi_path = Path('../streamlit_data/hankoniemi/')",
"execution_count": 42,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:40:19.116234Z",
"start_time": "2020-12-07T08:40:19.113612Z"
},
"trusted": false
},
"cell_type": "code",
"source": "hankoniemi_bounds = [605520., 6631740., 609360., 6634300.]",
"execution_count": 43,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.081458Z",
"start_time": "2020-12-07T08:40:19.520972Z"
},
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(hankoniemi_path, hankoniemi_bounds, wcs_url, layer)\n\nmake_yearly_medians(hankoniemi_path)\n\npaths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(hankoniemi_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 45,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.367034Z",
"start_time": "2020-12-07T08:41:07.083653Z"
},
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(hankoniemi_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(hankoniemi_path/'median_data.csv', index=False)",
"execution_count": 46,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Kalajoki"
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.372935Z",
"start_time": "2020-12-07T08:41:07.370027Z"
},
"trusted": false
},
"cell_type": "code",
"source": "kalajoki_path = Path('../streamlit_data/kalajoki/')",
"execution_count": 47,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:41:07.381081Z",
"start_time": "2020-12-07T08:41:07.375638Z"
},
"trusted": false
},
"cell_type": "code",
"source": "kalajoki_bounds = [ 633680., 7124540., 641360., 7133500.]",
"execution_count": 48,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:42:59.070236Z",
"start_time": "2020-12-07T08:41:07.384858Z"
},
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(kalajoki_path, kalajoki_bounds, wcs_url, layer)\n\nmake_yearly_medians(kalajoki_path)\n\npaths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(kalajoki_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 49,
"outputs": []
},
{
"metadata": {
"ExecuteTime": {
"end_time": "2020-12-07T08:42:59.597704Z",
"start_time": "2020-12-07T08:42:59.071668Z"
},
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(kalajoki_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(kalajoki_path/'median_data.csv', index=False)",
"execution_count": 50,
"outputs": []
},
{
"metadata": {},
"cell_type": "markdown",
"source": "## Livojärvi\n"
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "livojarvi_path = Path('../streamlit_data/livojärvi')\nlivojarvi_bounds = [817220.,7334340.,826150,7339450]",
"execution_count": 53,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "download_and_preprocess(livojarvi_path, livojarvi_bounds, wcs_url, layer)\n\nmake_yearly_medians(livojarvi_path)\n\npaths = ['2016_repro', '2017_repro', '2018_repro', '2019_repro', '2020_repro', 'yearly_repro']\nfor p in paths: mask_beaches(livojarvi_path/p, '../data/beaches_sea_lake_2016.shp')",
"execution_count": 54,
"outputs": []
},
{
"metadata": {
"trusted": false
},
"cell_type": "code",
"source": "retvals = []\nfor p in paths[:-1]: retvals.extend(make_graph_data(livojarvi_path/p, 'def'))\n\noutf = pd.DataFrame(columns=['time', 'ndvi'], data=retvals)\noutf.to_csv(livojarvi_path/'median_data.csv', index=False)",
"execution_count": 55,
"outputs": []
}
],
"metadata": {
"_draft": {
"nbviewer_url": "https://gist.github.com/19bb57586e2c62053a5a762c902a3089"
},
"gist": {
"id": "19bb57586e2c62053a5a762c902a3089",
"data": {
"description": "Hiekkarantadata",
"public": true
}
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3",
"language": "python"
},
"language_info": {
"name": "python",
"version": "3.8.3",
"mimetype": "text/x-python",
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"pygments_lexer": "ipython3",
"nbconvert_exporter": "python",
"file_extension": ".py"
},
"latex_envs": {
"LaTeX_envs_menu_present": true,
"autoclose": false,
"autocomplete": true,
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 1,
"hotkeys": {
"equation": "Ctrl-E",
"itemize": "Ctrl-I"
},
"labels_anchors": false,
"latex_user_defs": false,
"report_style_numbering": false,
"user_envs_cfg": false
},
"toc": {
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"base_numbering": 1,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {
"height": "calc(100% - 180px)",
"left": "10px",
"top": "150px",
"width": "319px"
},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 4
}