Skip to content

Instantly share code, notes, and snippets.

@bubbobne
Created May 23, 2024 12:24
Show Gist options
  • Save bubbobne/47f55ede17d95c82b3a298567fcba6e3 to your computer and use it in GitHub Desktop.
Save bubbobne/47f55ede17d95c82b3a298567fcba6e3 to your computer and use it in GitHub Desktop.
Copare sLOO of kriging with measured data and also nearest station
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"id": "2a1620cd-344e-4168-ae12-2b64eabd6a66",
"metadata": {},
"source": [
"# LOO kriging"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77087d35-4cef-4d37-8220-8892e15dd400",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.style as style\n",
"from geoframepy.timeseries.io_csv import*\n",
"import matplotlib.pylab as pylab\n",
"import plotly as py\n",
"import plotly.graph_objs as go\n",
"os.chdir(\"/home/andreisd/Documents/project/GWS2022/NON_SCALE/data/\")\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "412eff72-1a5e-4cad-915c-1cf045c6f140",
"metadata": {},
"outputs": [],
"source": [
"# plot meteostations\n",
"\n",
"import geopandas as gpd\n",
"\n",
"staz=gpd.read_file(\"./meteo_data/Station2024.shp\")\n",
"staz['coords'] = staz['geometry'].apply(lambda x: x.representative_point().coords[:]) \n",
"staz['coords'] = [coords[0][:2] for coords in staz['coords']] # no z\n",
"fig=plt.figure(figsize=(20,10))\n",
"ax=fig.add_subplot(111)\n",
"staz.plot(ax=ax, label='meteostations', color='red', edgecolor='k', markersize=100)\n",
"ax.legend()\n",
"for idx, row in staz.iterrows():\n",
" plt.annotate(text=row['ID'], xy=row['coords'], horizontalalignment='right', color='k')\n",
"ax.legend()"
]
},
{
"cell_type": "markdown",
"id": "fb4362b2-ebfa-40c4-a406-a37c10948925",
"metadata": {},
"source": [
"## Temperature\n",
"\n",
"Plot from the leave one out"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "824a65f8-76d6-4549-8664-9245658b6ac1",
"metadata": {},
"outputs": [],
"source": [
"\n",
"start_date = \"2012-01-01 01:00\"\n",
"measureded =pandas_read_OMS_timeseries(\"meteo_data/temperature_cleaned.csv\")\n",
"estimated =pandas_read_OMS_timeseries(\"LOO_kriging/new_kriging_temperature_trend.csv\")\n",
"\n",
"estimated = estimated[estimated.index >= start_date]\n",
"measureded = measureded[measureded.index >= start_date]\n"
]
},
{
"cell_type": "markdown",
"id": "05cdb3f6-5a78-47b8-98a9-7222bdd7c753",
"metadata": {},
"source": [
"### nearest station"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "644268fc-0bfa-4fbc-bd0f-6a16f4b7cc9c",
"metadata": {},
"outputs": [],
"source": [
"colnames = measureded.columns\n",
"\n",
"def get_nearest(row, df):\n",
" # Check if the geometry is valid and exists\n",
" point = row.geometry\n",
" if point is None or not point.is_valid:\n",
" return pd.Series([None, None])\n",
"\n",
" # Retrieve the nearest geometry using the spatial index\n",
" # Ensure the input to nearest is correctly formatted\n",
" try:\n",
" # Query the spatial index\n",
" temp = df.drop(row.name, axis=0)\n",
" lower_bound = row.z_dem - 300\n",
" upper_bound = row.z_dem + 300\n",
" mask = (temp['z_dem'] >= lower_bound) & (temp['z_dem'] <= upper_bound)\n",
" temp = temp[mask]\n",
" sindex = temp.sindex\n",
" nearest_idx = list(sindex.nearest(point, 1, return_distance = True))\n",
"\n",
" nearest_point = temp.iloc[nearest_idx[0][1][0]]\n",
" nearest_distance = nearest_idx[1][0]\n",
" return pd.Series([int(nearest_point['ID']), nearest_distance])\n",
" except Exception as e:\n",
" # Handle any exceptions that may arise and print them to understand the issue\n",
" print(f\"Error processing row: {e}\")\n",
" return pd.Series([None, None])\n",
" \n",
"staz[\"strID\"] = staz['ID'].astype(str)\n",
"valid_points_gdf = staz[staz['strID'].isin(colnames)]\n",
"sindex = valid_points_gdf.sindex\n",
"\n",
"\n",
"# Apply the function to the original dataframe\n",
"valid_points_gdf[['nearest_id', 'nearest_distance']] = valid_points_gdf.apply(lambda row: get_nearest(row, valid_points_gdf), axis=1)\n",
"\n",
"# plot station against closest centroid\n",
"fig=plt.figure(figsize=(25,40))\n",
"measureded =pandas_read_OMS_timeseries(\"meteo_data/temperature_cleaned.csv\")\n",
"\n",
"xv=np.linspace(-20,40,100)\n",
"\n",
"pos=1\n",
"colnames = measureded.columns\n",
"\n",
"for idx, row in valid_points_gdf.iterrows():\n",
" station_name =str(row[\"ID\"])\n",
" nearste_st = str(int(row[\"nearest_id\"]))\n",
" d = str(row[\"nearest_distance\"])\n",
" ax=fig.add_subplot(9,5,pos)\n",
" ax.scatter(measureded[station_name],measureded[nearste_st] , s=40, color='r', alpha=0.25, edgecolor='r')\n",
" ax.plot(xv,xv, c='k')\n",
" ax.set_title('station %s vs station %s at distance %s' % (station_name,nearste_st,d) )\n",
" ax.set_xlabel('Measured Station ')\n",
" ax.set_ylabel('Nearste ')\n",
" ax.axis('equal')\n",
" temp_corr = measureded[station_name].corr(measureded[nearste_st])\n",
" ax.text(15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n",
" ax.set_xlim([-25,45])\n",
" ax.set_ylim([-25,45])\n",
" ax.grid()\n",
" pos=pos+1\n",
" \n",
"fig.tight_layout()\n",
"fig.savefig('./temp_station.png', dpi=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5a14e4bf-4c46-427e-8b57-3ddf82b72186",
"metadata": {},
"outputs": [],
"source": [
"for idx, row in valid_points_gdf.iterrows():\n",
" station_name =str(row[\"ID\"])\n",
" nearste_st = str(int(row[\"nearest_id\"]))\n",
" d = measureded[station_name] - measureded[nearste_st]\n",
" d = d.abs()\n",
" mask = d>15\n",
" d = d[mask]\n",
" print(row.ID)\n",
" print(d)"
]
},
{
"cell_type": "markdown",
"id": "143a9091-5ed1-495b-a008-b81eef86b127",
"metadata": {},
"source": [
"### LOO"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9605ef96-6c45-470c-b0ab-398596aa7b3c",
"metadata": {},
"outputs": [],
"source": [
"# plot station against closest centroid\n",
"fig=plt.figure(figsize=(25,40))\n",
"\n",
"xv=np.linspace(-20,40,100)\n",
"\n",
"pos=1\n",
"\n",
"for station_name in colnames:\n",
" station_name =str(station_name)\n",
" ax=fig.add_subplot(9,5,pos)\n",
" ax.scatter(measureded[station_name],estimated[station_name] , s=40, color='r', alpha=0.25, edgecolor='r')\n",
" ax.plot(xv,xv, c='k')\n",
" ax.set_title('TEMP - station %s' % station_name)\n",
" ax.set_xlabel('Measured Station ')\n",
" ax.set_ylabel('Interpolated Centoid ')\n",
" ax.axis('equal')\n",
" temp_corr = measureded[station_name].corr(estimated[station_name])\n",
" ax.text(-15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n",
" ax.set_xlim([-25,45])\n",
" ax.set_ylim([-25,45])\n",
" ax.grid()\n",
" pos=pos+1\n",
" \n",
"fig.tight_layout()\n",
"fig.savefig('./temp_kriging.png', dpi=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b73f2c2f-7d6a-46e5-bc34-486055ef66f9",
"metadata": {},
"outputs": [],
"source": [
"t = measureded['473']\n",
"mask = t>22\n",
"t[mask]"
]
},
{
"cell_type": "markdown",
"id": "1551bf9c-4533-4e13-b76f-0feb6961ceda",
"metadata": {},
"source": [
"## Precipitation \n",
"\n",
"LOO "
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "30a6dc77-76d2-4da9-8e23-1f8894b6b1de",
"metadata": {},
"outputs": [],
"source": [
"start_date = \"2012-01-01 01:00\"\n",
"measureded =pandas_read_OMS_timeseries(\"meteo_data/precipitation_cleaned.csv\")\n",
"estimated =pandas_read_OMS_timeseries(\"LOO_kriging/new_kriging_precipitation_trend_10closest.csv\")\n",
"\n",
"estimated = estimated[estimated.index >= start_date]\n",
"measureded = measureded[measureded.index >= start_date]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "b93241a0-6d77-42b1-ad79-2aea17040925",
"metadata": {},
"outputs": [],
"source": [
"# plot station against closest centroid\n",
"fig=plt.figure(figsize=(25,40))\n",
"\n",
"xv=np.linspace(-20,40,100)\n",
"\n",
"pos=1\n",
"colnames = measureded.columns\n",
"\n",
"for station_name in colnames:\n",
" station_name =str(station_name)\n",
" ax=fig.add_subplot(9,5,pos)\n",
" ax.scatter(measureded[station_name],estimated[station_name] , s=40, color='b', alpha=0.25, edgecolor='b')\n",
" ax.plot(xv,xv, c='k')\n",
" ax.set_title('Precipitation - station %s' % station_name)\n",
" ax.set_xlabel('Measured Station ')\n",
" ax.set_ylabel('Interpolated Centoid ')\n",
" ax.axis('equal')\n",
" temp_corr = measureded[station_name].corr(estimated[station_name])\n",
" ax.text(15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n",
" ax.set_xlim([0,30])\n",
" ax.set_ylim([0,40])\n",
" ax.grid()\n",
" pos=pos+1\n",
" \n",
"fig.tight_layout()\n",
"fig.savefig('./precip_kriging.png', dpi=300)"
]
},
{
"cell_type": "markdown",
"id": "2bcfe192-a900-41e3-b7dc-de8500a4f8be",
"metadata": {},
"source": [
"## Precipitation\n",
"\n",
"Nearest station"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7a667aa8-0d5c-4b62-915c-78b449de2093",
"metadata": {},
"outputs": [],
"source": [
"staz[\"strID\"] = staz['ID'].astype(str)\n",
"valid_points_gdf = staz[staz['strID'].isin(colnames)]\n",
"sindex = valid_points_gdf.sindex\n",
"\n",
"def get_nearest2(row, df):\n",
" # Check if the geometry is valid and exists\n",
" point = row.geometry\n",
" if point is None or not point.is_valid:\n",
" return pd.Series([None, None])\n",
"\n",
" # Retrieve the nearest geometry using the spatial index\n",
" # Ensure the input to nearest is correctly formatted\n",
" try:\n",
" # Query the spatial index\n",
" temp = df.drop(row.name, axis=0)\n",
" sindex = temp.sindex\n",
" nearest_idx = list(sindex.nearest(point, 1, return_distance = True))\n",
"\n",
" nearest_point = temp.iloc[nearest_idx[0][1][0]]\n",
" nearest_distance = nearest_idx[1][0]\n",
" return pd.Series([int(nearest_point['ID']), nearest_distance])\n",
" except Exception as e:\n",
" # Handle any exceptions that may arise and print them to understand the issue\n",
" print(f\"Error processing row: {e}\")\n",
" return pd.Series([None, None])\n",
"# Apply the function to the original dataframe\n",
"valid_points_gdf[['nearest_id', 'nearest_distance']] = valid_points_gdf.apply(lambda row: get_nearest2(row, valid_points_gdf), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3be34e40-9866-49b2-9c9d-17e98e973f10",
"metadata": {},
"outputs": [],
"source": [
"\n",
"# plot station against closest centroid\n",
"fig=plt.figure(figsize=(25,40))\n",
"\n",
"xv=np.linspace(-20,40,100)\n",
"\n",
"pos=1\n",
"colnames = measureded.columns\n",
"\n",
"for idx, row in valid_points_gdf.iterrows():\n",
" station_name =str(row[\"ID\"])\n",
" nearste_st = str(int(row[\"nearest_id\"]))\n",
" d = str(row[\"nearest_distance\"])\n",
" ax=fig.add_subplot(9,5,pos)\n",
" ax.scatter(measureded[station_name],measureded[nearste_st] , s=40, color='b', alpha=0.25, edgecolor='b')\n",
" ax.plot(xv,xv, c='k')\n",
" ax.set_title('station %s vs station %s at distance %s' % (station_name,nearste_st,d) )\n",
" ax.set_xlabel('Measured Station ')\n",
" ax.set_ylabel('Nearste ')\n",
" ax.axis('equal')\n",
" temp_corr = measureded[station_name].corr(measureded[nearste_st])\n",
" ax.text(15, 35, 'R='+ \"{:.3f}\".format(temp_corr), fontsize=15)\n",
" ax.set_xlim([0,30])\n",
" ax.set_ylim([0,40])\n",
" ax.grid()\n",
" pos=pos+1\n",
" \n",
"fig.tight_layout()\n",
"fig.savefig('./precip_station.png', dpi=300)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9ee0573c-3922-4e31-96db-2fb6f9d50fd2",
"metadata": {},
"outputs": [],
"source": [
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "a65576e7-c3f8-4adc-ba2a-957bbce8b34b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.12"
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version_major": 2,
"version_minor": 0
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment