Created
February 22, 2018 09:23
-
-
Save james-morrison-mowi/b5ec55a938e12021c82e727be96a1bd5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"This Notebook processes object files exported from an Olex v7 and converts it into a GeoPandas DataFrame using Shapely objects to allow export of vector data via WKT to allow visualisation in GIS systems." | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from datetime import datetime\n", | |
"import geopandas as gpd\n", | |
"import gzip\n", | |
"import pandas as pd\n", | |
"import shapely" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 326, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Path to the gzipped objects export produced by Olex v7.
olex_objects_files_path = 'olex_objects_export.gz'
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 355, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# NOTE(review): this cell calls process_olex_objects_gz_file before the cell
# that defines it (execution counts are out of order); on a fresh kernel the
# definition cells must be run first.
# NOTE(review): binding the result to 'split_routes' shadows the split_routes()
# function defined below, so this cell cannot be re-run after the first pass —
# consider renaming the variable (e.g. 'routes') throughout the notebook.
split_routes = process_olex_objects_gz_file(olex_objects_files_path)
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 328, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
def process_olex_objects_gz_file(olex_objects_gz_file):
    """Read a gzipped Olex v7 objects export and split it into routes.

    Parameters
    ----------
    olex_objects_gz_file : str
        Path to the .gz export file.

    Returns
    -------
    list of list of str
        One inner list of lines per route, as produced by split_routes().
    """
    # 'rt' decodes the gzip stream as text; the context manager guarantees
    # the handle is closed (the original opened it and never closed it).
    with gzip.open(olex_objects_gz_file, 'rt') as olex_gzip_file:
        olex_lines = olex_gzip_file.readlines()
    # NOTE(review): 'split_routes' must still refer to the function here —
    # other cells rebind that name to a list, breaking re-runs of this cell.
    return split_routes(olex_lines)
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 354, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
def split_routes(olex_string):
    """Group the lines of an Olex export into routes.

    Routes in the export are separated by blank lines.

    Parameters
    ----------
    olex_string : iterable of str
        Lines of the export, each normally ending with a newline.

    Returns
    -------
    list of list of str
        One inner list per route, with trailing newlines removed. A blank
        line at the end of the input yields a trailing empty route, matching
        the original behaviour.
    """
    routes = [[]]
    for line in olex_string:
        if line == '\n':
            # A blank line terminates the current route; start a new one.
            routes.append([])
        else:
            # rstrip('\n') instead of line[:-1]: the original chopped the last
            # character of a final line that had no trailing newline.
            routes[-1].append(line.rstrip('\n'))
    return routes
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 288, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Defunct with new split_routes function
def read_split_olex_file(olex_file_path):
    """Read an uncompressed Olex export and split it into routes.

    Routes are separated by blank lines ('\\n\\n' in the joined text).

    Parameters
    ----------
    olex_file_path : str
        Path to the plain-text export file.

    Returns
    -------
    list of list of str
        One inner list per route. Unlike split_routes(), each route that
        ended with a newline keeps a trailing '' entry (original behaviour).
    """
    # Context manager closes the handle (the original leaked it).
    with open(olex_file_path) as olex_file:
        olex_objects = olex_file.readlines()
    routes = ''.join(olex_objects).split('\n\n')
    return [route.split('\n') for route in routes]
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 227, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# NOTE(review): 'olex_file_path' is never defined anywhere in this notebook —
# this cell raises NameError on a fresh kernel. It belongs to the defunct
# uncompressed-file workflow; define the path (or delete the cell).
# NOTE(review): the name 'split_routes' also shadows the split_routes()
# function defined in an earlier cell.
split_routes = read_split_olex_file(olex_file_path)
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 365, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
def convert_olex_coords_to_lat_lon(lat_lon_dataframe):
    """Convert Olex coordinates (decimal minutes) to decimal degrees, in place.

    Olex stores latitude/longitude as decimal minutes; dividing by 60 yields
    decimal degrees.

    Parameters
    ----------
    lat_lon_dataframe : pandas.DataFrame
        Frame with 'Lat' and 'Lon' columns of numeric strings (or numbers).

    Returns
    -------
    pandas.DataFrame
        The same frame (mutated in place) with Lat/Lon converted to floats.
    """
    def _minutes_to_degrees(coord):
        return float(coord) / 60
    # Pass the function to .apply directly — the original wrapped it in a
    # redundant lambda (lambda x: convert_olex(x)).
    lat_lon_dataframe.Lon = lat_lon_dataframe.Lon.apply(_minutes_to_degrees)
    lat_lon_dataframe.Lat = lat_lon_dataframe.Lat.apply(_minutes_to_degrees)
    return lat_lon_dataframe
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 339, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Extract area routes ('Rutetype Areal') as shapely Polygons, tagged with the
# timestamp of their first point, and export them to polygons.csv.
area_dfs = []  # fixes NameError: the original appended to this undefined name
polygons = []
for route in split_routes:
    if 'Rutetype Areal' in route:
        # Position records start with '3' (Olex latitude minutes, ~50-66N);
        # each record is 'Lat Lon Time Type'.
        records = [record.split(' ') for record in route[2:]
                   if record.startswith('3')]
        route_df = pd.DataFrame(records,
                                columns=['Lat', 'Lon', 'Time', 'Type'])
        area_dfs.append(route_df)
        # Olex coordinates are decimal minutes; /60 converts to degrees.
        points = list(zip(route_df.Lon.apply(lambda x: float(x) / 60),
                          route_df.Lat.apply(lambda x: float(x) / 60)))
        polygon = shapely.geometry.Polygon(points)
        polygons.append((datetime.fromtimestamp(int(route_df.Time[0])),
                         polygon))
gpd.GeoDataFrame(polygons, columns=['Time', 'WKT']).to_csv('polygons.csv')
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 382, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Code for extracting route positions from Olex v7 exports to a GeoPandas
# DataFrame preserving Time and marker Type metadata and exports to csv
def get_geodataframe_write_to_wkt_csv(split_routes, output_csv):
    """Convert routes to LineStrings and write them to a WKT csv.

    Parameters
    ----------
    split_routes : list of list of str
        Routes as produced by split_routes() / read_split_olex_file().
    output_csv : str
        Path of the csv file to write.

    Returns
    -------
    geopandas.GeoDataFrame
        Columns: First_Point_Time, Time_List, Type, WKT.
    """
    linestrings = []
    for route in split_routes:
        # Position records start with '3' (Olex latitude minutes, ~50-66N).
        # Collect them in one pass — the original scanned the route twice,
        # once just to count and once to collect.
        records = [record.split(' ') for record in route[2:]
                   if record.startswith('3')]
        # A LineString needs at least two points; skip single-point routes.
        if len(records) > 1:
            route_df = pd.DataFrame(records,
                                    columns=['Lat', 'Lon', 'Time', 'Type'])
            route_df = convert_olex_coords_to_lat_lon(route_df)
            points = list(zip(route_df.Lon, route_df.Lat))
            linestring = shapely.geometry.LineString(points)
            # Olex 'Time' is a unix epoch in seconds.
            route_df.Time = route_df.Time.apply(
                lambda x: datetime.fromtimestamp(int(x)))
            linestrings.append((route_df.Time.values[0], route_df.Time.values,
                                route_df.Type.values, linestring))
    linestrings_gpd = gpd.GeoDataFrame(
        linestrings,
        columns=['First_Point_Time', 'Time_List', 'Type', 'WKT'])
    linestrings_gpd.to_csv(output_csv)
    return linestrings_gpd
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"geodataframe_linestrings = get_geodataframe_write_to_wkt_csv(split_routes,'linestrings.csv')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 98, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Extract all positions from an Olex objects text file, suitable for locations between 50 & 66 degrees North
# Change olex_lat_start_value for other locations
# NOTE(review): 'olex_objects_file_lines' is never defined in this notebook —
# presumably the raw readlines() of the export file; define it before running.
routes = []
areas = []
lines = []
# Latitudes 50-66N in Olex decimal minutes all begin with '3' (3000-3960).
olex_lat_start_value = '3'
olex_objects_locations = [] 
for line_index,line in enumerate(olex_objects_file_lines):
    # Norwegian Olex record headers: 'Rute uten navn' = "route without name",
    # 'Rutetype Areal'/'Linje' = route type Area / Line.
    if line.startswith('Rute uten navn'):
        routes.append(line_index)
    if line.startswith('Rutetype Areal'):
        areas.append(line_index)
    if line.startswith('Rutetype Linje'):
        lines.append(line_index)
    if line.startswith(olex_lat_start_value):
        # Position record 'Lat Lon Time Type': strip the trailing newline,
        # then tag it with its line index and the running Line/Route counts
        # so each position can be traced back to its source object.
        position = line[:-1].split(' ')
        position.append(line_index)
        position.append(len(lines))
        position.append(len(routes))
        olex_objects_locations.append(position)
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 99, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Positions plus provenance: which export line each record came from and how
# many Line/Route headers preceded it.
olex_objects = pd.DataFrame(olex_objects_locations,
                            columns=['Lat','Lon','Time','Type','Line_Index','Line','Routes'])
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 100, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Convert Olex decimal minutes to decimal degrees (mutates olex_objects).
olex_objects = convert_olex_coords_to_lat_lon(olex_objects) 
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 102, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Olex 'Time' is a unix epoch in seconds; convert to naive local datetimes.
olex_objects.Time = olex_objects.Time.astype('int').apply(lambda x: datetime.fromtimestamp(x))
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 91, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
# Export all positions, including provenance columns, for use in GIS tools.
olex_objects.to_csv('olex_objects_including_line_index.csv')
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.3" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment