Skip to content

Instantly share code, notes, and snippets.

@james-morrison-mowi
Created February 22, 2018 09:23
Show Gist options
  • Save james-morrison-mowi/b5ec55a938e12021c82e727be96a1bd5 to your computer and use it in GitHub Desktop.
Save james-morrison-mowi/b5ec55a938e12021c82e727be96a1bd5 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook processes object files exported from Olex v7 and converts them into a GeoPandas GeoDataFrame of Shapely geometries, so that the vector data can be exported as WKT and visualised in GIS systems."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime\n",
"import geopandas as gpd\n",
"import gzip\n",
"import pandas as pd\n",
"import shapely"
]
},
{
"cell_type": "code",
"execution_count": 326,
"metadata": {},
"outputs": [],
"source": [
"olex_objects_files_path = 'olex_objects_export.gz'"
]
},
{
"cell_type": "code",
"execution_count": 355,
"metadata": {},
"outputs": [],
"source": [
"split_routes = process_olex_objects_gz_file(olex_objects_files_path)"
]
},
{
"cell_type": "code",
"execution_count": 328,
"metadata": {},
"outputs": [],
"source": [
"def process_olex_objects_gz_file(olex_objects_gz_file):\n",
" olex_gzip_file = gzip.open(olex_objects_gz_file,'rt')\n",
" olex_string = olex_gzip_file.readlines()\n",
" return split_routes(olex_string)"
]
},
{
"cell_type": "code",
"execution_count": 354,
"metadata": {},
"outputs": [],
"source": [
"def split_routes(olex_string):\n",
" routes = [[]]\n",
" for line in olex_string:\n",
" current_route = routes[-1]\n",
" if line == '\\n':\n",
" routes.append([])\n",
" else:\n",
" current_route.append(line[:-1])\n",
" return routes"
]
},
{
"cell_type": "code",
"execution_count": 288,
"metadata": {},
"outputs": [],
"source": [
"# Defunct: superseded by the new split_routes function\n",
"def read_split_olex_file(olex_file_path):\n",
" # Detect empty lines between routes\n",
" olex_objects = open(olex_file_path).readlines()\n",
" routes = ''.join(olex_objects).split('\\n\\n')\n",
" split_routes = []\n",
" for route in routes:\n",
" split_routes.append(route.split('\\n'))\n",
" return split_routes"
]
},
{
"cell_type": "code",
"execution_count": 227,
"metadata": {},
"outputs": [],
"source": [
"split_routes = read_split_olex_file(olex_file_path)"
]
},
{
"cell_type": "code",
"execution_count": 365,
"metadata": {},
"outputs": [],
"source": [
"def convert_olex_coords_to_lat_lon(lat_lon_dataframe):\n",
" def convert_olex(coord):\n",
" return float(coord)/60\n",
" lat_lon_dataframe.Lon = lat_lon_dataframe.Lon.apply(lambda x: convert_olex(x))\n",
" lat_lon_dataframe.Lat = lat_lon_dataframe.Lat.apply(lambda x: convert_olex(x))\n",
" return lat_lon_dataframe"
]
},
{
"cell_type": "code",
"execution_count": 339,
"metadata": {},
"outputs": [],
"source": [
"polygons = []\n",
"for route in split_routes:\n",
" if ('Rutetype Areal' in route):\n",
" records = []\n",
" for record in route[2:]:\n",
" if record.startswith('3'):\n",
" records.append(record.split(' '))\n",
" route_df = pd.DataFrame(records, \n",
" columns=['Lat','Lon','Time','Type'])\n",
" area_dfs.append(route_df)\n",
" points = list(zip(route_df.Lon.apply(lambda x: float(x)/60),\n",
" route_df.Lat.apply(lambda x: float(x)/60)))\n",
" polygon = shapely.geometry.Polygon(points)\n",
" polygons.append((datetime.fromtimestamp(int(route_df.Time[0])),\n",
" polygon))\n",
"gpd.GeoDataFrame(polygons, columns=['Time','WKT']).to_csv('polygons.csv')"
]
},
{
"cell_type": "code",
"execution_count": 382,
"metadata": {},
"outputs": [],
"source": [
"# Extract route positions from Olex v7 exports into a GeoPandas GeoDataFrame,\n",
"# preserving the Time and marker Type metadata, and write the result to CSV.\n",
"def get_geodataframe_write_to_wkt_csv(split_routes, output_csv):\n",
" linestrings=[]\n",
" for route in split_routes:\n",
" records = []\n",
" no_of_positions = 0\n",
" # Check it isn't a single point in the Route or the LineString creation will fail\n",
" for line in route[2:]:\n",
" if line.startswith('3'):\n",
" no_of_positions = no_of_positions + 1\n",
" if no_of_positions > 1:\n",
" for record in route[2:]:\n",
" if record.startswith('3'):\n",
" records.append(record.split(' '))\n",
" route_df = pd.DataFrame(records, columns=['Lat','Lon','Time','Type'])\n",
" route_df = convert_olex_coords_to_lat_lon(route_df)\n",
" points = list(zip(route_df.Lon, route_df.Lat))\n",
" linestring = shapely.geometry.LineString(points)\n",
" route_df.Time = route_df.Time.apply(lambda x: datetime.fromtimestamp(int(x)))\n",
" linestrings.append((route_df.Time.values[0], route_df.Time.values, \n",
" route_df.Type.values, linestring))\n",
" linestrings_gpd = gpd.GeoDataFrame(linestrings, columns=['First_Point_Time','Time_List','Type','WKT'])\n",
" linestrings_gpd.to_csv(output_csv)\n",
" return linestrings_gpd"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"geodataframe_linestrings = get_geodataframe_write_to_wkt_csv(split_routes,'linestrings.csv')"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [],
"source": [
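"# Extract all positions from an Olex objects text file.\n",
"# Position lines are recognised by their first character: coordinates are stored in minutes\n",
"# (degrees * 60), so a leading '3' matches latitudes of 3000-3999 minutes, i.e. locations\n",
"# between 50 and 66 degrees North. Change olex_lat_start_value for other locations.\n",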
"routes = []\n",
"areas = []\n",
"lines = []\n",
"olex_lat_start_value = '3'\n",
"olex_objects_locations = [] \n",
"for line_index,line in enumerate(olex_objects_file_lines):\n",
" if line.startswith('Rute uten navn'):\n",
" routes.append(line_index)\n",
" if line.startswith('Rutetype Areal'):\n",
" areas.append(line_index)\n",
" if line.startswith('Rutetype Linje'):\n",
" lines.append(line_index)\n",
" if line.startswith(olex_lat_start_value):\n",
" position = line[:-1].split(' ')\n",
" position.append(line_index)\n",
" position.append(len(lines))\n",
" position.append(len(routes))\n",
" olex_objects_locations.append(position)"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"olex_objects = pd.DataFrame(olex_objects_locations,\n",
" columns=['Lat','Lon','Time','Type','Line_Index','Line','Routes'])"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [],
"source": [
"olex_objects = convert_olex_coords_to_lat_lon(olex_objects) "
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"olex_objects.Time = olex_objects.Time.astype('int').apply(lambda x: datetime.fromtimestamp(x))"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [],
"source": [
"olex_objects.to_csv('olex_objects_including_line_index.csv')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment