james-morrison-mowi/Olex_Object_Export.ipynb

## Olex_Object_Export.ipynb
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This notebook processes object files exported from Olex v7 and converts them into a GeoPandas GeoDataFrame of Shapely geometries, so that the vector data can be exported as WKT and visualised in GIS systems."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import geopandas as gpd\n",
    "import gzip\n",
    "import pandas as pd\n",
    "import shapely"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 326,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Path to the gzip-compressed Olex objects export that the loading cell reads.\n",
    "olex_objects_files_path = 'olex_objects_export.gz'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 355,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load the export and split it into routes (one list of lines per route).\n",
    "# NOTE: this rebinds the name `split_routes` from the splitting function to its\n",
    "# result, and it needs both function-definition cells to have been run first.\n",
    "split_routes = process_olex_objects_gz_file(\n",
    "    olex_objects_files_path\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 328,
   "metadata": {},
   "outputs": [],
   "source": [
    "def process_olex_objects_gz_file(olex_objects_gz_file):\n",
    "    \"\"\"Read a gzip-compressed Olex objects export and split it into routes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    olex_objects_gz_file : str or path-like\n",
    "        Path to the gzip file; it is opened in text mode.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of list of str\n",
    "        Whatever `split_routes` returns for the lines of the file.\n",
    "    \"\"\"\n",
    "    # The context manager guarantees the handle is closed once the lines\n",
    "    # have been read, even if reading fails.\n",
    "    with gzip.open(olex_objects_gz_file, 'rt') as olex_gzip_file:\n",
    "        olex_lines = olex_gzip_file.readlines()\n",
    "    return split_routes(olex_lines)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 354,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split_routes(olex_string):\n",
    "    \"\"\"Group the lines of an Olex export into routes.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    olex_string : iterable of str\n",
    "        Lines of the export, e.g. from `readlines()`. A line consisting of\n",
    "        only a newline marks the boundary between two routes.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    list of list of str\n",
    "        One list per route, each holding that route's lines without their\n",
    "        trailing newline. An empty input yields a single empty route.\n",
    "    \"\"\"\n",
    "    routes = [[]]\n",
    "    for line in olex_string:\n",
    "        if line == '\\n':\n",
    "            # Blank line: start collecting a new route.\n",
    "            routes.append([])\n",
    "            continue\n",
    "        # Drop only a real trailing newline so that a final line without one\n",
    "        # keeps its last character.\n",
    "        content = line[:-1] if line.endswith('\\n') else line\n",
    "        routes[-1].append(content)\n",
    "    return routes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 288,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Defunct with new split_routes function\n",
    "def read_split_olex_file(olex_file_path):\n",
    "    \"\"\"Read an uncompressed Olex export and split it into routes.\n",
    "\n",
    "    Routes are separated by an empty line. Returns a list with one list of\n",
    "    lines per route; a trailing newline in the file leaves an empty string\n",
    "    as the last line of the last route.\n",
    "    \"\"\"\n",
    "    # Close the file as soon as its text has been read.\n",
    "    with open(olex_file_path) as olex_file:\n",
    "        olex_text = olex_file.read()\n",
    "    return [route.split('\\n') for route in olex_text.split('\\n\\n')]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 227,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Defunct: `olex_file_path` is not defined in any visible cell, so this call raises\n",
    "# NameError on a fresh kernel, and it would overwrite the `split_routes` result\n",
    "# loaded from the gzip export. Uncomment only after defining `olex_file_path`.\n",
    "# split_routes = read_split_olex_file(olex_file_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 365,
   "metadata": {},
   "outputs": [],
   "source": [
    "def convert_olex_coords_to_lat_lon(lat_lon_dataframe):\n",
    "    \"\"\"Return a copy of the frame with the Lat and Lon columns divided by 60.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    lat_lon_dataframe : pandas.DataFrame\n",
    "        Must have `Lat` and `Lon` columns whose values can be parsed as floats.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        A new frame with float `Lat` and `Lon`; the input frame is not modified.\n",
    "    \"\"\"\n",
    "    # NOTE(review): dividing by 60 presumably turns Olex minutes into decimal\n",
    "    # degrees - confirm against the Olex export format.\n",
    "    return lat_lon_dataframe.assign(\n",
    "        Lon=lat_lon_dataframe.Lon.astype(float) / 60,\n",
    "        Lat=lat_lon_dataframe.Lat.astype(float) / 60,\n",
    "    )"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 339,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Build one polygon per 'Rutetype Areal' route and export them to polygons.csv.\n",
    "area_dfs = []  # raw per-area frames; this list was previously never initialised\n",
    "polygons = []\n",
    "for route in split_routes:\n",
    "    if 'Rutetype Areal' not in route:\n",
    "        continue\n",
    "    # Position lines are those starting with '3'; the first two lines of the route are skipped.\n",
    "    records = [record.split(' ') for record in route[2:] if record.startswith('3')]\n",
    "    # A polygon needs at least three positions; skip degenerate or empty areas.\n",
    "    if len(records) < 3:\n",
    "        continue\n",
    "    route_df = pd.DataFrame(records, columns=['Lat','Lon','Time','Type'])\n",
    "    area_dfs.append(route_df)\n",
    "    points = list(zip(route_df.Lon.apply(lambda x: float(x)/60),\n",
    "                      route_df.Lat.apply(lambda x: float(x)/60)))\n",
    "    polygon = shapely.geometry.Polygon(points)\n",
    "    # The polygon is labelled with the timestamp of its first position.\n",
    "    polygons.append((datetime.fromtimestamp(int(route_df.Time[0])), polygon))\n",
    "gpd.GeoDataFrame(polygons, columns=['Time','WKT']).to_csv('polygons.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 382,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Code for extracting route positions from Olex v7 exports to a GeoPandas\n",
    "# DataFrame preserving Time and marker Type metadata and exports to csv\n",
    "def get_geodataframe_write_to_wkt_csv(split_routes, output_csv, olex_lat_start_value='3'):\n",
    "    \"\"\"Build a LineString per route, write them to CSV and return the GeoDataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    split_routes : list of list of str\n",
    "        Routes as lists of lines; the first two lines of each route are skipped.\n",
    "    output_csv : str\n",
    "        Path of the CSV file to write.\n",
    "    olex_lat_start_value : str or tuple of str, optional\n",
    "        Prefix(es) that identify position lines. The default '3' keeps the\n",
    "        original behaviour.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    geopandas.GeoDataFrame\n",
    "        Columns First_Point_Time, Time_List, Type and WKT, one row per route\n",
    "        that has more than one position.\n",
    "    \"\"\"\n",
    "    linestrings = []\n",
    "    for route in split_routes:\n",
    "        records = [record.split(' ') for record in route[2:]\n",
    "                   if record.startswith(olex_lat_start_value)]\n",
    "        # A LineString needs at least two positions, so skip single points.\n",
    "        if len(records) < 2:\n",
    "            continue\n",
    "        route_df = pd.DataFrame(records, columns=['Lat','Lon','Time','Type'])\n",
    "        route_df = convert_olex_coords_to_lat_lon(route_df)\n",
    "        points = list(zip(route_df.Lon, route_df.Lat))\n",
    "        linestring = shapely.geometry.LineString(points)\n",
    "        # NOTE: datetime.fromtimestamp yields local time on the machine running this.\n",
    "        route_df.Time = route_df.Time.apply(lambda x: datetime.fromtimestamp(int(x)))\n",
    "        linestrings.append((route_df.Time.values[0], route_df.Time.values,\n",
    "                            route_df.Type.values, linestring))\n",
    "    linestrings_gpd = gpd.GeoDataFrame(linestrings, columns=['First_Point_Time','Time_List','Type','WKT'])\n",
    "    linestrings_gpd.to_csv(output_csv)\n",
    "    return linestrings_gpd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Convert every multi-point route to a LineString and write them to linestrings.csv.\n",
    "geodataframe_linestrings = get_geodataframe_write_to_wkt_csv(\n",
    "    split_routes,\n",
    "    'linestrings.csv',\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 98,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Extract all positions from an Olex objects text file, suitable for locations between 50 & 66 degrees North\n",
    "# Change olex_lat_start_value for other locations\n",
    "# NOTE(review): the lines are read from the gzip export named in\n",
    "# olex_objects_files_path; this assumes it is the same export that the\n",
    "# previously undefined olex_objects_file_lines came from - confirm.\n",
    "with gzip.open(olex_objects_files_path, 'rt') as olex_objects_file:\n",
    "    olex_objects_file_lines = olex_objects_file.readlines()\n",
    "routes = []  # line indices of 'Rute uten navn' headers\n",
    "areas = []   # line indices of 'Rutetype Areal' markers\n",
    "lines = []   # line indices of 'Rutetype Linje' markers\n",
    "olex_lat_start_value = '3'\n",
    "olex_objects_locations = []\n",
    "for line_index, line in enumerate(olex_objects_file_lines):\n",
    "    if line.startswith('Rute uten navn'):\n",
    "        routes.append(line_index)\n",
    "    if line.startswith('Rutetype Areal'):\n",
    "        areas.append(line_index)\n",
    "    if line.startswith('Rutetype Linje'):\n",
    "        lines.append(line_index)\n",
    "    if line.startswith(olex_lat_start_value):\n",
    "        # Strip only a real trailing newline so an unterminated last line is not truncated.\n",
    "        position = line.rstrip('\\n').split(' ')\n",
    "        # Tag the position with its line index and the running counts of\n",
    "        # 'Rutetype Linje' markers and route headers seen so far.\n",
    "        position.extend([line_index, len(lines), len(routes)])\n",
    "        olex_objects_locations.append(position)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 99,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Tabulate the extracted positions; the last three columns hold the\n",
    "# bookkeeping values appended to each position during extraction.\n",
    "olex_objects = pd.DataFrame(\n",
    "    olex_objects_locations,\n",
    "    columns=['Lat','Lon','Time','Type','Line_Index','Line','Routes'],\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 100,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Replace the raw Lat/Lon values with their converted equivalents.\n",
    "# NOTE: re-running this cell converts the already-converted values again.\n",
    "olex_objects = convert_olex_coords_to_lat_lon(olex_objects)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 102,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Turn the timestamp values into datetime objects (local time, as returned\n",
    "# by datetime.fromtimestamp).\n",
    "olex_objects['Time'] = olex_objects['Time'].astype('int').apply(datetime.fromtimestamp)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 91,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Write the position table, including the source line index of every position.\n",
    "olex_objects.to_csv(\n",
    "    'olex_objects_including_line_index.csv'\n",
    ")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
	{
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {},
	"source": [
	"This notebook processes object files exported from Olex v7 and converts them into a GeoPandas GeoDataFrame of Shapely geometries, so that the vector data can be exported as WKT and visualised in GIS systems."
	]
	},
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"from datetime import datetime\n",
	"import geopandas as gpd\n",
	"import gzip\n",
	"import pandas as pd\n",
	"import shapely"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 326,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Path to the gzip-compressed Olex objects export that the loading cell reads.\n",
	"olex_objects_files_path = 'olex_objects_export.gz'"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 355,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Load the export and split it into routes (one list of lines per route).\n",
	"# NOTE: this rebinds the name `split_routes` from the splitting function to its\n",
	"# result, and it needs both function-definition cells to have been run first.\n",
	"split_routes = process_olex_objects_gz_file(\n",
	"    olex_objects_files_path\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 328,
	"metadata": {},
	"outputs": [],
	"source": [
	"def process_olex_objects_gz_file(olex_objects_gz_file):\n",
	"    \"\"\"Read a gzip-compressed Olex objects export and split it into routes.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    olex_objects_gz_file : str or path-like\n",
	"        Path to the gzip file; it is opened in text mode.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    list of list of str\n",
	"        Whatever `split_routes` returns for the lines of the file.\n",
	"    \"\"\"\n",
	"    # The context manager guarantees the handle is closed once the lines\n",
	"    # have been read, even if reading fails.\n",
	"    with gzip.open(olex_objects_gz_file, 'rt') as olex_gzip_file:\n",
	"        olex_lines = olex_gzip_file.readlines()\n",
	"    return split_routes(olex_lines)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 354,
	"metadata": {},
	"outputs": [],
	"source": [
	"def split_routes(olex_string):\n",
	"    \"\"\"Group the lines of an Olex export into routes.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    olex_string : iterable of str\n",
	"        Lines of the export, e.g. from `readlines()`. A line consisting of\n",
	"        only a newline marks the boundary between two routes.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    list of list of str\n",
	"        One list per route, each holding that route's lines without their\n",
	"        trailing newline. An empty input yields a single empty route.\n",
	"    \"\"\"\n",
	"    routes = [[]]\n",
	"    for line in olex_string:\n",
	"        if line == '\\n':\n",
	"            # Blank line: start collecting a new route.\n",
	"            routes.append([])\n",
	"            continue\n",
	"        # Drop only a real trailing newline so that a final line without one\n",
	"        # keeps its last character.\n",
	"        content = line[:-1] if line.endswith('\\n') else line\n",
	"        routes[-1].append(content)\n",
	"    return routes"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 288,
	"metadata": {},
	"outputs": [],
	"source": [
	"#Defunct with new split_routes function\n",
	"def read_split_olex_file(olex_file_path):\n",
	"    \"\"\"Read an uncompressed Olex export and split it into routes.\n",
	"\n",
	"    Routes are separated by an empty line. Returns a list with one list of\n",
	"    lines per route; a trailing newline in the file leaves an empty string\n",
	"    as the last line of the last route.\n",
	"    \"\"\"\n",
	"    # Close the file as soon as its text has been read.\n",
	"    with open(olex_file_path) as olex_file:\n",
	"        olex_text = olex_file.read()\n",
	"    return [route.split('\\n') for route in olex_text.split('\\n\\n')]"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 227,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Defunct: `olex_file_path` is not defined in any visible cell, so this call raises\n",
	"# NameError on a fresh kernel, and it would overwrite the `split_routes` result\n",
	"# loaded from the gzip export. Uncomment only after defining `olex_file_path`.\n",
	"# split_routes = read_split_olex_file(olex_file_path)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 365,
	"metadata": {},
	"outputs": [],
	"source": [
	"def convert_olex_coords_to_lat_lon(lat_lon_dataframe):\n",
	"    \"\"\"Return a copy of the frame with the Lat and Lon columns divided by 60.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    lat_lon_dataframe : pandas.DataFrame\n",
	"        Must have `Lat` and `Lon` columns whose values can be parsed as floats.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    pandas.DataFrame\n",
	"        A new frame with float `Lat` and `Lon`; the input frame is not modified.\n",
	"    \"\"\"\n",
	"    # NOTE(review): dividing by 60 presumably turns Olex minutes into decimal\n",
	"    # degrees - confirm against the Olex export format.\n",
	"    return lat_lon_dataframe.assign(\n",
	"        Lon=lat_lon_dataframe.Lon.astype(float) / 60,\n",
	"        Lat=lat_lon_dataframe.Lat.astype(float) / 60,\n",
	"    )"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 339,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Build one polygon per 'Rutetype Areal' route and export them to polygons.csv.\n",
	"area_dfs = []  # raw per-area frames; this list was previously never initialised\n",
	"polygons = []\n",
	"for route in split_routes:\n",
	"    if 'Rutetype Areal' not in route:\n",
	"        continue\n",
	"    # Position lines are those starting with '3'; the first two lines of the route are skipped.\n",
	"    records = [record.split(' ') for record in route[2:] if record.startswith('3')]\n",
	"    # A polygon needs at least three positions; skip degenerate or empty areas.\n",
	"    if len(records) < 3:\n",
	"        continue\n",
	"    route_df = pd.DataFrame(records, columns=['Lat','Lon','Time','Type'])\n",
	"    area_dfs.append(route_df)\n",
	"    points = list(zip(route_df.Lon.apply(lambda x: float(x)/60),\n",
	"                      route_df.Lat.apply(lambda x: float(x)/60)))\n",
	"    polygon = shapely.geometry.Polygon(points)\n",
	"    # The polygon is labelled with the timestamp of its first position.\n",
	"    polygons.append((datetime.fromtimestamp(int(route_df.Time[0])), polygon))\n",
	"gpd.GeoDataFrame(polygons, columns=['Time','WKT']).to_csv('polygons.csv')"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 382,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Code for extracting route positions from Olex v7 exports to a GeoPandas\n",
	"# DataFrame preserving Time and marker Type metadata and exports to csv\n",
	"def get_geodataframe_write_to_wkt_csv(split_routes, output_csv, olex_lat_start_value='3'):\n",
	"    \"\"\"Build a LineString per route, write them to CSV and return the GeoDataFrame.\n",
	"\n",
	"    Parameters\n",
	"    ----------\n",
	"    split_routes : list of list of str\n",
	"        Routes as lists of lines; the first two lines of each route are skipped.\n",
	"    output_csv : str\n",
	"        Path of the CSV file to write.\n",
	"    olex_lat_start_value : str or tuple of str, optional\n",
	"        Prefix(es) that identify position lines. The default '3' keeps the\n",
	"        original behaviour.\n",
	"\n",
	"    Returns\n",
	"    -------\n",
	"    geopandas.GeoDataFrame\n",
	"        Columns First_Point_Time, Time_List, Type and WKT, one row per route\n",
	"        that has more than one position.\n",
	"    \"\"\"\n",
	"    linestrings = []\n",
	"    for route in split_routes:\n",
	"        records = [record.split(' ') for record in route[2:]\n",
	"                   if record.startswith(olex_lat_start_value)]\n",
	"        # A LineString needs at least two positions, so skip single points.\n",
	"        if len(records) < 2:\n",
	"            continue\n",
	"        route_df = pd.DataFrame(records, columns=['Lat','Lon','Time','Type'])\n",
	"        route_df = convert_olex_coords_to_lat_lon(route_df)\n",
	"        points = list(zip(route_df.Lon, route_df.Lat))\n",
	"        linestring = shapely.geometry.LineString(points)\n",
	"        # NOTE: datetime.fromtimestamp yields local time on the machine running this.\n",
	"        route_df.Time = route_df.Time.apply(lambda x: datetime.fromtimestamp(int(x)))\n",
	"        linestrings.append((route_df.Time.values[0], route_df.Time.values,\n",
	"                            route_df.Type.values, linestring))\n",
	"    linestrings_gpd = gpd.GeoDataFrame(linestrings, columns=['First_Point_Time','Time_List','Type','WKT'])\n",
	"    linestrings_gpd.to_csv(output_csv)\n",
	"    return linestrings_gpd"
	]
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Convert every multi-point route to a LineString and write them to linestrings.csv.\n",
	"geodataframe_linestrings = get_geodataframe_write_to_wkt_csv(\n",
	"    split_routes,\n",
	"    'linestrings.csv',\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 98,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Extract all positions from an Olex objects text file, suitable for locations between 50 & 66 degrees North\n",
	"# Change olex_lat_start_value for other locations\n",
	"# NOTE(review): the lines are read from the gzip export named in\n",
	"# olex_objects_files_path; this assumes it is the same export that the\n",
	"# previously undefined olex_objects_file_lines came from - confirm.\n",
	"with gzip.open(olex_objects_files_path, 'rt') as olex_objects_file:\n",
	"    olex_objects_file_lines = olex_objects_file.readlines()\n",
	"routes = []  # line indices of 'Rute uten navn' headers\n",
	"areas = []   # line indices of 'Rutetype Areal' markers\n",
	"lines = []   # line indices of 'Rutetype Linje' markers\n",
	"olex_lat_start_value = '3'\n",
	"olex_objects_locations = []\n",
	"for line_index, line in enumerate(olex_objects_file_lines):\n",
	"    if line.startswith('Rute uten navn'):\n",
	"        routes.append(line_index)\n",
	"    if line.startswith('Rutetype Areal'):\n",
	"        areas.append(line_index)\n",
	"    if line.startswith('Rutetype Linje'):\n",
	"        lines.append(line_index)\n",
	"    if line.startswith(olex_lat_start_value):\n",
	"        # Strip only a real trailing newline so an unterminated last line is not truncated.\n",
	"        position = line.rstrip('\\n').split(' ')\n",
	"        # Tag the position with its line index and the running counts of\n",
	"        # 'Rutetype Linje' markers and route headers seen so far.\n",
	"        position.extend([line_index, len(lines), len(routes)])\n",
	"        olex_objects_locations.append(position)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 99,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Tabulate the extracted positions; the last three columns hold the\n",
	"# bookkeeping values appended to each position during extraction.\n",
	"olex_objects = pd.DataFrame(\n",
	"    olex_objects_locations,\n",
	"    columns=['Lat','Lon','Time','Type','Line_Index','Line','Routes'],\n",
	")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 100,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Replace the raw Lat/Lon values with their converted equivalents.\n",
	"# NOTE: re-running this cell converts the already-converted values again.\n",
	"olex_objects = convert_olex_coords_to_lat_lon(olex_objects)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 102,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Turn the timestamp values into datetime objects (local time, as returned\n",
	"# by datetime.fromtimestamp).\n",
	"olex_objects['Time'] = olex_objects['Time'].astype('int').apply(datetime.fromtimestamp)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 91,
	"metadata": {},
	"outputs": [],
	"source": [
	"# Write the position table, including the source line index of every position.\n",
	"olex_objects.to_csv(\n",
	"    'olex_objects_including_line_index.csv'\n",
	")"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "Python 3",
	"language": "python",
	"name": "python3"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.3"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}