Skip to content

Instantly share code, notes, and snippets.

@jtrive84
Created April 23, 2024 16:06
Show Gist options
  • Save jtrive84/1a18e6f0e7b2bea6019caa180a18cd76 to your computer and use it in GitHub Desktop.
Save jtrive84/1a18e6f0e7b2bea6019caa180a18cd76 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Identify bounding envelopes for each linkId in target region."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Author: james.triveri@arity.com\n",
"\n",
"Python implementation: CPython\n",
"Python version : 3.11.6\n",
"IPython version : 8.22.2\n",
"\n",
"conda environment: torch\n",
"\n",
"Compiler : GCC 12.3.0\n",
"OS : Linux\n",
"Release : 5.10.192-183.736.amzn2.x86_64\n",
"Machine : x86_64\n",
"Processor : x86_64\n",
"CPU cores : 4\n",
"Architecture: 64bit\n",
"\n",
"Hostname: ip-10-97-77-255.intr.ue1.prd.aws.cloud.arity.com\n",
"\n",
"pandas : 2.2.1\n",
"shapely : 2.0.1\n",
"numpy : 1.26.4\n",
"s3fs : 2024.2.0\n",
"geopandas : 0.14.3\n",
"boto3 : 1.34.51\n",
"sys : 3.11.6 | packaged by conda-forge | (main, Oct 3 2023, 10:40:35) [GCC 12.3.0]\n",
"matplotlib: 3.8.3\n",
"scipy : 1.12.0\n",
"\n"
]
}
],
"source": [
"%load_ext watermark\n",
"%load_ext autoreload\n",
"\n",
"from ast import literal_eval\n",
"from functools import reduce\n",
"import os\n",
"from pathlib import Path\n",
"import pickle\n",
"import sys\n",
"import time\n",
"import warnings\n",
"\n",
"import boto3\n",
"import geopandas as gpd\n",
"import matplotlib as mpl\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import pandas as pd\n",
"import s3fs\n",
"import scipy\n",
"import shapely\n",
"from shapely.geometry import Point, LineString\n",
"from shapely.geometry.polygon import Polygon\n",
"\n",
"\n",
"pd.set_option('display.max_columns', None)\n",
"pd.set_option('display.width', None)\n",
"np.set_printoptions(suppress=True, precision=12)\n",
"pd.options.mode.chained_assignment = None\n",
"\n",
"# Configuration ----------------------------------------------------------------\n",
"\n",
"\n",
"link_attr_uri = \"s3://arity-sdl-tm-int-us-east-1-prod-geo/riskmap/data/charlotte-link-attributes.csv\"\n",
"\n",
"# Declarations -----------------------------------------------------------------\n",
"\n",
"def check_s3_path(s3_uri: str) -> bool:\n",
" \"\"\"\n",
" Check if s3_uri exists.\n",
" \"\"\"\n",
" client = boto3.client(\"s3\")\n",
" s3_uri = s3_uri.replace(\"s3://\", \"\")\n",
" pp = Path(s3_uri)\n",
" pp_parts = pp.parts\n",
" bucket_name, s3_key = pp_parts[0], \"/\".join(pp_parts[1:])\n",
" result = client.list_objects_v2(Bucket=bucket_name, Prefix=s3_key)\n",
" return(True if \"Contents\" in result else False)\n",
"\n",
"\n",
"def serialize_object(obj, s3_uri: str) -> bool:\n",
" \"\"\"\n",
" Write object to s3_uri.\n",
" \"\"\"\n",
" client = boto3.client(\"s3\")\n",
" s3_uri = s3_uri.replace(\"s3://\", \"\")\n",
" pp = Path(s3_uri)\n",
" pp_parts = pp.parts\n",
" bucket_name, s3_key = pp_parts[0], \"/\".join(pp_parts[1:])\n",
"\n",
" # Write object to s3. \n",
" dev_null = client.put_object(\n",
" Bucket=bucket_name, Key=s3_key, Body=pickle.dumps(obj)\n",
" )\n",
"\n",
" # Verify that object has been written to s3.\n",
" return check_s3_path(s3_uri)\n",
"\n",
"# ------------------------------------------------------------------------------\n",
"\n",
"%autoreload 2\n",
"%watermark --python --conda --hostname --machine --iversions --author=\"james.triveri@arity.com\" \n",
"\n"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"Load link attributes dataset. Assign geometry based on shape_points_sorted."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"df0 = pd.read_csv(\n",
" link_attr_uri, \n",
" usecols=[\"linkId\", \"shape_points_sorted\"],\n",
" converters={\"shape_points_sorted\": literal_eval}\n",
" )\n",
"df0 = (\n",
" df0\n",
" .drop_duplicates(subset=[\"linkId\"])\n",
" .set_index(\"linkId\")\n",
" )\n",
" \n",
"# Transform list of lists to LineStrings.\n",
"df0[\"geometry\"] = df0[\"shape_points_sorted\"].map(lambda v: [(ii[1], ii[0]) for ii in v])\n",
"df0[\"geometry\"] = df0[\"geometry\"].map(lambda v: LineString(v))\n",
"df = gpd.GeoDataFrame(df0, crs=\"EPSG:4326\", geometry=df0[\"geometry\"])\n",
"\n",
"df.head()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"# Get bounding region for each linkId. \n",
"df[\"bbox\"] = df.geometry.map(lambda gg: gg.envelope.bounds)\n",
"\n",
"# Split bounds into separate columns.\n",
"df[[\"lon0\", \"lat0\", \"lon1\", \"lat1\"]] = pd.DataFrame(df.bbox.tolist(), index=df.index)\n",
"\n",
"dbbox = df[[\"lon0\", \"lat0\", \"lon1\", \"lat1\"]].to_dict(orient=\"index\")\n",
"\n",
"# Write results to S3.\n",
"bbox_uri = \"s3://arity-analytics-tm-int-us-east-1-prod-geo/map-images/charlotte-linkid-bboxes.pkl\"\n",
"serialize_model(dbbox, bbox_uri)\n",
"\n",
"# Export bounding regions to EC2. \n",
"pkl_path = \"/data/data0/home/jtriz/data/charlotte-linkid-bboxes.pkl\"\n",
"with open(pkl_path, 'wb') as fpkl:\n",
" pickle.dump(dbbox, fpkl, pickle.HIGHEST_PROTOCOL)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"\n",
"pp_name = \"/data/data0/home/jtriz/data/trips/summary\"\n",
"\n",
"pp = Path(pp_name)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"if not pp.exists():\n",
" os.makedirs(pp)\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "torch",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
},
"orig_nbformat": 4,
"vscode": {
"interpreter": {
"hash": "3220fa3af0552d0548d8ba089b0c12a70a21bfbacbd2bacd5a40b3c64344bb20"
}
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment