# wboykinm/robosat_vt.sh

## robosat_vt.sh
# robosat workflow to classify buildings in rutland, vt:

# (to copy any file out of the running container and inspect it locally):
# docker cp <CONTAINER_ID>:/app/<filename> .

# Fetch the CPU build of the robosat image, then start a container from it
# with an interactive terminal running bash.
# NOTE(review): the steps below look like they are meant to be typed inside
# that container shell rather than run as one host-side script — confirm.
docker pull mapbox/robosat:latest-cpu
docker run --interactive --tty mapbox/robosat:latest-cpu /bin/bash

# configure some things
# Installs sudo first, then the tools used by the later steps: curl and wget
# for downloads, nodejs via the NodeSource setup script, GDAL (ogr2ogr) from
# the ubuntugis PPA, and csvkit from pip.
apt-get update && apt-get install -y sudo && rm -rf /var/lib/apt/lists/*
sudo apt-get update
sudo apt-get install -y curl software-properties-common
# -f makes curl exit non-zero on an HTTP error instead of handing the error
# page to bash; -S still prints the failure even though -s hides progress.
# NOTE(review): this runs a remote script unreviewed; consider downloading
# and inspecting it before executing.
curl -fsSL https://deb.nodesource.com/setup_10.x | bash -
sudo add-apt-repository -y ppa:ubuntugis/ppa && sudo apt-get update
sudo apt-get install -y wget nodejs gdal-bin vim less
pip3 install csvkit

# get vt osm data (the later steps read it as vermont-latest.osm.pbf)
wget 'https://download.geofabrik.de/north-america/us/vermont-latest.osm.pbf'

# pull out buildings as geojson
./rs extract \
  --type building \
  vermont-latest.osm.pbf \
  vt-building.geojson

# clip training data to middlebury town:
# the four numbers after -clipsrc are the clip rectangle in lon/lat.
ogr2ogr \
  -f GeoJSON \
  -t_srs EPSG:4326 \
  middlebury-building.geojson \
  vt-building.geojson \
  -clipsrc -73.2134 43.9801 -73.0891 44.0650

# generate a single geojson feature for rutland area:
# one rectangular polygon, written on a single line.
printf '%s\n' '{ "type": "FeatureCollection", "features": [ { "type": "Feature", "properties": {}, "geometry": { "type": "Polygon", "coordinates": [ [ [ -73.4161376953125, 43.33816367935935 ], [ -72.83935546875, 43.33816367935935 ], [ -72.83935546875, 43.819665724206956 ], [ -73.4161376953125, 43.819665724206956 ], [ -73.4161376953125, 43.33816367935935 ] ] ] } } ] }' > rutland.geojson

# get cover tile list for both
# (zoom 17 tile lists: <area>.geojson in, <area>.csv out)
for area in middlebury-building rutland; do
  ./rs cover --zoom 17 "${area}.geojson" "${area}.csv"
done

# download from vcgi tiles (because free is good, and CIR might be dope)
# The {z}/{y}/{x} placeholders are passed through literally for rs to fill in.
tile_url='https://maps.vcgi.vermont.gov/arcgis/rest/services/EGC_services/IMG_VCGI_CIR_WM_CACHE/ImageServer/tile/{z}/{y}/{x}'
./rs download "$tile_url" middlebury-building.csv middlebury-images
./rs download "$tile_url" rutland.csv rutland-images


# rasterize (after setting up the config file)
# The dataset config is written from a quoted here-doc, so nothing inside it
# is expanded by the shell. The here-doc intentionally starts and ends with a
# blank line; both are part of the file content.
cat > config/dataset-building-middlebury.toml <<'EOF'

# Configuration related to a specific dataset.
# For syntax see: https://github.com/toml-lang/toml#table-of-contents


# Dataset specific common attributes.
[common]

  # The slippy map dataset's base directory.
  dataset = 'vt/middlebury'

  # Human representation for classes.
  classes = ['background', 'building']

  # Color map for visualization and representing classes in masks.
  # Note: available colors can be found in 'robosat/colors.py'
  colors  = ['denim', 'orange']


# Dataset specific class weights computes on the training data.
# Note: use './rs weights -h' to compute these for new datasets.
[weights]
  values = [1.451183, 21.289612]

EOF

# Inputs: building features and the tile list; output directory:
# middlebury-building (zoom 17, 256 px tiles).
./rs rasterize \
  --dataset config/dataset-building-middlebury.toml \
  --zoom 17 \
  --size 256 \
  middlebury-building.geojson middlebury-building.csv middlebury-building

# set up some directories
# Start from a clean dataset tree. -f keeps the very first run (no vt/ yet)
# from reporting an error, and a single mkdir -p creates every level at once.
rm -rf vt
mkdir -p \
  vt/middlebury/training/images \
  vt/middlebury/training/labels \
  vt/middlebury/validation/images \
  vt/middlebury/validation/labels \
  vt/middlebury/tmp
# All downloaded images and rasterized labels start out in the training
# split; the validation directories are left empty here.
cp -r middlebury-images/* vt/middlebury/training/images/
cp -r middlebury-building/* vt/middlebury/training/labels/

# weights
# NOTE(review): the [weights] values in the dataset config are hard-coded;
# presumably they should be updated from this command's result — confirm.
./rs weights --dataset config/dataset-building-middlebury.toml

# train (after setting up another config file)
# The model config is written from a quoted here-doc, so nothing inside it is
# expanded by the shell. The here-doc intentionally starts and ends with a
# blank line; both are part of the file content.
cat > config/model-unet-middlebury.toml <<'EOF'

# Configuration related to a specific model.
# For syntax see: https://github.com/toml-lang/toml#table-of-contents


# Model specific common attributes.
[common]

  # Use CUDA for GPU acceleration.
  cuda       = false

  # Batch size for training.
  batch_size = 1

  # Image side size in pixels.
  image_size = 256

  # Directory where to save checkpoints to during training.
  checkpoint = 'vt/middlebury/tmp/'


# Model specific optimization parameters.
[opt]

  # Total number of epochs to train for.
  epochs     = 12

  # Learning rate for the optimizer.
  lr         = 0.0001

  # Weight decay l2 penalty for the optimizer
  decay      = 0.0001

  # Loss function name (e.g 'Lovasz', 'mIoU' or 'CrossEntropy')
  loss = 'Lovasz'

EOF

# peel off a validation set (30%)

# Print how many tiles to hold out for validation.
# Arguments: $1 - line count of the tile CSV
# Outputs:   the hold-out size on stdout
# Multiplies before dividing so integer division does not round the share
# down to a multiple of 3 (19 lines -> 5 tiles rather than 3).
validation_tile_count() {
  local total_lines=$1
  # NOTE(review): the "- 1" is kept from the earlier arithmetic; presumably it
  # allows for one non-tile line in the CSV — confirm.
  printf '%s\n' "$(( (total_lines - 1) * 3 / 10 ))"
}

if [[ -f middlebury-building.csv ]]; then
  holdout=$(validation_tile_count "$(wc -l < middlebury-building.csv)")
  # Random sample without replacement of the tile rows.
  shuf -n "$holdout" middlebury-building.csv > validation.csv
  # Each row is "x,y,z"; let read split it instead of starting three csvcut
  # processes per tile.
  while IFS=, read -r x y z; do
    z=${z%$'\r'} # tolerate CRLF line endings in the CSV
    [[ -n "$x" && -n "$y" && -n "$z" ]] || continue
    mkdir -p "vt/middlebury/validation/images/$z/$x/"
    mv "vt/middlebury/training/images/$z/$x/$y.webp" "vt/middlebury/validation/images/$z/$x/"
    mkdir -p "vt/middlebury/validation/labels/$z/$x/"
    mv "vt/middlebury/training/labels/$z/$x/$y.png" "vt/middlebury/validation/labels/$z/$x/"
  done < validation.csv
else
  printf 'middlebury-building.csv not found; run the cover step first\n' >&2
fi

./rs train \
  --model config/model-unet-middlebury.toml \
  --dataset config/dataset-building-middlebury.toml \
  --workers 0

# predict

# Print the last checkpoint file found in a directory.
# Arguments: $1 - directory holding checkpoint-*.pth files
# Outputs:   path of the last match in glob order on stdout
# Returns:   non-zero when the directory holds no checkpoint
# The numbers in the file names are zero-padded, so glob order is numeric
# order and the last match is the highest-numbered checkpoint.
latest_checkpoint() {
  local candidate newest=
  for candidate in "$1"/checkpoint-*.pth; do
    [[ -e "$candidate" ]] && newest=$candidate
  done
  [[ -n "$newest" ]] && printf '%s\n' "$newest"
}

mkdir -p probs
# A fixed checkpoint-00001-of-00001.pth name does not fit the model config's
# epochs = 12, so the checkpoint is looked up instead of hard-coded.
# The Rutland imagery was downloaded to rutland-images, so that directory is
# the prediction input.
if checkpoint=$(latest_checkpoint vt/middlebury/tmp); then
  ./rs predict \
    --batch_size 1 \
    --checkpoint "$checkpoint" \
    --tile_size 256 \
    --model config/model-unet-middlebury.toml \
    --dataset config/dataset-building-middlebury.toml \
    rutland-images probs
else
  printf 'no checkpoint found in vt/middlebury/tmp; run training first\n' >&2
fi
	# robosat workflow to classify buildings in rutland, vt:

	# (to copy any file out of the running container and inspect it locally):
	# docker cp <CONTAINER_ID>:/app/<filename> .

	# Fetch the CPU build of the robosat image, then start a container from it
	# with an interactive terminal running bash.
	# NOTE(review): the steps below look like they are meant to be typed inside
	# that container shell rather than run as one host-side script — confirm.
	docker pull mapbox/robosat:latest-cpu
	docker run --interactive --tty mapbox/robosat:latest-cpu /bin/bash

	# configure some things
	# Installs sudo first, then the tools used by the later steps: curl and wget
	# for downloads, nodejs via the NodeSource setup script, GDAL (ogr2ogr) from
	# the ubuntugis PPA, and csvkit from pip.
	apt-get update && apt-get install -y sudo && rm -rf /var/lib/apt/lists/*
	sudo apt-get update
	sudo apt-get install -y curl software-properties-common
	# The pipe must be a real pipeline: an escaped "\|" is handed to curl as a
	# literal argument and the setup script never reaches bash.
	# -f makes curl exit non-zero on an HTTP error instead of handing the error
	# page to bash; -S still prints the failure even though -s hides progress.
	# NOTE(review): this runs a remote script unreviewed; consider downloading
	# and inspecting it before executing.
	curl -fsSL https://deb.nodesource.com/setup_10.x | bash -
	sudo add-apt-repository -y ppa:ubuntugis/ppa && sudo apt-get update
	sudo apt-get install -y wget nodejs gdal-bin vim less
	pip3 install csvkit

	# get vt osm data (the later steps read it as vermont-latest.osm.pbf)
	wget 'https://download.geofabrik.de/north-america/us/vermont-latest.osm.pbf'

	# pull out buildings as geojson
	./rs extract \
	  --type building \
	  vermont-latest.osm.pbf \
	  vt-building.geojson

	# clip training data to middlebury town:
	# the four numbers after -clipsrc are the clip rectangle in lon/lat.
	ogr2ogr \
	  -f GeoJSON \
	  -t_srs EPSG:4326 \
	  middlebury-building.geojson \
	  vt-building.geojson \
	  -clipsrc -73.2134 43.9801 -73.0891 44.0650

	# generate a single geojson feature for rutland area:
	# one rectangular polygon, written on a single line.
	printf '%s\n' '{ "type": "FeatureCollection", "features": [ { "type": "Feature", "properties": {}, "geometry": { "type": "Polygon", "coordinates": [ [ [ -73.4161376953125, 43.33816367935935 ], [ -72.83935546875, 43.33816367935935 ], [ -72.83935546875, 43.819665724206956 ], [ -73.4161376953125, 43.819665724206956 ], [ -73.4161376953125, 43.33816367935935 ] ] ] } } ] }' > rutland.geojson

	# get cover tile list for both
	# (zoom 17 tile lists: <area>.geojson in, <area>.csv out)
	for area in middlebury-building rutland; do
	  ./rs cover --zoom 17 "${area}.geojson" "${area}.csv"
	done

	# download from vcgi tiles (because free is good, and CIR might be dope)
	# The {z}/{y}/{x} placeholders are passed through literally for rs to fill in.
	tile_url='https://maps.vcgi.vermont.gov/arcgis/rest/services/EGC_services/IMG_VCGI_CIR_WM_CACHE/ImageServer/tile/{z}/{y}/{x}'
	./rs download "$tile_url" middlebury-building.csv middlebury-images
	./rs download "$tile_url" rutland.csv rutland-images


	# rasterize (after setting up the config file)
	# Writes the dataset config to config/dataset-building-middlebury.toml:
	# base directory vt/middlebury, classes background/building, and fixed
	# class weights. The string is double-quoted but contains nothing the shell
	# would expand. Because it is tab-indented, the non-blank lines written to
	# the file carry a leading tab, and the file begins with a blank line.
	echo "
	# Configuration related to a specific dataset.
	# For syntax see: https://github.com/toml-lang/toml#table-of-contents


	# Dataset specific common attributes.
	[common]

	# The slippy map dataset's base directory.
	dataset = 'vt/middlebury'

	# Human representation for classes.
	classes = ['background', 'building']

	# Color map for visualization and representing classes in masks.
	# Note: available colors can be found in 'robosat/colors.py'
	colors = ['denim', 'orange']


	# Dataset specific class weights computes on the training data.
	# Note: use './rs weights -h' to compute these for new datasets.
	[weights]
	values = [1.451183, 21.289612]
	" > config/dataset-building-middlebury.toml

	# Inputs: building features (geojson) and the tile list (csv); output
	# directory: middlebury-building. Zoom 17 matches the cover step.
	./rs rasterize --dataset config/dataset-building-middlebury.toml --zoom 17 --size 256 middlebury-building.geojson middlebury-building.csv middlebury-building

	# set up some directories
	# Start from a clean dataset tree. -f keeps the very first run (no vt/ yet)
	# from reporting an error, and a single mkdir -p creates every level at once.
	rm -rf vt
	mkdir -p \
	  vt/middlebury/training/images \
	  vt/middlebury/training/labels \
	  vt/middlebury/validation/images \
	  vt/middlebury/validation/labels \
	  vt/middlebury/tmp
	# All downloaded images and rasterized labels start out in the training
	# split; the validation directories are left empty here.
	cp -r middlebury-images/* vt/middlebury/training/images/
	cp -r middlebury-building/* vt/middlebury/training/labels/

	# weights
	# NOTE(review): the [weights] values in the dataset config are hard-coded;
	# presumably they should be updated from this command's result — confirm.
	./rs weights --dataset config/dataset-building-middlebury.toml

	# train (after setting up another config file)
	# Writes the model config to config/model-unet-middlebury.toml: CPU only
	# (cuda = false), batch size 1, 256 px images, checkpoints under
	# vt/middlebury/tmp/, 12 epochs, and the Lovasz loss. The string is
	# double-quoted but contains nothing the shell would expand. Because it is
	# tab-indented, the non-blank lines written to the file carry a leading tab,
	# and the file begins with a blank line.
	echo "
	# Configuration related to a specific model.
	# For syntax see: https://github.com/toml-lang/toml#table-of-contents


	# Model specific common attributes.
	[common]

	# Use CUDA for GPU acceleration.
	cuda = false

	# Batch size for training.
	batch_size = 1

	# Image side size in pixels.
	image_size = 256

	# Directory where to save checkpoints to during training.
	checkpoint = 'vt/middlebury/tmp/'


	# Model specific optimization parameters.
	[opt]

	# Total number of epochs to train for.
	epochs = 12

	# Learning rate for the optimizer.
	lr = 0.0001

	# Weight decay l2 penalty for the optimizer
	decay = 0.0001

	# Loss function name (e.g 'Lovasz', 'mIoU' or 'CrossEntropy')
	loss = 'Lovasz'
	" > config/model-unet-middlebury.toml

	# peel off a validation set (30%)

	# Print how many tiles to hold out for validation.
	# Arguments: $1 - line count of the tile CSV
	# Outputs:   the hold-out size on stdout
	# Multiplies before dividing so integer division does not round the share
	# down to a multiple of 3 (19 lines -> 5 tiles rather than 3).
	validation_tile_count() {
	  local total_lines=$1
	  # NOTE(review): the "- 1" is kept from the earlier arithmetic; presumably it
	  # allows for one non-tile line in the CSV — confirm.
	  printf '%s\n' "$(( (total_lines - 1) * 3 / 10 ))"
	}

	if [[ -f middlebury-building.csv ]]; then
	  holdout=$(validation_tile_count "$(wc -l < middlebury-building.csv)")
	  # Random sample without replacement of the tile rows. (An escaped "\|"
	  # would be passed along as a literal argument instead of forming a
	  # pipeline, so no pipe is used here at all.)
	  shuf -n "$holdout" middlebury-building.csv > validation.csv
	  # Each row is "x,y,z"; let read split it instead of starting three csvcut
	  # processes per tile.
	  while IFS=, read -r x y z; do
	    z=${z%$'\r'} # tolerate CRLF line endings in the CSV
	    [[ -n "$x" && -n "$y" && -n "$z" ]] || continue
	    mkdir -p "vt/middlebury/validation/images/$z/$x/"
	    mv "vt/middlebury/training/images/$z/$x/$y.webp" "vt/middlebury/validation/images/$z/$x/"
	    mkdir -p "vt/middlebury/validation/labels/$z/$x/"
	    mv "vt/middlebury/training/labels/$z/$x/$y.png" "vt/middlebury/validation/labels/$z/$x/"
	  done < validation.csv
	else
	  printf 'middlebury-building.csv not found; run the cover step first\n' >&2
	fi

	./rs train \
	  --model config/model-unet-middlebury.toml \
	  --dataset config/dataset-building-middlebury.toml \
	  --workers 0

	# predict

	# Print the last checkpoint file found in a directory.
	# Arguments: $1 - directory holding checkpoint-*.pth files
	# Outputs:   path of the last match in glob order on stdout
	# Returns:   non-zero when the directory holds no checkpoint
	# The numbers in the file names are zero-padded, so glob order is numeric
	# order and the last match is the highest-numbered checkpoint.
	latest_checkpoint() {
	  local candidate newest=
	  for candidate in "$1"/checkpoint-*.pth; do
	    [[ -e "$candidate" ]] && newest=$candidate
	  done
	  [[ -n "$newest" ]] && printf '%s\n' "$newest"
	}

	mkdir -p probs
	# A fixed checkpoint-00001-of-00001.pth name does not fit the model config's
	# epochs = 12, so the checkpoint is looked up instead of hard-coded.
	# The Rutland imagery was downloaded to rutland-images, so that directory is
	# the prediction input.
	if checkpoint=$(latest_checkpoint vt/middlebury/tmp); then
	  ./rs predict \
	    --batch_size 1 \
	    --checkpoint "$checkpoint" \
	    --tile_size 256 \
	    --model config/model-unet-middlebury.toml \
	    --dataset config/dataset-building-middlebury.toml \
	    rutland-images probs
	else
	  printf 'no checkpoint found in vt/middlebury/tmp; run training first\n' >&2
	fi