Skip to content

Instantly share code, notes, and snippets.

@adriantre
adriantre / val_loss_breaking.py
Created May 7, 2020 09:42
Returning val_loss in validation_epoch_end results in TypeError: can't pickle _thread.lock objects
# Version: pytorch-lightning==0.7.5
# Run: python -m lightning_run --batch_size 5 --num_workers 0 --max_epochs 1
# NOTE(review): gist preview — indentation has been stripped and the body is
# truncated mid-expression; do not paste as-is.
# PyTorch Lightning (0.7.5) hook: runs one validation batch and computes the
# predictions, loss, and (truncated below) an accuracy value.
def validation_step(self, batch, batx_idx) if False else None  # placeholder? no
@adriantre
adriantre / postgis_setup.sh
Last active November 20, 2019 22:55
Postgis 101
# Install PostgreSQL and the PostGIS extension on Debian/Ubuntu.
# Fix: the apt package is named "postgresql", not "postgres".
sudo apt-get install postgresql
sudo apt-get install postgis
# Connect to postgres with user postgres
psql postgres
# List databases
# Fix: inside psql, \l lists databases; \d+ lists relations (tables, views,
# sequences, ...) of the *current* database, which is not what the comment says.
\l
# Create database
from functools import partial
import multiprocessing
import warnings
import rasterio
import pandas as pd
import numpy as np
# NOTE(review): per-row worker intended to be called via DataFrame.apply
# (presumably in parallel via multiprocessing/partial, given the imports above
# — TODO confirm); body is truncated in this gist preview.
def func_to_run_on_each_row():
# My function that should be called by apply
@adriantre
adriantre / rasterize_vector.py
Created March 25, 2019 10:56
Rasterize Vector based on raster
# NOTE(review): gist preview — indentation stripped and the rasterization step
# (gdal.RasterizeLayer or similar writing to output_path) is truncated.
# Derives the reference raster's extent and pixel grid so the vector file can
# be burned onto an identically-aligned raster.
def RasterizeVectorFileUsingRasterFile(raster_path, vector_path, output_path):
raster_ds = gdal.Open(raster_path)
# GDAL geotransform: (origin_x, pixel_w, row_rot, origin_y, col_rot, pixel_h).
geo_transform = raster_ds.GetGeoTransform()
x_min = geo_transform[0]
y_max = geo_transform[3]
x_max = x_min + geo_transform[1] * raster_ds.RasterXSize
# pixel height (geo_transform[5]) is negative for north-up rasters, so this
# subtracts the raster's height from y_max.
y_min = y_max + geo_transform[5] * raster_ds.RasterYSize
x_res = raster_ds.RasterXSize
y_res = raster_ds.RasterYSize
@adriantre
adriantre / Dockerfile
Created November 27, 2018 13:08
Dockerfile for spatial data analysis and tensorflow
# Build from existing tensorflow notebook
# https://github.com/jupyter/docker-stacks
FROM jupyter/tensorflow-notebook:latest
# Run as the non-root notebook user provided by the base image.
USER $NB_UID
# Install gdal (geospatial operations)
# Pins libgdal to the 2.2.x series from the conda-forge channel;
# --quiet --yes keeps the image build non-interactive.
RUN conda install --quiet --yes \
'conda-forge::libgdal=2.2*'
# NOTE(review): gist preview — the rest of the docstring and the entire body
# (presumably folium map construction — TODO confirm) are truncated.
def map_points(df, lat_col='latitude', lon_col='longitude', zoom_start=11, \
plot_points=False, pt_radius=15, \
draw_heatmap=False, heat_map_weights_col=None, \
heat_map_weights_normalize=True, heat_map_radius=15):
"""Creates a map given a dataframe of points. Can also produce a heatmap overlay
Arg:
df: dataframe containing points to maps
lat_col: Column containing latitude (string)
lon_col: Column containing longitude (string)
@adriantre
adriantre / explain_analyze_gist_index.sql
Last active May 16, 2018 12:54
Explain analyze gist index slowing down query
-- Count the rows of "points" whose geometry lies inside a given polygon.
WITH my_polygon as (
SELECT ST_GeomFromEWKT('<EWKT polygon>') as geom
)
SELECT count(*)
FROM points as a
, my_polygon as p
-- Fix: once a relation is aliased, PostgreSQL hides the original name, so the
-- original "points.geom" / "my_polygon.geom" raised
-- 'missing FROM-clause entry for table "points"'. Use the aliases instead.
WHERE ST_Within(a.geom, p.geom);
Small table, no index
@adriantre
adriantre / batch_inser.py
Created May 4, 2018 18:31
Batch insert ais messages
# Batch-insert AIS position messages into the ais_position table.
# Args (inferred from names — TODO confirm against callers):
#   cur: open database cursor; data_list: rows to insert;
#   batch_size: rows per statement; received_by: receiver id stamped on rows.
# NOTE(review): gist preview — indentation stripped, and the code that actually
# executes `sql` (e.g. psycopg2.extras.execute_values, given the "VALUES %s"
# placeholder — verify) is truncated. Duplicate (mmsi, timestamp) rows are
# silently skipped via ON CONFLICT ... DO NOTHING.
def insertPositionData(cur, data_list, batch_size, received_by):
sql = '''
INSERT INTO ais_position
(timestamp, mmsi, geom, mgrs_name, accuracy,
course, speed, status, status_text, type, heading,
shiptype, shiptype_text, received_by, other)
VALUES %s
ON CONFLICT (mmsi, timestamp)
DO NOTHING
'''
@adriantre
adriantre / count_ships.sql
Last active May 15, 2018 19:50
Count ships within each grid-tile
-- Count number of ships (ais_position) within each grid-tile (mgrs)
-- Query 1 : Spatial query
-- Cross join every ship position against every tile, keep the pairs where the
-- position's point lies inside the tile polygon, then count per tile name.
SELECT count(*)
FROM ais_position as a
, mgrs as m
WHERE ST_Within(a.geom, m.geom)
GROUP BY m.name;
-- Way too slow. Instead doing ST_Within on INSERT
# -*- coding: utf-8 -*-
import os
import re
import sys
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from pyspark.sql import SparkSession
from pyspark import SparkContext