Skip to content

Instantly share code, notes, and snippets.

@adriantre
adriantre / val_loss_breaking.py
Created May 7, 2020 09:42
Returning val_loss in validation_epoch_end results in TypeError: can't pickle _thread.lock objects
# Version: pytorch-lightning==0.7.5
# Run: python -m lightning_run --batch_size 5 --num_workers 0 --max_epochs 1
# NOTE(review): gist preview — indentation has been stripped and the body is
# truncated mid-expression; do not paste as-is.
# PyTorch Lightning (0.7.5) hook: runs one validation batch and computes the
# predictions, loss, and (truncated below) an accuracy value.
def validation_step(self, batch, batx_idx) if False else None  # placeholder? no
@adriantre
adriantre / postgis_setup.sh
Last active November 20, 2019 22:55
Postgis 101
# Install PostgreSQL and the PostGIS extension on Debian/Ubuntu.
# Fix: the apt package is named "postgresql", not "postgres".
sudo apt-get install postgresql
sudo apt-get install postgis
# Connect to postgres with user postgres
psql postgres
# List databases
# Fix: inside psql, \l lists databases; \d+ lists relations (tables, views,
# sequences, ...) of the *current* database, which is not what the comment says.
\l
# Create database
from functools import partial
import multiprocessing
import warnings
import rasterio
import pandas as pd
import numpy as np
# NOTE(review): per-row worker intended to be called via DataFrame.apply
# (presumably in parallel via multiprocessing/partial, given the imports above
# — TODO confirm); body is truncated in this gist preview.
def func_to_run_on_each_row():
# My function that should be called by apply
@adriantre
adriantre / rasterize_vector.py
Created March 25, 2019 10:56
Rasterize Vector based on raster
# NOTE(review): gist preview — indentation stripped and the rasterization step
# (gdal.RasterizeLayer or similar writing to output_path) is truncated.
# Derives the reference raster's extent and pixel grid so the vector file can
# be burned onto an identically-aligned raster.
def RasterizeVectorFileUsingRasterFile(raster_path, vector_path, output_path):
raster_ds = gdal.Open(raster_path)
# GDAL geotransform: (origin_x, pixel_w, row_rot, origin_y, col_rot, pixel_h).
geo_transform = raster_ds.GetGeoTransform()
x_min = geo_transform[0]
y_max = geo_transform[3]
x_max = x_min + geo_transform[1] * raster_ds.RasterXSize
# pixel height (geo_transform[5]) is negative for north-up rasters, so this
# subtracts the raster's height from y_max.
y_min = y_max + geo_transform[5] * raster_ds.RasterYSize
x_res = raster_ds.RasterXSize
y_res = raster_ds.RasterYSize
@adriantre
adriantre / Dockerfile
Created November 27, 2018 13:08
Dockerfile for spatial data analysis and tensorflow
# Build from existing tensorflow notebook
# https://github.com/jupyter/docker-stacks
FROM jupyter/tensorflow-notebook:latest
# Run as the non-root notebook user provided by the base image.
USER $NB_UID
# Install gdal (geospatial operations)
# Pins libgdal to the 2.2.x series from the conda-forge channel;
# --quiet --yes keeps the image build non-interactive.
RUN conda install --quiet --yes \
'conda-forge::libgdal=2.2*'
# NOTE(review): gist preview — the rest of the docstring and the entire body
# (presumably folium map construction — TODO confirm) are truncated.
def map_points(df, lat_col='latitude', lon_col='longitude', zoom_start=11, \
plot_points=False, pt_radius=15, \
draw_heatmap=False, heat_map_weights_col=None, \
heat_map_weights_normalize=True, heat_map_radius=15):
"""Creates a map given a dataframe of points. Can also produce a heatmap overlay
Arg:
df: dataframe containing points to maps
lat_col: Column containing latitude (string)
lon_col: Column containing longitude (string)
@adriantre
adriantre / explain_analyze_gist_index.sql
Last active May 16, 2018 12:54
Explain analyze gist index slowing down query
-- Count the rows of "points" whose geometry lies inside a given polygon.
WITH my_polygon as (
SELECT ST_GeomFromEWKT('<EWKT polygon>') as geom
)
SELECT count(*)
FROM points as a
, my_polygon as p
-- Fix: once a relation is aliased, PostgreSQL hides the original name, so the
-- original "points.geom" / "my_polygon.geom" raised
-- 'missing FROM-clause entry for table "points"'. Use the aliases instead.
WHERE ST_Within(a.geom, p.geom);
Small table, no index
@adriantre
adriantre / batch_inser.py
Created May 4, 2018 18:31
Batch insert ais messages
# Batch-insert AIS position messages into the ais_position table.
# Args (inferred from names — TODO confirm against callers):
#   cur: open database cursor; data_list: rows to insert;
#   batch_size: rows per statement; received_by: receiver id stamped on rows.
# NOTE(review): gist preview — indentation stripped, and the code that actually
# executes `sql` (e.g. psycopg2.extras.execute_values, given the "VALUES %s"
# placeholder — verify) is truncated. Duplicate (mmsi, timestamp) rows are
# silently skipped via ON CONFLICT ... DO NOTHING.
def insertPositionData(cur, data_list, batch_size, received_by):
sql = '''
INSERT INTO ais_position
(timestamp, mmsi, geom, mgrs_name, accuracy,
course, speed, status, status_text, type, heading,
shiptype, shiptype_text, received_by, other)
VALUES %s
ON CONFLICT (mmsi, timestamp)
DO NOTHING
'''
@adriantre
adriantre / count_ships.sql
Last active May 15, 2018 19:50
Count ships within each grid-tile
-- Count number of ships (ais_position) within each grid-tile (mgrs)
-- Query 1 : Spatial query
-- Cross join every ship position against every tile, keep the pairs where the
-- position's point lies inside the tile polygon, then count per tile name.
SELECT count(*)
FROM ais_position as a
, mgrs as m
WHERE ST_Within(a.geom, m.geom)
GROUP BY m.name;
-- Way too slow. Instead doing ST_Within on INSERT
# -*- coding: utf-8 -*-
import os
import re
import sys
from pyspark.sql.functions import *
from pyspark.sql.types import *
from pyspark.sql.window import Window
from pyspark.sql import SparkSession
from pyspark import SparkContext