Simon Wrigley wriglz

## nearest_neighbour_bigquery.sql
SELECT
  a.id,
  -- Note that on line 13 the results are grouped by a.id so if you want to return the geometry from table_a,
  -- here you can use the function ANY_VALUE(a.geom) (as you cannot group by geometry in BigQuery)
  ARRAY_AGG(b.id ORDER BY ST_Distance(a.geog, b.geog) LIMIT 1) [ORDINAL(1)] AS neighbor_id
  -- Here we return the id of the closest geometry from table_b to each geometry in table_a.
  -- If you want to include more fields from table_b here you can use STRUCT(b.id, b.second_field, b.third_field) etc.
FROM
  table_a a
JOIN

## ndjson.sh
# NB. First install jq using homebrew
cat original.geojson | jq -c '.[]' > new_line_delimited.json

## national_park_voronoi.sql
/*
Data sources for National Park boundaries:
- England: https://environment.data.gov.uk/DefraDataDownload/?mapService=NE/NationalParksEngland&Mode=spatial
- Scotland: https://spatialdata.gov.scot/geonetwork/srv/eng/catalog.search#/home
- Wales: https://datamap.gov.wales/layers/inspire-nrw:NRW_NATIONAL_PARK
*/

WITH
  park_info AS(
    -- Select required information about each National Park from a merged dataset

## bigquery_GeoJSON.sql
UPDATE
  `project.dataset.table`
SET
  geom = ST_GEOGFROMGEOJSON(
    JSON_QUERY(geo_json,'$.geometry'), make_valid => TRUE)
WHERE
  geom IS NULL

## snap_points_to_lines.sql
-- Snap the points to their closest lines, found in the subquery below
SELECT
  point_id,
  line_id,
  ST_LINE_INTERPOLATE_POINT(line_geom,
  ST_Line_Locate_Point(line_geom, point_geom)) AS snapped_points --Create the snapped points
FROM

--Subquery to find the closest line to each point (within a pre-defined raidus)
(

## london_pub_count.py
import pandas as pd
import geopandas as gpd
from shapely import wkt
import time

# Read pub points from CSV to a pandas dataframe
pubs = pd.read_csv('~/Documents/pub_points.csv',
                   header=None, names=['name', 'coordinates'])

# Parse the WKT coordinate format from PostGIS

## OSMnx_requirements.txt
networkx
geopandas
pillow
matplotlib

## remove_old_git_branches
# Remove all merged, closed branches from your local machine, leaving unmerged branches in place

git remote prune origin

git branch -vv | grep 'origin/.*: gone]' | awk '{print $1}' | xargs git branch -d

## merge_shapefiles.py
from pathlib import Path
import pandas
import geopandas

folder = Path("/path/to/shapefile/folder")
shapefiles = folder.glob("shapefile_name_with_wildcards_*.shp")
gdf = pandas.concat([
    geopandas.read_file(shp)
    for shp in shapefiles
]).pipe(geopandas.GeoDataFrame)

## delete_duplicate_rows_based_on_ctid.sql
DELETE
FROM schema.table a USING
  (SELECT MIN(ctid) AS ctid,
          duplicate_field
   FROM schema.table
   GROUP BY duplicate_field
   HAVING COUNT(*) > 1) b
WHERE a.duplicate_field = b.duplicate_field
  AND a.ctid <> b.ctid;
	SELECT
	a.id,
	-- Note that on line 13 the results are grouped by a.id so if you want to return the geometry from table_a,
	-- here you can use the function ANY_VALUE(a.geom) (as you cannot group by geometry in BigQuery)
	ARRAY_AGG(b.id ORDER BY ST_Distance(a.geog, b.geog) LIMIT 1) [ORDINAL(1)] AS neighbor_id
	-- Here we return the id of the closest geometry from table_b to each geometry in table_a.
	-- If you want to include more fields from table_b here you can use STRUCT(b.id, b.second_field, b.third_field) etc.
	FROM
	table_a a
	JOIN
	# NB. First install jq using homebrew
	cat original.geojson \| jq -c '.[]' > new_line_delimited.json
	/*
	Data sources for National Park boundaries:
	- England: https://environment.data.gov.uk/DefraDataDownload/?mapService=NE/NationalParksEngland&Mode=spatial
	- Scotland: https://spatialdata.gov.scot/geonetwork/srv/eng/catalog.search#/home
	- Wales: https://datamap.gov.wales/layers/inspire-nrw:NRW_NATIONAL_PARK
	*/

	WITH
	park_info AS(
	-- Select required information about each National Park from a merged dataset
	UPDATE
	`project.dataset.table`
	SET
	geom = ST_GEOGFROMGEOJSON(
	JSON_QUERY(geo_json,'$.geometry'), make_valid => TRUE)
	WHERE
	geom IS NULL
	-- Snap the points to their closest lines, found in the subquery below
	SELECT
	point_id,
	line_id,
	ST_LINE_INTERPOLATE_POINT(line_geom,
	ST_Line_Locate_Point(line_geom, point_geom)) AS snapped_points --Create the snapped points
	FROM

	--Subquery to find the closest line to each point (within a pre-defined raidus)
	(
	import pandas as pd
	import geopandas as gpd
	from shapely import wkt
	import time

	# Read pub points from CSV to a pandas dataframe
	pubs = pd.read_csv('~/Documents/pub_points.csv',
	header=None, names=['name', 'coordinates'])

	# Parse the WKT coordinate format from PostGIS
	# Remove all merged, closed branches from your local machine, leaving unmerged branches in place

	git remote prune origin

	git branch -vv \| grep 'origin/.*: gone]' \| awk '{print $1}' \| xargs git branch -d
	from pathlib import Path
	import pandas
	import geopandas

	folder = Path("/path/to/shapefile/folder")
	shapefiles = folder.glob("shapefile_name_with_wildcards_*.shp")
	gdf = pandas.concat([
	geopandas.read_file(shp)
	for shp in shapefiles
	]).pipe(geopandas.GeoDataFrame)
	DELETE
	FROM schema.table a USING
	(SELECT MIN(ctid) AS ctid,
	duplicate_field
	FROM schema.table
	GROUP BY duplicate_field
	HAVING COUNT(*) > 1) b
	WHERE a.duplicate_field = b.duplicate_field
	AND a.ctid <> b.ctid;