Created
February 14, 2019 22:23
-
-
Save sfkeller/0e412e302348703c51199d1b466ccdae to your computer and use it in GitHub Desktop.
Stored Function link_pg_server() - Linking to a remote PostgreSQL db read-only.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*================================================== | |
* Stored Function link_pg_server(...) | |
* ================================================== | |
* | |
* Linking to a remote PostgreSQL db read-only. | |
* Uses postgres_fdw the SQL/MED implementation | |
* (Foreign Data Wrapper). | |
* Goal is to achieve aka "SQL Endpoints" for linked data | |
* similar to SPARQL. | |
* | |
* Caveats of this implementation: | |
* - Must be superuser to create extension postgres_fdw. | |
* - Joins are slow with large datasets. | |
* | |
* Tested with PostgreSQL 11. REPLACE "<...>" | |
* | |
* Credits: https://robots.thoughtbot.com/postgres-foreign-data-wrapper | |
* | |
* Below is the original script: | |
* ================================================== | |
-- Step 1: Preparation | |
CREATE EXTENSION IF NOT EXISTS postgres_fdw; -- Must be superuser | |
DROP SERVER IF EXISTS gis_db_server CASCADE; | |
CREATE SERVER gis_db_server -- remote server name | |
FOREIGN DATA WRAPPER postgres_fdw | |
OPTIONS (host '<ip>', port '<port>', dbname '<dbname>'); | |
CREATE USER MAPPING FOR CURRENT_USER -- Connect | |
SERVER gis_db_server | |
OPTIONS (user '<username>', password '<pw>'); | |
-- Step 2: Import Schema | |
DROP SCHEMA IF EXISTS gis_db_public CASCADE; | |
CREATE SCHEMA gis_db_public; | |
CREATE EXTENSION IF NOT EXISTS postgis; | |
CREATE EXTENSION IF NOT EXISTS hstore; -- needed by gis_db schema | |
IMPORT FOREIGN SCHEMA public | |
FROM SERVER gis_db_server | |
INTO gis_db_public; | |
-- Step 3: Go! | |
SELECT COUNT(*) FROM gis_db_public.osm_point; | |
*/ | |
drop function if exists link_pg_server(text, text, text, text, text, text, text); | |
create or replace function link_pg_server(_host text, _port text, _dbname text, _user text, _pw text, _server text, _schema text default 'public') | |
returns text as $$ | |
declare | |
link_server_schema text := concat(_server, '_', _schema); | |
begin | |
create extension if not exists postgres_fdw; | |
-- Server: | |
execute format('drop server if exists %I cascade', _server); | |
execute format('create server %I | |
foreign data wrapper postgres_fdw | |
options (host %L, port %L, dbname %L)', _server, _host, _port, _dbname); | |
execute format('create user mapping for current_user | |
server %I | |
options (user %L, password %L)', _server, _user, _pw); | |
-- Schema: | |
execute format('drop schema if exists %I cascade', link_server_schema); | |
execute format('create schema %I', link_server_schema); | |
execute format('import foreign schema %I | |
from server %I | |
into %I', _schema, _server, link_server_schema); | |
-- Done: | |
raise info 'server "%" created linking to database "%" and schema "%", now available as schema "%".', _server, _dbname, _schema, link_server_schema; | |
return link_server_schema; | |
end; | |
$$ language plpgsql; | |
-- Link to remote DB (default schema=public): | |
create extension if not exists postgis; -- Needed by gis_db | |
create extension if not exists hstore; -- Needed by gis_db | |
-- REPLACE "<..>" in ('<ip>', '<port>', '<dbname>', '<username>', '<pw>', '<servername>') | |
select link_pg_server('<ip>', '<port>', '<dbname>', '<username>', '<pw>', '<servername>'); | |
-- Test/Demo "count": | |
select count(*) from gis_db_server_public.osm_point; | |
-- "43460156" | |
-- Cleanup: | |
--drop server if exists gis_db_server cascade; | |
--drop schema if exists gis_db_server_public cascade; | |
--drop function if exists link_pg_server(text, text, text, text, text, text, text); | |
-- Test/Demo "filter": | |
select point.* | |
from gis_db_server_public.osm_point as point | |
where osm_id in (4066310784, 1398864548); | |
-- Test/Demo "Join Tables": | |
/* | |
-- Approach 1: Slow since it downloads all remote data in order to do the join. | |
with tmp (osm_id, name) as ( | |
values (4066310784, 'Tierpark Chur'), (1398864548, 'Tierpark Aletsch') | |
) | |
select point.* | |
from gis_db_server_public.osm_point as point | |
left join tmp on tmp.osm_id=point.osm_id; | |
*/ | |
-- Approach 2 and solution: Send a list of ids to fetch over to | |
-- the remote as an array. Feasible for thousands of ids, but not millions. | |
-- Planner doesn't have any kind of join plan that would do that automatically. | |
with tmp (osm_id, name) as ( | |
values (4066310784, 'Tierpark Chur'), (1398864548, 'Tierpark Aletsch') | |
) | |
select point.* | |
from gis_db_server_public.osm_point as point | |
-- Replacing "left join tmp on tmp.osm_id=point.osm_id" with "ANY": | |
where point.osm_id = any(array(select osm_id from tmp)); | |
-- |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment