Skip to content

Instantly share code, notes, and snippets.

@schmidsi
Created November 18, 2014 21:33
Show Gist options
  • Save schmidsi/5a8ed8bdb9bb1fed9059 to your computer and use it in GitHub Desktop.
Save schmidsi/5a8ed8bdb9bb1fed9059 to your computer and use it in GitHub Desktop.
Postgresql refresh: Projects, Persons and Network Analysis
-- Aufgabe 1
\c postgres
DROP DATABASE IF EXISTS projects;
CREATE DATABASE projects encoding='UTF8';
\c projects
-- b) / c)
DROP TABLE IF EXISTS Person;
CREATE TABLE Person (
id serial PRIMARY KEY,
name varchar(100) NOT NULL,
email varchar(100) NOT NULL
);
INSERT INTO Person (name, email) VALUES
('Lewis Baker', 'lewis.baker88@example.com'),
('Owen Hopkins', 'owen.hopkins91@example.com'),
('Peggy Johnson', 'peggy.johnson75@example.com'),
('Isobel Sims', 'isobel.sims49@example.com'),
('Lucy Meyer', 'lucy.meyer22@example.com'),
('Mary Jobless', 'mary.jobless@example.com'); -- Mary Jobless has no associated project
DROP TABLE IF EXISTS Project;
CREATE TABLE Project (
id serial PRIMARY KEY,
title varchar(100) NOT NULL,
start timestamp NOT NULL,
stop timestamp NOT NULL,
profit integer
);
INSERT INTO Project (title, start, stop, profit) VALUES
('A normal project',
TIMESTAMP '2014-10-01 08:00:00',
TIMESTAMP '2014-12-31 17:00:00',
1500),
('A looong project',
TIMESTAMP '2013-01-01 08:00:00',
TIMESTAMP '2015-08-01 17:00:00',
2123),
('A short project',
TIMESTAMP '2014-10-01 08:00:00',
TIMESTAMP '2014-10-31 17:00:00',
788),
('A empty project',
TIMESTAMP '2014-08-01 08:00:00',
TIMESTAMP '2014-11-30 17:00:00',
NULL)
;
DROP TABLE IF EXISTS ProjectPerson;
CREATE TABLE ProjectPerson (
person integer REFERENCES Person,
project integer REFERENCES Project
);
INSERT INTO ProjectPerson (person, project) VALUES
(1, 1),-- will be removed for network analysis
(1, 2),
(1, 3),
(2, 1),
(3, 2),
(3, 3),
(4, 1),
(4, 2),
(5, 2),-- will be removed for network analysis
(5, 3)
;
-- d)
-- a. All projects in which person X is
SELECT pj.title
FROM Project pj, Person pe, ProjectPerson pp
WHERE pj.id = pp.project
AND pp.person = pe.id
AND pe.name = 'Lewis Baker';
/* ->
title
------------------
A normal project
A looong project
A short project
(3 rows)
*/
-- b. All emails of project members of any project
SELECT DISTINCT pe.email
FROM Project pj, Person pe, ProjectPerson pp
WHERE pj.id = pp.project
AND pp.person = pe.id;
/* ->
email
-----------------------------
lewis.baker88@example.com
lucy.meyer22@example.com
owen.hopkins91@example.com
isobel.sims49@example.com
peggy.johnson75@example.com
(5 rows)
*/
-- c. Number of projects per person
SELECT pe.name, count(pp)
FROM Person pe, ProjectPerson pp
WHERE pp.person = pe.id
GROUP BY pe.name;
/* ->
name | count
---------------+-------
Lucy Meyer | 2
Isobel Sims | 2
Owen Hopkins | 1
Lewis Baker | 3
Peggy Johnson | 2
(5 rows)
*/
-- Extension a) a.: All titles for projects in which person x works and are active now
SELECT pj.title
FROM Project pj, Person pe, ProjectPerson pp
WHERE pj.id = pp.project
AND pp.person = pe.id
AND pe.name = 'Lewis Baker'
AND pj.start < now() AND pj.stop > now();
/* ->
title
------------------
A normal project
A looong project
(2 rows)
*/
-- Extension a) b.: Title of project with most persons
SELECT pjc.title, pjc.count FROM (
SELECT pj.title AS title, count(pp.person)
FROM Project pj, ProjectPerson pp
WHERE pj.id = pp.project
GROUP BY pj.title) AS pjc
WHERE count = (
SELECT max(pjc.count) FROM (
SELECT count(pp)
FROM ProjectPerson pp
GROUP BY pp.project
) AS pjc
);
/* ->
title | count
------------------+-------
A looong project | 4
(1 row)
*/
-- Extension b): Sum of all project profit per person:
SELECT pe.name, sum(pj.profit)
FROM Project pj, Person pe, ProjectPerson pp
WHERE pj.id = pp.project
AND pp.person = pe.id
GROUP BY pe.name;
/* ->
name | sum
---------------+------
Lucy Meyer | 2911
Isobel Sims | 3623
Owen Hopkins | 1500
Lewis Baker | 4411
Peggy Johnson | 2911
(5 rows)
*/
-- Extension c): a. Insert from XML:
/*
<project>
<title> Adaptive Bahnzugregelung</title>
<start>19.03.2011</start>
<end>19.09.2011</end>
<price>120000</price>
</project>
*/
INSERT INTO Project (title, start, stop, profit) VALUES
(' Adaptive Bahnzugregelung',
TIMESTAMP '2011-03-19 00:00:00',
TIMESTAMP '2011-09-19 00:00:00',
120000);
-- Extension c): b. XPATH2sql: "/project[start > NOW]/title"
SELECT title FROM Project WHERE start > now();
/* ->
title
-------
(0 rows)
*/
-- Extension c): b. better: XPATH2sql: "/project[stop > NOW]/title"
SELECT title FROM Project WHERE stop > now();
/* ->
title
------------------
A normal project
A looong project
A empty project
(3 rows)
*/
-- Aufgabe 2
-- a) Relation with all Persons working together:
-- Preparation: remove some people from projects to make
-- the data more interesting:
DELETE FROM ProjectPerson WHERE person=1 and project=1;
DELETE FROM ProjectPerson WHERE person=5 and project=2;
-- var 1: implicit joins:
SELECT DISTINCT p1.name, p2.name, pj.title
FROM Person p1, Person p2, ProjectPerson pp1, ProjectPerson pp2, Project pj
WHERE p1 != p2
AND pp1.person = p1.id
AND pp1.project = pj.id
AND pj.id = pp2.project
AND pp2.person = p2.id;
/* ->
name | name | title
---------------+---------------+------------------
Isobel Sims | Lewis Baker | A looong project
Isobel Sims | Owen Hopkins | A normal project
Isobel Sims | Peggy Johnson | A looong project
Lewis Baker | Isobel Sims | A looong project
Lewis Baker | Lucy Meyer | A short project
Lewis Baker | Peggy Johnson | A looong project
Lewis Baker | Peggy Johnson | A short project
Lucy Meyer | Lewis Baker | A short project
Lucy Meyer | Peggy Johnson | A short project
Owen Hopkins | Isobel Sims | A normal project
Peggy Johnson | Isobel Sims | A looong project
Peggy Johnson | Lewis Baker | A looong project
Peggy Johnson | Lewis Baker | A short project
Peggy Johnson | Lucy Meyer | A short project
(14 rows)
*/
-- var 2: semi-explicit joins: (countable)
SELECT p1.name, p2.name, pj.title FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id,
pp1.project AS pjid
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS conn,
Person AS p1,
Person AS p2,
Project AS pj
WHERE conn.p1id = p1.id
AND conn.p2id = p2.id
AND conn.pjid = pj.id;
/* ->
name | name | title
---------------+---------------+------------------
Lewis Baker | Peggy Johnson | A looong project
Lewis Baker | Peggy Johnson | A short project
Lewis Baker | Isobel Sims | A looong project
Lewis Baker | Lucy Meyer | A short project
Owen Hopkins | Isobel Sims | A normal project
Peggy Johnson | Lewis Baker | A looong project
Peggy Johnson | Lewis Baker | A short project
Peggy Johnson | Isobel Sims | A looong project
Peggy Johnson | Lucy Meyer | A short project
Isobel Sims | Lewis Baker | A looong project
Isobel Sims | Owen Hopkins | A normal project
Isobel Sims | Peggy Johnson | A looong project
Lucy Meyer | Lewis Baker | A short project
Lucy Meyer | Peggy Johnson | A short project
(14 rows)
*/
-- b) Node Centrality:
SELECT p1.name, count(p1) FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id,
pp1.project AS pjid
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS conn,
Person AS p1
WHERE conn.p1id = p1.id
GROUP BY p1.name;
/* ->
name | count
---------------+-------
Lucy Meyer | 2
Isobel Sims | 3
Owen Hopkins | 1
Lewis Baker | 4
Peggy Johnson | 4
(5 rows)
*/
-- with left outer join to include Mary Jobless:
SELECT p1.name, count(conn.pjid) FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id,
pp1.project AS pjid
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS conn
RIGHT OUTER JOIN Person AS p1
ON conn.p1id = p1.id
GROUP BY p1.name
ORDER BY count DESC;
/* ->
name | count
---------------+-------
Lewis Baker | 4
Peggy Johnson | 4
Isobel Sims | 3
Lucy Meyer | 2
Owen Hopkins | 1
Mary Jobless | 0
(6 rows)
*/
-- without duplicates: if two persons work
-- in more than one projects, this only counts as one
-- connection:
SELECT p1.name, count(conn) FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS conn
RIGHT OUTER JOIN Person AS p1
ON conn.p1id = p1.id
GROUP BY p1.name
ORDER BY count DESC;
/* ->
name | count
---------------+-------
Isobel Sims | 3
Lewis Baker | 3
Peggy Johnson | 3
Lucy Meyer | 2
Owen Hopkins | 1
Mary Jobless | 0
(6 rows)
*/
--> normalized network graph:
SELECT
p1.id AS id_1,
p1.name AS name_1,
p2.id AS id_2,
p2.name AS name_2
FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS conn,
Person AS p1,
Person AS p2
WHERE p1.id = conn.p1id
AND p2.id = conn.p2id;
/* ->
id_1 | name_1 | id_2 | name_2
------+---------------+------+---------------
1 | Lewis Baker | 3 | Peggy Johnson
1 | Lewis Baker | 4 | Isobel Sims
1 | Lewis Baker | 5 | Lucy Meyer
2 | Owen Hopkins | 4 | Isobel Sims
3 | Peggy Johnson | 1 | Lewis Baker
3 | Peggy Johnson | 4 | Isobel Sims
3 | Peggy Johnson | 5 | Lucy Meyer
4 | Isobel Sims | 1 | Lewis Baker
4 | Isobel Sims | 2 | Owen Hopkins
4 | Isobel Sims | 3 | Peggy Johnson
5 | Lucy Meyer | 1 | Lewis Baker
5 | Lucy Meyer | 3 | Peggy Johnson
(12 rows)
*/
--> as graph:
/*
(1) Lewis Baker
+ + +
| | |
| | |
| | |
(6) Mary Jobless | | | (2) Owen Hopkins
| | | +
| | | |
| | | +------------+
| | | |
| | | |
(5) Lucy Meyer +---+ | +--------+ (3) Peggy Johnson
+ | | + +
| | | | |
+-----------------------------+ |
| | |
+ + |
(4) Isobel Sims +--------+
*/
-- Extension 2: Select all persons which can be reached within 2 hops
-- Observation: Owen (2) can only be reached from "friends" of Isobel (4)
-- Step 1: Create the 2 hop graph:
SELECT DISTINCT
hop1.p1id,
hop2.p2id
FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS hop1, (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS hop2
WHERE hop1.p2id = hop2.p1id;
/* ->
p1id | p2id
------+------
1 | 1
1 | 2
1 | 3
1 | 4
1 | 5
2 | 1
2 | 2
2 | 3
3 | 1
3 | 2
3 | 3
3 | 4
3 | 5
4 | 1
4 | 3
4 | 4
4 | 5
5 | 1
5 | 3
5 | 4
5 | 5
(21 rows)
*/
-- Step 2: Humanize the 2 hop graph and search all persons, which can be
-- reached from Owen (2) whithin 2 hops:
SELECT pe.id, pe.name FROM (
SELECT DISTINCT
hop1.p1id,
hop2.p2id
FROM (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS hop1, (
SELECT DISTINCT
pp1.person AS p1id,
pp2.person AS p2id
FROM ProjectPerson pp1
INNER JOIN ProjectPerson pp2
ON pp1.project = pp2.project
AND pp1.person != pp2.person
) AS hop2
WHERE hop1.p2id = hop2.p1id
) AS conn2hop,
Person AS pe
WHERE conn2hop.p1id = 2
AND pe.id = conn2hop.p2id;
/* ->
id | name
----+---------------
2 | Owen Hopkins
1 | Lewis Baker
3 | Peggy Johnson
(3 rows)
*/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment