/* ================================================================================ Pivot example with variable number of columns in the output. ================================================================================ example data is straight forward, imagine a table with a customer identifier, an invoice date and an amount. */ DROP TABLE IF EXISTS invoice; CREATE TEMPORARY TABLE invoice ( id serial not null primary key, customer_id int not null, invoice_date date not null, amount int not null ); INSERT INTO invoice (customer_id, invoice_date, amount) values (2, '2017-06-16 04:11:50', 8706) , (2, '2017-02-12 15:26:53', 4163) , (2, '2017-12-05 23:30:36', 3473) , (2, '2017-11-01 21:16:45', 5952) , (2, '2017-10-03 09:35:34', 4170) , (2, '2017-06-09 03:22:14', 4818) , (1, '2017-03-26 19:20:28', 2958) , (1, '2017-12-02 09:15:26', 463) , (1, '2017-02-19 09:58:43', 2944) , (1, '2017-12-03 12:38:20', 5879) , (2, '2017-01-13 10:54:31', 4294) , (2, '2017-01-28 14:03:22', 2557) , (1, '2017-07-10 19:06:52', 6506) , (2, '2017-02-18 11:50:49', 1105) , (2, '2017-08-29 06:54:49', 6794) , (2, '2017-04-15 07:00:34', 2136) , (1, '2017-07-13 23:30:07', 1421) , (1, '2017-12-29 12:24:54', 5460) , (1, '2017-06-20 23:12:59', 4217) , (1, '2017-04-15 23:47:50', 2695) , (1, '2017-11-22 05:51:11', 3208) , (2, '2017-04-11 12:29:28', 1921) , (2, '2017-01-09 11:51:56', 6512) , (2, '2017-07-10 23:14:41', 1658) , (1, '2017-06-02 16:44:31', 3348) , (1, '2017-11-08 11:15:59', 467) , (2, '2017-09-01 20:13:31', 9222) , (2, '2017-09-02 22:11:56', 7029) , (2, '2017-06-07 11:22:30', 9933) , (1, '2017-04-11 20:58:57', 2284) , (2, '2017-12-13 06:30:01', 7945) , (2, '2017-10-19 17:16:21', 8940) , (1, '2017-02-16 06:57:09', 1202) , (2, '2017-01-17 22:02:24', 7567) , (2, '2017-11-16 17:02:20', 545) , (2, '2017-12-13 12:40:18', 6266) , (2, '2017-07-31 00:12:44', 9151) , (2, '2017-05-20 03:14:10', 7798) , (1, '2017-04-30 04:32:55', 420) , (2, '2017-12-10 02:21:36', 9770) , (1, '2017-09-28 15:55:24', 14) , (2, '2017-09-29 05:34:56', 2170) , (2, '2017-09-12 22:58:57', 2598) , (2, '2017-01-16 09:54:42', 1100) , (1, '2017-06-26 09:55:05', 7256) , (1, '2017-02-05 10:42:52', 6024) , (1, '2017-10-19 16:30:23', 5733) , (1, '2017-12-28 02:08:37', 2645) , (2, '2017-02-19 14:22:03', 7315) , (1, '2017-10-28 05:39:54', 9733) ;
✓ ✓
50 rows affected
/* I'm going to explain the moving parts one by one and we will bring it all together at the end. First thing we need to do is gather all of the data we want to use as columns in our pivoted output, plus any data we want to break by on those columns. Lets just do the first part to start with and then we will come back and show how to break by other data. So lets start by just trying to sum all of the amounts by month and have a column for each month. This data is for the year 2017, and I want to make sure we dont miss any empty months in our output where we might have had no revenue, so we will use generate_series to get a data point for each month. */ SELECT generate_series::DATE dte FROM GENERATE_SERIES('2017-01-01'::DATE, '2017-12-31', '1 month')
| dte | | :--------- | | 2017-01-01 | | 2017-02-01 | | 2017-03-01 | | 2017-04-01 | | 2017-05-01 | | 2017-06-01 | | 2017-07-01 | | 2017-08-01 | | 2017-09-01 | | 2017-10-01 | | 2017-11-01 | | 2017-12-01 |
/* Now, lets take our columns and get the data for each - this will give us a normal relational dataset where our pivoted "columns" will be their own column and the data for each will be another column. We select from our columns data set and LEFT OUTER JOIN so that if there was no data for that column we would still get it in our output. */ WITH columns AS ( SELECT generate_series::DATE dte FROM GENERATE_SERIES('2017-01-01'::DATE, '2017-12-31', '1 month') ) SELECT columns.dte , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) GROUP BY columns.dte
dte | total :--------- | ----: 2017-01-01 | 22030 2017-08-01 | 6794 2017-03-01 | 2958 2017-06-01 | 38278 2017-05-01 | 7798 2017-02-01 | 22753 2017-10-01 | 28576 2017-07-01 | 18736 2017-11-01 | 10172 2017-04-01 | 9456 2017-09-01 | 21033 2017-12-01 | 41901
/* Now comes the secret sauce, the JSONB_OBJECT_AGG() that makes our pivot work. We aggregate our data together, passing the "column" we want to use as the first argument and the data we want to aggregate using that column as the second argument. Basically the first arg is your pivoted "column" column from the data set (or a formatted version of it, like in our example) and the second arg is the "cell" of data you want for that "column". You get a json object with a key for each "column" and its value being its data. (Remember to scroll down for a more complicated example where we are aggregating this data by customer) */ WITH columns AS ( SELECT generate_series::DATE dte FROM GENERATE_SERIES('2017-01-01'::DATE, '2017-12-31', '1 month') ) , data AS ( SELECT columns.dte , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) GROUP BY columns.dte ) SELECT JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data;
| pivotdata | | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | {"2017-01": 22030, "2017-02": 22753, "2017-03": 2958, "2017-04": 9456, "2017-05": 7798, "2017-06": 38278, "2017-07": 18736, "2017-08": 6794, "2017-09": 21033, "2017-10": 28576, "2017-11": 10172, "2017-12": 41901} |
/* If your application can use the json object as it is you can stop at the last query, but if you need to get actual columns out and you dont mind creating a dynamic sql query, you can pull the data out into columns like this example. *But* this requires you to know the columns you want before hand. */ WITH columns AS ( SELECT generate_series::DATE dte FROM GENERATE_SERIES('2017-01-01'::DATE, '2017-12-31', '1 month') ) , data AS ( SELECT columns.dte , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) GROUP BY columns.dte ), result AS ( SELECT JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data ) SELECT (pivotData->>'2017-01') "jan" , (pivotData->>'2017-02') "feb" , (pivotData->>'2017-03') "mar" , (pivotData->>'2017-04') "apr" , (pivotData->>'2017-05') "may" , (pivotData->>'2017-06') "jun" , (pivotData->>'2017-07') "jul" , (pivotData->>'2017-08') "aug" , (pivotData->>'2017-09') "sep" , (pivotData->>'2017-10') "oct" , (pivotData->>'2017-11') "nov" , (pivotData->>'2017-12') "dec" FROM result ;
jan | feb | mar | apr | may | jun | jul | aug | sep | oct | nov | dec :---- | :---- | :--- | :--- | :--- | :---- | :---- | :--- | :---- | :---- | :---- | :---- 22030 | 22753 | 2958 | 9456 | 7798 | 38278 | 18736 | 6794 | 21033 | 28576 | 10172 | 41901
/* Now for a more complicated example, lets say you want to do the same thing, but you want to get totals for each customer. You just need to pull that grouped data along for the ride. Your "columns" query needs to create a column for each month, plus the data youre grouuping by. We use a cross join here with a list of all of our customers, again so that if we are missing data for one month for a customer we will still have that column in our output. */ SELECT generate_series dte , customer_id FROM generate_series('2017-01-01'::date, '2017-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers
dte | customer_id :--------------------- | ----------: 2017-01-01 00:00:00+00 | 1 2017-01-01 00:00:00+00 | 2 2017-02-01 00:00:00+00 | 1 2017-02-01 00:00:00+00 | 2 2017-03-01 00:00:00+00 | 1 2017-03-01 00:00:00+00 | 2 2017-04-01 00:00:00+01 | 1 2017-04-01 00:00:00+01 | 2 2017-05-01 00:00:00+01 | 1 2017-05-01 00:00:00+01 | 2 2017-06-01 00:00:00+01 | 1 2017-06-01 00:00:00+01 | 2 2017-07-01 00:00:00+01 | 1 2017-07-01 00:00:00+01 | 2 2017-08-01 00:00:00+01 | 1 2017-08-01 00:00:00+01 | 2 2017-09-01 00:00:00+01 | 1 2017-09-01 00:00:00+01 | 2 2017-10-01 00:00:00+01 | 1 2017-10-01 00:00:00+01 | 2 2017-11-01 00:00:00+00 | 1 2017-11-01 00:00:00+00 | 2 2017-12-01 00:00:00+00 | 1 2017-12-01 00:00:00+00 | 2
/* Now we need to group our "data" query by the grouping column and sum for each "column". You can see some examples this time of months with no money. */ WITH columns AS ( SELECT generate_series dte , customer_id FROM generate_series('2017-01-01'::date, '2017-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers ) SELECT columns.dte , columns.customer_id , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) AND columns.customer_id = invoice.customer_id GROUP BY columns.dte , columns.customer_id;
dte | customer_id | total :--------------------- | ----------: | ----: 2017-12-01 00:00:00+00 | 2 | 27454 2017-03-01 00:00:00+00 | 1 | 2958 2017-09-01 00:00:00+01 | 1 | 14 2017-08-01 00:00:00+01 | 1 | 0 2017-01-01 00:00:00+00 | 1 | 0 2017-04-01 00:00:00+01 | 1 | 5399 2017-02-01 00:00:00+00 | 2 | 12583 2017-05-01 00:00:00+01 | 1 | 0 2017-10-01 00:00:00+01 | 1 | 15466 2017-06-01 00:00:00+01 | 1 | 14821 2017-11-01 00:00:00+00 | 2 | 6497 2017-07-01 00:00:00+01 | 1 | 7927 2017-03-01 00:00:00+00 | 2 | 0 2017-12-01 00:00:00+00 | 1 | 14447 2017-01-01 00:00:00+00 | 2 | 22030 2017-08-01 00:00:00+01 | 2 | 6794 2017-04-01 00:00:00+01 | 2 | 4057 2017-09-01 00:00:00+01 | 2 | 21019 2017-10-01 00:00:00+01 | 2 | 13110 2017-06-01 00:00:00+01 | 2 | 23457 2017-02-01 00:00:00+00 | 1 | 10170 2017-05-01 00:00:00+01 | 2 | 7798 2017-07-01 00:00:00+01 | 2 | 10809 2017-11-01 00:00:00+00 | 1 | 3675
/* Now we do our final aggregation, passing our "column" heading as the first arg, the "cell" as the second arg, and the grouping by our customer_id. Again if your application can handle the json data then you may be done, but if you want to turn it back into columns see the next example. */ WITH columns AS ( SELECT generate_series dte , customer_id FROM generate_series('2017-01-01'::date, '2017-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers ), data AS ( SELECT columns.dte , columns.customer_id , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) AND columns.customer_id = invoice.customer_id GROUP BY columns.dte , columns.customer_id ) SELECT customer_id , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data GROUP BY customer_id ;
customer_id | pivotdata ----------: | :--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- 1 | {"2017-01": 0, "2017-02": 10170, "2017-03": 2958, "2017-04": 5399, "2017-05": 0, "2017-06": 14821, "2017-07": 7927, "2017-08": 0, "2017-09": 14, "2017-10": 15466, "2017-11": 3675, "2017-12": 14447} 2 | {"2017-01": 22030, "2017-02": 12583, "2017-03": 0, "2017-04": 4057, "2017-05": 7798, "2017-06": 23457, "2017-07": 10809, "2017-08": 6794, "2017-09": 21019, "2017-10": 13110, "2017-11": 6497, "2017-12": 27454}
/* And here is our final output back as columns. */ WITH columns AS ( SELECT generate_series dte , customer_id FROM generate_series('2017-01-01'::date, '2017-12-31', '1 month') CROSS JOIN ( SELECT DISTINCT customer_id FROM invoice ) AS customers ), data AS ( SELECT columns.dte , columns.customer_id , SUM(COALESCE(invoice.amount, 0)) total FROM columns LEFT OUTER JOIN invoice ON DATE_PART('year', invoice_date) = DATE_PART('year', columns.dte) AND DATE_PART('month', invoice_date) = DATE_PART('month', columns.dte) AND columns.customer_id = invoice.customer_id GROUP BY columns.dte , columns.customer_id ), result AS ( SELECT customer_id , JSONB_OBJECT_AGG(TO_CHAR(dte, 'YYYY-MM'), total) pivotData FROM data GROUP BY customer_id ) SELECT customer_id , (pivotData->>'2017-01') "jan" , (pivotData->>'2017-02') "feb" , (pivotData->>'2017-03') "mar" , (pivotData->>'2017-04') "apr" , (pivotData->>'2017-05') "may" , (pivotData->>'2017-06') "jun" , (pivotData->>'2017-07') "jul" , (pivotData->>'2017-08') "aug" , (pivotData->>'2017-09') "sep" , (pivotData->>'2017-10') "oct" , (pivotData->>'2017-11') "nov" , (pivotData->>'2017-12') "dec" FROM result ORDER BY customer_id
customer_id | jan | feb | mar | apr | may | jun | jul | aug | sep | oct | nov | dec ----------: | :---- | :---- | :--- | :--- | :--- | :---- | :---- | :--- | :---- | :---- | :--- | :---- 1 | 0 | 10170 | 2958 | 5399 | 0 | 14821 | 7927 | 0 | 14 | 15466 | 3675 | 14447 2 | 22030 | 12583 | 0 | 4057 | 7798 | 23457 | 10809 | 6794 | 21019 | 13110 | 6497 | 27454
dbfiddle here