Skip to content

Instantly share code, notes, and snippets.

@ftfarias
Last active December 27, 2022 18:46
Show Gist options
  • Save ftfarias/a4dae91f5374db3d9b35bf9a71afd4cc to your computer and use it in GitHub Desktop.
Save ftfarias/a4dae91f5374db3d9b35bf9a71afd4cc to your computer and use it in GitHub Desktop.
how to do a linear regression in SQL
drop table if exists linear_test;
create table linear_test (
x numeric(19,4),
y numeric(19,4),
workspace_id text
);
insert into linear_test (x,y, workspace_id) values (1, 10, 'a'), (2, 20, 'a'), (3, 30, 'a'), (4, 40, 'a'), (5, 50, 'a');
insert into linear_test (x,y, workspace_id) values (5, 10, 'b'), (6, 20, 'b'), (7, 30, 'b'), (8, 40, 'b'), (9, 50, 'b');
insert into linear_test (x,y, workspace_id) values (1, -1, 'c'), (2, -2, 'c'), (3, -3, 'c'), (4, -4, 'c'), (5, -5, 'c');
select * from linear_test;
with base_table as (
select
workspace_id,
x as x_axis,
y
from linear_test
),
sums_table as (select distinct
workspace_id,
sum(x_axis) over (partition by workspace_id) as sum_x,
sum(x_axis * x_axis) over (partition by workspace_id) as sum_x2,
sum(y) over (partition by workspace_id) as sum_y,
sum(x_axis * y) over (partition by workspace_id) as sum_x_y,
COUNT(x_axis) over (partition by workspace_id) as num_rows
from base_table
)
select
workspace_id,
num_rows,
sum_x,
sum_x2,
sum_y,
sum_x_y,
((sum_y * sum_x2) - (sum_x * sum_x_y)) / nullif((num_rows * sum_x2) - (sum_x * sum_x),0) as a_order_count,
((num_rows * sum_x_y ) - (sum_x * sum_y )) / nullif((num_rows * sum_x2) - (sum_x * sum_x),0) as b_order_count
from sums_table
order by workspace_id
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment