Skip to content

Instantly share code, notes, and snippets.

@ermolaev
Last active May 25, 2017 23:55
Show Gist options
  • Save ermolaev/a20e61aa3904b324c53c to your computer and use it in GitHub Desktop.
Save ermolaev/a20e61aa3904b324c53c to your computer and use it in GitHub Desktop.
PostgreSQL with R
http://www.joeconway.com/presentations/plr-PGConfNYC2014.pdf
sudo apt-get install postgresql-9.4-plr
---------------------------------------------------
CREATE EXTENSION plr;
CREATE OR REPLACE FUNCTION r_lib_paths ()
RETURNS text AS '
.libPaths()
' LANGUAGE 'plr';
SELECT r_lib_paths();
---------------------------------------------------
CREATE OR REPLACE FUNCTION r_version (text)
RETURNS text AS '
R.Version()
' LANGUAGE 'plr';
SELECT r_version();
---------------------------------------------------
DROP FUNCTION r_apriori(query text);
CREATE OR REPLACE FUNCTION r_apriori(query text) returns TABLE(quantity text, quantity2 float8) AS '
library("arules") # install as sudo
df <- pg.spi.exec(query)
trans <- as(split(df$product_id, df$transaction_id), "transactions")
rules <- apriori(trans, parameter = list(minlen=2, maxlen=3, sup = 0.00005, conf = 0.001))
itemsets <- unique(generatingItemsets(rules))
itemsets.df <- as(itemsets, "data.frame")
itemsets.df$items <- as.character(itemsets.df$items)
return(itemsets.df)
' LANGUAGE 'plr' STRICT;
SELECT * from r_apriori('
SELECT DISTINCT
warehouses.product_id,
user_id AS transaction_id
FROM
line_items
INNER JOIN users ON users.id = line_items.user_id
INNER JOIN warehouses ON warehouses.id = line_items.warehouse_id
INNER JOIN products ON products.id = warehouses.product_id
AND (products.deleted_at IS NULL)
WHERE
line_items.deleted_at IS NULL
AND (
users.partner_type = 0
AND products.is_quantity_supplier = 1 OR products.storehouse_ids && ARRAY[1]
AND line_items.pre_order_at IS NOT NULL
)
');
---------------------------------------------------
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment