Skip to content

Instantly share code, notes, and snippets.

@ermolaev
Forked from thinkerbot/madlib_example.sql
Last active August 29, 2015 14:06
Show Gist options
  • Save ermolaev/6896e64d7fa121457d2c to your computer and use it in GitHub Desktop.
Save ermolaev/6896e64d7fa121457d2c to your computer and use it in GitHub Desktop.
Madlib PostgreSQL
CREATE TEMP TABLE test_data ON COMMIT DROP AS (SELECT
warehouses.product_id::TEXT,
user_id AS transaction_id
FROM
"line_items"
INNER JOIN "users" ON "users"."id" = "line_items"."user_id"
INNER JOIN "warehouses" ON "warehouses"."id" = "line_items"."warehouse_id"
INNER JOIN "products" ON "products"."id" = "warehouses"."product_id"
AND (products.deleted_at IS NULL)
WHERE
(
line_items.deleted_at IS NULL
)
AND (
users.partner_type = 0
AND products.is_quantity_supplier = 1
OR products.storehouse_ids && ARRAY [ 1 ]
AND line_items.pre_order_at IS NOT NULL
)
);
SELECT * FROM madlib.assoc_rules( 0.0003,
0.001,
'transaction_id',
'product_id',
'test_data',
NULL,
FALSE
);
SELECT * FROM assoc_rules
ORDER BY support DESC;
-- http://doc.madlib.net/latest/group__grp__glm.html
DROP TABLE IF EXISTS glm_model, glm_model_summary, ads_regression, ads_regression_dummy;
CREATE TABLE ads_regression AS (
SELECT id, price, price_m, repair_type::VARCHAR, wall_type::VARCHAR, ceiling_height::VARCHAR FROM ads WHERE ad_type = 0 AND repair_type is not NULL and wall_type is not NULL limit 100
);
SELECT madlib.create_indicator_variables('ads_regression', 'ads_regression_dummy', 'wall_type,repair_type');
SELECT madlib.glm( 'ads_regression_dummy',
'glm_model',
'price_m',
'ARRAY[1.0,"repair_type_3","wall_type_1"]',
'family=poisson, link=identity'
);
SELECT * FROM glm_model;
SELECT
ads_regression_dummy.id,
ads_regression_dummy.price_m,
madlib.glm_predict_poisson(
coef,
ARRAY[1, "repair_type_3", "wall_type_1"]::float8[],
'identity') AS poisson_count
FROM ads_regression_dummy, glm_model
ORDER BY ads_regression_dummy.id;
CREATE TEMP TABLE ads_kmeans ON COMMIT DROP AS (
SELECT id, ARRAY[price, repair_type] as points
FROM ads WHERE ad_type = 0 AND repair_type is not NULL
);
SELECT
ads_kmeans.*,
(madlib.closest_column(centroids, points)).column_id as cluster_id
FROM
ads_kmeans,
(
SELECT *
FROM madlib.kmeanspp(
'ads_kmeans',
'points',
5,
'madlib.squared_dist_norm2',
'madlib.avg',
20,
0.001
)
) as centroids
#!/bin/bash
# https://github.com/madlib/madlib/wiki/Building-MADlib-from-Source
apt-get install -y cmake
apt-get install -y postgresql-server-dev-9.3
apt-get install -y postgresql-plpython-9.3
apt-get install -y libkrb5-dev
git clone https://github.com/madlib/madlib.git
cd madlib
./configure
cd build/
make
sudo make install
# gz - db name
sudo -u postgres createlang plpythonu gz
/usr/local/madlib/bin/madpack -p postgres -c postgres@localhost/gz install
wget --no-check-certificate https://gist.githubusercontent.com/ermolaev/6896e64d7fa121457d2c/raw/madlib_install.sh && sudo sh madlib_install.sh
https://github.com/pgRouting/pgrouting/issues/274
http://hssl.cs.jhu.edu/wiki/doku.php?id=cs423:schen:assignment3
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment