#!/bin/bash -eu
# Job environment: load the CUDA 11.0.3 module, then publish the Python
# interpreter path and the Dask UCX transport switches (TCP and NVLink).
# The values are exported because the scheduler and client script read them.
module load cuda/11.0.3

PYTHON=/gpfs/fs1/bzaitlen/miniconda3/envs/20201008/bin/python
DASK_UCX__TCP=True
DASK_UCX__NVLINK=True

# Single export statement: all three names become part of the environment
# inherited by child processes.
export PYTHON DASK_UCX__TCP DASK_UCX__NVLINK
name: deconvolve
- rapidsai-nightly
- nvidia
- conda-forge
- defaults
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_llvm
- abseil-cpp=20200225.2=he1b5a44_0
quasiben /
Created July 16, 2020 17:01
SQL and Dask
# Local PostgreSQL setup notes: install the packages from conda-forge, create
# a data directory at /tmp/dask_db, start the server, then create a `dask`
# role and a `dask_db` database owned by that role.
conda install -c conda-forge postgresql psycopg2 sqlalchemy
# Initialize the data directory at /tmp/dask_db.
initdb -D /tmp/dask_db
# NOTE(review): the PostgreSQL server executable is normally named `postgres`,
# not `postgresql` — confirm this command exists in the environment. It also
# looks like an alternative to the pg_ctl line below rather than a step to run
# in addition to it — TODO confirm.
postgresql -D /tmp/dask_db
# Start the server on the same data directory, with output sent to `logfile`.
pg_ctl -D /tmp/dask_db -l logfile start
# Create the `dask` role; --pwprompt asks for its password interactively.
createuser --encrypted --pwprompt dask
# Create the `dask_db` database with `dask` as its owner.
createdb --owner=dask dask_db
Query 01
Find the top 100 products that are sold together frequently in given stores. Only products in certain categories
sold in specific stores are considered, and "sold together frequently" means at least 50 customers bought
these products together in a transaction.
Query 02
Find the top 30 products that are mostly viewed together with a given product in the online store. Note that
the order of products viewed does not matter, and "viewed together" relates to a web_clickstreams
click_session of a known user with a session timeout of 60 min. If the duration between two clicks of a user is
greater than the session timeout, a new session begins.
Query 03
