Created
May 17, 2018 04:32
-
-
Save jsams/d3accbec6e66d5b762e9a1b2b074181c to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
function get_tracks_structured(db, user, track_id, author_id, genre, ca, weeks; | |
random_type::Symbol=:fake, rand_pct=0.20, count_each=2, | |
nrepost=count_each, | |
nartist=count_each, nartist_repost=count_each, | |
nseen=count_each, nseen_repost=count_each, | |
nsameartist=count_each, nsartist_repost=count_each, | |
do_selection=false) | |
if random_type == :fake | |
do_random = "AND rand01() < $(rand_pct)" | |
elseif random_type == :sort | |
do_random = "ORDER BY RANDOM()" | |
elseif random_type == :none | |
do_random = "" | |
else | |
error("random_type may be one of `:fake`, `:sort`, or `:none`.") | |
end | |
fillselect = x -> do_selection ? """, "$(x)" AS selection""" : "" | |
author_id = ismissing(author_id) ? 0 : author_id | |
d = Dict(:user => user, | |
:track_id => track_id, | |
:author_id => author_id, | |
:genre => genre, | |
:ptime => ca, | |
:nseen => nseen, | |
:nseen_repost => nseen_repost, | |
:nartist => nartist, | |
:nartist_repost => nartist_repost, | |
:nsameartist => nsameartist, | |
:nsartist_repost => nsartist_repost, | |
:nrepost => nrepost, | |
:repost_feature_date => REPOST_DATE, | |
:weeks => weeks) | |
# * if looking for a repost, make sure the repost was made | |
# AFTER the affiliation but BEFORE the play | |
# * if looking for a not repost, make sure that it is not a repost | |
# in the sense that there was no repost made | |
# before the affiliation was j | |
#do_random = use_random ? "AND rand01() < 0.1" : "" # this is not purely random! | |
join_did_not_see_repost = """ | |
LEFT JOIN affils a ON p.listener_id = a.fan_id | |
LEFT JOIN reposts rp ON a.contact_id = rp.user_id AND p.track_id = rp.track_id | |
AND (a.created_at > :ptime OR rp.created_at > :ptime OR | |
rp.created_at > a.created_at) | |
AND rp.track_id IS NULL""" | |
join_did_see_repost = """ -- add to WHERE | |
JOIN affils a ON p.listener_id = a.fan_id | |
JOIN reposts rp ON a.contact_id = rp.user_id AND p.track_id = rp.track_id | |
AND a.created_at < rp.created_at AND rp.created_at < :ptime | |
AND p.created_at >= :repost_feature_date -- logically belongs in where, but easier here | |
AND rp.created_at > (:ptime - 60 * 60 * 24 * 7 * :weeks) | |
""" | |
# need to dispatch this bastard based on whether author_id is missing or not | |
# requirements for each subquery: | |
res = SQLite.query(db, """ | |
EXPLAIN QUERY PLAN | |
-- tracks seen before, but not reposted; should exclude focal track, focal author | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'seen_track' AS selection")) | |
FROM tracks t | |
JOIN plays p ON t.track_id = p.track_id -- not _prp because not reposted is good | |
$(join_did_not_see_repost) | |
WHERE p.listener_id = :user AND p.created_at < :ptime AND | |
t.track_id <> :track_id and t.author_id <> :author_id | |
$(do_random) | |
LIMIT :nseen) | |
UNION | |
-- tracks seen before and reposted, should exclude focal track, focal author | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'seen_track_and_repost' AS selection")) | |
FROM tracks t | |
JOIN plays_prp p ON t.track_id = p.track_id | |
$(join_did_see_repost) | |
WHERE p.listener_id = :user and p.created_at < :ptime AND | |
t.track_id <> :track_id | |
$(do_random) | |
LIMIT :nseen_repost) | |
UNION | |
-- haven't seen track before, but have seen artist, not reposted should | |
-- exclude focal author | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'seen_artist' AS selection")) | |
FROM tracks t | |
JOIN (SELECT listener_id, track_id, author_id FROM plays | |
WHERE created_at < :ptime AND author_id IS NOT NULL AND | |
listener_id = :user AND author_id <> :author_id | |
) p ON p.author_id = t.author_id AND t.track_id <> p.track_id | |
$(join_did_not_see_repost) | |
WHERE t.created_at < :ptime | |
$(do_random) | |
LIMIT :nartist) | |
UNION | |
-- haven't seen track before, have seen artist, track was reposted | |
-- exclude focal author | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'seen_artist_repost' AS selection")) | |
FROM tracks t | |
JOIN (SELECT listener_id, track_id, author_id, created_at FROM plays | |
WHERE created_at < :ptime AND author_id IS NOT NULL AND | |
listener_id = :user AND author_id <> :author_id | |
) p ON p.author_id = t.author_id AND t.track_id <> p.track_id | |
$(join_did_see_repost) | |
WHERE t.created_at < :ptime | |
$(do_random) | |
LIMIT :nartist_repost) | |
UNION | |
-- have seen neither track nor artist before, BUT has been reposted | |
-- have to think a little more about the join here, different than the | |
-- others | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'unseen_repost' AS selection")) | |
FROM reposts rp | |
JOIN affils a ON a.contact_id = rp.user_id | |
JOIN tracks t on rp.track_id = t.track_id | |
LEFT JOIN | |
(SELECT DISTINCT listener_id, track_id FROM plays | |
WHERE listener_id = :user AND created_at < :ptime) p | |
ON a.fan_id = p.listener_id AND rp.track_id = p.track_id | |
LEFT JOIN | |
(SELECT DISTINCT listener_id, author_id FROM plays | |
WHERE listener_id = :user AND created_at < :ptime AND author_id IS NOT NULL) p2 | |
ON a.fan_id = p2.listener_id AND t.track_id = p2.author_id | |
WHERE a.fan_id = :user | |
AND rp.created_at < :ptime AND a.created_at < rp.created_at AND | |
rp.created_at > (:ptime - 60 * 60 * 24 * 7 * :weeks) AND | |
p.track_id IS NULL AND p2.author_id is NULL | |
$(do_random) | |
LIMIT :nrepost) | |
UNION | |
-- seen different track by same author as focal, not reposted | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'same_artist' AS selection")) | |
FROM tracks t | |
JOIN plays p ON t.track_id = p.track_id | |
$(join_did_not_see_repost) | |
WHERE p.listener_id = :user AND t.track_id <> :track_id AND | |
t.author_id = :author_id AND p.created_at < :ptime | |
$(do_random) | |
LIMIT :nsameartist) | |
UNION | |
-- track by same author as focal, reposted | |
SELECT * FROM ( | |
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre | |
$(fillselect("'same_artist_repost' AS selection")) | |
FROM tracks t | |
JOIN plays p ON t.track_id = p.track_id | |
$(join_did_see_repost) | |
WHERE p.listener_id = :user AND t.track_id <> :track_id AND | |
p.created_at < :ptime | |
$(do_random) | |
LIMIT :nsartist_repost) | |
UNION | |
SELECT track_id, track_index, author_id, genre | |
$(fillselect("'focal_track' AS selection")) | |
FROM tracks WHERE track_id = :track_id | |
ORDER BY track_id; | |
""", values=d) | |
return res | |
if do_selection | |
@NT(track_id = Integer.(res[:track_id]), | |
track_index = Integer.(res[:track_index]), | |
author_id = res[:author_id], | |
genre = String.(res[:genre]), | |
selection = String.(res[:selection])) | |
else | |
@NT(track_id = Integer.(res[:track_id]), | |
track_index = Integer.(res[:track_index]), | |
author_id = res[:author_id], | |
genre = String.(res[:genre])) | |
end | |
end | |
#= | |
julia> get_tracks_structured(D.db, user, focal_track, focal_author, | |
focal_genre, ptime, D.weeks; count_each=D.num_alts, | |
random_type=D.random_type, rand_pct=D.rand_pct) | |
55×4 DataFrames.DataFrame | |
│ Row │ selectid │ order │ from │ detail │ | |
├─────┼──────────┼───────┼──────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤ | |
│ 1 │ 8 │ 0 │ 1 │ SEARCH TABLE plays AS p USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │ | |
│ 2 │ 8 │ 1 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │ | |
│ 3 │ 8 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 4 │ 7 │ 0 │ 0 │ SCAN SUBQUERY 8 │ | |
│ 5 │ 7 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 6 │ 10 │ 0 │ 2 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │ | |
│ 7 │ 10 │ 1 │ 3 │ SEARCH TABLE reposts AS rp USING COVERING INDEX reposts__user_created_idx (user_id=? AND created_at>? AND created_at<?) │ | |
│ 8 │ 10 │ 2 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │ | |
│ 9 │ 10 │ 3 │ 1 │ SEARCH TABLE plays_prp AS p USING COVERING INDEX sqlite_autoindex_plays_prp_1 (listener_id=? AND created_at>? AND created_at<?) │ | |
│ 10 │ 10 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 11 │ 9 │ 0 │ 0 │ SCAN SUBQUERY 10 │ | |
│ 12 │ 9 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 13 │ 6 │ 0 │ 0 │ COMPOUND SUBQUERIES 7 AND 9 (UNION) │ | |
│ 14 │ 12 │ 0 │ 1 │ SEARCH TABLE plays USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │ | |
│ 15 │ 12 │ 1 │ 0 │ SEARCH TABLE tracks AS t USING INDEX tracks__author_id_idx (author_id=?) │ | |
│ 16 │ 12 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 17 │ 11 │ 0 │ 0 │ SCAN SUBQUERY 12 │ | |
│ 18 │ 11 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 19 │ 5 │ 0 │ 0 │ COMPOUND SUBQUERIES 6 AND 11 (UNION) │ | |
│ 20 │ 14 │ 0 │ 2 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │ | |
│ 21 │ 14 │ 1 │ 1 │ SEARCH TABLE plays USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at>? AND created_at<?) │ | |
│ 22 │ 14 │ 2 │ 3 │ SEARCH TABLE reposts AS rp USING COVERING INDEX sqlite_autoindex_reposts_1 (user_id=? AND track_id=?) │ | |
│ 23 │ 14 │ 3 │ 0 │ SEARCH TABLE tracks AS t USING INDEX tracks__author_id_idx (author_id=?) │ | |
│ 24 │ 14 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 25 │ 13 │ 0 │ 0 │ SCAN SUBQUERY 14 │ | |
│ 26 │ 13 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 27 │ 4 │ 0 │ 0 │ COMPOUND SUBQUERIES 5 AND 13 (UNION) │ | |
│ 28 │ 17 │ 0 │ 0 │ SEARCH TABLE plays USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │ | |
│ 29 │ 17 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 30 │ 18 │ 0 │ 0 │ SEARCH TABLE plays USING COVERING INDEX plays__listener_author_created_idx (listener_id=?) │ | |
│ 31 │ 16 │ 0 │ 1 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │ | |
│ 32 │ 16 │ 1 │ 0 │ SEARCH TABLE reposts AS rp USING COVERING INDEX reposts__user_created_idx (user_id=? AND created_at>? AND created_at<?) │ | |
│ 33 │ 16 │ 2 │ 2 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │ | |
│ 34 │ 16 │ 3 │ 3 │ SEARCH SUBQUERY 17 AS p USING AUTOMATIC COVERING INDEX (track_id=? AND listener_id=?) │ | |
│ 35 │ 16 │ 4 │ 4 │ SEARCH SUBQUERY 18 AS p2 USING AUTOMATIC COVERING INDEX (author_id=? AND listener_id=?) │ | |
│ 36 │ 16 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 37 │ 15 │ 0 │ 0 │ SCAN SUBQUERY 16 │ | |
│ 38 │ 15 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 39 │ 3 │ 0 │ 0 │ COMPOUND SUBQUERIES 4 AND 15 (UNION) │ | |
│ 40 │ 20 │ 0 │ 1 │ SEARCH TABLE plays AS p USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │ | |
│ 41 │ 20 │ 1 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │ | |
│ 42 │ 20 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 43 │ 19 │ 0 │ 0 │ SCAN SUBQUERY 20 │ | |
│ 44 │ 19 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 45 │ 2 │ 0 │ 0 │ COMPOUND SUBQUERIES 3 AND 19 (UNION) │ | |
│ 46 │ 22 │ 0 │ 2 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │ | |
│ 47 │ 22 │ 1 │ 3 │ SEARCH TABLE reposts AS rp USING COVERING INDEX reposts__user_created_idx (user_id=? AND created_at>? AND created_at<?) │ | |
│ 48 │ 22 │ 2 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │ | |
│ 49 │ 22 │ 3 │ 1 │ SEARCH TABLE plays AS p USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at>? AND created_at<?) │ | |
│ 50 │ 22 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │ | |
│ 51 │ 21 │ 0 │ 0 │ SCAN SUBQUERY 22 │ | |
│ 52 │ 21 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │ | |
│ 53 │ 1 │ 0 │ 0 │ COMPOUND SUBQUERIES 2 AND 21 (UNION) │ | |
│ 54 │ 23 │ 0 │ 0 │ SEARCH TABLE tracks USING INTEGER PRIMARY KEY (rowid=?) │ | |
│ 55 │ 0 │ 0 │ 0 │ COMPOUND SUBQUERIES 1 AND 23 (UNION) │ | |
=# |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment