Skip to content

Instantly share code, notes, and snippets.

@jsams
Created May 17, 2018 04:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save jsams/d3accbec6e66d5b762e9a1b2b074181c to your computer and use it in GitHub Desktop.
Save jsams/d3accbec6e66d5b762e9a1b2b074181c to your computer and use it in GitHub Desktop.
function get_tracks_structured(db, user, track_id, author_id, genre, ca, weeks;
random_type::Symbol=:fake, rand_pct=0.20, count_each=2,
nrepost=count_each,
nartist=count_each, nartist_repost=count_each,
nseen=count_each, nseen_repost=count_each,
nsameartist=count_each, nsartist_repost=count_each,
do_selection=false)
if random_type == :fake
do_random = "AND rand01() < $(rand_pct)"
elseif random_type == :sort
do_random = "ORDER BY RANDOM()"
elseif random_type == :none
do_random = ""
else
error("random_type may be one of `:fake`, `:sort`, or `:none`.")
end
fillselect = x -> do_selection ? """, "$(x)" AS selection""" : ""
author_id = ismissing(author_id) ? 0 : author_id
d = Dict(:user => user,
:track_id => track_id,
:author_id => author_id,
:genre => genre,
:ptime => ca,
:nseen => nseen,
:nseen_repost => nseen_repost,
:nartist => nartist,
:nartist_repost => nartist_repost,
:nsameartist => nsameartist,
:nsartist_repost => nsartist_repost,
:nrepost => nrepost,
:repost_feature_date => REPOST_DATE,
:weeks => weeks)
# * if looking for a repost, make sure the repost was made
# AFTER the affiliation but BEFORE the play
# * if looking for a not repost, make sure that it is not a repost
# in the sense that there was no repost made
# before the affiliation was j
#do_random = use_random ? "AND rand01() < 0.1" : "" # this is not purely random!
join_did_not_see_repost = """
LEFT JOIN affils a ON p.listener_id = a.fan_id
LEFT JOIN reposts rp ON a.contact_id = rp.user_id AND p.track_id = rp.track_id
AND (a.created_at > :ptime OR rp.created_at > :ptime OR
rp.created_at > a.created_at)
AND rp.track_id IS NULL"""
join_did_see_repost = """ -- add to WHERE
JOIN affils a ON p.listener_id = a.fan_id
JOIN reposts rp ON a.contact_id = rp.user_id AND p.track_id = rp.track_id
AND a.created_at < rp.created_at AND rp.created_at < :ptime
AND p.created_at >= :repost_feature_date -- logically belongs in where, but easier here
AND rp.created_at > (:ptime - 60 * 60 * 24 * 7 * :weeks)
"""
# need to dispatch this bastard based on whether author_id is missing or not
# requirements for each subquery:
res = SQLite.query(db, """
EXPLAIN QUERY PLAN
-- tracks seen before, but not reposted; should exclude focal track, focal author
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'seen_track' AS selection"))
FROM tracks t
JOIN plays p ON t.track_id = p.track_id -- not _prp because not reposted is good
$(join_did_not_see_repost)
WHERE p.listener_id = :user AND p.created_at < :ptime AND
t.track_id <> :track_id and t.author_id <> :author_id
$(do_random)
LIMIT :nseen)
UNION
-- tracks seen before and reposted, should exclude focal track, focal author
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'seen_track_and_repost' AS selection"))
FROM tracks t
JOIN plays_prp p ON t.track_id = p.track_id
$(join_did_see_repost)
WHERE p.listener_id = :user and p.created_at < :ptime AND
t.track_id <> :track_id
$(do_random)
LIMIT :nseen_repost)
UNION
-- haven't seen track before, but have seen artist, not reposted should
-- exclude focal author
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'seen_artist' AS selection"))
FROM tracks t
JOIN (SELECT listener_id, track_id, author_id FROM plays
WHERE created_at < :ptime AND author_id IS NOT NULL AND
listener_id = :user AND author_id <> :author_id
) p ON p.author_id = t.author_id AND t.track_id <> p.track_id
$(join_did_not_see_repost)
WHERE t.created_at < :ptime
$(do_random)
LIMIT :nartist)
UNION
-- haven't seen track before, have seen artist, track was reposted
-- exclude focal author
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'seen_artist_repost' AS selection"))
FROM tracks t
JOIN (SELECT listener_id, track_id, author_id, created_at FROM plays
WHERE created_at < :ptime AND author_id IS NOT NULL AND
listener_id = :user AND author_id <> :author_id
) p ON p.author_id = t.author_id AND t.track_id <> p.track_id
$(join_did_see_repost)
WHERE t.created_at < :ptime
$(do_random)
LIMIT :nartist_repost)
UNION
-- have seen neither track nor artist before, BUT has been reposted
-- have to think a little more about the join here, different than the
-- others
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'unseen_repost' AS selection"))
FROM reposts rp
JOIN affils a ON a.contact_id = rp.user_id
JOIN tracks t on rp.track_id = t.track_id
LEFT JOIN
(SELECT DISTINCT listener_id, track_id FROM plays
WHERE listener_id = :user AND created_at < :ptime) p
ON a.fan_id = p.listener_id AND rp.track_id = p.track_id
LEFT JOIN
(SELECT DISTINCT listener_id, author_id FROM plays
WHERE listener_id = :user AND created_at < :ptime AND author_id IS NOT NULL) p2
ON a.fan_id = p2.listener_id AND t.track_id = p2.author_id
WHERE a.fan_id = :user
AND rp.created_at < :ptime AND a.created_at < rp.created_at AND
rp.created_at > (:ptime - 60 * 60 * 24 * 7 * :weeks) AND
p.track_id IS NULL AND p2.author_id is NULL
$(do_random)
LIMIT :nrepost)
UNION
-- seen different track by same author as focal, not reposted
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'same_artist' AS selection"))
FROM tracks t
JOIN plays p ON t.track_id = p.track_id
$(join_did_not_see_repost)
WHERE p.listener_id = :user AND t.track_id <> :track_id AND
t.author_id = :author_id AND p.created_at < :ptime
$(do_random)
LIMIT :nsameartist)
UNION
-- track by same author as focal, reposted
SELECT * FROM (
SELECT DISTINCT t.track_id, t.track_index, t.author_id, t.genre
$(fillselect("'same_artist_repost' AS selection"))
FROM tracks t
JOIN plays p ON t.track_id = p.track_id
$(join_did_see_repost)
WHERE p.listener_id = :user AND t.track_id <> :track_id AND
p.created_at < :ptime
$(do_random)
LIMIT :nsartist_repost)
UNION
SELECT track_id, track_index, author_id, genre
$(fillselect("'focal_track' AS selection"))
FROM tracks WHERE track_id = :track_id
ORDER BY track_id;
""", values=d)
return res
if do_selection
@NT(track_id = Integer.(res[:track_id]),
track_index = Integer.(res[:track_index]),
author_id = res[:author_id],
genre = String.(res[:genre]),
selection = String.(res[:selection]))
else
@NT(track_id = Integer.(res[:track_id]),
track_index = Integer.(res[:track_index]),
author_id = res[:author_id],
genre = String.(res[:genre]))
end
end
#=
julia> get_tracks_structured(D.db, user, focal_track, focal_author,
focal_genre, ptime, D.weeks; count_each=D.num_alts,
random_type=D.random_type, rand_pct=D.rand_pct)
55×4 DataFrames.DataFrame
│ Row │ selectid │ order │ from │ detail │
├─────┼──────────┼───────┼──────┼─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ 1 │ 8 │ 0 │ 1 │ SEARCH TABLE plays AS p USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │
│ 2 │ 8 │ 1 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │
│ 3 │ 8 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 4 │ 7 │ 0 │ 0 │ SCAN SUBQUERY 8 │
│ 5 │ 7 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 6 │ 10 │ 0 │ 2 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │
│ 7 │ 10 │ 1 │ 3 │ SEARCH TABLE reposts AS rp USING COVERING INDEX reposts__user_created_idx (user_id=? AND created_at>? AND created_at<?) │
│ 8 │ 10 │ 2 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │
│ 9 │ 10 │ 3 │ 1 │ SEARCH TABLE plays_prp AS p USING COVERING INDEX sqlite_autoindex_plays_prp_1 (listener_id=? AND created_at>? AND created_at<?) │
│ 10 │ 10 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 11 │ 9 │ 0 │ 0 │ SCAN SUBQUERY 10 │
│ 12 │ 9 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 13 │ 6 │ 0 │ 0 │ COMPOUND SUBQUERIES 7 AND 9 (UNION) │
│ 14 │ 12 │ 0 │ 1 │ SEARCH TABLE plays USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │
│ 15 │ 12 │ 1 │ 0 │ SEARCH TABLE tracks AS t USING INDEX tracks__author_id_idx (author_id=?) │
│ 16 │ 12 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 17 │ 11 │ 0 │ 0 │ SCAN SUBQUERY 12 │
│ 18 │ 11 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 19 │ 5 │ 0 │ 0 │ COMPOUND SUBQUERIES 6 AND 11 (UNION) │
│ 20 │ 14 │ 0 │ 2 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │
│ 21 │ 14 │ 1 │ 1 │ SEARCH TABLE plays USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at>? AND created_at<?) │
│ 22 │ 14 │ 2 │ 3 │ SEARCH TABLE reposts AS rp USING COVERING INDEX sqlite_autoindex_reposts_1 (user_id=? AND track_id=?) │
│ 23 │ 14 │ 3 │ 0 │ SEARCH TABLE tracks AS t USING INDEX tracks__author_id_idx (author_id=?) │
│ 24 │ 14 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 25 │ 13 │ 0 │ 0 │ SCAN SUBQUERY 14 │
│ 26 │ 13 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 27 │ 4 │ 0 │ 0 │ COMPOUND SUBQUERIES 5 AND 13 (UNION) │
│ 28 │ 17 │ 0 │ 0 │ SEARCH TABLE plays USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │
│ 29 │ 17 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 30 │ 18 │ 0 │ 0 │ SEARCH TABLE plays USING COVERING INDEX plays__listener_author_created_idx (listener_id=?) │
│ 31 │ 16 │ 0 │ 1 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │
│ 32 │ 16 │ 1 │ 0 │ SEARCH TABLE reposts AS rp USING COVERING INDEX reposts__user_created_idx (user_id=? AND created_at>? AND created_at<?) │
│ 33 │ 16 │ 2 │ 2 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │
│ 34 │ 16 │ 3 │ 3 │ SEARCH SUBQUERY 17 AS p USING AUTOMATIC COVERING INDEX (track_id=? AND listener_id=?) │
│ 35 │ 16 │ 4 │ 4 │ SEARCH SUBQUERY 18 AS p2 USING AUTOMATIC COVERING INDEX (author_id=? AND listener_id=?) │
│ 36 │ 16 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 37 │ 15 │ 0 │ 0 │ SCAN SUBQUERY 16 │
│ 38 │ 15 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 39 │ 3 │ 0 │ 0 │ COMPOUND SUBQUERIES 4 AND 15 (UNION) │
│ 40 │ 20 │ 0 │ 1 │ SEARCH TABLE plays AS p USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at<?) │
│ 41 │ 20 │ 1 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │
│ 42 │ 20 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 43 │ 19 │ 0 │ 0 │ SCAN SUBQUERY 20 │
│ 44 │ 19 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 45 │ 2 │ 0 │ 0 │ COMPOUND SUBQUERIES 3 AND 19 (UNION) │
│ 46 │ 22 │ 0 │ 2 │ SEARCH TABLE affils AS a USING COVERING INDEX affils__fan_created_idx (fan_id=?) │
│ 47 │ 22 │ 1 │ 3 │ SEARCH TABLE reposts AS rp USING COVERING INDEX reposts__user_created_idx (user_id=? AND created_at>? AND created_at<?) │
│ 48 │ 22 │ 2 │ 0 │ SEARCH TABLE tracks AS t USING INTEGER PRIMARY KEY (rowid=?) │
│ 49 │ 22 │ 3 │ 1 │ SEARCH TABLE plays AS p USING COVERING INDEX sqlite_autoindex_plays_1 (listener_id=? AND created_at>? AND created_at<?) │
│ 50 │ 22 │ 0 │ 0 │ USE TEMP B-TREE FOR DISTINCT │
│ 51 │ 21 │ 0 │ 0 │ SCAN SUBQUERY 22 │
│ 52 │ 21 │ 0 │ 0 │ USE TEMP B-TREE FOR ORDER BY │
│ 53 │ 1 │ 0 │ 0 │ COMPOUND SUBQUERIES 2 AND 21 (UNION) │
│ 54 │ 23 │ 0 │ 0 │ SEARCH TABLE tracks USING INTEGER PRIMARY KEY (rowid=?) │
│ 55 │ 0 │ 0 │ 0 │ COMPOUND SUBQUERIES 1 AND 23 (UNION) │
=#
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment