Skip to content

Instantly share code, notes, and snippets.

@rileyjshaw
Created December 13, 2019 23:57
Show Gist options
  • Save rileyjshaw/fa46b8bc6839d3851af52ff2105f6aeb to your computer and use it in GitHub Desktop.
Save rileyjshaw/fa46b8bc6839d3851af52ff2105f6aeb to your computer and use it in GitHub Desktop.
Analysis queries for Mozilla Common Voice's Open Voice Challenge Pilot
-- Overall voting accuracy
SELECT client_id,
IF(total, Round (100 * valid / validated, 2), NULL) AS rate
FROM (SELECT user_clients.*,
COUNT(vote_id) AS total,
Coalesce(Sum(agree_count > disagree_count), 0) AS valid,
Coalesce(Sum(( agree_count >= 1
OR disagree_count >= 2 )
AND agree_count <> disagree_count), 0) AS validated
FROM (SELECT user_clients.*,
votes.id AS vote_id,
Sum(votes.is_valid = other_votes.is_valid) AS
agree_count,
Sum(votes.is_valid <> other_votes.is_valid) AS
disagree_count
FROM user_clients
INNER JOIN enroll
ON enroll.client_id = user_clients.client_id
INNER JOIN challenges
ON enroll.challenge_id = challenges.id
LEFT JOIN votes
ON user_clients.client_id = votes.client_id
LEFT JOIN clips
ON votes.clip_id = clips.id
LEFT JOIN votes other_votes
ON clips.id = other_votes.clip_id
AND other_votes.id <> votes.id
WHERE challenges.url_token = "pilot"
GROUP BY user_clients.client_id,
votes.id) user_clients
GROUP BY client_id) t
ORDER BY client_id;
-- Overall recording accuracy
SELECT client_id,
Round(100 * valid / validated, 2) AS rate
FROM (SELECT user_clients.*,
Coalesce(Sum(upvotes >= 2
AND upvotes > downvotes), 0) AS valid,
Coalesce(Sum(( upvotes >= 2
OR downvotes >= 2 )
AND upvotes <> downvotes), 0) AS validated
FROM (SELECT user_clients.*,
Sum(votes.is_valid) AS upvotes,
Sum( ! votes.is_valid) AS downvotes
FROM user_clients
INNER JOIN enroll
ON enroll.client_id = user_clients.client_id
INNER JOIN challenges
ON enroll.challenge_id = challenges.id
LEFT JOIN clips
ON user_clients.client_id = clips.client_id
AND clips.created_at BETWEEN
challenges.start_date AND
Timestampadd(week, 3,
challenges.start_date)
LEFT JOIN votes
ON clips.id = votes.clip_id
WHERE challenges.url_token = "pilot"
GROUP BY user_clients.client_id,
clips.id) user_clients
GROUP BY user_clients.client_id) t
ORDER BY client_id;
-- Number of votes
SELECT client_id,
COUNT(CASE
WHEN created_at BETWEEN start_date AND Timestampadd(week, 1,
start_date)
THEN
1
END) AS week_1,
COUNT(CASE
WHEN created_at BETWEEN Timestampadd(week, 1, start_date) AND
Timestampadd(week, 2, start_date) THEN 1
END) AS week_2,
COUNT(CASE
WHEN created_at BETWEEN Timestampadd(week, 2, start_date) AND
end_date
THEN 1
END) AS week_3
FROM (SELECT challenges.start_date,
Timestampadd(week, 3, challenges.start_date) AS end_date,
votes.created_at,
votes.id AS vote_id,
user_clients.client_id
FROM user_clients
INNER JOIN enroll
ON user_clients.client_id = enroll.client_id
INNER JOIN challenges
ON enroll.challenge_id = challenges.id
LEFT OUTER JOIN votes
ON user_clients.client_id = votes.client_id
WHERE challenges.url_token = "pilot") recordings
GROUP BY recordings.client_id
ORDER BY recordings.client_id;
-- Number of recordings
SELECT client_id,
COUNT(CASE
WHEN created_at BETWEEN start_date AND Timestampadd(week, 1,
start_date)
THEN
1
END) AS week_1,
COUNT(CASE
WHEN created_at BETWEEN Timestampadd(week, 1, start_date) AND
Timestampadd(week, 2, start_date) THEN 1
END) AS week_2,
COUNT(CASE
WHEN created_at BETWEEN Timestampadd(week, 2, start_date) AND
end_date
THEN 1
END) AS week_3
FROM (SELECT challenges.start_date,
Timestampadd(week, 3, challenges.start_date) AS end_date,
clips.created_at,
clips.id AS clip_id,
user_clients.client_id
FROM user_clients
INNER JOIN enroll
ON user_clients.client_id = enroll.client_id
INNER JOIN challenges
ON enroll.challenge_id = challenges.id
LEFT OUTER JOIN clips
ON user_clients.client_id = clips.client_id
WHERE challenges.url_token = "pilot") recordings
GROUP BY recordings.client_id
ORDER BY recordings.client_id;
-- Reached custom goals
SELECT user_clients.client_id,
IF(awards.id IS NOT NULL, 'Yes', 'No') AS reached_custom_goal
FROM user_clients
INNER JOIN enroll
ON user_clients.client_id = enroll.client_id
INNER JOIN challenges
ON enroll.challenge_id = challenges.id
LEFT OUTER JOIN awards
ON user_clients.client_id = awards.client_id
AND awards.goal_interval_start > challenges.start_date
AND awards.created_at < Timestampadd(week, 3,
challenges.start_date)
WHERE challenges.url_token = "pilot"
GROUP BY user_clients.client_id
ORDER BY user_clients.client_id;
-- Custom goals ** Bug: does not include goals that exist in the awards table, but the custom_goals entry has been deleted.
SELECT user_clients.client_id,
teams.name AS team,
IF(enroll.invited_by IS NOT NULL, 'Yes', 'No') AS was_invited,
IF(custom_goals.id IS NOT NULL, 'Yes', 'No') AS created_custom_goal,
user_clients.gender,
user_clients.age
FROM user_clients
INNER JOIN enroll
ON user_clients.client_id = enroll.client_id
INNER JOIN teams
ON enroll.team_id = teams.id
INNER JOIN challenges
ON enroll.challenge_id = challenges.id
LEFT OUTER JOIN custom_goals
ON user_clients.client_id = custom_goals.client_id
AND custom_goals.created_at BETWEEN
challenges.start_date AND
Timestampadd(week, 3, challenges.start_date)
WHERE challenges.url_token = "pilot"
GROUP BY user_clients.client_id
ORDER BY user_clients.client_id;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment