Created
December 13, 2019 23:57
-
-
Save rileyjshaw/fa46b8bc6839d3851af52ff2105f6aeb to your computer and use it in GitHub Desktop.
Analysis queries for Mozilla Common Voice's Open Voice Challenge Pilot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Overall voting accuracy | |
SELECT client_id, | |
IF(total, Round (100 * valid / validated, 2), NULL) AS rate | |
FROM (SELECT user_clients.*, | |
COUNT(vote_id) AS total, | |
Coalesce(Sum(agree_count > disagree_count), 0) AS valid, | |
Coalesce(Sum(( agree_count >= 1 | |
OR disagree_count >= 2 ) | |
AND agree_count <> disagree_count), 0) AS validated | |
FROM (SELECT user_clients.*, | |
votes.id AS vote_id, | |
Sum(votes.is_valid = other_votes.is_valid) AS | |
agree_count, | |
Sum(votes.is_valid <> other_votes.is_valid) AS | |
disagree_count | |
FROM user_clients | |
INNER JOIN enroll | |
ON enroll.client_id = user_clients.client_id | |
INNER JOIN challenges | |
ON enroll.challenge_id = challenges.id | |
LEFT JOIN votes | |
ON user_clients.client_id = votes.client_id | |
LEFT JOIN clips | |
ON votes.clip_id = clips.id | |
LEFT JOIN votes other_votes | |
ON clips.id = other_votes.clip_id | |
AND other_votes.id <> votes.id | |
WHERE challenges.url_token = "pilot" | |
GROUP BY user_clients.client_id, | |
votes.id) user_clients | |
GROUP BY client_id) t | |
ORDER BY client_id; | |
-- Overall recording accuracy | |
SELECT client_id, | |
Round(100 * valid / validated, 2) AS rate | |
FROM (SELECT user_clients.*, | |
Coalesce(Sum(upvotes >= 2 | |
AND upvotes > downvotes), 0) AS valid, | |
Coalesce(Sum(( upvotes >= 2 | |
OR downvotes >= 2 ) | |
AND upvotes <> downvotes), 0) AS validated | |
FROM (SELECT user_clients.*, | |
Sum(votes.is_valid) AS upvotes, | |
Sum( ! votes.is_valid) AS downvotes | |
FROM user_clients | |
INNER JOIN enroll | |
ON enroll.client_id = user_clients.client_id | |
INNER JOIN challenges | |
ON enroll.challenge_id = challenges.id | |
LEFT JOIN clips | |
ON user_clients.client_id = clips.client_id | |
AND clips.created_at BETWEEN | |
challenges.start_date AND | |
Timestampadd(week, 3, | |
challenges.start_date) | |
LEFT JOIN votes | |
ON clips.id = votes.clip_id | |
WHERE challenges.url_token = "pilot" | |
GROUP BY user_clients.client_id, | |
clips.id) user_clients | |
GROUP BY user_clients.client_id) t | |
ORDER BY client_id; | |
-- Number of votes | |
SELECT client_id, | |
COUNT(CASE | |
WHEN created_at BETWEEN start_date AND Timestampadd(week, 1, | |
start_date) | |
THEN | |
1 | |
END) AS week_1, | |
COUNT(CASE | |
WHEN created_at BETWEEN Timestampadd(week, 1, start_date) AND | |
Timestampadd(week, 2, start_date) THEN 1 | |
END) AS week_2, | |
COUNT(CASE | |
WHEN created_at BETWEEN Timestampadd(week, 2, start_date) AND | |
end_date | |
THEN 1 | |
END) AS week_3 | |
FROM (SELECT challenges.start_date, | |
Timestampadd(week, 3, challenges.start_date) AS end_date, | |
votes.created_at, | |
votes.id AS vote_id, | |
user_clients.client_id | |
FROM user_clients | |
INNER JOIN enroll | |
ON user_clients.client_id = enroll.client_id | |
INNER JOIN challenges | |
ON enroll.challenge_id = challenges.id | |
LEFT OUTER JOIN votes | |
ON user_clients.client_id = votes.client_id | |
WHERE challenges.url_token = "pilot") recordings | |
GROUP BY recordings.client_id | |
ORDER BY recordings.client_id; | |
-- Number of recordings | |
SELECT client_id, | |
COUNT(CASE | |
WHEN created_at BETWEEN start_date AND Timestampadd(week, 1, | |
start_date) | |
THEN | |
1 | |
END) AS week_1, | |
COUNT(CASE | |
WHEN created_at BETWEEN Timestampadd(week, 1, start_date) AND | |
Timestampadd(week, 2, start_date) THEN 1 | |
END) AS week_2, | |
COUNT(CASE | |
WHEN created_at BETWEEN Timestampadd(week, 2, start_date) AND | |
end_date | |
THEN 1 | |
END) AS week_3 | |
FROM (SELECT challenges.start_date, | |
Timestampadd(week, 3, challenges.start_date) AS end_date, | |
clips.created_at, | |
clips.id AS clip_id, | |
user_clients.client_id | |
FROM user_clients | |
INNER JOIN enroll | |
ON user_clients.client_id = enroll.client_id | |
INNER JOIN challenges | |
ON enroll.challenge_id = challenges.id | |
LEFT OUTER JOIN clips | |
ON user_clients.client_id = clips.client_id | |
WHERE challenges.url_token = "pilot") recordings | |
GROUP BY recordings.client_id | |
ORDER BY recordings.client_id; | |
-- Reached custom goals | |
SELECT user_clients.client_id, | |
IF(awards.id IS NOT NULL, 'Yes', 'No') AS reached_custom_goal | |
FROM user_clients | |
INNER JOIN enroll | |
ON user_clients.client_id = enroll.client_id | |
INNER JOIN challenges | |
ON enroll.challenge_id = challenges.id | |
LEFT OUTER JOIN awards | |
ON user_clients.client_id = awards.client_id | |
AND awards.goal_interval_start > challenges.start_date | |
AND awards.created_at < Timestampadd(week, 3, | |
challenges.start_date) | |
WHERE challenges.url_token = "pilot" | |
GROUP BY user_clients.client_id | |
ORDER BY user_clients.client_id; | |
-- Custom goals ** Bug: does not include goals that exist in the awards table, but the custom_goals entry has been deleted. | |
SELECT user_clients.client_id, | |
teams.name AS team, | |
IF(enroll.invited_by IS NOT NULL, 'Yes', 'No') AS was_invited, | |
IF(custom_goals.id IS NOT NULL, 'Yes', 'No') AS created_custom_goal, | |
user_clients.gender, | |
user_clients.age | |
FROM user_clients | |
INNER JOIN enroll | |
ON user_clients.client_id = enroll.client_id | |
INNER JOIN teams | |
ON enroll.team_id = teams.id | |
INNER JOIN challenges | |
ON enroll.challenge_id = challenges.id | |
LEFT OUTER JOIN custom_goals | |
ON user_clients.client_id = custom_goals.client_id | |
AND custom_goals.created_at BETWEEN | |
challenges.start_date AND | |
Timestampadd(week, 3, challenges.start_date) | |
WHERE challenges.url_token = "pilot" | |
GROUP BY user_clients.client_id | |
ORDER BY user_clients.client_id; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment