Last active
August 29, 2015 14:02
-
-
Save veryeli/c00a2683b5cdc6b98b84 to your computer and use it in GitHub Desktop.
Worked Examples SQL Queries
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT top_alternative AS alternative, | |
ROUND(NTH(500, QUANTILES(ud_points, 1001)), 2) AS median_points, | |
ROUND(avg(ud_points),2) AS mean_points, | |
ROUND(NTH(500, QUANTILES(ud_weekly_problem_days, 1001)),2) AS median_weekly_days, | |
ROUND(avg(ud_weekly_problem_days),2) AS mean_weekly_days, | |
ROUND(NTH(500, QUANTILES(ud_monthly_problem_seconds/60, 1001)),2) AS median_monthly_minutes, | |
ROUND(avg(ud_monthly_problem_seconds/60),2) AS mean_monthly_minutes, | |
ROUND(NTH(500, QUANTILES(ud_monthly_problems_done, 1001)),2) AS median_monthly_problems, | |
ROUND(avg(ud_monthly_problems_done),2) AS mean_monthly_problems, | |
ROUND(NTH(500, QUANTILES(percent_mastered, 1001)*100),2) AS median_percent_mastered, | |
ROUND(avg(percent_mastered) * 100,2) AS mean_percent_mastered, | |
COUNT(1) AS N | |
FROM [eli.worked_example_users] | |
GROUP BY alternative; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT * FROM eli.worked_example_problem_logs where exercise in( | |
'multi-step-word-problems-with-whole-numbers', | |
'comparing-with-multiplication', | |
'percentage_word_problems_1', | |
'recog-features-func-2', | |
'constructing-proportions-to-solve-application-problems', | |
'interpret-features-func-2' | |
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT SUM(became_mastered) as N, exercise, SUM(if(alternative = 'control',became_practiced, 0)) as control_N, | |
SUM(if(alternative = 'control', 0, became_practiced)) as worked_examples_N, | |
SUM(if(alternative = 'control', 0, became_practiced) - if(alternative = 'control',became_practiced, 0))/SUM(if(alternative = 'control',became_practiced, 1)) as diff | |
FROM | |
(SELECT e.alternative as alternative, | |
p.exercise as exercise, | |
sum(num_exercises) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) as N, | |
sum(became_practiced) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) as became_practiced, | |
sum(became_mastered) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) as became_mastered | |
FROM | |
(SELECT user_id, exercise, count(1) as num_exercises, | |
MAX(if(incoming_level = 'unstarted', 1, 0)) as was_ever_unstarted, | |
MAX(if(incoming_level = 'unstarted' and outgoing_level != 'unstarted', 1, 0)) as became_practiced, | |
MAX(if(outgoing_level = 'mastery3', 1, 0)) as became_mastered, | |
from eli.worked_example_problem_logs | |
-- ignore review problems | |
WHERE incoming_level != 'mastery3' group EACH by user_id, exercise | |
HAVING was_ever_unstarted = 1) p | |
JOIN EACH | |
(select top_alternative as alternative, ud_user_id from eli.worked_example_users where not regexp_match( top_alternative, 'curat' | |
)) e | |
ON e.ud_user_id = p.user_id | |
GROUP BY exercise, alternative | |
order by exercise, alternative) | |
GROUP BY exercise | |
HAVING N > 10 | |
order by diff desc; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import string | |
import curses | |
from curses.ascii import isdigit | |
import nltk | |
from nltk.corpus import cmudict | |
d = cmudict.dict() | |
# Only works on known words | |
def nsyl(word): | |
return [len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]] | |
sent_detector = nltk.data.load('tokenizers/punkt/english.pickle') | |
reading_levels = {} | |
def reading_level(str): | |
sentences = sent_detector.tokenize(str.strip()) | |
num_valid_sentences = 0 | |
num_valid_words = 0 | |
num_valid_syllables = 0 | |
for sentence in sentences: | |
print sentence | |
words_this_sentence = 0 | |
words=nltk.tokenize.word_tokenize(sentence) | |
for word in words: | |
if word.lower() in d: | |
words_this_sentence += 1 | |
if words_this_sentence >= 5: | |
num_valid_sentences += 1 | |
for word in words: | |
if word.lower() in d: | |
num_valid_words += 1 | |
num_valid_syllables += nsyl(word)[0] | |
if not num_valid_sentences: | |
return 0 | |
return 0.39 * (num_valid_words/num_valid_sentences) + 11.8 * (num_valid_syllables/ num_valid_words) - 15.59 | |
all_exercises = FrozenExercise.get_all() | |
for exercise in all_exercises: | |
if exercise.name in reading_levels: | |
continue | |
item_ids = [] | |
for pt in exercise.problem_types: | |
for i in pt['items']: | |
item_ids.append(i['id']) | |
probs = [AssessmentItem.get_by_id(i) for i in item_ids] | |
all_hints = '' | |
for problem in probs: | |
h = string.join([hi.get('content', '') for hi in json.loads(problem.item_data).get('hints', '')], ' ') | |
all_hints += h | |
rl = reading_level(all_hints) | |
print exercise.name, rl | |
reading_levels[exercise.name] = rl |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT *, | |
N/became_practiced AS mean_exs_per_practiced, | |
N/became_mastered AS mean_exs_per_mastery | |
FROM | |
(SELECT e.alternative AS alternative, | |
sum(num_exercises) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) AS N, | |
sum(became_practiced) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) AS became_practiced, | |
sum(became_mastered) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) AS became_mastered | |
FROM | |
(SELECT user_id, | |
exercise, | |
count(1) AS num_exercises, | |
MAX(if(incoming_level = 'unstarted', 1, 0)) AS was_ever_unstarted, | |
MAX(if(incoming_level = 'unstarted' | |
AND outgoing_level != 'unstarted', 1, 0)) AS became_practiced, | |
MAX(if(outgoing_level = 'mastery3', 1, 0)) AS became_mastered, | |
FROM eli.worked_example_problem_logs -- ignore review problems | |
WHERE incoming_level != 'mastery3' | |
GROUP EACH BY user_id, | |
exercise HAVING was_ever_unstarted = 1) p | |
JOIN EACH | |
(SELECT top_alternative AS alternative, | |
ud_user_id | |
FROM eli.worked_example_users) e ON e.ud_user_id = p.user_id | |
GROUP EACH BY alternative); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT p.user_id as user_id, p.exercise as exercise, p.correct as correct, p.task_type as task_type, | |
p.incoming_level as incoming_level, p.outgoing_level as outgoing_level, | |
case when p.time_taken < 1 then 1 when time_taken > 600 then 600 else time_taken end as time_taken | |
FROM | |
(SELECT * from [2014_05_31.ProblemLog] WHERE date_done_str > '2014-05-27')p | |
JOIN EACH | |
(select exercise from eli.worked_example_exercise) e | |
ON e.exercise = p.exercise; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
SELECT ud.*, | |
mt.slug AS mission_slug, | |
mt.title AS mission_title, | |
mm.percent_mastered AS percent_mastered, | |
bm.alternative as top_alternative, | |
IFNULL(bm_p.alternative, '') as lower_alternative, | |
CONCAT(bm.alternative, IFNULL(bm_p.alternative, '')) AS alternative | |
FROM | |
(SELECT * | |
FROM [latest_derived.user_segments]) ud | |
JOIN EACH | |
(SELECT slug, | |
title | |
FROM eli.mission_title_slug) mt ON ud.active_mission = mt.title | |
JOIN EACH | |
(SELECT bingo_id, | |
alternative | |
FROM [bingo.bingo_map] | |
WHERE canonical_name='worked_examples') AS bm ON bm.bingo_id=ud.gae_bingo_identity | |
LEFT JOIN EACH | |
(SELECT bingo_id, | |
alternative | |
FROM [bingo.bingo_map] | |
WHERE canonical_name='worked_examples_popup') AS bm_p ON bm_p.bingo_id=ud.gae_bingo_identity | |
JOIN EACH | |
(SELECT percent_mastered, | |
user_email, | |
mission | |
FROM [latest_derived.mission_mastery]) mm ON ud.user_email = mm.user_email | |
AND mt.slug = mm.mission; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment