Skip to content

Instantly share code, notes, and snippets.

@veryeli
Last active August 29, 2015 14:02
Show Gist options
  • Save veryeli/c00a2683b5cdc6b98b84 to your computer and use it in GitHub Desktop.
Save veryeli/c00a2683b5cdc6b98b84 to your computer and use it in GitHub Desktop.
Worked Examples SQL Queries
SELECT top_alternative AS alternative,
ROUND(NTH(500, QUANTILES(ud_points, 1001)), 2) AS median_points,
ROUND(avg(ud_points),2) AS mean_points,
ROUND(NTH(500, QUANTILES(ud_weekly_problem_days, 1001)),2) AS median_weekly_days,
ROUND(avg(ud_weekly_problem_days),2) AS mean_weekly_days,
ROUND(NTH(500, QUANTILES(ud_monthly_problem_seconds/60, 1001)),2) AS median_monthly_minutes,
ROUND(avg(ud_monthly_problem_seconds/60),2) AS mean_monthly_minutes,
ROUND(NTH(500, QUANTILES(ud_monthly_problems_done, 1001)),2) AS median_monthly_problems,
ROUND(avg(ud_monthly_problems_done),2) AS mean_monthly_problems,
ROUND(NTH(500, QUANTILES(percent_mastered, 1001)*100),2) AS median_percent_mastered,
ROUND(avg(percent_mastered) * 100,2) AS mean_percent_mastered,
COUNT(1) AS N
FROM [eli.worked_example_users]
GROUP BY alternative;
SELECT * FROM eli.worked_example_problem_logs where exercise in(
'multi-step-word-problems-with-whole-numbers',
'comparing-with-multiplication',
'percentage_word_problems_1',
'recog-features-func-2',
'constructing-proportions-to-solve-application-problems',
'interpret-features-func-2'
)
SELECT SUM(became_mastered) as N, exercise, SUM(if(alternative = 'control',became_practiced, 0)) as control_N,
SUM(if(alternative = 'control', 0, became_practiced)) as worked_examples_N,
SUM(if(alternative = 'control', 0, became_practiced) - if(alternative = 'control',became_practiced, 0))/SUM(if(alternative = 'control',became_practiced, 1)) as diff
FROM
(SELECT e.alternative as alternative,
p.exercise as exercise,
sum(num_exercises) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) as N,
sum(became_practiced) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) as became_practiced,
sum(became_mastered) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) as became_mastered
FROM
(SELECT user_id, exercise, count(1) as num_exercises,
MAX(if(incoming_level = 'unstarted', 1, 0)) as was_ever_unstarted,
MAX(if(incoming_level = 'unstarted' and outgoing_level != 'unstarted', 1, 0)) as became_practiced,
MAX(if(outgoing_level = 'mastery3', 1, 0)) as became_mastered,
from eli.worked_example_problem_logs
-- ignore review problems
WHERE incoming_level != 'mastery3' group EACH by user_id, exercise
HAVING was_ever_unstarted = 1) p
JOIN EACH
(select top_alternative as alternative, ud_user_id from eli.worked_example_users where not regexp_match( top_alternative, 'curat'
)) e
ON e.ud_user_id = p.user_id
GROUP BY exercise, alternative
order by exercise, alternative)
GROUP BY exercise
HAVING N > 10
order by diff desc;
import json
import string
import curses
from curses.ascii import isdigit
import nltk
from nltk.corpus import cmudict
d = cmudict.dict()
# Only works on known words
def nsyl(word):
return [len(list(y for y in x if isdigit(y[-1]))) for x in d[word.lower()]]
sent_detector = nltk.data.load('tokenizers/punkt/english.pickle')
reading_levels = {}
def reading_level(str):
sentences = sent_detector.tokenize(str.strip())
num_valid_sentences = 0
num_valid_words = 0
num_valid_syllables = 0
for sentence in sentences:
print sentence
words_this_sentence = 0
words=nltk.tokenize.word_tokenize(sentence)
for word in words:
if word.lower() in d:
words_this_sentence += 1
if words_this_sentence >= 5:
num_valid_sentences += 1
for word in words:
if word.lower() in d:
num_valid_words += 1
num_valid_syllables += nsyl(word)[0]
if not num_valid_sentences:
return 0
return 0.39 * (num_valid_words/num_valid_sentences) + 11.8 * (num_valid_syllables/ num_valid_words) - 15.59
all_exercises = FrozenExercise.get_all()
for exercise in all_exercises:
if exercise.name in reading_levels:
continue
item_ids = []
for pt in exercise.problem_types:
for i in pt['items']:
item_ids.append(i['id'])
probs = [AssessmentItem.get_by_id(i) for i in item_ids]
all_hints = ''
for problem in probs:
h = string.join([hi.get('content', '') for hi in json.loads(problem.item_data).get('hints', '')], ' ')
all_hints += h
rl = reading_level(all_hints)
print exercise.name, rl
reading_levels[exercise.name] = rl
SELECT *,
N/became_practiced AS mean_exs_per_practiced,
N/became_mastered AS mean_exs_per_mastery
FROM
(SELECT e.alternative AS alternative,
sum(num_exercises) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) AS N,
sum(became_practiced) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) AS became_practiced,
sum(became_mastered) * if(regexp_match(alternative, 'show'), 2, 1) * if(regexp_match(alternative, 'curated'), 3, 1) AS became_mastered
FROM
(SELECT user_id,
exercise,
count(1) AS num_exercises,
MAX(if(incoming_level = 'unstarted', 1, 0)) AS was_ever_unstarted,
MAX(if(incoming_level = 'unstarted'
AND outgoing_level != 'unstarted', 1, 0)) AS became_practiced,
MAX(if(outgoing_level = 'mastery3', 1, 0)) AS became_mastered,
FROM eli.worked_example_problem_logs -- ignore review problems
WHERE incoming_level != 'mastery3'
GROUP EACH BY user_id,
exercise HAVING was_ever_unstarted = 1) p
JOIN EACH
(SELECT top_alternative AS alternative,
ud_user_id
FROM eli.worked_example_users) e ON e.ud_user_id = p.user_id
GROUP EACH BY alternative);
SELECT p.user_id as user_id, p.exercise as exercise, p.correct as correct, p.task_type as task_type,
p.incoming_level as incoming_level, p.outgoing_level as outgoing_level,
case when p.time_taken < 1 then 1 when time_taken > 600 then 600 else time_taken end as time_taken
FROM
(SELECT * from [2014_05_31.ProblemLog] WHERE date_done_str > '2014-05-27')p
JOIN EACH
(select exercise from eli.worked_example_exercise) e
ON e.exercise = p.exercise;
SELECT ud.*,
mt.slug AS mission_slug,
mt.title AS mission_title,
mm.percent_mastered AS percent_mastered,
bm.alternative as top_alternative,
IFNULL(bm_p.alternative, '') as lower_alternative,
CONCAT(bm.alternative, IFNULL(bm_p.alternative, '')) AS alternative
FROM
(SELECT *
FROM [latest_derived.user_segments]) ud
JOIN EACH
(SELECT slug,
title
FROM eli.mission_title_slug) mt ON ud.active_mission = mt.title
JOIN EACH
(SELECT bingo_id,
alternative
FROM [bingo.bingo_map]
WHERE canonical_name='worked_examples') AS bm ON bm.bingo_id=ud.gae_bingo_identity
LEFT JOIN EACH
(SELECT bingo_id,
alternative
FROM [bingo.bingo_map]
WHERE canonical_name='worked_examples_popup') AS bm_p ON bm_p.bingo_id=ud.gae_bingo_identity
JOIN EACH
(SELECT percent_mastered,
user_email,
mission
FROM [latest_derived.mission_mastery]) mm ON ud.user_email = mm.user_email
AND mt.slug = mm.mission;
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment