Skip to content

Instantly share code, notes, and snippets.

@bnmnetp
Created July 30, 2014 13:46
Show Gist options
  • Save bnmnetp/82e0e287a09580431a59 to your computer and use it in GitHub Desktop.
Save bnmnetp/82e0e287a09580431a59 to your computer and use it in GitHub Desktop.
interactivepython database cleanup
Display the source blob
Display the rendered blob
Raw
{
"metadata": {
"name": "",
"signature": "sha256:962bc8e068d054a027f1195730bba2115cb820266aabc57ae123f73d5294650f"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Data Cleanup Notebook\n",
"\n",
"Some thoughts before I get started with this. \n",
" \n",
" * Start by removing short term users. Recalculate for repeatability\n",
" \n",
" * Remove blank SIDs, instructors, and test courses (gatech,devcourse, and lightly used courses) \n",
" \n",
" * Remove nontraditional pages (those not begining with courselib or runestone \n",
" \n",
" * Remove unofficial div_ids\n",
" \n",
" * Remove outliers. Recalculate them for repeatability\n",
" \n",
" * Replace runestone with courselib\n",
" \n",
" * Replace mChoice with mchoice for consistency\n",
" \n",
" \n",
"THINGS YOU NEED BEFORE RUNNING THIS NOTEBOOK:\n",
"\n",
"Tables:\n",
" * useinfo (or something similar)\n",
" * courses\n",
" \n",
"Python Scripts:\n",
" * bookParse.py\n",
" * getDivs.py\n"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"import getDivs\n",
"import bookParse as bp\n",
"from getInstructors import getInstructors\n",
"import re\n",
"import pandas as pd\n",
"import psycopg2\n",
"import socket\n",
"import numpy as np\n",
"import pandas.io.sql as psql\n",
"from pandas import DataFrame\n",
"from sqlalchemy import create_engine\n",
"\n",
"db_to_clean = 'bmiller'\n",
"db_user = 'bmiller'\n",
"db_pass = ''\n",
"\n",
"if db_pass:\n",
" engine2 = create_engine('postgresql+psycopg2://%s:%s@localhost/%s' % (db_user, db_pass, db_to_clean))\n",
" conn = psycopg2.connect(host=\"localhost\",database=db_to_clean,user=db_user,password=db_pass)\n",
"else:\n",
" engine2 = create_engine('postgresql+psycopg2://%s@localhost/%s' % (db_user, db_to_clean))\n",
" conn = psycopg2.connect(host=\"localhost\",database=db_to_clean,user=db_user)\n",
"\n",
"connection2 = engine2.connect()\n",
"\n",
"\n",
"%load_ext sql \n",
"%sql postgresql://bmiller@localhost/bmiller"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"The sql extension is already loaded. To reload it, use:\n",
" %reload_ext sql\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 47,
"text": [
"'Connected: bmiller@bmiller'"
]
}
],
"prompt_number": 47
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Remove non-traditional pages, blank sids, and instructors"
]
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"Before running the following query, make sure to insert the most recent list of instructors."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE (event = 'page' AND NOT (div_id ~ '^/(courselib|runestone).*')) --#non-traditional pages\n",
"OR sid = '' --#blank sids\n",
"OR sid IN ('DaveCousins', '01418116', 'sbcmharrison', 'mrcpittway', 'paolobianconi', 'OrganizeFISH', 'brekke', 'haugrudm', 'fnurl', 'Martin Highmore', 'gisc', 'jstephens', 'pguse', 'klaidley', 'Nobula', 'ivipul', 'dgyanek', 'Nicholas Seward', 'asahny', 'chorse', 'rschulz', 'nedoluzhko', 'sara', 'memedu', 'swbcteach', 'bula', 'pbeens', 'wmp', 'potterwalt', 'walkermsum', 'acsizmadia', 'the.krup', 'the_bob_student', 'the_bob', 'jgreenawalt', 'Mcintoshchs', 'gkondrak', 'a.reid', 'mckeownp', 'timbellnz', 'quintin', 'rahmani', 'tiverson', 'gccktaira', 'vtn2', 'bmiller', 'rosew', 'jbschafer', 'keen', 'sgilbert@occ.cccd.edu', 'as2653', 'RKumar', 'dcaswell', 'dzolzer', 'gvrinten', 'minsu', 'mzmacky2', 'ktaira', 'rkowaney', 'WarrenPettitt', 'tompkins', 'tompkinsJack', 'HullSmurf001', 'tagliarinig', 'CSC131tompkins', 'vellinga', 'pearcej', 'rambasnet', 'mdogy', 'cmurakami', 'ficekr', 'ficek', 'wilf.lendrum', 'shlomor', 'jazcap', 'dlovell', 'alter718', 'polarlemniscate', 'acbart', 'pmisterovich', 'CheeseTurtle', 'mr.octavio', 'mirita81', 'shyampurk', 'robertcduvall', 'ludaesch', 'rpugh', 'khelson1', 'emcpPYTHON', 'horacers', 'JohnDoorenbos', 'dunfeer', 'techplex.engineer', 'misa.tesi', 'kwhillans', 'dr_bwilson', 'r70smith', 'tand', 'bschormann', 'traveltcg', 'mridgway', 'hhtsimpson', 'bmoening', 'mhilland', 'pam.meier', 'tdragon@ithaca.edu', 'bokhariz', 'MrYantho', 'ine5201', 'avunque', 'rasika', 'dtgreen', 'camingus', 'hogue', 'tstrat', 'stormeb', 'lwchadd', 'xty.dsouza', 'elevel', 'ThaddeusAid', 'davidUrr', 'hitoshi', 'mark.schmitt', 'eallatta', 'mfitzpatrick', 'vcowal', 'salihtuncer', 'warrenpettitt', 'pylearner', 'william.levi.collins', 'Barrie', 'Jacob777', 'vfxpraveen117', 'aoteatech', 'nask.meerpaal', 'jsolin', 'd.giordan', 'tmoffdawg', 'scmreason', 'Mr_Gray', 'case.andrew', 'trampgeek', 'niroshseven', 'decadous', 'MrBate', 'stcrispin', 'mieke.gorissen', 'englert.ervin', 'ericsonga', 'mithron', 'numodica', 'Mathad19', 'JenksW', 'LonnekeDriessen', 'kwaljee', 'larry', 'pentrium', 'moffdawg', 'maxmori77', 'SandeepShelke', 'davelai', 'tommrrtt', 'spock_tpol', 'ekeoid', 'BantamJoe', 'alter718_2', 'MegW', 'elanus', 'cn.gheorghetiteica', 'yoyo_available', 'sdmcgee', 'rdotsch', 'gjvoigt', 'gmacmanus', 'mharrison', 'jfemiani', 'femiani', 'kwright', 'dekrwright', 'jenniferchoffmann', 'mrschellenberg', 'The Salty Seaman', 'shane.dicks1', 'mleblanc', 'ProfNorman', 'metrpy', 'gregmax', 'ironwk85', 'hannutam', 'andrewgtp', 'realblurgh', 'madhavjha', 'acase', 'mchen53', 'jgilgen', 'ofenerci', 'JackTompkins', 'mdeakyne', 'spsofficers', 'comqsph', 'SteveTanimoto', 'jiffp', 'rkumar', 'Cmcguire', 'jheckelsmiller', 'OvidioMarinho', 'dianneoc', 'AppylPye', 'cwormsl2', 'kidbookrev', 'bic', 'afseTestAccount', 'vivek.lpc', '10081453', 'kaspermunch', 'supamrchen', 'johnkershaw', 'mesri', 'jfall', 'cheriegilmore', 'cewing', 'beth-mac', 'mcraig', 'jspacco', 'ranum', 'bcooper', 'xuxiaomin19892', 'cryptogoth', 'rowzer', 'allen', 'isaacdl', 'james adams', 'sciberteach', 'perry.fizzano', 'Green1313', 'cmheisel', 'wavemind', 'steven_king', 'assault', 'jgb', 'komo', 'gjenkins@lbcc.edu', 'FEdericoSOmaschini', 'david236', 'zarial01', 'miguser1', 'kev_jones', 'JuYoungKim', 'garyphillips', 'jasonlanda', 'kfkennedy', 'holcma01', 'jrincon@emsofl.com', 'johnpeeb', 'eudaimonious', 'VineetaSingh', 'rmueller', 'kash41', 'rtindell', 'mike.farfaraway', 'vilipold', 'weihongw', 'tonisvahi', 'rafaja', 'aliev', 'zacharyblackwood', 'smckennitt', 'drtech', 'bnminstruct', 'elliott', 'mleblanc@wheatoncollege.edu', 'CameronA', 'nicokie', 'volcan', 'matthewvenn', 'ttttamaki', 'Terry.Pasley@kctcs.edu', 'mpearse', 'gabrinerd', 'danschellenberg', 'alnitak')\n",
"--#list of instructors\n",
";\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"97999 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 48,
"text": [
"[]"
]
}
],
"prompt_number": 48
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Remove known test courses. "
]
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"Currently limited to just gatch and devcourse. Will revise when known test courses has been updated."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE course_id IN ('gatech', 'devcourse','overview');\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"106732 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 49,
"text": [
"[]"
]
}
],
"prompt_number": 49
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Remove unofficial div_ids"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"pythondsDivInfo = getDivs.getDivInfo('pythonds')\n",
"pythondsDivs = pythondsDivInfo['div_id']\n",
"str(pythondsDivs) #NOTE: I formatted as a string so I could copy and paste it more easily. Uncomment to get the list"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 50,
"text": [
"\"['active5', 'active6', 'active7', 'analysis_1', 'analysis_2', 'analysis_3', 'findMinVid', 'mcpyperform', 'mcpydictperf', 'pythonopsperf', 'active1', 'active2', 'active3', 'divby2', 'baseconvert', 'baseconvert1', 'baseconvert2', 'baseconvert3', 'video_Stack2', 'deqtest', 'ququeuetest', 'queue_1', 'stack_1ac', 'stack_ex_1', 'stack_cl_1', 'stack_1', 'stack_2', 'stack_stringrev', 'stack1_video', 'orderedlistclass', 'unorderedlistcomplete', 'self_check_list1', 'self_check_list2', 'intopost', 'postfixeval', 'postfix1', 'postfix2', 'postfix3', 'video_Stack3', 'palchecker', 'parcheck1', 'qujosephussim', 'qumainsim', 'print_sim_selfcheck', 'intro_8', 'self_check_1', 'list_unique', 'self_check_2', 'listcomp', 'monkeyvid', 'intro_1', 'intro_2', 'intro_3', 'intro_5', 'intro_7', 'scratch_01_01', 'strstuff', 'gcd_cl', 'fraction_class', 'self_check_4', 'fraction', 'complete_cuircuit', 'self_check_5', 'logicgates', 'lst_itsum', 'lst_recsum', 'lst_rectostr', 'recursion_sc_1', 'recursion_sc_2', 'chp12_koch', 'lst_change2', 'lst_dpremember', 'completemaze', 'chp11_recursivesum', 'recursion_scratch_1', 'lst_st', 'lst_recstack', 'question_recsimp_1', 'question_recsimp_2 ', 'hanoi', 'lst_turt1', 'lst_complete_tree', 'recursion_sc_3', 'HASH_1', 'HASH_2', 'hashtablecomplete', 'search3', 'search4', 'BSRCH_1', 'BSRCH_2', 'lst_bubble', 'bubble_anim', 'bubbletrace', 'lst_shortbubble', 'shortbubbletrace', 'question_sort_1', 'lst_insertion', 'insertion_anim', 'insertionsortcodetrace', 'question_sort_3', 'lst_merge', 'merge_anim', 'mergetrace', 'question_sort_5', 'question_sort_6', 'lst_quick', 'quick_anim', 'quicktrace', 'question_sort_7', 'question_sort_8', 'question_sort_9', 'lst_selectionsortcode', 'selection_anim', 'selectionsortcodetrace', 'question_sort_2', 'search1', 'search2', 'question_SRCH_1', 'question_SRCH_2', 'lst_shellSort', 'shell_anim', 'shellSorttrace', 'question_sort_4', 'completeheap', 'heap1', 'tree_list1', 'bin_tree', 'mctree_1', 'mctree_2', 'bintree', 'mctree_3', 'parsebuild', 'bst_1', 'completebstcode']\""
]
}
],
"prompt_number": 50
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE id in \n",
"(\n",
"SELECT useinfo.id FROM \n",
"useinfo\n",
"JOIN bookcourses\n",
"ON (useinfo.course_id = bookcourses.course_name)\n",
"WHERE \n",
" book = 'pythonds' \n",
" AND event NOT IN ('page','highlight','ac_error')\n",
" AND div_id NOT IN ('active5', 'active6', 'active7', 'analysis_1', 'analysis_2', 'analysis_3', 'findMinVid', 'mcpyperform', 'mcpydictperf', 'pythonopsperf', 'active1', 'active2', 'active3', 'divby2', 'baseconvert', 'baseconvert1', 'baseconvert2', 'baseconvert3', 'video_Stack2', 'deqtest', 'ququeuetest', 'queue_1', 'stack_1ac', 'stack_ex_1', 'stack_cl_1', 'stack_1', 'stack_2', 'stack_stringrev', 'stack1_video', 'orderedlistclass', 'unorderedlistcomplete', 'self_check_list1', 'self_check_list2', 'intopost', 'postfixeval', 'postfix1', 'postfix2', 'postfix3', 'video_Stack3', 'palchecker', 'parcheck1', 'qujosephussim', 'qumainsim', 'print_sim_selfcheck', 'intro_8', 'self_check_1', 'list_unique', 'self_check_2', 'listcomp', 'monkeyvid', 'intro_1', 'intro_2', 'intro_3', 'intro_5', 'intro_7', 'scratch_01_01', 'strstuff', 'gcd_cl', 'fraction_class', 'self_check_4', 'fraction', 'complete_cuircuit', 'self_check_5', 'logicgates', 'lst_itsum', 'lst_recsum', 'lst_rectostr', 'recursion_sc_1', 'recursion_sc_2', 'chp12_koch', 'lst_change2', 'lst_dpremember', 'completemaze', 'chp11_recursivesum', 'recursion_scratch_1', 'lst_st', 'lst_recstack', 'question_recsimp_1', 'question_recsimp_2 ', 'hanoi', 'lst_turt1', 'lst_complete_tree', 'recursion_sc_3', 'HASH_1', 'HASH_2', 'hashtablecomplete', 'search3', 'search4', 'BSRCH_1', 'BSRCH_2', 'lst_bubble', 'bubble_anim', 'bubbletrace', 'lst_shortbubble', 'shortbubbletrace', 'question_sort_1', 'lst_insertion', 'insertion_anim', 'insertionsortcodetrace', 'question_sort_3', 'lst_merge', 'merge_anim', 'mergetrace', 'question_sort_5', 'question_sort_6', 'lst_quick', 'quick_anim', 'quicktrace', 'question_sort_7', 'question_sort_8', 'question_sort_9', 'lst_selectionsortcode', 'selection_anim', 'selectionsortcodetrace', 'question_sort_2', 'search1', 'search2', 'question_SRCH_1', 'question_SRCH_2', 'lst_shellSort', 'shell_anim', 'shellSorttrace', 'question_sort_4', 'completeheap', 'heap1', 'tree_list1', 'bin_tree', 'mctree_1', 'mctree_2', 'bintree', 'mctree_3', 'parsebuild', 'bst_1', 'completebstcode')\n",
");\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"31112 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 51,
"text": [
"[]"
]
}
],
"prompt_number": 51
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"thinkcspyDivInfo = getDivs.getDivInfo('thinkcspy')\n",
"thinkcspyDivs = thinkcspyDivInfo['div_id']\n",
"str(thinkcspyDivs) #NOTE: I formatted as a string so I could copy and paste it more easily. Uncomment to get the list"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 52,
"text": [
"\"['chp13_classes4', 'chp13_classes5', 'chp13_classesstr1', 'chp13_classesstr2', 'ch_cl_ex_1_answer', 'ch_cl_02', 'ch_cl_ex_3_answer', 'ch_cl_04', 'ch_cl_05_answer', 'chp13_improveconstructor', 'chp13_classesmid1', 'scratch_cl_01', 'chp13_classes6', 'chp13_classes1', 'chp13_points', 'chp13_classes2', 'fractions_add1', 'scratch_cl_02\\\\t', 'ch_cl2_answer1', 'ch_cl2_q2 ', 'ch_cl2_q3answer', 'ch_cl2_q4', 'ch_cl2_q5answer', 'ch_cl2_answer7', 'ch_cl2_q8', 'fractions_init', 'fractions_gcd', 'fractions_simplify', 'fractions_is', 'fractions_eq1', 'db_ex3_1', 'db_ex3_2', 'db_q_ex3_1', 'db_ex3_3', 'db_ex3_4', 'db_ex3_5', 'db_qex32', 'db_ex3_6', 'db_ex3_7', 'db_ex3_8', 'db_ex3_9', 'db_ex3_10', 'db_ex3_11', 'db_ex3_12', 'ch12_dict11', 'test_question11_4_1', 'chp12_dict6', 'chp12_dict7', 'chp12_dict8', 'chp12_dict9', 'chp12_dict10', 'scratch_11_02', 'test_question11_3_1', 'test_question11_3_2', 'test_question11_3_3', 'test_question11_3_4', 'test_question11_3_5', 'ch12_dict4', 'ch12_dict4a', 'ch12_dict5', 'test_question11_2_1', 'ex_11_01', 'q1_answer', 'ex_11_02', 'ex_11_03', 'ex_11_04', 'ch11_q5_answer', 'chp12_sparse', 'chp12_dict1', 'chp12_dict2', 'chp12_dict3', 'scratch_11_01', 'test_question11_1_1 ', 'test_question11_1_2', 'files_while', 'ex_6_1', 'ch_files_q1answer', 'ex_10_2', 'ex_6_3', 'ch_files_q3answer', 'ex_10_4', 'ex_10_5', 'ch_files_q5answer', 'files_for', 'files_write01', 'ch05_barchart', 'scratch_05_06', 'ch06_boolfun1', 'ch06_boolfun2', 'ch06_newarea', 'test_question5_6_1', 'test_question5_6_2', 'sumofsquares', 'ch04_3', 'ch04_4', 'ch04_5', 'ch04_6', 'ch04_square', 'ch04_clsquare', 'ch04_clsquare_bad', 'test_question5_2_1', 'test_question5_2_2', 'ch06_distance1', 'ch06_distancefinal', 'function_accumulator_pattern', 'sq_accum1', 'sq_accum3', 'scratch_05_04', 'test_question5_4_1', 'question5_4_1p', 'bad_local', 'badsquare_1', 'cl_powerof_bad', 'cl_change_parm', 'test_question5_3_1', 'test_question5_3_2', 'test_question5_3_3', 'function_intro', 'ch04_1', 'ch04_1a', 'ch04_2', 'scratch_05_01', 'test_question5_1_1', 'test_question5_1_2', 'test_question5_1_3', 'test_question5_1_4', 'test_question5_1_5', 'test_question5_1_6', 'test_question5_1_7', 'ch01_2', 'question1_11_1', 'question1_1_1', 'question1_1_2', 'ch01_3', 'question1_12_1', 'scratch_01', 'question1_9_1', 'question1_10_1', 'question1_10_2', 'question1_4_1', 'question1_7_1', 'question1_8_1', 'codelensvid', 'ch01_1', 'firstexample', 'question1_3_1', 'question1_3_2', 'question1_6_1', 'question1_6_2', 'question1_2_1', 'question1_2_2', 'question1_2_3', 'question1_5_1', 'astlab_ast1', 'lab01_1', 'lab01_2', 'l0401', 'lab0401a', 'lab0401aa', 'chp12_single', 'piguessstart', 'seq3nlab1', 'sin1', 'sin2', 'sinlab1', 'sinlab2', 'chp09_02', 'test_question9_3_1', 'test_question9_3_2', 'test_question9_3_3', 'listalias1', 'chp09_is3', 'test_question9_10_1', 'appcon1', 'appcon2', 'appcon3', 'test_question9_15_1', 'chp09_is4', 'chp09_5', 'chp09_concatid', 'test_question9_5_1', 'test_question9_5_2', 'ex_9_2', 'ex_9_3', 'q3_answer', 'ex_9_4', 'ex_9_5', 'q5_answer', 'ex_7_11', 'ex_9_6', 'q7_answer', 'ex_9_7', 'ex_9_8', 'q9_answer', 'ex_9_9', 'ex_9_10', 'q11_answer', 'ex_9_11', 'ex_9_12', 'q13_answer', 'ex_9_13', 'ex_9_14', 'q15_answer', 'listcomp1', 'scratch_09_06', 'test_question9_20_1', 'ch09_11', 'scratch_09_01', 'chp09_01a', 'test_question9_2_1 ', 'test_question9_2_2', 'chp09_4', 'test_question9_4_1', 'test_question9_4_2', 'chp09_meth1', 'chp09_meth2', 'test_question9_13_1', 'test_question9_13_2', 'test_question9_13_3', 'test_question9_13_4', 'scratch_09_03', 'chp09_6', 'test_question9_6_1', 'chp09_01', 'test_question9_1_1 ', 'chp09_03a', 'chp09_03b', 'chp09_for3', 'chp09_for4', 'test_question9_16_1', 'ch09_7', 'item_assign', 'ch09_8', 'ch09_9', 'ch09_10', 'test_question9_7_1', 'chp09_nest', 'test_question9_21_1', 'chp09_is1', 'chp09_is2', 'chp09_istrace', 'ch09_mod2', 'ch09_mod3', 'repref1', 'repref2', 'repref3', 'reprefstep', 'test_question9_12_1', 'test_question9_12_2', 'ch09_split1', 'ch09_split2', 'ch09_join', 'test_question9_22_1', 'list_lsys1', 'list_lsys2', 'ch09_tuple1', 'chp09_tuple2', 'chp09_tuple3', 'scratch_09_07', 'chp09_parm1', 'chp09_parm1_trace', 'ch09_list1', 'pixelex1a', 'test_question7_8_1_1', 'pixelex1', 'test_question7_8_2_1', 'nested1', 'nested2', 'acimg_1', 'test_question7_8_3_1', 'test_question7_8_3_2', 'scratch_07_03', 'ex_7_7', 'q1_answer', 'ex_7_8', 'ex_7_9', 'q3_answer', 'ex_7_14', 'ex_7_13', 'q5_answer', 'ex_7_12', 'ex_7_15', 'q7_answer', 'ex_7_16', 'ex_7_17', 'q9_answer', 'ex_7_18', 'ex_7_19', 'answer_7_11', 'ex_7_20', 'ex_7_21', 'q13_answer', 'ex_7_22', 'ex_7_23', 'q15_answer', 'scratch_07_05', 'chp07_newtonsdef', 'chp07_newtonswhile', 'iter_randwalk1', 'iter_randwalk2', 'test_question7_3_1', 'test_question7_3_2', 'ch07_table1', 'test_question7_7_1', 'ch07_indef1', 'test_question7_4_1', 'ch07_for1', 'ch07_summation', 'whileloop', 'ch07_while1', 'ch07_while2', 'scratch_07_01', 'test_question7_2_1', 'test_question7_2_2', 'mod_q1_answer', 'ex_mod_2', 'mod_q3_answer', 'ex_mod_4', 'randmodvid', 'chmodule_02', 'question4_2_1', 'chmodule_rand', 'chmodule_rand2', 'question4_4_1', 'question4_4_2', 'question4_4_3', 'question4_4_4', 'scratch_04', 'inputvid', 'chmod_01', 'question4_1_1', 'question4_1_2', 'question4_1_3', 'ch03_7', '3_10', '3_11', 'ex_3_1', 'q1_answer', 'ex_3_2', 'ex_3_3', 'q3_answer', 'ex_3_4', 'ex_3_5', 'ex_3_6', 'ex_3_7', 'q7_answer', 'ex_3_8', 'ex_3_9', 'q9_answer', 'ex_3_10', 'ex_3_11', 'q11_answer', 'ex_3_12', 'ex_3_13', 'q13_answer', 'vtest', 'ch03_3', 'test_question3_2_1', '3_6', '3_7', 'ch03_for1', 'ch03_forcolor', 'colorlist', '3_8', '3_9', 'test_question3_4_1', 'test_question3_4_2', 'test_question3_4_3', 'test_question3_4_4', 'ch03_1', 'test_question3_1_0', '3_1', '3_2', '3_3', 'ch03_2', 'test_question3_1_1', 'test_question3_1_2', 'test_question3_1_3', 'test_question3_1_4', '3_4', '3_5', 'scratch_03', 'forloopvid', 'ch03_4', 'advrange', 'ch03_5', 'rangeme', 'ch03_6', 'rangeme2', 'test_question3_5_1', 'test_question3_5_2', 'test_question3_5_3', 'assignvid', 'lst_itsum', 'lst_recsum', 'lst_rectostr', 'recursion_sc_1', 'recursion_sc_2', 'chp12_koch', 'lst_change2', 'lst_dpremember', 'completemaze', 'chp11_recursivesum', 'recursion_scratch_1', 'lst_st', 'lst_recstack', 'question_recsimp_1', 'question_recsimp_2 ', 'hanoi', 'lst_turt1', 'lst_complete_tree', 'recursion_sc_3', 'ch06_boolfun1', 'ch06_boolfun2', 'ch06_boolcodelens', 'test_question6_8_1', 'test_question6_8_2', 'scratch_06_03', 'booleanexpressions', 'ch05_1', 'ch05_1a', 'ch05_2', 'test_question6_1_1', 'sel4', 'scratch_06_02', 'test_question6_7_1', 'test_question6_7_2', 'binaryselection', 'ch05_4', 'test_question6_4_1', 'test_question6_4_2', 'test_question6_4_3', 'ex_6_1', 'ex_6_2', 'ex_6_3', 'q3_question', 'ex_6_4', 'ex_6_5', 'answer_ex_6_5', 'ex_6_6', 'ex_6_7', 'q7_answer', 'ex_6_8', 'ex_6_9', 'q9_answer', 'ex_6_10', 'ex_6_11', 'q11_answer', 'ex_6_12', 'ex_6_13', 'answer_ex_6_13', 'chp05_3', 'test_question6_2_1', 'sel2', 'sel1', 'test_question6_6_1', 'unaryselection', 'ch05_unaryselection', 'test_question6_5_1', 'test_question6_5_2', 'scratch_06_01', 'test_question6_3_1', 'ch02_ex1', 'ex_2_2', 'ex_2_3', 'q3_answer', 'ex_2_4', 'ex_2_5', 'q5_answer ', 'ex_2_6', 'ex_2_7', 'q7_answer', 'ex_2_8', 'ex_2_9', 'q9_answer', 'ex_2_10', 'ex_2_11', 'q11_answer', 'ex_2_12', 'inputvid', 'inputfun', 'int_secs', 'test_question2_7_1', 'ch02_15', 'ch02_16', 'ch02_17', 'ch02_18', 'ch02_19_codelens', 'test_question2_6_1', 'test_question2_6_2', 'test_question2_6_3', 'precedencevid', 'associativityvid', 'ch02_23', 'test_question2_8_1', 'test_question2_8_2', 'reassignmentvid', 'ch07_reassign1', 'ch07_reassign2', 'test_question2_9_1', 'expression_vid', 'ch02_13', 'ch02_14', 'ch02_20', 'ch02_21', 'ch02_22', 'test_question2_2_1', 'updatevid', 'ch07_update1', 'test_question2_10_1', 'test_question2_10_2', 'question2_10_3', 'scratch_02', 'ch02_1', 'ch02_2', 'ch02_3', 'ch02_4', 'ch02_5', 'ch02_6', 'ch02_7', 'ch02_8', 'ch02_8a', 'test_question2_1_1', 'test_question2_1_2', 'test_question2_4_1', 'assignvid', 'ch02_9', 'ch02_9_codelens', 'ch02_10', 'ch02_11', 'test_question2_3_2', 'typesnconvert', 'ch08_run3', 'scratch_08_04', 'ex_8_2', 'ex_8_3', 'q3_answer', 'ex_8_4', 'ex_7_10', 'q5_answer', 'ex_8_5', 'ex_8_6', 'q7_answer', 'ex_8_7', 'ex_8_8', 'q9_answer', 'ex_8_9', 'ex_8_10', 'q11_answer', 'ex_8_11', 'ex_8_12', 'q13_answer', 'ex_8_13', 'ex_8_14', 'q15_answer', 'ex_8_15', 'ex_8_16', 'q17_answer', 'ex_8_17', 'ex_8_18', 'q19_answer', 'ex_8_19', 'ex_8_20', 'q21_answer', 'chp08_index1', 'test_question8_2_1', 'test_question8_2_2', 'chp08_len1', 'chp08_len2', 'ch08_len3', 'test_question8_4_1', 'test_question8_4_2', 'chp08_fun2', 'ch08_add', 'ch08_mult', 'test_question8_1_1 ', 'test_question8_1_2', 'ch08_fun4', 'chp08_fun5', 'chp08_fun6', 'ch08_comp1', 'ch08_comp2', 'chp08_ord1', 'ch08_ord2', 'ch08_ord3', 'test_question8_6_1', 'test_question8_6_2', 'test_question8_6_3', 'chp08_upper', 'ch08_methods1', 'ch08_methods2', 'test_question8_3_1', 'test_question8_3_2', 'cg08_imm1', 'ch08_imm2', 'test_question8_7_1', 'ch08_acc1', 'ch08_acc2', 'test_question8_11_1', 'scratch_08_03', 'chp08_slice1', 'chp08_slice2', 'test_question8_5_1', 'test_question8_5_2', 'scratch_08_01', 'chp8_in1', 'chp8_in2', 'chp8_in3', 'ch08_7', 'ch08_7b', 'ch08_8', 'test_question8_9_1', 'ch08_4', 'ch08_5', 'ch08_6', 'test_question8_8_1', 'test_question8_8_2', 'ch08_7c', 'ch08_7c1', 'test_question8_10_1', 'scratch_08_02', 'string_lsys1', 'strings_lys2']\""
]
}
],
"prompt_number": 52
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE id IN\n",
"(\n",
"SELECT useinfo.id FROM useinfo\n",
"JOIN bookcourses\n",
"ON (useinfo.course_id = bookcourses.course_name)\n",
"WHERE \n",
" book = 'thinkcspy' \n",
" AND event NOT IN ('page','highlight','ac_error')\n",
" AND div_id NOT IN ('chp13_classes4', 'chp13_classes5', 'chp13_classesstr1', 'chp13_classesstr2', 'ch_cl_ex_1_answer', 'ch_cl_02', 'ch_cl_ex_3_answer', 'ch_cl_04', 'ch_cl_05_answer', 'chp13_improveconstructor', 'chp13_classesmid1', 'scratch_cl_01', 'chp13_classes6', 'chp13_classes1', 'chp13_points', 'chp13_classes2', 'fractions_add1', 'scratch_cl_02\\\\t', 'ch_cl2_answer1', 'ch_cl2_q2 ', 'ch_cl2_q3answer', 'ch_cl2_q4', 'ch_cl2_q5answer', 'ch_cl2_answer7', 'ch_cl2_q8', 'fractions_init', 'fractions_gcd', 'fractions_simplify', 'fractions_is', 'fractions_eq1', 'db_ex3_1', 'db_ex3_2', 'db_q_ex3_1', 'db_ex3_3', 'db_ex3_4', 'db_ex3_5', 'db_qex32', 'db_ex3_6', 'db_ex3_7', 'db_ex3_8', 'db_ex3_9', 'db_ex3_10', 'db_ex3_11', 'db_ex3_12', 'ch12_dict11', 'test_question11_4_1', 'chp12_dict6', 'chp12_dict7', 'chp12_dict8', 'chp12_dict9', 'chp12_dict10', 'scratch_11_02', 'test_question11_3_1', 'test_question11_3_2', 'test_question11_3_3', 'test_question11_3_4', 'test_question11_3_5', 'ch12_dict4', 'ch12_dict4a', 'ch12_dict5', 'test_question11_2_1', 'ex_11_01', 'q1_answer', 'ex_11_02', 'ex_11_03', 'ex_11_04', 'ch11_q5_answer', 'chp12_sparse', 'chp12_dict1', 'chp12_dict2', 'chp12_dict3', 'scratch_11_01', 'test_question11_1_1 ', 'test_question11_1_2', 'files_while', 'ex_6_1', 'ch_files_q1answer', 'ex_10_2', 'ex_6_3', 'ch_files_q3answer', 'ex_10_4', 'ex_10_5', 'ch_files_q5answer', 'files_for', 'files_write01', 'ch05_barchart', 'scratch_05_06', 'ch06_boolfun1', 'ch06_boolfun2', 'ch06_newarea', 'test_question5_6_1', 'test_question5_6_2', 'sumofsquares', 'ch04_3', 'ch04_4', 'ch04_5', 'ch04_6', 'ch04_square', 'ch04_clsquare', 'ch04_clsquare_bad', 'test_question5_2_1', 'test_question5_2_2', 'ch06_distance1', 'ch06_distancefinal', 'function_accumulator_pattern', 'sq_accum1', 'sq_accum3', 'scratch_05_04', 'test_question5_4_1', 'question5_4_1p', 'bad_local', 'badsquare_1', 'cl_powerof_bad', 'cl_change_parm', 'test_question5_3_1', 'test_question5_3_2', 'test_question5_3_3', 'function_intro', 'ch04_1', 'ch04_1a', 'ch04_2', 'scratch_05_01', 'test_question5_1_1', 'test_question5_1_2', 'test_question5_1_3', 'test_question5_1_4', 'test_question5_1_5', 'test_question5_1_6', 'test_question5_1_7', 'ch01_2', 'question1_11_1', 'question1_1_1', 'question1_1_2', 'ch01_3', 'question1_12_1', 'scratch_01', 'question1_9_1', 'question1_10_1', 'question1_10_2', 'question1_4_1', 'question1_7_1', 'question1_8_1', 'codelensvid', 'ch01_1', 'firstexample', 'question1_3_1', 'question1_3_2', 'question1_6_1', 'question1_6_2', 'question1_2_1', 'question1_2_2', 'question1_2_3', 'question1_5_1', 'astlab_ast1', 'lab01_1', 'lab01_2', 'l0401', 'lab0401a', 'lab0401aa', 'chp12_single', 'piguessstart', 'seq3nlab1', 'sin1', 'sin2', 'sinlab1', 'sinlab2', 'chp09_02', 'test_question9_3_1', 'test_question9_3_2', 'test_question9_3_3', 'listalias1', 'chp09_is3', 'test_question9_10_1', 'appcon1', 'appcon2', 'appcon3', 'test_question9_15_1', 'chp09_is4', 'chp09_5', 'chp09_concatid', 'test_question9_5_1', 'test_question9_5_2', 'ex_9_2', 'ex_9_3', 'q3_answer', 'ex_9_4', 'ex_9_5', 'q5_answer', 'ex_7_11', 'ex_9_6', 'q7_answer', 'ex_9_7', 'ex_9_8', 'q9_answer', 'ex_9_9', 'ex_9_10', 'q11_answer', 'ex_9_11', 'ex_9_12', 'q13_answer', 'ex_9_13', 'ex_9_14', 'q15_answer', 'listcomp1', 'scratch_09_06', 'test_question9_20_1', 'ch09_11', 'scratch_09_01', 'chp09_01a', 'test_question9_2_1 ', 'test_question9_2_2', 'chp09_4', 'test_question9_4_1', 'test_question9_4_2', 'chp09_meth1', 'chp09_meth2', 'test_question9_13_1', 'test_question9_13_2', 'test_question9_13_3', 'test_question9_13_4', 'scratch_09_03', 'chp09_6', 'test_question9_6_1', 'chp09_01', 'test_question9_1_1 ', 'chp09_03a', 'chp09_03b', 'chp09_for3', 'chp09_for4', 'test_question9_16_1', 'ch09_7', 'item_assign', 'ch09_8', 'ch09_9', 'ch09_10', 'test_question9_7_1', 'chp09_nest', 'test_question9_21_1', 'chp09_is1', 'chp09_is2', 'chp09_istrace', 'ch09_mod2', 'ch09_mod3', 'repref1', 'repref2', 'repref3', 'reprefstep', 'test_question9_12_1', 'test_question9_12_2', 'ch09_split1', 'ch09_split2', 'ch09_join', 'test_question9_22_1', 'list_lsys1', 'list_lsys2', 'ch09_tuple1', 'chp09_tuple2', 'chp09_tuple3', 'scratch_09_07', 'chp09_parm1', 'chp09_parm1_trace', 'ch09_list1', 'pixelex1a', 'test_question7_8_1_1', 'pixelex1', 'test_question7_8_2_1', 'nested1', 'nested2', 'acimg_1', 'test_question7_8_3_1', 'test_question7_8_3_2', 'scratch_07_03', 'ex_7_7', 'q1_answer', 'ex_7_8', 'ex_7_9', 'q3_answer', 'ex_7_14', 'ex_7_13', 'q5_answer', 'ex_7_12', 'ex_7_15', 'q7_answer', 'ex_7_16', 'ex_7_17', 'q9_answer', 'ex_7_18', 'ex_7_19', 'answer_7_11', 'ex_7_20', 'ex_7_21', 'q13_answer', 'ex_7_22', 'ex_7_23', 'q15_answer', 'scratch_07_05', 'chp07_newtonsdef', 'chp07_newtonswhile', 'iter_randwalk1', 'iter_randwalk2', 'test_question7_3_1', 'test_question7_3_2', 'ch07_table1', 'test_question7_7_1', 'ch07_indef1', 'test_question7_4_1', 'ch07_for1', 'ch07_summation', 'whileloop', 'ch07_while1', 'ch07_while2', 'scratch_07_01', 'test_question7_2_1', 'test_question7_2_2', 'mod_q1_answer', 'ex_mod_2', 'mod_q3_answer', 'ex_mod_4', 'randmodvid', 'chmodule_02', 'question4_2_1', 'chmodule_rand', 'chmodule_rand2', 'question4_4_1', 'question4_4_2', 'question4_4_3', 'question4_4_4', 'scratch_04', 'inputvid', 'chmod_01', 'question4_1_1', 'question4_1_2', 'question4_1_3', 'ch03_7', '3_10', '3_11', 'ex_3_1', 'q1_answer', 'ex_3_2', 'ex_3_3', 'q3_answer', 'ex_3_4', 'ex_3_5', 'ex_3_6', 'ex_3_7', 'q7_answer', 'ex_3_8', 'ex_3_9', 'q9_answer', 'ex_3_10', 'ex_3_11', 'q11_answer', 'ex_3_12', 'ex_3_13', 'q13_answer', 'vtest', 'ch03_3', 'test_question3_2_1', '3_6', '3_7', 'ch03_for1', 'ch03_forcolor', 'colorlist', '3_8', '3_9', 'test_question3_4_1', 'test_question3_4_2', 'test_question3_4_3', 'test_question3_4_4', 'ch03_1', 'test_question3_1_0', '3_1', '3_2', '3_3', 'ch03_2', 'test_question3_1_1', 'test_question3_1_2', 'test_question3_1_3', 'test_question3_1_4', '3_4', '3_5', 'scratch_03', 'forloopvid', 'ch03_4', 'advrange', 'ch03_5', 'rangeme', 'ch03_6', 'rangeme2', 'test_question3_5_1', 'test_question3_5_2', 'test_question3_5_3', 'assignvid', 'lst_itsum', 'lst_recsum', 'lst_rectostr', 'recursion_sc_1', 'recursion_sc_2', 'chp12_koch', 'lst_change2', 'lst_dpremember', 'completemaze', 'chp11_recursivesum', 'recursion_scratch_1', 'lst_st', 'lst_recstack', 'question_recsimp_1', 'question_recsimp_2 ', 'hanoi', 'lst_turt1', 'lst_complete_tree', 'recursion_sc_3', 'ch06_boolfun1', 'ch06_boolfun2', 'ch06_boolcodelens', 'test_question6_8_1', 'test_question6_8_2', 'scratch_06_03', 'booleanexpressions', 'ch05_1', 'ch05_1a', 'ch05_2', 'test_question6_1_1', 'sel4', 'scratch_06_02', 'test_question6_7_1', 'test_question6_7_2', 'binaryselection', 'ch05_4', 'test_question6_4_1', 'test_question6_4_2', 'test_question6_4_3', 'ex_6_1', 'ex_6_2', 'ex_6_3', 'q3_question', 'ex_6_4', 'ex_6_5', 'answer_ex_6_5', 'ex_6_6', 'ex_6_7', 'q7_answer', 'ex_6_8', 'ex_6_9', 'q9_answer', 'ex_6_10', 'ex_6_11', 'q11_answer', 'ex_6_12', 'ex_6_13', 'answer_ex_6_13', 'chp05_3', 'test_question6_2_1', 'sel2', 'sel1', 'test_question6_6_1', 'unaryselection', 'ch05_unaryselection', 'test_question6_5_1', 'test_question6_5_2', 'scratch_06_01', 'test_question6_3_1', 'ch02_ex1', 'ex_2_2', 'ex_2_3', 'q3_answer', 'ex_2_4', 'ex_2_5', 'q5_answer ', 'ex_2_6', 'ex_2_7', 'q7_answer', 'ex_2_8', 'ex_2_9', 'q9_answer', 'ex_2_10', 'ex_2_11', 'q11_answer', 'ex_2_12', 'inputvid', 'inputfun', 'int_secs', 'test_question2_7_1', 'ch02_15', 'ch02_16', 'ch02_17', 'ch02_18', 'ch02_19_codelens', 'test_question2_6_1', 'test_question2_6_2', 'test_question2_6_3', 'precedencevid', 'associativityvid', 'ch02_23', 'test_question2_8_1', 'test_question2_8_2', 'reassignmentvid', 'ch07_reassign1', 'ch07_reassign2', 'test_question2_9_1', 'expression_vid', 'ch02_13', 'ch02_14', 'ch02_20', 'ch02_21', 'ch02_22', 'test_question2_2_1', 'updatevid', 'ch07_update1', 'test_question2_10_1', 'test_question2_10_2', 'question2_10_3', 'scratch_02', 'ch02_1', 'ch02_2', 'ch02_3', 'ch02_4', 'ch02_5', 'ch02_6', 'ch02_7', 'ch02_8', 'ch02_8a', 'test_question2_1_1', 'test_question2_1_2', 'test_question2_4_1', 'assignvid', 'ch02_9', 'ch02_9_codelens', 'ch02_10', 'ch02_11', 'test_question2_3_2', 'typesnconvert', 'ch08_run3', 'scratch_08_04', 'ex_8_2', 'ex_8_3', 'q3_answer', 'ex_8_4', 'ex_7_10', 'q5_answer', 'ex_8_5', 'ex_8_6', 'q7_answer', 'ex_8_7', 'ex_8_8', 'q9_answer', 'ex_8_9', 'ex_8_10', 'q11_answer', 'ex_8_11', 'ex_8_12', 'q13_answer', 'ex_8_13', 'ex_8_14', 'q15_answer', 'ex_8_15', 'ex_8_16', 'q17_answer', 'ex_8_17', 'ex_8_18', 'q19_answer', 'ex_8_19', 'ex_8_20', 'q21_answer', 'chp08_index1', 'test_question8_2_1', 'test_question8_2_2', 'chp08_len1', 'chp08_len2', 'ch08_len3', 'test_question8_4_1', 'test_question8_4_2', 'chp08_fun2', 'ch08_add', 'ch08_mult', 'test_question8_1_1 ', 'test_question8_1_2', 'ch08_fun4', 'chp08_fun5', 'chp08_fun6', 'ch08_comp1', 'ch08_comp2', 'chp08_ord1', 'ch08_ord2', 'ch08_ord3', 'test_question8_6_1', 'test_question8_6_2', 'test_question8_6_3', 'chp08_upper', 'ch08_methods1', 'ch08_methods2', 'test_question8_3_1', 'test_question8_3_2', 'cg08_imm1', 'ch08_imm2', 'test_question8_7_1', 'ch08_acc1', 'ch08_acc2', 'test_question8_11_1', 'scratch_08_03', 'chp08_slice1', 'chp08_slice2', 'test_question8_5_1', 'test_question8_5_2', 'scratch_08_01', 'chp8_in1', 'chp8_in2', 'chp8_in3', 'ch08_7', 'ch08_7b', 'ch08_8', 'test_question8_9_1', 'ch08_4', 'ch08_5', 'ch08_6', 'test_question8_8_1', 'test_question8_8_2', 'ch08_7c', 'ch08_7c1', 'test_question8_10_1', 'scratch_08_02', 'string_lsys1', 'strings_lys2')\n",
");\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"961911 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 53,
"text": [
"[]"
]
}
],
"prompt_number": 53
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Find and remove infered test courses or inactive courses"
]
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"In order to clean out test courses, we need to infer which book goes with each course. This will result in the creation of a bookcourses table, a table similar to courses but with the book tacked on the end."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"class CoursesForBook():\n",
" def __init__(self,book):\n",
" self.name = book\n",
" self.courses = []\n",
" self.keyWords = []\n",
" def addCourse(self,course):\n",
" if course not in self.courses:\n",
" self.courses.append(course)\n",
" def addKeyWords(self, wordlst):\n",
" for word in wordlst:\n",
" self.keyWords.append(word)\n",
" def __str__(self):\n",
" string = self.name + '\\n'+''\n",
" for i in self.courses:\n",
" string +=i + '\\n'\n",
" return string"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 54
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"book = bp.makeBookOutline('pythonds')\n",
"book2 = bp.makeBookOutline('thinkcspy')\n",
"\n",
"pythonds = CoursesForBook('pythonds')\n",
"pythonds.addKeyWords(['Analysis','Trees','BasicDS'])\n",
"thinkcspy = CoursesForBook('thinkcspy')\n",
"thinkcspy.addKeyWords(['Labs','SimplePythonData','GeneralIntro', 'moreiteration', 'helloturtle'])"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 55
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"thinkcspy.courses"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 56,
"text": [
"[]"
]
}
],
"prompt_number": 56
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query = '''\n",
"SELECT distinct course_id, div_id\n",
"FROM useinfo where event = 'page'\n",
"'''\n",
"df = psql.read_sql(query,conn)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 57
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"count = 0\n",
"for num in df.index:\n",
" count += 1\n",
" if count % 500000 == 1:\n",
" print(count)\n",
" row = df.iloc[num]\n",
" course = row.course_id\n",
" div = row.div_id\n",
" for word in pythonds.keyWords:\n",
" try:\n",
" if word in div :\n",
" pythonds.addCourse(course)\n",
" except TypeError:\n",
" pass\n",
" for word in thinkcspy.keyWords:\n",
" try:\n",
" if word in div :\n",
" thinkcspy.addCourse(course)\n",
" except TypeError:\n",
" pass"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1\n"
]
}
],
"prompt_number": 58
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"len(pythonds.courses)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 59,
"text": [
"14"
]
}
],
"prompt_number": 59
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"q = '''\n",
"SELECT * FROM courses\n",
"'''\n",
"course_ids = psql.read_sql(q, conn)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 60
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"These two lists are the same lists as pythonds.courses and thinkcspy.courses. I simply copied them to save time. "
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#temp1 = ['pythonds', 'COSC122', 'gatech', 'cs151ranum', 'cs22lbcc', 'cs22-lbcc', 'PaideiaCS2', 'CS136', 'GFU-Algorithms', 'GUDataStructures', 'PythonLevel2', 'luther151', 'devcourse']\n",
"#temp2 = ['thinkcspy', 'aaron-thinkpy', 'IntroCompScience', 'CSC131HUA', 'CSC131004Fall2013', 'STEM_LWSD', 'CSC131tagliarini', 'CS115', 'GalileoAOIT', 'green', '729G04-HT2013', 'ISTA130_FALL_2013', 'FlemingPSP', 'Duke_CompSci101', 'METR1313', 'CS_Python', 'HPComSci_IB', 'CS1-Python', 'PSSD_Python', 'cs150ranum', 'CS30', '152', 'luther150c_fall13', 'SER101F2013', 'scacs', 'IHSPython', 'CS101', 'python_13', 'SMU_CS101', 'CS1140', 'CS140-WCC-F13', 'ISYS217', 'python', 'ICS2O-3C', 'LearnPython', '10ACS', 'Python', 'ITLP-Python', 'csf13f', 'secondp', 'FCHSCP2013', 'wspython', '01418116_56', 'qwerty', 'Thacker', 'IntroProgrammingJHS', 'Griess_Python', 'ParsonsTest', 'ComputerProgramming', 'CSIS101', 'MrEasonECSFall2013', 'CS@WPS', 'CS_20', 'python4lindseya', 'CSC226', 'BC_GCSECompSci', 'lsmsacs244v', 'pyladies', 'CS244', 'ITEC-1150-01-S14', '152Haugrud', 'CSC131tompkinsj', 'ComputerLiteracyPython', 'bcscomputing', 'BigSnake', 'CSC112800', 'testcamp', 'IC-COMP-171-SP-14', 'ProjectNorKam', 'cs110williams', 'CSIS152Spr2014Ficek', 'SUS13', 'SMU_CS106', 'CSIS101W14', 'ANewCourse', 'SBC2014', 'CSGPython2', 'comp123-s14', 'cs150ranumspr2014', 'NewmanSKE2014', 'CS902', 'peeblescourse', 'Spring2014CS104', 'cf_pyintro', 'HTHcomptech', 'PythonASMSA', 'CodingLevel1', 'PythonLevel1', 'CASMTFeb14', 'Pythonintro', 'PythonIntro', 'neintrocs', 'CSIT902', 'CS902Spr14', 'cap-comp101', 'cs-147-s14', 'LASAIntroCS', 'cs902python', 'APCS-Python', 'WCC_CS140_SP14', 'RMBSProgrammingSkills', 'SolinMediaComp', 'csTestPrject', '1314SP_HCT2447', 'ITLP', 'CS101SP14', 'IS1008', 'ProvaAnn77Corso', 'BeensPy', 'python4bio', 'gatech', 'bet_test', 'devcourse', 'pyocc', 'craig-trial', 'CSC112-800', 'comp115', 'unics1140', 'luther150a', 'luther150bc', 'CMPUT 174 Fall 2012', 'CS-1P', 'CMPUT174Fall2012', 'HCHS_GCSE_Python3', 'overview', 'PHBCS', 'HCHS_GCSE_Python', 'COMP201', 'Explorations', 'SKECS2012PythonI', 'SKECS2013PythonI', 'Python101', 'UniOfHertsCS101', 'BMO', 'AMSACS', 'C152_Sum', 'NZTeach', 'IntroCC', 'Summer2013', 'CBCS_CS101', 'PythonWorkshop', 'OCC_CS_A131_FALL_13', 'Python1', 'bic-think-tutorial', 'test_tssi']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 61
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"If running anew, replace temp1 and temp2 with pythonds.courses and thinkcspy.courses respectively."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"f = (lambda x: 'pythonds' if x in pythonds.courses else None or 'thinkcspy' if x in thinkcspy.courses else None)\n",
"course_ids['book'] = course_ids['course_name'].map(f)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 62
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"course_ids\n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>course_id</th>\n",
" <th>course_name</th>\n",
" <th>term_start_date</th>\n",
" <th>book</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0 </th>\n",
" <td> 144</td>\n",
" <td> None</td>\n",
" <td> CSC131tagliarini</td>\n",
" <td> 2013-08-20</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1 </th>\n",
" <td> 2</td>\n",
" <td> pythonds</td>\n",
" <td> pythonds</td>\n",
" <td> 2011-01-01</td>\n",
" <td> pythonds</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2 </th>\n",
" <td> 3</td>\n",
" <td> thinkcspy</td>\n",
" <td> thinkcspy</td>\n",
" <td> 2011-01-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3 </th>\n",
" <td> 139</td>\n",
" <td> None</td>\n",
" <td> HPComSci_IB</td>\n",
" <td> 2013-08-17</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4 </th>\n",
" <td> 116</td>\n",
" <td> None</td>\n",
" <td> a</td>\n",
" <td> 2013-08-07</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5 </th>\n",
" <td> 6</td>\n",
" <td> luther151</td>\n",
" <td> luther151</td>\n",
" <td> 2011-01-01</td>\n",
" <td> pythonds</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6 </th>\n",
" <td> 8</td>\n",
" <td> python4bio</td>\n",
" <td> python4bio</td>\n",
" <td> 2011-01-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7 </th>\n",
" <td> 88</td>\n",
" <td> Python 101</td>\n",
" <td> Python 101</td>\n",
" <td> 2011-01-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8 </th>\n",
" <td> 9</td>\n",
" <td> BeensPy</td>\n",
" <td> BeensPy</td>\n",
" <td> 2011-01-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9 </th>\n",
" <td> 10</td>\n",
" <td> pyocc</td>\n",
" <td> pyocc</td>\n",
" <td> 2011-01-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10 </th>\n",
" <td> 11</td>\n",
" <td> bet_test</td>\n",
" <td> bet_test</td>\n",
" <td> 2011-01-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>11 </th>\n",
" <td> 117</td>\n",
" <td> None</td>\n",
" <td> Moffdawg</td>\n",
" <td> 2014-02-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>12 </th>\n",
" <td> 119</td>\n",
" <td> None</td>\n",
" <td> web2py</td>\n",
" <td> 2013-08-12</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13 </th>\n",
" <td> 7</td>\n",
" <td> COSC122</td>\n",
" <td> COSC122</td>\n",
" <td> 2013-07-07</td>\n",
" <td> pythonds</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14 </th>\n",
" <td> 118</td>\n",
" <td> None</td>\n",
" <td> STEM 10th Grade Computer Programming</td>\n",
" <td> 2013-08-07</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>15 </th>\n",
" <td> 13</td>\n",
" <td> CS 115</td>\n",
" <td> CS 115</td>\n",
" <td> 2011-01-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16 </th>\n",
" <td> 14</td>\n",
" <td> CS115</td>\n",
" <td> CS115</td>\n",
" <td> 2011-01-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17 </th>\n",
" <td> 15</td>\n",
" <td> NorKam Programming With Python</td>\n",
" <td> NorKam Programming With Python</td>\n",
" <td> 2011-01-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18 </th>\n",
" <td> 4</td>\n",
" <td> gatech</td>\n",
" <td> gatech</td>\n",
" <td> 2013-08-09</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19 </th>\n",
" <td> 120</td>\n",
" <td> None</td>\n",
" <td> Web2py sem Segredos</td>\n",
" <td> 2013-08-10</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20 </th>\n",
" <td> 121</td>\n",
" <td> None</td>\n",
" <td> Prj1</td>\n",
" <td> 2013-08-10</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21 </th>\n",
" <td> 122</td>\n",
" <td> None</td>\n",
" <td> test</td>\n",
" <td> 2013-08-10</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>22 </th>\n",
" <td> 124</td>\n",
" <td> None</td>\n",
" <td> green</td>\n",
" <td> 2013-08-12</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>23 </th>\n",
" <td> 16</td>\n",
" <td> wm_cs20</td>\n",
" <td> wm_cs20</td>\n",
" <td> 2011-01-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>24 </th>\n",
" <td> 151</td>\n",
" <td> None</td>\n",
" <td> algopython</td>\n",
" <td> 2013-08-23</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25 </th>\n",
" <td> 125</td>\n",
" <td> None</td>\n",
" <td> AppliedGenomeInformatics</td>\n",
" <td> 2014-09-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>26 </th>\n",
" <td> 126</td>\n",
" <td> None</td>\n",
" <td> AppliedGenomeInformatics2</td>\n",
" <td> 2014-09-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>27 </th>\n",
" <td> 17</td>\n",
" <td> craig-trial</td>\n",
" <td> craig-trial</td>\n",
" <td> 2011-01-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>28 </th>\n",
" <td> 152</td>\n",
" <td> None</td>\n",
" <td> none</td>\n",
" <td> 2013-08-23</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>29 </th>\n",
" <td> 128</td>\n",
" <td> None</td>\n",
" <td> ISTA130_FALL_2013</td>\n",
" <td> 2013-08-26</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>380</th>\n",
" <td> 334</td>\n",
" <td> None</td>\n",
" <td> Spring2014CS104</td>\n",
" <td> 2014-02-01</td>\n",
" <td> thinkcspy</td>\n",
" </tr>\n",
" <tr>\n",
" <th>381</th>\n",
" <td> 470</td>\n",
" <td> None</td>\n",
" <td> Choice</td>\n",
" <td> 2014-06-02</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>382</th>\n",
" <td> 499</td>\n",
" <td> None</td>\n",
" <td> sci30x</td>\n",
" <td> 2014-06-19</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>383</th>\n",
" <td> 500</td>\n",
" <td> None</td>\n",
" <td> sci30x2</td>\n",
" <td> 2014-12-11</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>384</th>\n",
" <td> 501</td>\n",
" <td> None</td>\n",
" <td> Decidability</td>\n",
" <td> 2014-06-19</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>385</th>\n",
" <td> 471</td>\n",
" <td> None</td>\n",
" <td> choice</td>\n",
" <td> 2014-05-27</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>386</th>\n",
" <td> 475</td>\n",
" <td> None</td>\n",
" <td> bnmtest91</td>\n",
" <td> 2014-05-28</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>387</th>\n",
" <td> 477</td>\n",
" <td> None</td>\n",
" <td> bnmtest92</td>\n",
" <td> 2014-05-28</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>388</th>\n",
" <td> 476</td>\n",
" <td> None</td>\n",
" <td> CS104Fall2014</td>\n",
" <td> 2014-09-02</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>389</th>\n",
" <td> 478</td>\n",
" <td> None</td>\n",
" <td> CCPS_Python</td>\n",
" <td> 2014-05-29</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>390</th>\n",
" <td> 479</td>\n",
" <td> None</td>\n",
" <td> ru_python</td>\n",
" <td> 2014-06-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>391</th>\n",
" <td> 481</td>\n",
" <td> None</td>\n",
" <td> TestCourse131234</td>\n",
" <td> 2014-06-04</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>392</th>\n",
" <td> 482</td>\n",
" <td> None</td>\n",
" <td> TestCourse131235</td>\n",
" <td> 2014-06-04</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>393</th>\n",
" <td> 483</td>\n",
" <td> None</td>\n",
" <td> TestCourse131236</td>\n",
" <td> 2014-06-04</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>394</th>\n",
" <td> 484</td>\n",
" <td> None</td>\n",
" <td> NYU-CSCI-UA2-005</td>\n",
" <td> 2014-09-01</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>395</th>\n",
" <td> 485</td>\n",
" <td> None</td>\n",
" <td> TestCourse131237</td>\n",
" <td> 2014-06-04</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>396</th>\n",
" <td> 486</td>\n",
" <td> None</td>\n",
" <td> lbcc-cs22</td>\n",
" <td> 2014-06-05</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>397</th>\n",
" <td> 487</td>\n",
" <td> None</td>\n",
" <td> WormsleyTest</td>\n",
" <td> 2014-06-05</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>398</th>\n",
" <td> 488</td>\n",
" <td> None</td>\n",
" <td> Programming1</td>\n",
" <td> 2014-06-06</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>399</th>\n",
" <td> 489</td>\n",
" <td> None</td>\n",
" <td> cchspython</td>\n",
" <td> 2014-06-06</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>400</th>\n",
" <td> 472</td>\n",
" <td> None</td>\n",
" <td> ChoiceCoding</td>\n",
" <td> 2014-05-27</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>401</th>\n",
" <td> 490</td>\n",
" <td> None</td>\n",
" <td> FirenzePasA042</td>\n",
" <td> 2014-06-11</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>402</th>\n",
" <td> 491</td>\n",
" <td> None</td>\n",
" <td> tp-Python</td>\n",
" <td> 2014-06-12</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>403</th>\n",
" <td> 492</td>\n",
" <td> None</td>\n",
" <td> Univ-of-Wash-CSE-373-Autumn-2014</td>\n",
" <td> 2014-09-24</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>404</th>\n",
" <td> 493</td>\n",
" <td> None</td>\n",
" <td> Ordinamentovettori</td>\n",
" <td> 2014-06-16</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>405</th>\n",
" <td> 494</td>\n",
" <td> None</td>\n",
" <td> CS902SU14</td>\n",
" <td> 2014-06-16</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>406</th>\n",
" <td> 495</td>\n",
" <td> None</td>\n",
" <td> 123</td>\n",
" <td> 2014-06-16</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>407</th>\n",
" <td> 503</td>\n",
" <td> None</td>\n",
" <td> ICS3U</td>\n",
" <td> 2014-06-18</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>408</th>\n",
" <td> 504</td>\n",
" <td> None</td>\n",
" <td> Learn_Pythom</td>\n",
" <td> 2014-06-20</td>\n",
" <td> None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>409</th>\n",
" <td> 507</td>\n",
" <td> None</td>\n",
" <td> WestlakeCSP</td>\n",
" <td> 2014-06-22</td>\n",
" <td> None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>410 rows \u00d7 5 columns</p>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 63,
"text": [
" id course_id \\\n",
"0 144 None \n",
"1 2 pythonds \n",
"2 3 thinkcspy \n",
"3 139 None \n",
"4 116 None \n",
"5 6 luther151 \n",
"6 8 python4bio \n",
"7 88 Python 101 \n",
"8 9 BeensPy \n",
"9 10 pyocc \n",
"10 11 bet_test \n",
"11 117 None \n",
"12 119 None \n",
"13 7 COSC122 \n",
"14 118 None \n",
"15 13 CS 115 \n",
"16 14 CS115 \n",
"17 15 NorKam Programming With Python \n",
"18 4 gatech \n",
"19 120 None \n",
"20 121 None \n",
"21 122 None \n",
"22 124 None \n",
"23 16 wm_cs20 \n",
"24 151 None \n",
"25 125 None \n",
"26 126 None \n",
"27 17 craig-trial \n",
"28 152 None \n",
"29 128 None \n",
".. ... ... \n",
"380 334 None \n",
"381 470 None \n",
"382 499 None \n",
"383 500 None \n",
"384 501 None \n",
"385 471 None \n",
"386 475 None \n",
"387 477 None \n",
"388 476 None \n",
"389 478 None \n",
"390 479 None \n",
"391 481 None \n",
"392 482 None \n",
"393 483 None \n",
"394 484 None \n",
"395 485 None \n",
"396 486 None \n",
"397 487 None \n",
"398 488 None \n",
"399 489 None \n",
"400 472 None \n",
"401 490 None \n",
"402 491 None \n",
"403 492 None \n",
"404 493 None \n",
"405 494 None \n",
"406 495 None \n",
"407 503 None \n",
"408 504 None \n",
"409 507 None \n",
"\n",
" course_name term_start_date book \n",
"0 CSC131tagliarini 2013-08-20 thinkcspy \n",
"1 pythonds 2011-01-01 pythonds \n",
"2 thinkcspy 2011-01-01 thinkcspy \n",
"3 HPComSci_IB 2013-08-17 thinkcspy \n",
"4 a 2013-08-07 None \n",
"5 luther151 2011-01-01 pythonds \n",
"6 python4bio 2011-01-01 thinkcspy \n",
"7 Python 101 2011-01-01 None \n",
"8 BeensPy 2011-01-01 thinkcspy \n",
"9 pyocc 2011-01-01 thinkcspy \n",
"10 bet_test 2011-01-01 thinkcspy \n",
"11 Moffdawg 2014-02-01 thinkcspy \n",
"12 web2py 2013-08-12 None \n",
"13 COSC122 2013-07-07 pythonds \n",
"14 STEM 10th Grade Computer Programming 2013-08-07 None \n",
"15 CS 115 2011-01-01 None \n",
"16 CS115 2011-01-01 thinkcspy \n",
"17 NorKam Programming With Python 2011-01-01 None \n",
"18 gatech 2013-08-09 None \n",
"19 Web2py sem Segredos 2013-08-10 None \n",
"20 Prj1 2013-08-10 None \n",
"21 test 2013-08-10 None \n",
"22 green 2013-08-12 thinkcspy \n",
"23 wm_cs20 2011-01-01 None \n",
"24 algopython 2013-08-23 None \n",
"25 AppliedGenomeInformatics 2014-09-01 None \n",
"26 AppliedGenomeInformatics2 2014-09-01 None \n",
"27 craig-trial 2011-01-01 None \n",
"28 none 2013-08-23 None \n",
"29 ISTA130_FALL_2013 2013-08-26 thinkcspy \n",
".. ... ... ... \n",
"380 Spring2014CS104 2014-02-01 thinkcspy \n",
"381 Choice 2014-06-02 None \n",
"382 sci30x 2014-06-19 None \n",
"383 sci30x2 2014-12-11 None \n",
"384 Decidability 2014-06-19 None \n",
"385 choice 2014-05-27 None \n",
"386 bnmtest91 2014-05-28 None \n",
"387 bnmtest92 2014-05-28 None \n",
"388 CS104Fall2014 2014-09-02 None \n",
"389 CCPS_Python 2014-05-29 None \n",
"390 ru_python 2014-06-01 None \n",
"391 TestCourse131234 2014-06-04 None \n",
"392 TestCourse131235 2014-06-04 None \n",
"393 TestCourse131236 2014-06-04 None \n",
"394 NYU-CSCI-UA2-005 2014-09-01 None \n",
"395 TestCourse131237 2014-06-04 None \n",
"396 lbcc-cs22 2014-06-05 None \n",
"397 WormsleyTest 2014-06-05 None \n",
"398 Programming1 2014-06-06 None \n",
"399 cchspython 2014-06-06 None \n",
"400 ChoiceCoding 2014-05-27 None \n",
"401 FirenzePasA042 2014-06-11 None \n",
"402 tp-Python 2014-06-12 None \n",
"403 Univ-of-Wash-CSE-373-Autumn-2014 2014-09-24 None \n",
"404 Ordinamentovettori 2014-06-16 None \n",
"405 CS902SU14 2014-06-16 None \n",
"406 123 2014-06-16 None \n",
"407 ICS3U 2014-06-18 None \n",
"408 Learn_Pythom 2014-06-20 None \n",
"409 WestlakeCSP 2014-06-22 None \n",
"\n",
"[410 rows x 5 columns]"
]
}
],
"prompt_number": 63
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"course_ids.to_sql('bookcourses',engine2,if_exists='replace')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 64
},
{
"cell_type": "heading",
"level": 6,
"metadata": {},
"source": [
"Removal starts here"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"CREATE INDEX bookIndex\n",
"ON bookcourses (index);\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Done.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 65,
"text": [
"[]"
]
}
],
"prompt_number": 65
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query = '''\n",
"SELECT book, course_id ,clicks,numDivs,numSids FROM\n",
"(\n",
"SELECT book, course_name\n",
"FROM bookcourses\n",
") AS S\n",
"JOIN\n",
"(SELECT course_id, COUNT(*) AS clicks, COUNT(DISTINCT div_id) AS numDivs, COUNT(DISTINCT sid) AS numSids\n",
"FROM useinfo\n",
"GROUP BY course_id\n",
") AS T\n",
"ON (S.course_name = T.course_id)\n",
"'''\n",
"\n",
"course_idInfo = psql.read_sql(query, conn)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 66
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#Collect all of the courses that don't have a book associated with them\n",
"noBook = course_idInfo[course_idInfo['book'] != 'pythonds']\n",
"noBook = noBook[noBook['book'] != 'thinkcspy']"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 67
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = course_idInfo\n",
"df1 = df[df['numdivs'] < 5]\n",
"df2 = df[df['numsids'] < 5]\n",
"df3 = df[df['clicks']<100]\n",
"df4 = pd.merge(df1,df2,how='outer')\n",
"df5 = pd.merge(df3,df4,how='outer')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 68
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"remove = pd.merge(df5,noBook, how = 'outer')['course_id']\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 69
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"remove"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 70,
"text": [
"0 9my0\n",
"1 ACcomp11\n",
"2 APCS-Python\n",
"3 AppliedGenomeInformatics2\n",
"4 BASE\n",
"5 BC_GCSECompSci\n",
"6 C152_Sum\n",
"7 CBCS\n",
"8 CS140\n",
"9 CSC100_J_term\n",
"10 CSIS152\n",
"11 CSIT902\n",
"12 CS_Python\n",
"13 CodingLevel1\n",
"14 DAM_00\n",
"...\n",
"75 septtest1\n",
"76 stolaf121AS14\n",
"77 tc_1\n",
"78 test_tssi\n",
"79 testcamp\n",
"80 testproject\n",
"81 IntroCC\n",
"82 MrEasonECSFall2013\n",
"83 NZTeach\n",
"84 PythonASMSA\n",
"85 SKECS2012PythonI\n",
"86 WPSCS\n",
"87 JavaReview\n",
"88 Learning\n",
"89 SER101F2013\n",
"Name: course_id, Length: 90, dtype: object"
]
}
],
"prompt_number": 70
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"#This is for the clean database notebook. After making a table of the removed items, I'll remove those course_ids from useinfo \n",
"remove.to_sql('outlier_courses',engine2,if_exists='replace')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 71
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"CREATE INDEX ourlier_course_Index\n",
"ON outlier_courses (index);\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Done.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 72,
"text": [
"[]"
]
}
],
"prompt_number": 72
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE course_id IN \n",
"(SELECT course_id FROM outlier_courses);\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"24827 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 73,
"text": [
"[]"
]
}
],
"prompt_number": 73
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Delete Single Div Users from the database"
]
},
{
"cell_type": "heading",
"level": 5,
"metadata": {},
"source": [
"Pythonds"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo WHERE id IN\n",
"(\n",
"SELECT useinfo.id FROM useinfo JOIN bookcourses\n",
" ON (useinfo.course_id = bookcourses.course_name)\n",
"WHERE sid IN\n",
"(\n",
"SELECT sid\n",
"FROM \n",
"(\n",
" SELECT DISTINCT sid,div_id\n",
" FROM useinfo JOIN bookcourses\n",
" ON (useinfo.course_id = bookcourses.course_name)\n",
" WHERE book = 'pythonds'\n",
") AS foo\n",
"GROUP BY sid HAVING count(*) = 1\n",
")\n",
"AND book = 'pythonds');\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"432951 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 74,
"text": [
"[]"
]
}
],
"prompt_number": 74
},
{
"cell_type": "heading",
"level": 5,
"metadata": {},
"source": [
"Thinkcspy"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo WHERE id IN\n",
"(\n",
"SELECT useinfo.id FROM useinfo JOIN bookcourses\n",
" ON (useinfo.course_id = bookcourses.course_name)\n",
"WHERE sid IN\n",
"(\n",
"SELECT sid\n",
"FROM \n",
"(\n",
" SELECT DISTINCT sid,div_id\n",
" FROM useinfo JOIN bookcourses\n",
" ON (useinfo.course_id = bookcourses.course_name)\n",
" WHERE book = 'thinkcspy'\n",
") AS foo\n",
"GROUP BY sid HAVING count(*) = 1\n",
")\n",
"AND book = 'thinkcspy');\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"520677 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 75,
"text": [
"[]"
]
}
],
"prompt_number": 75
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Delete Short Term Users from the database"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE sid IN\n",
"(\n",
"--#Select all users whose duration on the website was less than an hour \n",
" SELECT sid FROM\n",
" \n",
" (\n",
" SELECT sid,timestamp,event,act,div_id,course_id \n",
" FROM \n",
" (\n",
" SELECT sid FROM useinfo\n",
" GROUP BY sid HAVING COUNT(*) > 1\n",
" ) AS T \n",
" NATURAL JOIN \n",
" (\n",
" SELECT sid,min(timestamp) startd, max(timestamp) endd FROM useinfo\n",
" GROUP BY sid\n",
" ) AS S \n",
" NATURAL JOIN \n",
" useinfo\n",
" WHERE (endd - startd) < ('1 day')::interval\n",
" ) AS mvu\n",
")\n",
";\n",
"COMMIT;\n",
" \n",
" \n"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"3164790 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 76,
"text": [
"[]"
]
}
],
"prompt_number": 76
},
{
"cell_type": "code",
"collapsed": false,
"input": [
" %%sql\n",
"DELETE FROM useinfo\n",
"WHERE sid IN\n",
"(\n",
" SELECT sid FROM\n",
"--#Select all the users with only one action. \n",
" (\n",
" SELECT sid,timestamp,event,act,div_id,course_id FROM \n",
" (\n",
" SELECT sid FROM useinfo GROUP BY sid HAVING COUNT(*) < 2\n",
" ) AS R \n",
" NATURAL JOIN \n",
" useinfo\n",
" \n",
" ) AS svu\n",
" \n",
")\n",
";\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"10451 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 77,
"text": [
"[]"
]
}
],
"prompt_number": 77
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 77
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Remove outliers with respect to clicks and duration."
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"query = ''' \n",
"SELECT sid, min(timestamp) startd, max(timestamp) endd\n",
"FROM useinfo\n",
"GROUP BY sid\n",
"'''\n",
"\n",
"d = psql.frame_query(query,conn)\n",
"d.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sid</th>\n",
" <th>startd</th>\n",
" <th>endd</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td> bcselvitazurkevica</td>\n",
" <td>2012-10-02 08:15:40</td>\n",
" <td>2014-02-11 21:10:49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> jPayne</td>\n",
" <td>2013-09-10 15:43:59</td>\n",
" <td>2013-09-29 20:55:51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> lisadawn</td>\n",
" <td>2013-08-21 21:21:58</td>\n",
" <td>2013-08-26 19:49:02</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> mackedee</td>\n",
" <td>2013-08-27 23:46:49</td>\n",
" <td>2013-09-10 01:17:01</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> +4dB</td>\n",
" <td>2013-07-14 01:20:04</td>\n",
" <td>2013-10-30 00:11:56</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 78,
"text": [
" sid startd endd\n",
"0 bcselvitazurkevica 2012-10-02 08:15:40 2014-02-11 21:10:49\n",
"1 jPayne 2013-09-10 15:43:59 2013-09-29 20:55:51\n",
"2 lisadawn 2013-08-21 21:21:58 2013-08-26 19:49:02\n",
"3 mackedee 2013-08-27 23:46:49 2013-09-10 01:17:01\n",
"4 +4dB 2013-07-14 01:20:04 2013-10-30 00:11:56"
]
}
],
"prompt_number": 78
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"d['duration'] = d['endd'] - d['startd']\n",
"d['durdays'] = d['duration'].map(lambda x: x.astype('timedelta64[D]')) #takes a function as a parameter and"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 79
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"df = d[d.duration > np.timedelta64(1,'D')]\n",
"#df = df[df.sid != 'bmiller']\n",
"#df = df[df.sid != 'ranum' ]\n",
"df.index = df.sid"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 80
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"q = '''\n",
"SELECT sid, COUNT(*) AS numvisits FROM useinfo\n",
"GROUP BY sid \n",
"ORDER BY numvisits\n",
"'''\n",
"visitf = psql.frame_query(q,conn)\n",
"visitf.index = visitf.sid"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 81
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"dfr = df['sid'].values\n",
"dfrs = set(dfr)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 82
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"count = 1\n",
"pairs = []\n",
"for u in df.index:\n",
" if u in visitf.index:\n",
" count += 1\n",
" pairs.append((int(df.ix[u].durdays)/1000000000/(60*60*24),visitf.ix[u].numvisits, df.ix[u].sid ))"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 83
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"\n",
"pf = DataFrame( {'duration':[x[0] for x in pairs], 'clicks':[x[1] for x in pairs],'sid': [x[2] for x in pairs] } )\n"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 84
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"def getOutliers(dataframe, dimension):\n",
" mean = dataframe[dimension].mean()\n",
" std = dataframe[dimension].std() \n",
"\n",
" newDF = dataframe[(dataframe[dimension] - mean)/std < 3.5]\n",
" return newDF\n",
" print(mean, \" \" , std)"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 85
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"temp = getOutliers(pf, 'clicks') #Finds outliers with respects to clicks\n",
"temp1 = getOutliers(pf, 'duration') #Finds the rest of the outliers with respect to duration. "
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 86
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"temp1.head()"
],
"language": "python",
"metadata": {},
"outputs": [
{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>clicks</th>\n",
" <th>duration</th>\n",
" <th>sid</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td> 66</td>\n",
" <td> 19</td>\n",
" <td> jPayne</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td> 544</td>\n",
" <td> 4</td>\n",
" <td> lisadawn</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td> 209</td>\n",
" <td> 13</td>\n",
" <td> mackedee</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td> 171</td>\n",
" <td> 107</td>\n",
" <td> +4dB</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td> 1563</td>\n",
" <td> 111</td>\n",
" <td> 0v3r2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 87,
"text": [
" clicks duration sid\n",
"1 66 19 jPayne\n",
"2 544 4 lisadawn\n",
"3 209 13 mackedee\n",
"4 171 107 +4dB\n",
"5 1563 111 0v3r2"
]
}
],
"prompt_number": 87
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"outliers = pd.merge(temp,temp1, how ='outer', on ='sid')\n",
"temp1.to_sql('outlierssid',engine2,if_exists='replace')"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 88
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"CREATE INDEX outlierIndex\n",
"ON outlierssid (sid);\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"Done.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 89,
"text": [
"[]"
]
}
],
"prompt_number": 89
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"DELETE FROM useinfo\n",
"WHERE sid IN \n",
" (SELECT sid \n",
" FROM useinfo\n",
" NATURAL JOIN \n",
" (\n",
" SELECT sid \n",
" FROM useinfo\n",
" EXCEPT \n",
" (SELECT sid \n",
" FROM outlierssid\n",
" )\n",
" ) \n",
" AS T\n",
" );\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"628996 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 90,
"text": [
"[]"
]
}
],
"prompt_number": 90
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"SELECT count(*) FROM useinfo;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 rows affected.\n"
]
},
{
"html": [
"<table>\n",
" <tr>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <td>9357764</td>\n",
" </tr>\n",
"</table>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 91,
"text": [
"[(9357764,)]"
]
}
],
"prompt_number": 91
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Replace courselib with runestone"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"UPDATE useinfo\n",
"SET div_id = regexp_replace(div_id,'^/courselib(.*)','/runestone\\1');\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"9357764 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 92,
"text": [
"[]"
]
}
],
"prompt_number": 92
},
{
"cell_type": "heading",
"level": 3,
"metadata": {},
"source": [
"Replace mChoice with mchoice"
]
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql \n",
"UPDATE useinfo\n",
"SET event = 'mchoice'\n",
"WHERE event = 'mChoice';\n",
"\n",
"COMMIT;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1166988 rows affected.\n",
"Done.\n"
]
},
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 93,
"text": [
"[]"
]
}
],
"prompt_number": 93
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"SELECT * FROM useinfo\n",
"where event = 'mChoice';"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"0 rows affected.\n"
]
},
{
"html": [
"<table>\n",
" <tr>\n",
" <th>id</th>\n",
" <th>timestamp</th>\n",
" <th>sid</th>\n",
" <th>event</th>\n",
" <th>act</th>\n",
" <th>div_id</th>\n",
" <th>course_id</th>\n",
" </tr>\n",
"</table>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 94,
"text": [
"[]"
]
}
],
"prompt_number": 94
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"SELECT count(*) FROM (\n",
"SELECT DISTINCT sid FROM useinfo) AS t;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 rows affected.\n"
]
},
{
"html": [
"<table>\n",
" <tr>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <td>81894</td>\n",
" </tr>\n",
"</table>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 95,
"text": [
"[(81894,)]"
]
}
],
"prompt_number": 95
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"%%sql\n",
"SELECT count(*) FROM useinfo;"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"1 rows affected.\n"
]
},
{
"html": [
"<table>\n",
" <tr>\n",
" <th>count</th>\n",
" </tr>\n",
" <tr>\n",
" <td>9357764</td>\n",
" </tr>\n",
"</table>"
],
"metadata": {},
"output_type": "pyout",
"prompt_number": 96,
"text": [
"[(9357764,)]"
]
}
],
"prompt_number": 96
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 96
}
],
"metadata": {}
}
]
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment