Created
October 27, 2014 19:30
-
-
Save jasonost/5a81243a717060c358a6 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"worksheets": [ | |
{ | |
"cells": [ | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "import pandas as pd, numpy as np, datetime, random, cPickle as pickle\nfrom sklearn import linear_model\nfrom sklearn.metrics import roc_auc_score, roc_curve, confusion_matrix\nfrom sklearn.feature_selection import RFE\nfrom __future__ import division\npd.set_option('max_colwidth', 200)\n%matplotlib inline", | |
"prompt_number": 1, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Loading data" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Base file path" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# a single course identifier drives the data directory and both file prefixes\nraw_file = 'HumanitiesSciences_Econ-1_Summer2014'\nbase_path = 'data/' + raw_file + '/'\neng_file = 'engagement_' + raw_file", | |
"prompt_number": 2, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Event Extract" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# Raw event log. The first row of the file is skipped (presumably its own header)\n# in favour of the explicit column names below, and the literal string 'None'\n# is read as a missing value.\nevent = pd.read_csv(\n    base_path + raw_file + '_EventXtract.csv',\n    skiprows=1,\n    names=['anon_screen_name', 'event_type', 'ip_country', 'time',\n           'course_display_name', 'resource_display_name', 'success',\n           'video_code', 'video_current_time', 'video_speed',\n           'video_old_time', 'video_new_time', 'video_seek_type',\n           'video_new_speed', 'video_old_speed', 'goto_from', 'goto_dest'],\n    parse_dates=['time'],\n    na_values=['None'],\n)", | |
"prompt_number": 3, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Activity Grade" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# Activity grades. The first row of the file is skipped and the columns are named\n# explicitly; both submission timestamps are parsed as dates.\ngrades = pd.read_csv(\n    base_path + raw_file + '_ActivityGrade.csv',\n    skiprows=1,\n    names=['activity_grade_id', 'student_id', 'course_display_name',\n           'grade', 'max_grade', 'percent_grade', 'parts_correctness',\n           'answers', 'num_attempts', 'first_submit', 'last_submit',\n           'module_type', 'anon_screen_name', 'resource_display_name',\n           'module_id', 'name', 'screen_name'],\n    parse_dates=['first_submit', 'last_submit'],\n)", | |
"prompt_number": 5, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Video Interaction" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# Video interaction events. The first row of the file is skipped, columns are named\n# explicitly, and the literal string 'None' is read as a missing value.\nvideo = pd.read_csv(\n    base_path + raw_file + '_VideoInteraction.csv',\n    skiprows=1,\n    names=['event_type', 'resource_display_name', 'video_current_time',\n           'video_speed', 'video_new_speed', 'video_old_speed',\n           'video_new_time', 'video_old_time', 'video_seek_type',\n           'video_code', 'time', 'course_display_name',\n           'anon_screen_name', 'video_id', 'name', 'screen_name'],\n    parse_dates=['time'],\n    na_values=['None'],\n)", | |
"prompt_number": 6, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Engagement data (all)" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# Session-level engagement records; the first row of the file is skipped and the\n# columns are named explicitly.\neng_all = pd.read_csv(\n    base_path + eng_file + '_allData.csv',\n    skiprows=1,\n    names=['Platform', 'Course', 'anon_screen_name', 'Date', 'Time',\n           'SessionLength', 'NumEventsInSession'],\n)", | |
"prompt_number": 7, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Weekly engagement data" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# Weekly effort per student; the first row of the file is skipped and the columns\n# are named explicitly.\neng_weekly = pd.read_csv(\n    base_path + eng_file + '_weeklyEffort.csv',\n    skiprows=1,\n    names=['Platform', 'Course', 'anon_screen_name', 'Week', 'Effort'],\n)", | |
"prompt_number": 8, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Creating useful extracts" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Create useful grades extract" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# rows with a positive max_grade, a resource name containing 'Quiz' and exactly\n# one attempt, reduced to the columns listed below\nis_gradable = grades.max_grade > 0\nis_quiz = grades.resource_display_name.apply(lambda name: 'Quiz' in str(name))\nis_single_attempt = grades.num_attempts == 1\nkept_columns = ['grade', 'max_grade', 'parts_correctness', 'module_type',\n                'anon_screen_name', 'resource_display_name', 'module_id']\ngrades_ok = grades[is_gradable & is_quiz & is_single_attempt][kept_columns]\n\n# shorten module_id to the text after its last '/'\ngrades_ok.update(grades_ok.module_id.apply(lambda mid: mid.rsplit('/', 1)[-1]))", | |
"prompt_number": 9, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Create extract with first attempt at each quiz" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# graded quiz submissions: problem_check events on a resource whose name contains\n# 'Quiz' and that carry a recorded outcome\nattempt_cols = ['anon_screen_name', 'time', 'resource_display_name', 'success']\nfirst_attempts = event[(event.event_type == 'problem_check') &\n                       (event.resource_display_name.apply(lambda x: 'Quiz' in str(x))) &\n                       (event.success.notnull())][attempt_cols].copy()  # copy: a column is added below\n\n# adding order for each quiz, and selecting first\n# (method 'min': submissions tied for the earliest time all get rank 1 and are all kept)\nfirst_attempts['order_quiz'] = first_attempts.groupby(['anon_screen_name','resource_display_name']).time.rank(method='min')\nfirst_attempts = first_attempts[first_attempts.order_quiz == 1][attempt_cols].copy()\n\n# total number of quizzes per student, looked up with one vectorised map\nfa = first_attempts.groupby('anon_screen_name').time.count()\nfirst_attempts['num_quiz'] = first_attempts.anon_screen_name.map(fa)\n\n# only looking at those with more than one quiz\nfirst_attempts = first_attempts[first_attempts.num_quiz > 1][attempt_cols]", | |
"prompt_number": 32, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "### Features" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Quiz-level measure" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# share of first attempts answered correctly, per quiz\nquiz_total = first_attempts.groupby('resource_display_name').time.count()\nquiz_correct = first_attempts[first_attempts.success == 'correct'].groupby('resource_display_name').time.count()\n\n# a quiz with no correct first attempt is absent from quiz_correct, so the division\n# gives NaN for it; its share is 0\nquiz_correct_share = (quiz_correct / quiz_total).fillna(0)", | |
"prompt_number": 11, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Person-level measure" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# per-student counts of first attempts: all of them, and the correct ones only\ncorrect_only = first_attempts[first_attempts.success == 'correct']\nperson_total = first_attempts.groupby('anon_screen_name').time.count()\nperson_correct = correct_only.groupby('anon_screen_name').time.count()\n\n# students without any correct attempt come out as NaN here\nperson_correct_share = person_correct / person_total", | |
"prompt_number": 12, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
"source": "Activity prior to quiz" | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# shuffling attempts with a dedicated, fixed-seed generator so the row order (and\n# any later split of the list by position) is the same on every run\nattempt_sample = list(first_attempts.values)\nrandom.Random(0).shuffle(attempt_sample)", | |
"prompt_number": 33, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# function to summarize type of event based on resource_display_name and event_type\ndef type_summary(row):\n    \"\"\"Return a coarse activity label for one event row.\n\n    row must support row['resource_display_name'] and row['event_type'].\n    A non-missing resource name decides the label on its own; otherwise the\n    label comes from substring tests on event_type, tried in the order below.\n    An event_type matching no rule is returned unchanged, and a missing\n    event_type yields 'UNKNOWN'.\n    \"\"\"\n    resource = row['resource_display_name']\n    if not pd.isnull(resource):\n        resource = str(resource)\n        if 'Lecture:' in resource:\n            return 'LECTURE'\n        if resource.startswith('Quiz'):\n            return 'QUIZ'\n        if resource in ('Multiple Choice', 'Checkboxes', 'Dropdown'):\n            return 'INTERFACE'\n        return 'COURSEWARE'\n\n    event_type = row['event_type']\n    if pd.isnull(event_type):\n        # a missing value is not a string, so the substring tests below would raise\n        return 'UNKNOWN'\n    if '_video' in event_type:\n        return 'VIDEO'\n    if '_problem;' in event_type or '/problem' in event_type:\n        return 'PROBLEM'\n    if '_transcript' in event_type:\n        # NOTE(review): transcript events are labelled GRADES - confirm this is intended\n        return 'GRADES'\n    if '/discussion' in event_type or 'forum.searched' in event_type:\n        return 'DISCUSSION'\n    if '/courseware' in event_type:\n        return 'COURSEWARE'\n    if '/info' in event_type:\n        return 'BLOG'\n    if '/progress' in event_type:\n        return 'PROGRESS'\n    if event_type.startswith('seq_') or event_type.endswith('goto_position'):\n        return 'MODULES'\n    if '/wiki' in event_type or '/course_wiki' in event_type:\n        return 'WIKI'\n    if '/instructor' in event_type:\n        return 'INSTRUCTOR'\n    if 'e0b95242b5db4fdba5da3dbca7097681' in event_type:\n        return 'LINKS'\n    if '1e0faf66846c425dafe91eb33bb90c51' in event_type or event_type.startswith('/'):\n        return 'USELESS'\n    return event_type\n\n# function to aggregate output of value_counts() on previous summaries\ndef count_types(summary):\n    \"\"\"Collapse a {label: count} mapping into six totals.\n\n    Returns [video, course, discussion, wiki, quiz, grades]. Labels that belong\n    to none of these groups are ignored, so an empty mapping gives six zeros.\n    \"\"\"\n    groups = [('LECTURE', 'VIDEO'),              # video\n              ('COURSEWARE', 'MODULES'),         # course\n              ('DISCUSSION',),                   # discussion\n              ('BLOG', 'WIKI', 'LINKS'),         # wiki\n              ('QUIZ', 'INTERFACE', 'PROBLEM'),  # quiz\n              ('PROGRESS', 'GRADES')]            # grades\n    return [sum([summary[k] for k in summary if k in labels]) for labels in groups]", | |
"prompt_number": 151, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# Resume-safe accumulation: start from an empty list on a fresh kernel, otherwise\n# continue after the rows that were already collected.\ntry:\n    storage\nexcept NameError:\n    storage = []\n\nfor i in range(len(storage), len(attempt_sample)):\n    a, t, r, s = attempt_sample[i]\n    # this student's events in the 6 hours up to and including the attempt\n    acts = event[(event.anon_screen_name == a) & (event.time <= t) & (event.time >= t - datetime.timedelta(hours=6))]\n    # label every event once; the shorter windows are subsets of the 6-hour one\n    kinds = acts.apply(type_summary, axis=1)\n    window_counts = []\n    for h in [6,1,0.5,0.1]:\n        subacts = kinds[acts.time >= t - datetime.timedelta(hours=h)].value_counts()\n        window_counts += count_types(dict(subacts))\n    storage.append([a,t,r,s] + window_counts)\n    if i % 500 == 0: print('%d %s' % (i, datetime.datetime.now()))", | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "col_names = ['anon_screen_name',\n             'time',\n             'resource_display_name',\n             'success']\n\n# one column per (window, category) pair; the categories have to match, in order,\n# the six totals count_types() returns for each window\nfor t in ['6hours','1hour','30min','6min']:\n    for r in ['video','course','discuss','wiki','quiz','grades']:\n        col_names.append('resource_%s_%s' % (t, r))\n\n# rows before position 21013 are kept separate from the rest\n# (presumably an earlier batch - confirm)\nold_activity = pd.DataFrame(storage[:21013], columns=col_names)\nactivity = pd.DataFrame(storage[21013:], columns=col_names)", | |
"prompt_number": 121, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "for c in col_names[4:]:\n    maxval = activity[c].max()\n    minval = activity[c].min()\n    span = float(maxval - minval)\n    # 0/1 flag for any activity at all, taken from the raw counts before rescaling\n    activity[c + '_any'] = activity[c].apply(lambda x: 1 if x > 0 else 0)\n    # min-max scale to [0, 1]; a constant column has zero span, so it is set to 0.0\n    # instead of dividing by zero\n    activity[c] = ((activity[c] - minval) / span) if span else 0.0\n\nactivity['quiz_correct_share'] = activity.resource_display_name.map(quiz_correct_share)\nactivity['person_correct_share'] = activity.anon_screen_name.map(person_correct_share)\n# students missing from person_correct_share get a share of 0\nactivity.person_correct_share = activity.person_correct_share.replace(np.nan,0)\n# binary target: 1 for 'correct', 0 for anything else\nactivity['success'] = activity.success.apply(lambda x: 1 if x == 'correct' else 0)", | |
"prompt_number": 125, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "for c in col_names[4:]:\n    maxval = old_activity[c].max()\n    minval = old_activity[c].min()\n    span = float(maxval - minval)\n    # 0/1 flag for any activity at all, taken from the raw counts before rescaling\n    old_activity[c + '_any'] = old_activity[c].apply(lambda x: 1 if x > 0 else 0)\n    # min-max scale to [0, 1]; a constant column has zero span, so it is set to 0.0\n    # instead of dividing by zero\n    old_activity[c] = ((old_activity[c] - minval) / span) if span else 0.0\n\nold_activity['quiz_correct_share'] = old_activity.resource_display_name.map(quiz_correct_share)\nold_activity['person_correct_share'] = old_activity.anon_screen_name.map(person_correct_share)\n# students missing from person_correct_share get a share of 0\nold_activity.person_correct_share = old_activity.person_correct_share.replace(np.nan,0)\n# binary target, encoded the same way as in the activity frame so the two can be\n# concatenated without mixing strings and integers\nold_activity['success'] = old_activity.success.apply(lambda x: 1 if x == 'correct' else 0)", | |
"prompt_number": 126, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "concatenated = pd.concat([old_activity, activity])\n# the with-block closes the file even if pickling fails part-way\nwith open('activity.pickle','wb') as f:\n    pickle.dump(concatenated, f)", | |
"prompt_number": 128, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# NOTE: unpickling can execute code stored in the file - only load pickles that\n# were written by this notebook\nwith open('activity.pickle','rb') as f:\n    activity = pickle.load(f)", | |
"prompt_number": 58, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "markdown", | |
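"source": "Optimization: split the event log into one DataFrame per student up front, so each attempt only scans that student's events instead of the whole log" | |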
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# per-student event DataFrames, keyed by anon_screen_name; starts out empty\ndfs = dict()", | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# populating dictionary: group the event log once instead of filtering the whole\n# frame again for every student\nwanted = first_attempts.anon_screen_name.unique()\nfor k, student_events in event[event.anon_screen_name.isin(wanted)].groupby('anon_screen_name'):\n    if k not in dfs:\n        dfs[k] = student_events", | |
"prompt_number": 147, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# the with-block closes the file even if pickling fails part-way\nwith open('event_dict.pickle','wb') as f:\n    pickle.dump(dfs, f)", | |
"prompt_number": 149, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "storage2 = []\nfor i, (a, t, r, s) in enumerate(attempt_sample):\n    if a in dfs:\n        cur_df = dfs[a]\n        # this student's events in the 6 hours up to and including the attempt\n        acts = cur_df[(cur_df.time <= t) & (cur_df.time >= t - datetime.timedelta(hours=6))]\n        # label every event once; the shorter windows are subsets of the 6-hour one\n        kinds = acts.apply(type_summary, axis=1)\n        # named window_counts so the loop does not overwrite the activity DataFrame\n        window_counts = []\n        for h in [6,1,0.5,0.1]:\n            subacts = kinds[acts.time >= t - datetime.timedelta(hours=h)].value_counts()\n            window_counts += count_types(dict(subacts))\n        storage2.append([a,t,r,s] + window_counts)\n    if i % 500 == 0: print('%d %s' % (i, datetime.datetime.now()))", | |
"prompt_number": 152, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": "0 2014-10-27 11:03:34.237024\n500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:03:54.851823\n1000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:04:15.245450\n1500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:04:33.072404\n2000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:04:53.306127\n2500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:05:13.944672\n3000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:05:34.138569\n3500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:05:56.399671\n4000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:06:16.337065\n4500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:06:37.062941\n5000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:06:55.849502\n5500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:07:13.796666\n6000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:07:31.799823\n6500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:07:49.704206\n7000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:08:07.934125\n7500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:08:26.343233\n8000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:08:45.117150\n8500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:09:04.234663\n9000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:09:21.157156\n9500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:09:38.741805\n10000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:09:58.189321\n10500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:10:15.735977\n11000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:10:38.300154\n11500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:10:57.953401\n12000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:11:16.650318\n12500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:11:36.684224\n13000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:11:57.354762\n13500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:12:17.473436\n14000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:12:36.943896\n14500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:12:55.035823\n15000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:13:15.792556\n15500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:13:38.186435\n16000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:13:59.929229\n16500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:14:19.382506\n17000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:14:42.192833\n17500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:15:01.283186\n18000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:15:20.447119\n18500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:15:41.314251\n19000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:16:03.113775\n19500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:16:25.404203\n20000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:16:45.226568\n20500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:17:03.171314\n21000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:17:20.676830\n21500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:17:38.499961\n22000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:17:55.992648\n22500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:18:13.914992\n23000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:18:32.524462\n23500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:18:49.583950\n24000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:19:08.177233\n24500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:19:26.470250\n25000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:19:45.560429\n25500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:20:03.342082\n26000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:20:22.237072\n26500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:20:39.524033\n27000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:20:58.497706\n27500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:21:19.857610\n28000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:21:37.552539\n28500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:21:57.279325\n29000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:22:14.942724\n29500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:22:32.386296\n30000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:22:50.459661\n30500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:23:08.890897\n31000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:23:27.636047\n31500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:23:45.602049\n32000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:24:03.445068\n32500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:24:21.753831\n33000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:24:39.408077\n33500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:24:56.899293\n34000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:25:14.599139\n34500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:25:32.782685\n35000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:25:50.004346\n35500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:26:08.403383\n36000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:26:26.327823\n36500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:26:44.236467\n37000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:27:01.469091\n37500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:27:20.011740\n38000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:27:38.304257\n38500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:27:57.663245\n39000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:28:15.807952\n39500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:28:34.362414\n40000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:28:52.833261\n40500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:29:10.628461\n41000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:29:27.270448\n41500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:29:44.324895\n42000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:30:01.724763\n42500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:30:19.050785\n43000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:30:36.914742\n43500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:30:54.822246\n44000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:31:13.026938\n44500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:31:31.380504\n45000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:31:50.473807\n45500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:32:08.888659\n46000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:32:27.260992\n46500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:32:45.156315\n47000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:33:03.639859\n47500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:33:21.107805\n48000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:33:39.711672\n48500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:33:58.477618\n49000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:34:15.469009\n49500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:34:34.295875\n50000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:34:52.225234\n50500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:35:10.335047\n51000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:35:30.523229\n51500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:35:47.762531\n52000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:36:04.836141\n52500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:36:23.118148\n53000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:36:41.431932\n53500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:36:59.257263\n54000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:37:17.234768\n54500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:37:35.932701\n55000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:37:53.878923\n55500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:38:12.178446\n56000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:38:29.483291\n56500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:38:48.792852\n57000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:39:06.960973\n57500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:39:24.702827\n58000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:39:43.186532\n58500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:40:02.263123\n59000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:40:20.345162\n59500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:40:36.930184\n60000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:40:56.585237\n60500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:41:14.814615\n61000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:41:33.028213\n61500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:41:50.890852\n62000" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:42:12.007967\n62500" | |
}, | |
{ | |
"output_type": "stream", | |
"stream": "stdout", | |
"text": " 2014-10-27 11:42:32.935895\n" | |
} | |
], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "len(storage2)", | |
"prompt_number": 153, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 153, | |
"metadata": {}, | |
"text": "62678" | |
} | |
], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# identifying columns first, then one count column per (window, category) pair,\n# with the window as the outer level\ncol_names = ['anon_screen_name', 'time', 'resource_display_name', 'success']\ncol_names += ['resource_%s_%s' % (t, r)\n              for t in ['6hours', '1hour', '30min', '6min']\n              for r in ['video', 'course', 'discuss', 'wiki', 'quiz', 'grades']]\n\nactivity = pd.DataFrame(storage2, columns=col_names)", | |
"prompt_number": 154, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# look up the per-quiz and per-student correct shares by name\nactivity['quiz_correct_share'] = activity['resource_display_name'].map(quiz_correct_share)\nperson_share = activity['anon_screen_name'].map(person_correct_share)\n# students missing from person_correct_share get a share of 0\nactivity['person_correct_share'] = person_share.replace(np.nan, 0)\n# binary target: 1 for 'correct', 0 for anything else\nactivity['success'] = activity['success'].apply(lambda outcome: 1 if outcome == 'correct' else 0)", | |
"prompt_number": 156, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "activity.success.value_counts()", | |
"prompt_number": 160, | |
"outputs": [ | |
{ | |
"output_type": "pyout", | |
"prompt_number": 160, | |
"metadata": {}, | |
"text": "1 35565\n0 27113\ndtype: int64" | |
} | |
], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# raw (unscaled) counts; the with-block closes the file even if pickling fails\nwith open('activity_raw.pickle','wb') as f:\n    pickle.dump(activity, f)", | |
"prompt_number": 158, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "for c in col_names[4:]:\n    maxval = activity[c].max()\n    minval = activity[c].min()\n    span = float(maxval - minval)\n    # 0/1 flag for any activity at all, taken from the raw counts before rescaling\n    activity[c + '_any'] = activity[c].apply(lambda x: 1 if x > 0 else 0)\n    # min-max scale to [0, 1]; a constant column has zero span, so it is set to 0.0\n    # instead of dividing by zero\n    activity[c] = ((activity[c] - minval) / span) if span else 0.0", | |
"prompt_number": 159, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "# scaled features; the with-block closes the file even if pickling fails\nwith open('activity.pickle','wb') as f:\n    pickle.dump(activity, f)", | |
"prompt_number": 161, | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
}, | |
{ | |
"metadata": {}, | |
"cell_type": "code", | |
"input": "", | |
"outputs": [], | |
"language": "python", | |
"trusted": true, | |
"collapsed": false | |
} | |
], | |
"metadata": {} | |
} | |
], | |
"metadata": { | |
"name": "", | |
"signature": "sha256:d2003c17d7155b60ad824f2352a8c3d7bd632f164c6909a6a78d8174f2fed380" | |
}, | |
"nbformat": 3 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment