Skip to content

Instantly share code, notes, and snippets.

@ztraboo
Forked from mulby/dashboard.patch
Created June 17, 2016 07:03
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ztraboo/ac9eadd9ec6de6ffc27769fc9b55ac1e to your computer and use it in GitHub Desktop.
Save ztraboo/ac9eadd9ec6de6ffc27769fc9b55ac1e to your computer and use it in GitHub Desktop.
edX Analytics Conference Workshop 2016
From add560e7e139e2671f6f3eb9bda5038e69114d69 Mon Sep 17 00:00:00 2001
From: Dennis Jen <djen@edx.org>
Date: Mon, 13 Jun 2016 22:28:28 -0400
Subject: [PATCH] WIP: workshop demo of course views
---
.../courses/presenters/engagement.py | 3 ++
.../templates/courses/engagement_content.html | 24 +++++++++++++++
analytics_dashboard/courses/views/engagement.py | 3 ++
.../learner_analytics_api/v0/views.py | 12 ++++----
.../static/js/engagement-content-main.js | 35 ++++++++++++++++++++--
.../static/js/performance-content-main.js | 2 +-
requirements/base.txt | 2 +-
7 files changed, 71 insertions(+), 10 deletions(-)
diff --git a/analytics_dashboard/courses/presenters/engagement.py b/analytics_dashboard/courses/presenters/engagement.py
index 191a398..3f48cec 100644
--- a/analytics_dashboard/courses/presenters/engagement.py
+++ b/analytics_dashboard/courses/presenters/engagement.py
@@ -132,6 +132,9 @@ def get_summary_and_trend_data(self):
self._annotate_with_enrollment(summary, trends, enrollment_data)
return summary, trends
+ def get_course_views(self):
+ return self.course.views()
+
class CourseEngagementVideoPresenter(CourseAPIPresenterMixin, BasePresenter):
diff --git a/analytics_dashboard/courses/templates/courses/engagement_content.html b/analytics_dashboard/courses/templates/courses/engagement_content.html
index 9f600f5..5cf5a86 100644
--- a/analytics_dashboard/courses/templates/courses/engagement_content.html
+++ b/analytics_dashboard/courses/templates/courses/engagement_content.html
@@ -42,6 +42,30 @@ <h4 class="section-title">{% trans "Weekly Student Engagement" %}</h4>
{% endif %}
</section>
+ <section class="view-section" aria-hidden="true">
+ <div class="section-heading">
+ <h4 class="section-title">{% trans "Content Views" %}</h4>
+ <span class="section-heading-note small">{% trans "How many views?" %}</span>
+ </div>
+
+ {% if js_data.course.courseViews %}
+ <div class="section-content section-data-graph">
+ <div class="section-content section-data-viz">
+ <div class="analytics-chart-container">
+ <div class="chart-info">{% trans "Views" %}</div>
+ {% trans "Course Views." as tip_text %}
+ {% include "chart_tooltip.html" with tip_text=tip_text track_category="trend"%}
+ <div id="course-views" class="analytics-chart">
+ {% include "loading.html" %}
+ </div>
+ </div>
+ </div>
+ </div>
+ {% else %}
+ {% show_chart_error %}
+ {% endif %}
+ </section>
+
<section class="view-section">
<div class="section-heading bordered">
<h4 class="section-title">{% trans "Student Activity Metrics" %}</h4>
diff --git a/analytics_dashboard/courses/views/engagement.py b/analytics_dashboard/courses/views/engagement.py
index 56e1257..1d71ff2 100644
--- a/analytics_dashboard/courses/views/engagement.py
+++ b/analytics_dashboard/courses/views/engagement.py
@@ -45,13 +45,16 @@ def get_context_data(self, **kwargs):
summary = None
trends = None
last_updated = None
+ course_views = None
try:
summary, trends = self.presenter.get_summary_and_trend_data()
+ course_views = self.presenter.get_course_views()
last_updated = summary['last_updated']
except NotFoundError:
logger.error("Failed to retrieve engagement content data for %s.", self.course_id)
context['js_data']['course']['engagementTrends'] = trends
+ context['js_data']['course']['courseViews'] = course_views
context.update({
'summary': summary,
'update_message': self.get_last_updated_message(last_updated)
diff --git a/analytics_dashboard/learner_analytics_api/v0/views.py b/analytics_dashboard/learner_analytics_api/v0/views.py
index fc94995..cdc7aeb 100644
--- a/analytics_dashboard/learner_analytics_api/v0/views.py
+++ b/analytics_dashboard/learner_analytics_api/v0/views.py
@@ -1,6 +1,6 @@
import json
-from requests.exceptions import ConnectTimeout
+# from requests.exceptions import ConnectTimeout
from rest_framework.exceptions import PermissionDenied
from rest_framework.generics import RetrieveAPIView
@@ -52,11 +52,11 @@ def handle_exception(self, exc):
Handles timeouts raised by the API client by returning an HTTP
504.
"""
- if isinstance(exc, ConnectTimeout):
- return Response(
- data={'developer_message': 'Learner Analytics API timed out.', 'error_code': 'analytics_api_timeout'},
- status=504
- )
+ # if isinstance(exc, ConnectTimeout):
+ # return Response(
+ # data={'developer_message': 'Learner Analytics API timed out.', 'error_code': 'analytics_api_timeout'},
+ # status=504
+ # )
return super(BaseLearnerApiView, self).handle_exception(exc)
diff --git a/analytics_dashboard/static/js/engagement-content-main.js b/analytics_dashboard/static/js/engagement-content-main.js
index d0f9de4..9cb0738 100644
--- a/analytics_dashboard/static/js/engagement-content-main.js
+++ b/analytics_dashboard/static/js/engagement-content-main.js
@@ -6,7 +6,8 @@
require(['vendor/domReady!', 'load/init-page'], function (doc, page) {
'use strict';
- require(['underscore', 'views/data-table-view', 'views/trends-view'], function (_, DataTableView, TrendsView) {
+ require(['underscore', 'views/data-table-view', 'views/trends-view', 'views/stacked-bar-view'],
+ function (_, DataTableView, TrendsView, StackedBarView) {
// shared settings between the chart and table
// colors are chosen to be color-blind accessible
var settings = [
@@ -51,7 +52,8 @@ require(['vendor/domReady!', 'load/init-page'], function (doc, page) {
type: 'percent'
}
],
- trendSettings;
+ trendSettings,
+ courseViewsColumns;
// remove settings for data that doesn't exist (ex. forums)
settings = _(settings).filter(function (setting) {
@@ -92,5 +94,34 @@ require(['vendor/domReady!', 'load/init-page'], function (doc, page) {
sorting: ['-weekEnding'],
replaceNull: '-'
});
+
+ courseViewsColumns = [
+ {
+ key: 'total_views',
+ title: gettext('Total Views'),
+ className: 'text-right',
+ type: 'number',
+ color: '#4BB4FB'
+ },
+ {
+ key: 'unique_user_views',
+ title: gettext('Unique User Views'),
+ className: 'text-right',
+ type: 'number',
+ color: '#CA0061'
+ }
+ ];
+ // create a unique name...
+ _(page.models.courseModel.get('courseViews')).each(function (view) {
+ view.name = [view.section, view.subsection].join('_');
+ });
+
+ new StackedBarView({
+ el: '#course-views',
+ model: page.models.courseModel,
+ modelAttribute: 'courseViews',
+ trends: courseViewsColumns
+ });
+
});
});
diff --git a/analytics_dashboard/static/js/performance-content-main.js b/analytics_dashboard/static/js/performance-content-main.js
index 9cb4819..f36e6e8 100644
--- a/analytics_dashboard/static/js/performance-content-main.js
+++ b/analytics_dashboard/static/js/performance-content-main.js
@@ -50,7 +50,7 @@ require(['vendor/domReady!', 'load/init-page'], function (doc, page) {
if (model.get('hasData')) {
new StackedBarView({
- el: '#chart-view',
+ el: '#course-views',
model: model,
modelAttribute: 'primaryContent',
dataType: 'decimal',
diff --git a/requirements/base.txt b/requirements/base.txt
index 1568d14..36520ae 100644
--- a/requirements/base.txt
+++ b/requirements/base.txt
@@ -25,7 +25,7 @@ python-social-auth==0.2.14
git+https://github.com/pinax/django-announcements.git@f85e690705e038a62407abe54ac195f60760934b#egg=django-announcements # MIT
git+https://github.com/edx/django-lang-pref-middleware.git@0.1.0#egg=django-lang-pref-middleware
-git+https://github.com/edx/edx-analytics-data-api-client.git@0.6.1#egg=edx-analytics-data-api-client==0.6.1 # edX
+git+https://github.com/edx/edx-analytics-data-api-client.git@0da6968783fa795dbd770f53ce74af2780d6ca5c#egg=edx-analytics-data-api-client==0.6.1 # edX
git+https://github.com/edx/i18n-tools.git@0d7847f9dfa2281640527b4dc51f5854f950f9b7#egg=i18n_tools
git+https://github.com/edx/opaque-keys.git@d45d0bd8d64c69531be69178b9505b5d38806ce0#egg=opaque-keys
# custom opaque-key implementations for ccx
From 0da6968783fa795dbd770f53ce74af2780d6ca5c Mon Sep 17 00:00:00 2001
From: Dennis Jen <djen@edx.org>
Date: Mon, 13 Jun 2016 17:11:55 -0400
Subject: [PATCH] WIP: adding course views demo endpoint
---
analyticsclient/course.py | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/analyticsclient/course.py b/analyticsclient/course.py
index 83f077e..5a3e2de 100644
--- a/analyticsclient/course.py
+++ b/analyticsclient/course.py
@@ -21,6 +21,11 @@ def __init__(self, client, course_id):
self.client = client
self.course_id = unicode(course_id)
+ def views(self, data_format=DF.JSON):
+ path = 'courses/{0}/views/'.format(self.course_id)
+ return self.client.get(path, data_format=data_format)
+
+
def enrollment(self, demographic=None, start_date=None, end_date=None, data_format=DF.JSON):
"""
Get course enrollment data.
From 5f98937172c09faaa0c8501906fbe9f0c7979edb Mon Sep 17 00:00:00 2001
From: Gabe Mulley <gabe@edx.org>
Date: Mon, 13 Jun 2016 11:57:50 -0400
Subject: [PATCH 1/3] initial API attempt
---
analytics_data_api/v0/models.py | 11 +++++++++++
analytics_data_api/v0/serializers.py | 6 ++++++
analytics_data_api/v0/urls/courses.py | 3 ++-
analytics_data_api/v0/views/courses.py | 9 +++++++++
4 files changed, 28 insertions(+), 1 deletion(-)
diff --git a/analytics_data_api/v0/models.py b/analytics_data_api/v0/models.py
index ee004df..64af53d 100644
--- a/analytics_data_api/v0/models.py
+++ b/analytics_data_api/v0/models.py
@@ -497,3 +497,14 @@ class ModuleEngagementMetricRanges(models.Model):
class Meta(object):
db_table = 'module_engagement_metric_ranges'
+
+
+class ViewDistribution(models.Model):
+ course_id = models.CharField(db_index=True, max_length=255)
+ section = models.CharField(db_index=True, max_length=255)
+ subsection = models.CharField(db_index=True, max_length=255)
+ unique_user_views = models.IntegerField()
+ total_views = models.IntegerField()
+
+ class Meta(object):
+ db_table = 'content_views'
diff --git a/analytics_data_api/v0/serializers.py b/analytics_data_api/v0/serializers.py
index 93352cc..1543b5e 100644
--- a/analytics_data_api/v0/serializers.py
+++ b/analytics_data_api/v0/serializers.py
@@ -478,3 +478,9 @@ def get_engagement_ranges(self, obj):
})
return engagement_ranges
+
+
+class ViewDistributionSerializer(ModelSerializerWithCreatedField):
+
+ class Meta(object):
+ model = models.ViewDistribution
diff --git a/analytics_data_api/v0/urls/courses.py b/analytics_data_api/v0/urls/courses.py
index d219095..6fc823d 100644
--- a/analytics_data_api/v0/urls/courses.py
+++ b/analytics_data_api/v0/urls/courses.py
@@ -14,7 +14,8 @@
('enrollment/location', views.CourseEnrollmentByLocationView, 'enrollment_by_location'),
('problems', views.ProblemsListView, 'problems'),
('problems_and_tags', views.ProblemsAndTagsListView, 'problems_and_tags'),
- ('videos', views.VideosListView, 'videos')
+ ('videos', views.VideosListView, 'videos'),
+ ('views', views.ViewDistributionView, 'views')
]
urlpatterns = []
diff --git a/analytics_data_api/v0/views/courses.py b/analytics_data_api/v0/views/courses.py
index 5b48d97..cfae79f 100644
--- a/analytics_data_api/v0/views/courses.py
+++ b/analytics_data_api/v0/views/courses.py
@@ -765,3 +765,12 @@ class VideosListView(BaseCourseView):
def apply_date_filtering(self, queryset):
# no date filtering for videos -- just return the queryset
return queryset
+
+
+class ViewDistributionView(BaseCourseView):
+ slug = 'view-distribution'
+ serializer_class = serializers.ViewDistributionSerializer
+ model = models.ViewDistribution
+
+ def apply_date_filtering(self, queryset):
+ return queryset
\ No newline at end of file
From 155d04ae05dacdfe7f91b1b5dbbcc724a99cbe1c Mon Sep 17 00:00:00 2001
From: Gabe Mulley <gabe@edx.org>
Date: Mon, 13 Jun 2016 12:01:52 -0400
Subject: [PATCH 2/3] add in a created field
---
analytics_data_api/v0/models.py | 1 +
1 file changed, 1 insertion(+)
diff --git a/analytics_data_api/v0/models.py b/analytics_data_api/v0/models.py
index 64af53d..338401d 100644
--- a/analytics_data_api/v0/models.py
+++ b/analytics_data_api/v0/models.py
@@ -505,6 +505,7 @@ class ViewDistribution(models.Model):
subsection = models.CharField(db_index=True, max_length=255)
unique_user_views = models.IntegerField()
total_views = models.IntegerField()
+ created = models.DateTimeField(auto_now_add=True)
class Meta(object):
db_table = 'content_views'
From 16acff7493c5fee8bd73280ccda6dad2765e8a53 Mon Sep 17 00:00:00 2001
From: Dennis Jen <djen@edx.org>
Date: Mon, 13 Jun 2016 22:31:55 -0400
Subject: [PATCH 3/3] WIP: generate course view data
---
.../management/commands/generate_fake_course_data.py | 16 ++++++++++++++++
1 file changed, 16 insertions(+)
diff --git a/analytics_data_api/management/commands/generate_fake_course_data.py b/analytics_data_api/management/commands/generate_fake_course_data.py
index 44e9f0b..c5c6c95 100644
--- a/analytics_data_api/management/commands/generate_fake_course_data.py
+++ b/analytics_data_api/management/commands/generate_fake_course_data.py
@@ -253,6 +253,18 @@ def generate_tags_distribution_data(self, course_id):
total_submissions=total_submissions, correct_submissions=correct_submissions
)
+ def generate_video_module_data(self, course_id, section_num, subsection_num):
+ unique_user_views = random.randint(1, 100)
+ total_views = unique_user_views + random.randint(1, 100)
+
+
+ models.ViewDistribution.objects.create(course_id=course_id,
+ section='section_{}'.format(section_num),
+ subsection='subsection_{}'.format(subsection_num),
+ unique_user_views=unique_user_views,
+ total_views=total_views)
+
+
def handle(self, *args, **options):
course_id = options['course_id']
username = options['username']
@@ -274,3 +286,7 @@ def handle(self, *args, **options):
self.generate_learner_engagement_data(course_id, username, start_date, end_date)
self.generate_learner_engagement_range_data(course_id, start_date, end_date)
self.generate_tags_distribution_data(course_id)
+
+ for i in range(10):
+ for j in range(10):
+ self.generate_video_module_data(course_id, i, j)
# HELLO WORLD TASK
# running the task
launch-task HelloWorldTask --local-scheduler --input-url /var/tmp/analytics.txt --output-url /tmp/output.txt
# running it again is a no-op
launch-task HelloWorldTask --local-scheduler --input-url /var/tmp/analytics.txt --output-url /tmp/output.txt
# by default, a task counts as complete when its output already exists, so changing only the input does not re-run it
launch-task HelloWorldTask --local-scheduler --input-url /var/tmp/devops.txt --output-url /tmp/output.txt
# change the output to have the task run
launch-task HelloWorldTask --local-scheduler --input-url /var/tmp/devops.txt --output-url /tmp/devops_output.txt
# you can mix and match input sources using URLs (this is an edX extension to luigi)
launch-task HelloWorldTask --local-scheduler --input-url hdfs://localhost:9000/tmp/analytics.txt --output-url /tmp/dfs_io.txt
launch-task HelloWorldTask --local-scheduler --input-url hdfs://localhost:9000/tmp/analytics.txt --output-url hdfs://localhost:9000/tmp/output.txt
# HELLO WORLD MAP REDUCE TASK
# note the output *must* be a DFS path, not a local path
launch-task HelloWorldMapReduceTask --local-scheduler --interval 2016 --output-root hdfs://localhost:9000/tmp/hello/ --n-reduce-tasks 1
hdfs dfs -cat hdfs://localhost:9000/tmp/hello/2016/*
# VIEW DISTRIBUTION AFTER CHALLENGES
launch-task ViewDistribution --local-scheduler --interval 2016-01-01-2016-06-16 --output-root hdfs://localhost:9000/tmp/content_views/ --n-reduce-tasks 1
hdfs dfs -cat hdfs://localhost:9000/tmp/content_views/2016-01-01-2016-06-16/*
# TO MYSQL
launch-task ViewDistributionMysqlTask --local-scheduler --interval 2016-01-01-2016-06-16 --output-root hdfs://localhost:9000/tmp/content_views/ --n-reduce-tasks 1
# AFTER CLICKING AROUND
launch-task ViewDistributionMysqlTask --local-scheduler --interval 2016-01-01-2016-06-17 --output-root hdfs://localhost:9000/tmp/content_views/ --n-reduce-tasks 1
from edx.analytics.tasks.mysql_load import MysqlInsertTask
from edx.analytics.tasks.url import get_target_from_url, url_path_join, ExternalURL
from edx.analytics.tasks.pathutil import EventLogSelectionMixin
from edx.analytics.tasks.mapreduce import MapReduceJobTask
from edx.analytics.tasks.util import eventlog
import luigi
import re
from edx.analytics.tasks.util.record import Record, StringField, IntegerField
import logging
log = logging.getLogger(__name__)
class HelloWorldTask(luigi.Task):
    """Copy a text file line by line, prefixing every line with ``'Hello '``.

    Parameters:
        input_url: URL of the file to read; it is wrapped in ``ExternalURL``
            and declared as this task's only requirement.
        output_url: URL the prefixed lines are written to.
    """

    input_url = luigi.Parameter()
    output_url = luigi.Parameter()

    def requires(self):
        """Return the ``ExternalURL`` task wrapping ``input_url``."""
        return ExternalURL(self.input_url)

    def output(self):
        """Return the target that ``output_url`` resolves to."""
        return get_target_from_url(self.output_url)

    def run(self):
        """Write ``'Hello ' + line`` to the output for each input line."""
        # The input is opened before the output, as in the nested form.
        with self.input().open(mode='r') as source, self.output().open(mode='w') as sink:
            for text in source:
                sink.write('Hello ' + text)
class HelloWorldMapReduceTask(EventLogSelectionMixin, MapReduceJobTask):
    """Count events per event type, with every key prefixed by ``'hello '``.

    Parameters:
        output_root: URL under which a sub-directory named after the
            interval receives the job output.
    """

    output_root = luigi.Parameter()

    def mapper(self, line):
        """Yield ``('hello <event_type>', 1)`` for each usable log line.

        Lines for which ``get_event_and_date_string`` returns ``None`` yield
        nothing. Events without an ``event_type`` key are counted under
        ``'hello UNKNOWN'``.
        """
        parsed = self.get_event_and_date_string(line)
        if parsed is not None:
            event, _ = parsed
            event_type = event.get('event_type', 'UNKNOWN')
            yield 'hello ' + event_type, 1

    def reducer(self, key, values):
        """Yield ``(key, total)`` where ``total`` is the sum of ``values``."""
        total = sum(values)
        yield key, total

    def output(self):
        """Return the target for ``<output_root>/<interval>/``."""
        interval_dir = url_path_join(self.output_root, self.interval.to_string())
        return get_target_from_url(interval_dir + '/')
class ViewDistribution(EventLogSelectionMixin, MapReduceJobTask):
    """Count views of each courseware subsection, per course.

    The mapper keys every matching event by (course_id, section, subsection)
    and emits the viewing username; the reducer turns each group into one
    ``ViewRecord`` holding the total and distinct-user view counts.

    Parameters:
        output_root: URL under which a sub-directory named after the
            interval receives the job output.
    """

    # Group 1 is the section and group 2 the subsection of a courseware URL.
    SUBSECTION_ACCESSED_PATTERN = r'/courses/.*?courseware/([^/]+)/([^/]+)/.*$'

    output_root = luigi.Parameter()

    def mapper(self, line):
        """Yield ``((course_id, section, subsection), username)`` for a view.

        Nothing is yielded when the line cannot be parsed, when the event
        has no ``event_type``, when ``event_type`` does not match
        ``SUBSECTION_ACCESSED_PATTERN``, when the username is missing, null
        or blank, or when no course id is found for the event.
        """
        value = self.get_event_and_date_string(line)
        if value is None:
            return
        event, _date_string = value

        event_type = event.get('event_type')
        if event_type is None:
            return

        match = re.match(self.SUBSECTION_ACCESSED_PATTERN, event_type)
        if not match:
            return
        section, subsection = match.group(1, 2)

        # ``dict.get``'s default only applies when the key is absent; an
        # explicit null username would otherwise reach ``.strip()`` as None
        # and raise AttributeError.  Treat it like an empty username.
        username = (event.get('username') or '').strip()
        if not username:
            return

        course_id = eventlog.get_course_id(event)
        if not course_id:
            return

        yield (course_id, section, subsection), username

    def reducer(self, key, values):
        """Yield one ``ViewRecord`` string tuple for a subsection.

        ``key`` is the ``(course_id, section, subsection)`` tuple emitted by
        the mapper and ``values`` iterates over the usernames emitted for it,
        one per view.
        """
        course_id, section, subsection = key

        unique_usernames = set()
        total_views = 0
        for username in values:
            unique_usernames.add(username)
            total_views += 1

        yield ViewRecord(
            course_id=course_id,
            section=section,
            subsection=subsection,
            unique_user_views=len(unique_usernames),
            total_views=total_views
        ).to_string_tuple()

    def output(self):
        """Return the target for ``<output_root>/<interval>/``."""
        return get_target_from_url(url_path_join(self.output_root, self.interval.to_string()) + '/')
class ViewRecord(Record):
    """View counts for one (course_id, section, subsection) combination."""

    # NOTE(review): presumably the declaration order fixes the column order
    # used by to_string_tuple() / get_sql_schema() -- confirm before
    # reordering these fields.
    course_id = StringField(nullable=False, length=255)
    section = StringField(nullable=False, length=255)
    subsection = StringField(nullable=False, length=255)
    # Number of distinct usernames that viewed the subsection.
    unique_user_views = IntegerField()
    # Number of view events for the subsection, repeat views included.
    total_views = IntegerField()
class ViewDistributionMysqlTask(MysqlInsertTask):
    """Insert the output of ``ViewDistribution`` into ``content_views``.

    Parameters:
        output_root: passed through to the ``ViewDistribution`` source task.
        interval: date interval passed through to the source task.
        n_reduce_tasks: reducer count passed through to the source task;
            declared with ``significant=False``.
    """

    output_root = luigi.Parameter()
    interval = luigi.DateIntervalParameter()
    n_reduce_tasks = luigi.IntParameter(significant=False)

    @property
    def insert_source_task(self):
        """Return the ``ViewDistribution`` task whose output is inserted."""
        source_kwargs = {
            'interval': self.interval,
            'output_root': self.output_root,
            'n_reduce_tasks': self.n_reduce_tasks,
        }
        return ViewDistribution(**source_kwargs)

    @property
    def table(self):
        """Return the destination table name."""
        return 'content_views'

    @property
    def columns(self):
        """Return the SQL schema taken from ``ViewRecord``."""
        return ViewRecord.get_sql_schema()

    @property
    def indexes(self):
        """Return the index definitions: a single index on ``course_id``."""
        course_index = ('course_id',)
        return [course_index]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment