Created
January 10, 2017 15:19
-
-
Save aqt01/27f5da1594e323b37c5bc3b12b0647a4 to your computer and use it in GitHub Desktop.
Error while running: remote-task --host localhost --user ubuntu --remote-name analyticstack --skip-setup --wait ImportEnrollmentsIntoMysql --interval 2017-01-01-2017-01-10 --verbose --local-scheduler --overwrite-n-days 12
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
(pipeline) ubuntu@ip:~/edx-analytics-pipeline$ remote-task --host localhost --user ubuntu --remote-name analyticstack --skip-setup --wait ImportEnrollmentsIntoMysql --interval 2017-01-01-2017-01-10 --verbose --local-scheduler --overwrite-n-days 12
Parsed arguments = Namespace(branch='release', extra_repo=None, host='localhost', job_flow_id=None, job_flow_name=None, launch_task_arguments=['ImportEnrollmentsIntoMysql', '--interval', '2017-01-01-2017-01-10', '--local-scheduler', '--overwrite-n-days', '12'], log_path=None, override_config=None, package=None, private_key=None, remote_name='analyticstack', repo=None, secure_config=None, secure_config_branch=None, secure_config_repo=None, shell=None, skip_setup=True, sudo_user='hadoop', user='ubuntu', vagrant_path=None, verbose=True, virtualenv_extra_args=None, wait=True, wheel_url=None, workflow_profiler=None) | |
Running commands from path = /home/ubuntu/pipeline/share/edx.analytics.tasks | |
Remote name = analyticstack | |
Running command = ['ssh', '-tt', '-o', 'ForwardAgent=yes', '-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'KbdInteractiveAuthentication=no', '-o', 'PasswordAuthentication=no', '-o', 'User=ubuntu', '-o', 'ConnectTimeout=10', 'localhost', "sudo -Hu hadoop /bin/bash -c 'cd /var/lib/analytics-tasks/analyticstack/repo && . $HOME/.bashrc && /var/lib/analytics-tasks/analyticstack/venv/bin/launch-task ImportEnrollmentsIntoMysql --interval 2017-01-01-2017-01-10 --local-scheduler --overwrite-n-days 12'"] | |
Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts. | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('sqoop-import = edx.analytics.tasks.sqoop:SqoopImportFromMysql') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('run-vertica-sql-script = edx.analytics.tasks.run_vertica_sql_script:RunVerticaSqlScriptTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('obfuscation = edx.analytics.tasks.obfuscation:ObfuscatedCourseTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollment_validation = edx.analytics.tasks.enrollment_validation:CourseEnrollmentValidationTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('inc-enrollments-report = edx.analytics.tasks.reports.incremental_enrollments:WeeklyIncrementalUsersAndEnrollments') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('total-enrollments-report = edx.analytics.tasks.reports.total_enrollments:WeeklyAllUsersAndEnrollments') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('push_to_vertica_lms_courseware_link_clicked = edx.analytics.tasks.lms_courseware_link_clicked:PushToVerticaLMSCoursewareLinkClickedTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('database-import = edx.analytics.tasks.database_imports:ImportAllDatabaseTablesTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('ed_services_report = edx.analytics.tasks.reports.ed_services_financial_report:BuildEdServicesReportTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-internal-database = edx.analytics.tasks.load_internal_reporting_database:ImportMysqlToVerticaTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-student-module = edx.analytics.tasks.database_exports:StudentModulePerCourseAfterImportWorkflow') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('calendar = edx.analytics.tasks.calendar_task:CalendarTableTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('orders = edx.analytics.tasks.reports.orders_import:OrderTableTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('cybersource = edx.analytics.tasks.reports.cybersource:DailyPullFromCybersourceTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-user = edx.analytics.tasks.load_internal_reporting_user:LoadInternalReportingUserToWarehouse') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('location-per-course = edx.analytics.tasks.location_per_course:LastCountryOfUser') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('payment_reconcile = edx.analytics.tasks.reports.reconcile:ReconcileOrdersAndTransactionsTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments-report = edx.analytics.tasks.reports.enrollments:EnrollmentsByWeek') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-warehouse = edx.analytics.tasks.load_warehouse:LoadWarehouseWorkflow') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('engagement = edx.analytics.tasks.module_engagement:ModuleEngagementDataTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('events_obfuscation = edx.analytics.tasks.events_obfuscation:ObfuscateCourseEventsTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('dump-student-module = edx.analytics.tasks.database_exports:StudentModulePerCourseTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-events-by-course = edx.analytics.tasks.event_exports_by_course:EventExportByCourseTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('noop = edx.analytics.tasks.performance:ParseEventLogPerformanceTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-certificates = edx.analytics.tasks.load_internal_reporting_certificates:LoadInternalReportingCertificatesToWarehouse') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('user-activity = edx.analytics.tasks.user_activity:CourseActivityWeeklyTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('run-vertica-sql-scripts = edx.analytics.tasks.run_vertica_sql_scripts:RunVerticaSqlScriptTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('paypal = edx.analytics.tasks.reports.paypal:PaypalTransactionsByDayTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('grade-dist = edx.analytics.tasks.studentmodule_dist:GradeDistFromSqoopToMySQLWorkflow') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments_and_registrations_workflow-manifest = edx.analytics.tasks.reports.enrollments_and_registrations_workflow_manifest:EnrollmentsandRegistrationsWorkflow') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('financial_reports = edx.analytics.tasks.reports.finance_reports:BuildFinancialReportsTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('catalog = edx.analytics.tasks.course_catalog:CourseCatalogWorkflow') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-course-catalog = edx.analytics.tasks.load_internal_reporting_course_catalog:PullCourseCatalogAPIData') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments = edx.analytics.tasks.enrollments:ImportEnrollmentsIntoMysql') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('event-type-dist = edx.analytics.tasks.event_type_dist:PushToVerticaEventTypeDistributionTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('tags-dist = edx.analytics.tasks.tags_dist:TagsDistributionPerCourse') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('course-enroll = edx.analytics.tasks.course_enroll:CourseEnrollmentChangesPerDay') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('data_obfuscation = edx.analytics.tasks.data_obfuscation:ObfuscatedCourseDumpTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-events = edx.analytics.tasks.event_exports:EventExportTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-user-course = edx.analytics.tasks.load_internal_reporting_user_course:LoadUserCourseSummary') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-country = edx.analytics.tasks.load_internal_reporting_country:LoadInternalReportingCountryToWarehouse') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('overall_events = edx.analytics.tasks.overall_events:TotalEventsDailyTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-f-user-activity = edx.analytics.tasks.load_internal_reporting_user_activity:LoadInternalReportingUserActivityToWarehouse') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('student_engagement = edx.analytics.tasks.student_engagement:StudentEngagementTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('answer-dist = edx.analytics.tasks.answer_dist:AnswerDistributionPerCourse') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('video = edx.analytics.tasks.video:InsertToMysqlAllVideoTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('insert-into-table = edx.analytics.tasks.mysql_load:MysqlInsertTask') | |
DEBUG:stevedore.extension:found extension EntryPoint.parse('all_events_report = edx.analytics.tasks.reports.total_events_report:TotalEventsReportWorkflow') | |
DEBUG:edx.analytics.tasks.launchers.local:Using override.cfg | |
2017-01-10 15:05:24,947 INFO 3975 [luigi-interface] worker.py:267 - Scheduled ImportEnrollmentsIntoMysql(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, date=2017-01-10, partner_short_codes=None, api_root_url=None, interval=2017-01-01-2017-01-10, enable_course_catalog=False) (PENDING) | |
2017-01-10 15:05:24,986 INFO 3975 [luigi-interface] worker.py:267 - Scheduled ImportCourseSummaryEnrollmentsIntoMysql(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, database=stage_reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=False, hive_overwrite=False, date=2017-01-10, partner_short_codes=None, api_root_url=None, interval=2017-01-01-2017-01-10, enable_course_catalog=False) (PENDING) | |
2017-01-10 15:05:40,499 INFO 3975 [luigi-interface] worker.py:267 - Scheduled CourseTableTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/) (DONE) | |
2017-01-10 15:05:40,501 INFO 3975 [luigi-interface] worker.py:267 - Scheduled EnrollmentByModeTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, database=stage_reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, hive_overwrite=False, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:05:56,525 INFO 3975 [luigi-interface] worker.py:267 - Scheduled ImportAuthUserProfileTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=stage_edxapp, import_date=None) (DONE) | |
2017-01-10 15:06:16,789 INFO 3975 [luigi-interface] worker.py:267 - Scheduled CourseEnrollmentTableTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:06:21,356 INFO 3975 [luigi-interface] worker.py:267 - Scheduled CourseEnrollmentTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-01-01-2017-01-10, output_root=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment/dt=2017-01-10/) (PENDING) | |
2017-01-10 15:06:21,358 INFO 3975 [luigi-interface] worker.py:267 - Scheduled CourseEnrollmentEventsTask(source=('hdfs://localhost:9000/data/',), interval=2016-12-29-2017-01-10, expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/) (PENDING) | |
2017-01-10 15:06:30,525 INFO 3975 [luigi-interface] worker.py:267 - Scheduled PathSelectionByDateIntervalTask(source=('hdfs://localhost:9000/data/',), interval=2016-12-29-2017-01-10, expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d) (DONE) | |
2017-01-10 15:06:35,200 INFO 3975 [luigi-interface] worker.py:267 - Scheduled PathSelectionByDateIntervalTask(source=('hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events',), interval=2017-01-01-2016-12-29, expand_interval=0:00:00, pattern=('.*?course_enrollment_events_(?P<date>\\d{4}-\\d{2}-\\d{2})',), date_pattern=%Y-%m-%d) (DONE) | |
2017-01-10 15:06:35,201 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-09/course_enrollment_events_2017-01-09) (DONE) | |
2017-01-10 15:06:35,202 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-08/course_enrollment_events_2017-01-08) (DONE) | |
2017-01-10 15:06:35,202 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-07/course_enrollment_events_2017-01-07) (DONE) | |
2017-01-10 15:06:35,202 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-06/course_enrollment_events_2017-01-06) (DONE) | |
2017-01-10 15:06:35,203 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-05/course_enrollment_events_2017-01-05) (DONE) | |
2017-01-10 15:06:35,203 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-04/course_enrollment_events_2017-01-04) (DONE) | |
2017-01-10 15:06:35,203 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-03/course_enrollment_events_2017-01-03) (DONE) | |
2017-01-10 15:06:35,204 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-02/course_enrollment_events_2017-01-02) (DONE) | |
2017-01-10 15:06:35,204 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2017-01-01/course_enrollment_events_2017-01-01) (DONE) | |
2017-01-10 15:06:35,204 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2016-12-31/course_enrollment_events_2016-12-31) (DONE) | |
2017-01-10 15:06:35,205 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2016-12-30/course_enrollment_events_2016-12-30) (DONE) | |
2017-01-10 15:06:35,205 INFO 3975 [luigi-interface] worker.py:267 - Scheduled UncheckedExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_events/dt=2016-12-29/course_enrollment_events_2016-12-29) (DONE) | |
2017-01-10 15:06:39,660 INFO 3975 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query= | |
SELECT | |
ce.date, | |
ce.course_id, | |
ce.mode, | |
SUM(ce.at_end), | |
COUNT(ce.user_id) | |
FROM course_enrollment ce | |
GROUP BY | |
ce.date, | |
ce.course_id, | |
ce.mode | |
, table=course_enrollment_mode_daily, columns=(('date', 'STRING'), ('course_id', 'STRING'), ('mode', 'STRING'), ('count', 'INT'), ('cumulative_count', 'INT')), partition=dt=2017-01-10) (PENDING) | |
2017-01-10 15:06:44,658 INFO 3975 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query= | |
SELECT | |
enrollment_end.course_id, | |
course.catalog_course_title, | |
course.catalog_course, | |
course.start_time, | |
course.end_time, | |
course.pacing_type, | |
course.availability, | |
enrollment_end.mode, | |
enrollment_end.count, | |
(enrollment_end.count - COALESCE(enrollment_start.count, 0)) AS count_change_7_days, | |
enrollment_end.cumulative_count | |
FROM course_enrollment_mode_daily enrollment_end | |
LEFT OUTER JOIN course_enrollment_mode_daily enrollment_start | |
ON enrollment_start.course_id = enrollment_end.course_id | |
AND enrollment_start.mode = enrollment_end.mode | |
AND enrollment_start.date = '2017-01-02' | |
LEFT OUTER JOIN course_catalog course | |
ON course.course_id = enrollment_end.course_id | |
WHERE enrollment_end.date = '2017-01-09' | |
, table=course_meta_summary_enrollment, columns=(('course_id', 'STRING'), ('catalog_course_title', 'STRING'), ('catalog_course', 'STRING'), ('start_time', 'TIMESTAMP'), ('end_time', 'TIMESTAMP'), ('pacing_type', 'STRING'), ('availability', 'STRING'), ('enrollment_mode', 'STRING'), ('count', 'INT'), ('count_change_7_days', 'INT'), ('cumulative_count', 'INT')), partition=dt=2017-01-10) (PENDING) | |
2017-01-10 15:06:44,659 INFO 3975 [luigi-interface] worker.py:267 - Scheduled ExternalURL(url=/edx/etc/edx-analytics-pipeline/output.json) (DONE) | |
2017-01-10 15:06:44,670 INFO 3975 [luigi-interface] worker.py:267 - Scheduled EnrollmentDailyTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, database=stage_reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, hive_overwrite=False, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:06:49,210 INFO 3975 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query= | |
SELECT | |
ce.course_id, | |
ce.date, | |
SUM(ce.at_end), | |
COUNT(ce.user_id) | |
FROM course_enrollment ce | |
GROUP BY | |
ce.course_id, | |
ce.date | |
, table=course_enrollment_daily, columns=(('course_id', 'STRING'), ('date', 'STRING'), ('count', 'INT'), ('cumulative_count', 'INT')), partition=dt=2017-01-10) (PENDING) | |
2017-01-10 15:06:49,213 INFO 3975 [luigi-interface] worker.py:267 - Scheduled EnrollmentByEducationLevelTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, database=stage_reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, hive_overwrite=False, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:06:53,766 INFO 3975 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query= | |
SELECT | |
ce.date, | |
ce.course_id, | |
CASE p.level_of_education | |
WHEN 'el' THEN 'primary' | |
WHEN 'jhs' THEN 'junior_secondary' | |
WHEN 'hs' THEN 'secondary' | |
WHEN 'a' THEN 'associates' | |
WHEN 'b' THEN 'bachelors' | |
WHEN 'm' THEN 'masters' | |
WHEN 'p' THEN 'doctorate' | |
WHEN 'p_se' THEN 'doctorate' | |
WHEN 'p_oth' THEN 'doctorate' | |
WHEN 'none' THEN 'none' | |
WHEN 'other' THEN 'other' | |
ELSE NULL | |
END, | |
SUM(ce.at_end), | |
COUNT(ce.user_id) | |
FROM course_enrollment ce | |
LEFT OUTER JOIN auth_userprofile p ON p.user_id = ce.user_id | |
WHERE ce.date = '2017-01-09' | |
GROUP BY | |
ce.date, | |
ce.course_id, | |
CASE p.level_of_education | |
WHEN 'el' THEN 'primary' | |
WHEN 'jhs' THEN 'junior_secondary' | |
WHEN 'hs' THEN 'secondary' | |
WHEN 'a' THEN 'associates' | |
WHEN 'b' THEN 'bachelors' | |
WHEN 'm' THEN 'masters' | |
WHEN 'p' THEN 'doctorate' | |
WHEN 'p_se' THEN 'doctorate' | |
WHEN 'p_oth' THEN 'doctorate' | |
WHEN 'none' THEN 'none' | |
WHEN 'other' THEN 'other' | |
ELSE NULL | |
END | |
, table=course_enrollment_education_level_daily, columns=(('date', 'STRING'), ('course_id', 'STRING'), ('education_level', 'STRING'), ('count', 'INT'), ('cumulative_count', 'INT')), partition=dt=2017-01-10) (PENDING) | |
2017-01-10 15:06:53,779 INFO 3975 [luigi-interface] worker.py:267 - Scheduled EnrollmentByBirthYearTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, database=stage_reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, hive_overwrite=False, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:06:58,267 INFO 3975 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query= | |
SELECT | |
ce.date, | |
ce.course_id, | |
p.year_of_birth, | |
SUM(ce.at_end), | |
COUNT(ce.user_id) | |
FROM course_enrollment ce | |
LEFT OUTER JOIN auth_userprofile p ON p.user_id = ce.user_id | |
WHERE ce.date = '2017-01-09' | |
GROUP BY | |
ce.date, | |
ce.course_id, | |
p.year_of_birth | |
, table=course_enrollment_birth_year_daily, columns=(('date', 'STRING'), ('course_id', 'STRING'), ('birth_year', 'INT'), ('count', 'INT'), ('cumulative_count', 'INT')), partition=dt=2017-01-10) (PENDING) | |
2017-01-10 15:06:58,269 INFO 3975 [luigi-interface] worker.py:267 - Scheduled EnrollmentByGenderTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, database=stage_reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, overwrite=True, hive_overwrite=False, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:07:05,008 INFO 3975 [luigi-interface] worker.py:267 - Scheduled HiveTableFromParameterQueryTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, insert_query= | |
SELECT | |
ce.date, | |
ce.course_id, | |
IF(p.gender != '', p.gender, NULL), | |
SUM(ce.at_end), | |
COUNT(ce.user_id) | |
FROM course_enrollment ce | |
LEFT OUTER JOIN auth_userprofile p ON p.user_id = ce.user_id | |
GROUP BY | |
ce.date, | |
ce.course_id, | |
IF(p.gender != '', p.gender, NULL) | |
, table=course_enrollment_gender_daily, columns=(('date', 'STRING'), ('course_id', 'STRING'), ('gender', 'STRING'), ('count', 'INT'), ('cumulative_count', 'INT')), partition=dt=2017-01-10) (PENDING) | |
2017-01-10 15:07:23,677 INFO 3975 [luigi-interface] worker.py:267 - Scheduled CourseEnrollmentSummaryTableTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-01-01-2017-01-10) (PENDING) | |
2017-01-10 15:07:28,287 INFO 3975 [luigi-interface] worker.py:267 - Scheduled CourseEnrollmentSummaryTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-01-01-2017-01-10, output_root=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/course_enrollment_summary/dt=2017-01-10/) (PENDING) | |
2017-01-10 15:07:28,287 INFO 3975 [luigi-interface] interface.py:193 - Done scheduling tasks | |
2017-01-10 15:07:28,288 INFO 3975 [luigi-interface] worker.py:282 - [pid 3975] Worker Worker(salt=752975086, host=ip, username=hadoop, pid=3975) running CourseEnrollmentEventsTask(source=('hdfs://localhost:9000/data/',), interval=2016-12-29-2017-01-10, expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/) | |
2017-01-10 15:07:36,504 INFO 3975 [luigi-interface] hadoop.py:242 - /edx/app/hadoop/hadoop/bin/hadoop jar /edx/app/hadoop/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.3.0.jar -D mapred.job.name=CourseEnrollmentEventsTask(source=('hdfs://localhost:9000/data/',), interval=2016-12-29-2017-01-10, expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/) -D mapred.reduce.tasks=25 -mapper /usr/bin/python2.7 mrrunner.py map -reducer /usr/bin/python2.7 mrrunner.py reduce -file /var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/mrrunner.py -file /tmp/tmpoajRa5/packages.tar -file /tmp/tmpoajRa5/job-instance.pickle -input /data/tracking.log -output /edx-analytics-pipeline/marker/7120053529202767554-temp-2017-01-10T15-07-32.808456 | |
2017-01-10 15:07:38,460 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:38 WARN streaming.StreamJob: -file option is deprecated, please use generic option -files instead. | |
2017-01-10 15:07:41,986 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:41 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032 | |
2017-01-10 15:07:42,675 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:42 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032 | |
2017-01-10 15:07:46,948 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:46 INFO mapred.FileInputFormat: Total input paths to process : 1 | |
2017-01-10 15:07:47,125 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:47 INFO mapreduce.JobSubmitter: number of splits:1 | |
2017-01-10 15:07:47,165 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:47 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name | |
2017-01-10 15:07:47,167 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:47 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces | |
2017-01-10 15:07:47,674 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:47 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1484060492990_0001 | |
2017-01-10 15:07:49,304 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:49 INFO impl.YarnClientImpl: Submitted application application_1484060492990_0001 | |
2017-01-10 15:07:49,407 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:49 INFO mapreduce.Job: The url to track the job: http://ip:8088/proxy/application_1484060492990_0001/ | |
2017-01-10 15:07:49,417 INFO 3975 [luigi-interface] hadoop.py:273 - 17/01/10 15:07:49 INFO mapreduce.Job: Running job: job_1484060492990_0001 | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment