# On branch master
# Your branch is up-to-date with 'origin/master'.
#
nothing to commit, working tree clean
(pipeline)user@host:~/edx-analytics-pipeline$ vi requirements/default.txt
(pipeline)user@host:~/edx-analytics-pipeline$ git checkout open-release/eucalyptus.master
Switched to branch 'open-release/eucalyptus.master'
Your branch is up-to-date with 'origin/open-release/eucalyptus.master'.
(pipeline)user@host:~/edx-analytics-pipeline$ cd ..
(pipeline)user@host:~$ remote-task --host localhost --user maxi --remote-name analyticstack --wait InsertToMysqlCourseEnrollByCountryWorkflow --local-scheduler --interval-start $(date +%Y-%m-%d -d "$FROM_DATE") --interval-end $(date +%Y-%m-%d -d "$TO_DATE") --n-reduce-tasks 2 --overwrite --local-scheduler --skip-setup --overwrite-n-days 0 --override-config $HOME/edx-analytics-pipeline/config/devstack.cfg --input algo
Parsed arguments = Namespace(branch='release', extra_repo=None, host='localhost', job_flow_id=None, job_flow_name=None, launch_task_arguments=['InsertToMysqlCourseEnrollByCountryWorkflow', '--local-scheduler', '--interval-start', '2017-02-10', '--interval-end', '2017-02-10', '--n-reduce-tasks', '2', '--overwrite', '--local-scheduler', '--overwrite-n-days', '0', '--input', 'algo'], log_path=None, override_config='/home/maxi/edx-analytics-pipeline/config/devstack.cfg', package=None, private_key=None, remote_name='analyticstack', repo=None, secure_config=None, secure_config_branch=None, secure_config_repo=None, shell=None, skip_setup=True, sudo_user='hadoop', user='maxi', vagrant_path=None, verbose=False, virtualenv_extra_args=None, wait=True, wheel_url=None, workflow_profiler=None)
Running commands from path = /home/maxi/pipeline/share/edx.analytics.tasks
Remote name = analyticstack
Running command = ['ssh', '-tt', '-o', 'ForwardAgent=yes', '-o', 'StrictHostKeyChecking=no', '-o', 'UserKnownHostsFile=/dev/null', '-o', 'KbdInteractiveAuthentication=no', '-o', 'PasswordAuthentication=no', '-o', 'User=maxi', '-o', 'ConnectTimeout=10', 'localhost', "sudo -Hu hadoop /bin/bash -c 'cd /var/lib/analytics-tasks/analyticstack/repo && . $HOME/.bashrc && /var/lib/analytics-tasks/analyticstack/venv/bin/launch-task InsertToMysqlCourseEnrollByCountryWorkflow --local-scheduler --interval-start 2017-02-10 --interval-end 2017-02-10 --n-reduce-tasks 2 --overwrite --local-scheduler --overwrite-n-days 0 --input algo'"]
Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
DEBUG:stevedore.extension:found extension EntryPoint.parse('sqoop-import = edx.analytics.tasks.common.sqoop:SqoopImportFromMysql')
DEBUG:stevedore.extension:found extension EntryPoint.parse('run-vertica-sql-script = edx.analytics.tasks.warehouse.run_vertica_sql_script:RunVerticaSqlScriptTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('obfuscation = edx.analytics.tasks.export.obfuscation:ObfuscatedCourseTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollment_validation = edx.analytics.tasks.monitor.enrollment_validation:CourseEnrollmentValidationTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('inc-enrollments-report = edx.analytics.tasks.warehouse.enrollment.incremental_enrollments:WeeklyIncrementalUsersAndEnrollments')
DEBUG:stevedore.extension:found extension EntryPoint.parse('total-enrollments-report = edx.analytics.tasks.warehouse.enrollment.total_enrollments:WeeklyAllUsersAndEnrollments')
DEBUG:stevedore.extension:found extension EntryPoint.parse('orders = edx.analytics.tasks.warehouse.financial.orders_import:OrderTableTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('problem_response = edx.analytics.tasks.insights.problem_response:LatestProblemResponseDataTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('push_to_vertica_lms_courseware_link_clicked = edx.analytics.tasks.warehouse.lms_courseware_link_clicked:PushToVerticaLMSCoursewareLinkClickedTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('video = edx.analytics.tasks.insights.video:InsertToMysqlAllVideoTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('ed_services_report = edx.analytics.tasks.warehouse.financial.ed_services_financial_report:BuildEdServicesReportTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-internal-database = edx.analytics.tasks.warehouse.load_internal_reporting_database:ImportMysqlToVerticaTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-student-module = edx.analytics.tasks.export.database_exports:StudentModulePerCourseAfterImportWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('calendar = edx.analytics.tasks.insights.calendar_task:CalendarTableTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-warehouse = edx.analytics.tasks.warehouse.load_warehouse:LoadWarehouseWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('cybersource = edx.analytics.tasks.warehouse.financial.cybersource:DailyPullFromCybersourceTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-user = edx.analytics.tasks.warehouse.load_internal_reporting_user:LoadInternalReportingUserToWarehouse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('location-per-course = edx.analytics.tasks.insights.location_per_course:LastCountryOfUser')
DEBUG:stevedore.extension:found extension EntryPoint.parse('payment_reconcile = edx.analytics.tasks.warehouse.financial.reconcile:ReconcileOrdersAndTransactionsTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments-report = edx.analytics.tasks.warehouse.enrollment.enrollments:EnrollmentsByWeek')
DEBUG:stevedore.extension:found extension EntryPoint.parse('financial_reports = edx.analytics.tasks.warehouse.financial.finance_reports:BuildFinancialReportsTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('engagement = edx.analytics.tasks.insights.module_engagement:ModuleEngagementDataTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('events_obfuscation = edx.analytics.tasks.export.events_obfuscation:ObfuscateCourseEventsTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('dump-student-module = edx.analytics.tasks.export.database_exports:StudentModulePerCourseTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-events-by-course = edx.analytics.tasks.export.event_exports_by_course:EventExportByCourseTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('noop = edx.analytics.tasks.monitor.performance:ParseEventLogPerformanceTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('course_blocks = edx.analytics.tasks.insights.course_blocks:CourseBlocksApiDataTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-events = edx.analytics.tasks.warehouse.load_internal_reporting_events:TrackingEventRecordDataTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-certificates = edx.analytics.tasks.warehouse.load_internal_reporting_certificates:LoadInternalReportingCertificatesToWarehouse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('user-activity = edx.analytics.tasks.insights.user_activity:CourseActivityWeeklyTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('run-vertica-sql-scripts = edx.analytics.tasks.warehouse.run_vertica_sql_scripts:RunVerticaSqlScriptTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('paypal = edx.analytics.tasks.warehouse.financial.paypal:PaypalTransactionsByDayTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('grade-dist = edx.analytics.tasks.data_api.studentmodule_dist:GradeDistFromSqoopToMySQLWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments_and_registrations_workflow-manifest = edx.analytics.tasks.warehouse.enrollment.enrollments_and_registrations_workflow_manifest:EnrollmentsandRegistrationsWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('database-import = edx.analytics.tasks.insights.database_imports:ImportAllDatabaseTablesTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('catalog = edx.analytics.tasks.warehouse.course_catalog:CourseCatalogWorkflow')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-course-catalog = edx.analytics.tasks.warehouse.load_internal_reporting_course_catalog:PullCourseCatalogAPIData')
DEBUG:stevedore.extension:found extension EntryPoint.parse('enrollments = edx.analytics.tasks.insights.enrollments:ImportEnrollmentsIntoMysql')
DEBUG:stevedore.extension:found extension EntryPoint.parse('event-type-dist = edx.analytics.tasks.warehouse.event_type_dist:PushToVerticaEventTypeDistributionTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('tags-dist = edx.analytics.tasks.insights.tags_dist:TagsDistributionPerCourse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('export-events = edx.analytics.tasks.export.event_exports:EventExportTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('course-enroll = edx.analytics.tasks.warehouse.enrollment.course_enroll:CourseEnrollmentChangesPerDay')
DEBUG:stevedore.extension:found extension EntryPoint.parse('data_obfuscation = edx.analytics.tasks.export.data_obfuscation:ObfuscatedCourseDumpTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('course_list = edx.analytics.tasks.insights.course_list:CourseListApiDataTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-user-course = edx.analytics.tasks.warehouse.load_internal_reporting_user_course:LoadUserCourseSummary')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-d-country = edx.analytics.tasks.warehouse.load_internal_reporting_country:LoadInternalReportingCountryToWarehouse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('overall_events = edx.analytics.tasks.monitor.overall_events:TotalEventsDailyTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('load-f-user-activity = edx.analytics.tasks.warehouse.load_internal_reporting_user_activity:LoadInternalReportingUserActivityToWarehouse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('student_engagement = edx.analytics.tasks.data_api.student_engagement:StudentEngagementTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('answer-dist = edx.analytics.tasks.insights.answer_dist:AnswerDistributionPerCourse')
DEBUG:stevedore.extension:found extension EntryPoint.parse('insert-into-table = edx.analytics.tasks.common.mysql_load:MysqlInsertTask')
DEBUG:stevedore.extension:found extension EntryPoint.parse('all_events_report = edx.analytics.tasks.monitor.total_events_report:TotalEventsReportWorkflow')
DEBUG:edx.analytics.tasks.launchers.local:Using override.cfg
2017-02-10 22:22:50,162 INFO 8423 [luigi-interface] worker.py:267 - Scheduled InsertToMysqlCourseEnrollByCountryWorkflow(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=None) (PENDING)
2017-02-10 22:22:50,165 INFO 8423 [luigi-interface] worker.py:267 - Scheduled InsertToMysqlLastCountryPerCourseTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, database=reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10) (PENDING)
2017-02-10 22:22:50,167 INFO 8423 [luigi-interface] worker.py:267 - Scheduled QueryLastCountryPerCourseTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10) (PENDING)
2017-02-10 22:22:50,168 INFO 8423 [luigi-interface] worker.py:267 - Scheduled ImportAuthUserTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None) (PENDING)
2017-02-10 22:22:50,169 INFO 8423 [luigi-interface] worker.py:267 - Scheduled SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/auth_user/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=auth_user, where=None, columns=('id', 'username', 'last_login', 'date_joined', 'is_active', 'is_superuser', 'is_staff', 'email'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False) (PENDING)
2017-02-10 22:22:50,170 INFO 8423 [luigi-interface] worker.py:267 - Scheduled ExternalURL(url=/edx/etc/edx-analytics-pipeline/input.json) (DONE)
2017-02-10 22:22:50,171 INFO 8423 [luigi-interface] worker.py:267 - Scheduled ImportStudentCourseEnrollmentTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None) (PENDING)
2017-02-10 22:22:50,171 INFO 8423 [luigi-interface] worker.py:267 - Scheduled SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/student_courseenrollment/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=student_courseenrollment, where=None, columns=('id', 'user_id', 'course_id', 'created', 'is_active', 'mode'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False) (PENDING)
2017-02-10 22:22:50,173 INFO 8423 [luigi-interface] worker.py:267 - Scheduled LastCountryOfUserPartitionTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10) (PENDING)
2017-02-10 22:22:55,219 INFO 8423 [luigi-interface] worker.py:267 - Scheduled LastCountryOfUserTableTask(warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/) (DONE)
2017-02-10 22:22:55,220 INFO 8423 [luigi-interface] worker.py:267 - Scheduled LastCountryOfUser(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10) (PENDING)
2017-02-10 22:22:56,728 INFO 8423 [luigi-interface] worker.py:267 - Scheduled ExternalURL(url=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat) (DONE)
2017-02-10 22:22:58,258 INFO 8423 [luigi-interface] worker.py:267 - Scheduled PathSelectionByDateIntervalTask(source=('hdfs://localhost:9000/edx-analytics-pipeline/warehouse/last_ip_of_user',), interval=2017-02-10-2017-02-10, expand_interval=0:00:00, pattern=('.*?last_ip_of_user_(?P<date>\\d{4}-\\d{2}-\\d{2})',), date_pattern=%Y-%m-%d) (DONE)
2017-02-10 22:22:58,258 INFO 8423 [luigi-interface] worker.py:267 - Scheduled ExternalURL(url=/edx/etc/edx-analytics-pipeline/output.json) (DONE)
2017-02-10 22:22:58,260 INFO 8423 [luigi-interface] worker.py:267 - Scheduled InsertToMysqlLastCountryOfUserTask(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, database=reports, credentials=/edx/etc/edx-analytics-pipeline/output.json, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10) (PENDING)
2017-02-10 22:22:58,260 INFO 8423 [luigi-interface] interface.py:193 - Done scheduling tasks
2017-02-10 22:22:58,261 INFO 8423 [luigi-interface] worker.py:282 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) running SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/auth_user/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=auth_user, where=None, columns=('id', 'username', 'last_login', 'date_joined', 'is_active', 'is_superuser', 'is_staff', 'email'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False)
2017-02-10 22:22:59,794 INFO 8423 [edx.analytics.tasks.util.overwrite] overwrite.py:59 - Removing existing output for task SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/auth_user/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=auth_user, where=None, columns=('id', 'username', 'last_login', 'date_joined', 'is_active', 'is_superuser', 'is_staff', 'email'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False)
2017-02-10 22:23:07,527 INFO 8423 [luigi-interface] hadoop.py:242 - sqoop import --connect jdbc:mysql://104.198.35.13/edxapp --username read_only --password-file /tmp/luigi/partial/luigitemp-17529769 --table auth_user --target-dir hdfs://localhost:9000/edx-analytics-pipeline/warehouse/auth_user/dt=2017-02-10/ --columns id,username,last_login,date_joined,is_active,is_superuser,is_staff,email --null-string \\N --null-non-string \\N --fields-terminated-by --hive-delims-replacement --direct
2017-02-10 22:23:08,177 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:08 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6
2017-02-10 22:23:09,140 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:09 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
2017-02-10 22:23:09,141 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:09 INFO tool.CodeGenTool: Beginning code generation
2017-02-10 22:23:09,877 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:09 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `auth_user` AS t LIMIT 1
2017-02-10 22:23:10,076 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:10 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `auth_user` AS t LIMIT 1
2017-02-10 22:23:10,257 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:10 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /edx/app/hadoop/hadoop
2017-02-10 22:23:11,972 INFO 8423 [luigi-interface] hadoop.py:273 - Note: /tmp/sqoop-hadoop/compile/8161d20fd38bd7fa5e32119bf03d2502/auth_user.java uses or overrides a deprecated API.
2017-02-10 22:23:11,973 INFO 8423 [luigi-interface] hadoop.py:273 - Note: Recompile with -Xlint:deprecation for details.
2017-02-10 22:23:11,977 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:11 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-hadoop/compile/8161d20fd38bd7fa5e32119bf03d2502/auth_user.jar
2017-02-10 22:23:11,983 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:11 WARN manager.DirectMySQLManager: Direct-mode import from MySQL does not support column
2017-02-10 22:23:11,984 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:11 WARN manager.DirectMySQLManager: selection. Falling back to JDBC-based import.
2017-02-10 22:23:11,985 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:11 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
2017-02-10 22:23:12,166 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:12 INFO mapreduce.ImportJobBase: Beginning import of auth_user
2017-02-10 22:23:12,177 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:12 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
2017-02-10 22:23:12,236 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:12 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
2017-02-10 22:23:12,348 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:12 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
2017-02-10 22:23:14,165 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO db.DBInputFormat: Using read commited transaction isolation
2017-02-10 22:23:14,201 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `auth_user`
2017-02-10 22:23:14,354 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO mapreduce.JobSubmitter: number of splits:4
2017-02-10 22:23:14,475 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1486740767764_0049
2017-02-10 22:23:14,807 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO impl.YarnClientImpl: Submitted application application_1486740767764_0049
2017-02-10 22:23:14,843 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO mapreduce.Job: The url to track the job: http://prod-host:8088/proxy/application_1486740767764_0049/
2017-02-10 22:23:14,843 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:14 INFO mapreduce.Job: Running job: job_1486740767764_0049
2017-02-10 22:23:21,049 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:21 INFO mapreduce.Job: Job job_1486740767764_0049 running in uber mode : false
2017-02-10 22:23:21,050 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:21 INFO mapreduce.Job: map 0% reduce 0%
2017-02-10 22:23:26,106 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:26 INFO mapreduce.Job: map 25% reduce 0%
2017-02-10 22:23:31,160 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:31 INFO mapreduce.Job: map 50% reduce 0%
2017-02-10 22:23:36,197 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:36 INFO mapreduce.Job: map 75% reduce 0%
2017-02-10 22:23:41,231 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:41 INFO mapreduce.Job: map 100% reduce 0%
2017-02-10 22:23:41,237 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:41 INFO mapreduce.Job: Job job_1486740767764_0049 completed successfully
2017-02-10 22:23:41,417 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:41 INFO mapreduce.Job: Counters: 30
2017-02-10 22:23:41,417 INFO 8423 [luigi-interface] hadoop.py:273 - File System Counters
2017-02-10 22:23:41,418 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of bytes read=0
2017-02-10 22:23:41,418 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of bytes written=419156
2017-02-10 22:23:41,418 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of read operations=0
2017-02-10 22:23:41,419 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of large read operations=0
2017-02-10 22:23:41,419 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of write operations=0
2017-02-10 22:23:41,419 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of bytes read=401
2017-02-10 22:23:41,420 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of bytes written=12692
2017-02-10 22:23:41,420 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of read operations=16
2017-02-10 22:23:41,421 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of large read operations=0
2017-02-10 22:23:41,421 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of write operations=8
2017-02-10 22:23:41,421 INFO 8423 [luigi-interface] hadoop.py:273 - Job Counters
2017-02-10 22:23:41,422 INFO 8423 [luigi-interface] hadoop.py:273 - Launched map tasks=4
2017-02-10 22:23:41,422 INFO 8423 [luigi-interface] hadoop.py:273 - Other local map tasks=4
2017-02-10 22:23:41,422 INFO 8423 [luigi-interface] hadoop.py:273 - Total time spent by all maps in occupied slots (ms)=54172
2017-02-10 22:23:41,423 INFO 8423 [luigi-interface] hadoop.py:273 - Total time spent by all reduces in occupied slots (ms)=0
2017-02-10 22:23:41,423 INFO 8423 [luigi-interface] hadoop.py:273 - Total time spent by all map tasks (ms)=13543
2017-02-10 22:23:41,423 INFO 8423 [luigi-interface] hadoop.py:273 - Total vcore-seconds taken by all map tasks=13543
2017-02-10 22:23:41,424 INFO 8423 [luigi-interface] hadoop.py:273 - Total megabyte-seconds taken by all map tasks=55472128
2017-02-10 22:23:41,424 INFO 8423 [luigi-interface] hadoop.py:273 - Map-Reduce Framework
2017-02-10 22:23:41,424 INFO 8423 [luigi-interface] hadoop.py:273 - Map input records=120
2017-02-10 22:23:41,425 INFO 8423 [luigi-interface] hadoop.py:273 - Map output records=120
2017-02-10 22:23:41,425 INFO 8423 [luigi-interface] hadoop.py:273 - Input split bytes=401
2017-02-10 22:23:41,425 INFO 8423 [luigi-interface] hadoop.py:273 - Spilled Records=0
2017-02-10 22:23:41,426 INFO 8423 [luigi-interface] hadoop.py:273 - Failed Shuffles=0
2017-02-10 22:23:41,426 INFO 8423 [luigi-interface] hadoop.py:273 - Merged Map outputs=0
2017-02-10 22:23:41,426 INFO 8423 [luigi-interface] hadoop.py:273 - GC time elapsed (ms)=261
2017-02-10 22:23:41,427 INFO 8423 [luigi-interface] hadoop.py:273 - CPU time spent (ms)=4320
2017-02-10 22:23:41,427 INFO 8423 [luigi-interface] hadoop.py:273 - Physical memory (bytes) snapshot=694120448
2017-02-10 22:23:41,428 INFO 8423 [luigi-interface] hadoop.py:273 - Virtual memory (bytes) snapshot=21433782272
2017-02-10 22:23:41,429 INFO 8423 [luigi-interface] hadoop.py:273 - Total committed heap usage (bytes)=583532544
2017-02-10 22:23:41,431 INFO 8423 [luigi-interface] hadoop.py:273 - File Input Format Counters
2017-02-10 22:23:41,431 INFO 8423 [luigi-interface] hadoop.py:273 - Bytes Read=0
2017-02-10 22:23:41,433 INFO 8423 [luigi-interface] hadoop.py:273 - File Output Format Counters
2017-02-10 22:23:41,433 INFO 8423 [luigi-interface] hadoop.py:273 - Bytes Written=12692
2017-02-10 22:23:41,438 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:41 INFO mapreduce.ImportJobBase: Transferred 12.3945 KB in 29.1882 seconds (434.8333 bytes/sec)
2017-02-10 22:23:41,446 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:41 INFO mapreduce.ImportJobBase: Retrieved 120 records.
2017-02-10 22:23:51,361 INFO 8423 [luigi-interface] worker.py:296 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) done SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/auth_user/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=auth_user, where=None, columns=('id', 'username', 'last_login', 'date_joined', 'is_active', 'is_superuser', 'is_staff', 'email'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False)
2017-02-10 22:23:51,362 INFO 8423 [luigi-interface] worker.py:282 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) running ImportAuthUserTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:23:53,023 INFO 8423 [edx.analytics.tasks.insights.database_imports] database_imports.py:110 - Marking existing output as having been removed for task ImportAuthUserTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:23:53,024 INFO 8423 [luigi-interface] hive.py:358 - ['hive', '-f', '/tmp/tmp6iniU_', '--hiveconf', 'mapred.job.name=ImportAuthUserTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)']
2017-02-10 22:23:53,024 INFO 8423 [luigi-interface] hadoop.py:242 - hive -f /tmp/tmp6iniU_ --hiveconf mapred.job.name=ImportAuthUserTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:23:54,298 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
2017-02-10 22:23:54,299 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
2017-02-10 22:23:54,300 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
2017-02-10 22:23:54,300 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
2017-02-10 22:23:54,301 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
2017-02-10 22:23:54,301 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
2017-02-10 22:23:54,302 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
2017-02-10 22:23:54,557 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 WARN conf.Configuration: org.apache.hadoop.hive.conf.LoopingByteArrayInputStream@711f39f9:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2017-02-10 22:23:54,564 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 WARN conf.Configuration: org.apache.hadoop.hive.conf.LoopingByteArrayInputStream@711f39f9:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2017-02-10 22:23:54,567 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:23:54 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
2017-02-10 22:23:54,752 INFO 8423 [luigi-interface] hadoop.py:273 - Logging initialized using configuration in jar:file:/edx/app/hadoop/hive-0.11.0-bin/lib/hive-common-0.11.0.jar!/hive-log4j.properties
2017-02-10 22:23:54,760 INFO 8423 [luigi-interface] hadoop.py:273 - Hive history file=/tmp/hadoop/hive_job_log_hadoop_10000@prod-host.c.hostname.internal_201702102223_194869848.txt
2017-02-10 22:23:54,894 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Class path contains multiple SLF4J bindings.
2017-02-10 22:23:54,896 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Found binding in [jar:file:/edx/app/hadoop/hadoop-2.3.0/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
2017-02-10 22:23:54,897 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Found binding in [jar:file:/edx/app/hadoop/hive-0.11.0-bin/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
2017-02-10 22:23:54,897 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
2017-02-10 22:23:54,898 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
2017-02-10 22:23:57,658 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:23:57,659 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 2.628 seconds
2017-02-10 22:23:59,210 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:23:59,210 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 1.55 seconds
2017-02-10 22:23:59,489 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:23:59,490 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 0.279 seconds
2017-02-10 22:23:59,985 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:23:59,986 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 0.495 seconds
2017-02-10 22:24:00,109 INFO 8423 [luigi-interface] worker.py:296 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) done ImportAuthUserTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:24:00,110 INFO 8423 [luigi-interface] worker.py:282 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) running SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/student_courseenrollment/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=student_courseenrollment, where=None, columns=('id', 'user_id', 'course_id', 'created', 'is_active', 'mode'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False)
2017-02-10 22:24:01,826 INFO 8423 [edx.analytics.tasks.util.overwrite] overwrite.py:59 - Removing existing output for task SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/student_courseenrollment/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=student_courseenrollment, where=None, columns=('id', 'user_id', 'course_id', 'created', 'is_active', 'mode'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False)
2017-02-10 22:24:10,458 INFO 8423 [luigi-interface] hadoop.py:242 - sqoop import --connect jdbc:mysql://104.198.35.13/edxapp --username read_only --password-file /tmp/luigi/partial/luigitemp-163909365 --table student_courseenrollment --target-dir hdfs://localhost:9000/edx-analytics-pipeline/warehouse/student_courseenrollment/dt=2017-02-10/ --columns id,user_id,course_id,created,is_active,mode --null-string \\N --null-non-string \\N --fields-terminated-by --hive-delims-replacement --direct
2017-02-10 22:24:11,174 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:11 INFO sqoop.Sqoop: Running Sqoop version: 1.4.6
2017-02-10 22:24:12,254 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:12 INFO manager.MySQLManager: Preparing to use a MySQL streaming resultset.
2017-02-10 22:24:12,255 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:12 INFO tool.CodeGenTool: Beginning code generation
2017-02-10 22:24:12,913 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:12 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `student_courseenrollment` AS t LIMIT 1
2017-02-10 22:24:13,099 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:13 INFO manager.SqlManager: Executing SQL statement: SELECT t.* FROM `student_courseenrollment` AS t LIMIT 1
2017-02-10 22:24:13,241 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:13 INFO orm.CompilationManager: HADOOP_MAPRED_HOME is /edx/app/hadoop/hadoop
2017-02-10 22:24:15,042 INFO 8423 [luigi-interface] hadoop.py:273 - Note: /tmp/sqoop-hadoop/compile/106d0f6fa75ec4f9d26a9388de7aa6fe/student_courseenrollment.java uses or overrides a deprecated API.
2017-02-10 22:24:15,042 INFO 8423 [luigi-interface] hadoop.py:273 - Note: Recompile with -Xlint:deprecation for details.
2017-02-10 22:24:15,046 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 INFO orm.CompilationManager: Writing jar file: /tmp/sqoop-hadoop/compile/106d0f6fa75ec4f9d26a9388de7aa6fe/student_courseenrollment.jar
2017-02-10 22:24:15,052 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 WARN manager.DirectMySQLManager: Direct-mode import from MySQL does not support column
2017-02-10 22:24:15,052 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 WARN manager.DirectMySQLManager: selection. Falling back to JDBC-based import.
2017-02-10 22:24:15,053 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 INFO manager.MySQLManager: Setting zero DATETIME behavior to convertToNull (mysql)
2017-02-10 22:24:15,227 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 INFO mapreduce.ImportJobBase: Beginning import of student_courseenrollment
2017-02-10 22:24:15,258 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 INFO Configuration.deprecation: mapred.jar is deprecated. Instead, use mapreduce.job.jar
2017-02-10 22:24:15,307 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 INFO Configuration.deprecation: mapred.map.tasks is deprecated. Instead, use mapreduce.job.maps
2017-02-10 22:24:15,405 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:15 INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
2017-02-10 22:24:17,027 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO db.DBInputFormat: Using read commited transaction isolation
2017-02-10 22:24:17,062 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO db.DataDrivenDBInputFormat: BoundingValsQuery: SELECT MIN(`id`), MAX(`id`) FROM `student_courseenrollment`
2017-02-10 22:24:17,217 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO mapreduce.JobSubmitter: number of splits:4
2017-02-10 22:24:17,354 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO mapreduce.JobSubmitter: Submitting tokens for job: job_1486740767764_0050
2017-02-10 22:24:17,717 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO impl.YarnClientImpl: Submitted application application_1486740767764_0050
2017-02-10 22:24:17,753 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO mapreduce.Job: The url to track the job: http://prod-host:8088/proxy/application_1486740767764_0050/
2017-02-10 22:24:17,754 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:17 INFO mapreduce.Job: Running job: job_1486740767764_0050
2017-02-10 22:24:23,976 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:23 INFO mapreduce.Job: Job job_1486740767764_0050 running in uber mode : false
2017-02-10 22:24:23,977 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:23 INFO mapreduce.Job: map 0% reduce 0%
2017-02-10 22:24:30,040 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:30 INFO mapreduce.Job: map 25% reduce 0%
2017-02-10 22:24:35,072 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:35 INFO mapreduce.Job: map 50% reduce 0%
2017-02-10 22:24:39,101 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:39 INFO mapreduce.Job: map 75% reduce 0%
2017-02-10 22:24:44,146 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:44 INFO mapreduce.Job: map 100% reduce 0%
2017-02-10 22:24:45,160 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:45 INFO mapreduce.Job: Job job_1486740767764_0050 completed successfully
2017-02-10 22:24:45,313 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:45 INFO mapreduce.Job: Counters: 30
2017-02-10 22:24:45,314 INFO 8423 [luigi-interface] hadoop.py:273 - File System Counters
2017-02-10 22:24:45,314 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of bytes read=0
2017-02-10 22:24:45,314 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of bytes written=419264
2017-02-10 22:24:45,314 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of read operations=0
2017-02-10 22:24:45,314 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of large read operations=0
2017-02-10 22:24:45,314 INFO 8423 [luigi-interface] hadoop.py:273 - FILE: Number of write operations=0
2017-02-10 22:24:45,315 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of bytes read=405
2017-02-10 22:24:45,315 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of bytes written=23191
2017-02-10 22:24:45,315 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of read operations=16
2017-02-10 22:24:45,315 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of large read operations=0
2017-02-10 22:24:45,315 INFO 8423 [luigi-interface] hadoop.py:273 - HDFS: Number of write operations=8
2017-02-10 22:24:45,315 INFO 8423 [luigi-interface] hadoop.py:273 - Job Counters
2017-02-10 22:24:45,316 INFO 8423 [luigi-interface] hadoop.py:273 - Launched map tasks=4
2017-02-10 22:24:45,316 INFO 8423 [luigi-interface] hadoop.py:273 - Other local map tasks=4
2017-02-10 22:24:45,316 INFO 8423 [luigi-interface] hadoop.py:273 - Total time spent by all maps in occupied slots (ms)=54320
2017-02-10 22:24:45,316 INFO 8423 [luigi-interface] hadoop.py:273 - Total time spent by all reduces in occupied slots (ms)=0
2017-02-10 22:24:45,316 INFO 8423 [luigi-interface] hadoop.py:273 - Total time spent by all map tasks (ms)=13580
2017-02-10 22:24:45,316 INFO 8423 [luigi-interface] hadoop.py:273 - Total vcore-seconds taken by all map tasks=13580
2017-02-10 22:24:45,317 INFO 8423 [luigi-interface] hadoop.py:273 - Total megabyte-seconds taken by all map tasks=55623680
2017-02-10 22:24:45,317 INFO 8423 [luigi-interface] hadoop.py:273 - Map-Reduce Framework
2017-02-10 22:24:45,317 INFO 8423 [luigi-interface] hadoop.py:273 - Map input records=326
2017-02-10 22:24:45,317 INFO 8423 [luigi-interface] hadoop.py:273 - Map output records=326
2017-02-10 22:24:45,317 INFO 8423 [luigi-interface] hadoop.py:273 - Input split bytes=405
2017-02-10 22:24:45,317 INFO 8423 [luigi-interface] hadoop.py:273 - Spilled Records=0
2017-02-10 22:24:45,318 INFO 8423 [luigi-interface] hadoop.py:273 - Failed Shuffles=0
2017-02-10 22:24:45,318 INFO 8423 [luigi-interface] hadoop.py:273 - Merged Map outputs=0
2017-02-10 22:24:45,318 INFO 8423 [luigi-interface] hadoop.py:273 - GC time elapsed (ms)=247
2017-02-10 22:24:45,318 INFO 8423 [luigi-interface] hadoop.py:273 - CPU time spent (ms)=4230
2017-02-10 22:24:45,318 INFO 8423 [luigi-interface] hadoop.py:273 - Physical memory (bytes) snapshot=699310080
2017-02-10 22:24:45,318 INFO 8423 [luigi-interface] hadoop.py:273 - Virtual memory (bytes) snapshot=21434757120
2017-02-10 22:24:45,319 INFO 8423 [luigi-interface] hadoop.py:273 - Total committed heap usage (bytes)=588251136
2017-02-10 22:24:45,319 INFO 8423 [luigi-interface] hadoop.py:273 - File Input Format Counters
2017-02-10 22:24:45,319 INFO 8423 [luigi-interface] hadoop.py:273 - Bytes Read=0
2017-02-10 22:24:45,319 INFO 8423 [luigi-interface] hadoop.py:273 - File Output Format Counters
2017-02-10 22:24:45,319 INFO 8423 [luigi-interface] hadoop.py:273 - Bytes Written=23191
2017-02-10 22:24:45,330 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:45 INFO mapreduce.ImportJobBase: Transferred 22.6475 KB in 30.0128 seconds (772.7042 bytes/sec)
2017-02-10 22:24:45,337 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:45 INFO mapreduce.ImportJobBase: Retrieved 326 records.
2017-02-10 22:24:54,876 INFO 8423 [luigi-interface] worker.py:296 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) done SqoopImportFromMysql(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/student_courseenrollment/dt=2017-02-10/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, num_mappers=None, verbose=False, table_name=student_courseenrollment, where=None, columns=('id', 'user_id', 'course_id', 'created', 'is_active', 'mode'), null_string=\\N, fields_terminated_by=, delimiter_replacement= , mysql_delimiters=False)
2017-02-10 22:24:54,877 INFO 8423 [luigi-interface] worker.py:282 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) running ImportStudentCourseEnrollmentTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:24:56,425 INFO 8423 [edx.analytics.tasks.insights.database_imports] database_imports.py:110 - Marking existing output as having been removed for task ImportStudentCourseEnrollmentTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:24:56,426 INFO 8423 [luigi-interface] hive.py:358 - ['hive', '-f', '/tmp/tmpZXygU1', '--hiveconf', 'mapred.job.name=ImportStudentCourseEnrollmentTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)']
2017-02-10 22:24:56,426 INFO 8423 [luigi-interface] hadoop.py:242 - hive -f /tmp/tmpZXygU1 --hiveconf mapred.job.name=ImportStudentCourseEnrollmentTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:24:57,563 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.input.dir.recursive is deprecated. Instead, use mapreduce.input.fileinputformat.input.dir.recursive
2017-02-10 22:24:57,564 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.max.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.maxsize
2017-02-10 22:24:57,565 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.min.split.size is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize
2017-02-10 22:24:57,565 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.min.split.size.per.rack is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.rack
2017-02-10 22:24:57,566 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.min.split.size.per.node is deprecated. Instead, use mapreduce.input.fileinputformat.split.minsize.per.node
2017-02-10 22:24:57,566 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.reduce.tasks is deprecated. Instead, use mapreduce.job.reduces
2017-02-10 22:24:57,567 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.reduce.tasks.speculative.execution is deprecated. Instead, use mapreduce.reduce.speculative
2017-02-10 22:24:57,810 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 WARN conf.Configuration: org.apache.hadoop.hive.conf.LoopingByteArrayInputStream@711f39f9:an attempt to override final parameter: mapreduce.job.end-notification.max.retry.interval; Ignoring.
2017-02-10 22:24:57,817 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 WARN conf.Configuration: org.apache.hadoop.hive.conf.LoopingByteArrayInputStream@711f39f9:an attempt to override final parameter: mapreduce.job.end-notification.max.attempts; Ignoring.
2017-02-10 22:24:57,820 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:24:57 INFO Configuration.deprecation: mapred.job.name is deprecated. Instead, use mapreduce.job.name
2017-02-10 22:24:58,019 INFO 8423 [luigi-interface] hadoop.py:273 - Logging initialized using configuration in jar:file:/edx/app/hadoop/hive-0.11.0-bin/lib/hive-common-0.11.0.jar!/hive-log4j.properties
2017-02-10 22:24:58,024 INFO 8423 [luigi-interface] hadoop.py:273 - Hive history file=/tmp/hadoop/hive_job_log_hadoop_11491@prod-host.c.hostname.internal_201702102224_1705183928.txt
2017-02-10 22:24:58,136 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Class path contains multiple SLF4J bindings.
2017-02-10 22:24:58,138 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Found binding in [jar:file:/edx/app/hadoop/hadoop-2.3.0/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar!/org/slf4j/impl/StaticLoggerBinder.class]
2017-02-10 22:24:58,138 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Found binding in [jar:file:/edx/app/hadoop/hive-0.11.0-bin/lib/slf4j-log4j12-1.6.1.jar!/org/slf4j/impl/StaticLoggerBinder.class]
2017-02-10 22:24:58,139 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
2017-02-10 22:24:58,140 INFO 8423 [luigi-interface] hadoop.py:273 - SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
2017-02-10 22:25:00,745 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:25:00,746 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 2.494 seconds
2017-02-10 22:25:02,251 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:25:02,252 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 1.506 seconds
2017-02-10 22:25:02,630 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:25:02,631 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 0.378 seconds
2017-02-10 22:25:02,990 INFO 8423 [luigi-interface] hadoop.py:273 - OK
2017-02-10 22:25:02,991 INFO 8423 [luigi-interface] hadoop.py:273 - Time taken: 0.361 seconds
2017-02-10 22:25:03,114 INFO 8423 [luigi-interface] worker.py:296 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) done ImportStudentCourseEnrollmentTask(destination=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, credentials=/edx/etc/edx-analytics-pipeline/input.json, database=edxapp, import_date=None)
2017-02-10 22:25:03,115 INFO 8423 [luigi-interface] worker.py:282 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) running LastCountryOfUser(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10)
2017-02-10 22:25:10,538 INFO 8423 [luigi-interface] hadoop.py:242 - /edx/app/hadoop/hadoop/bin/hadoop jar /edx/app/hadoop/hadoop/share/hadoop/tools/lib/hadoop-streaming-2.3.0.jar -D mapred.job.name=LastCountryOfUser(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10) -D mapred.reduce.tasks=2 -mapper /usr/bin/python2.7 mrrunner.py map -reducer /usr/bin/python2.7 mrrunner.py reduce -file /var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/mrrunner.py -file /tmp/tmpMDczOC/packages.tar -file /tmp/tmpMDczOC/job-instance.pickle -output /edx-analytics-pipeline/warehouse/last_country_of_user/dt=2017-02-10
2017-02-10 22:25:11,144 INFO 8423 [luigi-interface] hadoop.py:273 - 17/02/10 22:25:11 WARN streaming.StreamJob: -file option is deprecated, please use generic option -files instead.
2017-02-10 22:25:11,508 INFO 8423 [luigi-interface] hadoop.py:273 - Required argument: -input <name>
2017-02-10 22:25:11,509 INFO 8423 [luigi-interface] hadoop.py:273 - Try -help for more information
2017-02-10 22:25:11,509 INFO 8423 [luigi-interface] hadoop.py:273 - Streaming Command Failed!
2017-02-10 22:25:11,520 ERROR 8423 [luigi-interface] worker.py:304 - [pid 8423] Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) failed LastCountryOfUser(source=('hdfs://localhost:9000/data/',), expand_interval=2 days, 0:00:00, pattern=('.*tracking.log.*',), date_pattern=%Y%m%d, geolocation_data=hdfs://localhost:9000/edx-analytics-pipeline/geo.dat, warehouse_path=hdfs://localhost:9000/edx-analytics-pipeline/warehouse/, interval=2017-02-10-2017-02-10)
Traceback (most recent call last):
  File "/var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/worker.py", line 292, in _run_task
    task.run()
  File "/var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/edx/analytics/tasks/insights/location_per_course.py", line 330, in run
    super(LastCountryOfUser, self).run()
  File "/var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/hadoop.py", line 612, in run
    self.job_runner().run_job(self)
  File "/var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/hadoop.py", line 482, in run_job
    run_and_track_hadoop_job(arglist)
  File "/var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/hadoop.py", line 318, in run_and_track_hadoop_job
    return track_process(arglist, tracking_url_callback, env)
  File "/var/lib/analytics-tasks/analyticstack/venv/local/lib/python2.7/site-packages/luigi/hadoop.py", line 302, in track_process
    raise HadoopJobError(message + 'Also, no tracking url found.', out, err)
HadoopJobError: ('Streaming job failed with exit code 1. Also, no tracking url found.', '', '17/02/10 22:25:11 WARN streaming.StreamJob: -file option is deprecated, please use generic option -files instead.\nRequired argument: -input <name>\nTry -help for more information\nStreaming Command Failed!\n')
2017-02-10 22:25:11,521 INFO 8423 [luigi-interface] notifications.py:96 - Skipping error email. Set `error-email` in the `core` section of the luigi config file to receive error emails.
2017-02-10 22:25:11,526 INFO 8423 [luigi-interface] worker.py:337 - Done
2017-02-10 22:25:11,526 INFO 8423 [luigi-interface] worker.py:338 - There are no more tasks to run at this time
2017-02-10 22:25:11,526 INFO 8423 [luigi-interface] worker.py:343 - There are 5 pending tasks possibly being run by other workers
2017-02-10 22:25:11,555 INFO 8423 [luigi-interface] worker.py:117 - Worker Worker(salt=023637734, host=prod-host, username=hadoop, pid=8423) was stopped. Shutting down Keep-Alive thread
Connection to localhost closed.
Exiting with status = 0
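
# Note on the failure above (a hedged reading, not a confirmed root cause): the
# hadoop-streaming command that luigi built for LastCountryOfUser has no -input
# argument at all, which is why hadoop-streaming aborts with
# "Required argument: -input <name>". That usually means luigi's input selection
# matched zero files. Two things worth checking, using only paths and flags
# already visible in this run:

# 1. Confirm tracking logs matching '.*tracking.log.*' actually exist under the
#    configured source:
hdfs dfs -ls -R hdfs://localhost:9000/data/ | grep 'tracking\.log'

# 2. --interval-start equal to --interval-end (2017-02-10-2017-02-10) may yield
#    an empty luigi date interval; a retry sketch with a one-day range (the
#    duplicate --local-scheduler and the '--input algo' option are dropped here):
remote-task --host localhost --user maxi --remote-name analyticstack --wait \
  InsertToMysqlCourseEnrollByCountryWorkflow --local-scheduler \
  --interval-start 2017-02-10 --interval-end 2017-02-11 \
  --n-reduce-tasks 2 --overwrite --skip-setup --overwrite-n-days 0 \
  --override-config $HOME/edx-analytics-pipeline/config/devstack.cfg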