@ashigeru (created May 12, 2012)
0 - Notes on EMR
Status Code: 400, AWS Request ID: 26FA211630014E86, AWS Error Code: InvalidRequest, AWS Error Message: The specified copy source is larger than the maximum allowable size for a copy source: 5368709120, S3 Extended Request ID: j2yq6lBCU8gL5/bmrTLH4pZ9gtoGUPzySVLN+OiUJ4anMBqlohG3QVQBhoMWBLBL
at com.amazonaws.http.AmazonHttpClient.handleErrorResponse(AmazonHttpClient.java:544)
at com.amazonaws.http.AmazonHttpClient.executeHelper(AmazonHttpClient.java:284)
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:169)
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:2619)
at com.amazonaws.services.s3.AmazonS3Client.copyObject(AmazonS3Client.java:1129)
at com.amazonaws.services.s3.AmazonS3Client.copyObject(AmazonS3Client.java:1092)
at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.copy(Jets3tNativeFileSystemStore.java:177)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
at java.lang.reflect.Method.invoke(Method.java:597)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:82)
at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:59)
at org.apache.hadoop.fs.s3native.$Proxy3.copy(Unknown Source)
at org.apache.hadoop.fs.s3native.NativeS3FileSystem.rename(NativeS3FileSystem.java:996)
at com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.move(HadoopDataSourceUtil.java:869)
at com.asakusafw.runtime.directio.hadoop.HadoopDataSourceUtil.move(HadoopDataSourceUtil.java:765)
at com.asakusafw.runtime.directio.hadoop.HadoopDataSourceCore.commitAttemptOutput(HadoopDataSourceCore.java:566)
at com.asakusafw.runtime.directio.keepalive.KeepAliveDataSource.commitAttemptOutput(KeepAliveDataSource.java:113)
at com.asakusafw.runtime.directio.hadoop.HadoopDataSource.commitAttemptOutput(HadoopDataSource.java:127)
at com.asakusafw.runtime.stage.output.BridgeOutputFormat$BridgeOutputCommitter.commitTask(BridgeOutputFormat.java:336)
at com.asakusafw.runtime.stage.output.StageOutputFormat$CombinedOutputCommitter.commitTask(StageOutputFormat.java:251)
at org.apache.hadoop.mapred.Task.commit(Task.java:1001)
at org.apache.hadoop.mapred.Task.done(Task.java:871)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:431)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:396)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
Dumping current stack trace:
thread: s3-transfer-manager-worker-9 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: s3-transfer-manager-worker-8 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: s3-transfer-manager-worker-2 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: Thread for syncLogs (daemon=true)
java.lang.Thread.sleep(Native Method)
org.apache.hadoop.mapred.Child$3.run(Child.java:139)
thread: IPC Client (47) connection to /127.0.0.1:42791 from job_201205111133_0002 (daemon=true)
java.lang.Object.wait(Native Method)
org.apache.hadoop.ipc.Client$Connection.waitForWork(Client.java:699)
org.apache.hadoop.ipc.Client$Connection.run(Client.java:741)
thread: s3-transfer-manager-worker-1 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: s3-transfer-manager-worker-4 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: main (daemon=false)
java.lang.Thread.dumpThreads(Native Method)
java.lang.Thread.getAllStackTraces(Thread.java:1530)
com.asakusafw.runtime.stage.output.BridgeOutputFormat$BridgeOutputCommitter.dump0(BridgeOutputFormat.java:685)
com.asakusafw.runtime.stage.output.BridgeOutputFormat$BridgeOutputCommitter.commitTask(BridgeOutputFormat.java:352)
com.asakusafw.runtime.stage.output.StageOutputFormat$CombinedOutputCommitter.commitTask(StageOutputFormat.java:251)
org.apache.hadoop.mapred.Task.commit(Task.java:1001)
org.apache.hadoop.mapred.Task.done(Task.java:871)
org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:431)
org.apache.hadoop.mapred.Child$4.run(Child.java:255)
java.security.AccessController.doPrivileged(Native Method)
javax.security.auth.Subject.doAs(Subject.java:396)
org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1059)
org.apache.hadoop.mapred.Child.main(Child.java:249)
thread: file (daemon=true)
java.lang.Object.wait(Native Method)
java.lang.Object.wait(Object.java:485)
org.apache.hadoop.metrics2.impl.SinkQueue.waitForData(SinkQueue.java:109)
org.apache.hadoop.metrics2.impl.SinkQueue.consumeAll(SinkQueue.java:78)
org.apache.hadoop.metrics2.impl.MetricsSinkAdapter.publishMetricsFromQueue(MetricsSinkAdapter.java:113)
org.apache.hadoop.metrics2.impl.MetricsSinkAdapter$2.run(MetricsSinkAdapter.java:89)
thread: s3-transfer-manager-worker-7 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: Signal Dispatcher (daemon=true)
thread: s3-transfer-manager-worker-5 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: Finalizer (daemon=true)
java.lang.Object.wait(Native Method)
java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:118)
java.lang.ref.ReferenceQueue.remove(ReferenceQueue.java:134)
java.lang.ref.Finalizer$FinalizerThread.run(Finalizer.java:159)
thread: Timer for 'ReduceTask' metrics system (daemon=true)
java.lang.Object.wait(Native Method)
java.util.TimerThread.mainLoop(Timer.java:509)
java.util.TimerThread.run(Timer.java:462)
thread: Reference Handler (daemon=true)
java.lang.Object.wait(Native Method)
java.lang.Object.wait(Object.java:485)
java.lang.ref.Reference$ReferenceHandler.run(Reference.java:116)
thread: directio-keepalive-02 (daemon=true)
java.lang.Thread.sleep(Native Method)
com.asakusafw.runtime.directio.keepalive.HeartbeatKeeper.keepAlive(HeartbeatKeeper.java:129)
com.asakusafw.runtime.directio.keepalive.HeartbeatKeeper$1.run(HeartbeatKeeper.java:57)
java.lang.Thread.run(Thread.java:662)
thread: s3-transfer-manager-worker-3 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: s3-transfer-manager-worker-10 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: directio-keepalive-01 (daemon=true)
java.lang.Thread.sleep(Native Method)
com.asakusafw.runtime.directio.keepalive.HeartbeatKeeper.keepAlive(HeartbeatKeeper.java:129)
com.asakusafw.runtime.directio.keepalive.HeartbeatKeeper$1.run(HeartbeatKeeper.java:57)
java.lang.Thread.run(Thread.java:662)
thread: s3-transfer-manager-worker-6 (daemon=false)
sun.misc.Unsafe.park(Native Method)
java.util.concurrent.locks.LockSupport.park(LockSupport.java:158)
java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:1987)
java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:399)
java.util.concurrent.ThreadPoolExecutor.getTask(ThreadPoolExecutor.java:947)
java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:907)
java.lang.Thread.run(Thread.java:662)
thread: communication thread (daemon=true)
java.lang.Thread.sleep(Native Method)
org.apache.hadoop.mapred.Task$TaskReporter.run(Task.java:654)
EMR:
The launch-time settings were roughly as follows.
elastic-mapreduce --create --alive \
--name testrun \
--hadoop-version 0.20.205 \
--ami-version latest \
--enable-debugging \
--master-instance-type m1.large \
--slave-instance-type m2.2xlarge \
--num-instances 4 \
--bootstrap-action s3://elasticmapreduce/bootstrap-actions/configure-hadoop \
--bootstrap-name "enable multipart upload" \
--args "-c,fs.s3n.multipart.uploads.enabled=true,-c,fs.s3n.multipart.uploads.split.size=524288000" \
...
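A quick way to verify that the bootstrap action actually took effect is to read the properties back on the cluster. A minimal sketch, assuming the Hadoop configuration files written by configure-hadoop are on the classpath (the class name is made up):

import org.apache.hadoop.conf.Configuration;

public class PrintS3nConf {
    public static void main(String[] args) {
        // new Configuration() loads core-site.xml from the classpath,
        // which is where the -c overrides above should end up.
        Configuration conf = new Configuration();
        System.out.println("fs.s3n.multipart.uploads.enabled = "
                + conf.get("fs.s3n.multipart.uploads.enabled"));
        System.out.println("fs.s3n.multipart.uploads.split.size = "
                + conf.get("fs.s3n.multipart.uploads.split.size"));
    }
}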
Situation:
1. A reduce task created a file of over 6 GB on S3 through the Hadoop FileSystem API.
2. Renaming that file with FileSystem.rename() threw the attached error.
# Does the copy fail even with multipart enabled? (see the sketch after this list)
2a. If the file is not renamed, no error occurs and the MapReduce job finishes correctly.
3. The Child process remained alive even after the processing had run to completion.
4. The task attempt was killed 600 seconds later.
5. The thread dump taken immediately after the error is attached above.
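Regarding the question in 2: the stack trace shows rename() going through a single AmazonS3Client.copyObject() call, and S3 rejects single-request copies of sources above 5 GiB (5368709120 bytes, the exact number in the error message). The fs.s3n.multipart.* settings seem to apply to uploads only, not to this copy path. For reference, a minimal sketch of copying a large object via multipart copy with the AWS SDK for Java (bucket and key names are placeholders, the part size is arbitrary):

import java.util.ArrayList;
import java.util.List;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.CompleteMultipartUploadRequest;
import com.amazonaws.services.s3.model.CopyPartRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadRequest;
import com.amazonaws.services.s3.model.InitiateMultipartUploadResult;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PartETag;

public class LargeObjectCopy {
    // Copies srcKey to dstKey in ranged parts, so the 5 GiB
    // single-request copy limit does not apply.
    static void multipartCopy(AmazonS3 s3, String bucket, String srcKey, String dstKey) {
        ObjectMetadata meta = s3.getObjectMetadata(bucket, srcKey);
        long size = meta.getContentLength();
        long partSize = 512L * 1024 * 1024; // 512 MB per part (tunable)

        InitiateMultipartUploadResult init = s3.initiateMultipartUpload(
                new InitiateMultipartUploadRequest(bucket, dstKey));
        List<PartETag> etags = new ArrayList<PartETag>();
        int partNumber = 1;
        for (long offset = 0; offset < size; offset += partSize, partNumber++) {
            long lastByte = Math.min(offset + partSize, size) - 1;
            CopyPartRequest part = new CopyPartRequest()
                    .withSourceBucketName(bucket).withSourceKey(srcKey)
                    .withDestinationBucketName(bucket).withDestinationKey(dstKey)
                    .withUploadId(init.getUploadId())
                    .withFirstByte(offset).withLastByte(lastByte)
                    .withPartNumber(partNumber);
            etags.add(s3.copyPart(part).getPartETag());
        }
        s3.completeMultipartUpload(new CompleteMultipartUploadRequest(
                bucket, dstKey, init.getUploadId(), etags));
        s3.deleteObject(bucket, srcKey); // rename = copy + delete on s3n
    }
}

A real fix would presumably have to happen inside Jets3tNativeFileSystemStore.copy(), since that is what rename() calls.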
Speculation:
1. In the thread dump, the only non-daemon threads are main and the s3-transfer-manager-worker-n threads, so I am investigating around those (a sketch of the mechanism follows below).
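For reference: the JVM only exits once all non-daemon threads have terminated, and the s3-transfer-manager-worker-n threads are non-daemon (daemon=false in the dump), parked in LinkedBlockingQueue.take() exactly as an idle thread pool would be. A minimal sketch of that mechanism (not the actual EMR code):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

public class NonDaemonLinger {
    public static void main(String[] args) throws InterruptedException {
        // Executors.defaultThreadFactory() creates NON-daemon threads,
        // like the s3-transfer-manager-worker-n threads in the dump.
        ExecutorService pool = Executors.newFixedThreadPool(2);
        pool.submit(new Runnable() {
            public void run() {
                System.out.println("task done");
            }
        });
        System.out.println("main() returning");
        // Without the next two lines, the idle workers stay blocked in
        // LinkedBlockingQueue.take() (the same stacks as above) and the
        // JVM never exits; with them, the process terminates normally.
        pool.shutdown();
        pool.awaitTermination(1, TimeUnit.MINUTES);
    }
}

If the copy path creates a TransferManager internally and never shuts its pool down, that alone would keep the Child process alive after the task finishes, which would explain 3 and 4 above.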