Created
February 2, 2023 18:06
-
-
Save vaskokj/3e827bd3999653ebb38fcb375c527558 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/spark-3.3.1-bin-hadoop3/bin/spark-submit \
--packages org.apache.hadoop:hadoop-aws:3.3.2,com.amazonaws:aws-java-sdk-bundle:1.12.180 \
--master spark://localhost:7077 \
--class io.treeverse.gc.UncommittedGarbageCollector \
-c spark.hadoop.lakefs.gc.do_mark=false \
-c spark.hadoop.lakefs.gc.do_sweep=true \
-c spark.hadoop.lakefs.gc.mark_id=<redacted> \
-c spark.hadoop.lakefs.api.url=http://<mylakeFSServer>:8000/api/v1 \
-c spark.hadoop.lakefs.api.access_key=<lakeFS Key> \
-c spark.hadoop.lakefs.api.secret_key=<lakeFS Key> \
-c spark.hadoop.fs.s3a.access.key=<aws key> \
-c spark.hadoop.fs.s3a.secret.key=<aws key> \
-c spark.hadoop.fs.s3a.session.token=<aws key> \
-c spark.hadoop.fs.s3a.endpoint=http://bucket.vpce-<redacted>.s3.us-gov-west-1.vpce.amazonaws.com \
-c spark.hadoop.fs.s3a.endpoint.region=us-gov-west-1 \
-c spark.hadoop.fs.s3.impl=org.apache.hadoop.fs.s3a.S3AFileSystem \
lakefs-spark-client-312-hadoop3-assembly-0.6.0.jar \
myproject us-gov-west-1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
23/02/02 12:00:49 INFO TaskSchedulerImpl: Adding task set 9.0 with 11 tasks resource profile 0 | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 0.0 in stage 9.0 (TID 28) (172.16.0.127, executor 0, partition 0, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 1.0 in stage 9.0 (TID 29) (172.16.0.127, executor 0, partition 1, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 2.0 in stage 9.0 (TID 30) (172.16.0.127, executor 0, partition 2, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 3.0 in stage 9.0 (TID 31) (172.16.0.127, executor 0, partition 3, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 4.0 in stage 9.0 (TID 32) (172.16.0.127, executor 0, partition 4, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 5.0 in stage 9.0 (TID 33) (172.16.0.127, executor 0, partition 5, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 6.0 in stage 9.0 (TID 34) (172.16.0.127, executor 0, partition 6, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO TaskSetManager: Starting task 7.0 in stage 9.0 (TID 35) (172.16.0.127, executor 0, partition 7, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:49 INFO BlockManagerInfo: Added broadcast_12_piece0 in memory on 172.16.0.127:34959 (size: 13.5 KiB, free: 434.3 MiB) | |
23/02/02 12:00:49 INFO MapOutputTrackerMasterEndpoint: Asked to send map output locations for shuffle 1 to 172.16.0.127:50274 | |
23/02/02 12:00:50 INFO BlockManagerInfo: Added broadcast_5_piece0 in memory on 172.16.0.127:34959 (size: 4.5 KiB, free: 434.3 MiB) | |
23/02/02 12:00:51 INFO TaskSetManager: Starting task 8.0 in stage 9.0 (TID 36) (172.16.0.127, executor 0, partition 8, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:51 INFO TaskSetManager: Starting task 9.0 in stage 9.0 (TID 37) (172.16.0.127, executor 0, partition 9, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:51 INFO TaskSetManager: Starting task 10.0 in stage 9.0 (TID 38) (172.16.0.127, executor 0, partition 10, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:51 WARN TaskSetManager: Lost task 5.0 in stage 9.0 (TID 33) (172.16.0.127 executor 0): com.amazonaws.services.s3.model.AmazonS3Exception: The AWS Access Key Id you provided does not exist in our records. (Service: Amazon S3; Status Code: 403; Error Code: InvalidAccessKeyId; Request ID: <redacted>; S3 Extended Request ID: <redacted>=; Proxy: null), S3 Extended Request ID: <redacted>= | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1862) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1415) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1154) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:811) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:779) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:753) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:713) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:695) | |
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:559) | |
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:539) | |
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5453) | |
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5400) | |
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5394) | |
at com.amazonaws.services.s3.AmazonS3Client.getBucketLocation(AmazonS3Client.java:1047) | |
at com.amazonaws.services.s3.AmazonS3Client.getBucketLocation(AmazonS3Client.java:1053) | |
at io.treeverse.clients.StorageUtils$S3$.getAWSS3Region(StorageUtils.scala:129) | |
at io.treeverse.clients.StorageUtils$S3$.createAndValidateS3Client(StorageUtils.scala:85) | |
at io.treeverse.clients.conditional.S3ClientBuilder$.build(S3ClientBuilder.scala:44) | |
at io.treeverse.clients.BulkRemoverFactory$S3BulkRemover.getS3Client(BulkRemoverFactory.scala:88) | |
at io.treeverse.clients.BulkRemoverFactory$S3BulkRemover.deleteObjects(BulkRemoverFactory.scala:77) | |
at io.treeverse.clients.GarbageCollector$.$anonfun$bulkRemove$3(GarbageCollector.scala:568) | |
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) | |
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source) | |
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) | |
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) | |
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:136) | |
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) | |
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
at java.base/java.lang.Thread.run(Thread.java:829) | |
23/02/02 12:00:51 INFO TaskSetManager: Starting task 5.1 in stage 9.0 (TID 39) (172.16.0.127, executor 0, partition 5, NODE_LOCAL, 4457 bytes) taskResourceAssignments Map() | |
23/02/02 12:00:51 WARN TaskSetManager: Lost task 6.0 in stage 9.0 (TID 34) (172.16.0.127 executor 0): com.amazonaws.services.s3.model.AmazonS3Exception: The AWS Access Key Id you provided does not exist in our records. (Service: Amazon S3; Status Code: 403; Error Code: InvalidAccessKeyId; Request ID: <Redacted>; S3 Extended Request ID: <redacted>=; Proxy: null), S3 Extended Request ID: <redacted>= | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleErrorResponse(AmazonHttpClient.java:1862) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.handleServiceErrorResponse(AmazonHttpClient.java:1415) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeOneRequest(AmazonHttpClient.java:1384) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeHelper(AmazonHttpClient.java:1154) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.doExecute(AmazonHttpClient.java:811) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.executeWithTimer(AmazonHttpClient.java:779) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.execute(AmazonHttpClient.java:753) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutor.access$500(AmazonHttpClient.java:713) | |
at com.amazonaws.http.AmazonHttpClient$RequestExecutionBuilderImpl.execute(AmazonHttpClient.java:695) | |
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:559) | |
at com.amazonaws.http.AmazonHttpClient.execute(AmazonHttpClient.java:539) | |
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5453) | |
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5400) | |
at com.amazonaws.services.s3.AmazonS3Client.invoke(AmazonS3Client.java:5394) | |
at com.amazonaws.services.s3.AmazonS3Client.getBucketLocation(AmazonS3Client.java:1047) | |
at com.amazonaws.services.s3.AmazonS3Client.getBucketLocation(AmazonS3Client.java:1053) | |
at io.treeverse.clients.StorageUtils$S3$.getAWSS3Region(StorageUtils.scala:129) | |
at io.treeverse.clients.StorageUtils$S3$.createAndValidateS3Client(StorageUtils.scala:85) | |
at io.treeverse.clients.conditional.S3ClientBuilder$.build(S3ClientBuilder.scala:44) | |
at io.treeverse.clients.BulkRemoverFactory$S3BulkRemover.getS3Client(BulkRemoverFactory.scala:88) | |
at io.treeverse.clients.BulkRemoverFactory$S3BulkRemover.deleteObjects(BulkRemoverFactory.scala:77) | |
at io.treeverse.clients.GarbageCollector$.$anonfun$bulkRemove$3(GarbageCollector.scala:568) | |
at scala.collection.Iterator$$anon$11.nextCur(Iterator.scala:486) | |
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:492) | |
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) | |
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source) | |
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) | |
at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) | |
at org.apache.spark.sql.execution.SparkPlan.$anonfun$getByteArrayRdd$1(SparkPlan.scala:364) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890) | |
at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890) | |
at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) | |
at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:365) | |
at org.apache.spark.rdd.RDD.iterator(RDD.scala:329) | |
at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) | |
at org.apache.spark.scheduler.Task.run(Task.scala:136) | |
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) | |
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1504) | |
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) | |
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128) | |
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628) | |
at java.base/java.lang.Thread.run(Thread.java:829) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment