make test-python-integration-local
FEAST_IS_LOCAL_TEST=True \
FEAST_LOCAL_ONLINE_CONTAINER=True \
python -m pytest -n 8 --color=yes --integration --durations=5 --dist loadgroup \
-k "not test_lambda_materialization and not test_snowflake_materialization" \
sdk/python/tests
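
To iterate on just the Spark failure reported below without the full 259-item run, the same environment variables can be reused with a narrower -k filter. This is only a sketch; the test id and file path are taken from the failure output further down:

FEAST_IS_LOCAL_TEST=True \
FEAST_LOCAL_ONLINE_CONTAINER=True \
python -m pytest --color=yes --integration \
  -k "test_spark_materialization_consistency" \
  sdk/python/tests/integration/materialization/contrib/spark/test_spark.py

Full output of the original run follows.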
/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pytest_benchmark/logger.py:46: PytestBenchmarkWarning: Benchmarks are automatically disabled because xdist plugin is active.Benchmarks cannot be performed reliably in a parallelized environment.
  warner(PytestBenchmarkWarning(text))
========================================= test session starts ==========================================
platform darwin -- Python 3.11.9, pytest-7.4.4, pluggy-1.5.0
benchmark: 3.4.1 (defaults: timer=time.perf_counter disable_gc=False min_rounds=5 min_time=0.000005 max_time=1.0 calibration_precision=10 warmup=False warmup_iterations=100000)
rootdir: /Users/ahameed/feast/sdk/python
configfile: pytest.ini
plugins: benchmark-3.4.1, cov-5.0.0, anyio-4.3.0, typeguard-4.2.1, env-1.1.3, ordering-0.6, mock-1.10.4, timeout-1.4.2, lazy-fixture-0.6.3, xdist-3.6.1
8 workers [259 items]
........................................s..................s.......sssss.................s....s. [ 37%]
..F.....s...sss...ss..s......................................................................... [ 74%]
................................................................... [100%]
=============================================== FAILURES ===============================================
________________________________ test_spark_materialization_consistency ________________________________
[gw3] darwin -- Python 3.11.9 /opt/anaconda3/envs/3.11/bin/python
    @pytest.mark.integration
    def test_spark_materialization_consistency():
        spark_config = IntegrationTestRepoConfig(
            provider="local",
            online_store_creator=RedisOnlineStoreCreator,
            offline_store_creator=SparkDataSourceCreator,
            batch_engine={"type": "spark.engine", "partitions": 10},
        )
        spark_environment = construct_test_environment(
            spark_config, None, entity_key_serialization_version=2
        )
        spark_environment.setup()
        df = create_basic_driver_dataset()
        ds = spark_environment.data_source_creator.create_data_source(
            df,
            spark_environment.feature_store.project,
            field_mapping={"ts_1": "ts"},
        )
        fs = spark_environment.feature_store
        driver = Entity(
            name="driver_id",
            join_keys=["driver_id"],
        )
        driver_stats_fv = FeatureView(
            name="driver_hourly_stats",
            entities=[driver],
            ttl=timedelta(weeks=52),
            schema=[Field(name="value", dtype=Float32)],
            source=ds,
        )
        try:
            fs.apply([driver, driver_stats_fv])
            print(df)
            # materialization is run in two steps and
            # we use timestamp from generated dataframe as a split point
            split_dt = df["ts_1"][4].to_pydatetime() - timedelta(seconds=1)
            print(f"Split datetime: {split_dt}")
>           validate_offline_online_store_consistency(fs, driver_stats_fv, split_dt)
sdk/python/tests/integration/materialization/contrib/spark/test_spark.py:72:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
sdk/python/tests/utils/e2e_test_validation.py:43: in validate_offline_online_store_consistency
    fs.materialize(feature_views=[fv.name], start_date=start_date, end_date=end_date)
sdk/python/feast/feature_store.py:1367: in materialize
    provider.materialize_single_feature_view(
sdk/python/feast/infra/passthrough_provider.py:275: in materialize_single_feature_view
    raise e
sdk/python/feast/infra/materialization/contrib/spark/spark_materialization_engine.py:183: in _materialize_one
    ).count() # dummy action to force evaluation
/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pyspark/sql/dataframe.py:1238: in count
    return int(self._jdf.count())
/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/py4j/java_gateway.py:1322: in __call__
    return_value = get_return_value(
/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pyspark/errors/exceptions/captured.py:179: in deco
    return f(*a, **kw)
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
answer = 'xro115', gateway_client = <py4j.clientserver.JavaClient object at 0x34a12d4d0>
target_id = 'o114', name = 'count'
    def get_return_value(answer, gateway_client, target_id=None, name=None):
        """Converts an answer received from the Java gateway into a Python object.
        For example, string representation of integers are converted to Python
        integer, string representation of objects are converted to JavaObject
        instances, etc.
        :param answer: the string returned by the Java gateway
        :param gateway_client: the gateway client used to communicate with the Java
            Gateway. Only necessary if the answer is a reference (e.g., object,
            list, map)
        :param target_id: the name of the object from which the answer comes from
            (e.g., *object1* in `object1.hello()`). Optional.
        :param name: the name of the member from which the answer comes from
            (e.g., *hello* in `object1.hello()`). Optional.
        """
        if is_error(answer)[0]:
            if len(answer) > 1:
                type = answer[1]
                value = OUTPUT_CONVERTER[type](answer[2:], gateway_client)
                if answer[1] == REFERENCE_TYPE:
>                   raise Py4JJavaError(
                        "An error occurred while calling {0}{1}{2}.\n".
                        format(target_id, ".", name), value)
E py4j.protocol.Py4JJavaError: An error occurred while calling o114.count.
E : org.apache.spark.SparkException: Job aborted due to stage failure: Task 9 in stage 8.0 failed 1 times, most recent failure: Lost task 9.0 in stage 8.0 (TID 22) (ahameed-mac executor driver): java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
E at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
E at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
E at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
E at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
E at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
E at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
E at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
E at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
E at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
E at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
E at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
E at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
E at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
E at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
E at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
E at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
E at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
E at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
E at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
E
E Driver stacktrace:
E at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2856)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2792)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2791)
E at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
E at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
E at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
E at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2791)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1247)
E at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1247)
E at scala.Option.foreach(Option.scala:407)
E at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1247)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3060)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2994)
E at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2983)
E at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
E Caused by: java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
E at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
E at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
E at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
E at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
E at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
E at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
E at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
E at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
E at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
E at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
E at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
E at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
E at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
E at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
E at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
E at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
E at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
E at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
E at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
E at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/py4j/protocol.py:326: Py4JJavaError
----------------------------------------- Captured stdout call -----------------------------------------
   driver_id  value                      ts_1          created_ts
0          1    0.1 2024-06-12 08:00:00+00:00 2024-06-12 12:00:00
1          2    NaN 2024-06-12 12:00:00+00:00 2024-06-12 12:00:00
2          1    0.3 2024-06-12 09:00:00+00:00 2024-06-12 12:00:00
3          3    4.0 2024-06-12 08:00:00+00:00 2024-06-12 12:00:00
4          3    5.0 2024-06-12 11:00:00+00:00 2024-06-12 12:00:00
Split datetime: 2024-06-12 10:59:59+00:00
Materializing 1 feature views from 2024-06-12 03:44:30-04:00 to 2024-06-12 06:59:59-04:00 into the redis online store.
driver_hourly_stats:
Pulling latest features from spark offline store
----------------------------------------- Captured stderr call -----------------------------------------
Warning: Ignoring non-Spark config property: master
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
24/06/12 08:44:21 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
using host unix:///Users/ahameed/.colima/default/docker.sock
06/12/2024 08:44:22 AM testcontainers.core.docker_client INFO: using host unix:///Users/ahameed/.colima/default/docker.sock
using host unix:///Users/ahameed/.colima/default/docker.sock
06/12/2024 08:44:22 AM testcontainers.core.docker_client INFO: using host unix:///Users/ahameed/.colima/default/docker.sock
Pulling image testcontainers/ryuk:0.7.0
06/12/2024 08:44:22 AM testcontainers.core.container INFO: Pulling image testcontainers/ryuk:0.7.0
Container started: 6ab1b8f21f65
06/12/2024 08:44:23 AM testcontainers.core.container INFO: Container started: 6ab1b8f21f65
Waiting for container <Container: 6ab1b8f21f65> with image testcontainers/ryuk:0.7.0 to be ready ...
06/12/2024 08:44:23 AM testcontainers.core.waiting_utils INFO: Waiting for container <Container: 6ab1b8f21f65> with image testcontainers/ryuk:0.7.0 to be ready ...
Pulling image redis
06/12/2024 08:44:25 AM testcontainers.core.container INFO: Pulling image redis
Container started: 1f99ffe6be6b
06/12/2024 08:44:25 AM testcontainers.core.container INFO: Container started: 1f99ffe6be6b
Waiting for container <Container: 1f99ffe6be6b> with image redis to be ready ...
06/12/2024 08:44:26 AM testcontainers.core.waiting_utils INFO: Waiting for container <Container: 1f99ffe6be6b> with image redis to be ready ...
06/12/2024 08:44:26 AM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
06/12/2024 08:44:30 AM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
06/12/2024 08:44:30 AM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
06/12/2024 08:44:30 AM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
24/06/12 08:44:31 ERROR ArrowPythonRunner: Python worker exited unexpectedly (crashed)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 1225, in main
eval_type = read_int(infile)
^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 596, in read_int
raise EOFError
EOFError
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:572)
at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:118)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:525)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.hashAgg_doAggregateWithoutKey_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1570)
Caused by: java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 ERROR ArrowPythonRunner: This may have been caused by a prior exception:
java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 ERROR ArrowPythonRunner: Python worker exited unexpectedly (crashed)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pyspark/python/lib/pyspark.zip/pyspark/worker.py", line 1225, in main
eval_type = read_int(infile)
^^^^^^^^^^^^^^^^
File "/opt/anaconda3/envs/3.11/lib/python3.11/site-packages/pyspark/python/lib/pyspark.zip/pyspark/serializers.py", line 596, in read_int
raise EOFError
EOFError
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:572)
at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:118)
at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:525)
at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:491)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.hashAgg_doAggregateWithoutKey_0$(Unknown Source)
at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage4.processNext(Unknown Source)
at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460)
at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140)
at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:104)
at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:54)
at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
at org.apache.spark.scheduler.Task.run(Task.scala:141)
at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:620)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:623)
at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1144)
at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:642)
at java.base/java.lang.Thread.run(Thread.java:1570)
Caused by: java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 ERROR ArrowPythonRunner: This may have been caused by a prior exception:
java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 ERROR Executor: Exception in task 9.0 in stage 8.0 (TID 22)
java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 ERROR Executor: Exception in task 0.0 in stage 8.0 (TID 21)
java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 WARN TaskSetManager: Lost task 9.0 in stage 8.0 (TID 22) (ahameed-mac executor driver): java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available
at org.apache.arrow.memory.util.MemoryUtil.directBuffer(MemoryUtil.java:174)
at org.apache.arrow.memory.ArrowBuf.getDirectBuffer(ArrowBuf.java:229)
at org.apache.arrow.memory.ArrowBuf.nioBuffer(ArrowBuf.java:224)
at org.apache.arrow.vector.ipc.WriteChannel.write(WriteChannel.java:133)
at org.apache.arrow.vector.ipc.message.MessageSerializer.writeBatchBuffers(MessageSerializer.java:303)
at org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(MessageSerializer.java:276)
at org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(ArrowWriter.java:147)
at org.apache.arrow.vector.ipc.ArrowWriter.writeBatch(ArrowWriter.java:133)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream(PythonArrowInput.scala:140)
at org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeIteratorToArrowStream$(PythonArrowInput.scala:124)
at org.apache.spark.sql.execution.python.ArrowPythonRunner.writeIteratorToArrowStream(ArrowPythonRunner.scala:30)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.$anonfun$writeIteratorToStream$1(PythonArrowInput.scala:96)
at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
at org.apache.spark.sql.execution.python.PythonArrowInput$$anon$1.writeIteratorToStream(PythonArrowInput.scala:102)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:451)
at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:1928)
at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:282)
24/06/12 08:44:31 ERROR TaskSetManager: Task 9 in stage 8.0 failed 1 times; aborting job
06/12/2024 08:44:31 AM root WARNING: list_feature_views will make breaking changes. Please use list_batch_feature_views instead. list_feature_views will behave like list_all_feature_views in the future.
06/12/2024 08:44:31 AM root WARNING: _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
06/12/2024 08:44:31 AM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
06/12/2024 08:44:31 AM feast.infra.registry.registry INFO: Registry cache expired, so refreshing
------------------------------------------ Captured log call -------------------------------------------
INFO testcontainers.core.docker_client:docker_client.py:52 using host unix:///Users/ahameed/.colima/default/docker.sock
INFO testcontainers.core.docker_client:docker_client.py:52 using host unix:///Users/ahameed/.colima/default/docker.sock
INFO testcontainers.core.container:container.py:88 Pulling image testcontainers/ryuk:0.7.0
INFO testcontainers.core.container:container.py:100 Container started: 6ab1b8f21f65
INFO testcontainers.core.waiting_utils:waiting_utils.py:52 Waiting for container <Container: 6ab1b8f21f65> with image testcontainers/ryuk:0.7.0 to be ready ...
INFO testcontainers.core.container:container.py:88 Pulling image redis
INFO testcontainers.core.container:container.py:100 Container started: 1f99ffe6be6b
INFO testcontainers.core.waiting_utils:waiting_utils.py:52 Waiting for container <Container: 1f99ffe6be6b> with image redis to be ready ...
INFO feast.infra.registry.registry:registry.py:817 Registry cache expired, so refreshing
INFO feast.infra.registry.registry:registry.py:817 Registry cache expired, so refreshing
INFO feast.infra.registry.registry:registry.py:817 Registry cache expired, so refreshing
INFO feast.infra.registry.registry:registry.py:817 Registry cache expired, so refreshing
WARNING root:feature_store.py:275 list_feature_views will make breaking changes. Please use list_batch_feature_views instead. list_feature_views will behave like list_all_feature_views in the future.
WARNING root:feature_store.py:297 _list_feature_views will make breaking changes. Please use _list_batch_feature_views instead. _list_feature_views will behave like _list_all_feature_views in the future.
INFO feast.infra.registry.registry:registry.py:817 Registry cache expired, so refreshing
INFO feast.infra.registry.registry:registry.py:817 Registry cache expired, so refreshing
--------------------------------------- Captured stderr teardown ---------------------------------------
[Stage 8:> (0 + 8) / 10]
=========================================== warnings summary ===========================================
tests/integration/materialization/contrib/spark/test_spark.py::test_spark_materialization_consistency
/Users/ahameed/feast/sdk/python/feast/infra/offline_stores/contrib/spark_offline_store/spark.py:76: RuntimeWarning: The spark offline store is an experimental feature in alpha development. Some functionality may still be unstable so functionality can change in the future.
warnings.warn(
tests/integration/registration/test_universal_cli.py::test_universal_cli
tests/integration/registration/test_universal_cli.py::test_universal_cli
/Users/ahameed/feast/sdk/python/feast/infra/registry/base_registry.py:628: DeprecationWarning: We will be deprecating the usage of spec.userDefinedFunction in a future release please upgrade cautiously.
warnings.warn(
-- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html
========================================= slowest 5 durations ==========================================
35.33s call tests/integration/registration/test_universal_cli.py::test_universal_cli
21.06s call tests/integration/materialization/test_universal_materialization.py::test_universal_materialization_consistency[LOCAL:DuckDBDelta:sqlite:python_fs:False]
20.98s call tests/integration/materialization/test_universal_materialization.py::test_universal_materialization_consistency[LOCAL:DuckDB:sqlite:python_fs:False]
20.57s call tests/integration/materialization/test_universal_materialization.py::test_universal_materialization_consistency[LOCAL:File:sqlite:python_fs:False]
20.57s call tests/integration/materialization/test_universal_e2e.py::test_e2e_consistency[LOCAL:File:sqlite:python_fs:False-True]
======================================= short test summary info ========================================
FAILED sdk/python/tests/integration/materialization/contrib/spark/test_spark.py::test_spark_materialization_consistency - py4j.protocol.Py4JJavaError: An error occurred while calling o114.count.
=================== 1 failed, 242 passed, 16 skipped, 3 warnings in 92.32s (0:01:32) ===================
make: *** [test-python-integration-local] Error 1
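
The failure itself is not Feast-specific: every Spark task dies with java.lang.UnsupportedOperationException: sun.misc.Unsafe or java.nio.DirectByteBuffer.<init>(long, int) not available, which Arrow's Java memory code raises when a recent JDK (16 or newer) blocks its reflective access to java.nio. One possible workaround, sketched here and not verified in this run, is to open java.nio to the JVM that PySpark launches, for example via the standard JDK_JAVA_OPTIONS environment variable, and re-run the make target; the exact flags needed may vary with the Spark and Arrow versions in use:

# Hypothetical workaround: allow Arrow's reflective access before re-running the suite.
export JDK_JAVA_OPTIONS="--add-opens=java.base/java.nio=ALL-UNNAMED -Dio.netty.tryReflectionSetAccessible=true"
make test-python-integration-local

Alternatively, running the suite under a Java 11 JDK, where this reflective access is still permitted by default, typically avoids the error without any extra flags.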