Skip to content

Instantly share code, notes, and snippets.

@emcake
Created February 24, 2024 12:57
Show Gist options
  • Save emcake/4edfb72d77e08e8a600b8c0c902e2718 to your computer and use it in GitHub Desktop.
Save emcake/4edfb72d77e08e8a600b8c0c902e2718 to your computer and use it in GitHub Desktop.
import tempfile

import pandas as pd
from deltalake import DeltaTable, write_deltalake
# Use a fresh, empty directory for every run. write_deltalake defaults to
# mode="error", so re-using a fixed path makes a second run fail because the
# table already exists, and leftover commits would skew the reproduction.
delta_path = tempfile.mkdtemp(prefix="table-conflict-test-")
print(delta_path)

# Seed the table with five rows that all live in the single partition p=1.
write_deltalake(
    delta_path,
    pd.DataFrame.from_dict(
        {
            "p": [1, 1, 1, 1, 1],
            "k": [1, 2, 3, 4, 5],
            "v": [1, 2, 3, 4, 5],
        }
    ),
    partition_by="p",
)
def write_table(table: DeltaTable, data):
    """Upsert ``data`` into ``table`` using a Delta Lake MERGE.

    Source rows (alias ``s``) are matched against target rows (alias ``t``)
    on both the ``p`` and ``k`` columns. Matched target rows have all columns
    updated from the source; unmatched source rows are inserted.

    Args:
        table: The ``DeltaTable`` handle the merge is committed through.
        data: The source rows to merge (the script passes a pandas DataFrame).
    """
    (
        table.merge(
            data,
            predicate="s.p = t.p and s.k = t.k",
            source_alias="s",
            target_alias="t",
        )
        .when_matched_update_all()
        .when_not_matched_insert_all()
        .execute()
    )
# Rows for the upsert: keys 3-5 overlap the seeded rows, keys 6-7 are new.
data_1 = pd.DataFrame.from_dict(
    {
        "p": [1, 1, 1, 1, 1],
        "k": [3, 4, 5, 6, 7],
        "v": [-3, -4, -5, -6, -7],
    }
)

# Open both handles before either one writes. This simulates two concurrent
# writers, because table_2 is loaded before table_1 commits its merge.
table_1, table_2 = DeltaTable(delta_path), DeltaTable(delta_path)

# Writer 1: commit the merge.
write_table(table_1, data_1)

# Writer 2: z-order through the handle opened before the merge.
# Expected by the author: this should fail with a conflict. Observed: it succeeds.
table_2.optimize.z_order(["k"])

# Reload the table from disk and show what it contains after both operations.
table = DeltaTable(delta_path)
df = table.to_pandas()
print("this data looks completely wrong - looks to have appended the z_order data + merged data?")
print(df)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment