Skip to content

Instantly share code, notes, and snippets.

@oliverholworthy
Last active June 29, 2023 15:33
Show Gist options
  • Save oliverholworthy/b64eec0888f3717c4bec19dc9d901419 to your computer and use it in GitHub Desktop.
Save oliverholworthy/b64eec0888f3717c4bec19dc9d901419 to your computer and use it in GitHub Desktop.
cuDF dataframe merge
import cudf
def merge():
    """Left-merge two small cuDF DataFrames on ``string_id`` and print them.

    The left frame has a single ``string_id`` column of 30 integer keys
    drawn from 3..7. The right frame has one row per distinct key plus an
    ``embeddings`` column whose entries are two-element float lists.
    The function prints the left frame, the right frame, and the merged
    result, in that order, and returns nothing.
    """
    join_key = "string_id"

    # Keys for the left-hand side, in the exact order they are looked up.
    lookup_ids = [
        3, 3, 6, 3, 6, 5, 5, 4, 6, 4,
        7, 5, 5, 6, 5, 7, 7, 3, 5, 5,
        3, 7, 3, 6, 3, 6, 4, 6, 6, 7,
    ]
    left = cudf.DataFrame({join_key: lookup_ids})

    # One embedding (a list of two floats) per distinct key.
    table_ids = [3, 4, 5, 6, 7]
    table_embeddings = [
        [0.43440665, 0.71459431],
        [0.16957864, 0.4211413],
        [0.6488939, 0.69370209],
        [0.17119426, 0.54116936],
        [0.55397995, 0.07149349],
    ]
    right = cudf.DataFrame(
        {
            join_key: table_ids,
            "embeddings": table_embeddings,
        }
    )

    merged = left.merge(right, on=join_key, how="left")

    # Emit each frame under its label: inputs first, then the merge result.
    for label, frame in (("left:", left), ("right:", right), ("merged:", merged)):
        print(label)
        print(frame)
# Run the merge demonstration only when this file is executed as a script,
# so importing the module has no side effects.
if __name__ == "__main__":
    merge()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment