Skip to content

Instantly share code, notes, and snippets.

@westandskif
Created April 14, 2022 15:06
Show Gist options
  • Save westandskif/362930891265815c474d37400108d603 to your computer and use it in GitHub Desktop.
Save westandskif/362930891265815c474d37400108d603 to your computer and use it in GitHub Desktop.
how-can-i-use-pandas-explode-crosstab-and-count-number-of-occurence-for-large-d/71591146
from convtools import conversion as c
# fmt: off
# Sample records: each one has an "id" and a "listElements" list that mixes
# plain string ingredients with nested lists of ingredients.
input_data = [
    {
        "id": 1,
        "listElements": [
            "apple",
            "peer",
            ["apple", "peer"],
            "banana",
            "chocolate",
            ["chocolate", "apple"],
        ],
    },
    {
        "id": 2,
        "listElements": [
            "ginger",
            "peer",
            ["ginger", "sugar"],
            "tofu",
            ["tofu", "veggie"],
            "chocolate",
        ],
    },
    {
        "id": 3,
        "listElements": [
            "steak",
            "beef",
            ["beef", "potatoes"],
            "banana",
        ],
    },
]
# fmt: on
# gen_converter() builds an ad hoc converter function; generate it once (e.g. on startup) and reuse it afterwards
# option A
# Nested lists are converted to tuples so they can be used as group-by keys;
# any other element is left as it is.
hashable_ingredient = c.if_(
    c.call_func(isinstance, c.this, list),
    c.this.as_type(tuple),
)

# For a single record: pair the record id with each of its ingredients.
id_ingredient_pairs = c.zip(
    c.repeat(c.item("id")),
    c.item("listElements").iter(hashable_ingredient),
)

# One summary dict per distinct ingredient (position 1 of each pair), holding
# the ids (position 0) it was seen with and the number of pairs in the group.
ingredient_summary = c.group_by(c.item(1)).aggregate(
    {
        "ingredient": c.item(1),
        "ids": c.ReduceFuncs.Array(c.item(0)),
        "count": c.ReduceFuncs.Count(),
    }
)

# Build the pairs for every record, flatten them into one stream, then
# aggregate the stream per ingredient.
converter = (
    c.iter(id_ingredient_pairs)
    .flatten()
    .pipe(ingredient_summary)
    .gen_converter()
)
result = converter(input_data)

# Expected rows are listed in the order each ingredient first appears in
# input_data. No record in the sample repeats an ingredient, so every
# expected "count" equals the number of ids in its row.
assert result == [
    {"ingredient": ingredient, "ids": ids, "count": len(ids)}
    for ingredient, ids in [
        ("apple", [1]),
        ("peer", [1, 2]),
        (("apple", "peer"), [1]),
        ("banana", [1, 3]),
        ("chocolate", [1, 2]),
        (("chocolate", "apple"), [1]),
        ("ginger", [2]),
        (("ginger", "sugar"), [2]),
        ("tofu", [2]),
        (("tofu", "veggie"), [2]),
        ("steak", [3]),
        ("beef", [3]),
        (("beef", "potatoes"), [3]),
    ]
]
# option B
# Nested lists are converted to tuples so they can be used as group-by keys;
# any other element is left as it is.
hashable_ingredient = c.if_(
    c.call_func(isinstance, c.this, list),
    c.this.as_type(tuple),
)

# For a single record: pair the record id with each of its ingredients.
id_ingredient_pairs = c.zip(
    c.repeat(c.item("id")),
    c.item("listElements").iter(hashable_ingredient),
)

# One (ingredient, ids) 2-tuple per distinct ingredient.
ids_by_ingredient = c.group_by(c.item(1)).aggregate(
    (
        c.item(1),
        c.ReduceFuncs.Array(c.item(0)),
    )
)

# Build the pairs for every record, flatten them into one stream, aggregate
# per ingredient, and turn the resulting 2-tuples into a dict.
converter = (
    c.iter(id_ingredient_pairs)
    .flatten()
    .pipe(ids_by_ingredient)
    .as_type(dict)
    .gen_converter()
)
# Expected mapping: ingredient (a tuple where the input had a nested list)
# -> ids of the records that contain it.
expected_ids_by_ingredient = {
    "apple": [1],
    "peer": [1, 2],
    ("apple", "peer"): [1],
    "banana": [1, 3],
    "chocolate": [1, 2],
    ("chocolate", "apple"): [1],
    "ginger": [2],
    ("ginger", "sugar"): [2],
    "tofu": [2],
    ("tofu", "veggie"): [2],
    "steak": [3],
    "beef": [3],
    ("beef", "potatoes"): [3],
}

result = converter(input_data)
assert result == expected_ids_by_ingredient
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment