Skip to content

Instantly share code, notes, and snippets.

@sss-ng
Created May 5, 2022 22:32
Show Gist options
  • Save sss-ng/ced3a287e8843cdfc232617f69507456 to your computer and use it in GitHub Desktop.
Save sss-ng/ced3a287e8843cdfc232617f69507456 to your computer and use it in GitHub Desktop.
pandas groupby reindex not working as expected
import pandas as pd

# Minimal reproduction: group a single-state frame by "state" and reindex each
# group onto a date index that is identical to the one it already has.
# The transcripts accompanying this script show the printed result indexed by
# (state, date) under pandas 1.3.2 but by date alone under pandas 1.3.3.

# Ten consecutive daily timestamps in UTC, used as the frame's index.
dates = pd.date_range("2020-01-01", "2020-01-10", freq="1D", tz="UTC")

# One row per day, every row belonging to the same state.
daily_counts = [4, 5, 2, 8, 0, 3, 7, 0, 4, 2]
records = [{"state": "virginia", "count": value} for value in daily_counts]
df = pd.DataFrame(records).assign(date=dates).set_index("date")

# The target index is built with exactly the same arguments as `dates`,
# so reindexing introduces no new rows and fill_value is never applied.
new_dates = pd.date_range("2020-01-01", "2020-01-10", freq="1D", tz="UTC")


def _reindex_group(group):
    """Align one state's rows to `new_dates`, filling absent dates with 0."""
    return group.reindex(new_dates, fill_value=0)


df = df.groupby(["state"]).apply(_reindex_group)
print(df)
@sss-ng
Copy link
Author

sss-ng commented May 6, 2022


root@docker-container-python38:/# python3 -m pip install pandas==1.3.2
Successfully installed pandas-1.3.2
root@docker-container-python38:/# python3
Python 3.8.13 (default, Apr 20 2022, 18:53:37)
[GCC 10.2.1 20210110] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>>
>>> import pandas as pd
>>>
>>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="1D", tz="UTC")
>>>
>>>
>>> df = pd.DataFrame(
...     [
...         {"state": "virginia", "count": 4},
...         {"state": "virginia", "count": 5},
...         {"state": "virginia", "count": 2},
...         {"state": "virginia", "count": 8},
...         {"state": "virginia", "count": 0},
...         {"state": "virginia", "count": 3},
...         {"state": "virginia", "count": 7},
...         {"state": "virginia", "count": 0},
...         {"state": "virginia", "count": 4},
...         {"state": "virginia", "count": 2},
...     ]
... )
>>>
>>> df["date"] = dates
>>> df = df.set_index("date")
>>>
>>> new_dates = pd.date_range("2020-01-01", "2020-01-10", freq="1D", tz="UTC")
>>>
>>> df = df.groupby(["state"]).apply(lambda x: x.reindex(new_dates, fill_value=0))
>>>
>>> print(df)
                                       state  count
state
virginia 2020-01-01 00:00:00+00:00  virginia      4
         2020-01-02 00:00:00+00:00  virginia      5
         2020-01-03 00:00:00+00:00  virginia      2
         2020-01-04 00:00:00+00:00  virginia      8
         2020-01-05 00:00:00+00:00  virginia      0
         2020-01-06 00:00:00+00:00  virginia      3
         2020-01-07 00:00:00+00:00  virginia      7
         2020-01-08 00:00:00+00:00  virginia      0
         2020-01-09 00:00:00+00:00  virginia      4
         2020-01-10 00:00:00+00:00  virginia      2
>>>


root@docker-container-python38:/# python3 -m pip install pandas==1.3.3
Collecting pandas==1.3.3
Successfully installed pandas-1.3.3


root@docker-container-python38:/# python3
Python 3.8.13 (default, Apr 20 2022, 18:53:37)
[GCC 10.2.1 20210110] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>>
>>> import pandas as pd
>>>
>>> dates = pd.date_range("2020-01-01", "2020-01-10", freq="1D", tz="UTC")
>>>
>>>
>>> df = pd.DataFrame(
...     [
...         {"state": "virginia", "count": 4},
...         {"state": "virginia", "count": 5},
...         {"state": "virginia", "count": 2},
...         {"state": "virginia", "count": 8},
...         {"state": "virginia", "count": 0},
...         {"state": "virginia", "count": 3},
...         {"state": "virginia", "count": 7},
...         {"state": "virginia", "count": 0},
...         {"state": "virginia", "count": 4},
...         {"state": "virginia", "count": 2},
...     ]
... )
>>>
>>> df["date"] = dates
>>> df = df.set_index("date")
>>>
>>> new_dates = pd.date_range("2020-01-01", "2020-01-10", freq="1D", tz="UTC")
>>>
>>> df = df.groupby(["state"]).apply(lambda x: x.reindex(new_dates, fill_value=0))
>>>
>>> print(df)
                              state  count
date
2020-01-01 00:00:00+00:00  virginia      4
2020-01-02 00:00:00+00:00  virginia      5
2020-01-03 00:00:00+00:00  virginia      2
2020-01-04 00:00:00+00:00  virginia      8
2020-01-05 00:00:00+00:00  virginia      0
2020-01-06 00:00:00+00:00  virginia      3
2020-01-07 00:00:00+00:00  virginia      7
2020-01-08 00:00:00+00:00  virginia      0
2020-01-09 00:00:00+00:00  virginia      4
2020-01-10 00:00:00+00:00  virginia      2
>>>

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment