Created
January 25, 2014 04:37
-
-
Save gjreda/8611946 to your computer and use it in GitHub Desktop.
Weird numpy/pandas groupby behavior when using min() on a np.datetime64 field.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reproduction script: groupby(...).min() on a datetime64[ns] column returns
# a different dtype depending on whether as_index=False is passed.
#
# Environment in which the recorded outputs below were captured:
# OSX 10.7.5
# python 2.7.5
# pandas 0.13.0
# numpy 1.8.0
import pandas as pd
import numpy as np

try:
    from StringIO import StringIO  # Python 2 (the originally reported setup)
except ImportError:
    from io import StringIO  # Python 3 fallback

# Inline CSV: a group key and a single-quoted date per row.
d = """row1,'2013-10-01'
row1,'1995-03-15'
row2,'1998-11-04'
row2,'2014-01-10'
row3,'1950-12-25'
"""

df = pd.read_csv(
    StringIO(d),
    names=['col', 'the_date'],
    parse_dates=['the_date'],
)
#     col             the_date
# 0  row1  2013-10-01 00:00:00
# 1  row1  1995-03-15 00:00:00
# 2  row2  1998-11-04 00:00:00
# 3  row2  2014-01-10 00:00:00
# 4  row3  1950-12-25 00:00:00
# [5 rows x 2 columns]

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(df.dtypes)
# col                 object
# the_date    datetime64[ns]
# dtype: object

# Case 1: group key becomes the index. In the recorded run the result came
# back as int64; the integers are the expected minimum dates expressed as
# nanoseconds since the Unix epoch (e.g. 795225600000000000 is 1995-03-15).
min_by_index = df.groupby('col')['the_date'].min()
print(min_by_index)
# col
# row1    795225600000000000
# row2    910137600000000000
# row3   -600220800000000000
# Name: the_date, dtype: int64

# Case 2: same aggregation with as_index=False. In the recorded run the
# minimum dates were displayed as datetimes, as expected.
min_as_column = df.groupby('col', as_index=False)['the_date'].min()
print(min_as_column)
#     col             the_date
# 0  row1  1995-03-15 00:00:00
# 1  row2  1998-11-04 00:00:00
# 2  row3  1950-12-25 00:00:00
# [3 rows x 2 columns]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment