Skip to content

Instantly share code, notes, and snippets.

@tacaswell
Last active August 29, 2015 14:06
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tacaswell/c2be32c2810088bec3e2 to your computer and use it in GitHub Desktop.
Save tacaswell/c2be32c2810088bec3e2 to your computer and use it in GitHub Desktop.
pandas play
import pandas as pd
from datetime import datetime, timedelta
# Sampling step between consecutive timestamps.
dt = timedelta(seconds=1)
# First timestamp shared by both series.
base_time = datetime(2014, 9, 24, 10, 5, 30)

# Ten timestamps one second apart, and every second one of those.
indx1 = [base_time + dt * step for step in range(10)]
indx2 = indx1[::2]

# Constant-valued series on the dense and on the sparse index.
ds1 = pd.Series(data=5, index=indx1)
ds2 = pd.Series(data=7, index=indx2)

# Combining them aligns on the union of the indexes; column 'b' is NaN
# at the timestamps that only exist in the dense index.
df = pd.DataFrame(dict(a=ds1, b=ds2))
from itertools import cycle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.ndimage.measurements import label
from six.moves import zip
# synthetic data source
class data_gen(object):
    """Synthetic, lazily evaluated source of 2-D frames.

    Indexing with ``k`` returns ``sin(kx * x) * cos(ky * y) + 1.05`` where
    ``kx = k // rep + 1``, ``ky = k % rep`` and ``rep = int(sqrt(length))``.
    Frames are computed on every access; nothing is cached.

    NOTE: ``__getitem__`` performs no bounds checking, so any integer index
    produces a frame regardless of ``len()``.
    """

    def __init__(self, length, func=None):
        """Set up the coordinate grids.

        Parameters
        ----------
        length : int
            Value reported by ``len()``; its integer square root determines
            how a frame index is split into ``(kx, ky)``.
        func : optional
            Accepted for interface compatibility; currently unused.
        """
        self._len = length
        # np.ogrid yields an open (broadcastable) grid: x has shape
        # (1000, 1) and y has shape (1, 1000).  Rescale so each axis spans
        # [-2*pi, 2*pi).
        self._x, self._y = [axis * 2 * np.pi / 500
                            for axis in np.ogrid[-500:500, -500:500]]
        self._rep = int(np.sqrt(length))

    def __len__(self):
        """Return the length given at construction time."""
        return self._len

    def __getitem__(self, k):
        """Compute and return frame ``k`` as a 2-D array."""
        kx = k // self._rep + 1
        ky = k % self._rep
        # Broadcasting (1000, 1) against (1, 1000) gives the full 2-D frame.
        return np.sin(kx * self._x) * np.cos(ky * self._y) + 1.05

    @property
    def ndim(self):
        """Number of dimensions of each frame (always 2)."""
        return 2

    @property
    def shape(self):
        """Shape ``(rows, cols)`` of every frame returned by indexing."""
        # The original expression was never returned, and len(self._y) is 1
        # for the (1, N) open grid.  Take the row count from x and the
        # column count from y so this matches the broadcast frame shape.
        return self._x.shape[0], self._y.shape[1]
# Length handed to the synthetic data source below.
num_steps = 100

# Indexable object that hands back synthetic data on demand.
data_source = data_gen(length=num_steps)
# function to pull one column out of a data frame, optionally mapping each
# value through an extractor (lazily)
def lazy_listify(data_frame, col, data_extractor_fun=None):
    """Return the contents of column ``col`` of ``data_frame``.

    Parameters
    ----------
    data_frame : mapping of column name to column (e.g. a DataFrame)
        Object indexed with ``col`` to obtain the column.
    col : hashable
        Name of the column to read.
    data_extractor_fun : callable, optional
        If given, it is applied to each value of the column as the result
        is iterated.

    Returns
    -------
    The column's ``.values`` when no extractor is given; otherwise a
    generator yielding ``data_extractor_fun(v)`` for each value ``v``.
    """
    column = data_frame[col]
    if data_extractor_fun is not None:
        # Generator: the extractor only runs as the caller consumes items.
        return (data_extractor_fun(item) for item in column)
    return column.values
def dd_extractor(n, dd=data_source):
    """Return frame number ``n`` from the data source ``dd``."""
    return dd[n]


# Integer square root of the step count; frame numbers are split into
# (kx, ky) with it, mirroring the arithmetic in data_gen.__getitem__.
n_side = int(np.sqrt(num_steps))
frame_numbers = np.arange(num_steps, dtype=int)

df = pd.DataFrame({'P': np.linspace(0, 1, num_steps),
                   'frame_no': range(num_steps),
                   'kx': frame_numbers // n_side + 1,
                   'ky': frame_numbers % n_side})

# label() returns (labeled_array, num_features); keep only the number of
# connected regions above the 1.7 threshold in each frame.
df['pk_count'] = [label(im > 1.7)[1]
                  for im in lazy_listify(df, 'frame_no', dd_extractor)]

# Peak count versus ky, one line per kx value.
# plt.get_cmap replaces plt.cm.get_cmap, which newer matplotlib removed.
my_colors = iter(plt.get_cmap('Reds')(np.linspace(.5, 1, 1 + n_side)))
fig, ax = plt.subplots()
for kx, g in df.groupby('kx'):
    # next(it) works on both Python 2 and 3; it.next() is Python 2 only.
    ax.plot(g['ky'], g['pk_count'], label='$k_x={}$'.format(kx),
            color=next(my_colors), marker='x')
ax.legend(ncol=4)

# Peak count versus kx, one line per ky value.
my_colors = iter(plt.get_cmap('Blues')(np.linspace(.5, 1, n_side)))
fig, ax = plt.subplots()
for ky, g in df.groupby('ky'):
    ax.plot(g['kx'], g['pk_count'], label='$k_y={}$'.format(ky),
            color=next(my_colors), marker='x')
ax.legend(ncol=4)
@ericdill
Copy link

In [15]: df                
Out[15]:                   
                     a   b 
2014-09-24 10:05:30  5   7 
2014-09-24 10:05:31  5 NaN 
2014-09-24 10:05:32  5   7 
2014-09-24 10:05:33  5 NaN 
2014-09-24 10:05:34  5   7 
2014-09-24 10:05:35  5 NaN 
2014-09-24 10:05:36  5   7 
2014-09-24 10:05:37  5 NaN 
2014-09-24 10:05:38  5   7 
2014-09-24 10:05:39  5 NaN 

Is that what is expected?

@tacaswell
Copy link
Author

yes

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment