Skip to content

Instantly share code, notes, and snippets.

@nicktimko
Created September 30, 2016 22:33
Show Gist options
  • Save nicktimko/0be938e31a3071cc84df0b9da86a15d9 to your computer and use it in GitHub Desktop.
Save nicktimko/0be938e31a3071cc84df0b9da86a15d9 to your computer and use it in GitHub Desktop.
Rejected Ideas
# maybe they'll be used, one day...
class ArrayAutoAllocator(object):
    """Chunked 2-D numpy array whose rows are allocated on first access.

    Rows are grouped into fixed-size chunks keyed by ``row // chunk_size``.
    A chunk is created the first time any of its rows is read or written.
    Column 0 of every row holds the row's global index; the remaining
    ``n_columns`` columns hold data and start out equal to ``fill_val``.
    """

    def __init__(self, n_columns, fill_val=-1, chunk_size=None, dtype=None):
        """Set up an empty allocator.

        Args:
            n_columns: number of data columns (an extra index column is
                added in front of them).
            fill_val: value every data cell holds until it is written.
            chunk_size: rows per chunk; ``None`` means the module-level
                ``PREALLOC_ROWS``.
            dtype: dtype passed to numpy when allocating chunks.
        """
        if chunk_size is None:
            # Resolved at call time rather than as a default argument so the
            # class can be defined before PREALLOC_ROWS is assigned.
            chunk_size = PREALLOC_ROWS
        self.n_columns = n_columns + 1  # for the index
        self.fill_val = fill_val
        self.chunk_size = chunk_size
        self.dtype = dtype
        self.chunks = {}  # chunk number -> (chunk_size, n_columns) array

    def __getitem__(self, idx):
        """Return row ``idx`` (index column included), allocating if needed."""
        chunk = self._get_chunk(idx)
        # The chunk is addressed by the row's offset inside it, not by the
        # global row index.
        return chunk[idx % self.chunk_size]

    def __setitem__(self, key, val):
        """Store ``val`` at ``key == (row, column)``; column 0 is the index."""
        idx, col = key
        chunk = self._get_chunk(idx)
        chunk[idx % self.chunk_size, col] = val

    def _get_chunk(self, idx):
        """Return the chunk containing row ``idx``, creating it on demand."""
        chunk_number = idx // self.chunk_size
        if chunk_number not in self.chunks:
            self.chunks[chunk_number] = self._new_array(chunk_number)
        return self.chunks[chunk_number]

    def _new_array(self, chunk_number):
        """Build a fresh chunk filled with ``fill_val`` and its row indices."""
        shape = (self.chunk_size, self.n_columns)
        a = self.fill_val * np.ones(shape, dtype=self.dtype)
        first_row = self.chunk_size * chunk_number
        a[..., 0] = np.arange(first_row, first_row + self.chunk_size)
        return a

    def flatten(self):
        """Return all allocated chunks stacked in chunk order as one array."""
        S = self.chunk_size
        total = np.empty((S * len(self.chunks), self.n_columns), dtype=self.dtype)
        for i, cn in enumerate(sorted(self.chunks)):
            total[S * i:S * (i + 1)] = self.chunks[cn]
        return total

    def crush(self):
        """Return the flattened array without rows that are still all fill.

        A row is kept when at least one data column differs from
        ``fill_val``. Comparing cell by cell (rather than comparing the row
        sum) keeps rows whose values merely add up to the all-fill total.
        """
        total = self.flatten()
        touched = np.any(total[..., 1:] != self.fill_val, axis=1)
        return total[touched]
# Default number of rows per chunk; used as the default ``chunk_size`` of the
# preallocator classes in this file.
PREALLOC_ROWS = 1000
class DataFramePreallocator(object):
    """Chunked DataFrame whose rows are allocated on first access.

    Rows are grouped into fixed-size chunks keyed by ``row // chunk_size``.
    Each chunk is a DataFrame labelled with the global row numbers it covers
    and pre-filled with ``fill``; it is created the first time one of its
    rows is read or written.
    """

    def __init__(self, fill, columns, chunk_size=None):
        """Set up an empty preallocator.

        Args:
            fill: initial content of every row. Either a scalar (used for
                every cell) or a list-like with one value per column.
            columns: column labels of the chunks.
            chunk_size: rows per chunk; ``None`` means the module-level
                ``PREALLOC_ROWS``.
        """
        if chunk_size is None:
            # Resolved at call time so the class definition does not depend
            # on where PREALLOC_ROWS is assigned in the module.
            chunk_size = PREALLOC_ROWS
        self.fill = fill
        self.columns = columns
        self.chunk_size = chunk_size
        self.chunks = {}  # chunk number -> DataFrame of chunk_size rows

    def __getitem__(self, idx):
        """Return row ``idx`` as a Series, allocating its chunk if needed."""
        chunk = self._get_chunk(idx)
        return chunk.loc[idx]

    def __setitem__(self, key, val):
        """Store ``val`` at ``key == (row, column)``."""
        idx, col = key
        chunk = self._get_chunk(idx)
        chunk.loc[idx, col] = val

    def _get_chunk(self, idx):
        """Return the chunk containing row ``idx``, creating it on demand."""
        chunk_number = idx // self.chunk_size
        if chunk_number not in self.chunks:
            self.chunks[chunk_number] = self._new_df(chunk_number)
        return self.chunks[chunk_number]

    def _new_df(self, chunk_number):
        """Build a fresh chunk whose every row equals ``fill``."""
        # Use the instance's chunk size (not the global default) so the index
        # always covers the rows that _get_chunk maps to this chunk.
        first_row = self.chunk_size * chunk_number
        index = range(first_row, first_row + self.chunk_size)
        if pd.api.types.is_list_like(self.fill):
            # One value per column: repeat the row so the data has as many
            # rows as the index has labels.
            rows = [self.fill] * self.chunk_size
            return pd.DataFrame(rows, columns=self.columns, index=index)
        # A scalar is broadcast by pandas over the whole index x columns grid.
        return pd.DataFrame(self.fill, columns=self.columns, index=index)

    def flatten(self):
        """Return all allocated chunks concatenated in chunk order.

        Returns an empty DataFrame with the configured columns when no chunk
        has been allocated yet.
        """
        if not self.chunks:
            return pd.DataFrame(columns=self.columns)
        return pd.concat([self.chunks[cn] for cn in sorted(self.chunks)])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment