# woemler/pandas_hdf5_test.py

## pandas_hdf5_test.py
import pandas as pd

# Read a TSV file into a data frame.
# read_csv infers the data type of each column automatically.
df = pd.read_csv("my_data.txt", sep="\t")

# Write the data frame to a fixed-format ("storer") HDF5 file under the key
# "data"; mode="w" overwrites any existing file.
df.to_hdf("my_data.store.h5", key="data", mode="w")

# Writing to a table-formatted HDF5 file is a little trickier.
# You need to set column widths up front to prevent opaque error messages.
item_size = {"COL_A": 100, "COL_B": 30, "COL_C": 20}

# And set data columns for easier querying.
data_columns = ["COL_A", "COL_B", "COL_C"]

# Now write the data frame to a table-format HDF5 file.
# This file is queryable and appendable.
# NOTE: it is the data frame that gets written here (no store is open yet),
# and the table layout is requested with format="table", which supersedes
# the legacy table=True flag.
df.to_hdf(
    "my_data.table.h5",
    key="data",
    format="table",
    min_itemsize=item_size,
    data_columns=data_columns,
    append=True,
)

# You can now read the data from HDF5 into a data frame in memory.
df = pd.read_hdf("my_data.store.h5", key="data")

# Or you can search the table file without reading it all into memory.
store = pd.HDFStore("my_data.table.h5")
try:
    # You can get the unique values of a data column.
    col_a = store.select_column("data", "COL_A").unique()

    # You can run queries on the table file.
    # The value is referenced by variable name inside the expression rather
    # than pasted into the string, so string values need no manual quoting.
    first_value = col_a[0]
    df = store.select("data", where="COL_A == first_value")
finally:
    # You should close the store when done, even if a query above failed.
    store.close()

# You can also open a store using "with" for automatic closing.
with pd.HDFStore("my_data.table.h5") as store:
    # Do stuff...
    print(store)
	import pandas as pd

	# Read a TSV file into a data frame.
	# read_csv infers the data type of each column automatically.
	df = pd.read_csv("my_data.txt", sep="\t")

	# Write the data frame to a fixed-format ("storer") HDF5 file under the key
	# "data"; mode="w" overwrites any existing file.
	df.to_hdf("my_data.store.h5", key="data", mode="w")

	# Writing to a table-formatted HDF5 file is a little trickier.
	# You need to set column widths up front to prevent opaque error messages.
	item_size = {"COL_A": 100, "COL_B": 30, "COL_C": 20}

	# And set data columns for easier querying.
	data_columns = ["COL_A", "COL_B", "COL_C"]

	# Now write the data frame to a table-format HDF5 file.
	# This file is queryable and appendable.
	# NOTE: it is the data frame that gets written here (no store is open yet),
	# and the table layout is requested with format="table", which supersedes
	# the legacy table=True flag.
	df.to_hdf(
		"my_data.table.h5",
		key="data",
		format="table",
		min_itemsize=item_size,
		data_columns=data_columns,
		append=True,
	)

	# You can now read the data from HDF5 into a data frame in memory.
	df = pd.read_hdf("my_data.store.h5", key="data")

	# Or you can search the table file without reading it all into memory.
	store = pd.HDFStore("my_data.table.h5")
	try:
		# You can get the unique values of a data column.
		col_a = store.select_column("data", "COL_A").unique()

		# You can run queries on the table file.
		# The value is referenced by variable name inside the expression rather
		# than pasted into the string, so string values need no manual quoting.
		first_value = col_a[0]
		df = store.select("data", where="COL_A == first_value")
	finally:
		# You should close the store when done, even if a query above failed.
		store.close()

	# You can also open a store using "with" for automatic closing.
	with pd.HDFStore("my_data.table.h5") as store:
		# Do stuff...
		print(store)