Skip to content

Instantly share code, notes, and snippets.

@janpipek
Created September 22, 2015 14:28
Show Gist options
  • Save janpipek/34ea71db87ad55b70a52 to your computer and use it in GitHub Desktop.
Save janpipek/34ea71db87ad55b70a52 to your computer and use it in GitHub Desktop.
Convert CSV files to HDF5 format
#!/usr/bin/env python
import pandas as pd
import sys
def convert(source_name, target_name, dataset_name=None, group_name=None,
            complevel=9):
    """Read a delimited text file and store it as a dataset in an HDF5 file.

    Parameters
    ----------
    source_name : str
        Path of the text file to read. It is parsed with ``pd.read_table``
        using that function's default separator.
    target_name : str
        Path of the HDF5 file the table is written to.
    dataset_name : str, optional
        Key under which the table is stored. When omitted, it is derived
        from ``source_name`` by dropping every dot-separated component that
        is a known data/compression extension (csv, gz, dat, txt, tsv).
    group_name : str, optional
        Accepted for interface compatibility; currently not used.
    complevel : int, optional
        zlib compression level (0-9). It is passed explicitly because
        current pandas documents a missing level as "no compression",
        even when ``complib`` is given.
    """
    # Only derive a name when the caller did not supply one. (The previous
    # check looked at ``group_name`` and so discarded explicit names.)
    if not dataset_name:
        ignored_parts = ("csv", "gz", "dat", "txt", "tsv")
        dataset_name = ".".join(
            part for part in source_name.split(".")
            if part not in ignored_parts
        )
    data = pd.read_table(source_name, index_col=False)
    # ``key`` is passed by keyword: positional use is deprecated in recent
    # pandas releases.
    data.to_hdf(target_name, key=dataset_name, complib="zlib",
                complevel=complevel)
if __name__ == "__main__":
    # Command line: SOURCE [SOURCE ...] TARGET
    # sys.argv also holds the script name, so two user-supplied arguments
    # (at least one source plus the target) mean len(sys.argv) >= 3.
    if len(sys.argv) < 3:
        # Exit with a usage hint instead of a raw traceback.
        sys.exit(
            "Usage: {0} SOURCE [SOURCE ...] TARGET\n"
            "At least one source file and the target file are required."
            .format(sys.argv[0])
        )
    sources = sys.argv[1:-1]
    target = sys.argv[-1]
    # Every source is written into the same target file, each under a
    # dataset name derived from its own file name.
    for source in sources:
        convert(source, target)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment