Skip to content

Instantly share code, notes, and snippets.

@nitred
Last active May 4, 2018 15:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nitred/ee5ce56b0e0a5a764178014b298b4887 to your computer and use it in GitHub Desktop.
Save nitred/ee5ce56b0e0a5a764178014b298b4887 to your computer and use it in GitHub Desktop.
Dataset Extraction Utilities

About

Dataset extraction utilities

Table of Contents

  • Script for changing directory of meta_df: Link
"""Script to change the directory of the full_path column of the dataset meta_df."""
import argparse
import os
import shutil
import pandas as pd
def change_full_path_directory(df, full_path_col, new_directory):
    """Re-root every path stored in a dataframe column under a new directory.

    Only the last two components of each path are kept -- the name of the
    immediate parent directory and the file name -- and they are re-joined
    beneath ``new_directory``. For example ``/old/root/train/img.png``
    becomes ``<new_directory>/train/img.png``.

    Args:
        df: DataFrame holding the path column. The column is overwritten
            in place on this object.
        full_path_col: Name of the column containing the paths.
        new_directory: Directory under which the paths are re-rooted.

    Returns:
        The same ``df`` object that was passed in, with ``full_path_col``
        rewritten.
    """
    def change_directory(full_path):
        basename = os.path.basename(full_path)
        # Name of the directory that directly contains the file; the
        # original variable name suggests it is a train/test split folder.
        train_or_test_dir = os.path.basename(os.path.dirname(full_path))
        return os.path.join(new_directory, train_or_test_dir, basename)

    df.loc[:, full_path_col] = df.loc[:, full_path_col].apply(change_directory)
    return df


def main(argv=None):
    """Command-line entry point: rewrite the path column of a meta pickle.

    Loads the pickled dataframe, re-roots its path column, copies the
    original file to ``<meta_filename>.bak`` and writes the changed
    dataframe to ``meta.pkl`` in the same directory as the input file.

    Args:
        argv: Optional list of command-line arguments. When ``None``,
            argparse reads ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--meta_filename', required=True,
                        help="Path to the pickled meta dataframe.")
    parser.add_argument('--meta_column', default='full_path',
                        help="Column holding the full paths (default: full_path).")
    parser.add_argument('--new_directory', required=True,
                        help="Directory the paths should be re-rooted under.")
    command_args = parser.parse_args(argv)

    meta_filename = command_args.meta_filename
    meta_column = command_args.meta_column
    new_directory = command_args.new_directory

    print("Loading meta pickle as dataframe: {}".format(meta_filename))
    # NOTE: unpickling can execute arbitrary code; only run this on trusted files.
    df = pd.read_pickle(meta_filename)

    print("Changing the full path column {} to new directory have {}".format(meta_column, new_directory))
    df = change_full_path_directory(df, meta_column, new_directory)

    # The output name is always 'meta.pkl' next to the input, regardless of
    # what the input file itself is called.
    new_meta_filename = os.path.join(os.path.dirname(meta_filename), 'meta.pkl')

    # Back up the input before writing, since the output may be the same file.
    print("Copying old filename to a backup filename: {}".format(meta_filename + ".bak"))
    shutil.copyfile(meta_filename, meta_filename + ".bak")

    print("Saving changed dataframe to new meta pickle filename: {}".format(new_meta_filename))
    df.to_pickle(new_meta_filename)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment