Skip to content

Instantly share code, notes, and snippets.

@ivder
Created October 4, 2019 07:55
Show Gist options
  • Save ivder/1afface163624a2b31e0956c3866a65b to your computer and use it in GitHub Desktop.
Save ivder/1afface163624a2b31e0956c3866a65b to your computer and use it in GitHub Desktop.
import os
import pandas as pd
# Build train/test CSV manifests for an image dataset stored as one
# sub-folder per label directly under the current working directory.
# Each CSV row is "<GCS path of the image>,<label>", written without a
# header or index column.


def split_train_test(files_dict, base_path, test_every):
    """Split the PNG files of every label folder into train and test rows.

    Files are visited in sorted (folder, filename) order so the split is
    reproducible; os.listdir() itself returns names in arbitrary order.
    A single counter runs across all folders: every ``test_every``-th PNG
    goes to the test set, all others to the training set.

    Args:
        files_dict: mapping of folder name (used as the label) to the list
            of filenames found in that folder.
        base_path: prefix prepended to ``<folder>/<filename>`` in each row.
        test_every: period of the split; 10 sends 1 file in 10 to test.

    Returns:
        A ``(train_rows, test_rows)`` tuple of lists of ``(path, label)``
        tuples.
    """
    train_rows = []
    test_rows = []
    counter = 1
    for label, files_list in sorted(files_dict.items()):
        for filename in sorted(files_list):
            # Suffix test rather than substring test, so that names such
            # as "img.png.bak" are not mistaken for photos.
            if not filename.endswith('.png'):
                continue  # don't include non-photos
            row = (base_path + label + '/' + filename, label)
            if counter == test_every:
                counter = 1
                test_rows.append(row)
            else:
                train_rows.append(row)
                # NOTE(review): the increment is placed in the else branch
                # so that exactly one file per `test_every` lands in the
                # test set - confirm against the intended ratio.
                counter = counter + 1
    return train_rows, test_rows


# Immediate sub-directories of the current directory; each one is a label.
data_folders = next(os.walk('.'))[1]
filenames = [os.listdir(f) for f in data_folders]
test_ratio = 10  # percentage of images to put in the test set
files_dict = dict(zip(data_folders, filenames))
base_gcs_path = 'gs://v26/v26-30kinds-augmented-color/'
# A ratio of 10 (%) means every 10th image is a test image.
index_test = round(100 / test_ratio)

train_array, test_array = split_train_test(files_dict, base_gcs_path, index_test)

train_dataframe = pd.DataFrame(train_array)
train_dataframe.to_csv('training_data.csv', index=False, header=False)
test_dataframe = pd.DataFrame(test_array)
test_dataframe.to_csv('test_data.csv', index=False, header=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment