Skip to content

Instantly share code, notes, and snippets.

What would you like to do?
import os
import random
import numpy as np
from sklearn.model_selection import train_test_split
# Number of images to pick at random from the dataset.
qtd_images = 300

# Folder the images are read from (placeholder text - presumably meant to be
# replaced with a real path before running).
dir_src = "{Diretório de origem}"
# Folder that receives the images assigned to the training set.
dir_train = "{Diretório destino das imagens de traino}"
# Folder that receives the images assigned to the test set.
dir_test = "{Diretório destino das imagens de teste}"
def get_filepaths(directory):
    """Collect the image files found under ``directory``.

    The tree is walked recursively with ``os.walk``. A file is kept when its
    extension, compared case-insensitively, is one of the supported image
    formats: JPEG (``.jpeg`` / ``.jpg``), PNG, GIF or BMP.

    Args:
        directory: Root folder to scan.

    Returns:
        A sorted list of paths relative to ``directory`` (a bare file name
        for files located directly in ``directory``). The list is empty when
        nothing matches or when ``directory`` does not exist.
    """
    image_extensions = {".jpeg", ".jpg", ".png", ".gif", ".bmp"}
    file_paths = []
    for root, _directories, files in os.walk(directory):
        for filename in files:
            # splitext() requires a real ".ext" suffix, so a name such as
            # "notes_jpg" is no longer mistaken for an image, while
            # lower() lets "PHOTO.JPG" through.
            extension = os.path.splitext(filename)[1].lower()
            if extension in image_extensions:
                # Paths are kept relative to the scanned folder so callers
                # can join them onto a source or a destination directory.
                full_path = os.path.join(root, filename)
                file_paths.append(os.path.relpath(full_path, directory))
    # os.walk() yields entries in arbitrary order; sort for reproducibility.
    file_paths.sort()
    return file_paths
# Sample the dataset, split the sample into train/test subsets and move the
# selected files into their destination folders.
full_file_paths = get_filepaths(dir_src)

# random.sample() needs a sequence: passing a set was deprecated in Python 3.9
# and raises TypeError since 3.11, so the de-duplicated names are sorted into
# a list first. The sample size is capped so that a dataset holding fewer
# than qtd_images files no longer raises ValueError.
candidates = sorted(set(full_file_paths))
result = random.sample(candidates, min(qtd_images, len(candidates)))
print('\nNúmero de arquivos do dataset: ' + str(len(full_file_paths)))
print('\nItens selecionados randomicamente: ' + str(len(result)))

# The same list of file names is passed as both features and labels; only the
# X_* halves of the split are used below.
X = y = result
# Hold out 1/4 of the sample for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)


def _move_image(name, dest_dir):
    """Move one sampled image from ``dir_src`` into ``dest_dir``."""
    # os.path.join() inserts the separator that plain "+" concatenation
    # silently omitted, and leaves an already-absolute ``name`` untouched.
    source = os.path.join(dir_src, name)
    relative = os.path.basename(name) if os.path.isabs(name) else name
    destination = os.path.join(dest_dir, relative)
    # os.rename() fails when the target folder is missing, so create it.
    parent = os.path.dirname(destination)
    if parent:
        os.makedirs(parent, exist_ok=True)
    os.rename(source, destination)


for image_name in X_test:
    _move_image(image_name, dir_test)
for image_name in X_train:
    _move_image(image_name, dir_train)
print('\nQuantidade de arquivos no conjunto de treino: ' + str(len(X_train)))
print('\nQuantidade de arquivos no conjunto de teste: ' + str(len(X_test)))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.