Last active
May 16, 2021 01:30
Star
You must be signed in to star a gist
Original json version to extract the feature
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# single image feature extraction code from https://stackoverflow.com/a/63552285/13369757 | |
# whole folder images feature extraction idea from https://github.com/meliketoy/fine-tuning.pytorch/blob/master/extract_features.py | |
# The purpose of this code it to reproduce the res101.mat format file with similar settings (columns and rows) | |
import torch | |
import torchvision | |
import torchvision.models as models | |
import os | |
from PIL import Image | |
import json | |
import pandas as pd | |
if torch.cuda.is_available(): | |
print("Use GPU") | |
else: | |
print("Use CPU only, check your environment!") | |
data_dir = "./test" | |
dataset_info = json.load(open("../../dataset/XXXXXXXX/dataset.json", "r")) | |
df = pd.DataFrame.from_dict(dataset_info, orient="index") | |
df['file_name'] = df.index | |
df["file_name"] = data_dir + df["file_name"].astype(str) #新增圖片完整路徑(full path) | |
orig_df = pd.DataFrame.from_dict(dataset_info, orient="index") #載入只有圖片檔名的data(後續可複製一份df代替,就不用再次載入) | |
orig_df['file_name'] = orig_df.index | |
print("Load dataset.json successfully!!!") | |
transforms = torchvision.transforms.Compose([ | |
torchvision.transforms.ToTensor(), | |
]) | |
def is_image(f): | |
return f.endswith(".png") or f.endswith(".jpg") | |
def get_vector(image): | |
# Create a PyTorch tensor with the transformed image | |
t_img = transforms(image) | |
# Create a vector of zeros that will hold our feature vector | |
# The 'avgpool' layer has an output size of 512 | |
my_embedding = torch.zeros(2048) | |
# Define a function that will copy the output of a layer | |
def copy_data(m, i, o): | |
my_embedding.copy_(o.flatten()) # <-- flatten | |
# Attach that function to our selected layer | |
h = layer.register_forward_hook(copy_data) | |
# Run the model on our transformed image | |
with torch.no_grad(): # <-- no_grad context | |
model(t_img.unsqueeze(0)) # <-- unsqueeze | |
# Detach our copy function from the layer | |
h.remove() | |
# Return the feature vector | |
return my_embedding | |
model = models.resnet101(pretrained=True) | |
layer = model._modules.get('avgpool') | |
model.eval() | |
file_path_list = [] | |
feature_list = [] | |
label_list = [] | |
files = df["file_name"].tolist() #之前用os.walk去訪歷資料夾,但是百萬張圖片的速度太慢,透過之前預先建好的json檔案,直接取得路徑 | |
for file_path in files: | |
if (is_image(file_path)): #假如路徑是圖片 | |
image = Image.open(file_path) #開圖檔 | |
pic_vector = get_vector(image) #用ResNet101萃取圖片的特徵向量 | |
file_path_list.append(file_path) #把該張圖片的絕對路徑加到list保存 | |
feature_list.append(pic_vector.data.tolist()) #把該張圖片萃取的特徵加到list保存 | |
#如果當前萃取之圖片的檔名與原始json檔中的檔名相符,取得該檔名的class值,並加到list保存 | |
label_list.append(orig_df.loc[orig_df['file_name'] == file_path.split("/")[-1] ]['class'].values[0]) | |
print(file_path+" Done!") #萃取完成顯示Done | |
df_feature_list = pd.DataFrame(data = feature_list).T #將資料轉置 | |
df_file_path_list = pd.DataFrame(data = file_path_list) | |
df_label_list = pd.DataFrame(data = label_list) | |
print('saving file_path, feature_list, labels to csv.......') #保存到csv檔案當中 | |
df_file_path_list.to_csv('file_path.csv',index=False) | |
df_feature_list.to_csv('feature_list.csv',index=False) | |
df_label_list.to_csv('label_list.csv',index=False) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import torch | |
import torchvision | |
import torchvision.models as models | |
import json | |
import pandas as pd | |
import numpy as np | |
import torch.nn as nn | |
import scipy.io as io | |
from torch.utils.data import Dataset | |
from torch.utils import data | |
import os | |
from PIL import Image | |
from tqdm import tqdm | |
import h5py | |
# data_dir = "./test" | |
data_dir = "../XXison/datasets_for_ma/" | |
dataset_info = json.load(open("../XXison/dataset.json", "r")) | |
df = pd.DataFrame.from_dict(dataset_info, orient="index") | |
df['file_name'] = df.index | |
df["file_name"] = data_dir + df["file_name"].astype(str) #新增圖片完整路徑(full path) | |
orig_df = pd.DataFrame.from_dict(dataset_info, orient="index") #載入只有圖片檔名的data(後續可複製一份df代替,就不用再次載入) | |
orig_df['file_name'] = orig_df.index | |
print("Load dataset.json successfully!!!") | |
transforms = torchvision.transforms.Compose([ | |
torchvision.transforms.ToTensor(), | |
]) | |
def save_dict_to_hdf5(dic, filename): | |
with h5py.File(filename, 'w') as h5file: | |
recursively_save_dict_contents_to_group(h5file, '/', dic) | |
def recursively_save_dict_contents_to_group(h5file, path, dic): | |
for key, item in dic.items(): | |
if isinstance(item, (np.ndarray, np.int64, np.float64, str, bytes,'<U44')): | |
h5file[path + key] = item | |
elif isinstance(item, dict): | |
recursively_save_dict_contents_to_group(h5file, path + key + '/', item) | |
else: | |
raise ValueError('Cannot save %s type'%type(item)) | |
def is_image(f): | |
return f.endswith(".png") or f.endswith(".jpg") | |
def get_vector(image): | |
# Create a PyTorch tensor with the transformed image | |
t_img = transforms(image) | |
# Create a vector of zeros that will hold our feature vector | |
# The 'avgpool' layer has an output size of 2048 | |
my_embedding = torch.zeros(2048) | |
# Define a function that will copy the output of a layer | |
def copy_data(m, i, o): | |
my_embedding.copy_(o.flatten()) # <-- flatten | |
# Attach that function to our selected layer | |
h = layer.register_forward_hook(copy_data) | |
# Run the model on our transformed image | |
with torch.no_grad(): # <-- no_grad context | |
model(t_img.unsqueeze(0).cuda()) # <-- unsqueeze | |
# Detach our copy function from the layer | |
h.remove() | |
# Return the feature vector | |
return my_embedding.cuda() | |
model = models.resnet101(pretrained=True) | |
layer = model._modules.get('avgpool') | |
model.eval() | |
if torch.cuda.is_available(): | |
gpus = '0' | |
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" | |
os.environ["CUDA_VISIBLE_DEVICES"] = gpus | |
device_ids = [i for i in range(torch.cuda.device_count())] | |
if torch.cuda.device_count() > 1: | |
print("Let's use", torch.cuda.device_count(), "GPUs!") | |
if len(device_ids)>1: | |
model = nn.DataParallel(model, device_ids = device_ids).cuda() | |
else: | |
model = model.cuda() | |
else: | |
print("Use CPU only, check your environment!") | |
feat = {} | |
feature_list = [] | |
file_path_list = [] | |
label_list = [] | |
files = df["file_name"].tolist() #之前用os.walk去訪歷資料夾,但是百萬張圖片的速度太慢,透過之前預先建好的json檔案,直接取得路徑 | |
# files = ['/root/notebooks/nfs/work/yanwei.liu/Phison/test/1.jpg','/root/notebooks/nfs/work/yanwei.liu/Phison/test/2.jpg','/root/notebooks/nfs/work/yanwei.liu/Phison/test/3.jpg'] | |
for file_path in tqdm(files[0:700000]): | |
if (is_image(file_path)): #假如路徑是圖片 | |
image = Image.open(file_path) #開圖檔 | |
pic_vector = get_vector(image) #用ResNet101萃取圖片的特徵向量 | |
# print(pic_vector.is_cuda) #檢查是否用GPU | |
file_path_list.append(file_path) #把該張圖片的絕對路徑加到list保存 | |
feature_list.append(pic_vector.data.tolist()) #把該張圖片萃取的特徵加到list保存 | |
#如果當前萃取之圖片的檔名與原始json檔中的檔名相符,取得該檔名的class值,並加到list保存 | |
label_list.append(orig_df.loc[orig_df['file_name'] == file_path.split("/")[-1] ]['class'].values[0]) | |
for index, value in enumerate(label_list): | |
if value == 'good': | |
label_list[index] = 1 | |
elif value == 'missing': | |
label_list[index] = 2 | |
elif value == 'shift': | |
label_list[index] = 3 | |
elif value == 'stand': | |
label_list[index] = 4 | |
elif value == 'broke': | |
label_list[index] = 5 | |
elif value == 'short': | |
label_list[index] = 6 | |
# https://stackoverflow.com/a/11885718/13369757 (單維度dim轉置) | |
# https://stackoverflow.com/a/7464816 (處理char字串呈現問題) | |
""" | |
feat['features'] = np.array(feature_list).astype('float32').T #小檔案(2GB)以內,可用這4行註解的,直接產生mat檔案 | |
feat['image_files'] = np.array(file_path_list, dtype=np.object)[None].T | |
feat['labels'] = np.array(label_list).astype('float32')[None].T | |
io.savemat('res101.mat',feat) | |
""" | |
# 由於scipy無法將大量的矩陣寫入到 mat檔案的關係,因此改用hdf5格式保存 | |
# 到Octave開啟進行Transpose後,將多個h5合併成1個h5後,再保存成v7版本的mat檔案,最後透過tfvaegan進行訓練 | |
feat['features'] = np.array(feature_list).astype('float32') #大檔案(2GB)以上,則使用以下程式保存hdf5格式 | |
# feat['image_files'] = np.array(file_path_list, dtype=np.object) #為降低空間占用,不保存檔案路徑到hdf5 | |
feat['labels'] = np.array(label_list).astype('float32')[None].T #於Octave開啟後,記得將labels的變數進行Transpose | |
save_dict_to_hdf5(feat, 'res101.h5') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment