Created
January 15, 2020 06:26
-
-
Save puraminy/a83654c077124b0adc4c386fca5278a2 to your computer and use it in GitHub Desktop.
PCA image Compress
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import math
import operator

import cv2
import matplotlib.image as image
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.io as sio
import scipy.spatial.distance as dist
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

import eval_conf_mat
def plot_dict(dict, ax=None):
    """Draw a mapping as a line plot, keys on the x axis and values on the y axis.

    Points are plotted in ascending key order.

    Args:
        dict: Mapping of x value -> y value. The name shadows the builtin but
            is kept so existing keyword callers keep working.
        ax: Object exposing ``plot(x, y)`` (normally a matplotlib Axes).
            When omitted, a new 10x8 inch figure is created and its axes used.

    Returns:
        None. An empty mapping draws nothing.
    """
    if ax is None:
        _, ax = plt.subplots(figsize=(10, 8))
    if not dict:
        # zip(*[]) produces nothing to unpack into (x, y); skip drawing
        # instead of raising ValueError.
        return
    # Sort by key, then split the (key, value) pairs into two tuples.
    x, y = zip(*sorted(dict.items()))
    ax.plot(x, y)
def readImages(path, type, n):
    """Load ``n`` sequentially numbered PNG files and derive their class labels.

    Files are read from ``path + type + '/' + '<i>.png'`` for ``i = 1..n``,
    so ``path`` must already end with a separator.

    Args:
        path: Prefix of the dataset directory (including trailing separator).
        type: Sub-directory name. For ``'Train'`` every two consecutive
            images share a label; for any other value each image gets its
            own label. (Shadows the builtin; kept for caller compatibility.)
        n: Number of images to read.

    Returns:
        Tuple ``(images, labels)`` of NumPy arrays. ``images`` is the list of
        loaded images passed through ``np.array``; ``labels[i]`` is
        ``i // images_per_class``.

    Raises:
        OSError: If an image file is missing or cannot be decoded.
    """
    images = []
    for i in range(1, n + 1):
        filename = path + type + '/' + str(i) + '.png'
        img = cv2.imread(filename)
        if img is None:
            # cv2.imread signals failure by returning None instead of
            # raising; fail here so the bad file is named explicitly.
            raise OSError(f"could not read image: {filename}")
        images.append(img)

    images_per_class = 2 if type == 'Train' else 1
    labels = [i // images_per_class for i in range(len(images))]
    return np.array(images), np.array(labels)
def compress(im, r):
    """Project an image onto ``r`` PCA components and reconstruct it.

    The image is flattened to 2-D (first axis kept, remaining axes merged),
    divided by 255, and a PCA fitted on its rows. The projected rows are then
    mapped back and reshaped to the input's shape.

    Args:
        im: NumPy image array. Any shape with at least two axes works; the
            division by 255 presumes 8-bit pixel values.
        r: Passed straight to ``PCA`` as ``n_components`` (this file uses both
            integer counts and fractions below 1).

    Returns:
        Tuple ``(reconstructed_img, expl_var, comp_rate)``:
        the reconstruction (same shape as ``im``, on the 1/255 scale), the
        summed explained-variance ratio rounded to 2 decimals, and
        ``im.size / projected.size`` rounded to 3 decimals. The ratio counts
        only the projected data, not the stored components.
    """
    # Keep the first axis as PCA samples; merge everything else into features.
    flat = np.reshape(im, (im.shape[0], -1)) / 255

    pca = PCA(r)
    projected = pca.fit_transform(flat)

    expl_var = np.round(np.sum(pca.explained_variance_ratio_), 2)
    comp_rate = np.round(im.size / projected.size, 3)

    reconstructed_img = np.reshape(pca.inverse_transform(projected), im.shape)
    return reconstructed_img, expl_var, comp_rate
path = 'drive/My Drive/FACES/'
train_data, train_label = readImages(path, 'Train', 28)
test_data, test_label = readImages(path, 'Test', 14)
# Sample image used for the Part A demonstration.
im = test_data[0, :, :, :]

#################################### PART A
# Reconstruct the sample image at four PCA settings and show them in a 2x2 grid.
print("a) compressing and reconstructing a sample picture....")
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
CR = [0.4, 0.6, 0.8, 0.99]
# axes.flat walks the grid row by row, matching the order of CR.
for ax, r in zip(axes.flat, CR):
    reconstructed_img, expl_var, comp_rate = compress(im, r)
    # The PCA reconstruction can overshoot [0, 1], and cv2 loads channels as
    # BGR while imshow expects RGB: clip, then reverse the channel axis.
    displayable = np.clip(reconstructed_img, 0, 1)[..., ::-1]
    ax.imshow(displayable)
    ax.set_title(f"Explained Var= {expl_var} Compr. Rate: " + str(comp_rate))
fig.savefig('pic.png')
####################################### Part B
# Nearest-neighbour recognition on PCA reconstructions, for 1..13 components.
print("b) reconition rate....")
acc_rate = {}
for r in range(1, 14):
    # The training reconstructions depend only on r, so build them once here
    # rather than once per test image.
    train_vectors = [compress(train_img, r)[0].flatten() for train_img in train_data]

    y_pred = []
    for test_img in test_data:
        compressed_test_img, expl_var, _ = compress(test_img, r)
        test_vector = compressed_test_img.flatten()
        distances = [dist.euclidean(train_vector, test_vector)
                     for train_vector in train_vectors]
        # argmin returns the first minimum, i.e. the earliest training image
        # on ties.
        min_index = int(np.argmin(distances))
        y_pred.append(train_label[min_index])

    acc = accuracy_score(test_label, y_pred)
    # NOTE(review): the key is the explained variance of the LAST test image
    # at this r, rounded to 2 decimals; equal keys overwrite earlier entries.
    # round() works whether accuracy_score returns a Python or NumPy float.
    acc_rate[expl_var] = round(float(acc), 2)

print("acc_rate:", acc_rate)
plot_dict(acc_rate)
plt.savefig("acc_rate")
################## PART C
# Average reconstruction error over the training set for 1..19 PCA components.
print("c) MSE ....")
rate_comp = {}
for r in range(1, 20):
    mse_all = 0
    for train_img in train_data:
        compressed_train_img, _, _ = compress(train_img, r)
        # compress() returns values on the 1/255 scale; put the original on
        # the same scale so the difference measures reconstruction error
        # rather than the scale gap.
        original = train_img / 255
        # Squared error summed over all channels, divided by the pixel count
        # (height * width).
        pixel_count = train_img.shape[0] * train_img.shape[1]
        mse_all += np.sum((original - compressed_train_img) ** 2) / pixel_count
    rate_comp[r] = mse_all / len(train_data)

plot_dict(rate_comp)
plt.xlabel("# of PCA components")
plt.ylabel("MSE between compressed and original image")
plt.savefig("rate_comp.png")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment