Skip to content

Instantly share code, notes, and snippets.

@puraminy
Created January 2, 2020 17:24
Show Gist options
  • Save puraminy/50b1b3d1e5920d4083eab3d244d8eebc to your computer and use it in GitHub Desktop.
Image compression using PCA
from sklearn.decomposition import PCA
import scipy.io as sio
import matplotlib.image as image
import matplotlib.image as mpimg
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import numpy as np
import math
import operator
from PIL import Image
import eval_conf_mat
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score
def plot_dict(data, ax=None):
    """Plot a dict's values against its sorted keys as a line.

    Parameters
    ----------
    data : dict
        Mapping x -> y; keys are sorted before plotting so the line is
        drawn left-to-right.
    ax : matplotlib Axes, optional
        Target axes; when omitted a new 10x8 figure/axes is created.

    Returns
    -------
    The matplotlib Axes the line was drawn on.
    """
    # `is None` identity test instead of `== None` (PEP 8); the old
    # parameter name `dict` shadowed the builtin.
    if ax is None:
        fig, ax = plt.subplots(figsize=(10, 8))
    xs, ys = zip(*sorted(data.items()))
    ax.plot(xs, ys)
    return ax
def readImages(path, kind, n):
    """Load images ``1.png`` .. ``n.png`` from ``path + kind`` with labels.

    Parameters
    ----------
    path : str
        Base directory; filenames are built by plain string
        concatenation, so it is expected to end with '/'.
    kind : str
        Sub-directory name ('Train' or 'Test').  For 'Train', two
        consecutive images share one class label; otherwise one each.
        (Renamed from `type`, which shadowed the builtin.)
    n : int
        Number of images to read.

    Returns
    -------
    (images, labels) : tuple of np.ndarray
        The stacked images and the integer class label of each image.

    Notes
    -----
    cv2.imread returns None for a missing/unreadable file and that None
    would be stacked into the array unchecked -- TODO confirm the files
    all exist on the drive.
    """
    images = [cv2.imread(path + kind + '/' + str(i) + '.png')
              for i in range(1, n + 1)]
    # Two images per class in the training set, one per class otherwise.
    per_class = 2 if kind == 'Train' else 1
    labels = [i // per_class for i in range(len(images))]
    return np.array(images), np.array(labels)
def compress(im, r, shape=(100, 90, 3)):
    """Compress an image with PCA and reconstruct it.

    Parameters
    ----------
    im : np.ndarray
        Image of shape `shape` (height, width, channels), uint8 pixels.
    r : int or float
        Passed straight to sklearn's PCA: an int number of components,
        or a float in (0, 1) meaning target explained-variance fraction.
    shape : tuple of int, optional
        (height, width, channels) of `im`.  Defaults to the 100x90 RGB
        faces this script works with; parameterized so the function is
        reusable for other image sizes.

    Returns
    -------
    reconstructed_img : np.ndarray of shape `shape`
        The image reconstructed from the low-dimensional projection
        (still on the [0, 1] normalized scale).
    expl_var : float
        Total explained variance ratio, rounded to 2 decimals.
    comp_rate : float
        Ratio original size / compressed size, rounded to 3 decimals.
    """
    h, w, c = shape
    # Treat each of the h rows as one sample with w*c features.
    img_r = np.reshape(im, (h, w * c))
    img_norm = img_r / 255  # scale pixel values into [0, 1]
    pca = PCA(r)
    # Fit PCA on the normalized image data and project it down.
    lower_dimension_img = pca.fit_transform(img_norm)
    expl_var = np.round(np.sum(pca.explained_variance_ratio_), 2)
    comp_rate = np.round(im.size / lower_dimension_img.size, 3)
    # Map back to the original feature space and restore the image shape.
    reconstructed_img = pca.inverse_transform(lower_dimension_img)
    return np.reshape(reconstructed_img, shape), expl_var, comp_rate
# Load the training/test face images and pick one test image to demo on.
path = 'drive/My Drive/FACES/'
train_data, train_label = readImages(path, 'Train', 28)
test_data, test_label = readImages(path, 'Test', 14)
im = test_data[0, :, :, :]
#################################### PART A
print("a) compressing and reconstructing a sample picture....")
fig, axes = plt.subplots(2, 2, figsize=(10, 10))
CR = [0.4, 0.6, 0.8, 0.99]
# Walk the 2x2 axes grid in row-major order, one target explained
# variance per panel.
for panel, r in zip(axes.flat, CR):
    reconstructed_img, expl_var, comp_rate = compress(im, r)
    panel.imshow(reconstructed_img)
    panel.set_title(f"Explained Var= {expl_var} Compr. Rate: " + str(comp_rate))
fig.savefig('pic.png')
####################################### Part B
print("b) recognition rate....")
import scipy.spatial.distance as dist

# Map "explained variance of the PCA with r components" -> accuracy of a
# 1-nearest-neighbour classifier run in the reconstructed image space.
acc_rate = {}
for r in range(1, 14):
    # The compressed training images do not depend on the test item, so
    # compute them once per r instead of once per (test item, train
    # image) pair as before -- same results, far fewer PCA fits.
    train_flat = [compress(train_img, r)[0].flatten()
                  for train_img in train_data]
    y_pred = []
    expl_var = None  # explained variance of the last test image (dict key)
    for test_item in range(len(test_data)):
        im = test_data[test_item, :, :, :]
        compressed_test_img, expl_var, _ = compress(im, r)
        test_flat = compressed_test_img.flatten()
        # 1-NN: the training image closest in Euclidean distance wins.
        min_d, min_index = math.inf, 0
        for index, candidate in enumerate(train_flat):
            d = dist.euclidean(candidate, test_flat)
            if d < min_d:
                min_d = d
                min_index = index
        y_pred.append(train_label[min_index])
    acc = accuracy_score(test_label, y_pred)
    acc_rate[expl_var] = acc.round(2)
print("acc_rate:", acc_rate)
plot_dict(acc_rate)
plt.savefig("acc_rate")
################## PART C
print("c) MSE ....")
# Average reconstruction MSE over the whole training set, for each
# component count r.
rate_comp = {}
for r in range(1, 20):
    total_mse = 0
    for train_img in train_data:
        reconstructed, _, _ = compress(train_img, r)
        # Squared error summed over the image, averaged per pixel
        # position (height * width).
        total_mse += np.sum((train_img - reconstructed) ** 2) / (
            train_img.shape[0] * train_img.shape[1])
    rate_comp[r] = total_mse / len(train_data)
plot_dict(rate_comp)
plt.xlabel("# of PCA components")
plt.ylabel("MSE between compressed and original image")
plt.savefig("rate_comp.png")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment