Skip to content

Instantly share code, notes, and snippets.

@sparkydogX
Last active March 22, 2019 02:33
Show Gist options
  • Save sparkydogX/651e21f3fd66cda2afc100e9f67d79b2 to your computer and use it in GitHub Desktop.
Save sparkydogX/651e21f3fd66cda2afc100e9f67d79b2 to your computer and use it in GitHub Desktop.
Extract the array from a confusion matrix image via OCR and save it to an xlsx file for manual correction.
from __future__ import print_function
import cv2
import pytesseract
import numpy as np
from os import listdir
from os.path import join
import pandas
import xlsxwriter
def getCFMatrix(imgpath, class_num=4, start_position=(40, 128), block_height=95, block_width=93):
    '''
    Convert a confusion matrix image to a numpy matrix via OCR.

    The image is cut into a ``class_num`` x ``class_num`` grid of equally sized
    cells and every cell is handed to tesseract on its own. Each row of the
    resulting matrix is expected to sum to 1; rows that do not (or that contain
    a cell whose OCR output is not a number) are reported as error rows so they
    can be corrected by hand.

    :param imgpath: Confusion matrix file path.
    :param class_num: Number of rows/columns of the matrix.
    :param start_position: Pixel offset of the top-left cell; element 0 indexes
        the first image axis (rows), element 1 the second (columns).
    :param block_height: Extent of one cell along the first image axis, in pixels.
    :param block_width: Extent of one cell along the second image axis, in pixels.
    :return: Tuple ``(CFMatrix, ErrorRows)`` - the numpy matrix and the list of
        0-based row indices that need manual checking.
    :raises IOError: If the image cannot be read.
    '''
    img = cv2.imread(imgpath)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of a confusing TypeError below.
        raise IOError("Cannot read confusion matrix image: {0}".format(imgpath))

    top, left = start_position[0], start_position[1]
    CFMatrix = np.zeros([class_num, class_num])
    unreadable_rows = set()
    for i in range(class_num):
        y0 = top + block_height * i
        for j in range(class_num):
            x0 = left + block_width * j
            cropped_img = img[y0:y0 + block_height, x0:x0 + block_width]
            text = pytesseract.image_to_string(cropped_img)
            try:
                CFMatrix[i, j] = float(text)
            except ValueError:
                # OCR returned nothing usable: keep the 0.0 placeholder and
                # flag the row instead of aborting the whole run.
                unreadable_rows.add(i)
                print("Cannot parse OCR output {0!r} at row {1}, column {2}".format(text, i + 1, j + 1))

    ErrorRows = []
    RowSum = CFMatrix.sum(axis=1)
    for i in range(RowSum.shape[0]):
        if i in unreadable_rows or np.abs(RowSum[i] - 1.0) > 0.00005:
            ErrorRows.append(i)
            print('-'*20)
            print("Error occurs in row {0}".format(i+1))
            print(imgpath)
    return CFMatrix, ErrorRows
def func_for_sort_eatract_int_from_file_name(name):
    '''
    Sort key for file names: the integer written before the first dot.
    :param name: File name.
    :return: The part of ``name`` preceding its first '.', converted to int.
    '''
    leading_part, _, _ = name.partition('.')
    return int(leading_part)
def handleDFentry(frame):
    '''
    Locate the confusion matrix image for one experiment entry and OCR it.
    :param frame: Dataframe slice with exactly 1 row and 9 columns, including
        'code_location' and 'model_location'.
    :return: Tuple (matrix, image path, error row indices). When
        'model_location' is null, a 4x4 zero matrix, None and [0, 1, 2, 3]
        are returned instead.
    '''
    assert frame.shape[0] == 1
    assert frame.shape[1] == 9

    code_dir = frame['code_location'].values[0]
    model_rel = frame['model_location'].values[0]

    # No model recorded for this entry: nothing to read, mark every row as bad.
    if pandas.isnull(model_rel):
        print("Nan value", frame)
        return np.zeros([4, 4]), None, [0, 1, 2, 3]

    # Keep only what follows the first '/' of the model location.
    first_slash = model_rel.find('/')
    run_name = model_rel[first_slash + 1:]
    cfm_dir = join(code_dir, 'logs', run_name, 'confusion_matrix')

    # Files are ordered by the integer in their name; the last one is used.
    ordered_files = sorted(listdir(cfm_dir), key=func_for_sort_eatract_int_from_file_name)
    chosen_path = join(cfm_dir, ordered_files[-1])

    matrix, bad_rows = getCFMatrix(chosen_path)
    return matrix, chosen_path, bad_rows
if __name__ == '__main__':
    # Reads every experiment listed in ./Experiments.xlsx, OCRs its confusion
    # matrix image and writes one worksheet per experiment to Results.xlsx.
    # Rows flagged by the OCR step are written in bold red for manual review.

    # Class labels, used both as row headers (ground truth) and column
    # headers (prediction). The formulas below (B:E, B2..E5) assume 4 classes.
    class_labels = ['Sit', 'Walk', 'Ride', 'Stand']

    df = pandas.read_excel('./Experiments.xlsx')
    # Rename the dataframe header.
    df.columns = ['version', 'baseline', 'name', 'description', 'accuracy', 'commit', 'code_location',
                  'model_location', 'device']
    # To process a single experiment only, select it first,
    # e.g. df[df['version'] == 'v51'].

    # Create a new Excel file; worksheets are added per experiment below.
    workbook = xlsxwriter.Workbook('Results.xlsx')
    # The highlight format does not depend on the row, so create it once
    # instead of once per experiment.
    error_format = workbook.add_format({'bold': True, 'font_color': 'red'})

    for row in range(df.shape[0]):
        frame = df[row:row + 1]
        CFMatrix, cfm_file_path, ErrIndex = handleDFentry(frame)
        worksheet = workbook.add_worksheet(frame['version'].values[0])

        # Header cells: A1 corner, B1:E1 predictions, A2:A5 ground truth.
        worksheet.write('A1', 'GT/Pred')
        for k, label in enumerate(class_labels):
            worksheet.write(0, k + 1, label)
            worksheet.write(k + 1, 0, label)
        worksheet.write('F1', 'Sum')
        worksheet.write('A7', 'meanAcc')

        # Matrix body in B2:E5, with a per-row sum in column F.
        for i in range(4):
            for j in range(4):
                if i in ErrIndex:
                    worksheet.write(i + 1, j + 1, CFMatrix[i, j], error_format)
                else:
                    worksheet.write(i + 1, j + 1, CFMatrix[i, j])
            worksheet.write_formula(i + 1, 5, '=SUM(B{0}:E{0})'.format(i + 2), value=np.sum(CFMatrix[i, :]))

        # Embed the source image next to the table so values can be compared by eye.
        if cfm_file_path:
            worksheet.insert_image('G2', cfm_file_path, {'x_scale': 1.0, 'y_scale': 1.0})

        # Mean accuracy = average of the diagonal.
        worksheet.write_formula(6, 1, '=AVERAGE(B2,C3,D4,E5)', value=np.trace(CFMatrix) / 4)
        worksheet.set_column('A:E', 10)

    workbook.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment