Skip to content

Instantly share code, notes, and snippets.

@Deepayan137
Created January 3, 2019 06:07
Show Gist options
  • Save Deepayan137/d3e3dc73286c9265af6bcd9d07b7cff6 to your computer and use it in GitHub Desktop.
Save Deepayan137/d3e3dc73286c9265af6bcd9d07b7cff6 to your computer and use it in GitHub Desktop.
CA and WA (character accuracy and word accuracy)
import re
import sys
import os
import tempfile
import subprocess
import pdb
import pandas as pd
import numpy as np
from collections import defaultdict
from ocr.baselines.base_config import *
def calculate_word_accuracy(**kwargs):
    """Score every ground-truth/OCR file pair in a directory with ocrevalutf8.

    Each entry of ``path`` whose file name does not contain ``_ocr`` is
    treated as ground truth; its prediction is expected next to it as
    ``<stem>_ocr.txt``, where ``<stem>`` is the file name up to its first
    dot. The external ``ocrevalutf8.fix`` tool is run on each pair and the
    percentage at the start of the fifth line of its output is recorded.
    Pairs that cannot be scored (tool missing, unexpected output) are
    reported on stdout and skipped.

    Keyword Args:
        path: Directory holding the ground-truth and ``*_ocr.txt`` files.
        accuracy: First argument handed to ``ocrevalutf8.fix``.
        output_name: Optional stem of the CSV written under ``ocr/stats``;
            defaults to ``'rahul'``.

    Side effects:
        Prints a running count of processed ground-truth files, writes
        ``ocr/stats/<output_name>.csv`` with ``file`` and ``accuracy``
        columns, and prints the mean accuracy (``nan`` when nothing could
        be scored).
    """
    path = kwargs['path']
    acc = kwargs['accuracy']
    output_name = kwargs.get('output_name', 'rahul')

    evaluator = 'ocr-evaluation-tools/dist/bin/ocrevalutf8.fix'
    stats_dir = 'ocr/stats'

    scores = {}
    processed = 0
    # Sorted so that the CSV row order does not depend on os.listdir order.
    for name in sorted(os.listdir(path)):
        # Decide on the file name only: a '_ocr' or '.' inside the directory
        # part of the path must not affect which files are paired.
        if '_ocr' in name:
            continue
        gt_file = path + '/' + name
        pr_file = path + '/' + name.split('.')[0] + '_ocr.txt'

        processed += 1
        print(processed)

        cmd = [evaluator, str(acc), gt_file, pr_file]
        try:
            process = subprocess.run(cmd, stdout=subprocess.PIPE)
            report = process.stdout.decode().splitlines()
            # The score is the first token of the fifth output line, e.g. "97.5%".
            scores[gt_file] = float(report[4].strip().split()[0].replace('%', ''))
        except (OSError, IndexError, ValueError) as exc:
            # Best effort: one unscorable pair should not abort the whole run.
            print('{}: {}'.format(gt_file, exc))

    df = pd.DataFrame(list(scores.items()), columns=['file', 'accuracy'])
    # Create the output directory so a long run is not lost at the final write.
    os.makedirs(stats_dir, exist_ok=True)
    df.to_csv('{}/{}.csv'.format(stats_dir, output_name))

    values = list(scores.values())
    print(np.mean(values) if values else float('nan'))
def main(**kwargs):
    """Build a Config from keyword overrides and run the accuracy evaluation.

    The keyword arguments are handed to ``Config._parse``; the resulting
    ``path`` and ``accuracy`` settings are forwarded to
    ``calculate_word_accuracy``.
    """
    config = Config()
    config._parse(kwargs)
    # Read both settings before starting the evaluation.
    target_dir, metric = config.path, config.accuracy
    calculate_word_accuracy(path=target_dir, accuracy=metric)
if __name__ == '__main__':
    # Imported here so that `fire` is only needed when run as a script.
    import fire

    # Expose main's keyword arguments as command-line flags.
    fire.Fire(main)
# python -m ocr.baselines.evaluate --path=<path>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment