Created
February 2, 2017 18:33
-
-
Save karimsa/935e85b7c1b5a8ab616d9c10494db55c to your computer and use it in GitHub Desktop.
Algorithm to read basic receipts using OCR response from MS cognitive services. Built for McHacks 2017.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/** | |
* getReceipt.js - Split | |
* Logic to convert a JSON response from | |
* the MS Cognitive Services OCR API into | |
* proper receiptable data. | |
* | |
* Licensed under MIT license. | |
* Copyright (C) 2017 Karim Alibhai. | |
*/ | |
'use strict' | |
/** | |
* Algorithm for vertical distance. | |
* Assumes that 'a' is the line that is higher. | |
*/ | |
const vdist = (a, b) => (b.boundingBox.y - (a.boundingBox.y + a.boundingBox.height)) | |
/** | |
* Algorithm for horizontal distance. | |
* Assumes that 'a' is the line that is on the left. | |
*/ | |
const hdist = (a, b) => (b.boundingBox.x - (a.boundingBox.x + a.boundingBox.width)) | |
/** | |
* Verify that text includes a number. | |
*/ | |
const isNumber = text => ( | |
// it's a valid string | |
text && | |
// it's got numbers and a decimal | |
/[$0-9.\s]+/g.test(text) && | |
// it doesn't have letters | |
!/[a-z]/gi.test(text) | |
) | |
/** | |
* Transforms a response from MS cognitive services OCR | |
* into a receipt items dictionary. | |
*/ | |
module.exports = res => new Promise((resolve, reject) => { | |
const data = {}, lines = [] | |
let docWidth = 0, docHeight = 0 | |
let averageVDist = 0, averageHDist = 0 | |
if (res.language !== 'en') return reject('Unsupported language: ' + res.language) | |
if (res.textAngle !== 0 || res.orientation !== 'Up') { | |
data.warning = 'Photo is a bit unclear. Please try to ensure that' + | |
'your receipt is straight and facing the right way.' | |
} | |
// we don't care about word-by-word, so let's | |
// collapse lines | |
// | |
// let's also collapse regions because the default | |
// region decomposition algorithm is based on general | |
// euclidean distance and we just want to decompose | |
// based upon vertical distance | |
res.regions.forEach(region => { | |
region.lines.reduce((arr, line) => { | |
// collapse words into line | |
line.words = line.words.map(word => word.text).join(' ') | |
// fix spacing issues | |
.replace(/\s+/g, ' ').trim() | |
// destructure the bounding box, since we're looping anyways | |
let [ x, y, width, height ] = line.boundingBox.split(',').map(i => parseInt(i, 10)) | |
line.boundingBox = { x, y, width, height } | |
// update docWidth and docHeight | |
docWidth = Math.max(docWidth, x + width) | |
docHeight = Math.max(docHeight, y + height) | |
// faster than concat | |
arr.push(line) | |
return arr | |
}, lines) | |
}) | |
// let's sort, so that our algorithms always work | |
// sorting is ascending y, then ascending x | |
lines | |
.sort((a, b) => a.boundingBox.x - b.boundingBox.x) | |
.sort((a, b) => a.boundingBox.y - b.boundingBox.y) | |
// find distances between lines | |
lines.forEach((line, index, self) => { | |
if (index === 0) { | |
line.boundingBox.fromTop = 0 | |
} else { | |
averageVDist += Math.abs(line.boundingBox.fromTop = vdist( self[index - 1], line )) | |
} | |
}) | |
// find average | |
averageVDist /= lines.length | |
// decompose regions by average | |
let region = [ lines[0] ] | |
let regions = [region] | |
lines.forEach((line, index) => { | |
if (index !== 0) { | |
if (line.boundingBox.fromTop > averageVDist) { | |
regions.push(region = [line]) | |
} else { | |
region.push(line) | |
} | |
} | |
}) | |
// identify average line differences in regions, | |
// and use them to collapse lines into sub-regions | |
regions = regions.map((region, ri) => { | |
const averageLineDiff = region.reduce((sum, line) => { | |
return sum + Math.max(line.boundingBox.fromTop, 0) | |
}, 0) / region.length | |
//console.log('region #%s : diff of %s', ri, averageLineDiff) | |
let subreg = [] | |
const lines = [subreg] | |
for (let i = 0; i < region.length; ++ i) { | |
if (i !== 0 && region[i].boundingBox.fromTop > averageLineDiff) { | |
lines.push(subreg = [region[i]]) | |
} else { | |
subreg.push(region[i]) | |
} | |
} | |
return lines | |
}) | |
// do some clean-up and sorting | |
regions = regions.map(region => | |
region.map(subreg => { | |
let nstrings = 0, nints = 0 | |
subreg = subreg.sort((a, b) => | |
(a.boundingBox.x - b.boundingBox.x) | |
).map(line => { | |
line = line.words | |
// parse numbers | |
if (isNumber(line)) { | |
nints += 1 | |
line = parseFloat(line.replace(/[$\s]/g, '')) | |
} else { | |
nstrings += 1 | |
} | |
return line | |
}) | |
// some merging, required for some receipts | |
if (nstrings === nints) { | |
let sub = [] | |
for (let i = 0; typeof subreg[i] === 'string' && i < subreg.length; ++ i) { | |
sub.push([ subreg[i], subreg[i + nstrings] ]) | |
} | |
return sub | |
} | |
return subreg | |
}).reduce((a,b) => a.concat(b[0] instanceof Array ? b : [b]), []) | |
) | |
// debugging | |
//console.log(JSON.stringify(regions, null, 2)) | |
// find what is most probably a list of items | |
let max = [-Infinity,-1] | |
regions.forEach((region, index) => { | |
let num = 0 | |
region.forEach(subreg => | |
num += (subreg.length === 2 && typeof subreg[0] === 'string' && typeof subreg[1] === 'number') | |
) | |
if (num > max[0]) max = [num,index] | |
}) | |
// if not found, we have failed | |
if (max[1] === -1) { | |
return reject('Unable to read receipt.') | |
} | |
// create dictionary | |
const items = {} | |
regions[max[1]].forEach(subreg => { | |
if (typeof subreg[0] === 'string') { | |
items[subreg[0]] = subreg[1] | |
} | |
}) | |
// resolve with items list | |
resolve(items) | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment