Skip to content

Instantly share code, notes, and snippets.

@karimsa
Created February 2, 2017 18:33
Show Gist options
  • Save karimsa/935e85b7c1b5a8ab616d9c10494db55c to your computer and use it in GitHub Desktop.
Save karimsa/935e85b7c1b5a8ab616d9c10494db55c to your computer and use it in GitHub Desktop.
Algorithm to read basic receipts using OCR response from MS cognitive services. Built for McHacks 2017.
/**
* getReceipt.js - Split
* Logic to convert a JSON response from
* the MS Cognitive Services OCR API into
* proper receiptable data.
*
* Licensed under MIT license.
* Copyright (C) 2017 Karim Alibhai.
*/
'use strict'
/**
* Algorithm for vertical distance.
* Assumes that 'a' is the line that is higher.
*/
const vdist = (a, b) => (b.boundingBox.y - (a.boundingBox.y + a.boundingBox.height))
/**
* Algorithm for horizontal distance.
* Assumes that 'a' is the line that is on the left.
*/
const hdist = (a, b) => (b.boundingBox.x - (a.boundingBox.x + a.boundingBox.width))
/**
* Verify that text includes a number.
*/
const isNumber = text => (
// it's a valid string
text &&
// it's got numbers and a decimal
/[$0-9.\s]+/g.test(text) &&
// it doesn't have letters
!/[a-z]/gi.test(text)
)
/**
* Transforms a response from MS cognitive services OCR
* into a receipt items dictionary.
*/
module.exports = res => new Promise((resolve, reject) => {
const data = {}, lines = []
let docWidth = 0, docHeight = 0
let averageVDist = 0, averageHDist = 0
if (res.language !== 'en') return reject('Unsupported language: ' + res.language)
if (res.textAngle !== 0 || res.orientation !== 'Up') {
data.warning = 'Photo is a bit unclear. Please try to ensure that' +
'your receipt is straight and facing the right way.'
}
// we don't care about word-by-word, so let's
// collapse lines
//
// let's also collapse regions because the default
// region decomposition algorithm is based on general
// euclidean distance and we just want to decompose
// based upon vertical distance
res.regions.forEach(region => {
region.lines.reduce((arr, line) => {
// collapse words into line
line.words = line.words.map(word => word.text).join(' ')
// fix spacing issues
.replace(/\s+/g, ' ').trim()
// destructure the bounding box, since we're looping anyways
let [ x, y, width, height ] = line.boundingBox.split(',').map(i => parseInt(i, 10))
line.boundingBox = { x, y, width, height }
// update docWidth and docHeight
docWidth = Math.max(docWidth, x + width)
docHeight = Math.max(docHeight, y + height)
// faster than concat
arr.push(line)
return arr
}, lines)
})
// let's sort, so that our algorithms always work
// sorting is ascending y, then ascending x
lines
.sort((a, b) => a.boundingBox.x - b.boundingBox.x)
.sort((a, b) => a.boundingBox.y - b.boundingBox.y)
// find distances between lines
lines.forEach((line, index, self) => {
if (index === 0) {
line.boundingBox.fromTop = 0
} else {
averageVDist += Math.abs(line.boundingBox.fromTop = vdist( self[index - 1], line ))
}
})
// find average
averageVDist /= lines.length
// decompose regions by average
let region = [ lines[0] ]
let regions = [region]
lines.forEach((line, index) => {
if (index !== 0) {
if (line.boundingBox.fromTop > averageVDist) {
regions.push(region = [line])
} else {
region.push(line)
}
}
})
// identify average line differences in regions,
// and use them to collapse lines into sub-regions
regions = regions.map((region, ri) => {
const averageLineDiff = region.reduce((sum, line) => {
return sum + Math.max(line.boundingBox.fromTop, 0)
}, 0) / region.length
//console.log('region #%s : diff of %s', ri, averageLineDiff)
let subreg = []
const lines = [subreg]
for (let i = 0; i < region.length; ++ i) {
if (i !== 0 && region[i].boundingBox.fromTop > averageLineDiff) {
lines.push(subreg = [region[i]])
} else {
subreg.push(region[i])
}
}
return lines
})
// do some clean-up and sorting
regions = regions.map(region =>
region.map(subreg => {
let nstrings = 0, nints = 0
subreg = subreg.sort((a, b) =>
(a.boundingBox.x - b.boundingBox.x)
).map(line => {
line = line.words
// parse numbers
if (isNumber(line)) {
nints += 1
line = parseFloat(line.replace(/[$\s]/g, ''))
} else {
nstrings += 1
}
return line
})
// some merging, required for some receipts
if (nstrings === nints) {
let sub = []
for (let i = 0; typeof subreg[i] === 'string' && i < subreg.length; ++ i) {
sub.push([ subreg[i], subreg[i + nstrings] ])
}
return sub
}
return subreg
}).reduce((a,b) => a.concat(b[0] instanceof Array ? b : [b]), [])
)
// debugging
//console.log(JSON.stringify(regions, null, 2))
// find what is most probably a list of items
let max = [-Infinity,-1]
regions.forEach((region, index) => {
let num = 0
region.forEach(subreg =>
num += (subreg.length === 2 && typeof subreg[0] === 'string' && typeof subreg[1] === 'number')
)
if (num > max[0]) max = [num,index]
})
// if not found, we have failed
if (max[1] === -1) {
return reject('Unable to read receipt.')
}
// create dictionary
const items = {}
regions[max[1]].forEach(subreg => {
if (typeof subreg[0] === 'string') {
items[subreg[0]] = subreg[1]
}
})
// resolve with items list
resolve(items)
})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment