Skip to content

Instantly share code, notes, and snippets.

@jdesboeufs
Created December 17, 2015 08:54
Show Gist options
  • Save jdesboeufs/b14ca07f89636706d0c0 to your computer and use it in GitHub Desktop.
Save jdesboeufs/b14ca07f89636706d0c0 to your computer and use it in GitHub Desktop.
Prototype: parsing a French income-tax notice ("avis IR", avis d'impôt sur le revenu) from a PDF
var PDFParser = require('pdf2json');
var _ = require('lodash');
var _s = require('underscore.string');
// Single pdf2json parser instance; the event listeners registered below do all the work.
var parser = new PDFParser();
// Reference timestamp used to report how long the PDF parsing phase took.
var startTime = Date.now();
// Listener for the "pdfParser_dataReady" event. The handler reads the parsed
// document from `this.data` and only inspects the first page (`Pages[0]`).
parser.on("pdfParser_dataReady", function() {
// Marks the end of the PDF parsing phase and the start of our own processing.
var endOfParsing = Date.now();

// Flatten every text block of the first page into { x, y, content }.
// A block's text is spread over several runs (`R`), each stored URI-encoded in
// `T`; the decoded runs are concatenated without a separator.
var entries = this.data.Pages[0].Texts.map(function(textEntry) {
  var content = textEntry.R.map(function(singleTextEntry) {
    return decodeURIComponent(singleTextEntry.T);
  }).join('');
  return { x: textEntry.x, y: textEntry.y, content: content };
});

// Entries sharing the exact same `y`, keyed by that coordinate (object keys,
// hence strings).
var yGroupedEntries = _.groupBy(entries, 'y');
// Lookup table from an entry's exact text to the entry itself.
// NOTE(review): when several entries carry the same text only one of them is
// kept (the last one, per lodash's indexBy documentation) — confirm labels are unique.
var contentIndexedEntries = _.indexBy(entries, 'content');
// Debug dump of everything found on the page.
console.log(entries);
/**
 * Resolve a reference to a text entry.
 *
 * @param {string|Object} textEntry - Either the exact text of an entry (looked
 *   up in `contentIndexedEntries`) or an entry object, which is returned as is.
 * @returns {Object} The resolved entry.
 * @throws {Error} When a text label is given and no entry carries that text.
 */
function ensureTextEntry(textEntry) {
  if (!_.isString(textEntry)) {
    return textEntry;
  }
  // Own-property check: a label such as "constructor" must not resolve to a
  // member inherited from Object.prototype, and a missing label must fail here
  // with a clear message rather than later as "cannot read property of undefined".
  if (!Object.prototype.hasOwnProperty.call(contentIndexedEntries, textEntry)) {
    throw new Error('Text entry not found: "' + textEntry + '"');
  }
  return contentIndexedEntries[textEntry];
}
/**
 * Tell whether `that` has a strictly smaller `y` than `reference`.
 *
 * @param {Object} that - Entry being tested.
 * @param {string|Object} reference - Entry, or exact text of the entry, to compare against.
 * @returns {boolean} True when `that.y` is below the reference's `y` in value.
 */
function topOf(that, reference) {
  var anchor = ensureTextEntry(reference);
  return anchor.y > that.y;
}
/**
 * Tell whether `that` has a strictly greater `y` than `reference`.
 *
 * @param {Object} that - Entry being tested.
 * @param {string|Object} reference - Entry, or exact text of the entry, to compare against.
 * @returns {boolean} True when `that.y` is above the reference's `y` in value.
 */
function bottomOf(that, reference) {
  var anchor = ensureTextEntry(reference);
  return anchor.y < that.y;
}
/**
 * Tell whether `that` has a strictly greater `x` than `reference`.
 *
 * @param {Object} that - Entry being tested.
 * @param {string|Object} reference - Entry, or exact text of the entry, to compare against.
 * @returns {boolean} True when `that.x` is above the reference's `x` in value.
 */
function rightOf(that, reference) {
  var anchor = ensureTextEntry(reference);
  return anchor.x < that.x;
}
/**
 * Tell whether `that` has a strictly smaller `x` than `reference`.
 *
 * @param {Object} that - Entry being tested.
 * @param {string|Object} reference - Entry, or exact text of the entry, to compare against.
 * @returns {boolean} True when `that.x` is below the reference's `x` in value.
 */
function leftOf(that, reference) {
  var anchor = ensureTextEntry(reference);
  return anchor.x > that.x;
}
/**
 * List the entries that sit on the same line as `that` and to its right.
 *
 * @param {string|Object} that - Anchor entry, or the exact text of one.
 * @param {number} [tolerance] - Largest absolute `y` difference for an entry to
 *   count as being on the anchor's line. When falsy or not positive, only the
 *   entries grouped under exactly the anchor's `y` are considered.
 * @returns {Object[]} Entries whose `x` is greater than the anchor's, ordered
 *   by ascending `x`.
 */
function sameLineRight(that, tolerance) {
  var anchor = ensureTextEntry(that);
  var maxGap = tolerance || 0;
  var candidates;

  if (maxGap > 0) {
    candidates = [];
    // Group keys are strings; the subtraction coerces them back to numbers.
    Object.keys(yGroupedEntries).forEach(function(y) {
      if (Math.abs(y - anchor.y) <= maxGap) {
        Array.prototype.push.apply(candidates, yGroupedEntries[y]);
      }
    });
  } else {
    candidates = yGroupedEntries[anchor.y];
  }

  var leftToRight = _.sortBy(candidates, 'x');
  return _.filter(leftToRight, function(candidate) {
    return rightOf(candidate, anchor);
  });
}
/**
 * Return the first entry reported by `sameLineRight` for the given arguments.
 *
 * @param {string|Object} that - Anchor entry, or the exact text of one.
 * @param {number} [tolerance] - Forwarded unchanged to `sameLineRight`.
 * @returns {Object|undefined} The first entry of that list, or undefined when
 *   the list is empty.
 */
function sameLineImmediateRight(that, tolerance) {
  var neighbours = sameLineRight(that, tolerance);
  return neighbours[0];
}
// Values extracted from the page.
var data = { declarants: {} };

// Every entry whose text starts with "Déclarant" and whose `y` lies strictly
// between the "Numéro fiscal :" and "Référence de l'avis :" labels is paired
// with the entry immediately to its right (exact same `y`), spaces removed.
_.filter(entries, function(entry) {
  return bottomOf(entry, 'Numéro fiscal :') &&
    topOf(entry, 'Référence de l\'avis :') &&
    _s.startsWith(entry.content, 'Déclarant');
}).forEach(function(declarant) {
  data.declarants[declarant.content] = sameLineImmediateRight(declarant).content.split(' ').join('');
});

// Value printed right after the label, allowing a 0.1 vertical offset.
data.referenceAvis = sameLineImmediateRight('Référence de l\'avis :', 0.1).content.split(' ').join('');

// map(), not forEach(): forEach() always returns undefined, which left this
// field empty. Each value is stored with its spaces removed, like the fields above.
// NOTE(review): confirm the desired shape (raw strings vs. parsed amounts).
data.traitementsSalaires = sameLineRight('Traitements, salaires', 0.1).map(function(valeur) {
  return valeur.content.split(' ').join('');
});

console.log(data);
// Report the duration of both phases: from script start to the end of PDF
// parsing, then from the end of parsing to the end of our own extraction.
var endOfProcessing = Date.now();
console.log('Parsing: ', endOfParsing - startTime, 'ms');
console.log('Processing: ', endOfProcessing - endOfParsing, 'ms');
});
// Report parsing failures on stderr.
// The payload passed to the listener is logged when there is one, because it
// is what carries the error details (the pdf2json README reads
// `errData.parserError`); `this` is only used as a fallback.
// NOTE(review): older pdf2json releases appear to pass the parser itself as the
// payload, in which case the output is the same object as before — confirm
// against the installed version.
parser.on("pdfParser_dataError", function(errData) {
  console.error('An error occurred!');
  console.error(errData === undefined ? this : errData);
});
// Load the sample PDF stored next to this script; the outcome is handled by
// the "pdfParser_dataReady" / "pdfParser_dataError" listeners registered above.
parser.loadPDF(__dirname + '/IR-Justif-91-2013-14922554377149.pdf');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment