Skip to content

Instantly share code, notes, and snippets.

@eng-rodrigocunha
Last active March 12, 2023 02:29
Show Gist options
  • Save eng-rodrigocunha/d81b8525ba218ed48f1c8a90d18204a8 to your computer and use it in GitHub Desktop.
Save eng-rodrigocunha/d81b8525ba218ed48f1c8a90d18204a8 to your computer and use it in GitHub Desktop.
Realiza scraping na Carteira Nacional de Vacinação Digital ou no Certificado Nacional de Vacinação Covid-19 emitido através do ConecteSUS para identificar quantas doses de COVID-19 foram administradas
/**
 * Convert a PDF file stored in Google Drive to plain text using Drive OCR.
 *
 * The PDF is uploaded again through the advanced Drive service with OCR
 * enabled, which yields a temporary Google Document. The text of that
 * document is read, the temporary document is trashed, and a copy of the
 * text is saved as `<title>.txt` through `DriveApp.createFile`.
 *
 * References:
 *   https://www.labnol.org/extract-text-from-pdf-220422
 *   IMPORTANT! https://www.labnol.org/shared-drives-google-script-220128
 *
 * @param {string} [fileId] - The Google Drive ID of the PDF. Any falsy value
 *   falls back to the sample file ID hard-coded below.
 * @param {string} [language] - The language of the PDF text to use for OCR.
 *   Any falsy value falls back to 'pt'.
 * @returns {string} The extracted text of the PDF file.
 * @throws Propagates any error raised by the Drive / Document services.
 */
const convertPDFToText = (fileId, language) => {
  const sourceFileId = fileId || '1eFIfRKUTvhtX1TMKRWWyIXYIe7R-F2cA';
  const ocrLanguage = language || 'pt';

  // Read the PDF file in Google Drive
  const pdfDocument = DriveApp.getFileById(sourceFileId);

  // Use OCR to convert the PDF to a temporary Google Document.
  // Restrict the response to the file id and title fields only.
  const { id, title } = Drive.Files.insert(
    {
      // Strip the extension regardless of its letter case (".pdf", ".PDF", ...)
      title: pdfDocument.getName().replace(/\.pdf$/i, ''),
      mimeType: pdfDocument.getMimeType() || 'application/pdf',
    },
    pdfDocument.getBlob(),
    {
      ocr: true,
      ocrLanguage,
      fields: 'id,title',
    }
  );

  let textContent;
  try {
    // Use the Document API to extract text from the Google Document
    textContent = DocumentApp.openById(id).getBody().getText();
  } finally {
    // Always trash the temporary Google Document, even when reading it
    // failed, so failed runs do not leave OCR documents behind in Drive.
    DriveApp.getFileById(id).setTrashed(true);
  }

  // (optional) Save the text content to another text file in Google Drive
  DriveApp.createFile(`${title}.txt`, textContent, 'text/plain');

  return textContent;
};
/**
 * Count how many distinct COVID-19 dose labels appear in the OCR text of a
 * vaccination PDF and log the result.
 *
 * The PDF is converted to text with `convertPDFToText` (OCR language 'pt').
 * Each trimmed line, and each trimmed line joined by a single space with the
 * following trimmed line, is compared for exact equality against the known
 * dose labels. Every label is counted at most once.
 *
 * @param {string} [fileId] - The Google Drive ID of the PDF. Any falsy value
 *   falls back to the sample file ID hard-coded below.
 * @returns {number} The number of distinct dose labels found.
 */
function getVaccinationStatus(fileId) {
  const sourceFileId = fileId || '1eFIfRKUTvhtX1TMKRWWyIXYIe7R-F2cA';
  const text = convertPDFToText(sourceFileId, 'pt');

  // Break the text into trimmed lines
  const lines = text.split('\n').map((rawLine) => rawLine.trim());
  const doseLabels = ['Reforço', 'Dose Adicional', '2/2', '1/2'];
  const recognizedDoses = new Set();

  lines.forEach((line, index) => {
    const candidates = [line];
    // When there is a next line, also try the current line joined with it
    // (needed because "Dose Adicional" can be broken across two lines).
    if (index + 1 < lines.length) {
      candidates.push(`${line} ${lines[index + 1]}`);
    }
    for (const label of doseLabels) {
      if (candidates.includes(label)) {
        // A Set keeps each dose label counted only once
        recognizedDoses.add(label);
      }
    }
  });

  const numDoses = recognizedDoses.size;
  console.log(`Número de doses administradas: ${numDoses}`);
  return numDoses;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment