Created
October 21, 2023 15:52
-
-
Save navtej/9706fb4b49189df8a273d1e4ed3f4937 to your computer and use it in GitHub Desktop.
Gist to bulk convert PDF files to editable google documents using Google AppScript
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Pieced together from various online sources. The core logic in convertFile() came from a Stack Overflow answer.
// Note: the script moves the original PDF to the trash after conversion. You might want to change that.
/**
 * Drains the iterator returned by `folder.getFilesByType(filetype)` into an array.
 *
 * @param {Object} folder - Object exposing `getFilesByType(filetype)`, which must
 *     return an iterator with `hasNext()` / `next()`.
 * @param {string} filetype - Value forwarded unchanged to `getFilesByType`.
 * @returns {Array} The files in iterator order; empty when the iterator is empty.
 */
function getFilesList(folder, filetype) {
  const allfiles = [];
  const files = folder.getFilesByType(filetype);
  while (files.hasNext()) {
    allfiles.push(files.next());
  }
  return allfiles;
}
/**
 * Lists the sub-folders of the first Drive folder whose name matches `name`.
 *
 * Only the first result of `DriveApp.getFoldersByName(name)` is used; any
 * further folders sharing the same name are ignored.
 *
 * @param {string} name - Folder name passed to `DriveApp.getFoldersByName`.
 * @returns {Array} Sub-folders in iterator order; empty when there are none.
 * @throws {Error} When no folder with the given name is found.
 */
function getSubFoldersList(name) {
  const matches = DriveApp.getFoldersByName(name);
  // Fail with a readable message instead of the iterator's generic
  // "reached the end" error when the folder does not exist.
  if (!matches.hasNext()) {
    throw new Error(`No Drive folder named "${name}" was found`);
  }
  const elexfolder = matches.next();

  const allfolders = [];
  const subfolders = elexfolder.getFolders();
  while (subfolders.hasNext()) {
    allfolders.push(subfolders.next());
  }
  return allfolders;
}
/**
 * Converts one source file through `Drive.Files.insert` with OCR enabled, moves
 * the resulting file into the source file's first parent folder, and then
 * trashes the source file.
 *
 * Relies on the global `Drive` object (presumably the Advanced Drive Service —
 * confirm it is enabled for the script project).
 *
 * @param {Object} srcfile - Drive file to convert; must expose `getParents`,
 *     `getName`, `getMimeType`, `getBlob` and `setTrashed`.
 * @param {number} index - Zero-based position of the file in its batch; used
 *     only for the progress log.
 */
function convertFile(srcfile, index) {
  const parentFolder = srcfile.getParents().next();
  const parentFolderID = parentFolder.getId();
  const sourceName = srcfile.getName();
  Logger.log("Converting %s from folder %s", sourceName, parentFolder.getName());

  const metadata = {
    // Case-insensitive so names such as "Scan.PDF" also lose their extension.
    title: sourceName.replace(/\.pdf$/i, ''),
    mimeType: srcfile.getMimeType() || 'application/pdf',
  };
  const options = {
    ocr: true,
    fields: 'id,title',
  };
  const { id } = Drive.Files.insert(metadata, srcfile.getBlob(), options);

  moveFile(id, parentFolderID);
  // The original is trashed only after the insert and move have completed.
  srcfile.setTrashed(true);
  Logger.log("Done %d - %s", index + 1, srcfile.getName());
}
/**
 * Moves a Drive file into a Drive folder, both looked up by ID.
 *
 * @param {string} sourceFileId - ID passed to `DriveApp.getFileById`.
 * @param {string} targetFolderId - ID passed to `DriveApp.getFolderById`.
 */
function moveFile(sourceFileId, targetFolderId) {
  const file = DriveApp.getFileById(sourceFileId);
  const folder = DriveApp.getFolderById(targetFolderId);
  file.moveTo(folder);
}
/**
 * Converts every PDF that `getFilesList(folder, MimeType.PDF)` returns.
 *
 * Files are handed to `convertFile` through `Array.prototype.forEach`, so each
 * call receives the file and its zero-based index within this folder's list.
 * An exception thrown by `convertFile` propagates and stops the remaining files.
 *
 * @param {Object} folder - Drive folder whose PDF files should be converted.
 */
function convertFolder(folder) {
  const pdffiles = getFilesList(folder, MimeType.PDF);
  pdffiles.forEach(convertFile);
}
/**
 * Entry point: converts the PDFs in every sub-folder of the folder named by
 * `FOLDER_NAME`.
 *
 * Only the sub-folders returned by `getSubFoldersList` are processed; files
 * sitting directly in the named folder itself are not passed to `convertFolder`.
 */
function main() {
  const FOLDER_NAME = "test";
  // Declared locally: the bare assignment used before created an implicit
  // global (and is a ReferenceError in strict mode).
  const allsubfolders = getSubFoldersList(FOLDER_NAME);
  allsubfolders.forEach(convertFolder);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment