Skip to content

Instantly share code, notes, and snippets.

@imolorhe
Forked from jdeng/pdf2img.html
Created August 13, 2018 09:38
Show Gist options
  • Star 17 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save imolorhe/f8794d3bb55e1a8065b23bcd0efeebe1 to your computer and use it in GitHub Desktop.
Save imolorhe/f8794d3bb55e1a8065b23bcd0efeebe1 to your computer and use it in GitHub Desktop.
pdf to image using pdf.js
<html>
<body>
<script type="text/javascript" src="//mozilla.github.io/pdf.js/build/pdf.js"></script>
<script type="text/javascript">
// Address of the PDF to convert; it is handed to PDFJS.getDocument() below.
var url = "https://docs.google.com/document/export?format=pdf&id=1ML11ZyyMpnAr6clIAwWrXD53pQgNR-DppMYwt9XvE6s&token=AC4w5Vg7fSWH1Hq0SgNckx4YCvnGPaScyw%3A1423618416864";
// Shared state filled in while the pages are rendered and read back by draw():
//   pages       - ImageData captured from each rendered page, in page order
//   heights     - y offset at which the matching entry of `pages` is drawn
//   width       - widest page canvas seen so far (width of the combined canvas)
//   height      - running sum of page canvas heights (height of the combined canvas)
//   currentPage - number of the page being fetched via pdf.getPage(); starts at 1
var pages = [], heights = [], width = 0, height = 0, currentPage = 1;
// Zoom factor passed to page.getViewport() for every page.
var scale = 1.5;
// Stitches every captured page into one tall canvas and appends that canvas
// to the document body. Reads the shared `pages`, `heights`, `width` and
// `height` variables; takes no arguments and returns nothing.
function draw() {
  var output = document.createElement('canvas');
  var context = output.getContext('2d');
  output.width = width;
  output.height = height;
  // Each page is painted at x = 0 and at the vertical offset recorded for it.
  pages.forEach(function (imageData, index) {
    context.putImageData(imageData, 0, heights[index]);
  });
  document.body.appendChild(output);
}
// Run pdf.js without its worker (the original author notes this is due to CORS).
PDFJS.disableWorker = true; // due to CORS
// Load the document, then render its pages strictly one after another:
// the next page is requested only once the current one has finished rendering.
PDFJS.getDocument(url).then(function (pdf) {
  renderNextPage();

  // Renders page number `currentPage` into an off-screen canvas, records its
  // pixels and vertical offset, then continues with the next page or, after
  // the last page, calls draw() to assemble the combined image.
  function renderNextPage() {
    pdf.getPage(currentPage).then(function (pdfPage) {
      console.log("Printing " + currentPage);
      var pageViewport = pdfPage.getViewport(scale);
      var pageCanvas = document.createElement('canvas');
      var pageCtx = pageCanvas.getContext('2d');
      pageCanvas.height = pageViewport.height;
      pageCanvas.width = pageViewport.width;
      pdfPage.render({ canvasContext: pageCtx, viewport: pageViewport }).then(function () {
        pages.push(pageCtx.getImageData(0, 0, pageCanvas.width, pageCanvas.height));
        // The page starts where the previous pages end.
        heights.push(height);
        height += pageCanvas.height;
        width = Math.max(width, pageCanvas.width);
        if (currentPage >= pdf.numPages) {
          draw();
          return;
        }
        currentPage++;
        renderNextPage();
      });
    });
  }
});
</script>
</body>
</html>
@microwavePC
Copy link

This code is outdated and no longer works.
In current pdf.js releases the global `PDFJS` has been renamed: PDFJS -> pdfjsLib

@magidandrew
Copy link

magidandrew commented May 17, 2022

microwavePC is right; the code needs to be updated. Here's a fix for pdf.js 2.14.305, the newest release of the library at the time of writing:

// Shared state filled in while the pages are rendered and read back by draw():
//   pages       - ImageData captured from each rendered page, in page order
//   heights     - y offset at which the matching entry of `pages` is drawn
//   width       - widest page canvas seen so far (width of the combined canvas)
//   height      - running sum of page canvas heights (height of the combined canvas)
//   currentPage - number of the page being fetched via pdf.getPage(); starts at 1
var pages = [], heights = [], width = 0, height = 0, currentPage = 1;
// Zoom factor passed to page.getViewport() for every page.
var scale = 1.5;

// Combines all captured pages into a single tall canvas and appends it to
// the document body. Uses the shared `pages`, `heights`, `width` and
// `height` variables; takes no arguments and returns nothing.
function draw() {
    const stitched = document.createElement('canvas');
    const stitchedCtx = stitched.getContext('2d');
    stitched.width = width;
    stitched.height = height;
    // Paint each page at x = 0, at the vertical offset recorded for it.
    for (const [index, imageData] of pages.entries()) {
        stitchedCtx.putImageData(imageData, 0, heights[index]);
    }
    document.body.appendChild(stitched);
}

// Load the PDF and render its pages one after another (the next page starts
// only after the current one has finished rendering), then combine them with
// draw().
// NOTE: `url` is not declared in this snippet; it must already be defined
// before this code runs.
const thisdoc = pdfjsLib.getDocument(url);
thisdoc.promise.then(function (pdf) {
    return getPage();

    // Renders page number `currentPage` into an off-screen canvas, records its
    // pixels and vertical offset, then either continues with the next page or
    // calls draw() after the last one. Every nested promise is returned so
    // that a failure at any step reaches the .catch() at the end of the chain.
    function getPage() {
        return pdf.getPage(currentPage).then(function(page) {
            console.log("Printing:" + currentPage);
            var viewport = page.getViewport({scale});
            var canvas = document.createElement('canvas') , ctx = canvas.getContext('2d');
            var renderContext = { canvasContext: ctx, viewport: viewport };

            canvas.height = viewport.height;
            canvas.width = viewport.width;

            return page.render(renderContext).promise.then(function() {
                pages.push(ctx.getImageData(0, 0, canvas.width, canvas.height));

                // This page starts where the previously rendered pages end.
                heights.push(height);
                height += canvas.height;
                if (width < canvas.width) width = canvas.width;

                if (currentPage < pdf.numPages) {
                    currentPage++;
                    return getPage();
                }
                draw();
            });
        });
    }
}).catch(function (err) {
    // Without a handler, a failed download, parse or render would only show
    // up as an unhandled rejection while no image is ever produced.
    console.error("PDF to image conversion failed:", err);
});

@aminh09cs
Copy link

The code doesn't work, I think it's missing something

@joaquin102
Copy link

Not working for me either

@ikantthink
Copy link

ikantthink commented Jan 19, 2024

Here is a similar function I wrote for converting PDFs to base64 images. It returns an array containing one image per page of the PDF.

<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.min.js" integrity="sha512-q+4liFwdPC/bNdhUpZx6aXDx/h77yEQtn4I1slHydcbZK34nLaR3cAeYSJshoxIOq3mjEf7xJE8YWIUHMn+oCQ==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/pdf.js/3.11.174/pdf.worker.min.js" integrity="sha512-BbrZ76UNZq5BhH7LL7pn9A4TKQpQeNCHOo65/akfelcIBbcVvYWOFQKPXIrykE3qZxYjmDX573oa4Ywsc7rpTw==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>
/**
 * Downloads a PDF and renders every page to an image.
 *
 * Pages are rendered one at a time at a scale of 1.5, each into its own
 * off-screen canvas, and exported with `canvas.toDataURL()`.
 *
 * Relies on the globals `pdfjsLib`, `fetch` and `document`.
 *
 * @param {string} url - Address of the PDF to download.
 * @returns {Promise<string[]>} Data URLs of the rendered pages, in page order.
 * @throws {Error} If the download fails; errors raised by pdf.js while
 *   parsing or rendering are propagated as rejections too.
 */
export async function PDFtoIMG(url) {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`PDFtoIMG: failed to fetch ${url} (HTTP ${response.status})`);
  }
  const fileArray = new Uint8Array(await response.arrayBuffer());
  const doc = await pdfjsLib.getDocument({
    data: fileArray,
    useSystemFonts: true,
  }).promise;

  console.log('PDFtoIMG: url, doc', url, doc);
  const pages = [];

  for (let i = 1; i <= doc.numPages; i++) {
    const page = await doc.getPage(i);
    const viewport = page.getViewport({ scale: 1.5 });
    const canvas = document.createElement('canvas');
    const ctx = canvas.getContext('2d');
    canvas.width = viewport.width;
    canvas.height = viewport.height;
    // Wait for the render to finish before exporting, so the image is
    // complete and the result keeps the document's page order.
    await page.render({ canvasContext: ctx, viewport }).promise;
    pages.push(canvas.toDataURL());
  }

  return pages;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment