Skip to content

Instantly share code, notes, and snippets.

@Sphinxxxx
Last active April 25, 2019 22:29
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Sphinxxxx/8e2e4523a7426031cf4e65e8ae3260cd to your computer and use it in GitHub Desktop.
Save Sphinxxxx/8e2e4523a7426031cf4e65e8ae3260cd to your computer and use it in GitHub Desktop.
Extract PDF images
<script>console.clear();</script>
<script src="https://unpkg.com/pdfjs-dist@2/build/pdf.js"></script>
<script src="https://unpkg.com/pdfjs-dist@2/build/pdf.worker.js"></script>
<script src="https://unpkg.com/vue@2"></script>
<script src="https://unpkg.com/abo-utils@0.3"></script>
<script type="text/x-template" id="templ-page">
<div>
<details v-if="p.svg.doc" @click="handleSVG" ref="svgContainer">
<summary>Page {{p.number}} (click for SVG rendered version)</summary>
</details>
<h4 v-else>Page {{p.number}}</h4>
<ul>
<li v-for="image in p.images">
<figure>
<a :href="image.url" :download="image.name"><img :src="image.url" :alt="image.name"></a>
<figcaption>{{image.name}}</figcaption>
</figure>
</li>
</ul>
</div>
</script>
<header>
<h1>Extract PDF images</h1>
</header>
<!--
Root Vue template (mounted through `el: '#app'`). Because this markup lives in the DOM it is
parsed by the browser before Vue sees it, so custom elements need an explicit closing tag:
the HTML parser ignores the `/` of a self-closing custom tag.
-->
<main id="app">
    <label id="pdfs">
        <input type="file" multiple :accept="mime" />
        <span>Open PDFs</span>
    </label>
    <h2 v-if="docs.length">(Click the images to download)</h2>
    <section v-for="doc in docs">
        <h3>{{doc.name}}</h3>
        <ul>
            <li v-for="page in doc.pages">
                <page :p="page"></page>
            </li>
        </ul>
    </section>
<pre>
{{ /* docs */ }}
</pre>
</main>
(function() {
// Short aliases for the globals provided by the <script> includes.
const PDFJS = pdfjsLib;
const pdfMime = 'application/pdf';
const ad = ABOUtils.DOM;
const [$, $$] = ad.selectors();

// Shared data object handed to the root Vue instance:
// `mime` feeds the file input's `accept` attribute, `docs` holds the opened PDFs.
const state = { mime: pdfMime, docs: [] };
//https://stackoverflow.com/a/39855420/1869660
//https://www.sitepoint.com/custom-pdf-rendering/#renderingusingsvg
/**
 * Extracts the JPEG images of one PDF page and renders the page as SVG.
 *
 * Results are written into `pageInfo` as they become available:
 *  - `pageInfo.images` receives one `{ name, url }` entry per `paintJpegXObject`
 *    operation; `url` is filled in once PDF.js hands over the image object.
 *  - `pageInfo.svg` is replaced synchronously by `{ w, h, doc: '' }` and `doc`
 *    is set to the rendered SVG when rendering finishes.
 *
 * The operator list is requested once and shared by both steps. A failure in
 * either step is logged and does not affect the other one.
 *
 * @param {object} page - PDF.js page proxy (uses `getOperatorList`, `getViewport`, `objs`, `commonObjs`).
 * @param {{name: string, images: Array, svg: object}} pageInfo - Page record to fill in.
 * @returns {Promise<void[]>} Settles when both steps have finished; never rejects.
 */
function parsePage(page, pageInfo) {
    const logFailure = (step) => (error) => {
        console.error(`Failed to ${step} for ${pageInfo.name}`, error);
    };

    const operatorList = page.getOperatorList();

    const imagesDone = operatorList
        .then((ops) => {
            console.log('ops', ops);
            const { fnArray, argsArray } = ops;
            let imgsFound = 0;

            argsArray.forEach((arg, i) => {
                //Not a JPEG resource:
                if (fnArray[i] !== PDFJS.OPS.paintJpegXObject) { return; }

                console.log('loading', arg);
                imgsFound++;

                const imgKey = arg[0];
                const imgInfo = {
                    name: `${pageInfo.name}-${imgsFound}.jpg`,
                    url: '',
                };
                // Push first so the entries keep document order even if the
                // image objects resolve out of order.
                pageInfo.images.push(imgInfo);
                page.objs.get(imgKey, (img) => {
                    imgInfo.url = img.src;
                });
            });
        })
        .catch(logFailure('extract images'));

    //Full SVG:
    // Get viewport (dimensions)
    const scale = 1.5;
    const viewport = page.getViewport({ scale });
    // Assigned up front (with an empty `doc`) so the record already has its
    // final shape when the SVG arrives later.
    pageInfo.svg = {
        w: viewport.width,
        h: viewport.height,
        doc: '',
    };

    // SVG rendering by PDF.js
    const svgDone = operatorList
        .then((opList) => {
            const svgGfx = new PDFJS.SVGGraphics(page.commonObjs, page.objs);
            return svgGfx.getSVG(opList, viewport);
        })
        .then((svg) => {
            pageInfo.svg.doc = svg;
        })
        .catch(logFailure('render SVG'));

    return Promise.all([imagesDone, svgDone]);
}
/**
 * Opens every supplied PDF and publishes the resulting document list.
 *
 * One `{ name, pages }` record is created per entry right away and the whole
 * list replaces `state.docs`. Each record's `pages` array is filled later,
 * once PDF.js has loaded the document; every page is handed to `parsePage`.
 * A document that cannot be opened triggers an alert and keeps an empty
 * `pages` array.
 *
 * @param {Array<{file: {name: string}, url: string}>} data - Files to open.
 */
function handleFiles(data) {
    const loadedDocs = [];

    data.forEach((entry) => {
        const title = entry.file.name;
        const pageList = [];
        loadedDocs.push({ name: title, pages: pageList });

        // Creates one record per page and starts parsing it.
        const onLoaded = (pdf) => {
            let pageNo = 1;
            while (pageNo <= pdf.numPages) {
                const info = {
                    number: pageNo,
                    name: title + '-' + pageNo,
                    images: [],
                    svg: {},
                };
                pageList.push(info);
                pdf.getPage(pageNo).then((page) => parsePage(page, info));
                pageNo += 1;
            }
        };

        const onFailed = (error) => {
            alert('Failed to open ' + title);
            console.log(error);
        };

        const loadingTask = PDFJS.getDocument({ url: entry.url });
        loadingTask.promise.then(onLoaded).catch(onFailed);
    });

    state.docs = loadedDocs;
    console.log(state);
}
// Options for the `page` component: renders one PDF page record (`p`) using
// the #templ-page template.
const pageComponent = {
    template: '#templ-page',
    props: ['p'],
    data: () => ({
        checked: false,
        title: 'Check me',
    }),
    methods: {
        /**
         * Click handler of the <details> element.
         * A click on an element that carries an SVG `href` opens that URL in
         * a new window; any other click appends the page's rendered SVG node
         * to the <details> container.
         */
        handleSVG(e) {
            const imgUrl = e.target.href?.baseVal;
            if (!imgUrl) {
                this.$refs.svgContainer.appendChild(this.p.svg.doc);
                return;
            }
            console.log(imgUrl);
            window.open(imgUrl, '_blank');
        },
    },
};
Vue.component('page', pageComponent);
// Mount the root Vue instance on #app, using the shared `state` object as its data.
new Vue({
el: '#app',
data: state,
});
// Send PDFs from both the file input inside #pdfs and the whole document to handleFiles.
// NOTE(review): how files are picked up (selection vs. drag-and-drop) and the shape of the
// callback argument are defined by ABOUtils.DOM.dropFiles — confirm against abo-utils.
ad.dropFiles($('#pdfs input'), handleFiles, { acceptedTypes: [pdfMime] });
ad.dropFiles(document, handleFiles, { acceptedTypes: [pdfMime] });
})();
/* Page-wide base styles; the nested rules below are scoped under `body`. */
body {
font-family: Georgia, sans-serif;
h1 {
text-align: center;
}
/* Collapsible per-page section that receives the rendered SVG. */
details {
background: gold;
summary {
cursor: pointer;
}
}
ul {
list-style: none;
}
/* Keep extracted images, and the SVG node appended into <details>, within the container width. */
img, details > svg {
max-width: 100%;
height: auto;
}
}
/* The "Open PDFs" label, styled as a large full-width button. */
#pdfs {
    display: inline-block;
    width: 100%;
    box-sizing: border-box;
    padding: 2em;
    font-size: 2em;
    text-align: center;
    color: white;
    background: dodgerblue;
    border: .25em dashed lightskyblue;
    cursor: pointer;

    /* Nested rule kept after the declarations: declarations that follow a nested
       rule are deprecated in Sass and are ordered differently by native CSS nesting. */
    /* The native file input is hidden; the surrounding label is the click target. */
    input {
        display: none;
    }
}
/* Pointer cursor on <image> elements inside SVG output: the page component's click
   handler opens the `href` of a clicked element in a new window. */
svg {
image {
cursor: pointer;
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment