extract JPEG from PDF by iTextSharp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// iTextSharp: http://itextpdf.com/
// reference: http://www.vbforums.com/showthread.php?t=530736
/// <summary>
/// Extracts the JPEG (DCTDecode) image XObjects found in the page resources of a PDF
/// and writes them, undecoded, into a directory named after the PDF (without its
/// extension) located next to the PDF.
/// </summary>
/// <param name="file">Path of the PDF file to read.</param>
/// <remarks>
/// The first JPEG found on page N is written as "NNNN.jpg"; any further JPEGs on the
/// same page are written as "NNNN_KK.jpg", where KK is the 1-based image index on that
/// page. Only XObjects listed directly in a page's resource dictionary are examined;
/// images nested inside form XObjects and inline images are not extracted.
/// </remarks>
void ExtractJpeg(string file)
{
    var sourceDir = Path.GetDirectoryName(file);
    var baseName = Path.GetFileNameWithoutExtension(file);
    var outputDir = Path.Combine(sourceDir, baseName);
    // CreateDirectory does nothing when the directory already exists.
    Directory.CreateDirectory(outputDir);

    var pdf = new PdfReader(file);
    try
    {
        int pageCount = pdf.NumberOfPages;
        for (int page = 1; page <= pageCount; page++)
        {
            var pageDict = pdf.GetPageN(page);
            var resources = PdfReader.GetPdfObject(pageDict.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (resources == null) continue; // page has no resource dictionary
            var xobjects = PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobjects == null) continue; // page has no XObjects

            int imageIndex = 0;
            foreach (PdfName key in xobjects.Keys)
            {
                // Image data lives in a stream object; anything else cannot be an image.
                var stream = PdfReader.GetPdfObject(xobjects.Get(key)) as PRStream;
                if (stream == null) continue;

                var subtype = PdfReader.GetPdfObject(stream.Get(PdfName.SUBTYPE)) as PdfName;
                if (!PdfName.IMAGE.Equals(subtype)) continue;

                // The raw stream bytes are a valid JPEG file only when DCTDecode is the
                // sole filter, given either as a name or as a one-element array.
                var filter = PdfReader.GetPdfObject(stream.Get(PdfName.FILTER));
                var filterArray = filter as PdfArray;
                if (filterArray != null)
                {
                    filter = filterArray.Size == 1 ? filterArray.GetAsName(0) : null;
                }
                if (!PdfName.DCTDECODE.Equals(filter)) continue;

                imageIndex++;
                var data = PdfReader.GetStreamBytesRaw(stream);
                // Keep the original "NNNN.jpg" name for the first image of a page so
                // existing consumers of the output still find it.
                var name = imageIndex == 1
                    ? string.Format("{0:0000}.jpg", page)
                    : string.Format("{0:0000}_{1:00}.jpg", page, imageIndex);
                File.WriteAllBytes(Path.Combine(outputDir, name), data);
            }
        }
    }
    finally
    {
        // Release the underlying file handle even if extraction fails part-way.
        pdf.Close();
    }
}
Awesome! Exactly what I needed. Thanks!
Thanks!
Hi,
Can I get an image from a PDF using a field name?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Hello,
I am using this code, but it does not extract the images from my PDF file accurately.