-
-
Save blinds52/d051a29752b3fa6b5759b15e23591093 to your computer and use it in GitHub Desktop.
extract JPEG from PDF by iTextSharp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// iTextSharp: http://itextpdf.com/
// reference: http://www.vbforums.com/showthread.php?t=530736
/// <summary>
/// Extracts the JPEG image embedded on each page of a PDF into a folder next to the PDF.
/// </summary>
/// <remarks>
/// The output folder is named after the PDF file (without its extension) and is created
/// when missing. Only the first XObject of each page is examined. A page yields a file
/// named with its 1-based page number padded to four digits (e.g. 0001.jpg) when that
/// XObject is an indirectly referenced image stream encoded with DCTDecode; every other
/// page is skipped silently.
/// </remarks>
/// <param name="file">Path of the PDF file to read.</param>
void ExtractJpeg(string file)
{
    var outputDir = Path.Combine(Path.GetDirectoryName(file), Path.GetFileNameWithoutExtension(file));
    // CreateDirectory is a no-op when the folder already exists, so no Exists check is needed.
    Directory.CreateDirectory(outputDir);

    var pdf = new PdfReader(file);
    try
    {
        int pageCount = pdf.NumberOfPages;
        for (int i = 1; i <= pageCount; i++)
        {
            var page = pdf.GetPageN(i);
            if (page == null) continue;

            // A page may have no /Resources or no /XObject entry; skip it instead of crashing.
            var resources = PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (resources == null) continue;

            var xobjects = PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobjects == null || xobjects.Keys.Count == 0) continue;

            // Only the first XObject is considered (one scanned image per page).
            var reference = xobjects.Get(xobjects.Keys.First()) as PRIndirectReference;
            if (reference == null) continue;

            var image = pdf.GetPdfObject(reference.Number) as PRStream;
            if (image == null) continue;

            var subtype = PdfReader.GetPdfObject(image.Get(PdfName.SUBTYPE)) as PdfName;
            if (!PdfName.IMAGE.Equals(subtype)) continue;

            // The raw stream bytes form a valid JPEG file only when DCTDecode is the sole filter.
            var filter = PdfReader.GetPdfObject(image.Get(PdfName.FILTER));
            var filterChain = filter as PdfArray;
            if (filterChain != null)
            {
                // A one-element filter array is equivalent to naming the filter directly.
                filter = filterChain.Size == 1 ? filterChain.GetAsName(0) : null;
            }
            if (!PdfName.DCTDECODE.Equals(filter)) continue;

            var data = PdfReader.GetStreamBytesRaw(image);
            var jpeg = Path.Combine(outputDir, string.Format("{0:0000}.jpg", i));
            File.WriteAllBytes(jpeg, data);
        }
    }
    finally
    {
        // Release the underlying file handle even when extraction fails part-way.
        pdf.Close();
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment