Skip to content

Instantly share code, notes, and snippets.

@7shi
Created February 1, 2011 02:52
  • Star 7 You must be signed in to star a gist
  • Fork 3 You must be signed in to fork a gist
Star You must be signed in to star a gist
Save 7shi/805326 to your computer and use it in GitHub Desktop.
Extract JPEG images from a PDF with iTextSharp
// iTextSharp: http://itextpdf.com/
// reference: http://www.vbforums.com/showthread.php?t=530736
/// <summary>
/// Saves one embedded image per page of a PDF as <c>0001.jpg</c>, <c>0002.jpg</c>, ...
/// (numbered by page) into a directory located next to the PDF and named after the
/// PDF file without its extension. The directory is created when it does not exist.
/// </summary>
/// <param name="file">Path of the PDF file to read.</param>
/// <remarks>
/// For each page, the first XObject that is an indirect stream with /Subtype /Image is
/// written; pages without such an object produce no file. The stream bytes are written
/// undecoded (<c>GetStreamBytesRaw</c>), so the output is presumably only a valid JPEG
/// when the image is stored with the DCTDecode filter - verify for your input files.
/// </remarks>
void ExtractJpeg(string file)
{
    var outputDir = Path.Combine(
        Path.GetDirectoryName(file),
        Path.GetFileNameWithoutExtension(file));

    // CreateDirectory is a no-op when the directory already exists.
    Directory.CreateDirectory(outputDir);

    var pdf = new PdfReader(file);
    try
    {
        int pageCount = pdf.NumberOfPages;
        for (int i = 1; i <= pageCount; i++)
        {
            var page = pdf.GetPageN(i);

            // A page may have no /Resources or no /XObject dictionary at all.
            var resources = PdfReader.GetPdfObject(page.Get(PdfName.RESOURCES)) as PdfDictionary;
            if (resources == null)
            {
                continue;
            }

            var xobjects = PdfReader.GetPdfObject(resources.Get(PdfName.XOBJECT)) as PdfDictionary;
            if (xobjects == null)
            {
                continue;
            }

            // Scan every XObject instead of only the first key, so a leading
            // non-image XObject no longer hides an image that follows it.
            foreach (PdfName key in xobjects.Keys)
            {
                var reference = xobjects.Get(key);
                if (reference == null || !reference.IsIndirect())
                {
                    continue;
                }

                // Resolving the indirect reference yields the stream itself.
                var stream = PdfReader.GetPdfObject(reference) as PRStream;
                if (stream == null)
                {
                    continue;
                }

                var subtype = PdfReader.GetPdfObject(stream.Get(PdfName.SUBTYPE)) as PdfName;
                if (!PdfName.IMAGE.Equals(subtype))
                {
                    continue;
                }

                var data = PdfReader.GetStreamBytesRaw(stream);
                var jpeg = Path.Combine(outputDir, string.Format("{0:0000}.jpg", i));
                File.WriteAllBytes(jpeg, data);

                // Keep the one-file-per-page naming: stop after the first image.
                break;
            }
        }
    }
    finally
    {
        // Release the underlying file even when a page fails to parse.
        pdf.Close();
    }
}
@pramodgehlotgate6
Copy link

Hello ,
I am using this code, but it does not extract the images from my PDF file accurately.

@WashingtonJunior
Copy link

Awesome! Exactly what I needed. Thanks!

@AndersBillLinden
Copy link

Thanks!

@mennatullahS
Copy link

Hi,
Can I get an image from a PDF using a field name?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment