Skip to content

Instantly share code, notes, and snippets.

@jaffreyjoy
Last active April 6, 2018 15:48
Show Gist options
  • Save jaffreyjoy/2c08aec34e1f02a6caba7246a5ede962 to your computer and use it in GitHub Desktop.
Save jaffreyjoy/2c08aec34e1f02a6caba7246a5ede962 to your computer and use it in GitHub Desktop.
PHP and Python scripts for finding the number of pages in a PDF
/*
getPDFPages() requires the pdfinfo executable (pdfinfo.exe on Windows) to be available on the PATH.
The alternative, getPDFPages_py(), runs the Python script pdf.py, which needs the PyPDF2 package to be installed.
*/
<?php
// Make a function for convenience
/**
 * Count the pages of a PDF by running the external `pdfinfo` tool.
 *
 * @param string $document Path to the PDF file.
 * @return int Page count parsed from pdfinfo's "Pages:" line, or 0 when no
 *             such line is found (for example pdfinfo is not installed or
 *             the file could not be read).
 */
function getPDFPages($document)
{
    // $cmd = "/path/to/pdfinfo"; // Linux
    // $cmd = "C:\\path\\to\\pdfinfo.exe"; // Windows
    $cmd = "pdfinfo"; // bare name: looked up on the PATH

    // escapeshellarg() quotes the path for the shell, so spaces, quotes and
    // shell metacharacters in the file name cannot break or extend the command.
    $output = array();
    exec($cmd . " " . escapeshellarg($document), $output);

    foreach ($output as $line) {
        // Anchor at line start so a metadata value that merely contains
        // "Pages: N" (e.g. inside the Title) is not mistaken for the count.
        if (preg_match('/^Pages:\s*(\d+)/i', $line, $matches) === 1) {
            return intval($matches[1]);
        }
    }

    return 0;
}
/**
 * Count the pages of a PDF by running the Python helper script pdf.py.
 *
 * The script is invoked as `python pdf.py <document>`, so `python` must be on
 * the PATH and pdf.py is looked up relative to the current working directory.
 *
 * @param string $document Path to the PDF file.
 * @return int Page count taken from the first line the script prints, or 0
 *             when the script prints nothing (for example it failed to run).
 */
function getPDFPages_py($document)
{
    $cmd = "python pdf.py";

    // Pass the path exactly once, shell-escaped, so file names containing
    // spaces or shell metacharacters reach the script as a single argument.
    $output = array();
    exec($cmd . " " . escapeshellarg($document), $output);

    // No output means the script did not produce a count; report 0 rather
    // than reading an undefined array offset.
    return isset($output[0]) ? intval($output[0]) : 0;
}
// Example usage: print the page count of file.pdf using the Python-based helper
echo getPDFPages_py("file.pdf"); // Output: 2 (for a two-page file.pdf)
?>
# from PyPDF2 import PdfFileReader
# # pdf = PdfFileReader(open("C:/Users/Jaffrey Joy/Desktop/name.pdf",'rb'))
# pdf = PdfFileReader(open("file.pdf",'rb'))
# print(pdf.getNumPages())
# To install the PyPDF2 package: pip install PyPDF2
"""Print the number of pages of the PDF file named on the command line.

Usage: python pdf.py <file.pdf>

The page count is written to stdout as a single line; diagnostics go to
stderr so a caller that parses stdout only ever sees the number.
"""
import sys


def count_pages(pdf_path):
    """Return the number of pages in the PDF file at *pdf_path*."""
    # Newer PyPDF2 releases expose PdfReader; the legacy PdfFileReader name
    # was removed in PyPDF2 3.0. Fall back to it only on old installs.
    try:
        from PyPDF2 import PdfReader
    except ImportError:
        from PyPDF2 import PdfFileReader as PdfReader

    # The context manager closes the file even if parsing raises.
    with open(pdf_path, 'rb') as stream:
        return len(PdfReader(stream).pages)


def main(argv=None):
    """Run the command-line interface and return a process exit status.

    Returns 0 on success, 1 if the file cannot be opened and 2 if no file
    name was given. Arguments after the first file name are ignored.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        print("usage: python pdf.py <file.pdf>", file=sys.stderr)
        return 2

    pdfname = argv[1]
    try:
        print(count_pages(pdfname))
    except OSError as err:
        print("cannot read {}: {}".format(pdfname, err), file=sys.stderr)
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment