From 5196aa0c41523fdc09c7c89a467d7cc0a7b9729e Mon Sep 17 00:00:00 2001 From: Sergii Kauk Date: Mon, 27 May 2019 13:15:23 +0200 Subject: [PATCH] Refactoring * Re-arrange structure * Compatibility with version 4.0 (mac OS) * Readme update --- .gitignore | 2 + README.md | 15 +- composer.json | 11 +- examples/example.php | 22 +++ examples/example1.php | 39 ----- examples/example2.php | 39 ----- examples/example3.php | 39 ----- examples/example4.php | 41 ----- .../Exceptions/OpenOutputException.php | 2 +- .../Exceptions/OpenPDFException.php | 2 +- .../Exceptions/OtherException.php | 2 +- .../Exceptions/PDFPermissionException.php | 2 +- src/Howtomakeaturn/PDFInfo/PDFInfo.php | 132 ---------------- src/PDFInfo.php | 145 ++++++++++++++++++ 14 files changed, 187 insertions(+), 306 deletions(-) create mode 100755 examples/example.php delete mode 100755 examples/example1.php delete mode 100755 examples/example2.php delete mode 100755 examples/example3.php delete mode 100755 examples/example4.php rename src/{Howtomakeaturn/PDFInfo => }/Exceptions/OpenOutputException.php (61%) rename src/{Howtomakeaturn/PDFInfo => }/Exceptions/OpenPDFException.php (60%) rename src/{Howtomakeaturn/PDFInfo => }/Exceptions/OtherException.php (59%) rename src/{Howtomakeaturn/PDFInfo => }/Exceptions/PDFPermissionException.php (62%) delete mode 100644 src/Howtomakeaturn/PDFInfo/PDFInfo.php create mode 100644 src/PDFInfo.php diff --git a/.gitignore b/.gitignore index 606dd43..6ce86a6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,5 @@ +.* +!.gitignore vendor/* composer.phar composer.lock diff --git a/README.md b/README.md index f1ad9be..56e1798 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,3 @@ -- Adapted version for EV Conversion Worker - Simple PHP wrapper to the pdfinfo unix tool. Inspired by http://stackoverflow.com/questions/14644353/get-the-number-of-pages-in-a-pdf-document/14644354 @@ -10,7 +8,7 @@ pdfinfo is an unix tool helping extract information from pdf files. http://linuxcommand.org/man_pages/pdfinfo1.html -You can get page count, title, author..etc via the tool. +You can get metadata, page count and rotation for every page, etc. # Installation @@ -26,7 +24,7 @@ sudo apt-get install poppler-utils ## 2. Install the library You can just download the file to your project, or install it via composer: ``` -composer require "howtomakeaturn/pdfinfo:1.*" +composer require apilayer/pdfinfo ``` # Usage @@ -45,7 +43,7 @@ This library throws 4 kind of exceptions to represent the official exit codes. * PDFPermissionException * OtherException -Check the [official documentation](http://linuxcommand.org/man_pages/pdfinfo1.html) for more information. +Check the [official documentation](https://www.xpdfreader.com/pdfinfo-man.html) for more information. @@ -64,7 +62,14 @@ Currently this library supports the following metadata: * pages * encrypted * pageSize +* pageSizes * fileSize * optimized * PDFVersion * pageRot +* pageRots + +Environment variables configuration: + +* `PDFINFO_BIN` - location of PDFInfo executable +* `PDFINFO_PAGE_LIMIT` - last page to process, defaults to 999 diff --git a/composer.json b/composer.json index 771d127..60f938c 100644 --- a/composer.json +++ b/composer.json @@ -1,15 +1,12 @@ { - "name": "howtomakeaturn/pdfinfo", + "name": "apilayer/pdfinfo", "type": "library", - "description": "simple php wrapper for pdfinfo", + "description": "pdfinfo PHP wrapper", "keywords": ["pdf", "pdfinfo"], "license": "MIT", - "require-dev": { - "filp/whoops": "1.1.4" - }, "autoload": { - "psr-0": { - "Howtomakeaturn\\PDFInfo": "src/" + "psr-4": { + "apilayer\\PDFInfo\\": "src/" } } } diff --git a/examples/example.php b/examples/example.php new file mode 100755 index 0000000..08fbe63 --- /dev/null +++ b/examples/example.php @@ -0,0 +1,22 @@ +title, '
', PHP_EOL; +echo 'Author: ', $pdf->author, '
', PHP_EOL; +echo 'Creator: ', $pdf->creator, '
', PHP_EOL; +echo 'Producer: ', $pdf->producer, '
', PHP_EOL; +echo 'Creation date: ', $pdf->creationDate, '
', PHP_EOL; +echo 'Last modified date: ', $pdf->modDate, '
', PHP_EOL; +echo 'Tagged: ', $pdf->tagged, '
', PHP_EOL; +echo 'Form: ', $pdf->form, '
', PHP_EOL; +echo 'Pages: ', $pdf->pages, '
', PHP_EOL; +echo 'Encrypted: ', $pdf->encrypted, '
', PHP_EOL; +echo 'Page size: ', $pdf->pageSize, '
', PHP_EOL; +echo 'Page rotation: ', $pdf->pageRot, '
', PHP_EOL; +echo 'File size: ', $pdf->fileSize, '
', PHP_EOL; +echo 'Optimized: ', $pdf->optimized, '
', PHP_EOL; +echo 'PDF Version: ', $pdf->PDFVersion, '
', PHP_EOL; diff --git a/examples/example1.php b/examples/example1.php deleted file mode 100755 index 16464a8..0000000 --- a/examples/example1.php +++ /dev/null @@ -1,39 +0,0 @@ -pushHandler(new \Whoops\Handler\PrettyPageHandler); -$whoops->register(); - -$pdf = new PDFInfo('files/DDD.pdf'); - -echo $pdf->title; -echo '
'; -echo $pdf->author; -echo '
'; -echo $pdf->creator; -echo '
'; -echo $pdf->producer; -echo '
'; -echo $pdf->creationDate; -echo '
'; -echo $pdf->modDate; -echo '
'; -echo $pdf->tagged; -echo '
'; -echo $pdf->form; -echo '
'; -echo $pdf->pages; -echo '
'; -echo $pdf->encrypted; -echo '
'; -echo $pdf->pageSize; -echo '
'; -echo $pdf->fileSize; -echo '
'; -echo $pdf->optimized; -echo '
'; -echo $pdf->PDFVersion; -echo '
'; diff --git a/examples/example2.php b/examples/example2.php deleted file mode 100755 index e6241c2..0000000 --- a/examples/example2.php +++ /dev/null @@ -1,39 +0,0 @@ -pushHandler(new \Whoops\Handler\PrettyPageHandler); -$whoops->register(); - -$pdf = new PDFInfo('files/not-exist.pdf'); - -echo $pdf->title; -echo '
'; -echo $pdf->author; -echo '
'; -echo $pdf->creator; -echo '
'; -echo $pdf->producer; -echo '
'; -echo $pdf->creationDate; -echo '
'; -echo $pdf->modDate; -echo '
'; -echo $pdf->tagged; -echo '
'; -echo $pdf->form; -echo '
'; -echo $pdf->pages; -echo '
'; -echo $pdf->encrypted; -echo '
'; -echo $pdf->pageSize; -echo '
'; -echo $pdf->fileSize; -echo '
'; -echo $pdf->optimized; -echo '
'; -echo $pdf->PDFVersion; -echo '
'; diff --git a/examples/example3.php b/examples/example3.php deleted file mode 100755 index 07aceb9..0000000 --- a/examples/example3.php +++ /dev/null @@ -1,39 +0,0 @@ -pushHandler(new \Whoops\Handler\PrettyPageHandler); -$whoops->register(); - -$pdf = new PDFInfo('files/EEE.pdf'); - -echo $pdf->title; -echo '
'; -echo $pdf->author; -echo '
'; -echo $pdf->creator; -echo '
'; -echo $pdf->producer; -echo '
'; -echo $pdf->creationDate; -echo '
'; -echo $pdf->modDate; -echo '
'; -echo $pdf->tagged; -echo '
'; -echo $pdf->form; -echo '
'; -echo $pdf->pages; -echo '
'; -echo $pdf->encrypted; -echo '
'; -echo $pdf->pageSize; -echo '
'; -echo $pdf->fileSize; -echo '
'; -echo $pdf->optimized; -echo '
'; -echo $pdf->PDFVersion; -echo '
'; diff --git a/examples/example4.php b/examples/example4.php deleted file mode 100755 index c7ab95a..0000000 --- a/examples/example4.php +++ /dev/null @@ -1,41 +0,0 @@ -pushHandler(new \Whoops\Handler\PrettyPageHandler); -$whoops->register(); - -$pdf = new PDFInfo('files/cool.pdf'); - -echo $pdf->title; -echo '
'; -echo $pdf->author; -echo '
'; -echo $pdf->creator; -echo '
'; -echo $pdf->producer; -echo '
'; -echo $pdf->creationDate; -echo '
'; -echo $pdf->modDate; -echo '
'; -echo $pdf->tagged; -echo '
'; -echo $pdf->form; -echo '
'; -echo $pdf->pages; -echo '
'; -echo $pdf->encrypted; -echo '
'; -echo $pdf->pageSize; -echo '
'; -echo $pdf->fileSize; -echo '
'; -echo $pdf->optimized; -echo '
'; -echo $pdf->PDFVersion; -echo '
'; -echo $pdf->pageRot; -echo '
'; diff --git a/src/Howtomakeaturn/PDFInfo/Exceptions/OpenOutputException.php b/src/Exceptions/OpenOutputException.php similarity index 61% rename from src/Howtomakeaturn/PDFInfo/Exceptions/OpenOutputException.php rename to src/Exceptions/OpenOutputException.php index 52c8844..a4f76ed 100644 --- a/src/Howtomakeaturn/PDFInfo/Exceptions/OpenOutputException.php +++ b/src/Exceptions/OpenOutputException.php @@ -1,5 +1,5 @@ file = $file; - - $this->loadOutput(); - - $this->parseOutput(); - } - - public function getBinary() - { - if (empty(static::$bin)) { - static::$bin = trim(trim(getenv('PDFINFO_BIN'), '\\/" \'')) ?: 'pdfinfo'; - } - - return static::$bin; - } - - private function loadOutput() - { - $cmd = escapeshellarg($this->getBinary()); // escapeshellarg to work with Windows paths with spaces. - - $file = escapeshellarg($this->file); - // Parse entire output - // Surround with double quotes if file name has spaces - exec("$cmd -l 500 $file", $output, $returnVar); - - if ( $returnVar === 1 ){ - throw new Exceptions\OpenPDFException(); - } else if ( $returnVar === 2 ){ - throw new Exceptions\OpenOutputException(); - } else if ( $returnVar === 3 ){ - throw new Exceptions\PDFPermissionException(); - } else if ( $returnVar === 99 ){ - throw new Exceptions\OtherException(); - } - - $this->output = $output; - } - - private function parseOutput() - { - $this->title = $this->parse('Title'); - $this->author = $this->parse('Author'); - $this->creator = $this->parse('Creator'); - $this->producer = $this->parse('Producer'); - $this->creationDate = $this->parse('CreationDate'); - $this->modDate = $this->parse('ModDate'); - $this->tagged = $this->parse('Tagged'); - $this->form = $this->parse('Form'); - $this->pages = $this->parse('Pages'); - $this->encrypted = $this->parse('Encrypted'); - $this->pageSize = $this->parse('Page 1 size'); - - $x=1; - foreach(range(1,$this->pages) as $index) { - if ($x>99){ - $this->newpageSize->$x = $this->parse('Page '.$x.' size'); - } else if ($x>9){ - $this->newpageSize->$x = $this->parse('Page '.$x.' size'); - } else { - $this->newpageSize->$x = $this->parse('Page '.$x.' size'); - } - $x++; - } - - $x=1; - foreach(range(1,$this->pages) as $index) { - if ($x>99){ - $this->newrotation->$x = $this->parse('Page '.$x.' rot'); - } else if ($x>9){ - $this->newrotation->$x = $this->parse('Page '.$x.' rot'); - } else { - $this->newrotation->$x = $this->parse('Page '.$x.' rot'); - } - $x++; - } - - $this->rotation = $this->parse('Page 1 rot'); - $this->fileSize = $this->parse('File size'); - $this->optimized = $this->parse('Optimized'); - $this->PDFVersion = $this->parse('PDF version'); - } - - private function parse($attribute) - { - // Iterate through lines - $result = null; - foreach($this->output as $op) - { - // Extract the number - if(preg_match("/" . $attribute . ":\s*(.+)/i", $op, $matches) === 1) - { - $result = $matches[1]; - break; - } - } - - return $result; - } - -} diff --git a/src/PDFInfo.php b/src/PDFInfo.php new file mode 100644 index 0000000..2332bd7 --- /dev/null +++ b/src/PDFInfo.php @@ -0,0 +1,145 @@ +attributes = new stdClass; + $this->file = $file; + + $this->loadOutput(); + + $this->parseOutput(); + } + + public function __get($field) + { + return property_exists($this->attributes, $field) ? + $this->attributes->{$field} : + null; + } + + public function getBinary() + { + if (empty(static::$bin)) { + static::$bin = trim(trim(getenv('PDFINFO_BIN'), '\\/" \'')) ?: 'pdfinfo'; + } + + return static::$bin; + } + + /** + * @throws Exceptions\OpenOutputException + * @throws Exceptions\OpenPDFException + * @throws Exceptions\OtherException + * @throws Exceptions\PDFPermissionException + */ + private function loadOutput() + { + $cmd = escapeshellarg($this->getBinary()); // escapeshellarg to work with Windows paths with spaces. + + $file = escapeshellarg($this->file); + + $page_limit = intval(getenv('PDFINFO_PAGE_LIMIT')) ?: 999; + + // Parse entire output + // Surround with double quotes if file name has spaces + exec("$cmd -l $page_limit $file", $output, $returnVar); + + if ($returnVar === 1) { + throw new Exceptions\OpenPDFException(); + } elseif ($returnVar === 2) { + throw new Exceptions\OpenOutputException(); + } elseif ($returnVar === 3) { + throw new Exceptions\PDFPermissionException(); + } elseif ($returnVar === 99) { + throw new Exceptions\OtherException(); + } + + $this->output = $output; + } + + private function parseOutput() + { + foreach ($this->output as $output_line) { + list($key, $value) = explode(':', $output_line, 2); + + if (preg_match('/\b(?\d+)\b/', $key, $key_matches)) { + $key = str_replace($key_matches['number'], '', $key); + } + + $key = $this->formatKey($key); + $value = $this->formatValue($value); + + // Only set attributes once + if (!property_exists($this->attributes, $key)) { + $this->attributes->{$key} = $value; + } + + // Attributes for multiple pages + if (isset($key_matches['number'])) { + if (!property_exists($this->attributes, "${key}s")) { + $this->attributes->{"${key}s"} = new stdClass; + } + $this->attributes->{"${key}s"}->{$key_matches['number']} = $value; + } + } + + // Compatibility with version 4.0 which has page rotation data inside of page size + $rot_pattern = '/\(rot\w+\s(?\d+)\s\w+\)$$/'; + $rot_replace = '/\s+\([^\)]+\)$/'; + + if (is_null($this->pageRot) && $this->pageSize && preg_match($rot_pattern, $this->pageSize, $rot_matches)) { + $this->attributes->{'pageRot'} = $rot_matches['degrees']; + $this->attributes->{'pageSize'} = preg_replace($rot_replace, '', $this->pageSize); + + // Also process attributes for all pages + if (property_exists($this->attributes, 'pageSizes')) { + foreach ($this->pageSizes as $page_number => $page_size) { + preg_match($rot_pattern, $page_size, $page_matches); + if (isset($page_matches['degrees'])) { + if (!property_exists($this->attributes, 'pageRots')) { + $this->attributes->{'pageRots'} = new stdClass; + } + $this->attributes->{'pageRots'}->{$page_number} = $page_matches['degrees']; + $this->attributes->{'pageSizes'}->{$page_number} = preg_replace($rot_replace, '', + $this->pageSizes->{$page_number}); + } + } + } + } + } + + private function formatKey($string) + { + return preg_replace_callback('/^([A-Z])(?![A-Z])/', function ($m) { + return strtolower($m[1]); + }, preg_replace('/\s+/', '', ucwords($string))); + } + + private function formatValue($string) + { + return trim(preg_replace('/\s+/', ' ', $string)); + } +}