fpdi_pdf_parser

Class fpdi_pdf_parser.

Defined (1)

The class is defined in the following location(s).

/vendor/setasign/fpdi/fpdi_pdf_parser.php  
  1. class fpdi_pdf_parser extends pdf_parser 
  /**
   * Page objects collected from the imported document.
   *
   * The list is zero-based: the first page is stored at index 0.
   *
   * @var array
   */
  protected $_pages;

  /**
   * Number of pages found in the imported document.
   *
   * @var integer
   */
  protected $_pageCount;

  /**
   * Index of the currently selected page.
   *
   * setPageNo() stores the value zero-based (requested number minus one).
   *
   * @var integer
   */
  public $pageNo;

  /**
   * PDF version of the imported document.
   *
   * @var string
   */
  public $_pdfVersion;

  /**
   * Names of the page boundary boxes that can be queried.
   *
   * @var array
   */
  public $availableBoxes = array(
      '/MediaBox',
      '/CropBox',
      '/BleedBox',
      '/TrimBox',
      '/ArtBox',
  );
  32.  
  /**
   * The constructor.
   *
   * Lets the parent parser open the file, then walks the page tree once
   * and caches both the page list and the page count.
   *
   * @param string $filename The source filename
   */
  public function __construct($filename)
  {
      parent::__construct($filename);

      // Start with an empty list: _readPages() only appends to it, so a
      // page tree without any leaf pages would otherwise leave the
      // property null and count() below would fail on PHP 8.
      $this->_pages = array();

      // Resolve the /Pages dictionary referenced by the document root.
      $pages = $this->resolveObject($this->_root[1][1]['/Pages']);

      // Collect every page into $this->_pages.
      $this->_readPages($pages, $this->_pages);

      // Cache the number of pages.
      $this->_pageCount = count($this->_pages);
  }
  48.  
  /**
   * Return the number of pages of the source file.
   *
   * The value is the count cached by the constructor.
   *
   * @return int
   */
  public function getPageCount()
  {
      return $this->_pageCount;
  }
  55.  
  /**
   * Select the page that the page-related getters operate on.
   *
   * The caller passes a one-based page number; it is stored as a
   * zero-based index in $pageNo.
   *
   * @param int $pageNo Page number to use (first page is 1)
   * @throws InvalidArgumentException If the number is outside the available pages
   */
  public function setPageNo($pageNo)
  {
      $index = (int) $pageNo - 1;
      $isInRange = $index >= 0 && $index < $this->getPageCount();

      if (!$isInRange) {
          throw new InvalidArgumentException('Invalid page number!');
      }

      $this->pageNo = $index;
  }
  68.  
  /**
   * Return the resources of the currently selected page.
   *
   * Delegates to _getPageResources() with the page stored at $pageNo.
   *
   * @return array|boolean
   */
  public function getPageResources()
  {
      $currentPage = $this->_pages[$this->pageNo];

      return $this->_getPageResources($currentPage);
  }
  75.  
  /**
   * Get page-resources from a /Page dictionary.
   *
   * If the object carries its own /Resources entry, that entry is used.
   * Otherwise the lookup continues with the /Parent object. When neither
   * entry exists, false is returned.
   *
   * @param array $obj Array of pdf-data
   * @return array|boolean The resources, or false if none could be found
   */
  protected function _getPageResources($obj)
  {
      $obj = $this->resolveObject($obj);

      if (isset($obj[1][1]['/Resources'])) {
          $res = $this->resolveObject($obj[1][1]['/Resources']);
      } else if (isset($obj[1][1]['/Parent'])) {
          // Nothing on this level: ask the parent object.
          $res = $this->_getPageResources($obj[1][1]['/Parent']);
      } else {
          return false;
      }

      // The parent lookup may have returned false; only index into the
      // result when it really is an array, so that "not found" is passed
      // through without an invalid-offset notice/warning.
      if (is_array($res) && $res[0] == pdf_parser::TYPE_OBJECT) {
          // Unwrap an object container and return its value.
          return $res[1];
      }

      return $res;
  }
  101.  
  /**
   * Return the content of the currently selected page.
   *
   * Every content stream is passed through _unFilterStream() and the
   * results are joined, each one followed by a single space. A page
   * without a /Contents entry yields an empty string.
   *
   * @return string
   */
  public function getContent()
  {
      if (!isset($this->_pages[$this->pageNo][1][1]['/Contents'])) {
          return '';
      }

      $streams = $this->_getPageContent($this->_pages[$this->pageNo][1][1]['/Contents']);

      $decoded = '';
      foreach ($streams as $stream) {
          $decoded .= $this->_unFilterStream($stream) . ' ';
      }

      return $decoded;
  }
  116.  
  /**
   * Resolve all content objects.
   *
   * An object reference is resolved; if it points to an array, that array
   * is processed recursively, otherwise the resolved object is the only
   * result. An array is walked entry by entry and the results of all
   * entries are merged. Any other type yields an empty list.
   *
   * @param array $contentRef
   * @return array
   */
  protected function _getPageContent($contentRef)
  {
      $type = $contentRef[0];

      if ($type == pdf_parser::TYPE_OBJREF) {
          $resolved = $this->resolveObject($contentRef);

          if ($resolved[1][0] == pdf_parser::TYPE_ARRAY) {
              return $this->_getPageContent($resolved[1]);
          }

          return array($resolved);
      }

      $streams = array();

      if ($type == pdf_parser::TYPE_ARRAY) {
          foreach ($contentRef[1] as $entry) {
              $streams = array_merge($streams, $this->_getPageContent($entry));
          }
      }

      return $streams;
  }
  136.  
  /**
   * Get a boundary box from a page.
   *
   * The box is looked up on the given page object; when it is missing
   * there (or is not an array), the lookup continues with the /Parent
   * object. All returned values are divided by $k.
   * Array format is same as used by FPDF_TPL.
   *
   * @param array $page a /Page dictionary
   * @param string $boxIndex Type of box (see {@link $availableBoxes})
   * @param float $k Scale factor from user space units to points
   * @return array|boolean The box values, or false if no box was found
   */
  protected function _getPageBox($page, $boxIndex, $k)
  {
      $page = $this->resolveObject($page);

      $box = isset($page[1][1][$boxIndex]) ? $page[1][1][$boxIndex] : null;

      // The box may be stored indirectly: follow the reference first.
      if ($box !== null && $box[0] == pdf_parser::TYPE_OBJREF) {
          $resolved = $this->resolveObject($box);
          $box = $resolved[1];
      }

      if ($box !== null && $box[0] == pdf_parser::TYPE_ARRAY) {
          $c = $box[1];

          return array(
              'x' => $c[0][1] / $k,
              'y' => $c[1][1] / $k,
              'w' => abs($c[0][1] - $c[2][1]) / $k,
              'h' => abs($c[1][1] - $c[3][1]) / $k,
              'llx' => min($c[0][1], $c[2][1]) / $k,
              'lly' => min($c[1][1], $c[3][1]) / $k,
              'urx' => max($c[0][1], $c[2][1]) / $k,
              'ury' => max($c[1][1], $c[3][1]) / $k,
          );
      }

      if (!isset($page[1][1]['/Parent'])) {
          return false;
      }

      // Not defined on this level: continue with the parent object.
      return $this->_getPageBox($this->resolveObject($page[1][1]['/Parent']), $boxIndex, $k);
  }
  171.  
  /**
   * Get all page boundary boxes by page number.
   *
   * @param int $pageNo The page number (first page is 1)
   * @param float $k Scale factor from user space units to points
   * @return array
   * @throws InvalidArgumentException If no page is stored for that number
   */
  public function getPageBoxes($pageNo, $k)
  {
      $index = $pageNo - 1;

      if (isset($this->_pages[$index])) {
          return $this->_getPageBoxes($this->_pages[$index], $k);
      }

      throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
  }
  185.  
  /**
   * Get all boxes from a /Page dictionary.
   *
   * Every name listed in $availableBoxes is queried through _getPageBox();
   * only boxes that could be found are part of the result, keyed by their
   * box name.
   *
   * @param array $page A /Page dictionary
   * @param float $k Scale factor from user space units to points
   * @return array
   */
  protected function _getPageBoxes($page, $k)
  {
      $found = array();

      foreach ($this->availableBoxes as $boxName) {
          $dimensions = $this->_getPageBox($page, $boxName, $k);
          if (!$dimensions) {
              continue;
          }

          $found[$boxName] = $dimensions;
      }

      return $found;
  }
  200.  
  /**
   * Get the page rotation by page number.
   *
   * @param integer $pageNo The page number (first page is 1)
   * @throws InvalidArgumentException If no page is stored for that number
   * @return array|bool The value found by _getPageRotation()
   */
  public function getPageRotation($pageNo)
  {
      $index = $pageNo - 1;

      if (isset($this->_pages[$index])) {
          return $this->_getPageRotation($this->_pages[$index]);
      }

      throw new InvalidArgumentException('Page ' . $pageNo . ' does not exists.');
  }
  212.  
  /**
   * Get the rotation value of a page.
   *
   * If the object carries its own /Rotate entry, that entry is used.
   * Otherwise the lookup continues with the /Parent object. When neither
   * entry exists, false is returned.
   *
   * @param array $obj A /Page dictionary
   * @return array|bool The rotation value, or false if none could be found
   */
  protected function _getPageRotation($obj)
  {
      $obj = $this->resolveObject($obj);

      if (isset($obj[1][1]['/Rotate'])) {
          $res = $this->resolveObject($obj[1][1]['/Rotate']);
      } else if (isset($obj[1][1]['/Parent'])) {
          // Nothing on this level: ask the parent object.
          $res = $this->_getPageRotation($obj[1][1]['/Parent']);
      } else {
          return false;
      }

      // The parent lookup may have returned false; only index into the
      // result when it really is an array, so that "not found" is passed
      // through without an invalid-offset notice/warning.
      if (is_array($res) && $res[0] == pdf_parser::TYPE_OBJECT) {
          // Unwrap an object container and return its value.
          return $res[1];
      }

      return $res;
  }
  234.  
  235. /** 
  236. * Read all pages 
  237. * @param array $pages /Pages dictionary 
  238. * @param array $result The result array 
  239. * @throws Exception 
  240. */ 
  241. protected function _readPages(&$pages, &$result) 
  242. // Get the kids dictionary 
  243. $_kids = $this->resolveObject($pages[1][1]['/Kids']); 
  244.  
  245. if (!is_array($_kids)) { 
  246. throw new Exception('Cannot find /Kids in current /Page-Dictionary'); 
  247.  
  248. if ($_kids[0] === self::TYPE_OBJECT) { 
  249. $_kids = $_kids[1]; 
  250.  
  251. $kids = $_kids[1]; 
  252.  
  253. foreach ($kids as $v) { 
  254. $pg = $this->resolveObject($v); 
  255. if ($pg[1][1]['/Type'][1] === '/Pages') { 
  256. // If one of the kids is an embedded 
  257. // /Pages array, resolve it as well. 
  258. $this->_readPages($pg, $result); 
  259. } else { 
  260. $result[] = $pg;