/inc/sitemaps/class-sitemap-image-parser.php

<?php
  /**
   * @package WPSEO\XML_Sitemaps
   */

  /**
   * Parses images from the given post.
   *
   * Image data is gathered from the post thumbnail, from `<img>` tags in the
   * content, from `[gallery]` shortcodes and, for image attachments, from the
   * attachment itself. Term descriptions can be parsed as well.
   */
  class WPSEO_Sitemap_Image_Parser {

      /** @var string $home_url Holds the home_url() value to speed up loops. */
      protected $home_url = '';

      /** @var string $host Holds site URL hostname. */
      protected $host = '';

      /** @var string $scheme Holds site URL protocol. */
      protected $scheme = 'http';

      /** @var array $attachments Cached set of attachments for multiple posts. */
      protected $attachments = array();

      /** @var string $charset Holds blog charset value for use in DOM parsing. */
      protected $charset = 'UTF-8';

      /**
       * Set up URL properties for reuse.
       */
      public function __construct() {

          $this->home_url = home_url();
          $parsed_home    = wp_parse_url( $this->home_url );

          if ( ! empty( $parsed_home['host'] ) ) {
              // Only a leading "www." is dropped; a plain str_replace() would also
              // mangle hosts that merely contain "www." (e.g. "awww.example.com").
              $this->host = preg_replace( '/^www\./', '', $parsed_home['host'] );
          }

          if ( ! empty( $parsed_home['scheme'] ) ) {
              $this->scheme = $parsed_home['scheme'];
          }

          $this->charset = esc_attr( get_bloginfo( 'charset' ) );
      }

      /**
       * Get set of image data sets for the given post.
       *
       * @param object $post Post object to get images for.
       *
       * @return array Image items; empty when $post is not an object.
       */
      public function get_images( $post ) {

          $images = array();

          if ( ! is_object( $post ) ) {
              return $images;
          }

          // 1. Featured image.
          $thumbnail_id = get_post_thumbnail_id( $post->ID );

          if ( $thumbnail_id ) {
              $src      = $this->get_absolute_url( $this->image_url( $thumbnail_id ) );
              $alt      = get_post_meta( $thumbnail_id, '_wp_attachment_image_alt', true );
              $title    = get_post_field( 'post_title', $thumbnail_id );
              $images[] = $this->get_image_item( $post, $src, $title, $alt );
          }

          // 2. Images embedded in the content markup.
          $unfiltered_images = $this->parse_html_images( $post->post_content );

          foreach ( $unfiltered_images as $image ) {
              $images[] = $this->get_image_item( $post, $image['src'], $image['title'], $image['alt'] );
          }

          // 3. Images referenced through gallery shortcodes.
          foreach ( $this->parse_galleries( $post->post_content, $post->ID ) as $attachment ) {
              $src = $this->get_absolute_url( $this->image_url( $attachment->ID ) );
              $alt = get_post_meta( $attachment->ID, '_wp_attachment_image_alt', true );

              $images[] = $this->get_image_item( $post, $src, $attachment->post_title, $alt );
          }

          // 4. The post itself, when it is an image attachment.
          if ( 'attachment' === $post->post_type && wp_attachment_is_image( $post ) ) {
              $src = $this->get_absolute_url( $this->image_url( $post->ID ) );
              $alt = get_post_meta( $post->ID, '_wp_attachment_image_alt', true );

              $images[] = $this->get_image_item( $post, $src, $post->post_title, $alt );
          }

          // Drop items that ended up without a URL (e.g. emptied by a filter).
          foreach ( $images as $key => $image ) {
              if ( empty( $image['src'] ) ) {
                  unset( $images[ $key ] );
              }
          }

          /**
           * Filter images to be included for the post in XML sitemap.
           *
           * @param array $images  Array of image items.
           * @param int   $post_id ID of the post.
           */
          $images = apply_filters( 'wpseo_sitemap_urlimages', $images, $post->ID );

          return $images;
      }

      /**
       * Get set of image data sets for the given term, based on its description.
       *
       * @param object $term Term to get images from description for.
       *
       * @return array
       */
      public function get_term_images( $term ) {

          $images = $this->parse_html_images( $term->description );

          foreach ( $this->parse_galleries( $term->description ) as $attachment ) {
              $images[] = array(
                  'src'   => $this->get_absolute_url( $this->image_url( $attachment->ID ) ),
                  'title' => $attachment->post_title,
                  'alt'   => get_post_meta( $attachment->ID, '_wp_attachment_image_alt', true ),
              );
          }

          return $images;
      }

      /**
       * Parse `<img />` tags in content.
       *
       * Images whose URL does not contain the site host, or whose URL is changed
       * by esc_url(), are skipped.
       *
       * @param string $content Content string to parse.
       *
       * @return array List of arrays with `src`, `title` and `alt` keys.
       */
      private function parse_html_images( $content ) {

          $images = array();

          if ( ! class_exists( 'DOMDocument' ) ) {
              return $images;
          }

          if ( empty( $content ) ) {
              return $images;
          }

          // Prevent DOMDocument from bubbling warnings about invalid HTML, and
          // remember the previous setting so it can be put back afterwards.
          $previous_libxml_setting = libxml_use_internal_errors( true );

          $post_dom = new DOMDocument();
          $post_dom->loadHTML( '<?xml encoding="' . $this->charset . '">' . $content );

          // Clear the errors, so they don't get kept in memory.
          libxml_clear_errors();

          // Do not leave libxml error handling altered for the rest of the request.
          libxml_use_internal_errors( $previous_libxml_setting );

          /** @var DOMElement $img */
          foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {

              $src = $img->getAttribute( 'src' );

              if ( empty( $src ) ) {
                  continue;
              }

              $class = $img->getAttribute( 'class' );

              // This detects WP-inserted images that are not full size; their URL
              // is swapped for the URL of the attached (original) file.
              if (
                  ! empty( $class )
                  && false === strpos( $class, 'size-full' )
                  && preg_match( '|wp-image-(?P<id>\d+)|', $class, $matches )
                  && get_post_status( $matches['id'] )
              ) {
                  $src = $this->image_url( $matches['id'] );
              }

              $src = $this->get_absolute_url( $src );

              // Only keep images whose URL mentions the site host.
              if ( strpos( $src, $this->host ) === false ) {
                  continue;
              }

              // Skip URLs that esc_url() would alter.
              if ( $src !== esc_url( $src ) ) {
                  continue;
              }

              $images[] = array(
                  'src'   => $src,
                  'title' => $img->getAttribute( 'title' ),
                  'alt'   => $img->getAttribute( 'alt' ),
              );
          }

          return $images;
      }

      /**
       * Parse gallery shortcodes in a given content.
       *
       * @param string $content Content string.
       * @param int    $post_id Optional ID of post being parsed.
       *
       * @return array Set of attachment objects.
       */
      private function parse_galleries( $content, $post_id = 0 ) {

          $attachments = array();
          $galleries   = $this->get_content_galleries( $content );

          foreach ( $galleries as $gallery ) {

              // An explicit `id` attribute overrides the post being parsed.
              $id = $post_id;

              if ( ! empty( $gallery['id'] ) ) {
                  $id = intval( $gallery['id'] );
              }

              // Forked from core gallery_shortcode() to have exact same logic.
              if ( ! empty( $gallery['ids'] ) ) {
                  $gallery['include'] = $gallery['ids'];
              }

              $gallery_attachments = array();

              if ( ! empty( $gallery['include'] ) ) {

                  $_attachments = get_posts( array(
                      'include'        => $gallery['include'],
                      'post_status'    => 'inherit',
                      'post_type'      => 'attachment',
                      'post_mime_type' => 'image',
                  ) );

                  // Key the included attachments by their ID.
                  foreach ( $_attachments as $key => $val ) {
                      $gallery_attachments[ $val->ID ] = $_attachments[ $key ];
                  }
              }
              elseif ( ! empty( $gallery['exclude'] ) && ! empty( $id ) ) {

                  $gallery_attachments = get_children( array(
                      'post_parent'    => $id,
                      'exclude'        => $gallery['exclude'],
                      'post_status'    => 'inherit',
                      'post_type'      => 'attachment',
                      'post_mime_type' => 'image',
                  ) );
              }
              elseif ( ! empty( $id ) ) {

                  $gallery_attachments = get_children( array(
                      'post_parent'    => $id,
                      'post_status'    => 'inherit',
                      'post_type'      => 'attachment',
                      'post_mime_type' => 'image',
                  ) );
              }

              $attachments = array_merge( $attachments, $gallery_attachments );
          }

          // The same attachment can appear in several galleries; keep it once.
          return array_unique( $attachments, SORT_REGULAR );
      }

      /**
       * Retrieves galleries from the passed content.
       *
       * Forked from core to skip executing shortcodes for performance.
       *
       * @param string $content Content to parse for shortcodes.
       *
       * @return array A list of arrays, each containing gallery data.
       */
      protected function get_content_galleries( $content ) {

          if ( ! has_shortcode( $content, 'gallery' ) ) {
              return array();
          }

          $galleries = array();

          if ( ! preg_match_all( '/' . get_shortcode_regex() . '/s', $content, $matches, PREG_SET_ORDER ) ) {
              return $galleries;
          }

          foreach ( $matches as $shortcode ) {

              // Index 2 holds the shortcode tag, index 3 its raw attribute string.
              if ( 'gallery' === $shortcode[2] ) {

                  $attributes = shortcode_parse_atts( $shortcode[3] );

                  // Valid shortcode without any attributes.
                  if ( '' === $attributes ) {
                      $attributes = array();
                  }

                  $galleries[] = $attributes;
              }
          }

          return $galleries;
      }

      /**
       * Get image item array with filters applied.
       *
       * @param WP_Post $post  Post object for the context.
       * @param string  $src   Image URL.
       * @param string  $title Optional image title.
       * @param string  $alt   Optional image alt text.
       *
       * @return array
       */
      protected function get_image_item( $post, $src, $title = '', $alt = '' ) {

          $image = array();

          /**
           * Filter image URL to be included in XML sitemap for the post.
           *
           * @param string $src  Image URL.
           * @param object $post Post object.
           */
          $image['src'] = apply_filters( 'wpseo_xml_sitemap_img_src', $src, $post );

          // Title and alt are only added when they carry a value.
          if ( ! empty( $title ) ) {
              $image['title'] = $title;
          }

          if ( ! empty( $alt ) ) {
              $image['alt'] = $alt;
          }

          /**
           * Filter image data to be included in XML sitemap for the post.
           *
           * @param array $image {
           *     Array of image data.
           *
           *     @type string $src   Image URL.
           *     @type string $title Image title attribute (optional).
           *     @type string $alt   Image alt attribute (optional).
           * }
           *
           * @param object $post Post object.
           */
          return apply_filters( 'wpseo_xml_sitemap_img', $image, $post );
      }

      /**
       * Get attached image URL. Adapted from core for speed.
       *
       * @param int $post_id ID of the post.
       *
       * @return string Image URL, or an empty string when the upload directory
       *                reports an error or the post has no attached file.
       */
      private function image_url( $post_id ) {

          // Looked up once and reused by later calls.
          static $uploads;

          if ( empty( $uploads ) ) {
              $uploads = wp_upload_dir();
          }

          if ( false !== $uploads['error'] ) {
              return '';
          }

          $file = get_post_meta( $post_id, '_wp_attached_file', true );

          if ( empty( $file ) ) {
              return '';
          }

          // Check that the upload base exists in the file location.
          if ( 0 === strpos( $file, $uploads['basedir'] ) ) {
              return str_replace( $uploads['basedir'], $uploads['baseurl'], $file );
          }

          // Replace file location with url location: keep whatever follows the
          // uploads directory marker and append it to the base URL.
          $uploads_marker   = 'wp-content/uploads';
          $marker_position  = strpos( $file, $uploads_marker );

          if ( false !== $marker_position ) {
              return $uploads['baseurl'] . substr( $file, ( $marker_position + strlen( $uploads_marker ) ) );
          }

          // It's a newly uploaded file, therefore $file is relative to the baseurl.
          return $uploads['baseurl'] . "/$file";
      }

      /**
       * Make absolute URL for domain or protocol-relative one.
       *
       * @param string $src URL to process.
       *
       * @return string The processed URL; empty or non-string input is returned as is.
       */
      protected function get_absolute_url( $src ) {

          if ( empty( $src ) || ! is_string( $src ) ) {
              return $src;
          }

          if ( WPSEO_Utils::is_url_relative( $src ) === true ) {

              // Relative, but not to the site root: returned untouched.
              if ( $src[0] !== '/' ) {
                  return $src;
              }

              // The URL is relative, we'll have to make it absolute.
              return $this->home_url . $src;
          }

          if ( strpos( $src, 'http' ) !== 0 ) {
              // Protocol relative url, we add the scheme as the standard requires a protocol.
              return $this->scheme . ':' . $src;
          }

          return $src;
      }

      /**
       * Cache attached images and thumbnails for a set of posts.
       *
       * @deprecated 3.3 Blanket caching no longer makes sense with modern galleries.
       */
      public function cache_attachments() {

          _deprecated_function( __METHOD__, '3.3' );
      }
  }