WPSEO_Sitemap_Image_Parser

Parses images from the given post.

Defined (1)

The class is defined in the following location(s).

/inc/sitemaps/class-sitemap-image-parser.php  
  1. class WPSEO_Sitemap_Image_Parser { 
  2.  
  3. /** @var string $home_url Holds the home_url() value to speed up loops. */ 
  4. protected $home_url = ''; 
  5.  
  6. /** @var string $host Holds site URL hostname. */ 
  7. protected $host = ''; 
  8.  
  9. /** @var string $scheme Holds site URL protocol. */ 
  10. protected $scheme = 'http'; 
  11.  
  12. /** @var array $attachments Cached set of attachments for multiple posts. */ 
  13. protected $attachments = array(); 
  14.  
  15. /** @var string $charset Holds blog charset value for use in DOM parsing. */ 
  16. protected $charset = 'UTF-8'; 
  17.  
  /**
   * Set up URL properties for reuse.
   *
   * Stores the home URL, its host (without a leading "www.") and its scheme,
   * plus the blog charset, so they do not have to be looked up per image.
   * Host and scheme keep their property defaults when the home URL yields
   * no such component.
   */
  public function __construct() {

      $this->home_url = home_url();
      $parsed_home    = wp_parse_url( $this->home_url );

      if ( ! empty( $parsed_home['host'] ) ) {

          $host = $parsed_home['host'];

          // Strip only a leading "www." label. A blanket str_replace() would also
          // rewrite hosts that merely contain the substring (e.g. "mywww.example.com"),
          // and the stored host would then no longer match the site's own image URLs.
          if ( 0 === strpos( $host, 'www.' ) ) {
              $host = (string) substr( $host, 4 );
          }

          $this->host = $host;
      }

      if ( ! empty( $parsed_home['scheme'] ) ) {
          $this->scheme = $parsed_home['scheme'];
      }

      $this->charset = esc_attr( get_bloginfo( 'charset' ) );
  }
  /**
   * Collect the image data sets that belong to a post.
   *
   * Sources, in order: the post thumbnail, `<img />` tags found in the post
   * content, gallery shortcodes found in the post content and, when the post
   * is itself an image attachment, the post's own file. Entries with an empty
   * `src` are removed before the list is passed through the
   * `wpseo_sitemap_urlimages` filter.
   *
   * @param object $post Post object to get images for.
   *
   * @return array Image items; an empty array when $post is not an object.
   */
  public function get_images( $post ) {

      if ( ! is_object( $post ) ) {
          return array();
      }

      $items       = array();
      $featured_id = get_post_thumbnail_id( $post->ID );

      // Post thumbnail.
      if ( $featured_id ) {
          $url     = $this->get_absolute_url( $this->image_url( $featured_id ) );
          $alt     = get_post_meta( $featured_id, '_wp_attachment_image_alt', true );
          $title   = get_post_field( 'post_title', $featured_id );
          $items[] = $this->get_image_item( $post, $url, $title, $alt );
      }

      // Images embedded in the content markup.
      foreach ( $this->parse_html_images( $post->post_content ) as $parsed ) {
          $items[] = $this->get_image_item( $post, $parsed['src'], $parsed['title'], $parsed['alt'] );
      }

      // Images referenced through gallery shortcodes.
      foreach ( $this->parse_galleries( $post->post_content, $post->ID ) as $gallery_image ) {
          $url     = $this->get_absolute_url( $this->image_url( $gallery_image->ID ) );
          $alt     = get_post_meta( $gallery_image->ID, '_wp_attachment_image_alt', true );
          $items[] = $this->get_image_item( $post, $url, $gallery_image->post_title, $alt );
      }

      // The post itself, when it is an image attachment.
      if ( 'attachment' === $post->post_type && wp_attachment_is_image( $post ) ) {
          $url     = $this->get_absolute_url( $this->image_url( $post->ID ) );
          $alt     = get_post_meta( $post->ID, '_wp_attachment_image_alt', true );
          $items[] = $this->get_image_item( $post, $url, $post->post_title, $alt );
      }

      // Drop entries that ended up without a usable source; keys are left as they are.
      foreach ( array_keys( $items ) as $index ) {
          if ( empty( $items[ $index ]['src'] ) ) {
              unset( $items[ $index ] );
          }
      }

      /**
       * Filter images to be included for the post in XML sitemap.
       *
       * @param array $images  Array of image items.
       * @param int   $post_id ID of the post.
       */
      return apply_filters( 'wpseo_sitemap_urlimages', $items, $post->ID );
  }
  /**
   * Collect image data sets from a term description.
   *
   * Combines the `<img />` tags found in the description with the images of
   * any gallery shortcodes it contains. Each gallery image is added as an
   * array with `src`, `title` and `alt` keys.
   *
   * @param object $term Term to get images from description for.
   *
   * @return array
   */
  public function get_term_images( $term ) {

      $collected = $this->parse_html_images( $term->description );

      foreach ( $this->parse_galleries( $term->description ) as $gallery_image ) {

          $url   = $this->get_absolute_url( $this->image_url( $gallery_image->ID ) );
          $title = $gallery_image->post_title;
          $alt   = get_post_meta( $gallery_image->ID, '_wp_attachment_image_alt', true );

          $collected[] = array(
              'src'   => $url,
              'title' => $title,
              'alt'   => $alt,
          );
      }

      return $collected;
  }
  /**
   * Parse `<img />` tags in content.
   *
   * Loads the content into a DOMDocument and returns one entry per `<img>`
   * element that has a `src`. An image is skipped when its absolute URL does
   * not contain the site host, or when esc_url() would alter the URL.
   *
   * For images whose class holds a `wp-image-<ID>` marker without `size-full`,
   * and whose ID has a post status, the `src` is replaced by the URL that
   * image_url() builds for that attachment.
   *
   * @param string $content Content string to parse.
   *
   * @return array List of arrays with `src`, `title` and `alt` keys; empty when
   *               DOMDocument is unavailable or the content is empty.
   */
  private function parse_html_images( $content ) {

      $images = array();

      if ( ! class_exists( 'DOMDocument' ) ) {
          return $images;
      }

      if ( empty( $content ) ) {
          return $images;
      }

      // Prevent DOMDocument from bubbling warnings about invalid HTML. The previous
      // setting is remembered because this switch is global to libxml, not local
      // to this document.
      $previous_setting = libxml_use_internal_errors( true );

      $post_dom = new DOMDocument();
      $post_dom->loadHTML( '<?xml encoding="' . $this->charset . '">' . $content );

      // Clear the errors, so they don't get kept in memory, then hand the
      // error-handling mode back the way it was found.
      libxml_clear_errors();
      libxml_use_internal_errors( $previous_setting );

      /** @var DOMElement $img */
      foreach ( $post_dom->getElementsByTagName( 'img' ) as $img ) {

          $src = $img->getAttribute( 'src' );

          if ( empty( $src ) ) {
              continue;
          }

          $class = $img->getAttribute( 'class' );

          if ( // This detects WP-inserted images, which we need to upsize.
              ! empty( $class )
              && false === strpos( $class, 'size-full' )
              && preg_match( '|wp-image-(?P<id>\d+)|', $class, $matches )
              && get_post_status( $matches['id'] )
          ) {
              $src = $this->image_url( $matches['id'] );
          }

          $src = $this->get_absolute_url( $src );

          // Only keep images whose URL mentions this site's host.
          if ( strpos( $src, $this->host ) === false ) {
              continue;
          }

          // Skip URLs that escaping would change.
          if ( $src !== esc_url( $src ) ) {
              continue;
          }

          $images[] = array(
              'src'   => $src,
              'title' => $img->getAttribute( 'title' ),
              'alt'   => $img->getAttribute( 'alt' ),
          );
      }

      return $images;
  }
  /**
   * Parse gallery shortcodes in a given content.
   *
   * For every gallery found, attachments are looked up in one of three ways,
   * mirroring the branching the original comment attributes to core's
   * gallery_shortcode(): an explicit `include`/`ids` list, the children of
   * the gallery's post minus an `exclude` list, or all children of the
   * gallery's post. The gallery's post is its own `id` attribute when set,
   * otherwise $post_id.
   *
   * @param string $content Content string.
   * @param int    $post_id Optional ID of post being parsed.
   *
   * @return array Set of attachment objects, de-duplicated with array_unique().
   */
  private function parse_galleries( $content, $post_id = 0 ) {

      $collected = array();

      foreach ( $this->get_content_galleries( $content ) as $gallery ) {

          $parent_id = empty( $gallery['id'] ) ? $post_id : intval( $gallery['id'] );

          // `ids` is treated as another spelling of `include`.
          if ( ! empty( $gallery['ids'] ) ) {
              $gallery['include'] = $gallery['ids'];
          }

          $found = array();

          if ( ! empty( $gallery['include'] ) ) {

              $included = get_posts( array(
                  'include'        => $gallery['include'],
                  'post_status'    => 'inherit',
                  'post_type'      => 'attachment',
                  'post_mime_type' => 'image',
              ) );

              // Re-key the result by attachment ID.
              foreach ( $included as $attachment ) {
                  $found[ $attachment->ID ] = $attachment;
              }
          }
          elseif ( ! empty( $gallery['exclude'] ) && ! empty( $parent_id ) ) {

              $found = get_children( array(
                  'post_parent'    => $parent_id,
                  'exclude'        => $gallery['exclude'],
                  'post_status'    => 'inherit',
                  'post_type'      => 'attachment',
                  'post_mime_type' => 'image',
              ) );
          }
          elseif ( ! empty( $parent_id ) ) {

              $found = get_children( array(
                  'post_parent'    => $parent_id,
                  'post_status'    => 'inherit',
                  'post_type'      => 'attachment',
                  'post_mime_type' => 'image',
              ) );
          }

          $collected = array_merge( $collected, $found );
      }

      return array_unique( $collected, SORT_REGULAR );
  }
  /**
   * Retrieves galleries from the passed content.
   *
   * Forked from core to skip executing shortcodes for performance: the
   * content is only matched against the shortcode regex, and the attribute
   * string of every match tagged `gallery` is parsed.
   *
   * @param string $content Content to parse for shortcodes.
   *
   * @return array A list with one entry of parsed attributes per gallery
   *               shortcode; an empty-string parse result is stored as an
   *               empty array.
   */
  protected function get_content_galleries( $content ) {

      $galleries = array();

      if ( ! has_shortcode( $content, 'gallery' ) ) {
          return $galleries;
      }

      $pattern = '/' . get_shortcode_regex() . '/s';

      if ( ! preg_match_all( $pattern, $content, $matches, PREG_SET_ORDER ) ) {
          return $galleries;
      }

      foreach ( $matches as $match ) {

          // Capture group 2 is compared against the tag name; other shortcodes are ignored.
          if ( 'gallery' !== $match[2] ) {
              continue;
          }

          $parsed = shortcode_parse_atts( $match[3] );

          // An empty string means a valid shortcode without any attributes.
          $galleries[] = ( '' === $parsed ) ? array() : $parsed;
      }

      return $galleries;
  }
  /**
   * Get image item array with filters applied.
   *
   * The URL is passed through `wpseo_xml_sitemap_img_src`; `title` and `alt`
   * are only added when non-empty; the finished item is passed through
   * `wpseo_xml_sitemap_img`.
   *
   * @param WP_Post $post  Post object for the context.
   * @param string  $src   Image URL.
   * @param string  $title Optional image title.
   * @param string  $alt   Optional image alt text.
   *
   * @return array
   */
  protected function get_image_item( $post, $src, $title = '', $alt = '' ) {

      /**
       * Filter image URL to be included in XML sitemap for the post.
       *
       * @param string $src  Image URL.
       * @param object $post Post object.
       */
      $item = array(
          'src' => apply_filters( 'wpseo_xml_sitemap_img_src', $src, $post ),
      );

      // Optional fields are left out entirely when they carry no value.
      $optional = array(
          'title' => $title,
          'alt'   => $alt,
      );

      foreach ( $optional as $field => $text ) {
          if ( ! empty( $text ) ) {
              $item[ $field ] = $text;
          }
      }

      /**
       * Filter image data to be included in XML sitemap for the post.
       *
       * @param array $image {
       *     Array of image data.
       *
       *     @type string $src   Image URL.
       *     @type string $title Image title attribute (optional).
       *     @type string $alt   Image alt attribute (optional).
       * }
       * @param object $post Post object.
       */
      return apply_filters( 'wpseo_xml_sitemap_img', $item, $post );
  }
  /**
   * Get attached image URL. Adapted from core for speed.
   *
   * Builds the URL from the `_wp_attached_file` meta value and the upload
   * directory data, which is fetched once and kept in a static variable.
   *
   * @param int $post_id ID of the post.
   *
   * @return string Image URL; an empty string when the upload directory data
   *                reports an error or the post has no attached file.
   */
  private function image_url( $post_id ) {

      static $uploads;

      if ( empty( $uploads ) ) {
          $uploads = wp_upload_dir();
      }

      if ( false !== $uploads['error'] ) {
          return '';
      }

      $file = get_post_meta( $post_id, '_wp_attached_file', true );

      if ( empty( $file ) ) {
          return '';
      }

      $base_dir = $uploads['basedir'];
      $base_url = $uploads['baseurl'];

      if ( 0 === strpos( $file, $base_dir ) ) {
          // The stored value starts with the upload base directory: swap it for the base URL.
          $url = str_replace( $base_dir, $base_url, $file );
      }
      else {
          $marker_position = strpos( $file, 'wp-content/uploads' );

          if ( false !== $marker_position ) {
              // Keep what follows the marker; 18 is the length of 'wp-content/uploads'.
              $url = $base_url . substr( $file, ( $marker_position + 18 ) );
          }
          else {
              // It's a newly uploaded file, therefore $file is relative to the baseurl.
              $url = $base_url . "/$file";
          }
      }

      return $url;
  }
  /**
   * Make absolute URL for domain or protocol-relative one.
   *
   * - Empty or non-string input is returned unchanged.
   * - A URL that WPSEO_Utils::is_url_relative() reports as relative gets the
   *   home URL prepended when it starts with "/", and is returned unchanged
   *   otherwise.
   * - Any other URL that does not start with "http" is treated as
   *   protocol-relative and gets the site scheme and ":" prepended.
   *
   * @param string $src URL to process.
   *
   * @return string
   */
  protected function get_absolute_url( $src ) {

      if ( ! is_string( $src ) || empty( $src ) ) {
          return $src;
      }

      if ( true === WPSEO_Utils::is_url_relative( $src ) ) {
          // Only a leading slash makes the URL something the home URL can be put in front of.
          return ( '/' === $src[0] ) ? $this->home_url . $src : $src;
      }

      if ( 0 === strpos( $src, 'http' ) ) {
          return $src;
      }

      // Protocol relative url, we add the scheme as the standard requires a protocol.
      return $this->scheme . ':' . $src;
  }
  341. /** 
  342. * Cache attached images and thumbnails for a set of posts. 
  343. * @deprecated 3.3 Blanket caching no longer makes sense with modern galleries. R. 
  344. */ 
  345. public function cache_attachments() { 
  346.  
  347. _deprecated_function( __METHOD__, '3.3' );