/inc/sitemaps/class-sitemaps-renderer.php

  1. <?php 
  2. /** 
  3. * @package WPSEO\XML_Sitemaps 
  4. */ 
  5.  
  6. /** 
  7. * Renders XML output for sitemaps. 
  8. */ 
  9. class WPSEO_Sitemaps_Renderer { 
  10.  
  11. /** @var string $stylesheet XSL stylesheet for styling a sitemap for web browsers. */ 
  12. protected $stylesheet = ''; 
  13.  
  14. /** @var string $charset Holds the get_bloginfo( 'charset' ) value to reuse for performance. */ 
  15. protected $charset = 'UTF-8'; 
  16.  
  17. /** @var string $output_charset Holds charset of output, might be converted. */ 
  18. protected $output_charset = 'UTF-8'; 
  19.  
  20. /** @var bool $needs_conversion If data encoding needs to be converted for output. */ 
  21. protected $needs_conversion = false; 
  22.  
  23. /** @var WPSEO_Sitemap_Timezone $timezone */ 
  24. protected $timezone; 
  25.  
  26. /** 
  27. * Set up object properties. 
  28. */ 
  29. public function __construct() { 
  30.  
  31. $stylesheet_url = preg_replace( '/(^http[s]?:)/', '', esc_url( home_url( 'main-sitemap.xsl' ) ) ); 
  32. $this->stylesheet = '<?xml-stylesheet type="text/xsl" href="' . $stylesheet_url . '"?>'; 
  33. $this->charset = get_bloginfo( 'charset' ); 
  34. $this->output_charset = $this->charset; 
  35. $this->timezone = new WPSEO_Sitemap_Timezone(); 
  36.  
  37. if ( 
  38. 'UTF-8' !== $this->charset 
  39. && function_exists( 'mb_list_encodings' ) 
  40. && in_array( $this->charset, mb_list_encodings(), true ) 
  41. ) { 
  42. $this->output_charset = 'UTF-8'; 
  43.  
  44. $this->needs_conversion = $this->output_charset !== $this->charset; 
  45.  
  46. /** 
  47. * @param array $links Set of sitemaps index links. 
  48. * 
  49. * @return string 
  50. */ 
  51. public function get_index( $links ) { 
  52.  
  53. $xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n"; 
  54.  
  55. foreach ( $links as $link ) { 
  56. $xml .= $this->sitemap_index_url( $link ); 
  57.  
  58. /** 
  59. * Filter to append sitemaps to the index. 
  60. * 
  61. * @param string $index String to append to sitemaps index, defaults to empty. 
  62. */ 
  63. $xml .= apply_filters( 'wpseo_sitemap_index', '' ); 
  64. $xml .= '</sitemapindex>'; 
  65.  
  66. return $xml; 
  67.  
  68. /** 
  69. * @param array $links Set of sitemap links. 
  70. * @param string $type Sitemap type. 
  71. * @param int $current_page Current sitemap page number. 
  72. * 
  73. * @return string 
  74. */ 
  75. public function get_sitemap( $links, $type, $current_page ) { 
  76.  
  77. $urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" ' 
  78. . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" ' 
  79. . 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n"; 
  80.  
  81. /** 
  82. * Filters the `urlset` for a sitemap by type. 
  83. * 
  84. * @api string $urlset The output for the sitemap's `urlset`. 
  85. */ 
  86. $xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset ); 
  87.  
  88. foreach ( $links as $url ) { 
  89. $xml .= $this->sitemap_url( $url ); 
  90.  
  91. /** 
  92. * Filter to add extra URLs to the XML sitemap by type. 
  93. * 
  94. * Only runs for the first page, not on all. 
  95. * 
  96. * @param string $content String content to add, defaults to empty. 
  97. */ 
  98. if ( $current_page === 1 ) { 
  99. $xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' ); 
  100.  
  101. $xml .= '</urlset>'; 
  102.  
  103. return $xml; 
  104.  
  105. /** 
  106. * Produce final XML output with debug information. 
  107. * 
  108. * @param string $sitemap Sitemap XML. 
  109. * @param boolean $transient Transient cache flag. 
  110. * 
  111. * @return string 
  112. */ 
  113. public function get_output( $sitemap, $transient ) { 
  114.  
  115. $output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>'; 
  116.  
  117. if ( $this->stylesheet ) { 
  118. /** 
  119. * Filter the stylesheet URL for the XML sitemap. 
  120. * 
  121. * @param string $stylesheet Stylesheet URL. 
  122. */ 
  123. $output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n"; 
  124.  
  125. $output .= $sitemap; 
  126. $output .= "\n<!-- XML Sitemap generated by Yoast SEO -->"; 
  127.  
  128. $debug = WP_DEBUG || ( defined( 'WPSEO_DEBUG' ) && true === WPSEO_DEBUG ); 
  129.  
  130. if ( ! WP_DEBUG_DISPLAY || ! $debug ) { 
  131. return $output; 
  132.  
  133. $memory_used = number_format( ( memory_get_peak_usage() / 1048576 ), 2 ); 
  134. $queries_run = ( $transient ) ? 'Served from transient cache' : 'Queries executed ' . absint( $GLOBALS['wpdb']->num_queries ); 
  135.  
  136. $output .= "\n<!-- {$memory_used}MB | {$queries_run} -->"; 
  137.  
  138. if ( defined( 'SAVEQUERIES' ) && SAVEQUERIES ) { 
  139.  
  140. $queries = print_r( $GLOBALS['wpdb']->queries, true ); 
  141. $output .= "\n<!-- {$queries} -->"; 
  142.  
  143. return $output; 
  144.  
  145. /** 
  146. * Get charset for the output. 
  147. * 
  148. * @return string 
  149. */ 
  150. public function get_output_charset() { 
  151. return $this->output_charset; 
  152.  
  153. /** 
  154. * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet. 
  155. * 
  156. * @param string $stylesheet Full xml-stylesheet declaration. 
  157. */ 
  158. public function set_stylesheet( $stylesheet ) { 
  159. $this->stylesheet = $stylesheet; 
  160.  
  161. /** 
  162. * Build the `<sitemap>` tag for a given URL. 
  163. * 
  164. * @param array $url Array of parts that make up this entry. 
  165. * 
  166. * @return string 
  167. */ 
  168. protected function sitemap_index_url( $url ) { 
  169.  
  170. $date = null; 
  171.  
  172. if ( ! empty( $url['lastmod'] ) ) { 
  173. $date = $this->timezone->format_date( $url['lastmod'] ); 
  174.  
  175. $url['loc'] = htmlspecialchars( $url['loc'] ); 
  176.  
  177. $output = "\t<sitemap>\n"; 
  178. $output .= "\t\t<loc>" . $url['loc'] . "</loc>\n"; 
  179. $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n"; 
  180. $output .= "\t</sitemap>\n"; 
  181.  
  182. return $output; 
  183.  
  184. /** 
  185. * Build the `<url>` tag for a given URL. 
  186. * 
  187. * Public access for backwards compatibility reasons. 
  188. * 
  189. * @param array $url Array of parts that make up this entry. 
  190. * 
  191. * @return string 
  192. */ 
  193. public function sitemap_url( $url ) { 
  194.  
  195. $date = null; 
  196.  
  197.  
  198. if ( ! empty( $url['mod'] ) ) { 
  199. // Create a DateTime object date in the correct timezone. 
  200. $date = $this->timezone->format_date( $url['mod'] ); 
  201.  
  202. $url['loc'] = htmlspecialchars( $url['loc'] ); 
  203.  
  204. $output = "\t<url>\n"; 
  205. $output .= "\t\t<loc>" . $this->encode_url_rfc3986( $url['loc'] ) . "</loc>\n"; 
  206. $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n"; 
  207.  
  208. if ( empty( $url['images'] ) ) { 
  209. $url['images'] = array(); 
  210.  
  211. foreach ( $url['images'] as $img ) { 
  212.  
  213. if ( empty( $img['src'] ) ) { 
  214. continue; 
  215.  
  216. $output .= "\t\t<image:image>\n"; 
  217. $output .= "\t\t\t<image:loc>" . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . "</image:loc>\n"; 
  218.  
  219. if ( ! empty( $img['title'] ) ) { 
  220.  
  221. $title = $img['title']; 
  222.  
  223. if ( $this->needs_conversion ) { 
  224. $title = mb_convert_encoding( $title, $this->output_charset, $this->charset ); 
  225.  
  226. $title = _wp_specialchars( html_entity_decode( $title, ENT_QUOTES, $this->output_charset ) ); 
  227. $output .= "\t\t\t<image:title><![CDATA[{$title}]]></image:title>\n"; 
  228.  
  229. if ( ! empty( $img['alt'] ) ) { 
  230.  
  231. $alt = $img['alt']; 
  232.  
  233. if ( $this->needs_conversion ) { 
  234. $alt = mb_convert_encoding( $alt, $this->output_charset, $this->charset ); 
  235.  
  236. $alt = _wp_specialchars( html_entity_decode( $alt, ENT_QUOTES, $this->output_charset ) ); 
  237. $output .= "\t\t\t<image:caption><![CDATA[{$alt}]]></image:caption>\n"; 
  238.  
  239. $output .= "\t\t</image:image>\n"; 
  240. unset( $img, $title, $alt ); 
  241.  
  242. $output .= "\t</url>\n"; 
  243.  
  244. /** 
  245. * Filters the output for the sitemap url tag. 
  246. * 
  247. * @api string $output The output for the sitemap url tag. 
  248. * 
  249. * @param array $url The sitemap url array on which the output is based. 
  250. */ 
  251. return apply_filters( 'wpseo_sitemap_url', $output, $url ); 
  252.  
  253. /** 
  254. * Apply some best effort conversion to comply with RFC3986. 
  255. * 
  256. * @param string $url URL to encode. 
  257. * 
  258. * @return string 
  259. */ 
  260. protected function encode_url_rfc3986( $url ) { 
  261.  
  262. if ( filter_var( $url, FILTER_VALIDATE_URL ) ) { 
  263. return $url; 
  264.  
  265. $path = parse_url( $url, PHP_URL_PATH ); 
  266.  
  267. if ( ! empty( $path ) && '/' !== $path ) { 
  268.  
  269. $encoded_path = explode( '/', $path ); 
  270. $encoded_path = array_map( 'rawurlencode', $encoded_path ); 
  271. $encoded_path = implode( '/', $encoded_path ); 
  272. $encoded_path = str_replace( '%7E', '~', $encoded_path ); // PHP <5.3. 
  273.  
  274. $url = str_replace( $path, $encoded_path, $url ); 
  275.  
  276. $query = parse_url( $url, PHP_URL_QUERY ); 
  277.  
  278. if ( ! empty( $query ) ) { 
  279.  
  280. parse_str( $query, $parsed_query ); 
  281.  
  282. if ( defined( 'PHP_QUERY_RFC3986' ) ) { // PHP 5.4+. 
  283. $parsed_query = http_build_query( $parsed_query, null, '&', PHP_QUERY_RFC3986 ); 
  284. else { 
  285. $parsed_query = http_build_query( $parsed_query, null, '&' ); 
  286. $parsed_query = str_replace( '+', '%20', $parsed_query ); 
  287. $parsed_query = str_replace( '%7E', '~', $parsed_query ); 
  288.  
  289. $url = str_replace( $query, $parsed_query, $url ); 
  290.  
  291. return $url; 
.