WPSEO_Sitemaps_Renderer

Renders XML output for sitemaps.

Defined (1)

The class is defined in the following location(s).

/inc/sitemaps/class-sitemaps-renderer.php  
  /**
   * Renders XML output for sitemaps.
   *
   * Builds the sitemap index (`<sitemapindex>`), individual sitemaps (`<urlset>`)
   * and the final document (XML declaration, optional stylesheet reference and
   * optional debug comments).
   */
  class WPSEO_Sitemaps_Renderer {

      /** @var string $stylesheet XSL stylesheet for styling a sitemap for web browsers. */
      protected $stylesheet = '';

      /** @var string $charset Holds the get_bloginfo( 'charset' ) value to reuse for performance. */
      protected $charset = 'UTF-8';

      /** @var string $output_charset Holds charset of output, might be converted. */
      protected $output_charset = 'UTF-8';

      /** @var bool $needs_conversion If data encoding needs to be converted for output. */
      protected $needs_conversion = false;

      /** @var WPSEO_Sitemap_Timezone $timezone Helper used to format last-modified dates. */
      protected $timezone;

      /**
       * Set up object properties.
       *
       * Output is switched to UTF-8 only when the blog charset differs from UTF-8
       * and mbstring reports that it can handle the blog charset; otherwise the
       * blog charset is used for output unchanged.
       */
      public function __construct() {

          // Drop the leading "http:" / "https:" so the stylesheet reference is scheme-relative.
          $stylesheet_url       = preg_replace( '/(^http[s]?:)/', '', esc_url( home_url( 'main-sitemap.xsl' ) ) );
          $this->stylesheet     = '<?xml-stylesheet type="text/xsl" href="' . $stylesheet_url . '"?>';
          $this->charset        = get_bloginfo( 'charset' );
          $this->output_charset = $this->charset;
          $this->timezone       = new WPSEO_Sitemap_Timezone();

          if (
              'UTF-8' !== $this->charset
              && function_exists( 'mb_list_encodings' )
              && in_array( $this->charset, mb_list_encodings(), true )
          ) {
              $this->output_charset = 'UTF-8';
          }

          $this->needs_conversion = $this->output_charset !== $this->charset;
      }

      /**
       * Build the XML for the sitemap index.
       *
       * @param array $links Set of sitemaps index links.
       *
       * @return string
       */
      public function get_index( $links ) {

          $xml = '<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

          foreach ( $links as $link ) {
              $xml .= $this->sitemap_index_url( $link );
          }

          /**
           * Filter to append sitemaps to the index.
           *
           * @param string $index String to append to sitemaps index, defaults to empty.
           */
          $xml .= apply_filters( 'wpseo_sitemap_index', '' );
          $xml .= '</sitemapindex>';

          return $xml;
      }

      /**
       * Build the XML for a single sitemap.
       *
       * @param array  $links        Set of sitemap links.
       * @param string $type         Sitemap type.
       * @param int    $current_page Current sitemap page number.
       *
       * @return string
       */
      public function get_sitemap( $links, $type, $current_page ) {

          $urlset = '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:image="http://www.google.com/schemas/sitemap-image/1.1" '
              . 'xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd" '
              . 'xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n";

          /**
           * Filters the `urlset` for a sitemap by type.
           *
           * @api string $urlset The output for the sitemap's `urlset`.
           */
          $xml = apply_filters( "wpseo_sitemap_{$type}_urlset", $urlset );

          foreach ( $links as $url ) {
              $xml .= $this->sitemap_url( $url );
          }

          /**
           * Filter to add extra URLs to the XML sitemap by type.
           *
           * Only runs for the first page, not on all.
           *
           * @param string $content String content to add, defaults to empty.
           */
          if ( $current_page === 1 ) {
              $xml .= apply_filters( "wpseo_sitemap_{$type}_content", '' );
          }

          $xml .= '</urlset>';

          return $xml;
      }

      /**
       * Produce final XML output with debug information.
       *
       * Debug comments are appended only when debug output may be displayed
       * (WP_DEBUG_DISPLAY) and debugging is enabled (WP_DEBUG or WPSEO_DEBUG).
       *
       * @param string  $sitemap   Sitemap XML.
       * @param boolean $transient Transient cache flag.
       *
       * @return string
       */
      public function get_output( $sitemap, $transient ) {

          $output = '<?xml version="1.0" encoding="' . esc_attr( $this->output_charset ) . '"?>';

          if ( $this->stylesheet ) {
              /**
               * Filter the stylesheet URL for the XML sitemap.
               *
               * @param string $stylesheet Stylesheet URL.
               */
              $output .= apply_filters( 'wpseo_stylesheet_url', $this->stylesheet ) . "\n";
          }

          $output .= $sitemap;
          $output .= "\n<!-- XML Sitemap generated by Yoast SEO -->";

          // Guard every constant with defined(): reading an undefined constant is a fatal error on PHP 8.
          $debug   = ( defined( 'WP_DEBUG' ) && WP_DEBUG ) || ( defined( 'WPSEO_DEBUG' ) && true === WPSEO_DEBUG );
          $display = defined( 'WP_DEBUG_DISPLAY' ) && WP_DEBUG_DISPLAY;

          if ( ! $display || ! $debug ) {
              return $output;
          }

          $memory_used = number_format( ( memory_get_peak_usage() / 1048576 ), 2 );
          $queries_run = ( $transient ) ? 'Served from transient cache' : 'Queries executed ' . absint( $GLOBALS['wpdb']->num_queries );

          $output .= "\n<!-- {$memory_used}MB | {$queries_run} -->";

          if ( defined( 'SAVEQUERIES' ) && SAVEQUERIES ) {

              $queries = print_r( $GLOBALS['wpdb']->queries, true );

              // XML forbids "--" inside a comment; separate consecutive hyphens so the dump keeps the document well-formed.
              $queries = preg_replace( '/-(?=-)/', '- ', $queries );

              $output .= "\n<!-- {$queries} -->";
          }

          return $output;
      }

      /**
       * Get charset for the output.
       *
       * @return string
       */
      public function get_output_charset() {
          return $this->output_charset;
      }

      /**
       * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
       *
       * @param string $stylesheet Full xml-stylesheet declaration.
       */
      public function set_stylesheet( $stylesheet ) {
          $this->stylesheet = $stylesheet;
      }

      /**
       * Build the `<sitemap>` tag for a given URL.
       *
       * @param array $url Array of parts that make up this entry; reads `loc` and the optional `lastmod`.
       *
       * @return string
       */
      protected function sitemap_index_url( $url ) {

          $date = null;

          if ( ! empty( $url['lastmod'] ) ) {
              $date = $this->timezone->format_date( $url['lastmod'] );
          }

          $url['loc'] = htmlspecialchars( $url['loc'] );

          $output  = "\t<sitemap>\n";
          $output .= "\t\t<loc>" . $url['loc'] . "</loc>\n";
          $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n";
          $output .= "\t</sitemap>\n";

          return $output;
      }

      /**
       * Build the `<url>` tag for a given URL.
       *
       * Public access for backwards compatibility reasons.
       *
       * @param array $url Array of parts that make up this entry; reads `loc` and the
       *                   optional `mod` and `images` (each image: `src`, optional `title` / `alt`).
       *
       * @return string
       */
      public function sitemap_url( $url ) {

          $date = null;

          if ( ! empty( $url['mod'] ) ) {
              $date = $this->timezone->format_date( $url['mod'] );
          }

          // Encode the raw URL first and escape for XML afterwards. Escaping first turns "&" into
          // "&amp;", which the query re-encoding would then mistake for part of a parameter name.
          $loc = htmlspecialchars( $this->encode_url_rfc3986( $url['loc'] ) );

          // The entry handed to the filter below keeps its escaped (not re-encoded) location.
          $url['loc'] = htmlspecialchars( $url['loc'] );

          $output  = "\t<url>\n";
          $output .= "\t\t<loc>" . $loc . "</loc>\n";
          $output .= empty( $date ) ? '' : "\t\t<lastmod>" . htmlspecialchars( $date ) . "</lastmod>\n";

          if ( empty( $url['images'] ) ) {
              $url['images'] = array();
          }

          foreach ( $url['images'] as $img ) {

              // An image without a source cannot be listed.
              if ( empty( $img['src'] ) ) {
                  continue;
              }

              $output .= "\t\t<image:image>\n";
              $output .= "\t\t\t<image:loc>" . esc_html( $this->encode_url_rfc3986( $img['src'] ) ) . "</image:loc>\n";

              if ( ! empty( $img['title'] ) ) {
                  $title   = $this->prepare_image_text( $img['title'] );
                  $output .= "\t\t\t<image:title><![CDATA[{$title}]]></image:title>\n";
              }

              if ( ! empty( $img['alt'] ) ) {
                  $alt     = $this->prepare_image_text( $img['alt'] );
                  $output .= "\t\t\t<image:caption><![CDATA[{$alt}]]></image:caption>\n";
              }

              $output .= "\t\t</image:image>\n";
          }

          $output .= "\t</url>\n";

          /**
           * Filters the output for the sitemap url tag.
           *
           * @api   string $output The output for the sitemap url tag.
           *
           * @param array  $url The sitemap url array on which the output is based.
           */
          return apply_filters( 'wpseo_sitemap_url', $output, $url );
      }

      /**
       * Prepare an image title or alt text for output inside a CDATA section.
       *
       * Converts the text to the output charset when needed, decodes HTML entities
       * and escapes special characters again.
       *
       * @param string $text Raw image title or alt text.
       *
       * @return string
       */
      protected function prepare_image_text( $text ) {

          if ( $this->needs_conversion ) {
              $text = mb_convert_encoding( $text, $this->output_charset, $this->charset );
          }

          return _wp_specialchars( html_entity_decode( $text, ENT_QUOTES, $this->output_charset ) );
      }

      /**
       * Apply some best effort conversion to comply with RFC3986.
       *
       * URLs that already pass FILTER_VALIDATE_URL are returned untouched. Otherwise each
       * path segment is raw-URL-encoded and the query string is rebuilt with RFC 3986 encoding.
       *
       * @param string $url URL to encode.
       *
       * @return string
       */
      protected function encode_url_rfc3986( $url ) {

          if ( filter_var( $url, FILTER_VALIDATE_URL ) ) {
              return $url;
          }

          $path = parse_url( $url, PHP_URL_PATH );

          if ( ! empty( $path ) && '/' !== $path ) {

              // Encode segment by segment so the "/" separators survive.
              $encoded_path = explode( '/', $path );
              $encoded_path = array_map( 'rawurlencode', $encoded_path );
              $encoded_path = implode( '/', $encoded_path );
              $encoded_path = str_replace( '%7E', '~', $encoded_path ); // PHP <5.3.

              // NOTE(review): replaces every occurrence of the path text, not only the path component.
              $url = str_replace( $path, $encoded_path, $url );
          }

          $query = parse_url( $url, PHP_URL_QUERY );

          if ( ! empty( $query ) ) {

              parse_str( $query, $parsed_query );

              // The numeric prefix is passed as '' rather than null: null is deprecated for this argument since PHP 8.1.
              if ( defined( 'PHP_QUERY_RFC3986' ) ) { // PHP 5.4+.
                  $parsed_query = http_build_query( $parsed_query, '', '&', PHP_QUERY_RFC3986 );
              }
              else {
                  $parsed_query = http_build_query( $parsed_query, '', '&' );
                  $parsed_query = str_replace( '+', '%20', $parsed_query );
                  $parsed_query = str_replace( '%7E', '~', $parsed_query );
              }

              $url = str_replace( $query, $parsed_query, $url );
          }

          return $url;
      }
  }