WPSEO_Sitemaps

Class WPSEO_Sitemaps.

Defined (1)

The class is defined in the following location(s).

/inc/sitemaps/class-sitemaps.php  
  /**
   * Handles requests for XML sitemaps and their XSL stylesheets.
   *
   * Hooks into the main query, resolves the requested sitemap type through the
   * registered providers (or a custom `wpseo_do_sitemap_*` handler), optionally
   * serves the result from cache, and prints it with the matching headers.
   */
  class WPSEO_Sitemaps {

      /** Sitemap index identifier. */
      const SITEMAP_INDEX_TYPE = '1';

      /** @var string $sitemap Content of the sitemap to output. */
      protected $sitemap = '';

      /** @var bool $bad_sitemap Flag to indicate if this is an invalid or empty sitemap. */
      public $bad_sitemap = false;

      /** @var bool $transient Whether or not the XML sitemap was served from a transient or not. */
      private $transient = false;

      /** @var int $max_entries The maximum number of entries per sitemap page. */
      private $max_entries;

      /**
       * @var string $http_protocol HTTP protocol to use in headers.
       * @since 3.2
       */
      protected $http_protocol = 'HTTP/1.1';

      /** @var int $current_page Holds the n variable (the sitemap page being generated). */
      private $current_page = 1;

      /** @var WPSEO_Sitemap_Timezone $timezone Helper used to format modification dates. */
      private $timezone;

      /**
       * @var WPSEO_Sitemaps_Router $router
       * @since 3.2
       */
      public $router;

      /**
       * @var WPSEO_Sitemaps_Renderer $renderer
       * @since 3.2
       */
      public $renderer;

      /**
       * @var WPSEO_Sitemaps_Cache $cache
       * @since 3.2
       */
      public $cache;

      /**
       * @var WPSEO_Sitemap_Provider[] $providers
       * @since 3.2
       */
      public $providers;

      /**
       * Class constructor.
       *
       * Registers the hooks, reads the entries-per-page option and instantiates
       * the collaborators (timezone, router, renderer, cache and providers).
       */
      public function __construct() {

          add_action( 'after_setup_theme', array( $this, 'reduce_query_load' ), 99 );
          add_action( 'pre_get_posts', array( $this, 'redirect' ), 1 );
          add_action( 'wpseo_hit_sitemap_index', array( $this, 'hit_sitemap_index' ) );
          add_action( 'wpseo_ping_search_engines', array( __CLASS__, 'ping_search_engines' ) );

          $options           = WPSEO_Options::get_all();
          $this->max_entries = $options['entries-per-page'];
          $this->timezone    = new WPSEO_Sitemap_Timezone();
          $this->router      = new WPSEO_Sitemaps_Router();
          $this->renderer    = new WPSEO_Sitemaps_Renderer();
          $this->cache       = new WPSEO_Sitemaps_Cache();
          $this->providers   = array( // TODO API for add/remove. R.
              new WPSEO_Post_Type_Sitemap_Provider(),
              new WPSEO_Taxonomy_Sitemap_Provider(),
              new WPSEO_Author_Sitemap_Provider(),
          );

          // Mirror the protocol of the incoming request in our own status headers.
          if ( ! empty( $_SERVER['SERVER_PROTOCOL'] ) ) {
              $this->http_protocol = sanitize_text_field( $_SERVER['SERVER_PROTOCOL'] );
          }
      }

      /**
       * Check the current request URI, if we can determine it's probably an XML sitemap, kill loading the widgets.
       */
      public function reduce_query_load() {

          if ( ! isset( $_SERVER['REQUEST_URI'] ) ) {
              return;
          }

          $request_uri = $_SERVER['REQUEST_URI'];
          $extension   = substr( $request_uri, -4 );

          // Strict comparison: only the literal extensions qualify.
          if ( false !== stripos( $request_uri, 'sitemap' ) && in_array( $extension, array( '.xml', '.xsl' ), true ) ) {
              remove_all_actions( 'widgets_init' );
          }
      }

      /**
       * Register your own sitemap. Call this during 'init'.
       *
       * @param string   $name     The name of the sitemap.
       * @param callback $function Function to build your sitemap.
       * @param string   $rewrite  Optional. Regular expression to match your sitemap with.
       */
      public function register_sitemap( $name, $function, $rewrite = '' ) {
          add_action( 'wpseo_do_sitemap_' . $name, $function );
          if ( ! empty( $rewrite ) ) {
              add_rewrite_rule( $rewrite, 'index.php?sitemap=' . $name, 'top' );
          }
      }

      /**
       * Register your own XSL file. Call this during 'init'.
       *
       * @since 1.4.23
       *
       * @param string   $name     The name of the XSL file.
       * @param callback $function Function to build your XSL file.
       * @param string   $rewrite  Optional. Regular expression to match your sitemap with.
       */
      public function register_xsl( $name, $function, $rewrite = '' ) {
          add_action( 'wpseo_xsl_' . $name, $function );
          if ( ! empty( $rewrite ) ) {
              add_rewrite_rule( $rewrite, 'index.php?xsl=' . $name, 'top' );
          }
      }

      /**
       * Set the sitemap current page to allow creating partial sitemaps with wp-cli
       * in a one-off process.
       *
       * Values that are not scalar or do not convert to a positive integer are
       * ignored, leaving the current page untouched.
       *
       * @param integer $current_page The part that should be generated.
       */
      public function set_n( $current_page ) {
          if ( is_scalar( $current_page ) && intval( $current_page ) > 0 ) {
              $this->current_page = intval( $current_page );
          }
      }

      /**
       * Set the sitemap content to display after you have generated it.
       *
       * @param string $sitemap The generated sitemap to output.
       */
      public function set_sitemap( $sitemap ) {
          $this->sitemap = $sitemap;
      }

      /**
       * Set as true to make the request 404. Used to stop the display of empty sitemaps or invalid requests.
       *
       * @param bool $bool Is this a bad request. True or false.
       */
      public function set_bad_sitemap( $bool ) {
          $this->bad_sitemap = (bool) $bool;
      }

      /**
       * Prevent stupid plugins from running shutdown scripts when we're obviously not outputting HTML.
       *
       * Removes every 'wp_footer' action and terminates the request.
       *
       * @since 1.4.16
       */
      public function sitemap_close() {
          remove_all_actions( 'wp_footer' );
          die();
      }

      /**
       * Hijack requests for potential sitemaps and XSL files.
       *
       * @param \WP_Query $query Main query instance.
       */
      public function redirect( $query ) {

          if ( ! $query->is_main_query() ) {
              return;
          }

          $xsl = get_query_var( 'xsl' );

          if ( ! empty( $xsl ) ) {
              $this->xsl_output( $xsl );
              $this->sitemap_close();

              return;
          }

          $type = get_query_var( 'sitemap' );

          if ( empty( $type ) ) {
              return;
          }

          $this->set_n( get_query_var( 'sitemap_n' ) );

          // Build on the fly whenever the cache did not provide the sitemap.
          if ( ! $this->get_sitemap_from_cache( $type, $this->current_page ) ) {
              $this->build_sitemap( $type );
          }

          if ( $this->bad_sitemap ) {
              $query->set_404();
              status_header( 404 );

              return;
          }

          $this->output();
          $this->sitemap_close();
      }

      /**
       * Try to get the sitemap from cache.
       *
       * @param string $type        Sitemap type.
       * @param int    $page_number The page number to retrieve.
       *
       * @return bool If the sitemap has been retrieved from cache.
       */
      private function get_sitemap_from_cache( $type, $page_number ) {

          $this->transient = false;

          if ( true !== $this->cache->is_enabled() ) {
              return false;
          }

          /**
           * Fires before the attempt to retrieve XML sitemap from the transient cache.
           *
           * @param WPSEO_Sitemaps $sitemaps Sitemaps object.
           */
          do_action( 'wpseo_sitemap_stylesheet_cache_' . $type, $this );

          $sitemap_cache_data = $this->cache->get_sitemap_data( $type, $page_number );

          // No cache was found, refresh it because cache is enabled.
          if ( empty( $sitemap_cache_data ) ) {
              return $this->refresh_sitemap_cache( $type, $page_number );
          }

          // Cache object was found, parse information.
          $this->transient = true;

          $this->sitemap     = $sitemap_cache_data->get_sitemap();
          $this->bad_sitemap = ! $sitemap_cache_data->is_usable();

          return true;
      }

      /**
       * Build and save sitemap to cache.
       *
       * @param string $type        Sitemap type.
       * @param int    $page_number The page number to save to.
       *
       * @return bool Result of the cache's store_sitemap() call.
       */
      private function refresh_sitemap_cache( $type, $page_number ) {
          $this->set_n( $page_number );
          $this->build_sitemap( $type );

          return $this->cache->store_sitemap( $type, $page_number, $this->sitemap, ! $this->bad_sitemap );
      }

      /**
       * Attempts to build the requested sitemap.
       *
       * Sets $bad_sitemap if no provider or custom handler can build the type,
       * or if the responsible provider returns no links.
       *
       * @param string $type The requested sitemap's identifier.
       */
      public function build_sitemap( $type ) {

          /**
           * Filter the type of sitemap to build.
           *
           * @param string $type Sitemap type, determined by the request.
           */
          $type = apply_filters( 'wpseo_build_sitemap_post_type', $type );

          if ( $type === self::SITEMAP_INDEX_TYPE ) {
              $this->build_root_map();

              return;
          }

          // The first provider that handles the type decides the outcome.
          foreach ( $this->providers as $provider ) {
              if ( ! $provider->handles_type( $type ) ) {
                  continue;
              }

              $links = $provider->get_sitemap_links( $type, $this->max_entries, $this->current_page );

              if ( empty( $links ) ) {
                  $this->bad_sitemap = true;

                  return;
              }

              $this->sitemap = $this->renderer->get_sitemap( $links, $type, $this->current_page );

              return;
          }

          if ( has_action( 'wpseo_do_sitemap_' . $type ) ) {
              /**
               * Fires custom handler, if hooked to generate sitemap for the type.
               */
              do_action( 'wpseo_do_sitemap_' . $type );

              return;
          }

          $this->bad_sitemap = true;
      }

      /**
       * Build the root sitemap (example.com/sitemap_index.xml) which lists sub-sitemaps for other content types.
       */
      public function build_root_map() {

          $links = array();

          foreach ( $this->providers as $provider ) {
              $links = array_merge( $links, $provider->get_index_links( $this->max_entries ) );
          }

          if ( empty( $links ) ) {
              $this->bad_sitemap = true;
              $this->sitemap     = '';

              return;
          }

          $this->sitemap = $this->renderer->get_index( $links );
      }

      /**
       * Spits out the XSL for the XML sitemap.
       *
       * @since 1.4.13
       *
       * @param string $type Type to output.
       */
      public function xsl_output( $type ) {

          if ( $type !== 'main' ) {

              /**
               * Fires for the output of XSL for XML sitemaps, other than type "main".
               */
              do_action( 'wpseo_xsl_' . $type );

              return;
          }

          header( $this->http_protocol . ' 200 OK', true, 200 );
          // Prevent the search engines from indexing the XML Sitemap.
          header( 'X-Robots-Tag: noindex, follow', true );
          header( 'Content-Type: text/xml' );

          // Make the browser cache this file properly.
          $expires = YEAR_IN_SECONDS;
          header( 'Pragma: public' );
          // The directive is spelled "max-age"; "maxage" is not a valid Cache-Control directive.
          header( 'Cache-Control: max-age=' . $expires );
          header( 'Expires: ' . gmdate( 'D, d M Y H:i:s', ( time() + $expires ) ) . ' GMT' );

          require_once( WPSEO_PATH . 'css/xml-sitemap-xsl.php' );
      }

      /**
       * Spit out the generated sitemap and relevant headers and encoding information.
       */
      public function output() {

          if ( ! headers_sent() ) {
              header( $this->http_protocol . ' 200 OK', true, 200 );
              // Prevent the search engines from indexing the XML Sitemap.
              header( 'X-Robots-Tag: noindex, follow', true );
              header( 'Content-Type: text/xml; charset=' . esc_attr( $this->renderer->get_output_charset() ) );
          }

          echo $this->renderer->get_output( $this->sitemap, $this->transient );
      }

      /**
       * Make a request for the sitemap index so as to cache it before the arrival of the search engines.
       */
      public function hit_sitemap_index() {
          wp_remote_get( WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ) );
      }

      /**
       * Get the GMT modification date for the last modified post in the post type.
       *
       * Results for all public post types are fetched in one query and kept in a
       * static variable; the query is re-run when a requested post type is not
       * present in that stored result.
       *
       * @since 3.2
       *
       * @param string|array $post_types Post type or array of types.
       * @param bool         $return_all Flag to return array of values.
       *
       * @return string|array|false Latest date, array of dates keyed by post type
       *                            when $return_all is set, or false when none found.
       */
      public static function get_last_modified_gmt( $post_types, $return_all = false ) {

          global $wpdb;

          static $post_type_dates = null;

          if ( ! is_array( $post_types ) ) {
              $post_types = array( $post_types );
          }

          foreach ( $post_types as $post_type ) {
              if ( ! isset( $post_type_dates[ $post_type ] ) ) { // If we hadn't seen post type before. R.
                  $post_type_dates = null;
                  break;
              }
          }

          if ( is_null( $post_type_dates ) ) {

              // Order by the aggregated alias: the raw post_modified_gmt column is neither
              // grouped nor aggregated, which MySQL rejects under ONLY_FULL_GROUP_BY.
              $sql = "
                  SELECT post_type, MAX(post_modified_gmt) AS date
                  FROM {$wpdb->posts}
                  WHERE post_status IN ('publish', 'inherit')
                      AND post_type IN ('" . implode( "', '", get_post_types( array( 'public' => true ) ) ) . "')
                  GROUP BY post_type
                  ORDER BY date DESC
              ";

              $post_type_dates = array();

              // Cast guards against a null result, which would otherwise trigger a foreach warning.
              foreach ( (array) $wpdb->get_results( $sql ) as $obj ) {
                  $post_type_dates[ $obj->post_type ] = $obj->date;
              }
          }

          $dates = array_intersect_key( $post_type_dates, array_flip( $post_types ) );

          if ( count( $dates ) > 0 ) {

              if ( $return_all ) {
                  return $dates;
              }

              return max( $dates );
          }

          return false;
      }

      /**
       * Get the modification date for the last modified post in the post type.
       *
       * @param array $post_types Post types to get the last modification date for.
       *
       * @return string
       */
      public function get_last_modified( $post_types ) {

          return $this->timezone->format_date( self::get_last_modified_gmt( $post_types ) );
      }

      /**
       * Notify search engines of the updated sitemap.
       *
       * Only the default sitemap index URL is URL-encoded here; a custom $url is
       * appended to the ping endpoints as given.
       *
       * @param string|null $url Optional URL to make the ping for.
       */
      public static function ping_search_engines( $url = null ) {

          /**
           * Filter: 'wpseo_allow_xml_sitemap_ping' - Check if pinging is not allowed (allowed by default)
           *
           * @api boolean $allow_ping The boolean that is set to true by default.
           */
          if ( apply_filters( 'wpseo_allow_xml_sitemap_ping', true ) === false ) {
              return;
          }

          if ( '0' === get_option( 'blog_public' ) ) { // Don't ping if blog is not public.
              return;
          }

          if ( empty( $url ) ) {
              $url = urlencode( WPSEO_Sitemaps_Router::get_base_url( 'sitemap_index.xml' ) );
          }

          // Ping Google and Bing.
          wp_remote_get( 'http://www.google.com/webmasters/tools/ping?sitemap=' . $url, array( 'blocking' => false ) );
          wp_remote_get( 'http://www.bing.com/ping?sitemap=' . $url, array( 'blocking' => false ) );
      }

      /**
       * Build the `<url>` tag for a given URL.
       *
       * @deprecated 3.2
       * @see WPSEO_Sitemaps_Renderer::sitemap_url()
       *
       * @param array $url Array of parts that make up this entry.
       *
       * @return string
       */
      public function sitemap_url( $url ) {
          _deprecated_function( __METHOD__, 'WPSEO 3.2', 'WPSEO_Sitemaps_Renderer::sitemap_url()' );

          return $this->renderer->sitemap_url( $url );
      }

      /**
       * Set a custom stylesheet for this sitemap. Set to empty to just remove the default stylesheet.
       *
       * @deprecated 3.2
       * @see WPSEO_Sitemaps_Renderer::set_stylesheet()
       *
       * @param string $stylesheet Full xml-stylesheet declaration.
       */
      public function set_stylesheet( $stylesheet ) {
          _deprecated_function( __METHOD__, 'WPSEO 3.2', 'WPSEO_Sitemaps_Renderer::set_stylesheet()' );
          $this->renderer->set_stylesheet( $stylesheet );
      }

      /**
       * Function to dynamically filter the change frequency.
       *
       * @deprecated 3.5 Change frequency data dropped from sitemaps.
       *
       * @param string $filter  Expands to wpseo_sitemap_$filter_change_freq, allowing for a change of the frequency for
       *                        numerous specific URLs.
       * @param string $default The default value for the frequency.
       * @param string $url     The URL of the current entry.
       *
       * @return mixed|void The filtered frequency, or $default when the filtered value is not a known frequency.
       */
      public static function filter_frequency( $filter, $default, $url ) {
          _deprecated_function( __METHOD__, 'WPSEO 3.5' );

          /**
           * Filter the specific change frequency
           *
           * @param string $default The default change frequency.
           * @param string $url     URL to filter frequency for.
           */
          $change_freq = apply_filters( 'wpseo_sitemap_' . $filter . '_change_freq', $default, $url );

          $valid_frequencies = array(
              'always',
              'hourly',
              'daily',
              'weekly',
              'monthly',
              'yearly',
              'never',
          );

          // Strict comparison so loosely-equal values such as 0 or true are not accepted as a frequency.
          if ( ! in_array( $change_freq, $valid_frequencies, true ) ) {
              $change_freq = $default;
          }

          return $change_freq;
      }
  }