Jetpack_Sitemap_Manager

Governs the generation, storage, and serving of sitemaps.

Defined (1)

The class is defined in the following location(s).

/modules/sitemaps/sitemaps.php  
  /**
   * Governs the generation, storage, and serving of sitemaps.
   *
   * On construction the manager registers the WordPress hooks that route
   * sitemap URLs, schedule sitemap generation through wp_cron, advertise the
   * sitemaps in robots.txt, flush the cached news sitemap when a post is
   * published, and persist the filtered sitemap location as an option.
   *
   * @since 4.8.0
   */
  class Jetpack_Sitemap_Manager {

      /**
       * Librarian object for storing and retrieving sitemap data.
       *
       * @see Jetpack_Sitemap_Librarian
       * @since 4.8.0
       * @var Jetpack_Sitemap_Librarian $librarian
       */
      private $librarian;

      /**
       * Logger object for reporting debug messages.
       *
       * Only assigned when WP_DEBUG is defined and strictly true; otherwise
       * it stays null.
       *
       * @see Jetpack_Sitemap_Logger
       * @since 4.8.0
       * @var Jetpack_Sitemap_Logger $logger
       */
      private $logger;

      /**
       * Finder object for dealing with sitemap URIs.
       *
       * @see Jetpack_Sitemap_Finder
       * @since 4.8.0
       * @var Jetpack_Sitemap_Finder $finder
       */
      private $finder;

      /**
       * Construct a new Jetpack_Sitemap_Manager.
       *
       * Creates the collaborating objects and registers every hook the
       * sitemaps module needs.
       *
       * @access public
       * @since 4.8.0
       */
      public function __construct() {
          $this->librarian = new Jetpack_Sitemap_Librarian();
          $this->finder    = new Jetpack_Sitemap_Finder();

          if ( defined( 'WP_DEBUG' ) && ( true === WP_DEBUG ) ) {
              $this->logger = new Jetpack_Sitemap_Logger();
          }

          // Add callback for sitemap URL handler.
          add_action(
              'init',
              array( $this, 'callback_action_catch_sitemap_urls' )
          );

          // Add generator to wp_cron task list.
          $this->schedule_sitemap_generation();

          // Add sitemap to robots.txt.
          add_action(
              'do_robotstxt',
              array( $this, 'callback_action_do_robotstxt' ),
              20
          );

          // The news sitemap is cached; here we add a callback to
          // flush the cached news sitemap when a post is published.
          add_action(
              'publish_post',
              array( $this, 'callback_action_flush_news_sitemap_cache' ),
              10
          );

          /*
           * Module parameters are stored as options in the database.
           * This allows us to avoid having to process all of init
           * before serving the sitemap data. The following actions
           * process and store these filters.
           */

          // Process filters and store location string for sitemap.
          add_action(
              'init',
              array( $this, 'callback_action_filter_sitemap_location' ),
              999
          );
      }

      /**
       * Echo a raw string of given content-type, then terminate the request.
       *
       * When there is nothing to serve, the request ends with a 404 error
       * page through wp_die() instead. This method never returns.
       *
       * @access private
       * @since 4.8.0
       *
       * @param string $the_content_type The content type to be served.
       * @param string $the_content      The string to be echoed.
       */
      private function serve_raw_and_die( $the_content_type, $the_content ) {
          header( 'Content-Type: ' . $the_content_type . '; charset=UTF-8' );

          // A non-string value (for example false or null) is treated the same
          // as an empty string, so it yields a 404 rather than an empty body.
          if ( ! is_string( $the_content ) || '' === $the_content ) {
              wp_die(
                  esc_html__( "No sitemap found. Maybe it's being generated. Please try again later.", 'jetpack' ),
                  esc_html__( 'Sitemaps', 'jetpack' ),
                  array(
                      'response' => 404,
                  )
              );
          }

          echo $the_content;

          die();
      }

      /**
       * Callback to intercept sitemap url requests and serve sitemap files.
       *
       * The requested URI is handed to the finder; if it yields a sitemap
       * name, that name is matched against the known sitemap and stylesheet
       * patterns. The first match is served and the request terminates.
       * Requests that match nothing fall through untouched.
       *
       * @access public
       * @since 4.8.0
       */
      public function callback_action_catch_sitemap_urls() {
          // Regular expressions for sitemap URL routing.
          $regex = array(
              'master'        => '/^sitemap\.xml$/',
              'sitemap'       => '/^sitemap-[1-9][0-9]*\.xml$/',
              'index'         => '/^sitemap-index-[1-9][0-9]*\.xml$/',
              'sitemap-style' => '/^sitemap\.xsl$/',
              'index-style'   => '/^sitemap-index\.xsl$/',
              'image'         => '/^image-sitemap-[1-9][0-9]*\.xml$/',
              'image-index'   => '/^image-sitemap-index-[1-9][0-9]*\.xml$/',
              'image-style'   => '/^image-sitemap\.xsl$/',
              'video'         => '/^video-sitemap-[1-9][0-9]*\.xml$/',
              'video-index'   => '/^video-sitemap-index-[1-9][0-9]*\.xml$/',
              'video-style'   => '/^video-sitemap\.xsl$/',
              'news'          => '/^news-sitemap\.xml$/',
              'news-style'    => '/^news-sitemap\.xsl$/',
          );

          // The raw path(+query) of the requested URI.
          if ( isset( $_SERVER['REQUEST_URI'] ) ) { // WPCS: Input var okay.
              $raw_uri = sanitize_text_field(
                  wp_unslash( $_SERVER['REQUEST_URI'] ) // WPCS: Input var okay.
              );
          } else {
              $raw_uri = '';
          }

          $request = $this->finder->recognize_sitemap_uri( $raw_uri );

          // URL did not match any sitemap patterns.
          if ( ! isset( $request['sitemap_name'] ) ) {
              return;
          }

          $sitemap_name = $request['sitemap_name'];

          /**
           * Filter the content type used to serve the sitemap XML files.
           *
           * @module sitemaps
           *
           * @since 3.9.0
           *
           * @param string $xml_content_type By default, it's 'text/xml'.
           */
          $xml_content_type = apply_filters( 'jetpack_sitemap_content_type', 'text/xml' );

          // Catch master sitemap xml.
          if ( preg_match( $regex['master'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      jp_sitemap_filename( JP_MASTER_SITEMAP_TYPE, 0 ),
                      JP_MASTER_SITEMAP_TYPE
                  )
              );
          }

          // Catch sitemap xml.
          if ( preg_match( $regex['sitemap'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      $sitemap_name,
                      JP_PAGE_SITEMAP_TYPE
                  )
              );
          }

          // Catch sitemap index xml.
          if ( preg_match( $regex['index'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      $sitemap_name,
                      JP_PAGE_SITEMAP_INDEX_TYPE
                  )
              );
          }

          // Catch sitemap xsl.
          if ( preg_match( $regex['sitemap-style'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  'application/xml',
                  Jetpack_Sitemap_Stylist::sitemap_xsl()
              );
          }

          // Catch sitemap index xsl.
          if ( preg_match( $regex['index-style'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  'application/xml',
                  Jetpack_Sitemap_Stylist::sitemap_index_xsl()
              );
          }

          // Catch image sitemap xml.
          if ( preg_match( $regex['image'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      $sitemap_name,
                      JP_IMAGE_SITEMAP_TYPE
                  )
              );
          }

          // Catch image sitemap index xml.
          if ( preg_match( $regex['image-index'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      $sitemap_name,
                      JP_IMAGE_SITEMAP_INDEX_TYPE
                  )
              );
          }

          // Catch image sitemap xsl.
          if ( preg_match( $regex['image-style'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  'application/xml',
                  Jetpack_Sitemap_Stylist::image_sitemap_xsl()
              );
          }

          // Catch video sitemap xml.
          if ( preg_match( $regex['video'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      $sitemap_name,
                      JP_VIDEO_SITEMAP_TYPE
                  )
              );
          }

          // Catch video sitemap index xml.
          if ( preg_match( $regex['video-index'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $this->librarian->get_sitemap_text(
                      $sitemap_name,
                      JP_VIDEO_SITEMAP_INDEX_TYPE
                  )
              );
          }

          // Catch video sitemap xsl.
          if ( preg_match( $regex['video-style'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  'application/xml',
                  Jetpack_Sitemap_Stylist::video_sitemap_xsl()
              );
          }

          // Catch news sitemap xml.
          if ( preg_match( $regex['news'], $sitemap_name ) ) {
              $sitemap_builder = new Jetpack_Sitemap_Builder();
              $this->serve_raw_and_die(
                  $xml_content_type,
                  $sitemap_builder->news_sitemap_xml()
              );
          }

          // Catch news sitemap xsl.
          if ( preg_match( $regex['news-style'], $sitemap_name ) ) {
              $this->serve_raw_and_die(
                  'application/xml',
                  Jetpack_Sitemap_Stylist::news_sitemap_xsl()
              );
          }

          // The sitemap name did not match any known pattern; nothing to serve.
      }

      /**
       * Callback for adding sitemap-interval to the list of schedules.
       *
       * @access public
       * @since 4.8.0
       *
       * @param array $schedules The array of WP_Cron schedules.
       * @return array The updated array of WP_Cron schedules.
       */
      public function callback_add_sitemap_schedule( $schedules ) {
          $schedules['sitemap-interval'] = array(
              'interval' => JP_SITEMAP_INTERVAL,
              'display'  => __( 'Sitemap Interval', 'jetpack' ),
          );

          return $schedules;
      }

      /**
       * Add actions to schedule sitemap generation.
       *
       * Should only be called once, in the constructor.
       *
       * @access private
       * @since 4.8.0
       */
      private function schedule_sitemap_generation() {
          // Add cron schedule.
          add_filter( 'cron_schedules', array( $this, 'callback_add_sitemap_schedule' ) );

          $sitemap_builder = new Jetpack_Sitemap_Builder();

          add_action(
              'jp_sitemap_cron_hook',
              array( $sitemap_builder, 'update_sitemap' )
          );

          // Only queue the recurring event if it is not already scheduled.
          if ( ! wp_next_scheduled( 'jp_sitemap_cron_hook' ) ) {
              wp_schedule_event(
                  time(),
                  'sitemap-interval',
                  'jp_sitemap_cron_hook'
              );
          }
      }

      /**
       * Callback to add sitemap to robots.txt.
       *
       * Echoes one "Sitemap:" line for the main sitemap and one for the news
       * sitemap, each only when its filter returns strictly true.
       *
       * @access public
       * @since 4.8.0
       */
      public function callback_action_do_robotstxt() {

          /**
           * Filter whether to make the default sitemap discoverable to robots or not. Default true.
           *
           * @module sitemaps
           *
           * @since 3.9.0
           *
           * @param bool $discover_sitemap Make default sitemap discoverable to robots.
           */
          $discover_sitemap = apply_filters( 'jetpack_sitemap_generate', true );

          if ( true === $discover_sitemap ) {
              $sitemap_url = $this->finder->construct_sitemap_url( 'sitemap.xml' );
              echo 'Sitemap: ' . esc_url( $sitemap_url ) . "\n";
          }

          /**
           * Filter whether to make the news sitemap discoverable to robots or not. Default true.
           *
           * @module sitemaps
           *
           * @since 3.9.0
           *
           * @param bool $discover_news_sitemap Make default news sitemap discoverable to robots.
           */
          $discover_news_sitemap = apply_filters( 'jetpack_news_sitemap_generate', true );

          if ( true === $discover_news_sitemap ) {
              $news_sitemap_url = $this->finder->construct_sitemap_url( 'news-sitemap.xml' );
              echo 'Sitemap: ' . esc_url( $news_sitemap_url ) . "\n";
          }
      }

      /**
       * Callback to delete the news sitemap cache.
       *
       * @access public
       * @since 4.8.0
       */
      public function callback_action_flush_news_sitemap_cache() {
          delete_transient( 'jetpack_news_sitemap_xml' );
      }

      /**
       * Callback to set the sitemap location.
       *
       * Runs the 'jetpack_sitemap_location' filter and stores the result in
       * the option of the same name.
       *
       * @access public
       * @since 4.8.0
       */
      public function callback_action_filter_sitemap_location() {
          update_option(
              'jetpack_sitemap_location',
              /**
               * Additional path for sitemap URIs. Default value is empty.
               *
               * This string is any additional path fragment you want included between
               * the home URL and the sitemap filenames. Exactly how this fragment is
               * interpreted depends on your permalink settings. For example:
               *
               *   Pretty permalinks:
               *     home_url() . jetpack_sitemap_location . '/sitemap.xml'
               *
               *   Plain ("ugly") permalinks:
               *     home_url() . jetpack_sitemap_location . '/?jetpack-sitemap=sitemap.xml'
               *
               *   PATHINFO permalinks:
               *     home_url() . '/index.php' . jetpack_sitemap_location . '/sitemap.xml'
               *
               * where 'sitemap.xml' is the name of a specific sitemap file.
               * The value of this filter must be a valid path fragment per RFC 3986;
               * in particular it must either be empty or begin with a '/'.
               * Also take care that any restrictions on sitemap location imposed by
               * the sitemap protocol are satisfied.
               *
               * The result of this filter is stored in an option, 'jetpack_sitemap_location';
               * that option is what gets read when the sitemap location is needed.
               * This way we don't have to wait for init to finish before building sitemaps.
               *
               * @link https://tools.ietf.org/html/rfc3986#section-3.3 RFC 3986
               * @link http://www.sitemaps.org/ The sitemap protocol
               *
               * @since 4.8.0
               */
              apply_filters(
                  'jetpack_sitemap_location',
                  ''
              )
          );
      }

  } // End Jetpack_Sitemap_Manager class.