/bp-core/classes/class-bp-media-extractor.php

  1. <?php 
  2. /** 
  3. * Core component classes. 
  4. * 
  5. * @package BuddyPress 
  6. * @subpackage Core 
  7. * @since 2.3.0 
  8. */ 
  9.  
  10. // Exit if accessed directly. 
  11. defined( 'ABSPATH' ) || exit; 
  12.  
  13. /** 
  14. * Extracts media from text. Use {@link extract()}. 
  15. * 
  16. * @since 2.3.0 
  17. * 
  18. * The supported types are links, mentions, images, shortcodes, embeds, audio, video, and "all". 
  19. * This is what each type extracts: 
  20. * 
  21. * Links: <a href="http://example.com"> 
  22. * Mentions: @name 
  23. * If the Activity component is enabled, we use it to parse out any at-names. A consequence 
  24. * to note is that the "name" mentioned must match a real user account. If it's a made-up 
  25. * at-name, then it isn't extracted. 
  26. * If the Activity component is disabled, any at-name is extracted (both those matching 
  27. * real accounts, and those made-up). 
  28. * Images: <img src="image.gif">, [gallery] shortcodes, featured images (Post thumbnails).
  29. * If an extracted image is in the Media Library, then its resolution will be included. 
  30. * Shortcodes: Extract information about any (registered) shortcodes. 
  31. * This includes any shortcodes indirectly covered by any of the other media extraction types. 
  32. * For example, [gallery].
  33. * Embeds: Extract any URL matching a registered oEmbed handler. 
  34. * Audio: <a href="*.mp3">, [audio] shortcodes.
  35. * See wp_get_audio_extensions() for supported audio formats. 
  36. * Video: [video] shortcodes.
  37. * See wp_get_video_extensions() for supported video formats. 
  38. * 
  39. * @see BP_Media_Extractor::extract() Use this to extract media. 
  40. */ 
  41. class BP_Media_Extractor { 
  42. /** 
  43. * Media type. 
  44. * 
  45. * @since 2.3.0 
  46. * @var int 
  47. */ 
  // Bit flags: extract() tests each one against $what_to_extract with `&`,
  // so several types can be requested at once by OR-ing flags together.
  const LINKS      = 1;
  const MENTIONS   = 2;
  const IMAGES     = 4;
  const SHORTCODES = 8;
  const EMBEDS     = 16;
  const AUDIO      = 32;
  const VIDEOS     = 64;

  // Matches every flag above (they sum to 127); the 128 bit is currently unassigned.
  const ALL = 255;
  56.  
  57.  
  58. /** 
  59. * Extract media from text. 
  60. * 
  61. * @since 2.3.0 
  62. * 
  63. * @param string|WP_Post $richtext Content to parse. 
  64. * @param int $what_to_extract Media type to extract (optional). 
  65. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  66. * @return array { 
  67. * @type array $has Extracted media counts. { 
  68. * @type int $audio 
  69. * @type int $embeds 
  70. * @type int $images 
  71. * @type int $links 
  72. * @type int $mentions 
  73. * @type int $shortcodes 
  74. * @type int $videos
  75. * } 
  76. * @type array $audio Extracted audio. { 
  77. * Array of extracted media. 
  78. * 
  79. * @type string $source Media source. Either "html" or "shortcodes". 
  80. * @type string $url Link to audio. 
  81. * } 
  82. * @type array $embeds Extracted oEmbeds. { 
  83. * Array of extracted media. 
  84. * 
  85. * @type string $url oEmbed link. 
  86. * } 
  87. * @type array $images Extracted images. { 
  88. * Array of extracted media. 
  89. * 
  90. * @type int $gallery_id Gallery ID. Optional, not always set. 
  91. * @type int $height Height of image. If unknown, set to 0.
  92. * @type string $source Media source. Either "html" or "galleries". 
  93. * @type string $url Link to image. 
  94. * @type int $width Width of image. If unknown, set to 0. 
  95. * } 
  96. * @type array $links Extracted URLs. { 
  97. * Array of extracted media. 
  98. * 
  99. * @type string $url Link. 
  100. * } 
  101. * @type array $mentions Extracted mentions. { 
  102. * Array of extracted media. 
  103. * 
  104. * @type string $name @mention. 
  105. * @type string $user_id User ID. Optional, only set if Activity component enabled. 
  106. * } 
  107. * @type array $shortcodes Extracted shortcodes. { 
  108. * Array of extracted media. 
  109. * 
  110. * @type array $attributes Key/value pairs of the shortcodes attributes (if any). 
  111. * @type string $content Text wrapped by the shortcode. 
  112. * @type string $type Shortcode type. 
  113. * @type string $original The entire shortcode. 
  114. * } 
  115. * @type array $videos Extracted video. { 
  116. * Array of extracted media. 
  117. * 
  118. * @type string $source Media source. Currently only "shortcodes". 
  119. * @type string $url Link to video.
  120. * } 
  121. * } 
  122. */ 
  public function extract( $richtext, $what_to_extract = self::ALL, $extra_args = array() ) {
    $media = array();

    // Support passing a WordPress Post for the $richtext parameter: keep the
    // post object for the extractors that need it, and parse its content.
    if ( $richtext instanceof WP_Post ) {
      $extra_args['post'] = $richtext;
      $richtext           = $richtext->post_content;
    }

    $plaintext = $this->strip_markup( $richtext );

    // Flag => extractor method. Iteration order is the order results are merged in.
    $extractors = array(
      self::LINKS      => 'extract_links',
      self::MENTIONS   => 'extract_mentions',
      self::IMAGES     => 'extract_images',
      self::SHORTCODES => 'extract_shortcodes',
      self::EMBEDS     => 'extract_embeds',
      self::AUDIO      => 'extract_audio',
      self::VIDEOS     => 'extract_video',
    );

    foreach ( $extractors as $flag => $method ) {
      // Only run the extractors whose bit is set in the requested mask.
      if ( $flag & $what_to_extract ) {
        // Each extractor returns its own 'has' sub-array; merging recursively
        // combines those counts under a single 'has' key.
        $media = array_merge_recursive( $media, $this->$method( $richtext, $plaintext, $extra_args ) );
      }
    }

    /**
     * Filters media extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $media           Extracted media. See {@link BP_Media_Extractor::extract()} for format.
     * @param string $richtext        Content to parse.
     * @param int    $what_to_extract Media type to extract.
     * @param array  $extra_args      Bespoke data for a particular extractor.
     * @param string $plaintext       Copy of $richtext without any markup.
     */
    return apply_filters( 'bp_media_extractor_extract', $media, $richtext, $what_to_extract, $extra_args, $plaintext );
  }
  174.  
  175.  
  176. /** 
  177. * Content type specific extraction methods. 
  178. * 
  179. * You shouldn't need to use these directly; just use {@link BP_Media_Extractor::extract()}. 
  180. */ 
  181.  
  182. /** 
  183. * Extract `<a href>` tags from text. 
  184. * 
  185. * @since 2.3.0 
  186. * 
  187. * @param string $richtext Content to parse. 
  188. * @param string $plaintext Sanitized version of the content. 
  189. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  190. * @return array { 
  191. * @type array $has Extracted media counts. { 
  192. * @type int $links 
  193. * } 
  194. * @type array $links Extracted URLs. { 
  195. * Array of extracted media. 
  196. * 
  197. * @type string $url Link. 
  198. * } 
  199. * } 
  200. */ 
  protected function extract_links( $richtext, $plaintext, $extra_args = array() ) {
    $data = array( 'has' => array( 'links' => 0 ), 'links' => array() );

    // Cheap substring test first, so the regex only runs when an href is present.
    if ( stripos( $richtext, 'href=' ) !== false ) {
      // Matches: href="text" and href='text'.
      preg_match_all( '#href=(["\'])([^"\']+)\1#i', $richtext, $matches );

      if ( ! empty( $matches[2] ) ) {
        $matches[2] = array_unique( $matches[2] );

        foreach ( $matches[2] as $link_src ) {
          $link_src = esc_url_raw( $link_src );

          // Drop anything that does not survive URL sanitization.
          if ( $link_src ) {
            $data['links'][] = array( 'url' => $link_src );
          }
        }
      }
    }

    $data['has']['links'] = count( $data['links'] );

    /**
     * Filters links extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $data       Extracted links. See {@link BP_Media_Extractor::extract_links()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_links', $data, $richtext, $plaintext, $extra_args );
  }
  230.  
  231. /** 
  232. * Extract @mentions tags from text. 
  233. * 
  234. * If the Activity component is enabled, it is used to parse @mentions. 
  235. * The mentioned "name" must match a user account, otherwise it is discarded. 
  236. * 
  237. * If the Activity component is disabled, any @mentions are extracted. 
  238. * 
  239. * @since 2.3.0 
  240. * 
  241. * @param string $richtext Content to parse. 
  242. * @param string $plaintext Sanitized version of the content. 
  243. * @param array $extra_args Bespoke data for a particular extractor. 
  244. * @return array { 
  245. * @type array $has Extracted media counts. { 
  246. * @type int $mentions 
  247. * } 
  248. * @type array $mentions Extracted mentions. { 
  249. * Array of extracted media. 
  250. * 
  251. * @type string $name @mention. 
  252. * @type string $user_id User ID. Optional, only set if Activity component enabled. 
  253. * } 
  254. * } 
  255. */ 
  protected function extract_mentions( $richtext, $plaintext, $extra_args = array() ) {
    $data     = array( 'has' => array( 'mentions' => 0 ), 'mentions' => array() );
    $mentions = array();

    // Looked up once; it decides both how mentions are found and whether the
    // array keys below are stored as user IDs.
    $activity_active = bp_is_active( 'activity' );

    if ( $activity_active ) {
      // If the Activity component is active, use it to parse @mentions.
      $mentions = bp_activity_find_mentions( $plaintext );
      if ( ! $mentions ) {
        $mentions = array();
      }
    } elseif ( strpos( $plaintext, '@' ) !== false ) {
      // If the Activity component is disabled, instead do a basic parse.
      preg_match_all( '/[@]+([A-Za-z0-9-_\.@]+)\b/', $plaintext, $matches );

      if ( ! empty( $matches[1] ) ) {
        $mentions = array_unique( array_map( 'strtolower', $matches[1] ) );
      }
    }

    // Build results.
    foreach ( $mentions as $user_id => $mention_name ) {
      $mention = array( 'name' => strtolower( $mention_name ) );

      // If the Activity component is active, store the User ID (the array key), too.
      if ( $activity_active ) {
        $mention['user_id'] = (int) $user_id;
      }

      $data['mentions'][] = $mention;
    }

    $data['has']['mentions'] = count( $data['mentions'] );

    /**
     * Filters @mentions extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $data       Extracted @mentions. See {@link BP_Media_Extractor::extract_mentions()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor (optional).
     */
    return apply_filters( 'bp_media_extractor_mentions', $data, $richtext, $plaintext, $extra_args );
  }
  297.  
  298. /** 
  299. * Extract images from `<img src>` tags, [galleries], and featured images from a Post. 
  300. * 
  301. * If an image is in the Media Library, then its resolution is included in the results. 
  302. * 
  303. * @since 2.3.0 
  304. * 
  305. * @param string $richtext Content to parse. 
  306. * @param string $plaintext Sanitized version of the content. 
  307. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  308. * @return array { 
  309. * @type array $has Extracted media counts. { 
  310. * @type int $images 
  311. * } 
  312. * @type array $images Extracted images. { 
  313. * Array of extracted media. 
  314. * 
  315. * @type int $gallery_id Gallery ID. Optional, not always set. 
  316. * @type int $height Height of image. If unknown, set to 0.
  317. * @type string $source Media source. Either "html" or "galleries". 
  318. * @type string $url Link to image. 
  319. * @type int $width Width of image. If unknown, set to 0. 
  320. * } 
  321. * } 
  322. */ 
  protected function extract_images( $richtext, $plaintext, $extra_args = array() ) {
    $media = array( 'has' => array( 'images' => 0 ), 'images' => array() );

    $featured_image = $this->extract_images_from_featured_images( $richtext, $plaintext, $extra_args );
    $galleries      = $this->extract_images_from_galleries( $richtext, $plaintext, $extra_args );

    // `<img src>` tags.
    if ( stripos( $richtext, 'src=' ) !== false ) {
      // Matches src="text" and src='text'.
      preg_match_all( '#src=(["\'])([^"\']+)\1#i', $richtext, $img_srcs );

      if ( ! empty( $img_srcs[2] ) ) {
        $img_srcs[2] = array_unique( $img_srcs[2] );

        foreach ( $img_srcs[2] as $image_src ) {
          // Skip data URIs.
          if ( strtolower( substr( $image_src, 0, 5 ) ) === 'data:' ) {
            continue;
          }

          $image_src = esc_url_raw( $image_src );
          if ( ! $image_src ) {
            continue;
          }

          $media['images'][] = array(
            'source' => 'html',
            'url'    => $image_src,

            // The image resolution isn't available, but we need to set the keys anyway.
            'height' => 0,
            'width'  => 0,
          );
        }
      }
    }

    // Galleries.
    if ( ! empty( $galleries ) ) {
      foreach ( $galleries as $gallery ) {
        foreach ( $gallery as $image ) {
          $image_url = esc_url_raw( $image['url'] );
          if ( ! $image_url ) {
            continue;
          }

          $media['images'][] = array(
            'gallery_id' => $image['gallery_id'],
            'source'     => 'galleries',
            'url'        => $image_url,
            'width'      => $image['width'],
            'height'     => $image['height'],
          );
        }
      }

      // Counts galleries, not images; the per-image total is set below.
      $media['has']['galleries'] = count( $galleries );
    }

    // Featured images (aka thumbnails).
    if ( ! empty( $featured_image ) ) {
      // Indexes 0, 1 and 2 are read as URL, width and height respectively.
      $image_url = esc_url_raw( $featured_image[0] );

      if ( $image_url ) {
        $media['images'][] = array(
          'source' => 'featured_images',
          'url'    => $image_url,
          'width'  => $featured_image[1],
          'height' => $featured_image[2],
        );

        $media['has']['featured_images'] = 1;
      }
    }

    // Update image count.
    $media['has']['images'] = count( $media['images'] );

    /**
     * Filters images extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $media      Extracted images. See {@link BP_Media_Extractor::extract_images()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_images', $media, $richtext, $plaintext, $extra_args );
  }
  403.  
  404. /** 
  405. * Extract shortcodes from text. 
  406. * 
  407. * This includes any shortcodes indirectly used by other media extraction types. 
  408. * For example, [audio] and [video].
  409. * 
  410. * @since 2.3.0 
  411. * 
  412. * @param string $richtext Content to parse. 
  413. * @param string $plaintext Sanitized version of the content. 
  414. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  415. * @return array { 
  416. * @type array $has Extracted media counts. { 
  417. * @type int $shortcodes 
  418. * } 
  419. * @type array $shortcodes Extracted shortcodes. { 
  420. * Array of extracted media. 
  421. * 
  422. * @type array $attributes Key/value pairs of the shortcodes attributes (if any). 
  423. * @type string $content Text wrapped by the shortcode. 
  424. * @type string $type Shortcode type. 
  425. * @type string $original The entire shortcode. 
  426. * } 
  427. * } 
  428. */ 
  protected function extract_shortcodes( $richtext, $plaintext, $extra_args = array() ) {
    $data = array( 'has' => array( 'shortcodes' => 0 ), 'shortcodes' => array() );

    // Match any registered WordPress shortcodes.
    if ( strpos( $richtext, '[' ) !== false ) {
      preg_match_all( '/' . get_shortcode_regex() . '/s', $richtext, $matches );

      // Capture groups as used below: 0 = whole shortcode, 2 = name,
      // 3 = raw attribute string, 5 = wrapped content.
      if ( ! empty( $matches[2] ) ) {
        foreach ( $matches[2] as $i => $shortcode_name ) {
          $attrs = shortcode_parse_atts( $matches[3][ $i ] );
          // Normalise "no attributes" to an empty array.
          $attrs = ( ! $attrs ) ? array() : (array) $attrs;

          $data['shortcodes'][] = array(
            'attributes' => $attrs,              // Attributes.
            'content'    => $matches[5][ $i ],   // Content.
            'type'       => $shortcode_name,     // Shortcode.
            'original'   => $matches[0][ $i ],   // Entire shortcode.
          );
        }
      }
    }

    $data['has']['shortcodes'] = count( $data['shortcodes'] );

    /**
     * Filters shortcodes extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $data       Extracted shortcodes.
     *                           See {@link BP_Media_Extractor::extract_shortcodes()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_shortcodes', $data, $richtext, $plaintext, $extra_args );
  }
  463.  
  464. /** 
  465. * Extract any URL, matching a registered oEmbed endpoint, from text. 
  466. * 
  467. * @since 2.3.0 
  468. * 
  469. * @param string $richtext Content to parse. 
  470. * @param string $plaintext Sanitized version of the content. 
  471. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  472. * @return array { 
  473. * @type array $has Extracted media counts. { 
  474. * @type int $embeds 
  475. * } 
  476. * @type array $embeds Extracted oEmbeds. { 
  477. * Array of extracted media. 
  478. * 
  479. * @type string $url oEmbed link. 
  480. * } 
  481. * } 
  482. */ 
  protected function extract_embeds( $richtext, $plaintext, $extra_args = array() ) {
    $data = array( 'has' => array( 'embeds' => 0 ), 'embeds' => array() );

    // Load the oEmbed class file only if its accessor function is not already defined.
    if ( ! function_exists( '_wp_oembed_get_object' ) ) {
      require_once( ABSPATH . WPINC . '/class-oembed.php' );
    }

    // Matches any links on their own lines. They may be oEmbeds.
    if ( stripos( $richtext, 'http' ) !== false ) {
      preg_match_all( '#^\s*(https?://[^\s"]+)\s*$#im', $richtext, $matches );

      if ( ! empty( $matches[1] ) ) {
        $matches[1] = array_unique( $matches[1] );
        $oembed     = _wp_oembed_get_object();

        // Every captured link starts with http:// or https:// (see the pattern
        // above), so no separate data-URI check is needed here.
        foreach ( $matches[1] as $link ) {
          foreach ( $oembed->providers as $matchmask => $oembed_data ) {
            list( , $is_regex ) = $oembed_data;

            // Turn asterisk-type provider URLs into regexs.
            if ( ! $is_regex ) {
              $matchmask = '#' . str_replace( '___wildcard___', '(.+)', preg_quote( str_replace( '*', '___wildcard___', $matchmask ), '#' ) ) . '#i';
              $matchmask = preg_replace( '|^#http\\\://|', '#https?\://', $matchmask );
            }

            // Check whether this "link" is really an oEmbed.
            if ( preg_match( $matchmask, $link ) ) {
              $data['embeds'][] = array( 'url' => $link );

              // First matching provider wins; move on to the next link.
              break;
            }
          }
        }
      }
    }

    $data['has']['embeds'] = count( $data['embeds'] );

    /**
     * Filters embeds extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $data       Extracted embeds. See {@link BP_Media_Extractor::extract_embeds()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_embeds', $data, $richtext, $plaintext, $extra_args );
  }
  531.  
  532. /** 
  533. * Extract [audio] shortcodes and `<a href="*.mp3">` tags from text.
  534. * 
  535. * @since 2.3.0 
  536. * 
  537. * @see wp_get_audio_extensions() for supported audio formats. 
  538. * 
  539. * @param string $richtext Content to parse. 
  540. * @param string $plaintext Sanitized version of the content. 
  541. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  542. * @return array { 
  543. * @type array $has Extracted media counts. { 
  544. * @type int $audio 
  545. * } 
  546. * @type array $audio Extracted audio. { 
  547. * Array of extracted media. 
  548. * 
  549. * @type string $original The entire shortcode. 
  550. * @type string $source Media source. Either "html" or "shortcodes". 
  551. * @type string $url Link to audio. 
  552. * } 
  553. * } 
  554. */ 
  protected function extract_audio( $richtext, $plaintext, $extra_args = array() ) {
    $data   = array( 'has' => array( 'audio' => 0 ), 'audio' => array() );
    $audios = $this->extract_shortcodes( $richtext, $plaintext, $extra_args );
    $links  = $this->extract_links( $richtext, $plaintext, $extra_args );

    $audio_types = wp_get_audio_extensions();

    // [audio] shortcodes.
    $audios = wp_list_filter( $audios['shortcodes'], array( 'type' => 'audio' ) );
    foreach ( $audios as $audio ) {

      // Media URL can appear as the first parameter inside the shortcode brackets.
      if ( isset( $audio['attributes']['src'] ) ) {
        $src_param = 'src';
      } elseif ( isset( $audio['attributes'][0] ) ) {
        $src_param = 0;
      } else {
        continue;
      }

      // parse_url() can return null/false for a URL without a usable path;
      // cast so the string helpers below always receive a string.
      $path = untrailingslashit( (string) parse_url( $audio['attributes'][ $src_param ], PHP_URL_PATH ) );

      foreach ( $audio_types as $extension ) {
        $extension = '.' . $extension;

        // Check this URL's file extension matches that of an accepted audio format.
        // Compare by the extension's own length, so extensions that are not
        // exactly three letters long (e.g. "flac") can match as well.
        if ( ! $path || substr( $path, -strlen( $extension ) ) !== $extension ) {
          continue;
        }

        $data['audio'][] = array(
          'original' => '[audio src="' . esc_url_raw( $audio['attributes'][ $src_param ] ) . '"]',
          'source'   => 'shortcodes',
          'url'      => esc_url_raw( $audio['attributes'][ $src_param ] ),
        );
      }
    }

    // <a href="*.mp3"> tags. The extension loop stays outermost so results
    // keep being grouped by extension.
    foreach ( $audio_types as $extension ) {
      $extension = '.' . $extension;

      foreach ( $links['links'] as $link ) {
        $path = untrailingslashit( (string) parse_url( $link['url'], PHP_URL_PATH ) );

        // Check this URL's file extension matches that of an accepted audio format.
        if ( ! $path || substr( $path, -strlen( $extension ) ) !== $extension ) {
          continue;
        }

        $data['audio'][] = array(
          'original' => '[audio src="' . esc_url_raw( $link['url'] ) . '"]', // Build an audio shortcode.
          'source'   => 'html',
          'url'      => esc_url_raw( $link['url'] ),
        );
      }
    }

    $data['has']['audio'] = count( $data['audio'] );

    /**
     * Filters audio extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $data       Extracted audio. See {@link BP_Media_Extractor::extract_audio()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_audio', $data, $richtext, $plaintext, $extra_args );
  }
  620.  
  621. /** 
  622. * Extract [video] shortcodes from text.
  623. * 
  624. * @since 2.3.0 
  625. * 
  626. * @see wp_get_video_extensions() for supported video formats. 
  627. * 
  628. * @param string $richtext Content to parse. 
  629. * @param string $plaintext Sanitized version of the content. 
  630. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  631. * @return array { 
  632. * @type array $has Extracted media counts. { 
  633. * @type int $videos
  634. * } 
  635. * @type array $videos Extracted video. { 
  636. * Array of extracted media. 
  637. * 
  638. * @type string $source Media source. Currently only "shortcodes". 
  639. * @type string $url Link to video.
  640. * } 
  641. * } 
  642. */ 
  protected function extract_video( $richtext, $plaintext, $extra_args = array() ) {
    $data   = array( 'has' => array( 'videos' => 0 ), 'videos' => array() );
    $videos = $this->extract_shortcodes( $richtext, $plaintext, $extra_args );

    $video_types = wp_get_video_extensions();

    // [video] shortcodes.
    $videos = wp_list_filter( $videos['shortcodes'], array( 'type' => 'video' ) );
    foreach ( $videos as $video ) {

      // Media URL can appear as the first parameter inside the shortcode brackets.
      if ( isset( $video['attributes']['src'] ) ) {
        $src_param = 'src';
      } elseif ( isset( $video['attributes'][0] ) ) {
        $src_param = 0;
      } else {
        continue;
      }

      // parse_url() can return null/false for a URL without a usable path;
      // cast so the string helpers below always receive a string.
      $path = untrailingslashit( (string) parse_url( $video['attributes'][ $src_param ], PHP_URL_PATH ) );

      foreach ( $video_types as $extension ) {
        $extension = '.' . $extension;

        // Check this URL's file extension matches that of an accepted video format.
        // Compare by the extension's own length, so three-letter ("mp4") and
        // four-letter ("webm") extensions are handled by the same test.
        if ( ! $path || substr( $path, -strlen( $extension ) ) !== $extension ) {
          continue;
        }

        $data['videos'][] = array(
          'original' => $video['original'], // Entire shortcode.
          'source'   => 'shortcodes',
          'url'      => esc_url_raw( $video['attributes'][ $src_param ] ),
        );
      }
    }

    $data['has']['videos'] = count( $data['videos'] );

    /**
     * Filters videos extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $data       Extracted videos. See {@link BP_Media_Extractor::extract_video()} for format.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_videos', $data, $richtext, $plaintext, $extra_args );
  }
  690.  
  691.  
  692. /** 
  693. * Helpers and utility methods. 
  694. */ 
  695.  
  696. /** 
  697. * Extract images in [galleries] shortcodes from text. 
  698. * 
  699. * @since 2.3.0 
  700. * 
  701. * @param string $richtext Content to parse. 
  702. * @param string $plaintext Sanitized version of the content. 
  703. * @param array $extra_args Bespoke data for a particular extractor (optional). 
  704. * @return array 
  705. */ 
  protected function extract_images_from_galleries( $richtext, $plaintext, $extra_args = array() ) {
    // get_post_galleries() is given a post object; when the caller supplied
    // only text, wrap that text in a throwaway WP_Post.
    if ( ! isset( $extra_args['post'] ) || ! is_a( $extra_args['post'], 'WP_Post' ) ) {
      $post = new WP_Post( (object) array( 'post_content' => $richtext ) );
    } else {
      $post = $extra_args['post'];
    }

    // We're not using get_post_galleries_images() because it returns thumbnails; we want the original image.
    $galleries      = get_post_galleries( $post, false );
    $galleries_data = array();

    if ( ! empty( $galleries ) ) {
      // Validate the size of the images requested.
      if ( isset( $extra_args['width'] ) ) {
        // Cast before ctype_digit(): given an integer it treats -128..255 as an
        // ASCII code rather than as digits, so e.g. int 100 would not be
        // recognised as a numeric width.
        $width_is_numeric = ctype_digit( (string) $extra_args['width'] );

        if ( $width_is_numeric ) {
          // A width was specified but not a height, so calculate it assuming a 4:3 ratio.
          if ( ! isset( $extra_args['height'] ) ) {
            $extra_args['height'] = round( ( $extra_args['width'] / 4 ) * 3 );
          }

          $image_size = array( $extra_args['width'], $extra_args['height'] );
        } else {
          $image_size = $extra_args['width']; // E.g. "thumb", "medium".
        }
      } else {
        $image_size = 'full';
      }

      /**
       * There are two variants of gallery shortcode.
       *
       * One kind specifies the image (post) IDs via an `ids` parameter.
       * The other gets the image IDs from post_type=attachment and post_parent=get_the_ID().
       */

      foreach ( $galleries as $gallery_id => $gallery ) {
        $data   = array();
        $images = array();

        // Gallery ids= variant.
        if ( isset( $gallery['ids'] ) ) {
          $images = wp_parse_id_list( $gallery['ids'] );

        // Gallery post_parent variant.
        } elseif ( isset( $extra_args['post'] ) ) {
          $images = wp_parse_id_list(
            get_children( array(
              'fields'         => 'ids',
              'order'          => 'ASC',
              'orderby'        => 'menu_order ID',
              'post_mime_type' => 'image',
              'post_parent'    => $extra_args['post']->ID,
              'post_status'    => 'inherit',
              'post_type'      => 'attachment',
            ) )
          );
        }

        // Extract the data we need from each image in this gallery.
        foreach ( $images as $image_id ) {
          $image = wp_get_attachment_image_src( $image_id, $image_size );

          // No image data for this attachment ID; skip it rather than index into `false`.
          if ( ! $image ) {
            continue;
          }

          $data[] = array(
            'url'    => $image[0],
            'width'  => $image[1],
            'height' => $image[2],

            // Gallery IDs are reported 1-based.
            'gallery_id' => 1 + $gallery_id,
          );
        }

        $galleries_data[] = $data;
      }
    }

    /**
     * Filters image galleries extracted from text.
     *
     * @since 2.3.0
     *
     * @param array  $galleries_data Galleries. See {@link BP_Media_Extractor::extract_images_from_galleries()}.
     * @param string $richtext       Content to parse.
     * @param string $plaintext      Copy of $richtext without any markup.
     * @param array  $extra_args     Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_galleries', $galleries_data, $richtext, $plaintext, $extra_args );
  }
  785.  
  786. /** 
  787. * Extract the featured image from a Post. 
  788. * 
  789. * @since 2.3.0 
  790. * 
  791. * @param string $richtext Content to parse. 
  792. * @param string $plaintext Sanitized version of the content. 
  793. * @param array $extra_args Contains data that an implementation might need beyond the defaults. 
  794. * @return array 
  795. */ 
  protected function extract_images_from_featured_images( $richtext, $plaintext, $extra_args = array() ) {
    $image = array();
    $thumb = 0;

    // A featured image can only be looked up when a post was supplied.
    if ( isset( $extra_args['post'] ) ) {
      $thumb = (int) get_post_thumbnail_id( $extra_args['post']->ID );
    }

    if ( $thumb ) {
      // Validate the size of the images requested.
      if ( isset( $extra_args['width'] ) ) {
        // Cast before ctype_digit(): given an integer it treats -128..255 as an
        // ASCII code rather than as digits, so e.g. int 100 would not be
        // recognised as a numeric width.
        $width_is_numeric = ctype_digit( (string) $extra_args['width'] );

        if ( $width_is_numeric ) {
          // A width was specified but not a height, so calculate it assuming a 4:3 ratio.
          if ( ! isset( $extra_args['height'] ) ) {
            $extra_args['height'] = round( ( $extra_args['width'] / 4 ) * 3 );
          }

          $image_size = array( $extra_args['width'], $extra_args['height'] );
        } else {
          $image_size = $extra_args['width']; // E.g. "thumb", "medium".
        }
      } else {
        $image_size = 'full';
      }

      $image = wp_get_attachment_image_src( $thumb, $image_size );

      // Keep the documented array return type when no image data is available.
      if ( ! $image ) {
        $image = array();
      }
    }

    /**
     * Filters featured images extracted from a WordPress Post.
     *
     * @since 2.3.0
     *
     * @param array  $image      Featured image data; indexes 0, 1 and 2 are read as URL, width and
     *                           height by BP_Media_Extractor::extract_images(). Empty if none was found.
     * @param string $richtext   Content to parse.
     * @param string $plaintext  Copy of $richtext without any markup.
     * @param array  $extra_args Bespoke data for a particular extractor.
     */
    return apply_filters( 'bp_media_extractor_featured_images', $image, $richtext, $plaintext, $extra_args );
  }
  830.  
  831. /** 
  832. * Sanitize and format raw content to prepare for content extraction. 
  833. * 
  834. * HTML tags and shortcodes are removed, and HTML entities are decoded. 
  835. * 
  836. * @since 2.3.0 
  837. * 
  838. * @param string $richtext Content to sanitize. 
  839. * @return string 
  840. */ 
  protected function strip_markup( $richtext ) {
    // Order matters: remove HTML tags, then decode entities, then remove shortcodes.
    $without_tags = strip_tags( $richtext );
    $decoded      = html_entity_decode( $without_tags );
    $plaintext    = strip_shortcodes( $decoded );

    /**
     * Filters the generated plain text version of the content passed to the extractor.
     *
     * @since 2.3.0
     *
     * @param string $plaintext Generated plain text.
     * @param string $richtext  Original content.
     */
    return apply_filters( 'bp_media_extractor_strip_markup', $plaintext, $richtext );
  }
.