Filter_Embedded_HTML_Objects

Helper class for identifying and parsing known HTML blocks.

Defined (1)

The class is defined in the following location(s).

/modules/shortcodes/class.filter-embedded-html-objects.php  
  /**
   * Helper class for identifying and parsing known HTML blocks.
   *
   * Callbacks are registered against a substring or a regular expression. filter() then scans
   * a chunk of HTML for <object>, <embed>, <iframe>, <div> and <script> elements (raw or
   * entity-encoded) and hands each one to the first registered callback that matches either
   * the element's source URL or its markup.
   */
  class Filter_Embedded_HTML_Objects {
      /**
       * Callbacks keyed by a plain substring that is searched for in an element's source URL.
       * @var array
       */
      static public $strpos_filters = array();

      /**
       * Callbacks keyed by a regular expression that is matched against an element's source URL.
       * @var array
       */
      static public $regexp_filters = array();

      /**
       * Name of the element type currently being processed by filter(), or false before the first run.
       * @var string|bool
       */
      static public $current_element = false;

      /**
       * Callbacks keyed by a plain substring that is searched for in an element's markup.
       * @var array
       */
      static public $html_strpos_filters = array();

      /**
       * Callbacks keyed by a regular expression that is matched against an element's markup.
       * @var array
       */
      static public $html_regexp_filters = array();

      /**
       * Elements that had a source URL but matched no filter. Each entry is an array with
       * the keys 'match' (the original markup) and 'src' (the escaped source URL).
       * @var array
       */
      static public $failed_embeds = array();

      /**
       * Store tokens found in Syntax Highlighter.
       * @since 4.5.0
       * @var array
       */
      static private $sh_unfiltered_content_tokens = array();

      /**
       * Capture tokens found in Syntax Highlighter and collect them in self::$sh_unfiltered_content_tokens.
       * @since 4.5.0
       * @param array $match Regex match; $match[0] is the full shortcode block.
       * @return string Placeholder token that stands in for the matched block.
       */
      static public function sh_regexp_callback( $match ) {
          $token = '[prekses-filter-token-' . mt_rand() . '-' . md5( $match[0] ) . '-' . mt_rand() . ']';
          self::$sh_unfiltered_content_tokens[ $token ] = $match[0];

          return $token;
      }

      /**
       * Run every known element type in $html through the registered filters.
       *
       * @param string $html Content to filter.
       * @return string Filtered content. Empty input is returned untouched.
       */
      static public function filter( $html ) {
          if ( ! $html ) {
              return $html;
          }

          $regexps = array(
              'object' => '%<object[^>]*+>(?>[^<]*+(?><(?!/object>)[^<]*+)*)</object>%i',
              'embed'  => '%<embed[^>]*+>(?:\s*</embed>)?%i',
              'iframe' => '%<iframe[^>]*+>(?>[^<]*+(?><(?!/iframe>)[^<]*+)*)</iframe>%i',
              'div'    => '%<div[^>]*+>(?>[^<]*+(?><(?!/div>)[^<]*+)*+)(?:</div>)+%i',
              'script' => '%<script[^>]*+>(?>[^<]*+(?><(?!/script>)[^<]*+)*)</script>%i',
          );

          $unfiltered_content_tokens          = array();
          self::$sh_unfiltered_content_tokens = array();

          // Check here to make sure that SyntaxHighlighter is still used. (Just a little future proofing)
          if ( class_exists( 'SyntaxHighlighter' ) ) {
              // Replace any "code" shortcode blocks with a token that we'll later replace with its original text.
              // This will keep the contents of the shortcode from being filtered.
              global $SyntaxHighlighter;

              // Check to see if the $SyntaxHighlighter object has been created and is ready for use.
              if ( isset( $SyntaxHighlighter ) && is_array( $SyntaxHighlighter->shortcodes ) ) {
                  // Quote with the delimiter so a "/" in a shortcode name cannot break the pattern.
                  $quoted = array();
                  foreach ( $SyntaxHighlighter->shortcodes as $shortcode ) {
                      $quoted[] = preg_quote( $shortcode, '/' );
                  }
                  $shortcode_regex = implode( '|', $quoted );

                  $tokenized = preg_replace_callback(
                      '/\[(' . $shortcode_regex . ')(\s[^\]]*)?\][\s\S]*?\[\/\1\]/m',
                      array( __CLASS__, 'sh_regexp_callback' ),
                      $html
                  );

                  // preg_replace_callback() returns null on a PCRE error; never replace the content with null.
                  if ( null !== $tokenized ) {
                      $html                      = $tokenized;
                      $unfiltered_content_tokens = self::$sh_unfiltered_content_tokens;
                  }
              }
          }

          foreach ( $regexps as $element => $regexp ) {
              self::$current_element = $element;

              // Raw markup, e.g. "<iframe ...>".
              if ( false !== stripos( $html, "<$element" ) ) {
                  $new_html = preg_replace_callback( $regexp, array( __CLASS__, 'dispatch' ), $html );
                  // Only a null result signals failure; an empty string is a valid replacement.
                  if ( null !== $new_html ) {
                      $html = $new_html;
                  }
              }

              // Entity-encoded markup, e.g. "&lt;iframe ...&gt;".
              if ( false !== stripos( $html, "&lt;$element" ) ) {
                  $regexp_entities = self::regexp_entities( $regexp );
                  $new_html        = preg_replace_callback( $regexp_entities, array( __CLASS__, 'dispatch_entities' ), $html );
                  if ( null !== $new_html ) {
                      $html = $new_html;
                  }
              }
          }

          if ( count( $unfiltered_content_tokens ) > 0 ) {
              // Replace any tokens generated earlier with their original unfiltered text.
              $html = str_replace( array_keys( $unfiltered_content_tokens ), $unfiltered_content_tokens, $html );
          }

          return $html;
      }

      /**
       * Convert one of the element regexes so that it matches entity-encoded markup.
       *
       * The pattern is passed through htmlspecialchars(), the atomic-group openers that this
       * mangles are restored, and every "[^&xx;]*+" class (no longer a single-character class
       * once encoded) is replaced by an equivalent "anything up to that entity" group.
       *
       * @param string $regexp Pattern written against raw markup.
       * @return string Pattern that matches the same markup in entity-encoded form.
       */
      static public function regexp_entities( $regexp ) {
          $encoded = htmlspecialchars( $regexp, ENT_NOQUOTES );

          // htmlspecialchars() turned every "(?>" into "(?&gt;"; undo that so the groups stay valid.
          // The opener is built by concatenation so the source never contains a PHP closing tag.
          $encoded = str_replace( '?&gt;', '?' . '>', $encoded );

          return preg_replace(
              '/\[\^&([^\]]+)\]\*\+/',
              '(?' . '>[^&]*+(?' . '>&(?!\1)[^&])*+)*+',
              $encoded
          );
      }

      /**
       * Register a callback for embeds that match a substring or regular expression.
       *
       * @param string   $match          Substring or regular expression to match.
       * @param callable $callback       Called with the element's attribute array; its return value replaces the element.
       * @param bool     $is_regexp      True when $match is a regular expression.
       * @param bool     $is_html_filter True to match against the element's markup instead of its source URL.
       */
      static public function register( $match, $callback, $is_regexp = false, $is_html_filter = false ) {
          if ( $is_html_filter ) {
              if ( $is_regexp ) {
                  self::$html_regexp_filters[ $match ] = $callback;
              } else {
                  self::$html_strpos_filters[ $match ] = $callback;
              }
          } else {
              if ( $is_regexp ) {
                  self::$regexp_filters[ $match ] = $callback;
              } else {
                  self::$strpos_filters[ $match ] = $callback;
              }
          }
      }

      /**
       * Remove $match from all four filter lists.
       *
       * @param string $match Substring or regular expression that was passed to register().
       */
      static public function unregister( $match ) {
          // Allow themes/plugins to remove registered embeds.
          unset( self::$regexp_filters[ $match ] );
          unset( self::$strpos_filters[ $match ] );
          unset( self::$html_regexp_filters[ $match ] );
          unset( self::$html_strpos_filters[ $match ] );
      }

      /**
       * preg_replace_callback() handler for entity-encoded elements: decode, then dispatch.
       *
       * @param array $matches Regex match; $matches[0] is the encoded element.
       * @return string Replacement text.
       */
      static public function dispatch_entities( $matches ) {
          $matches[0] = html_entity_decode( $matches[0] );

          return self::dispatch( $matches );
      }

      /**
       * preg_replace_callback() handler that hands one element to the first matching filter.
       *
       * The source URL ('src' attribute, else 'movie') is tried against the source filters
       * first, then the markup is tried against the HTML filters. An element with a source
       * URL that matches nothing is logged, remembered in self::$failed_embeds and returned
       * unchanged.
       *
       * @param array $matches Regex match; $matches[0] is the element's markup.
       * @return string Replacement text, or the original markup when no filter applies.
       */
      static public function dispatch( $matches ) {
          // Some editors encode the colon of the URL scheme as a numeric entity; restore it.
          $html = preg_replace( '%&#0*58;//%', '://', $matches[0] );
          if ( null === $html ) {
              $html = $matches[0];
          }

          $attrs = self::get_attrs( $html );

          if ( isset( $attrs['src'] ) ) {
              $src = $attrs['src'];
          } elseif ( isset( $attrs['movie'] ) ) {
              $src = $attrs['movie'];
          } else {
              // No src found, search the markup itself.
              $callback = self::find_callback( $html, self::$html_strpos_filters, self::$html_regexp_filters );

              return null === $callback ? $matches[0] : call_user_func( $callback, $attrs );
          }

          $src = trim( $src );

          // Check the source filters first, then fall back to the HTML filters.
          $callback = self::find_callback( $src, self::$strpos_filters, self::$regexp_filters );
          if ( null === $callback ) {
              $callback = self::find_callback( $html, self::$html_strpos_filters, self::$html_regexp_filters );
          }
          if ( null !== $callback ) {
              return call_user_func( $callback, $attrs );
          }

          // Log the strip. The tag name is entity-escaped because the message itself is HTML.
          if ( function_exists( 'wp_kses_reject' ) ) {
              wp_kses_reject(
                  sprintf( __( '<code>%s</code> HTML tag removed as it is not allowed', 'jetpack' ), '&lt;' . self::$current_element . '&gt;' ),
                  array( self::$current_element => $attrs )
              );
          }

          // Keep the failed match so we can later replace it with a link,
          // but return the original content to give others a chance too.
          self::$failed_embeds[] = array(
              'match' => $matches[0],
              'src'   => esc_url( $src ),
          );

          return $matches[0];
      }

      /**
       * Return the first callback whose key matches $subject, or null when none does.
       *
       * Substring filters are consulted before regular-expression filters.
       *
       * @param string $subject        Text to test (a source URL or an element's markup).
       * @param array  $strpos_filters Callbacks keyed by substring.
       * @param array  $regexp_filters Callbacks keyed by regular expression.
       * @return callable|null
       */
      static private function find_callback( $subject, $strpos_filters, $regexp_filters ) {
          foreach ( $strpos_filters as $match => $callback ) {
              // PHP turns numeric-string array keys into integers; cast back so strpos()
              // searches for the text rather than for a character code.
              if ( false !== strpos( $subject, (string) $match ) ) {
                  return $callback;
              }
          }

          foreach ( $regexp_filters as $match => $callback ) {
              if ( preg_match( $match, $subject ) ) {
                  return $callback;
              }
          }

          return null;
      }

      /**
       * Failed embeds are stripped, so let's convert them to links at least.
       *
       * The list of failed embeds is emptied on every call that finds it non-empty,
       * whether or not any replacement was made.
       *
       * @param string $string Failed embed string.
       * @return string $string Linkified string.
       */
      public static function maybe_create_links( $string ) {
          if ( empty( self::$failed_embeds ) ) {
              return $string;
          }

          // Nothing is replaced when the string contains an iframe. The inserted links can
          // never introduce one, so the check only needs to run once.
          if ( false === strpos( $string, '<iframe ' ) ) {
              foreach ( self::$failed_embeds as $entry ) {
                  $url    = esc_url( $entry['src'] );
                  $link   = sprintf( '<a href="%s">%s</a>', $url, $url );
                  $string = str_replace( $entry['match'], $link, $string );
              }
          }

          self::$failed_embeds = array();

          return $string;
      }

      /**
       * Collect the attributes of an element, its <param> children and a nested <embed>.
       *
       * Later sources overwrite earlier ones: <param> name/value pairs, then the element's
       * own attributes, then the attributes of a nested <embed>. The original markup is
       * stored under the '_raw_html' key.
       *
       * @param string $html Markup of a single element.
       * @return array Attribute map, or an empty array when the markup cannot be parsed.
       */
      static public function get_attrs( $html ) {
          // DOMDocument::loadHTML() rejects empty input (it throws on PHP 8), so bail out early.
          if ( '' === trim( (string) $html ) ) {
              return array();
          }

          // We have to go through DOM, since it can load non-well-formed XML (i.e. HTML). SimpleXML cannot.
          $dom = new DOMDocument;

          // The @ is not enough to suppress errors when dealing with libxml,
          // we have to tell it directly how we want to handle errors.
          // Remember the caller's setting so it can be put back afterwards.
          $previous_setting = libxml_use_internal_errors( true );
          @$dom->loadHTML( $html ); // suppress parser warnings

          $xml = false;
          foreach ( $dom->childNodes as $node ) {
              // Find the root node (html).
              if ( XML_ELEMENT_NODE == $node->nodeType ) {
                  $container = $node->firstChild; // html->body
                  $element   = $container ? $container->firstChild : null; // html->body->object

                  if ( $element ) {
                      // Use simplexml_load_string rather than simplexml_import_dom as the latter
                      // doesn't cope well if the XML is malformed in the DOM. See #1688-wpcom
                      $xml = simplexml_load_string( $dom->saveXML( $element ) );
                  }
                  break;
              }
          }

          libxml_clear_errors();
          libxml_use_internal_errors( $previous_setting );

          if ( ! $xml ) {
              return array();
          }

          $attrs              = array();
          $attrs['_raw_html'] = $html;

          // <param> elements
          foreach ( $xml->param as $param ) {
              $attrs[ (string) $param['name'] ] = (string) $param['value'];
          }

          // <object> attributes
          foreach ( $xml->attributes() as $name => $attr ) {
              $attrs[ $name ] = (string) $attr;
          }

          // <embed> attributes
          if ( $xml->embed ) {
              foreach ( $xml->embed->attributes() as $name => $attr ) {
                  $attrs[ $name ] = (string) $attr;
              }
          }

          return $attrs;
      }
  }
  213. return $attrs;