SimplePie_Sanitize

Used for data cleanup and post-processing.

Defined (1)

The class is defined in the following location(s).

/wp-includes/SimplePie/Sanitize.php  
  /**
   * Used for data cleanup and post-processing.
   *
   * Holds the sanitizing options (tags/attributes to strip, URL attributes to
   * absolutize, image-cache settings, output encoding) and applies them to a
   * piece of feed data in sanitize().
   */
  class SimplePie_Sanitize
  {
      // Private vars

      /** Base URL used by replace_urls(); set from the $base argument on each HTML sanitize() call. */
      var $base;

      /**
       * Registry injected through set_registry(). sanitize() and replace_urls()
       * call and create helper objects through it.
       */
      var $registry;

      // Options
      var $remove_div = true;
      var $image_handler = '';
      var $strip_htmltags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style');
      var $encode_instead_of_strip = false;
      var $strip_attributes = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc');
      var $strip_comments = false;
      var $output_encoding = 'UTF-8';
      var $enable_cache = true;
      var $cache_location = './cache';
      var $cache_name_function = 'md5';
      var $timeout = 10;
      var $useragent = '';
      var $force_fsockopen = false;
      var $replace_url_attributes = null;

      /**
       * Initialise the URL-replacement map with its default element/attribute pairs.
       */
      public function __construct()
      {
          // Set defaults
          $this->set_url_replacements(null);
      }

      /**
       * Choose whether sanitize() removes the outer <div> wrapper from HTML output.
       *
       * @param bool $enable True to remove the wrapper, false to normalise it to a bare <div>
       */
      public function remove_div($enable = true)
      {
          $this->remove_div = (bool) $enable;
      }

      /**
       * Set the prefix that cached image URLs are rewritten to.
       *
       * @param string|false $page Prefix prepended to the cache name of each image;
       *                           any falsy value disables image rewriting
       */
      public function set_image_handler($page = false)
      {
          $this->image_handler = $page ? (string) $page : false;
      }

      /**
       * Inject the registry used to reach the Misc, Cache and File helpers.
       *
       * @param SimplePie_Registry $registry
       */
      public function set_registry(SimplePie_Registry $registry)
      {
          $this->registry = $registry;
      }

      /**
       * Configure the cache used for image handling.
       *
       * Falsy $cache_location / $cache_name_function values leave the current
       * setting untouched. $cache_class is accepted but not used by this method.
       *
       * @param bool     $enable_cache
       * @param string   $cache_location
       * @param callable $cache_name_function Invoked through call_user_func() with the image URL
       * @param string   $cache_class         Unused here
       */
      public function pass_cache_data($enable_cache = true, $cache_location = './cache', $cache_name_function = 'md5', $cache_class = 'SimplePie_Cache')
      {
          if (isset($enable_cache)) {
              $this->enable_cache = (bool) $enable_cache;
          }

          if ($cache_location) {
              $this->cache_location = (string) $cache_location;
          }

          if ($cache_name_function) {
              $this->cache_name_function = (string) $cache_name_function;
          }
      }

      /**
       * Configure the options passed on when fetching remote images.
       *
       * Falsy arguments leave the corresponding setting untouched.
       * $file_class is accepted but not used by this method.
       *
       * @param string $file_class      Unused here
       * @param int    $timeout
       * @param string $useragent
       * @param bool   $force_fsockopen
       */
      public function pass_file_data($file_class = 'SimplePie_File', $timeout = 10, $useragent = '', $force_fsockopen = false)
      {
          if ($timeout) {
              $this->timeout = (string) $timeout;
          }

          if ($useragent) {
              $this->useragent = (string) $useragent;
          }

          if ($force_fsockopen) {
              // This is a boolean flag (its default is false), so store it as one.
              $this->force_fsockopen = (bool) $force_fsockopen;
          }
      }

      /**
       * Set the list of HTML tags that sanitize() strips.
       *
       * @param array|string|false $tags Array of tag names, a comma-separated string
       *                                 (whitespace around names is ignored), or a
       *                                 falsy value to disable tag stripping
       */
      public function strip_htmltags($tags = array('base', 'blink', 'body', 'doctype', 'embed', 'font', 'form', 'frame', 'frameset', 'html', 'iframe', 'input', 'marquee', 'meta', 'noscript', 'object', 'param', 'script', 'style'))
      {
          if (!$tags) {
              $this->strip_htmltags = false;
          } elseif (is_array($tags)) {
              $this->strip_htmltags = $tags;
          } else {
              // Accept both "a,b" and "a, b"; drop empty entries so no empty
              // tag name ever reaches the XPath query in strip_tag().
              $this->strip_htmltags = array_values(array_filter(array_map('trim', explode(',', $tags)), 'strlen'));
          }
      }

      /**
       * Choose whether disallowed tags are emitted as escaped text instead of removed.
       *
       * @param bool $encode
       */
      public function encode_instead_of_strip($encode = false)
      {
          $this->encode_instead_of_strip = (bool) $encode;
      }

      /**
       * Set the list of attributes that sanitize() removes from every element.
       *
       * @param array|string|false $attribs Array of attribute names, a comma-separated
       *                                    string (whitespace around names is ignored),
       *                                    or a falsy value to disable attribute stripping
       */
      public function strip_attributes($attribs = array('bgsound', 'class', 'expr', 'id', 'style', 'onclick', 'onerror', 'onfinish', 'onmouseover', 'onmouseout', 'onfocus', 'onblur', 'lowsrc', 'dynsrc'))
      {
          if (!$attribs) {
              $this->strip_attributes = false;
          } elseif (is_array($attribs)) {
              $this->strip_attributes = $attribs;
          } else {
              // Accept both "a,b" and "a, b"; drop empty entries so no empty
              // attribute name ever reaches the XPath query in strip_attr().
              $this->strip_attributes = array_values(array_filter(array_map('trim', explode(',', $attribs)), 'strlen'));
          }
      }

      /**
       * Choose whether sanitize() removes HTML comments.
       *
       * @param bool $strip
       */
      public function strip_comments($strip = false)
      {
          $this->strip_comments = (bool) $strip;
      }

      /**
       * Set the encoding sanitize() converts its result to (from UTF-8).
       *
       * @param string $encoding
       */
      public function set_output_encoding($encoding = 'UTF-8')
      {
          $this->output_encoding = (string) $encoding;
      }

      /**
       * Set element/attribute key/value pairs of HTML attributes
       * containing URLs that need to be resolved relative to the feed
       *
       * Defaults to |a|@href, |area|@href, |blockquote|@cite, |del|@cite,
       * |form|@action, |img|@longdesc, |img|@src, |input|@src, |ins|@cite,
       * |q|@cite
       *
       * @since 1.0
       * @param array|null $element_attribute Element/attribute key/value pairs, null for default
       */
      public function set_url_replacements($element_attribute = null)
      {
          if ($element_attribute === null) {
              $element_attribute = array(
                  'a' => 'href',
                  'area' => 'href',
                  'blockquote' => 'cite',
                  'del' => 'cite',
                  'form' => 'action',
                  'img' => array(
                      'longdesc',
                      'src'
                  ),
                  'input' => 'src',
                  'ins' => 'cite',
                  'q' => 'cite'
              );
          }
          $this->replace_url_attributes = (array) $element_attribute;
      }

      /**
       * Clean a piece of feed data according to its construct type.
       *
       * Nothing is done when the trimmed data is empty and the IRI flag is not
       * set. Otherwise, in order: a MAYBE_HTML construct is classified as HTML
       * or TEXT, BASE64 data is decoded, HTML/XHTML is parsed and has comments,
       * tags and attributes stripped, relative URLs absolutized and images
       * optionally cached, IRIs are absolutized, TEXT/IRI data is HTML-escaped,
       * and finally the result is converted to the configured output encoding.
       *
       * @param string $data Raw data
       * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
       * @param string $base Base URL for resolving relative URLs
       * @return string Sanitized data; '' when HTML must be processed but DOMDocument is unavailable
       */
      public function sanitize($data, $type, $base = '')
      {
          $data = trim($data);
          if ($data !== '' || $type & SIMPLEPIE_CONSTRUCT_IRI) {
              if ($type & SIMPLEPIE_CONSTRUCT_MAYBE_HTML) {
                  // An entity reference or a closing tag is taken as evidence of HTML.
                  if (preg_match('/(&(#(x[0-9a-fA-F]+|[0-9]+)|[a-zA-Z0-9]+)|<\/[A-Za-z][^\x09\x0A\x0B\x0C\x0D\x20\x2F\x3E]*' . SIMPLEPIE_PCRE_HTML_ATTRIBUTE . '>)/', $data)) {
                      $type |= SIMPLEPIE_CONSTRUCT_HTML;
                  } else {
                      $type |= SIMPLEPIE_CONSTRUCT_TEXT;
                  }
              }

              if ($type & SIMPLEPIE_CONSTRUCT_BASE64) {
                  $data = base64_decode($data);
              }

              if ($type & (SIMPLEPIE_CONSTRUCT_HTML | SIMPLEPIE_CONSTRUCT_XHTML)) {
                  if (!class_exists('DOMDocument')) {
                      $this->registry->call('Misc', 'error', array('DOMDocument not found, unable to use sanitizer', E_USER_WARNING, __FILE__, __LINE__));
                      return '';
                  }
                  $document = new DOMDocument();
                  $document->encoding = 'UTF-8';
                  $data = $this->preprocess($data, $type);

                  // Parser warnings about malformed markup are silenced while loading.
                  set_error_handler(array('SimplePie_Misc', 'silence_errors'));
                  $document->loadHTML($data);
                  restore_error_handler();

                  // Strip comments
                  if ($this->strip_comments) {
                      $xpath = new DOMXPath($document);
                      $comments = $xpath->query('//comment()');

                      foreach ($comments as $comment) {
                          $comment->parentNode->removeChild($comment);
                      }
                  }

                  // Strip out HTML tags and attributes that might cause various security problems.
                  // Based on recommendations by Mark Pilgrim at:
                  // http://diveintomark.org/archives/2003/06/12/how_to_consume_rss_safely
                  if ($this->strip_htmltags) {
                      foreach ($this->strip_htmltags as $tag) {
                          $this->strip_tag($tag, $document, $type);
                      }
                  }

                  if ($this->strip_attributes) {
                      foreach ($this->strip_attributes as $attrib) {
                          $this->strip_attr($attrib, $document);
                      }
                  }

                  // Replace relative URLs
                  $this->base = $base;
                  foreach ($this->replace_url_attributes as $element => $attributes) {
                      $this->replace_urls($document, $element, $attributes);
                  }

                  // If image handling (caching, etc.) is enabled, cache and rewrite all the image tags.
                  if (isset($this->image_handler) && ((string) $this->image_handler) !== '' && $this->enable_cache) {
                      $images = $document->getElementsByTagName('img');
                      foreach ($images as $img) {
                          if (!$img->hasAttribute('src')) {
                              continue;
                          }

                          $image_url = call_user_func($this->cache_name_function, $img->getAttribute('src'));
                          $cache = $this->registry->call('Cache', 'get_handler', array($this->cache_location, $image_url, 'spi'));

                          if ($cache->load()) {
                              $img->setAttribute('src', $this->image_handler . $image_url);
                              continue;
                          }

                          // REMOTE_ADDR is absent outside a web request (e.g. CLI);
                          // pass null in that case instead of raising a warning.
                          $remote_addr = isset($_SERVER['REMOTE_ADDR']) ? $_SERVER['REMOTE_ADDR'] : null;
                          $file = $this->registry->create('File', array($img->getAttribute('src'), $this->timeout, 5, array('X-FORWARDED-FOR' => $remote_addr), $this->useragent, $this->force_fsockopen));

                          // The parentheses around the bit test are required:
                          // `===` binds tighter than `&`.
                          if ($file->success && (($file->method & SIMPLEPIE_FILE_SOURCE_REMOTE) === 0 || ($file->status_code === 200 || $file->status_code > 206 && $file->status_code < 300))) {
                              if ($cache->save(array('headers' => $file->headers, 'body' => $file->body))) {
                                  $img->setAttribute('src', $this->image_handler . $image_url);
                              } else {
                                  trigger_error("$this->cache_location is not writeable. Make sure you've set the correct relative or absolute path, and that the location is server-writable.", E_USER_WARNING);
                              }
                          }
                      }
                  }

                  // Remove the DOCTYPE
                  // Seems to cause segfaulting if we don't do this
                  if ($document->firstChild instanceof DOMDocumentType) {
                      $document->removeChild($document->firstChild);
                  }

                  // Move everything from the body to the root
                  $body = $document->getElementsByTagName('body')->item(0);
                  $real_body = ($body !== null) ? $body->childNodes->item(0) : null;

                  if ($real_body === null) {
                      // Nothing survived parsing/stripping; there is no node to promote.
                      $data = '';
                  } else {
                      $document->replaceChild($real_body, $document->firstChild);

                      // Finally, convert to a HTML string
                      $data = trim($document->saveHTML());
                  }

                  if ($this->remove_div) {
                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '', $data);
                      $data = preg_replace('/<\/div>$/', '', $data);
                  } else {
                      $data = preg_replace('/^<div' . SIMPLEPIE_PCRE_XML_ATTRIBUTE . '>/', '<div>', $data);
                  }
              }

              if ($type & SIMPLEPIE_CONSTRUCT_IRI) {
                  $absolute = $this->registry->call('Misc', 'absolutize_url', array($data, $base));
                  if ($absolute !== false) {
                      $data = $absolute;
                  }
              }

              if ($type & (SIMPLEPIE_CONSTRUCT_TEXT | SIMPLEPIE_CONSTRUCT_IRI)) {
                  $data = htmlspecialchars($data, ENT_COMPAT, 'UTF-8');
              }

              if ($this->output_encoding !== 'UTF-8') {
                  $data = $this->registry->call('Misc', 'change_encoding', array($data, 'UTF-8', $this->output_encoding));
              }
          }
          return $data;
      }

      /**
       * Wrap a markup fragment in a full document so DOMDocument::loadHTML()
       * reads it as UTF-8.
       *
       * @param string $html Markup fragment
       * @param int    $type Bitmask of SIMPLEPIE_CONSTRUCT_* flags
       * @return string Complete document with doctype, <head> charset declaration and <body>
       */
      protected function preprocess($html, $type)
      {
          $ret = '';
          if ($type & ~SIMPLEPIE_CONSTRUCT_XHTML) {
              // Atom XHTML constructs are wrapped with a div by default
              // Note: No protection if $html contains a stray </div>!
              $html = '<div>' . $html . '</div>';
              $ret .= '<!DOCTYPE html>';
              $content_type = 'text/html';
          } else {
              $ret .= '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">';
              $content_type = 'application/xhtml+xml';
          }

          $ret .= '<html><head>';
          $ret .= '<meta http-equiv="Content-Type" content="' . $content_type . '; charset=utf-8" />';
          $ret .= '</head><body>' . $html . '</body></html>';
          return $ret;
      }

      /**
       * Absolutize the given URL attributes on every $tag element, relative to $this->base.
       *
       * Does nothing when $tag is itself in the strip list. An attribute is left
       * unchanged when absolutize_url returns false for it.
       *
       * @param DOMDocument  $document
       * @param string       $tag        Element name
       * @param string|array $attributes Attribute name, or list of attribute names
       */
      public function replace_urls($document, $tag, $attributes)
      {
          if (!is_array($attributes)) {
              $attributes = array($attributes);
          }

          if (is_array($this->strip_htmltags) && in_array($tag, $this->strip_htmltags)) {
              return;
          }

          $elements = $document->getElementsByTagName($tag);
          foreach ($elements as $element) {
              foreach ($attributes as $attribute) {
                  if (!$element->hasAttribute($attribute)) {
                      continue;
                  }

                  $value = $this->registry->call('Misc', 'absolutize_url', array($element->getAttribute($attribute), $this->base));
                  if ($value !== false) {
                      $element->setAttribute($attribute, $value);
                  }
              }
          }
      }

      /**
       * Regex-callback-style tag stripper; it is not called from within this class.
       *
       * Reads $match[0] (whole match), $match[1] (tag name), $match[2]
       * (attribute text), $match[3] and $match[4] (both used as inner content).
       *
       * @param array $match Match array in the layout described above
       * @return string Replacement text
       */
      public function do_strip_htmltags($match)
      {
          // script/style contents are never kept, only paired tags of other kinds.
          $keeps_content = isset($match[4]) && !in_array(strtolower($match[1]), array('script', 'style'));

          if ($this->encode_instead_of_strip) {
              if ($keeps_content) {
                  $match[1] = htmlspecialchars($match[1], ENT_COMPAT, 'UTF-8');
                  $match[2] = htmlspecialchars($match[2], ENT_COMPAT, 'UTF-8');
                  return "&lt;$match[1]$match[2]&gt;$match[3]&lt;/$match[1]&gt;";
              }
              return htmlspecialchars($match[0], ENT_COMPAT, 'UTF-8');
          }

          return $keeps_content ? $match[4] : '';
      }

      /**
       * Neutralise every $tag element found below <body>.
       *
       * - With encode_instead_of_strip: the element is replaced by its children;
       *   for tags other than script/style the children are surrounded by text
       *   nodes spelling out the opening and closing tag.
       * - Otherwise script/style elements are removed together with their content,
       *   and any other element is replaced by its children.
       *
       * @param string      $tag      Element name
       * @param DOMDocument $document
       * @param int         $type     Bitmask of SIMPLEPIE_CONSTRUCT_* flags
       */
      protected function strip_tag($tag, $document, $type)
      {
          $xpath = new DOMXPath($document);
          $elements = $xpath->query('body//' . $tag);
          $is_script_or_style = in_array($tag, array('script', 'style'));

          if ($this->encode_instead_of_strip) {
              foreach ($elements as $element) {
                  $fragment = $document->createDocumentFragment();

                  // For elements which aren't script or style, include the tag itself
                  if (!$is_script_or_style) {
                      $text = '<' . $tag;
                      if ($element->hasAttributes()) {
                          $attrs = array();
                          foreach ($element->attributes as $name => $attr) {
                              $value = $attr->value;

                              // Compare against '' rather than empty(): a value
                              // of "0" is a real value and must be kept.
                              if ($value === '') {
                                  if ($type & SIMPLEPIE_CONSTRUCT_XHTML) {
                                      // In XHTML, empty values should never exist, so we repeat the name
                                      $value = $name;
                                  } elseif ($type & SIMPLEPIE_CONSTRUCT_HTML) {
                                      // For HTML, empty is fine
                                      $attrs[] = $name;
                                      continue;
                                  }
                              }

                              // Standard attribute text
                              $attrs[] = $name . '="' . $value . '"';
                          }
                          $text .= ' ' . implode(' ', $attrs);
                      }
                      $text .= '>';
                      $fragment->appendChild(new DOMText($text));
                  }

                  // appendChild() moves the node, so firstChild advances on each pass.
                  while ($element->firstChild !== null) {
                      $fragment->appendChild($element->firstChild);
                  }

                  if (!$is_script_or_style) {
                      $fragment->appendChild(new DOMText('</' . $tag . '>'));
                  }

                  $element->parentNode->replaceChild($fragment, $element);
              }

              return;
          }

          if ($is_script_or_style) {
              foreach ($elements as $element) {
                  $element->parentNode->removeChild($element);
              }

              return;
          }

          foreach ($elements as $element) {
              $fragment = $document->createDocumentFragment();
              // appendChild() moves the node, so firstChild advances on each pass.
              while ($element->firstChild !== null) {
                  $fragment->appendChild($element->firstChild);
              }

              $element->parentNode->replaceChild($fragment, $element);
          }
      }

      /**
       * Remove the attribute $attrib from every element of the document that carries it.
       *
       * @param string      $attrib   Attribute name
       * @param DOMDocument $document
       */
      protected function strip_attr($attrib, $document)
      {
          $xpath = new DOMXPath($document);
          $elements = $xpath->query('//*[@' . $attrib . ']');

          foreach ($elements as $element) {
              $element->removeAttribute($attrib);
          }
      }
  }