Emogrifier

The WooCommerce Emogrifier class.

Defined (1)

The class is defined in the following location(s).

/includes/libraries/class-emogrifier.php  
  1. class Emogrifier 
  2. /** 
  3. * @var int 
  4. */ 
  5. const CACHE_KEY_CSS = 0; 
  6.  
  7. /** 
  8. * @var int 
  9. */ 
  10. const CACHE_KEY_SELECTOR = 1; 
  11.  
  12. /** 
  13. * @var int 
  14. */ 
  15. const CACHE_KEY_XPATH = 2; 
  16.  
  17. /** 
  18. * @var int 
  19. */ 
  20. const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 3; 
  21.  
  22. /** 
  23. * @var int 
  24. */ 
  25. const CACHE_KEY_COMBINED_STYLES = 4; 
  26.  
  27. /** 
  28. * for calculating nth-of-type and nth-child selectors 
  29. * @var int 
  30. */ 
  31. const INDEX = 0; 
  32.  
  33. /** 
  34. * for calculating nth-of-type and nth-child selectors 
  35. * @var int 
  36. */ 
  37. const MULTIPLIER = 1; 
  38.  
  39. /** 
  40. * @var string 
  41. */ 
  42. const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/'; 
  43.  
  44. /** 
  45. * @var string 
  46. */ 
  47. const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/'; 
  48.  
  49. /** 
  50. * @var string 
  51. */ 
  52. const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">'; 
  53.  
  54. /** 
  55. * @var string 
  56. */ 
  57. const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>'; 
  58.  
  59. /** 
  60. * @var string 
  61. */ 
  62. private $html = ''; 
  63.  
  64. /** 
  65. * @var string 
  66. */ 
  67. private $css = ''; 
  68.  
  69. /** 
  70. * @var bool[] 
  71. */ 
  72. private $excludedSelectors = array(); 
  73.  
  74. /** 
  75. * @var string[] 
  76. */ 
  77. private $unprocessableHtmlTags = array( 'wbr' ); 
  78.  
  79. /** 
  80. * @var bool[] 
  81. */ 
  82. private $allowedMediaTypes = array( 'all' => true, 'screen' => true, 'print' => true ); 
  83.  
  84. /** 
  85. * @var array[] 
  86. */ 
  87. private $caches = array( 
  88. self::CACHE_KEY_CSS => array(),  
  89. self::CACHE_KEY_SELECTOR => array(),  
  90. self::CACHE_KEY_XPATH => array(),  
  91. self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => array(),  
  92. self::CACHE_KEY_COMBINED_STYLES => array(),  
  93. ); 
  94.  
  95. /** 
  96. * the visited nodes with the XPath paths as array keys 
  97. * @var DOMElement[] 
  98. */ 
  99. private $visitedNodes = array(); 
  100.  
  101. /** 
  102. * the styles to apply to the nodes with the XPath paths as array keys for the outer array 
  103. * and the attribute names/values as key/value pairs for the inner array 
  104. * @var array[] 
  105. */ 
  106. private $styleAttributesForNodes = array(); 
  107.  
  108. /** 
  109. * Determines whether the "style" attributes of tags in the HTML passed to this class should be preserved. 
  110. * If set to false, the value of the style attributes will be discarded. 
  111. * @var bool 
  112. */ 
  113. private $isInlineStyleAttributesParsingEnabled = true; 
  114.  
  115. /** 
  116. * Determines whether the <style> blocks in the HTML passed to this class should be parsed. 
  117. * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied to the HTML 
  118. * via inline styles. 
  119. * If set to false, the <style> blocks will be left as they are in the HTML. 
  120. * @var bool 
  121. */ 
  122. private $isStyleBlocksParsingEnabled = true; 
  123.  
  124. /** 
  125. * Determines whether elements with the `display: none` property are 
  126. * removed from the DOM. 
  127. * @var bool 
  128. */ 
  129. private $shouldKeepInvisibleNodes = true; 
  130.  
  131. public static $_media = ''; 
  132.  
  /**
   * Creates a new instance, optionally seeded with the HTML and CSS to work on.
   *
   * Both values are routed through the public setters, so construction and
   * later updates share one code path.
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   * @param string $css the CSS to merge, must be UTF-8-encoded
   */
  public function __construct( $html = '', $css = '' ) {
      $this->setHtml( $html );
      $this->setCss( $css );
  }
  /**
   * Releases the node bookkeeping when the instance is destroyed.
   */
  public function __destruct() {
      // Empties both the visited-node map and the saved style attributes.
      $this->purgeVisitedNodes();
  }
  /**
   * Stores the HTML that the next emogrify call will work on.
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   * @return void
   */
  public function setHtml( $html ) {
      $this->html = $html;
  }
  /**
   * Stores the CSS that will be merged into the HTML.
   *
   * @param string $css the CSS to merge, must be UTF-8-encoded
   * @return void
   */
  public function setCss( $css ) {
      $this->css = $css;
  }
  /**
   * Inlines $this->css into $this->html and returns the complete resulting
   * HTML document.
   *
   * @return string the HTML with the CSS applied inline
   * @throws BadMethodCallException if no HTML has been set
   */
  public function emogrify() {
      if ( '' === $this->html ) {
          throw new BadMethodCallException( 'Please set some HTML first before calling emogrify.', 1390393096 );
      }

      // The media-query buffer is static, so start every run from an empty one.
      self::$_media = '';

      $document = $this->createXmlDocument();
      $this->process( $document );

      return $document->saveHTML();
  }
  /**
   * Inlines $this->css into $this->html and returns only the markup found
   * inside the <body> element.
   *
   * @return string the inlined content of the <body> element
   * @throws BadMethodCallException if no HTML has been set
   */
  public function emogrifyBodyContent() {
      if ( $this->html === '' ) {
          throw new BadMethodCallException( 'Please set some HTML first before calling emogrify.', 1390393096 );
      }

      // Same reset as emogrify(): the media-query buffer is static, so without
      // this the media queries collected by a previous run would be re-applied.
      self::$_media = '';

      $xmlDocument = $this->createXmlDocument();
      $this->process( $xmlDocument );

      $innerDocument = new DOMDocument();
      $bodyElement   = $xmlDocument->getElementsByTagName( 'body' )->item( 0 );

      // item(0) is null when the parsed document has no <body>; return an empty
      // document in that case instead of dereferencing null.
      if ( $bodyElement !== null ) {
          foreach ( $bodyElement->childNodes as $childNode ) {
              $innerDocument->appendChild( $innerDocument->importNode( $childNode, true ) );
          }
      }

      return $innerDocument->saveHTML();
  }
  /**
   * Applies $this->css (plus, if enabled, the document's own <style> blocks)
   * to $xmlDocument as inline style attributes.
   *
   * @param DOMDocument $xmlDocument the document to modify in place
   * @return void
   */
  protected function process( DOMDocument $xmlDocument ) {
      $xpath = new DOMXPath( $xmlDocument );
      $this->clearAllCaches();

      // Before we begin processing the CSS, walk the document and normalize all
      // existing style attributes (e.g. 'DISPLAY: none' becomes 'display: none').
      // We wouldn't have to do this if DOMXPath supported XPath 2.0.
      // The original inline styles are remembered so they are not overwritten.
      $this->purgeVisitedNodes();

      $styledNodes = $xpath->query( '//*[@style]' );
      if ( false !== $styledNodes ) {
          /** @var DOMElement $styledNode */
          foreach ( $styledNodes as $styledNode ) {
              if ( ! $this->isInlineStyleAttributesParsingEnabled ) {
                  $styledNode->removeAttribute( 'style' );
              } else {
                  $this->normalizeStyleAttributes( $styledNode );
              }
          }
      }

      // Style blocks found in the HTML are appended after the configured CSS so
      // that they take precedence over conflicting rules.
      $allCss = $this->css;
      if ( $this->isStyleBlocksParsingEnabled ) {
          $allCss .= $this->getCssFromAllStyleNodes( $xpath );
      }

      $cssParts      = $this->splitCssAndMediaQuery( $allCss );
      $excludedNodes = $this->getNodesToExclude( $xpath );

      foreach ( $this->parseCssRules( $cssParts['css'] ) as $rule ) {
          // Query the document for the XPath equivalent of the selector.
          $matchedNodes = $xpath->query( $this->translateCssToXpath( $rule['selector'] ) );

          // Invalid selectors make query() return false; ignore them.
          if ( false === $matchedNodes ) {
              continue;
          }

          /** @var DOMElement $matchedNode */
          foreach ( $matchedNodes as $matchedNode ) {
              if ( in_array( $matchedNode, $excludedNodes, true ) ) {
                  continue;
              }

              // Start from the declarations already on the node (if any) and
              // layer the rule's declarations on top.
              $existingDeclarations = $matchedNode->hasAttribute( 'style' )
                  ? $this->parseCssDeclarationsBlock( $matchedNode->getAttribute( 'style' ) )
                  : array();
              $ruleDeclarations = $this->parseCssDeclarationsBlock( $rule['declarationsBlock'] );

              $matchedNode->setAttribute(
                  'style',
                  $this->generateStyleStringFromDeclarationsArrays( $existingDeclarations, $ruleDeclarations )
              );
          }
      }

      if ( $this->isInlineStyleAttributesParsingEnabled ) {
          $this->fillStyleAttributesWithMergedStyles();
      }

      // Despite its name, a true flag means invisible nodes ARE removed;
      // disableInvisibleNodeRemoval() sets it to false.
      if ( $this->shouldKeepInvisibleNodes ) {
          $this->removeInvisibleNodes( $xpath );
      }

      $this->copyCssWithMediaToStyleNode( $xmlDocument, $xpath, $cssParts['media'] );
  }
  /**
   * Extracts and parses the individual rules from a CSS string.
   *
   * Results are cached per CSS string (keyed by its md5 hash) and returned
   * sorted by ascending selector precedence.
   *
   * @param string $css a string of raw CSS code
   * @return string[][] an array of string sub-arrays with the keys
   *         "selector" (a single CSS selector, e.g., "*" or "h1"),
   *         "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
   *         e.g., "color: red; height: 4px;"),
   *         and "line" (the zero-based index of the rule within $css, used as a tie-breaker)
   */
  private function parseCssRules( $css ) {
      $cssKey = md5( $css );
      if ( ! isset( $this->caches[ self::CACHE_KEY_CSS ][ $cssKey ] ) ) {
          // Process the CSS for selectors and declarations. No whitespace is
          // required before "{", so compact CSS such as "h1{color:red}" is
          // matched too; surrounding whitespace is trimmed from each selector below.
          preg_match_all( '/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $css, $matches, PREG_SET_ORDER );

          $cssRules = array();
          /** @var string[] $cssRule */
          foreach ( $matches as $key => $cssRule ) {
              $cssDeclaration = trim( $cssRule[2] );
              if ( $cssDeclaration === '' ) {
                  continue;
              }

              // Split on the bare comma so that "h1,h2" and "h1, h2" are both
              // handled as two selectors.
              foreach ( explode( ',', $cssRule[1] ) as $selector ) {
                  $selector = trim( $selector );

                  // A stray or trailing comma yields an empty selector, which
                  // cannot be translated into a valid XPath query.
                  if ( $selector === '' ) {
                      continue;
                  }

                  // Don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
                  // only allow structural pseudo-classes.
                  if ( strpos( $selector, ':' ) !== false && ! preg_match( '/:\\S+\\-(child|type\\()/i', $selector ) ) {
                      continue;
                  }

                  $cssRules[] = array(
                      'selector'          => $selector,
                      'declarationsBlock' => $cssDeclaration,
                      // keep track of where it appears in the file, since order is important
                      'line'              => $key,
                  );
              }
          }

          usort( $cssRules, array( $this, 'sortBySelectorPrecedence' ) );

          $this->caches[ self::CACHE_KEY_CSS ][ $cssKey ] = $cssRules;
      }

      return $this->caches[ self::CACHE_KEY_CSS ][ $cssKey ];
  }
  /**
   * Turns off the parsing of inline "style" attributes.
   *
   * With parsing off, process() removes existing style attributes instead of
   * normalizing and preserving them.
   *
   * @return void
   */
  public function disableInlineStyleAttributesParsing() {
      $this->isInlineStyleAttributesParsingEnabled = false;
  }
  /**
   * Turns off the parsing of <style> blocks.
   *
   * With parsing off, process() no longer collects CSS from the document's
   * <style> elements.
   *
   * @return void
   */
  public function disableStyleBlocksParsing() {
      $this->isStyleBlocksParsingEnabled = false;
  }
  /**
   * Turns off the removal of elements styled with `display: none`.
   *
   * Note: the backing flag is named $shouldKeepInvisibleNodes, but process()
   * removes invisible nodes while it is true, so setting it to false is what
   * keeps them.
   *
   * @return void
   */
  public function disableInvisibleNodeRemoval() {
      $this->shouldKeepInvisibleNodes = false;
  }
  /**
   * Empties every cache bucket.
   *
   * @return void
   */
  private function clearAllCaches() {
      $cacheKeys = array(
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      );

      foreach ( $cacheKeys as $cacheKey ) {
          $this->clearCache( $cacheKey );
      }
  }
  /**
   * Empties a single cache bucket.
   *
   * @param int $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH,
   *        CACHE_KEY_CSS_DECLARATIONS_BLOCK or CACHE_KEY_COMBINED_STYLES
   * @return void
   * @throws InvalidArgumentException if $key is not one of the known cache keys
   */
  private function clearCache( $key ) {
      $knownKeys = array(
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      );

      // Strict comparison: only the integer constants are accepted.
      $isKnownKey = in_array( $key, $knownKeys, true );
      if ( false === $isKnownKey ) {
          throw new InvalidArgumentException( 'Invalid cache key: ' . $key, 1391822035 );
      }

      $this->caches[ $key ] = array();
  }
  /**
   * Forgets all visited nodes together with the style attributes saved for them.
   *
   * @return void
   */
  private function purgeVisitedNodes() {
      $this->styleAttributesForNodes = array();
      $this->visitedNodes            = array();
  }
  /**
   * Registers a tag whose markup is stripped from the HTML before it is parsed.
   *
   * There are some HTML tags that DOMDocument cannot process, and it will
   * complain when it encounters them (for example HTML5 tags in an XHTML
   * document). Only the opening and closing tags are stripped; any content
   * between them stays in the document.
   *
   * @param string $tagName the tag name, e.g., "p"
   * @return void
   */
  public function addUnprocessableHtmlTag( $tagName ) {
      array_push( $this->unprocessableHtmlTags, $tagName );
  }
  /**
   * Takes a tag off the list of tags to strip.
   *
   * Only the first matching entry is removed; unknown tag names are ignored.
   *
   * @param string $tagName the tag name, e.g., "p"
   * @return void
   */
  public function removeUnprocessableHtmlTag( $tagName ) {
      $position = array_search( $tagName, $this->unprocessableHtmlTags, true );
      if ( false === $position ) {
          return;
      }

      unset( $this->unprocessableHtmlTags[ $position ] );
  }
  /**
   * Adds a media type to the allowed list.
   *
   * @param string $mediaName the media type name, e.g., "braille"
   * @return void
   */
  public function addAllowedMediaType( $mediaName ) {
      // The list is a set: the media name is the key, the value is always true.
      $this->allowedMediaTypes[ $mediaName ] = true;
  }
  /**
   * Removes a media type from the allowed list.
   *
   * @param string $mediaName the media type name, e.g., "braille"
   * @return void
   */
  public function removeAllowedMediaType( $mediaName ) {
      // unset() on a missing key is a no-op, so no existence check is needed.
      unset( $this->allowedMediaTypes[ $mediaName ] );
  }
  /**
   * Registers a selector whose matching nodes are left out of emogrification.
   *
   * Nodes matching the selector do not get their style altered.
   *
   * @param string $selector the selector to exclude, e.g., ".editor"
   * @return void
   */
  public function addExcludedSelector( $selector ) {
      // The list is a set: the selector is the key, the value is always true.
      $this->excludedSelectors[ $selector ] = true;
  }
  /**
   * Stops excluding the nodes matching this selector from emogrification.
   *
   * @param string $selector the selector to no longer exclude, e.g., ".editor"
   * @return void
   */
  public function removeExcludedSelector( $selector ) {
      // unset() on a missing key is a no-op, so no existence check is needed.
      unset( $this->excludedSelectors[ $selector ] );
  }
  /**
   * Removes every element whose style attribute contains display:none.
   *
   * The match has to be case-insensitive, but DOMDocument only supports
   * XPath 1.0, which has no lower-case(). Attribute names have been lowercased
   * elsewhere, attribute values have not, so the query uses translate() to
   * lowercase the letters that occur in "NONE" ("NOE") and strips spaces
   * before comparing.
   *
   * @param DOMXPath $xpath
   * @return void
   */
  private function removeInvisibleNodes( DOMXPath $xpath ) {
      $hiddenNodes = $xpath->query(
          '//*[contains(translate(translate(@style, " ", ""), "NOE", "noe"), "display:none")]'
      );
      if ( 0 === $hiddenNodes->length ) {
          return;
      }

      /** @var DOMNode $hiddenNode */
      foreach ( $hiddenNodes as $hiddenNode ) {
          $parent = $hiddenNode->parentNode;

          // If an ancestor was already removed, the parent may be gone; only
          // detach nodes that still have a usable parent.
          if ( $parent && is_callable( array( $parent, 'removeChild' ) ) ) {
              $parent->removeChild( $hiddenNode );
          }
      }
  }
  /**
   * preg_replace_callback() handler used by normalizeStyleAttributes():
   * lowercases the whole matched text.
   *
   * @param string[] $m the regular expression match
   * @return string the lowercased full match
   */
  private function normalizeStyleAttributes_callback( $m ) {
      $fullMatch = $m[0];

      return strtolower( $fullMatch );
  }
  /**
   * Lowercases the property names in a node's "style" attribute, writes the
   * result back and remembers the node's original declarations.
   *
   * @param DOMElement $node
   * @return void
   */
  private function normalizeStyleAttributes( DOMElement $node ) {
      // Lowercase everything that directly precedes a colon (the property names).
      $normalizedStyle = preg_replace_callback(
          '/[A-z\\-]+(?=\\:)/S',
          array( $this, 'normalizeStyleAttributes_callback' ),
          $node->getAttribute( 'style' )
      );

      // So that existing style attributes in the HTML are not overwritten
      // later, keep the original declarations, recorded once per node path.
      $path           = $node->getNodePath();
      $alreadyVisited = isset( $this->styleAttributesForNodes[ $path ] );
      if ( ! $alreadyVisited ) {
          $this->styleAttributesForNodes[ $path ] = $this->parseCssDeclarationsBlock( $normalizedStyle );
          $this->visitedNodes[ $path ]            = $node;
      }

      $node->setAttribute( 'style', $normalizedStyle );
  }
  /**
   * Re-applies the originally saved inline declarations on top of whatever
   * each visited node's style attribute currently contains.
   *
   * @return void
   */
  private function fillStyleAttributesWithMergedStyles() {
      foreach ( $this->styleAttributesForNodes as $path => $savedDeclarations ) {
          $element             = $this->visitedNodes[ $path ];
          $currentDeclarations = $this->parseCssDeclarationsBlock( $element->getAttribute( 'style' ) );

          // The saved declarations are passed as the "new" styles, so they
          // override the current ones.
          $mergedStyle = $this->generateStyleStringFromDeclarationsArrays(
              $currentDeclarations,
              $savedDeclarations
          );

          $element->setAttribute( 'style', $mergedStyle );
      }
  }
  /**
   * Merges an old/existing name/value array with a new name/value array and
   * generates a string of the combined style suitable for placing inline.
   *
   * New values override old ones, except where the old value ends in
   * "!important". This is the single point of CSS string generation, which
   * keeps the output consistent no matter where the CSS originally came from.
   *
   * @param string[] $oldStyles
   * @param string[] $newStyles
   * @return string
   */
  private function generateStyleStringFromDeclarationsArrays( array $oldStyles, array $newStyles ) {
      // The result depends on both inputs separately (an "!important" old value
      // beats the new one), so the key must cover both arrays. Keying on the
      // merged array alone lets different inputs collide and return a stale string.
      $cacheKey = serialize( array( $oldStyles, $newStyles ) );
      if ( isset( $this->caches[ self::CACHE_KEY_COMBINED_STYLES ][ $cacheKey ] ) ) {
          return $this->caches[ self::CACHE_KEY_COMBINED_STYLES ][ $cacheKey ];
      }

      $combinedStyles = array_merge( $oldStyles, $newStyles );

      // Restore old values that were flagged "!important" (10 characters).
      foreach ( $oldStyles as $attributeName => $attributeValue ) {
          if ( isset( $newStyles[ $attributeName ] ) && strtolower( substr( $attributeValue, -10 ) ) === '!important' ) {
              $combinedStyles[ $attributeName ] = $attributeValue;
          }
      }

      $style = '';
      foreach ( $combinedStyles as $attributeName => $attributeValue ) {
          $style .= strtolower( trim( $attributeName ) ) . ': ' . trim( $attributeValue ) . '; ';
      }
      $trimmedStyle = rtrim( $style );

      $this->caches[ self::CACHE_KEY_COMBINED_STYLES ][ $cacheKey ] = $trimmedStyle;

      return $trimmedStyle;
  }
  /**
   * Adds a style element to $xmlDocument holding those media queries from
   * $css that contain at least one selector matching the document.
   *
   * @param DOMDocument $xmlDocument the document to match against
   * @param DOMXPath $xpath
   * @param string $css a string of CSS
   * @return void
   */
  private function copyCssWithMediaToStyleNode( DOMDocument $xmlDocument, DOMXPath $xpath, $css ) {
      if ( '' === $css ) {
          return;
      }

      $relevantQueries = array();

      foreach ( $this->extractMediaQueriesFromCss( $css ) as $mediaQuery ) {
          foreach ( $this->parseCssRules( $mediaQuery['css'] ) as $rule ) {
              if ( ! $this->existsMatchForCssSelector( $xpath, $rule['selector'] ) ) {
                  continue;
              }

              // One matching selector is enough to keep the whole media query.
              $relevantQueries[] = $mediaQuery['query'];
              break;
          }
      }

      $this->addStyleElementToDocument( $xmlDocument, implode( $relevantQueries ) );
  }
  /**
   * Finds the @media blocks in $css.
   *
   * @param string $css
   * @return string[][] numeric array with string sub-arrays with the keys "css"
   *         (captured from inside the block) and "query" (the complete @media block)
   */
  private function extractMediaQueriesFromCss( $css ) {
      preg_match_all( '#(?<query>@media[^{]*\\{(?<css>(.*?)\\})(\\s*)\\})#s', $css, $mediaQueries );

      $result = array();
      foreach ( $mediaQueries['css'] as $index => $innerCss ) {
          $result[] = array(
              'css'   => $innerCss,
              'query' => $mediaQueries['query'][ $index ],
          );
      }

      return $result;
  }
  /**
   * Tells whether $cssSelector matches at least one element.
   *
   * @param DOMXPath $xpath
   * @param string $cssSelector
   * @return bool
   */
  private function existsMatchForCssSelector( DOMXPath $xpath, $cssSelector ) {
      $matches = $xpath->query( $this->translateCssToXpath( $cssSelector ) );

      // query() returns false for an invalid expression.
      if ( false === $matches ) {
          return false;
      }

      return 0 !== $matches->length;
  }
  /**
   * Collects the CSS of all style elements in the document and removes those
   * elements from it.
   *
   * @param DOMXPath $xpath
   * @return string the concatenated CSS, each block preceded by two newlines
   */
  private function getCssFromAllStyleNodes( DOMXPath $xpath ) {
      $collectedCss = '';

      $styleNodes = $xpath->query( '//style' );
      if ( false !== $styleNodes ) {
          /** @var DOMNode $styleNode */
          foreach ( $styleNodes as $styleNode ) {
              $collectedCss .= "\n\n" . $styleNode->nodeValue;
              $styleNode->parentNode->removeChild( $styleNode );
          }
      }

      return $collectedCss;
  }
  /**
   * Appends a style element (type "text/css") containing $css to the head of
   * $document.
   *
   * This method is protected to allow overriding.
   *
   * @see https://github.com/jjriv/emogrifier/issues/103
   * @param DOMDocument $document
   * @param string $css
   * @return void
   */
  protected function addStyleElementToDocument( DOMDocument $document, $css ) {
      $typeAttribute        = $document->createAttribute( 'type' );
      $typeAttribute->value = 'text/css';

      $styleElement = $document->createElement( 'style', $css );
      $styleElement->appendChild( $typeAttribute );

      $this->getOrCreateHeadElement( $document )->appendChild( $styleElement );
  }
  /**
   * Returns the head element of $document, creating it in front of the body
   * element if the document does not have one yet.
   *
   * @param DOMDocument $document
   * @return DOMNode the head element
   */
  private function getOrCreateHeadElement( DOMDocument $document ) {
      $existingHead = $document->getElementsByTagName( 'head' )->item( 0 );
      if ( null !== $existingHead ) {
          return $existingHead;
      }

      $newHead     = $document->createElement( 'head' );
      $htmlElement = $document->getElementsByTagName( 'html' )->item( 0 );
      $htmlElement->insertBefore( $newHead, $document->getElementsByTagName( 'body' )->item( 0 ) );

      return $newHead;
  }
  /**
   * Empty placeholder.
   *
   * NOTE(review): nothing in the visible part of this file calls this method
   * (splitCssAndMediaQuery() uses _media_concat() as its callback) — confirm
   * whether it can be removed.
   */
  private function splitCssAndMediaQuery_callback() {
      // Intentionally empty.
  }
  /**
   * Splits input CSS code into an array where:
   * - key "css" contains the cleaned CSS code,
   * - key "media" contains the media queries that were kept.
   *
   * Example:
   * The CSS code
   *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
   * will be parsed into the following array:
   *   "css" => "h1 { color:red; }"
   *   "media" => "@media { h1 {}}"
   *
   * The kept media queries are appended to the static $_media buffer, which
   * this method does not reset itself.
   *
   * @param string $css
   * @return array
   */
  private function splitCssAndMediaQuery( $css ) {
      // Move the kept @media blocks out of the CSS and into self::$_media.
      $css = preg_replace_callback(
          '#@media\\s+(?:only\\s)?(?:[\\s{\(]|screen|all)\\s?[^{]+{.*}\\s*}\\s*#misU',
          array( $this, '_media_concat' ),
          $css
      );

      $patternsToStrip = array(
          // get rid of css comment code
          '/\\/\\*.*\\*\\//sU',
          // strip out any import directives
          '/^\\s*@import\\s[^;]+;/misU',
          // strip the remaining media enclosures
          '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
      );

      // A single replacement string is applied to every pattern.
      $cleanedCss = preg_replace( $patternsToStrip, '', $css );

      return array(
          'css'   => $cleanedCss,
          'media' => self::$_media,
      );
  }
  /**
   * preg_replace_callback() handler used by splitCssAndMediaQuery(): appends
   * the matched media query to the static $_media buffer.
   *
   * Nothing is returned, so the match is replaced with an empty string in the
   * CSS being processed.
   *
   * @param string[] $matches the regular expression match
   * @return void
   */
  private function _media_concat( $matches ) {
      self::$_media = self::$_media . $matches[0];
  }
  /**
   * Builds a DOMDocument from the current (unified) HTML.
   *
   * @return DOMDocument
   */
  private function createXmlDocument() {
      $document                      = new DOMDocument();
      $document->encoding            = 'UTF-8';
      $document->strictErrorChecking = false;
      $document->formatOutput        = true;

      // Collect libxml parse errors internally while loading, discard them,
      // then restore whatever error mode was active before.
      $previousErrorMode = libxml_use_internal_errors( true );
      $document->loadHTML( $this->getUnifiedHtml() );
      libxml_clear_errors();
      libxml_use_internal_errors( $previousErrorMode );

      $document->normalizeDocument();

      return $document;
  }
  /**
   * Returns the HTML with the unprocessable HTML tags removed and with a
   * document type and Content-Type meta tag added where needed.
   *
   * @return string the unified HTML
   */
  private function getUnifiedHtml() {
      // Applied inside-out: strip tags, then ensure the doctype, then add the meta tag.
      return $this->addContentTypeMetaTag(
          $this->ensureDocumentType(
              $this->removeUnprocessableTags( $this->html )
          )
      );
  }
  /**
   * Removes the opening and closing tags of all unprocessable tags from $html
   * (nothing is removed when the list of unprocessable tags is empty).
   * Content between such tags is left in place.
   *
   * @param string $html
   * @return string the reworked HTML with the unprocessable tags removed
   */
  private function removeUnprocessableTags( $html ) {
      if ( empty( $this->unprocessableHtmlTags ) ) {
          return $html;
      }

      // Quote each name so that it is matched literally inside the pattern.
      $quotedTagNames = array();
      foreach ( $this->unprocessableHtmlTags as $tagName ) {
          $quotedTagNames[] = preg_quote( $tagName, '/' );
      }

      // The lookahead requires the tag name to end right after the listed
      // name. Without it, registering "p" would also strip <pre>, <param>, etc.
      return preg_replace(
          '/<\\/?(?:' . implode( '|', $quotedTagNames ) . ')(?=[\\s\\/>])[^>]*>/i',
          '',
          $html
      );
  }
  /**
   * Prepends the default document type unless $html already contains one.
   *
   * @param string $html
   * @return string HTML with document type
   */
  private function ensureDocumentType( $html ) {
      // Case-insensitive search anywhere in the markup.
      return ( false !== stripos( $html, '<!DOCTYPE' ) )
          ? $html
          : self::DEFAULT_DOCUMENT_TYPE . $html;
  }
  /**
   * Adds a Content-Type meta tag for the charset, unless the HTML already
   * mentions "Content-Type" anywhere (case-insensitive).
   *
   * @param string $html
   * @return string the HTML with the meta tag added
   */
  private function addContentTypeMetaTag( $html ) {
      $hasContentTypeMetaTag = stristr( $html, 'Content-Type' ) !== false;
      if ( $hasContentTypeMetaTag ) {
          return $html;
      }

      // We are trying to insert the meta tag at the right spot in the DOM.
      // If we just prepended it to the HTML, we would lose attributes set on the HTML tag.
      //
      // The lookahead makes sure the tag name ends after "head"/"html", so
      // <header> is not mistaken for <head>. The limit of 1 restricts the
      // insertion to the first matching tag.
      $headTagPattern = '/<head(?=[\\s\\/>])([^>]*)>/i';
      $htmlTagPattern = '/<html(?=[\\s\\/>])([^>]*)>/i';

      if ( preg_match( $headTagPattern, $html ) === 1 ) {
          return preg_replace( $headTagPattern, '<head$1>' . self::CONTENT_TYPE_META_TAG, $html, 1 );
      }

      if ( preg_match( $htmlTagPattern, $html ) === 1 ) {
          return preg_replace(
              $htmlTagPattern,
              '<html$1><head>' . self::CONTENT_TYPE_META_TAG . '</head>',
              $html,
              1
          );
      }

      // Neither tag present: put the meta tag in front of the markup.
      return self::CONTENT_TYPE_META_TAG . $html;
  }
  /**
   * usort() comparator for parsed CSS rules.
   *
   * Rules are ordered by ascending selector precedence, so selectors with
   * lesser precedence get processed first and those with greater precedence
   * last. Rules of equal precedence are ordered by their "line" value.
   * The comparator never returns 0.
   *
   * @param string[] $a
   * @param string[] $b
   * @return int -1 or 1
   */
  private function sortBySelectorPrecedence( array $a, array $b ) {
      $precedenceA = $this->getCssSelectorPrecedence( $a['selector'] );
      $precedenceB = $this->getCssSelectorPrecedence( $b['selector'] );

      if ( $precedenceA === $precedenceB ) {
          return ( $a['line'] < $b['line'] ) ? -1 : 1;
      }

      return ( $precedenceA < $precedenceB ) ? -1 : 1;
  }
  /**
   * Computes (and caches) a precedence score for a selector:
   * ids are worth 100, classes 10 and elements 1.
   *
   * @param string $selector
   * @return int
   */
  private function getCssSelectorPrecedence( $selector ) {
      $cacheKey = md5( $selector );
      if ( isset( $this->caches[ self::CACHE_KEY_SELECTOR ][ $cacheKey ] ) ) {
          return $this->caches[ self::CACHE_KEY_SELECTOR ][ $cacheKey ];
      }

      $score  = 0;
      $weight = 100;

      // Count and strip ids first, then classes, then bare words (elements).
      foreach ( array( '\\#', '\\.', '' ) as $prefixPattern ) {
          if ( '' === trim( $selector ) ) {
              break;
          }

          $occurrences = 0;
          $selector    = preg_replace( '/' . $prefixPattern . '\\w+/', '', $selector, -1, $occurrences );
          $score      += ( $weight * $occurrences );
          $weight     /= 10;
      }

      $this->caches[ self::CACHE_KEY_SELECTOR ][ $cacheKey ] = $score;

      return $this->caches[ self::CACHE_KEY_SELECTOR ][ $cacheKey ];
  }
  /**
   * preg_replace_callback() handler used by translateCssToXpath():
   * lowercases the whole matched text.
   *
   * @param string[] $matches the regular expression match
   * @return string the lowercased full match
   */
  private function translateCssToXpath_callback( $matches ) {
      $fullMatch = $matches[0];

      return strtolower( $fullMatch );
  }
  /**
   * Maps a CSS selector to an XPath query string.
   *
   * Results are cached in the XPath cache, keyed by the md5 hash of the
   * trimmed selector after standalone words have been lowercased.
   *
   * @see http://plasmasturm.org/log/444/
   * @param string $cssSelector a CSS selector
   * @return string the corresponding XPath selector
   */
  private function translateCssToXpath( $cssSelector ) {
      // Pad with spaces so that whitespace-delimited words at either end are
      // lowercased as well.
      $paddedSelector          = ' ' . $cssSelector . ' ';
      $lowercasePaddedSelector = preg_replace_callback(
          '/\\s+\\w+\\s+/',
          array( $this, 'translateCssToXpath_callback' ),
          $paddedSelector
      );
      $trimmedLowercaseSelector = trim( $lowercasePaddedSelector );
      $xpathKey                 = md5( $trimmedLowercaseSelector );

      // Read and write the SAME bucket. Checking CACHE_KEY_XPATH while storing
      // under CACHE_KEY_SELECTOR meant the cache never hit, and the stored
      // XPath strings overwrote the integer precedences that
      // getCssSelectorPrecedence() keeps in CACHE_KEY_SELECTOR under md5 keys.
      if ( ! isset( $this->caches[ self::CACHE_KEY_XPATH ][ $xpathKey ] ) ) {
          $cssSelectorMatches = array(
              'child'            => '/\\s+>\\s+/',
              'adjacent sibling' => '/\\s+\\+\\s+/',
              'descendant'       => '/\\s+/',
              ':first-child'     => '/([^\\/]+):first-child/i',
              ':last-child'      => '/([^\\/]+):last-child/i',
              'attribute only'   => '/^\\[(\\w+|\\w+\\=[\'"]?\\w+[\'"]?)\\]/',
              'attribute'        => '/(\\w)\\[(\\w+)\\]/',
              'exact attribute'  => '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
          );
          $xPathReplacements = array(
              'child'            => '/',
              'adjacent sibling' => '/following-sibling::*[1]/self::',
              'descendant'       => '//',
              ':first-child'     => '\\1/*[1]',
              ':last-child'      => '\\1/*[last()]',
              'attribute only'   => '*[@\\1]',
              'attribute'        => '\\1[@\\2]',
              'exact attribute'  => '\\1[@\\2="\\3"]',
          );

          $roughXpath = '//' . preg_replace( $cssSelectorMatches, $xPathReplacements, $trimmedLowercaseSelector );

          $xpathWithIdAttributeMatchers = preg_replace_callback(
              self::ID_ATTRIBUTE_MATCHER,
              array( $this, 'matchIdAttributes' ),
              $roughXpath
          );
          $xpathWithIdAttributeAndClassMatchers = preg_replace_callback(
              self::CLASS_ATTRIBUTE_MATCHER,
              array( $this, 'matchClassAttributes' ),
              $xpathWithIdAttributeMatchers
          );

          // Advanced selectors need more involved translation; method callbacks
          // are used here instead of closures.
          $xpathWithIdAttributeAndClassMatchers = preg_replace_callback(
              '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
              array( $this, 'translateNthChild' ),
              $xpathWithIdAttributeAndClassMatchers
          );
          $finalXpath = preg_replace_callback(
              '/([^\\/]+):nth-of-type\\(\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
              array( $this, 'translateNthOfType' ),
              $xpathWithIdAttributeAndClassMatchers
          );

          $this->caches[ self::CACHE_KEY_XPATH ][ $xpathKey ] = $finalXpath;
      }

      return $this->caches[ self::CACHE_KEY_XPATH ][ $xpathKey ];
  }
  /**
   * preg_replace_callback() handler that rewrites an "element#id" fragment as an
   * XPath node test with an @id predicate.
   * @param string[] $match index 1 holds the element name ('' when absent), index 2 the ID
   * @return string the element name (or "*" when none was given) followed by [@id="..."]
   */
  private function matchIdAttributes( array $match ) {
      $elementName = $match[1];
      if ( $elementName === '' ) {
          // A bare "#id" applies to any element.
          $elementName = '*';
      }

      return $elementName . '[@id="' . $match[2] . '"]';
  }
  853.  
  /**
   * preg_replace_callback() handler that rewrites an "element.class1.class2" fragment
   * as an XPath node test with one @class predicate per class name.
   * @param string[] $match index 1 holds the leading token ('' when absent),
   *                        index 2 the dot-prefixed class list, e.g. ".foo.bar"
   * @return string the leading token (or "*" when empty) followed by the class predicates
   */
  private function matchClassAttributes( array $match ) {
      $xpath = $match[1];
      if ( $xpath === '' ) {
          // A bare ".class" applies to any element.
          $xpath = '*';
      }

      // Drop the leading dot, then emit one whole-word class test per class name.
      $classNames = explode('.', substr($match[2], 1));
      foreach ( $classNames as $className ) {
          $xpath .= '[contains(concat(" ", @class, " "), concat(" ", "' . $className . '", " "))]';
      }

      return $xpath;
  }
  864.  
  /**
   * preg_replace_callback() handler that rewrites "X:nth-child(...)" as XPath.
   *
   * Depending on what parseNth() returns, the result is one of:
   *  - a plain positional test when there is no multiplier,
   *  - a "position() mod multiplier = index" test for a non-negative multiplier,
   *  - the same test on "last() - position()" for a negative multiplier.
   *
   * @param string[] $match index 1 holds the part before ":nth-child", index 2 (and
   *                        optionally 3) the argument as consumed by parseNth()
   * @return string the XPath fragment
   */
  private function translateNthChild( array $match ) {
      $nth = $this->parseNth($match);
      $element = $match[1];
      $index = $nth[ self::INDEX ];

      if ( ! isset($nth[ self::MULTIPLIER ]) ) {
          return sprintf('*[%u]/self::%s', $index, $element);
      }

      $multiplier = $nth[ self::MULTIPLIER ];
      if ( $multiplier < 0 ) {
          return sprintf(
              '*[(last() - position()) mod %u = %u]/self::%s',
              abs($multiplier),
              $index,
              $element
          );
      }

      return sprintf('*[position() mod %u = %u]/self::%s', $multiplier, $index, $element);
  }
  892.  
  /**
   * preg_replace_callback() handler that rewrites "X:nth-of-type(...)" as XPath.
   *
   * The predicate is attached directly to the matched node test. Its shape depends
   * on whether parseNth() reports a multiplier and on that multiplier's sign.
   *
   * @param string[] $match index 1 holds the part before ":nth-of-type", index 2 (and
   *                        optionally 3) the argument as consumed by parseNth()
   * @return string the XPath fragment
   */
  private function translateNthOfType( array $match ) {
      $nth = $this->parseNth($match);
      $nodeTest = $match[1];
      $hasMultiplier = isset($nth[ self::MULTIPLIER ]);

      if ( $hasMultiplier && $nth[ self::MULTIPLIER ] < 0 ) {
          // Negative multiplier: test the distance from the last position instead.
          $xPathExpression = sprintf(
              '%s[(last() - position()) mod %u = %u]',
              $nodeTest,
              abs($nth[ self::MULTIPLIER ]),
              $nth[ self::INDEX ]
          );
      } elseif ( $hasMultiplier ) {
          $xPathExpression = sprintf(
              '%s[position() mod %u = %u]',
              $nodeTest,
              $nth[ self::MULTIPLIER ],
              $nth[ self::INDEX ]
          );
      } else {
          $xPathExpression = sprintf('%s[%u]', $nodeTest, $nth[ self::INDEX ]);
      }

      return $xPathExpression;
  }
  920.  
  /**
   * Parses the argument of an :nth-child()/:nth-of-type() selector.
   *
   * Supported forms are "even", "odd", a plain index ("3") and "an+b" expressions
   * ("2n", "-n+3", "3n - 1").
   *
   * @param string[] $match index 2 holds the whole argument; index 3, when present,
   *                        holds the trailing offset of an "an+b" expression
   *                        (sign included, possibly with surrounding whitespace)
   * @return int[] always contains self::INDEX; contains self::MULTIPLIER only when
   *               the argument has a non-zero "n" coefficient
   */
  private function parseNth( array $match ) {
      $keyword = strtolower($match[2]);
      if ( $keyword === 'even' || $keyword === 'odd' ) {
          return array( self::MULTIPLIER => 2, self::INDEX => ( $keyword === 'even' ? 0 : 1 ) );
      }

      if ( stripos($match[2], 'n') === false ) {
          // No "n" term at all: the argument is a plain index.
          return array( self::INDEX => (int) str_replace(' ', '', $match[2]) );
      }

      $multipleTerm = $match[2];
      $index = 0;
      if ( isset($match[3]) && $match[3] !== '' ) {
          // The offset is always the tail of the expression. Cut off exactly that tail;
          // a global search-and-replace would also eat a coefficient that happens to
          // look the same (e.g. "-2n-2").
          $multipleTerm = substr($match[2], 0, -strlen($match[3]));
          // Strip every kind of whitespace between sign and digit before casting.
          $index = (int) preg_replace('/\\s+/', '', $match[3]);
      }

      $coefficient = str_ireplace('n', '', $multipleTerm);
      if ( $coefficient === '' || $coefficient === '+' ) {
          // "n" / "+n"
          $multiplier = 1;
      } elseif ( $coefficient === '-' ) {
          // "-n"
          $multiplier = -1;
      } else {
          $multiplier = (int) $coefficient;
      }

      if ( $multiplier === 0 ) {
          // "0n+b" (with or without a sign on the zero) is just the index b.
          return array( self::INDEX => $index );
      }

      // Shift a negative offset into the non-negative range. The multiplier is
      // non-zero here, so this loop always terminates.
      $step = abs($multiplier);
      while ( $index < 0 ) {
          $index += $step;
      }

      return array( self::MULTIPLIER => $multiplier, self::INDEX => $index );
  }
  955.  
  /**
   * Splits a CSS declarations block into a property => value map.
   *
   * For example, "color: #000; font-weight: bold;" becomes
   *   "color" => "#000"
   *   "font-weight" => "bold"
   *
   * Property names are lower-cased; if a property occurs more than once, the last
   * occurrence wins. Fragments that do not look like "name: value" are ignored.
   * Results are memoised per input string.
   *
   * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
   * @return string[]
   *         the CSS declarations with the property names as array keys and the property values as array values
   */
  private function parseCssDeclarationsBlock( $cssDeclarationsBlock ) {
      $cacheKey = self::CACHE_KEY_CSS_DECLARATIONS_BLOCK;
      if ( isset($this->caches[ $cacheKey ][ $cssDeclarationsBlock ]) ) {
          return $this->caches[ $cacheKey ][ $cssDeclarationsBlock ];
      }

      $parsedDeclarations = array();
      // Split on semicolons, except those directly followed by "base64" or "charset".
      foreach ( preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock) as $rawDeclaration ) {
          $parts = array();
          if ( preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/', trim($rawDeclaration), $parts) ) {
              $parsedDeclarations[ strtolower($parts[1]) ] = $parts[2];
          }
      }

      $this->caches[ $cacheKey ][ $cssDeclarationsBlock ] = $parsedDeclarations;

      return $parsedDeclarations;
  }
  986.  
  /**
   * Find the nodes that are not to be emogrified.
   *
   * Every key of $this->excludedSelectors is translated to XPath and queried against
   * the document. All matching nodes are collected in one flat list; a node matched
   * by several selectors appears once per match.
   *
   * @param DOMXPath $xpath
   * @return DOMElement[]
   */
  private function getNodesToExclude( DOMXPath $xpath ) {
      $excludedNodes = array();
      foreach ( array_keys($this->excludedSelectors) as $selectorToExclude ) {
          $matchingNodes = $xpath->query($this->translateCssToXpath($selectorToExclude));
          if ( $matchingNodes === false ) {
              // DOMXPath::query() returns false for a malformed expression; skip this
              // selector instead of passing false to foreach.
              continue;
          }

          foreach ( $matchingNodes as $node ) {
              $excludedNodes[] = $node;
          }
      }

      return $excludedNodes;
  }