HTML5_TreeBuilder

The WooCommerce PDF Invoices & Packing Slips HTML5 TreeBuilder class.

Defined (1)

The class is defined in the following location(s).

/lib/dompdf/lib/html5lib/TreeBuilder.php  
  1. class HTML5_TreeBuilder { 
  2. public $stack = array(); 
  3. public $content_model; 
  4.  
  5. private $mode; 
  6. private $original_mode; 
  7. private $secondary_mode; 
  8. private $dom; 
  9. // Whether or not normal insertion of nodes should actually foster 
  10. // parent (used in one case in spec) 
  11. private $foster_parent = false; 
  12. private $a_formatting = array(); 
  13.  
  14. private $head_pointer = null; 
  15. private $form_pointer = null; 
  16.  
  17. private $flag_frameset_ok = true; 
  18. private $flag_force_quirks = false; 
  19. private $ignored = false; 
  20. private $quirks_mode = null; 
  21. // this gets to 2 when we want to ignore the next lf character, and 
  22. // is decremented at the beginning of each processed token (this way,  
  23. // code can check for (bool)$ignore_lf_token, but it phases out 
  24. // appropriately) 
  25. private $ignore_lf_token = 0; 
  26. private $fragment = false; 
  27. private $root; 
  28.  
  29. private $scoping = array('applet', 'button', 'caption', 'html', 'marquee', 'object', 'table', 'td', 'th', 'svg:foreignObject'); 
  30. private $formatting = array('a', 'b', 'big', 'code', 'em', 'font', 'i', 'nobr', 's', 'small', 'strike', 'strong', 'tt', 'u'); 
  31. // dl and ds are speculative 
  32. private $special = array('address', 'area', 'article', 'aside', 'base', 'basefont', 'bgsound',  
  33. 'blockquote', 'body', 'br', 'center', 'col', 'colgroup', 'command', 'dc', 'dd', 'details', 'dir', 'div', 'dl', 'ds',  
  34. 'dt', 'embed', 'fieldset', 'figure', 'footer', 'form', 'frame', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5',  
  35. 'h6', 'head', 'header', 'hgroup', 'hr', 'iframe', 'img', 'input', 'isindex', 'li', 'link',  
  36. 'listing', 'menu', 'meta', 'nav', 'noembed', 'noframes', 'noscript', 'ol',  
  37. 'p', 'param', 'plaintext', 'pre', 'script', 'select', 'spacer', 'style',  
  38. 'tbody', 'textarea', 'tfoot', 'thead', 'title', 'tr', 'ul', 'wbr'); 
  39.  
  40. private $pendingTableCharacters; 
  41. private $pendingTableCharactersDirty; 
  42.  
  43. // Tree construction modes 
  44. const INITIAL = 0; 
  45. const BEFORE_HTML = 1; 
  46. const BEFORE_HEAD = 2; 
  47. const IN_HEAD = 3; 
  48. const IN_HEAD_NOSCRIPT = 4; 
  49. const AFTER_HEAD = 5; 
  50. const IN_BODY = 6; 
  51. const IN_CDATA_RCDATA = 7; 
  52. const IN_TABLE = 8; 
  53. const IN_TABLE_TEXT = 9; 
  54. const IN_CAPTION = 10; 
  55. const IN_COLUMN_GROUP = 11; 
  56. const IN_TABLE_BODY = 12; 
  57. const IN_ROW = 13; 
  58. const IN_CELL = 14; 
  59. const IN_SELECT = 15; 
  60. const IN_SELECT_IN_TABLE= 16; 
  61. const IN_FOREIGN_CONTENT= 17; 
  62. const AFTER_BODY = 18; 
  63. const IN_FRAMESET = 19; 
  64. const AFTER_FRAMESET = 20; 
  65. const AFTER_AFTER_BODY = 21; 
  66. const AFTER_AFTER_FRAMESET = 22; 
  67.  
  /**
   * Converts a magic number to a readable constant name. Use for debugging.
   *
   * The name table is built once, by reflecting over this class's
   * constants, and cached in a static local. Only integer-valued
   * constants are indexed, so the null/string NS_* constants are skipped.
   *
   * Several constant groups reuse the same integers (for example INITIAL
   * and SCOPE are both 0). The first name seen for a value is kept rather
   * than letting a later constant overwrite it; assuming getConstants()
   * reports constants in declaration order, that favours the insertion-mode
   * names, which is what the commented-out debug trace in emitToken()
   * passes to this helper.
   *
   * @param int $number Value of one of this class's integer constants.
   * @return string|null The constant's name, or null when no integer
   *                     constant of this class has that value.
   */
  private function strConst($number) {
      static $lookup = null;
      if ($lookup === null) {
          $lookup = array();
          $reflection = new ReflectionClass(__CLASS__);
          foreach ($reflection->getConstants() as $name => $value) {
              // Ignore non-integer constants, and never let a later
              // constant steal a value that already has a name.
              if (!is_int($value) || isset($lookup[$value])) {
                  continue;
              }
              $lookup[$value] = $name;
          }
      }
      // Unknown values yield null instead of an undefined-index notice.
      return isset($lookup[$number]) ? $lookup[$number] : null;
  }
  81.  
  82. // The different types of elements. 
  83. const SPECIAL = 100; 
  84. const SCOPING = 101; 
  85. const FORMATTING = 102; 
  86. const PHRASING = 103; 
  87.  
  88. // Quirks modes in $quirks_mode 
  89. const NO_QUIRKS = 200; 
  90. const QUIRKS_MODE = 201; 
  91. const LIMITED_QUIRKS_MODE = 202; 
  92.  
  93. // Marker to be placed in $a_formatting 
  94. const MARKER = 300; 
  95.  
  96. // Namespaces for foreign content 
  97. const NS_HTML = null; // to prevent DOM from requiring NS on everything 
  98. const NS_MATHML = 'http://www.w3.org/1998/Math/MathML'; 
  99. const NS_SVG = 'http://www.w3.org/2000/svg'; 
  100. const NS_XLINK = 'http://www.w3.org/1999/xlink'; 
  101. const NS_XML = 'http://www.w3.org/XML/1998/namespace'; 
  102. const NS_XMLNS = 'http://www.w3.org/2000/xmlns/'; 
  103.  
  104. // Different types of scopes to test for elements 
  105. const SCOPE = 0; 
  106. const SCOPE_LISTITEM = 1; 
  107. const SCOPE_TABLE = 2; 
  108.  
  /**
   * Prepares the builder for a new parse: creates the backing DOMDocument,
   * configures it, and starts in the INITIAL insertion mode.
   */
  public function __construct() {
      // Configure the document through a local handle before storing it.
      $document = new DOMDocument;
      $document->encoding = 'UTF-8';
      $document->preserveWhiteSpace = true;
      $document->substituteEntities = true;
      $document->strictErrorChecking = false;

      $this->dom = $document;
      $this->mode = self::INITIAL;
  }
  117.  
  /**
   * Reports the quirks mode recorded for the document.
   *
   * @return int|null One of self::NO_QUIRKS, self::QUIRKS_MODE or
   *                  self::LIMITED_QUIRKS_MODE once the INITIAL insertion
   *                  mode has set it; null (the property's default) before.
   */
  public function getQuirksMode() {
      $quirksMode = $this->quirks_mode;
      return $quirksMode;
  }
  120.  
  121. // Process tag tokens 
  122. public function emitToken($token, $mode = null) { 
  123. // XXX: ignore parse errors... why are we emitting them, again? 
  124. if ($token['type'] === HTML5_Tokenizer::PARSEERROR) return; 
  125. if ($mode === null) $mode = $this->mode; 
  126.  
  127. /** 
  128. $backtrace = debug_backtrace(); 
  129. if ($backtrace[1]['class'] !== 'HTML5_TreeBuilder') echo "--\n"; 
  130. echo $this->strConst($mode); 
  131. if ($this->original_mode) echo " (originally ".$this->strConst($this->original_mode).")"; 
  132. echo "\n "; 
  133. token_dump($token); 
  134. $this->printStack(); 
  135. $this->printActiveFormattingElements(); 
  136. if ($this->foster_parent) echo " -> this is a foster parent mode\n"; 
  137. if ($this->flag_frameset_ok) echo " -> frameset ok\n"; 
  138. */ 
  139.  
  140. if ($this->ignore_lf_token) $this->ignore_lf_token--; 
  141. $this->ignored = false; 
  142. // indenting is a little wonky, this can be changed later on 
  143. switch ($mode) { 
  144.  
  145. case self::INITIAL: 
  146.  
  147. /** A character token that is one of U+0009 CHARACTER TABULATION,  
  148. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 SPACE */ 
  149. if ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  150. /** Ignore the token. */ 
  151. $this->ignored = true; 
  152. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  153. if ( 
  154. $token['name'] !== 'html' || !empty($token['public']) || 
  155. !empty($token['system']) || $token !== 'about:legacy-compat' 
  156. ) { 
  157. /** If the DOCTYPE token's name is not a case-sensitive match 
  158. * for the string "html", or if the token's public identifier 
  159. * is not missing, or if the token's system identifier is 
  160. * neither missing nor a case-sensitive match for the string 
  161. * "about:legacy-compat", then there is a parse error (this 
  162. * is the DOCTYPE parse error). */ 
  163. // DOCTYPE parse error 
  164. /** Append a DocumentType node to the Document node, with the name 
  165. * attribute set to the name given in the DOCTYPE token, or the 
  166. * empty string if the name was missing; the publicId attribute 
  167. * set to the public identifier given in the DOCTYPE token, or 
  168. * the empty string if the public identifier was missing; the 
  169. * systemId attribute set to the system identifier given in the 
  170. * DOCTYPE token, or the empty string if the system identifier 
  171. * was missing; and the other attributes specific to 
  172. * DocumentType objects set to null and empty lists as 
  173. * appropriate. Associate the DocumentType node with the 
  174. * Document object so that it is returned as the value of the 
  175. * doctype attribute of the Document object. */ 
  176. if (!isset($token['public'])) $token['public'] = null; 
  177. if (!isset($token['system'])) $token['system'] = null; 
  178. // XDOM 
  179. // Yes this is hacky. I'm kind of annoyed that I can't appendChild 
  180. // a doctype to DOMDocument. Maybe I haven't chanted the right 
  181. // syllables. 
  182. $impl = new DOMImplementation(); 
  183. // This call can fail for particularly pathological cases (namely,  
  184. // the qualifiedName parameter ($token['name']) could be missing). 
  185. if ($token['name']) { 
  186. $doctype = $impl->createDocumentType($token['name'], $token['public'], $token['system']); 
  187. $this->dom->appendChild($doctype); 
  188. } else { 
  189. // It looks like libxml's not actually *able* to express this case. 
  190. // So... don't. 
  191. $this->dom->emptyDoctype = true; 
  192. $public = is_null($token['public']) ? false : strtolower($token['public']); 
  193. $system = is_null($token['system']) ? false : strtolower($token['system']); 
  194. $publicStartsWithForQuirks = array( 
  195. "+//silmaril//dtd html pro v0r11 19970101//",  
  196. "-//advasoft ltd//dtd html 3.0 aswedit + extensions//",  
  197. "-//as//dtd html 3.0 aswedit + extensions//",  
  198. "-//ietf//dtd html 2.0 level 1//",  
  199. "-//ietf//dtd html 2.0 level 2//",  
  200. "-//ietf//dtd html 2.0 strict level 1//",  
  201. "-//ietf//dtd html 2.0 strict level 2//",  
  202. "-//ietf//dtd html 2.0 strict//",  
  203. "-//ietf//dtd html 2.0//",  
  204. "-//ietf//dtd html 2.1e//",  
  205. "-//ietf//dtd html 3.0//",  
  206. "-//ietf//dtd html 3.2 final//",  
  207. "-//ietf//dtd html 3.2//",  
  208. "-//ietf//dtd html 3//",  
  209. "-//ietf//dtd html level 0//",  
  210. "-//ietf//dtd html level 1//",  
  211. "-//ietf//dtd html level 2//",  
  212. "-//ietf//dtd html level 3//",  
  213. "-//ietf//dtd html strict level 0//",  
  214. "-//ietf//dtd html strict level 1//",  
  215. "-//ietf//dtd html strict level 2//",  
  216. "-//ietf//dtd html strict level 3//",  
  217. "-//ietf//dtd html strict//",  
  218. "-//ietf//dtd html//",  
  219. "-//metrius//dtd metrius presentational//",  
  220. "-//microsoft//dtd internet explorer 2.0 html strict//",  
  221. "-//microsoft//dtd internet explorer 2.0 html//",  
  222. "-//microsoft//dtd internet explorer 2.0 tables//",  
  223. "-//microsoft//dtd internet explorer 3.0 html strict//",  
  224. "-//microsoft//dtd internet explorer 3.0 html//",  
  225. "-//microsoft//dtd internet explorer 3.0 tables//",  
  226. "-//netscape comm. corp.//dtd html//",  
  227. "-//netscape comm. corp.//dtd strict html//",  
  228. "-//o'reilly and associates//dtd html 2.0//",  
  229. "-//o'reilly and associates//dtd html extended 1.0//",  
  230. "-//o'reilly and associates//dtd html extended relaxed 1.0//",  
  231. "-//spyglass//dtd html 2.0 extended//",  
  232. "-//sq//dtd html 2.0 hotmetal + extensions//",  
  233. "-//sun microsystems corp.//dtd hotjava html//",  
  234. "-//sun microsystems corp.//dtd hotjava strict html//",  
  235. "-//w3c//dtd html 3 1995-03-24//",  
  236. "-//w3c//dtd html 3.2 draft//",  
  237. "-//w3c//dtd html 3.2 final//",  
  238. "-//w3c//dtd html 3.2//",  
  239. "-//w3c//dtd html 3.2s draft//",  
  240. "-//w3c//dtd html 4.0 frameset//",  
  241. "-//w3c//dtd html 4.0 transitional//",  
  242. "-//w3c//dtd html experimental 19960712//",  
  243. "-//w3c//dtd html experimental 970421//",  
  244. "-//w3c//dtd w3 html//",  
  245. "-//w3o//dtd w3 html 3.0//",  
  246. "-//webtechs//dtd mozilla html 2.0//",  
  247. "-//webtechs//dtd mozilla html//",  
  248. ); 
  249. $publicSetToForQuirks = array( 
  250. "-//w3o//dtd w3 html strict 3.0//",  
  251. "-/w3c/dtd html 4.0 transitional/en",  
  252. "html",  
  253. ); 
  254. $publicStartsWithAndSystemForQuirks = array( 
  255. "-//w3c//dtd html 4.01 frameset//",  
  256. "-//w3c//dtd html 4.01 transitional//",  
  257. ); 
  258. $publicStartsWithForLimitedQuirks = array( 
  259. "-//w3c//dtd xhtml 1.0 frameset//",  
  260. "-//w3c//dtd xhtml 1.0 transitional//",  
  261. ); 
  262. $publicStartsWithAndSystemForLimitedQuirks = array( 
  263. "-//w3c//dtd html 4.01 frameset//",  
  264. "-//w3c//dtd html 4.01 transitional//",  
  265. ); 
  266. // first, do easy checks 
  267. if ( 
  268. !empty($token['force-quirks']) || 
  269. strtolower($token['name']) !== 'html' 
  270. ) { 
  271. $this->quirks_mode = self::QUIRKS_MODE; 
  272. } else { 
  273. do { 
  274. if ($system) { 
  275. foreach ($publicStartsWithAndSystemForQuirks as $x) { 
  276. if (strncmp($public, $x, strlen($x)) === 0) { 
  277. $this->quirks_mode = self::QUIRKS_MODE; 
  278. break; 
  279. if (!is_null($this->quirks_mode)) break; 
  280. foreach ($publicStartsWithAndSystemForLimitedQuirks as $x) { 
  281. if (strncmp($public, $x, strlen($x)) === 0) { 
  282. $this->quirks_mode = self::LIMITED_QUIRKS_MODE; 
  283. break; 
  284. if (!is_null($this->quirks_mode)) break; 
  285. foreach ($publicSetToForQuirks as $x) { 
  286. if ($public === $x) { 
  287. $this->quirks_mode = self::QUIRKS_MODE; 
  288. break; 
  289. if (!is_null($this->quirks_mode)) break; 
  290. foreach ($publicStartsWithForLimitedQuirks as $x) { 
  291. if (strncmp($public, $x, strlen($x)) === 0) { 
  292. $this->quirks_mode = self::LIMITED_QUIRKS_MODE; 
  293. if (!is_null($this->quirks_mode)) break; 
  294. if ($system === "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd") { 
  295. $this->quirks_mode = self::QUIRKS_MODE; 
  296. break; 
  297. foreach ($publicStartsWithForQuirks as $x) { 
  298. if (strncmp($public, $x, strlen($x)) === 0) { 
  299. $this->quirks_mode = self::QUIRKS_MODE; 
  300. break; 
  301. if (is_null($this->quirks_mode)) { 
  302. $this->quirks_mode = self::NO_QUIRKS; 
  303. } while (false); 
  304. $this->mode = self::BEFORE_HTML; 
  305. } else { 
  306. // parse error 
  307. /** Switch the insertion mode to "before html", then reprocess the 
  308. * current token. */ 
  309. $this->mode = self::BEFORE_HTML; 
  310. $this->quirks_mode = self::QUIRKS_MODE; 
  311. $this->emitToken($token); 
  312. break; 
  313.  
  314. case self::BEFORE_HTML: 
  315.  
  316. /** A DOCTYPE token */ 
  317. if($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  318. // Parse error. Ignore the token. 
  319. $this->ignored = true; 
  320.  
  321. /** A comment token */ 
  322. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  323. /** Append a Comment node to the Document object with the data 
  324. attribute set to the data given in the comment token. */ 
  325. // XDOM 
  326. $comment = $this->dom->createComment($token['data']); 
  327. $this->dom->appendChild($comment); 
  328.  
  329. /** A character token that is one of U+0009 CHARACTER TABULATION,  
  330. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  331. or U+0020 SPACE */ 
  332. } elseif($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  333. /** Ignore the token. */ 
  334. $this->ignored = true; 
  335.  
  336. /** A start tag whose tag name is "html" */ 
  337. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] == 'html') { 
  338. /** Create an element for the token in the HTML namespace. Append it  
  339. * to the Document object. Put this element in the stack of open  
  340. * elements. */ 
  341. // XDOM 
  342. $html = $this->insertElement($token, false); 
  343. $this->dom->appendChild($html); 
  344. $this->stack[] = $html; 
  345.  
  346. $this->mode = self::BEFORE_HEAD; 
  347.  
  348. } else { 
  349. /** Create an html element. Append it to the Document object. Put 
  350. * this element in the stack of open elements. */ 
  351. // XDOM 
  352. $html = $this->dom->createElementNS(self::NS_HTML, 'html'); 
  353. $this->dom->appendChild($html); 
  354. $this->stack[] = $html; 
  355.  
  356. /** Switch the insertion mode to "before head", then reprocess the 
  357. * current token. */ 
  358. $this->mode = self::BEFORE_HEAD; 
  359. $this->emitToken($token); 
  360. break; 
  361.  
  362. case self::BEFORE_HEAD: 
  363.  
  364. /** A character token that is one of U+0009 CHARACTER TABULATION,  
  365. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  366. or U+0020 SPACE */ 
  367. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  368. /** Ignore the token. */ 
  369. $this->ignored = true; 
  370.  
  371. /** A comment token */ 
  372. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  373. /** Append a Comment node to the current node with the data attribute 
  374. set to the data given in the comment token. */ 
  375. $this->insertComment($token['data']); 
  376.  
  377. /** A DOCTYPE token */ 
  378. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  379. /** Parse error. Ignore the token */ 
  380. $this->ignored = true; 
  381. // parse error 
  382.  
  383. /** A start tag token with the tag name "html" */ 
  384. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  385. /** Process the token using the rules for the "in body" 
  386. * insertion mode. */ 
  387. $this->processWithRulesFor($token, self::IN_BODY); 
  388.  
  389. /** A start tag token with the tag name "head" */ 
  390. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') { 
  391. /** Insert an HTML element for the token. */ 
  392. $element = $this->insertElement($token); 
  393.  
  394. /** Set the head element pointer to this new element node. */ 
  395. $this->head_pointer = $element; 
  396.  
  397. /** Change the insertion mode to "in head". */ 
  398. $this->mode = self::IN_HEAD; 
  399.  
  400. /** An end tag whose tag name is one of: "head", "body", "html", "br" */ 
  401. } elseif( 
  402. $token['type'] === HTML5_Tokenizer::ENDTAG && ( 
  403. $token['name'] === 'head' || $token['name'] === 'body' || 
  404. $token['name'] === 'html' || $token['name'] === 'br' 
  405. )) { 
  406. /** Act as if a start tag token with the tag name "head" and no 
  407. * attributes had been seen, then reprocess the current token. */ 
  408. $this->emitToken(array( 
  409. 'name' => 'head',  
  410. 'type' => HTML5_Tokenizer::STARTTAG,  
  411. 'attr' => array() 
  412. )); 
  413. $this->emitToken($token); 
  414.  
  415. /** Any other end tag */ 
  416. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG) { 
  417. /** Parse error. Ignore the token. */ 
  418. $this->ignored = true; 
  419.  
  420. } else { 
  421. /** Act as if a start tag token with the tag name "head" and no 
  422. * attributes had been seen, then reprocess the current token. 
  423. * Note: This will result in an empty head element being 
  424. * generated, with the current token being reprocessed in the 
  425. * "after head" insertion mode. */ 
  426. $this->emitToken(array( 
  427. 'name' => 'head',  
  428. 'type' => HTML5_Tokenizer::STARTTAG,  
  429. 'attr' => array() 
  430. )); 
  431. $this->emitToken($token); 
  432. break; 
  433.  
  434. case self::IN_HEAD: 
  435.  
  436. /** A character token that is one of U+0009 CHARACTER TABULATION,  
  437. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  438. or U+0020 SPACE. */ 
  439. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  440. /** Insert the character into the current node. */ 
  441. $this->insertText($token['data']); 
  442.  
  443. /** A comment token */ 
  444. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  445. /** Append a Comment node to the current node with the data attribute 
  446. set to the data given in the comment token. */ 
  447. $this->insertComment($token['data']); 
  448.  
  449. /** A DOCTYPE token */ 
  450. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  451. /** Parse error. Ignore the token. */ 
  452. $this->ignored = true; 
  453. // parse error 
  454.  
  455. /** A start tag whose tag name is "html" */ 
  456. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  457. $token['name'] === 'html') { 
  458. $this->processWithRulesFor($token, self::IN_BODY); 
  459.  
  460. /** A start tag whose tag name is one of: "base", "command", "link" */ 
  461. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  462. ($token['name'] === 'base' || $token['name'] === 'command' || 
  463. $token['name'] === 'link')) { 
  464. /** Insert an HTML element for the token. Immediately pop the 
  465. * current node off the stack of open elements. */ 
  466. $this->insertElement($token); 
  467. array_pop($this->stack); 
  468.  
  469. // YYY: Acknowledge the token's self-closing flag, if it is set. 
  470.  
  471. /** A start tag whose tag name is "meta" */ 
  472. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'meta') { 
  473. /** Insert an HTML element for the token. Immediately pop the 
  474. * current node off the stack of open elements. */ 
  475. $this->insertElement($token); 
  476. array_pop($this->stack); 
  477.  
  478. // XERROR: Acknowledge the token's self-closing flag, if it is set. 
  479.  
  480. // XENCODING: If the element has a charset attribute, and its value is a 
  481. // supported encoding, and the confidence is currently tentative,  
  482. // then change the encoding to the encoding given by the value of 
  483. // the charset attribute. 
  484. // 
  485. // Otherwise, if the element has a content attribute, and applying 
  486. // the algorithm for extracting an encoding from a Content-Type to 
  487. // its value returns a supported encoding encoding, and the 
  488. // confidence is currently tentative, then change the encoding to 
  489. // the encoding encoding. 
  490.  
  491. /** A start tag with the tag name "title" */ 
  492. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'title') { 
  493. $this->insertRCDATAElement($token); 
  494.  
  495. /** A start tag whose tag name is "noscript", if the scripting flag is enabled, or 
  496. * A start tag whose tag name is one of: "noframes", "style" */ 
  497. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  498. ($token['name'] === 'noscript' || $token['name'] === 'noframes' || $token['name'] === 'style')) { 
  499. // XSCRIPT: Scripting flag not respected 
  500. $this->insertCDATAElement($token); 
  501.  
  502. // XSCRIPT: Scripting flag disable not implemented 
  503.  
  504. /** A start tag with the tag name "script" */ 
  505. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') { 
  506. /** 1. Create an element for the token in the HTML namespace. */ 
  507. $node = $this->insertElement($token, false); 
  508.  
  509. /** 2. Mark the element as being "parser-inserted" */ 
  510. // Uhhh... XSCRIPT 
  511.  
  512. /** 3. If the parser was originally created for the HTML 
  513. * fragment parsing algorithm, then mark the script element as  
  514. * "already executed". (fragment case) */ 
  515. // ditto... XSCRIPT 
  516.  
  517. /** 4. Append the new element to the current node and push it onto  
  518. * the stack of open elements. */ 
  519. end($this->stack)->appendChild($node); 
  520. $this->stack[] = $node; 
  521. // I guess we could squash these together 
  522.  
  523. /** 6. Let the original insertion mode be the current insertion mode. */ 
  524. $this->original_mode = $this->mode; 
  525. /** 7. Switch the insertion mode to "in CDATA/RCDATA" */ 
  526. $this->mode = self::IN_CDATA_RCDATA; 
  527. /** 5. Switch the tokeniser's content model flag to the CDATA state. */ 
  528. $this->content_model = HTML5_Tokenizer::CDATA; 
  529.  
  530. /** An end tag with the tag name "head" */ 
  531. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'head') { 
  532. /** Pop the current node (which will be the head element) off the stack of open elements. */ 
  533. array_pop($this->stack); 
  534.  
  535. /** Change the insertion mode to "after head". */ 
  536. $this->mode = self::AFTER_HEAD; 
  537.  
  538. // Slight logic inversion here to minimize duplication 
  539. /** A start tag with the tag name "head". */ 
  540. /** An end tag whose tag name is not one of: "body", "html", "br" */ 
  541. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') || 
  542. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] !== 'html' && 
  543. $token['name'] !== 'body' && $token['name'] !== 'br')) { 
  544. // Parse error. Ignore the token. 
  545. $this->ignored = true; 
  546.  
  547. /** Anything else */ 
  548. } else { 
  549. /** Act as if an end tag token with the tag name "head" had been 
  550. * seen, and reprocess the current token. */ 
  551. $this->emitToken(array( 
  552. 'name' => 'head',  
  553. 'type' => HTML5_Tokenizer::ENDTAG 
  554. )); 
  555.  
  556. /** Then, reprocess the current token. */ 
  557. $this->emitToken($token); 
  558. break; 
  559.  
  560. case self::IN_HEAD_NOSCRIPT: 
  561. if ($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  562. // parse error 
  563. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  564. $this->processWithRulesFor($token, self::IN_BODY); 
  565. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'noscript') { 
  566. /** Pop the current node (which will be a noscript element) from the 
  567. * stack of open elements; the new current node will be a head 
  568. * element. */ 
  569. array_pop($this->stack); 
  570. $this->mode = self::IN_HEAD; 
  571. } elseif ( 
  572. ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) || 
  573. ($token['type'] === HTML5_Tokenizer::COMMENT) || 
  574. ($token['type'] === HTML5_Tokenizer::STARTTAG && ( 
  575. $token['name'] === 'link' || $token['name'] === 'meta' || 
  576. $token['name'] === 'noframes' || $token['name'] === 'style'))) { 
  577. $this->processWithRulesFor($token, self::IN_HEAD); 
  578. // inverted logic 
  579. } elseif ( 
  580. ($token['type'] === HTML5_Tokenizer::STARTTAG && ( 
  581. $token['name'] === 'head' || $token['name'] === 'noscript')) || 
  582. ($token['type'] === HTML5_Tokenizer::ENDTAG && 
  583. $token['name'] !== 'br')) { 
  584. // parse error 
  585. } else { 
  586. // parse error 
  587. $this->emitToken(array( 
  588. 'type' => HTML5_Tokenizer::ENDTAG,  
  589. 'name' => 'noscript',  
  590. )); 
  591. $this->emitToken($token); 
  592. break; 
  593.  
  594. case self::AFTER_HEAD: 
  595. /** Handle the token as follows: */ 
  596.  
  597. /** A character token that is one of U+0009 CHARACTER TABULATION,  
  598. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  599. or U+0020 SPACE */ 
  600. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  601. /** Append the character to the current node. */ 
  602. $this->insertText($token['data']); 
  603.  
  604. /** A comment token */ 
  605. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  606. /** Append a Comment node to the current node with the data attribute 
  607. set to the data given in the comment token. */ 
  608. $this->insertComment($token['data']); 
  609.  
  610. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  611. // parse error 
  612.  
  613. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  614. $this->processWithRulesFor($token, self::IN_BODY); 
  615.  
  616. /** A start tag token with the tag name "body" */ 
  617. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'body') { 
  618. $this->insertElement($token); 
  619.  
  620. /** Set the frameset-ok flag to "not ok". */ 
  621. $this->flag_frameset_ok = false; 
  622.  
  623. /** Change the insertion mode to "in body". */ 
  624. $this->mode = self::IN_BODY; 
  625.  
  626. /** A start tag token with the tag name "frameset" */ 
  627. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'frameset') { 
  628. /** Insert a frameset element for the token. */ 
  629. $this->insertElement($token); 
  630.  
  631. /** Change the insertion mode to "in frameset". */ 
  632. $this->mode = self::IN_FRAMESET; 
  633.  
  634. /** A start tag token whose tag name is one of: "base", "link", "meta",  
  635. "noframes", "script", "style", "title" */ 
  636. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],  
  637. array('base', 'link', 'meta', 'noframes', 'script', 'style', 'title'))) { 
  638. // parse error 
  639. /** Push the node pointed to by the head element pointer onto the 
  640. * stack of open elements. */ 
  641. $this->stack[] = $this->head_pointer; 
  642. $this->processWithRulesFor($token, self::IN_HEAD); 
  643. array_splice($this->stack, array_search($this->head_pointer, $this->stack, true), 1); 
  644.  
  645. // inversion of specification 
  646. } elseif( 
  647. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'head') || 
  648. ($token['type'] === HTML5_Tokenizer::ENDTAG && 
  649. $token['name'] !== 'body' && $token['name'] !== 'html' && 
  650. $token['name'] !== 'br')) { 
  651. // parse error 
  652.  
  653. /** Anything else */ 
  654. } else { 
  655. $this->emitToken(array( 
  656. 'name' => 'body',  
  657. 'type' => HTML5_Tokenizer::STARTTAG,  
  658. 'attr' => array() 
  659. )); 
  660. $this->flag_frameset_ok = true; 
  661. $this->emitToken($token); 
  662. break; 
  663.  
  664. case self::IN_BODY: 
  665. /** Handle the token as follows: */ 
  666.  
  667. switch($token['type']) { 
  668. /** A character token */ 
  669. case HTML5_Tokenizer::CHARACTER: 
  670. case HTML5_Tokenizer::SPACECHARACTER: 
  671. /** Reconstruct the active formatting elements, if any. */ 
  672. $this->reconstructActiveFormattingElements(); 
  673.  
  674. /** Append the token's character to the current node. */ 
  675. $this->insertText($token['data']); 
  676.  
  677. /** If the token is not one of U+0009 CHARACTER TABULATION,  
  678. * U+000A LINE FEED (LF), U+000C FORM FEED (FF), or U+0020 
  679. * SPACE, then set the frameset-ok flag to "not ok". */ 
  680. // i.e., if any of the characters is not whitespace 
  681. if (strlen($token['data']) !== strspn($token['data'], HTML5_Tokenizer::WHITESPACE)) { 
  682. $this->flag_frameset_ok = false; 
  683. break; 
  684.  
  685. /** A comment token */ 
  686. case HTML5_Tokenizer::COMMENT: 
  687. /** Append a Comment node to the current node with the data 
  688. attribute set to the data given in the comment token. */ 
  689. $this->insertComment($token['data']); 
  690. break; 
  691.  
  692. case HTML5_Tokenizer::DOCTYPE: 
  693. // parse error 
  694. break; 
  695.  
  696. case HTML5_Tokenizer::EOF: 
  697. // parse error 
  698. break; 
  699.  
  700. case HTML5_Tokenizer::STARTTAG: 
  701. switch($token['name']) { 
  702. case 'html': 
  703. // parse error 
  704. /** For each attribute on the token, check to see if the 
  705. * attribute is already present on the top element of the 
  706. * stack of open elements. If it is not, add the attribute 
  707. * and its corresponding value to that element. */ 
  708. foreach($token['attr'] as $attr) { 
  709. if(!$this->stack[0]->hasAttribute($attr['name'])) { 
  710. $this->stack[0]->setAttribute($attr['name'], $attr['value']); 
  711. break; 
  712.  
  713. case 'base': case 'command': case 'link': case 'meta': case 'noframes': 
  714. case 'script': case 'style': case 'title': 
  715. /** Process the token as if the insertion mode had been "in 
  716. head". */ 
  717. $this->processWithRulesFor($token, self::IN_HEAD); 
  718. break; 
  719.  
  720. /** A start tag token with the tag name "body" */ 
  721. case 'body': 
  722. /** Parse error. If the second element on the stack of open 
  723. elements is not a body element, or, if the stack of open 
  724. elements has only one node on it, then ignore the token. 
  725. (fragment case) */ 
  726. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') { 
  727. $this->ignored = true; 
  728. // Ignore 
  729.  
  730. /** Otherwise, for each attribute on the token, check to see 
  731. if the attribute is already present on the body element (the 
  732. second element) on the stack of open elements. If it is not,  
  733. add the attribute and its corresponding value to that 
  734. element. */ 
  735. } else { 
  736. foreach($token['attr'] as $attr) { 
  737. if(!$this->stack[1]->hasAttribute($attr['name'])) { 
  738. $this->stack[1]->setAttribute($attr['name'], $attr['value']); 
  739. break; 
  740.  
  741. case 'frameset': 
  742. // parse error 
  743. /** If the second element on the stack of open elements is 
  744. * not a body element, or, if the stack of open elements 
  745. * has only one node on it, then ignore the token. 
  746. * (fragment case) */ 
  747. if(count($this->stack) === 1 || $this->stack[1]->tagName !== 'body') { 
  748. $this->ignored = true; 
  749. // Ignore 
  750. } elseif (!$this->flag_frameset_ok) { 
  751. $this->ignored = true; 
  752. // Ignore 
  753. } else { 
  754. /** 1. Remove the second element on the stack of open  
  755. * elements from its parent node, if it has one. */ 
  756. if($this->stack[1]->parentNode) { 
  757. $this->stack[1]->parentNode->removeChild($this->stack[1]); 
  758.  
  759. /** 2. Pop all the nodes from the bottom of the stack of  
  760. * open elements, from the current node up to the root  
  761. * html element. */ 
  762. array_splice($this->stack, 1); 
  763.  
  764. $this->insertElement($token); 
  765. $this->mode = self::IN_FRAMESET; 
  766. break; 
  767.  
  768. // in spec, there is a diversion here 
  769.  
  770. case 'address': case 'article': case 'aside': case 'blockquote': 
  771. case 'center': case 'datagrid': case 'details': case 'dir': 
  772. case 'div': case 'dl': case 'fieldset': case 'figure': case 'footer': 
  773. case 'header': case 'hgroup': case 'menu': case 'nav': 
  774. case 'ol': case 'p': case 'section': case 'ul': 
  775. /** If the stack of open elements has a p element in scope,  
  776. then act as if an end tag with the tag name p had been 
  777. seen. */ 
  778. if($this->elementInScope('p')) { 
  779. $this->emitToken(array( 
  780. 'name' => 'p',  
  781. 'type' => HTML5_Tokenizer::ENDTAG 
  782. )); 
  783.  
  784. /** Insert an HTML element for the token. */ 
  785. $this->insertElement($token); 
  786. break; 
  787.  
  788. /** A start tag whose tag name is one of: "h1", "h2", "h3", "h4",  
  789. "h5", "h6" */ 
  790. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': 
  791. /** If the stack of open elements has a p element in scope,  
  792. then act as if an end tag with the tag name p had been seen. */ 
  793. if($this->elementInScope('p')) { 
  794. $this->emitToken(array( 
  795. 'name' => 'p',  
  796. 'type' => HTML5_Tokenizer::ENDTAG 
  797. )); 
  798.  
  799. /** If the current node is an element whose tag name is one 
  800. * of "h1", "h2", "h3", "h4", "h5", or "h6", then this is a 
  801. * parse error; pop the current node off the stack of open 
  802. * elements. */ 
  803. $peek = array_pop($this->stack); 
  804. if (in_array($peek->tagName, array("h1", "h2", "h3", "h4", "h5", "h6"))) { 
  805. // parse error 
  806. } else { 
  807. $this->stack[] = $peek; 
  808.  
  809. /** Insert an HTML element for the token. */ 
  810. $this->insertElement($token); 
  811. break; 
  812.  
  813. case 'pre': case 'listing': 
  814. /** If the stack of open elements has a p element in scope,  
  815. then act as if an end tag with the tag name p had been seen. */ 
  816. if($this->elementInScope('p')) { 
  817. $this->emitToken(array( 
  818. 'name' => 'p',  
  819. 'type' => HTML5_Tokenizer::ENDTAG 
  820. )); 
  821. $this->insertElement($token); 
  822. /** If the next token is a U+000A LINE FEED (LF) character 
  823. * token, then ignore that token and move on to the next 
  824. * one. (Newlines at the start of pre blocks are ignored as 
  825. * an authoring convenience.) */ 
  826. $this->ignore_lf_token = 2; 
  827. $this->flag_frameset_ok = false; 
  828. break; 
  829.  
  830. /** A start tag whose tag name is "form" */ 
  831. case 'form': 
  832. /** If the form element pointer is not null, ignore the 
  833. token with a parse error. */ 
  834. if($this->form_pointer !== null) { 
  835. $this->ignored = true; 
  836. // Ignore. 
  837.  
  838. /** Otherwise: */ 
  839. } else { 
  840. /** If the stack of open elements has a p element in 
  841. scope, then act as if an end tag with the tag name p 
  842. had been seen. */ 
  843. if($this->elementInScope('p')) { 
  844. $this->emitToken(array( 
  845. 'name' => 'p',  
  846. 'type' => HTML5_Tokenizer::ENDTAG 
  847. )); 
  848.  
  849. /** Insert an HTML element for the token, and set the 
  850. form element pointer to point to the element created. */ 
  851. $element = $this->insertElement($token); 
  852. $this->form_pointer = $element; 
  853. break; 
  854.  
  855. // condensed specification 
  856. case 'li': case 'dc': case 'dd': case 'ds': case 'dt': 
  857. /** 1. Set the frameset-ok flag to "not ok". */ 
  858. $this->flag_frameset_ok = false; 
  859.  
  860. $stack_length = count($this->stack) - 1; 
  861. for($n = $stack_length; 0 <= $n; $n--) { 
  862. /** 2. Initialise node to be the current node (the 
  863. bottommost node of the stack). */ 
  864. $stop = false; 
  865. $node = $this->stack[$n]; 
  866. $cat = $this->getElementCategory($node); 
  867.  
  868. // for case 'li': 
  869. /** 3. If node is an li element, then act as if an end 
  870. * tag with the tag name "li" had been seen, then jump 
  871. * to the last step. */ 
  872. // for case 'dc': case 'dd': case 'ds': case 'dt': 
  873. /** If node is a dc, dd, ds or dt element, then act as if an end 
  874. * tag with the same tag name as node had been seen, then 
  875. * jump to the last step. */ 
  876. if(($token['name'] === 'li' && $node->tagName === 'li') || 
  877. ($token['name'] !== 'li' && ($node->tagName == 'dc' || $node->tagName === 'dd' || $node->tagName == 'ds' || $node->tagName === 'dt'))) { // limited conditional 
  878. $this->emitToken(array( 
  879. 'type' => HTML5_Tokenizer::ENDTAG,  
  880. 'name' => $node->tagName,  
  881. )); 
  882. break; 
  883.  
  884. /** 4. If node is not in the formatting category, and is 
  885. not in the phrasing category, and is not an address,  
  886. div or p element, then stop this algorithm. */ 
  887. if($cat !== self::FORMATTING && $cat !== self::PHRASING && 
  888. $node->tagName !== 'address' && $node->tagName !== 'div' && 
  889. $node->tagName !== 'p') { 
  890. break; 
  891.  
  892. /** 5. Otherwise, set node to the previous entry in the 
  893. * stack of open elements and return to step 2. */ 
  894.  
  895. /** 6. This is the last step. */ 
  896.  
  897. /** If the stack of open elements has a p element in scope,  
  898. then act as if an end tag with the tag name p had been 
  899. seen. */ 
  900. if($this->elementInScope('p')) { 
  901. $this->emitToken(array( 
  902. 'name' => 'p',  
  903. 'type' => HTML5_Tokenizer::ENDTAG 
  904. )); 
  905.  
  906. /** Finally, insert an HTML element with the same tag 
  907. name as the token's. */ 
  908. $this->insertElement($token); 
  909. break; 
  910.  
  911. /** A start tag token whose tag name is "plaintext" */ 
  912. case 'plaintext': 
  913. /** If the stack of open elements has a p element in scope,  
  914. then act as if an end tag with the tag name p had been 
  915. seen. */ 
  916. if($this->elementInScope('p')) { 
  917. $this->emitToken(array( 
  918. 'name' => 'p',  
  919. 'type' => HTML5_Tokenizer::ENDTAG 
  920. )); 
  921.  
  922. /** Insert an HTML element for the token. */ 
  923. $this->insertElement($token); 
  924.  
  925. $this->content_model = HTML5_Tokenizer::PLAINTEXT; 
  926. break; 
  927.  
  928. // more diversions 
  929.  
  930. /** A start tag whose tag name is "a" */ 
  931. case 'a': 
  932. /** If the list of active formatting elements contains 
  933. an element whose tag name is "a" between the end of the 
  934. list and the last marker on the list (or the start of 
  935. the list if there is no marker on the list), then this 
  936. is a parse error; act as if an end tag with the tag name 
  937. "a" had been seen, then remove that element from the list 
  938. of active formatting elements and the stack of open 
  939. elements if the end tag didn't already remove it (it 
  940. might not have if the element is not in table scope). */ 
  941. $leng = count($this->a_formatting); 
  942.  
  943. for($n = $leng - 1; $n >= 0; $n--) { 
  944. if($this->a_formatting[$n] === self::MARKER) { 
  945. break; 
  946.  
  947. } elseif($this->a_formatting[$n]->tagName === 'a') { 
  948. $a = $this->a_formatting[$n]; 
  949. $this->emitToken(array( 
  950. 'name' => 'a',  
  951. 'type' => HTML5_Tokenizer::ENDTAG 
  952. )); 
  953. if (in_array($a, $this->a_formatting)) { 
  954. $a_i = array_search($a, $this->a_formatting, true); 
  955. if($a_i !== false) array_splice($this->a_formatting, $a_i, 1); 
  956. if (in_array($a, $this->stack)) { 
  957. $a_i = array_search($a, $this->stack, true); 
  958. if ($a_i !== false) array_splice($this->stack, $a_i, 1); 
  959. break; 
  960.  
  961. /** Reconstruct the active formatting elements, if any. */ 
  962. $this->reconstructActiveFormattingElements(); 
  963.  
  964. /** Insert an HTML element for the token. */ 
  965. $el = $this->insertElement($token); 
  966.  
  967. /** Add that element to the list of active formatting 
  968. elements. */ 
  969. $this->a_formatting[] = $el; 
  970. break; 
  971.  
  972. case 'b': case 'big': case 'code': case 'em': case 'font': case 'i': 
  973. case 's': case 'small': case 'strike': 
  974. case 'strong': case 'tt': case 'u': 
  975. /** Reconstruct the active formatting elements, if any. */ 
  976. $this->reconstructActiveFormattingElements(); 
  977.  
  978. /** Insert an HTML element for the token. */ 
  979. $el = $this->insertElement($token); 
  980.  
  981. /** Add that element to the list of active formatting 
  982. elements. */ 
  983. $this->a_formatting[] = $el; 
  984. break; 
  985.  
  986. case 'nobr': 
  987. /** Reconstruct the active formatting elements, if any. */ 
  988. $this->reconstructActiveFormattingElements(); 
  989.  
  990. /** If the stack of open elements has a nobr element in 
  991. * scope, then this is a parse error; act as if an end tag 
  992. * with the tag name "nobr" had been seen, then once again 
  993. * reconstruct the active formatting elements, if any. */ 
  994. if ($this->elementInScope('nobr')) { 
  995. $this->emitToken(array( 
  996. 'name' => 'nobr',  
  997. 'type' => HTML5_Tokenizer::ENDTAG,  
  998. )); 
  999. $this->reconstructActiveFormattingElements(); 
  1000.  
  1001. /** Insert an HTML element for the token. */ 
  1002. $el = $this->insertElement($token); 
  1003.  
  1004. /** Add that element to the list of active formatting 
  1005. elements. */ 
  1006. $this->a_formatting[] = $el; 
  1007. break; 
  1008.  
  1009. // another diversion 
  1010.  
  1011. /** A start tag token whose tag name is "button" */ 
  1012. case 'button': 
  1013. /** If the stack of open elements has a button element in scope,  
  1014. then this is a parse error; act as if an end tag with the tag 
  1015. name "button" had been seen, then reprocess the token. (We don't 
  1016. do that. Unnecessary.) (I hope you're right! -- ezyang) */ 
  1017. if($this->elementInScope('button')) { 
  1018. $this->emitToken(array( 
  1019. 'name' => 'button',  
  1020. 'type' => HTML5_Tokenizer::ENDTAG 
  1021. )); 
  1022.  
  1023. /** Reconstruct the active formatting elements, if any. */ 
  1024. $this->reconstructActiveFormattingElements(); 
  1025.  
  1026. /** Insert an HTML element for the token. */ 
  1027. $this->insertElement($token); 
  1028.  
  1029. /** Insert a marker at the end of the list of active 
  1030. formatting elements. */ 
  1031. $this->a_formatting[] = self::MARKER; 
  1032.  
  1033. $this->flag_frameset_ok = false; 
  1034. break; 
  1035.  
  1036. case 'applet': case 'marquee': case 'object': 
  1037. /** Reconstruct the active formatting elements, if any. */ 
  1038. $this->reconstructActiveFormattingElements(); 
  1039.  
  1040. /** Insert an HTML element for the token. */ 
  1041. $this->insertElement($token); 
  1042.  
  1043. /** Insert a marker at the end of the list of active 
  1044. formatting elements. */ 
  1045. $this->a_formatting[] = self::MARKER; 
  1046.  
  1047. $this->flag_frameset_ok = false; 
  1048. break; 
  1049.  
  1050. // spec diversion 
  1051.  
  1052. /** A start tag whose tag name is "table" */ 
  1053. case 'table': 
  1054. /** If the Document is not set to quirks mode, and the  
  1055. * stack of open elements has a p element in scope, then  
  1056. * act as if an end tag with the tag name "p" had been  
  1057. * seen. */ 
  1058. if($this->quirks_mode !== self::QUIRKS_MODE && 
  1059. $this->elementInScope('p')) { 
  1060. $this->emitToken(array( 
  1061. 'name' => 'p',  
  1062. 'type' => HTML5_Tokenizer::ENDTAG 
  1063. )); 
  1064.  
  1065. /** Insert an HTML element for the token. */ 
  1066. $this->insertElement($token); 
  1067.  
  1068. $this->flag_frameset_ok = false; 
  1069.  
  1070. /** Change the insertion mode to "in table". */ 
  1071. $this->mode = self::IN_TABLE; 
  1072. break; 
  1073.  
  1074. /** A start tag whose tag name is one of: "area", "basefont",  
  1075. "bgsound", "br", "embed", "img", "input", "keygen", "spacer", "wbr" */ 
  1076. case 'area': case 'basefont': case 'bgsound': case 'br': 
  1077. case 'embed': case 'img': case 'input': case 'keygen': case 'spacer': 
  1078. case 'wbr': 
  1079. /** Reconstruct the active formatting elements, if any. */ 
  1080. $this->reconstructActiveFormattingElements(); 
  1081.  
  1082. /** Insert an HTML element for the token. */ 
  1083. $this->insertElement($token); 
  1084.  
  1085. /** Immediately pop the current node off the stack of open elements. */ 
  1086. array_pop($this->stack); 
  1087.  
  1088. // YYY: Acknowledge the token's self-closing flag, if it is set. 
  1089.  
  1090. $this->flag_frameset_ok = false; 
  1091. break; 
  1092.  
  1093. case 'param': case 'source': 
  1094. /** Insert an HTML element for the token. */ 
  1095. $this->insertElement($token); 
  1096.  
  1097. /** Immediately pop the current node off the stack of open elements. */ 
  1098. array_pop($this->stack); 
  1099.  
  1100. // YYY: Acknowledge the token's self-closing flag, if it is set. 
  1101. break; 
  1102.  
  1103. /** A start tag whose tag name is "hr" */ 
  1104. case 'hr': 
  1105. /** If the stack of open elements has a p element in scope,  
  1106. then act as if an end tag with the tag name p had been seen. */ 
  1107. if($this->elementInScope('p')) { 
  1108. $this->emitToken(array( 
  1109. 'name' => 'p',  
  1110. 'type' => HTML5_Tokenizer::ENDTAG 
  1111. )); 
  1112.  
  1113. /** Insert an HTML element for the token. */ 
  1114. $this->insertElement($token); 
  1115.  
  1116. /** Immediately pop the current node off the stack of open elements. */ 
  1117. array_pop($this->stack); 
  1118.  
  1119. // YYY: Acknowledge the token's self-closing flag, if it is set. 
  1120.  
  1121. $this->flag_frameset_ok = false; 
  1122. break; 
  1123.  
  1124. /** A start tag whose tag name is "image" */ 
  1125. case 'image': 
  1126. /** Parse error. Change the token's tag name to "img" and 
  1127. reprocess it. (Don't ask.) */ 
  1128. $token['name'] = 'img'; 
  1129. $this->emitToken($token); 
  1130. break; 
  1131.  
  1132. /** A start tag whose tag name is "isindex" */ 
  1133. case 'isindex': 
  1134. /** Parse error. */ 
  1135.  
  1136. /** If the form element pointer is not null,  
  1137. then ignore the token. */ 
  1138. if($this->form_pointer === null) { 
  1139. /** Act as if a start tag token with the tag name "form" had 
  1140. been seen. */ 
  1141. /** If the token has an attribute called "action", set 
  1142. * the action attribute on the resulting form 
  1143. * element to the value of the "action" attribute of 
  1144. * the token. */ 
  1145. $attr = array(); 
  1146. $action = $this->getAttr($token, 'action'); 
  1147. if ($action !== false) { 
  1148. $attr[] = array('name' => 'action', 'value' => $action); 
  1149. $this->emitToken(array( 
  1150. 'name' => 'form',  
  1151. 'type' => HTML5_Tokenizer::STARTTAG,  
  1152. 'attr' => $attr 
  1153. )); 
  1154.  
  1155. /** Act as if a start tag token with the tag name "hr" had 
  1156. been seen. */ 
  1157. $this->emitToken(array( 
  1158. 'name' => 'hr',  
  1159. 'type' => HTML5_Tokenizer::STARTTAG,  
  1160. 'attr' => array() 
  1161. )); 
  1162.  
  1163. /** Act as if a start tag token with the tag name "label" 
  1164. had been seen. */ 
  1165. $this->emitToken(array( 
  1166. 'name' => 'label',  
  1167. 'type' => HTML5_Tokenizer::STARTTAG,  
  1168. 'attr' => array() 
  1169. )); 
  1170.  
  1171. /** Act as if a stream of character tokens had been seen. */ 
  1172. $prompt = $this->getAttr($token, 'prompt'); 
  1173. if ($prompt === false) { 
  1174. $prompt = 'This is a searchable index. '. 
  1175. 'Insert your search keywords here: '; 
  1176. $this->emitToken(array( 
  1177. 'data' => $prompt,  
  1178. 'type' => HTML5_Tokenizer::CHARACTER,  
  1179. )); 
  1180.  
  1181. /** Act as if a start tag token with the tag name "input" 
  1182. had been seen, with all the attributes from the "isindex" 
  1183. token, except with the "name" attribute set to the value 
  1184. "isindex" (ignoring any explicit "name" attribute). */ 
  1185. $attr = array(); 
  1186. foreach ($token['attr'] as $keypair) { 
  1187. if ($keypair['name'] === 'name' || $keypair['name'] === 'action' || 
  1188. $keypair['name'] === 'prompt') continue; 
  1189. $attr[] = $keypair; 
  1190. $attr[] = array('name' => 'name', 'value' => 'isindex'); 
  1191.  
  1192. $this->emitToken(array( 
  1193. 'name' => 'input',  
  1194. 'type' => HTML5_Tokenizer::STARTTAG,  
  1195. 'attr' => $attr 
  1196. )); 
  1197.  
  1198. /** Act as if an end tag token with the tag name "label" 
  1199. had been seen. */ 
  1200. $this->emitToken(array( 
  1201. 'name' => 'label',  
  1202. 'type' => HTML5_Tokenizer::ENDTAG 
  1203. )); 
  1204.  
  1205. /** Act as if a start tag token with the tag name "hr" had 
  1206. been seen. */ 
  1207. $this->emitToken(array( 
  1208. 'name' => 'hr',  
  1209. 'type' => HTML5_Tokenizer::STARTTAG 
  1210. )); 
  1211.  
  1212. /** Act as if an end tag token with the tag name "form" had 
  1213. been seen. */ 
  1214. $this->emitToken(array( 
  1215. 'name' => 'form',  
  1216. 'type' => HTML5_Tokenizer::ENDTAG 
  1217. )); 
  1218. } else { 
  1219. $this->ignored = true; 
  1220. break; 
  1221.  
  1222. /** A start tag whose tag name is "textarea" */ 
  1223. case 'textarea': 
  1224. $this->insertElement($token); 
  1225.  
  1226. /** If the next token is a U+000A LINE FEED (LF) 
  1227. * character token, then ignore that token and move on to 
  1228. * the next one. (Newlines at the start of textarea 
  1229. * elements are ignored as an authoring convenience.) 
  1230. * need flag, see also <pre> */ 
  1231. $this->ignore_lf_token = 2; 
  1232.  
  1233. $this->original_mode = $this->mode; 
  1234. $this->flag_frameset_ok = false; 
  1235. $this->mode = self::IN_CDATA_RCDATA; 
  1236.  
  1237. /** Switch the tokeniser's content model flag to the 
  1238. RCDATA state. */ 
  1239. $this->content_model = HTML5_Tokenizer::RCDATA; 
  1240. break; 
  1241.  
  1242. /** A start tag token whose tag name is "xmp" */ 
  1243. case 'xmp': 
  1244. /** If the stack of open elements has a p element in 
  1245. scope, then act as if an end tag with the tag name 
  1246. "p" has been seen. */ 
  1247. if ($this->elementInScope('p')) { 
  1248. $this->emitToken(array( 
  1249. 'name' => 'p',  
  1250. 'type' => HTML5_Tokenizer::ENDTAG 
  1251. )); 
  1252.  
  1253. /** Reconstruct the active formatting elements, if any. */ 
  1254. $this->reconstructActiveFormattingElements(); 
  1255.  
  1256. $this->flag_frameset_ok = false; 
  1257.  
  1258. $this->insertCDATAElement($token); 
  1259. break; 
  1260.  
  1261. case 'iframe': 
  1262. $this->flag_frameset_ok = false; 
  1263. $this->insertCDATAElement($token); 
  1264. break; 
  1265.  
  1266. case 'noembed': case 'noscript': 
  1267. // XSCRIPT: should check scripting flag 
  1268. $this->insertCDATAElement($token); 
  1269. break; 
  1270.  
  1271. /** A start tag whose tag name is "select" */ 
  1272. case 'select': 
  1273. /** Reconstruct the active formatting elements, if any. */ 
  1274. $this->reconstructActiveFormattingElements(); 
  1275.  
  1276. /** Insert an HTML element for the token. */ 
  1277. $this->insertElement($token); 
  1278.  
  1279. $this->flag_frameset_ok = false; 
  1280.  
  1281. /** If the insertion mode is one of "in table", "in caption",  
  1282. * "in column group", "in table body", "in row", or "in 
  1283. * cell", then switch the insertion mode to "in select in 
  1284. * table". Otherwise, switch the insertion mode to "in 
  1285. * select". */ 
  1286. if ( 
  1287. $this->mode === self::IN_TABLE || $this->mode === self::IN_CAPTION || 
  1288. $this->mode === self::IN_COLUMN_GROUP || $this->mode ==+self::IN_TABLE_BODY || 
  1289. $this->mode === self::IN_ROW || $this->mode === self::IN_CELL 
  1290. ) { 
  1291. $this->mode = self::IN_SELECT_IN_TABLE; 
  1292. } else { 
  1293. $this->mode = self::IN_SELECT; 
  1294. break; 
  1295.  
  1296. case 'option': case 'optgroup': 
  1297. if ($this->elementInScope('option')) { 
  1298. $this->emitToken(array( 
  1299. 'name' => 'option',  
  1300. 'type' => HTML5_Tokenizer::ENDTAG,  
  1301. )); 
  1302. $this->reconstructActiveFormattingElements(); 
  1303. $this->insertElement($token); 
  1304. break; 
  1305.  
  1306. case 'rp': case 'rt': 
  1307. /** If the stack of open elements has a ruby element in scope, then generate 
  1308. * implied end tags. If the current node is not then a ruby element, this is 
  1309. * a parse error; pop all the nodes from the current node up to the node 
  1310. * immediately before the bottommost ruby element on the stack of open elements. 
  1311. */ 
  1312. if ($this->elementInScope('ruby')) { 
  1313. $this->generateImpliedEndTags(); 
  1314. $peek = false; 
  1315. do { 
  1316. if ($peek) { 
  1317. // parse error 
  1318. $peek = array_pop($this->stack); 
  1319. } while ($peek->tagName !== 'ruby'); 
  1320. $this->stack[] = $peek; // we popped one too many 
  1321. $this->insertElement($token); 
  1322. break; 
  1323.  
  1324. // spec diversion 
  1325.  
  1326. case 'math': 
  1327. $this->reconstructActiveFormattingElements(); 
  1328. $token = $this->adjustMathMLAttributes($token); 
  1329. $token = $this->adjustForeignAttributes($token); 
  1330. $this->insertForeignElement($token, self::NS_MATHML); 
  1331. if (isset($token['self-closing'])) { 
  1332. // XERROR: acknowledge the token's self-closing flag 
  1333. array_pop($this->stack); 
  1334. if ($this->mode !== self::IN_FOREIGN_CONTENT) { 
  1335. $this->secondary_mode = $this->mode; 
  1336. $this->mode = self::IN_FOREIGN_CONTENT; 
  1337. break; 
  1338.  
  1339. case 'svg': 
  1340. $this->reconstructActiveFormattingElements(); 
  1341. $token = $this->adjustSVGAttributes($token); 
  1342. $token = $this->adjustForeignAttributes($token); 
  1343. $this->insertForeignElement($token, self::NS_SVG); 
  1344. if (isset($token['self-closing'])) { 
  1345. // XERROR: acknowledge the token's self-closing flag 
  1346. array_pop($this->stack); 
  1347. if ($this->mode !== self::IN_FOREIGN_CONTENT) { 
  1348. $this->secondary_mode = $this->mode; 
  1349. $this->mode = self::IN_FOREIGN_CONTENT; 
  1350. break; 
  1351.  
  1352. case 'caption': case 'col': case 'colgroup': case 'frame': case 'head': 
  1353. case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': case 'tr': 
  1354. // parse error 
  1355. break; 
  1356.  
  1357. /** A start tag token not covered by the previous entries */ 
  1358. default: 
  1359. /** Reconstruct the active formatting elements, if any. */ 
  1360. $this->reconstructActiveFormattingElements(); 
  1361.  
  1362. $this->insertElement($token); 
  1363. /** This element will be a phrasing element. */ 
  1364. break; 
  1365. break; 
  1366.  
  1367. case HTML5_Tokenizer::ENDTAG: 
  1368. switch($token['name']) { 
  1369. /** An end tag with the tag name "body" */ 
  1370. case 'body': 
  1371. /** If the stack of open elements does not have a body  
  1372. * element in scope, this is a parse error; ignore the  
  1373. * token. */ 
  1374. if(!$this->elementInScope('body')) { 
  1375. $this->ignored = true; 
  1376.  
  1377. /** Otherwise, if there is a node in the stack of open  
  1378. * elements that is not either a dc element, a dd element,  
  1379. * a ds element, a dt element, an li element, an optgroup  
  1380. * element, an option element, a p element, an rp element,  
  1381. * an rt element, a tbody element, a td element, a tfoot  
  1382. * element, a th element, a thead element, a tr element,  
  1383. * the body element, or the html element, then this is a  
  1384. * parse error. 
  1385. */ 
  1386. } else { 
  1387. // XERROR: implement this check for parse error 
  1388.  
  1389. /** Change the insertion mode to "after body". */ 
  1390. $this->mode = self::AFTER_BODY; 
  1391. break; 
  1392.  
  1393. /** An end tag with the tag name "html" */ 
  1394. case 'html': 
  1395. /** Act as if an end tag with tag name "body" had been seen,  
  1396. then, if that token wasn't ignored, reprocess the current 
  1397. token. */ 
  1398. $this->emitToken(array( 
  1399. 'name' => 'body',  
  1400. 'type' => HTML5_Tokenizer::ENDTAG 
  1401. )); 
  1402.  
  1403. if (!$this->ignored) $this->emitToken($token); 
  1404. break; 
  1405.  
  1406. case 'address': case 'article': case 'aside': case 'blockquote': 
  1407. case 'center': case 'datagrid': case 'details': case 'dir': 
  1408. case 'div': case 'dl': case 'fieldset': case 'footer': 
  1409. case 'header': case 'hgroup': case 'listing': case 'menu': 
  1410. case 'nav': case 'ol': case 'pre': case 'section': case 'ul': 
  1411. /** If the stack of open elements has an element in scope 
  1412. with the same tag name as that of the token, then generate 
  1413. implied end tags. */ 
  1414. if($this->elementInScope($token['name'])) { 
  1415. $this->generateImpliedEndTags(); 
  1416.  
  1417. /** Now, if the current node is not an element with 
  1418. the same tag name as that of the token, then this 
  1419. is a parse error. */ 
  1420. // XERROR: implement parse error logic 
  1421.  
  1422. /** If the stack of open elements has an element in 
  1423. scope with the same tag name as that of the token,  
  1424. then pop elements from this stack until an element 
  1425. with that tag name has been popped from the stack. */ 
  1426. do { 
  1427. $node = array_pop($this->stack); 
  1428. } while ($node->tagName !== $token['name']); 
  1429. } else { 
  1430. // parse error 
  1431. break; 
  1432.  
  1433. /** An end tag whose tag name is "form" */ 
  1434. case 'form': 
  1435. /** Let node be the element that the form element pointer is set to. */ 
  1436. $node = $this->form_pointer; 
  1437. /** Set the form element pointer to null. */ 
  1438. $this->form_pointer = null; 
  1439. /** If node is null or the stack of open elements does not  
  1440. * have node in scope, then this is a parse error; ignore the token. */ 
  1441. if ($node === null || !in_array($node, $this->stack)) { 
  1442. // parse error 
  1443. $this->ignored = true; 
  1444. } else { 
  1445. /** 1. Generate implied end tags. */ 
  1446. $this->generateImpliedEndTags(); 
  1447. /** 2. If the current node is not node, then this is a parse error. */ 
  1448. if (end($this->stack) !== $node) { 
  1449. // parse error 
  1450. /** 3. Remove node from the stack of open elements. */ 
  1451. array_splice($this->stack, array_search($node, $this->stack, true), 1); 
  1452.  
  1453. break; 
  1454.  
  1455. /** An end tag whose tag name is "p" */ 
  1456. case 'p': 
  1457. /** If the stack of open elements has a p element in scope,  
  1458. then generate implied end tags, except for p elements. */ 
  1459. if($this->elementInScope('p')) { 
  1460. /** Generate implied end tags, except for elements with 
  1461. * the same tag name as the token. */ 
  1462. $this->generateImpliedEndTags(array('p')); 
  1463.  
  1464. /** If the current node is not a p element, then this is 
  1465. a parse error. */ 
  1466. // XERROR: implement 
  1467.  
  1468. /** Pop elements from the stack of open elements until 
  1469. * an element with the same tag name as the token has 
  1470. * been popped from the stack. */ 
  1471. do { 
  1472. $node = array_pop($this->stack); 
  1473. } while ($node->tagName !== 'p'); 
  1474.  
  1475. } else { 
  1476. // parse error 
  1477. $this->emitToken(array( 
  1478. 'name' => 'p',  
  1479. 'type' => HTML5_Tokenizer::STARTTAG,  
  1480. )); 
  1481. $this->emitToken($token); 
  1482. break; 
  1483.  
  1484. /** An end tag whose tag name is "li" */ 
  1485. case 'li': 
  1486. /** If the stack of open elements does not have an element 
  1487. * in list item scope with the same tag name as that of the 
  1488. * token, then this is a parse error; ignore the token. */ 
  1489. if ($this->elementInScope($token['name'], self::SCOPE_LISTITEM)) { 
  1490. /** Generate implied end tags, except for elements with the 
  1491. * same tag name as the token. */ 
  1492. $this->generateImpliedEndTags(array($token['name'])); 
  1493. /** If the current node is not an element with the same tag 
  1494. * name as that of the token, then this is a parse error. */ 
  1495. // XERROR: parse error 
  1496. /** Pop elements from the stack of open elements until an 
  1497. * element with the same tag name as the token has been 
  1498. * popped from the stack. */ 
  1499. do { 
  1500. $node = array_pop($this->stack); 
  1501. } while ($node->tagName !== $token['name']); 
  1502. } else { 
  1503. // XERROR: parse error 
  1504. break; 
  1505.  
  1506. /** An end tag whose tag name is "dc", "dd", "ds", "dt" */ 
  1507. case 'dc': case 'dd': case 'ds': case 'dt': 
  1508. if($this->elementInScope($token['name'])) { 
  1509. $this->generateImpliedEndTags(array($token['name'])); 
  1510.  
  1511. /** If the current node is not an element with the same 
  1512. tag name as the token, then this is a parse error. */ 
  1513. // XERROR: implement parse error 
  1514.  
  1515. /** Pop elements from the stack of open elements until 
  1516. * an element with the same tag name as the token has 
  1517. * been popped from the stack. */ 
  1518. do { 
  1519. $node = array_pop($this->stack); 
  1520. } while ($node->tagName !== $token['name']); 
  1521.  
  1522. } else { 
  1523. // XERROR: parse error 
  1524. break; 
  1525.  
  1526. /** An end tag whose tag name is one of: "h1", "h2", "h3", "h4",  
  1527. "h5", "h6" */ 
  1528. case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': 
  1529. $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); 
  1530.  
  1531. /** If the stack of open elements has in scope an element whose 
  1532. tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then 
  1533. generate implied end tags. */ 
  1534. if($this->elementInScope($elements)) { 
  1535. $this->generateImpliedEndTags(); 
  1536.  
  1537. /** Now, if the current node is not an element with the same 
  1538. tag name as that of the token, then this is a parse error. */ 
  1539. // XERROR: implement parse error 
  1540.  
  1541. /** If the stack of open elements has in scope an element 
  1542. whose tag name is one of "h1", "h2", "h3", "h4", "h5", or 
  1543. "h6", then pop elements from the stack until an element 
  1544. with one of those tag names has been popped from the stack. */ 
  1545. do { 
  1546. $node = array_pop($this->stack); 
  1547. } while (!in_array($node->tagName, $elements)); 
  1548. } else { 
  1549. // parse error 
  1550. break; 
  1551.  
  1552. /** An end tag whose tag name is one of: "a", "b", "big", "code", "em",  
  1553. "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ 
  1554. case 'a': case 'b': case 'big': case 'code': case 'em': case 'font': 
  1555. case 'i': case 'nobr': case 's': case 'small': case 'strike': 
  1556. case 'strong': case 'tt': case 'u': 
  1557. // XERROR: generally speaking this needs parse error logic 
  1558. /** 1. Let the formatting element be the last element in 
  1559. the list of active formatting elements that: 
  1560. * is between the end of the list and the last scope 
  1561. marker in the list, if any, or the start of the list 
  1562. otherwise, and 
  1563. * has the same tag name as the token. 
  1564. */ 
  1565. while(true) { 
  1566. for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { 
  1567. if($this->a_formatting[$a] === self::MARKER) { 
  1568. break; 
  1569.  
  1570. } elseif($this->a_formatting[$a]->tagName === $token['name']) { 
  1571. $formatting_element = $this->a_formatting[$a]; 
  1572. $in_stack = in_array($formatting_element, $this->stack, true); 
  1573. $fe_af_pos = $a; 
  1574. break; 
  1575.  
  1576. /** If there is no such node, or, if that node is 
  1577. also in the stack of open elements but the element 
  1578. is not in scope, then this is a parse error. Abort 
  1579. these steps. The token is ignored. */ 
  1580. if(!isset($formatting_element) || ($in_stack && 
  1581. !$this->elementInScope($token['name']))) { 
  1582. $this->ignored = true; 
  1583. break; 
  1584.  
  1585. /** Otherwise, if there is such a node, but that node 
  1586. is not in the stack of open elements, then this is a 
  1587. parse error; remove the element from the list, and 
  1588. abort these steps. */ 
  1589. } elseif(isset($formatting_element) && !$in_stack) { 
  1590. unset($this->a_formatting[$fe_af_pos]); 
  1591. $this->a_formatting = array_merge($this->a_formatting); 
  1592. break; 
  1593.  
  1594. /** Otherwise, there is a formatting element and that 
  1595. * element is in the stack and is in scope. If the 
  1596. * element is not the current node, this is a parse 
  1597. * error. In any case, proceed with the algorithm as 
  1598. * written in the following steps. */ 
  1599. // XERROR: implement me 
  1600.  
  1601. /** 2. Let the furthest block be the topmost node in the 
  1602. stack of open elements that is lower in the stack 
  1603. than the formatting element, and is not an element in 
  1604. the phrasing or formatting categories. There might 
  1605. not be one. */ 
  1606. $fe_s_pos = array_search($formatting_element, $this->stack, true); 
  1607. $length = count($this->stack); 
  1608.  
  1609. for($s = $fe_s_pos + 1; $s < $length; $s++) { 
  1610. $category = $this->getElementCategory($this->stack[$s]); 
  1611.  
  1612. if($category !== self::PHRASING && $category !== self::FORMATTING) { 
  1613. $furthest_block = $this->stack[$s]; 
  1614. break; 
  1615.  
  1616. /** 3. If there is no furthest block, then the UA must 
  1617. skip the subsequent steps and instead just pop all 
  1618. the nodes from the bottom of the stack of open 
  1619. elements, from the current node up to the formatting 
  1620. element, and remove the formatting element from the 
  1621. list of active formatting elements. */ 
  1622. if(!isset($furthest_block)) { 
  1623. for($n = $length - 1; $n >= $fe_s_pos; $n--) { 
  1624. array_pop($this->stack); 
  1625.  
  1626. unset($this->a_formatting[$fe_af_pos]); 
  1627. $this->a_formatting = array_merge($this->a_formatting); 
  1628. break; 
  1629.  
  1630. /** 4. Let the common ancestor be the element 
  1631. immediately above the formatting element in the stack 
  1632. of open elements. */ 
  1633. $common_ancestor = $this->stack[$fe_s_pos - 1]; 
  1634.  
  1635. /** 5. Let a bookmark note the position of the 
  1636. formatting element in the list of active formatting 
  1637. elements relative to the elements on either side 
  1638. of it in the list. */ 
  1639. $bookmark = $fe_af_pos; 
  1640.  
  1641. /** 6. Let node and last node be the furthest block. 
  1642. Follow these steps: */ 
  1643. $node = $furthest_block; 
  1644. $last_node = $furthest_block; 
  1645.  
  1646. while(true) { 
  1647. for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { 
  1648. /** 6.1 Let node be the element immediately 
  1649. prior to node in the stack of open elements. */ 
  1650. $node = $this->stack[$n]; 
  1651.  
  1652. /** 6.2 If node is not in the list of active 
  1653. formatting elements, then remove node from 
  1654. the stack of open elements and then go back 
  1655. to step 1. */ 
  1656. if(!in_array($node, $this->a_formatting, true)) { 
  1657. array_splice($this->stack, $n, 1); 
  1658.  
  1659. } else { 
  1660. break; 
  1661.  
  1662. /** 6.3 Otherwise, if node is the formatting 
  1663. element, then go to the next step in the overall 
  1664. algorithm. */ 
  1665. if($node === $formatting_element) { 
  1666. break; 
  1667.  
  1668. /** 6.4 Otherwise, if last node is the furthest 
  1669. block, then move the aforementioned bookmark to 
  1670. be immediately after the node in the list of 
  1671. active formatting elements. */ 
  1672. } elseif($last_node === $furthest_block) { 
  1673. $bookmark = array_search($node, $this->a_formatting, true) + 1; 
  1674.  
  1675. /** 6.5 Create an element for the token for which 
  1676. * the element node was created, replace the entry 
  1677. * for node in the list of active formatting 
  1678. * elements with an entry for the new element,  
  1679. * replace the entry for node in the stack of open 
  1680. * elements with an entry for the new element, and 
  1681. * let node be the new element. */ 
  1682. // we don't know what the token is anymore 
  1683. // XDOM 
  1684. $clone = $node->cloneNode(); 
  1685. $a_pos = array_search($node, $this->a_formatting, true); 
  1686. $s_pos = array_search($node, $this->stack, true); 
  1687. $this->a_formatting[$a_pos] = $clone; 
  1688. $this->stack[$s_pos] = $clone; 
  1689. $node = $clone; 
  1690.  
  1691. /** 6.6 Insert last node into node, first removing 
  1692. it from its previous parent node if any. */ 
  1693. // XDOM 
  1694. if($last_node->parentNode !== null) { 
  1695. $last_node->parentNode->removeChild($last_node); 
  1696.  
  1697. // XDOM 
  1698. $node->appendChild($last_node); 
  1699.  
  1700. /** 6.7 Let last node be node. */ 
  1701. $last_node = $node; 
  1702.  
  1703. /** 6.8 Return to step 1 of this inner set of steps. */ 
  1704.  
  1705. /** 7. If the common ancestor node is a table, tbody,  
  1706. * tfoot, thead, or tr element, then, foster parent 
  1707. * whatever last node ended up being in the previous 
  1708. * step, first removing it from its previous parent 
  1709. * node if any. */ 
  1710. // XDOM 
  1711. if ($last_node->parentNode) { // common step 
  1712. $last_node->parentNode->removeChild($last_node); 
  1713. if (in_array($common_ancestor->tagName, array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { 
  1714. $this->fosterParent($last_node); 
  1715. /** Otherwise, append whatever last node ended up being 
  1716. * in the previous step to the common ancestor node,  
  1717. * first removing it from its previous parent node if 
  1718. * any. */ 
  1719. } else { 
  1720. // XDOM 
  1721. $common_ancestor->appendChild($last_node); 
  1722.  
  1723. /** 8. Create an element for the token for which the 
  1724. * formatting element was created. */ 
  1725. // XDOM 
  1726. $clone = $formatting_element->cloneNode(); 
  1727.  
  1728. /** 9. Take all of the child nodes of the furthest 
  1729. block and append them to the element created in the 
  1730. last step. */ 
  1731. // XDOM 
  1732. while($furthest_block->hasChildNodes()) { 
  1733. $child = $furthest_block->firstChild; 
  1734. $furthest_block->removeChild($child); 
  1735. $clone->appendChild($child); 
  1736.  
  1737. /** 10. Append that clone to the furthest block. */ 
  1738. // XDOM 
  1739. $furthest_block->appendChild($clone); 
  1740.  
  1741. /** 11. Remove the formatting element from the list 
  1742. of active formatting elements, and insert the new element 
  1743. into the list of active formatting elements at the 
  1744. position of the aforementioned bookmark. */ 
  1745. $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); 
  1746. array_splice($this->a_formatting, $fe_af_pos, 1); 
  1747.  
  1748. $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); 
  1749. $af_part2 = array_slice($this->a_formatting, $bookmark); 
  1750. $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); 
  1751.  
  1752. /** 12. Remove the formatting element from the stack 
  1753. of open elements, and insert the new element into the stack 
  1754. of open elements immediately below the position of the 
  1755. furthest block in that stack. */ 
  1756. $fe_s_pos = array_search($formatting_element, $this->stack, true); 
  1757. array_splice($this->stack, $fe_s_pos, 1); 
  1758.  
  1759. $fb_s_pos = array_search($furthest_block, $this->stack, true); 
  1760. $s_part1 = array_slice($this->stack, 0, $fb_s_pos + 1); 
  1761. $s_part2 = array_slice($this->stack, $fb_s_pos + 1); 
  1762. $this->stack = array_merge($s_part1, array($clone), $s_part2); 
  1763.  
  1764. /** 13. Jump back to step 1 in this series of steps. */ 
  1765. unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); 
  1766. break; 
  1767.  
  1768. case 'applet': case 'button': case 'marquee': case 'object': 
  1769. /** If the stack of open elements has an element in scope whose 
  1770. tag name matches the tag name of the token, then generate implied 
  1771. end tags. */ 
  1772. if($this->elementInScope($token['name'])) { 
  1773. $this->generateImpliedEndTags(); 
  1774.  
  1775. /** Now, if the current node is not an element with the same 
  1776. tag name as the token, then this is a parse error. */ 
  1777. // XERROR: implement logic 
  1778.  
  1779. /** Pop elements from the stack of open elements until 
  1780. * an element with the same tag name as the token has 
  1781. * been popped from the stack. */ 
  1782. do { 
  1783. $node = array_pop($this->stack); 
  1784. } while ($node->tagName !== $token['name']); 
  1785.  
  1786. /** Clear the list of active formatting elements up to the 
  1787. * last marker. */ 
  1788. $keys = array_keys($this->a_formatting, self::MARKER, true); 
  1789. $marker = end($keys); 
  1790.  
  1791. for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { 
  1792. array_pop($this->a_formatting); 
  1793. } else { 
  1794. // parse error 
  1795. break; 
  1796.  
  1797. case 'br': 
  1798. // Parse error 
  1799. $this->emitToken(array( 
  1800. 'name' => 'br',  
  1801. 'type' => HTML5_Tokenizer::STARTTAG,  
  1802. )); 
  1803. break; 
  1804.  
  1805. /** An end tag token not covered by the previous entries */ 
  1806. default: 
  1807. for($n = count($this->stack) - 1; $n >= 0; $n--) { 
  1808. /** Initialise node to be the current node (the bottommost 
  1809. node of the stack). */ 
  1810. $node = $this->stack[$n]; 
  1811.  
  1812. /** If node has the same tag name as the end tag token,  
  1813. then: */ 
  1814. if($token['name'] === $node->tagName) { 
  1815. /** Generate implied end tags. */ 
  1816. $this->generateImpliedEndTags(); 
  1817.  
  1818. /** If the tag name of the end tag token does not 
  1819. match the tag name of the current node, this is a 
  1820. parse error. */ 
  1821. // XERROR: implement this 
  1822.  
  1823. /** Pop all the nodes from the current node up to 
  1824. node, including node, then stop these steps. */ 
  1825. // XSKETCHY 
  1826. do { 
  1827. $pop = array_pop($this->stack); 
  1828. } while ($pop !== $node); 
  1829. break; 
  1830.  
  1831. } else { 
  1832. $category = $this->getElementCategory($node); 
  1833.  
  1834. if($category !== self::FORMATTING && $category !== self::PHRASING) { 
  1835. /** Otherwise, if node is in neither the formatting 
  1836. category nor the phrasing category, then this is a 
  1837. parse error. Stop this algorithm. The end tag token 
  1838. is ignored. */ 
  1839. $this->ignored = true; 
  1840. break; 
  1841. // parse error 
  1842. /** Set node to the previous entry in the stack of open elements. Loop. */ 
  1843. break; 
  1844. break; 
  1845. break; 
  1846.  
  1847. case self::IN_CDATA_RCDATA: 
  1848. if ( 
  1849. $token['type'] === HTML5_Tokenizer::CHARACTER || 
  1850. $token['type'] === HTML5_Tokenizer::SPACECHARACTER 
  1851. ) { 
  1852. $this->insertText($token['data']); 
  1853. } elseif ($token['type'] === HTML5_Tokenizer::EOF) { 
  1854. // parse error 
  1855. /** If the current node is a script element, mark the script 
  1856. * element as "already executed". */ 
  1857. // probably not necessary 
  1858. array_pop($this->stack); 
  1859. $this->mode = $this->original_mode; 
  1860. $this->emitToken($token); 
  1861. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'script') { 
  1862. array_pop($this->stack); 
  1863. $this->mode = $this->original_mode; 
  1864. // we're ignoring all of the execution stuff 
  1865. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG) { 
  1866. array_pop($this->stack); 
  1867. $this->mode = $this->original_mode; 
  1868. break; 
  1869.  
  1870. case self::IN_TABLE: 
  1871. $clear = array('html', 'table'); 
  1872.  
  1873. /** A character token */ 
  1874. if ($token['type'] === HTML5_Tokenizer::CHARACTER || 
  1875. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  1876. /** Let the pending table character tokens 
  1877. * be an empty list of tokens. */ 
  1878. $this->pendingTableCharacters = ""; 
  1879. $this->pendingTableCharactersDirty = false; 
  1880. /** Let the original insertion mode be the current 
  1881. * insertion mode. */ 
  1882. $this->original_mode = $this->mode; 
  1883. /** Switch the insertion mode to 
  1884. * "in table text" and 
  1885. * reprocess the token. */ 
  1886. $this->mode = self::IN_TABLE_TEXT; 
  1887. $this->emitToken($token); 
  1888.  
  1889. /** A comment token */ 
  1890. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  1891. /** Append a Comment node to the current node with the data 
  1892. attribute set to the data given in the comment token. */ 
  1893. $this->insertComment($token['data']); 
  1894.  
  1895. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  1896. // parse error 
  1897.  
  1898. /** A start tag whose tag name is "caption" */ 
  1899. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  1900. $token['name'] === 'caption') { 
  1901. /** Clear the stack back to a table context. */ 
  1902. $this->clearStackToTableContext($clear); 
  1903.  
  1904. /** Insert a marker at the end of the list of active 
  1905. formatting elements. */ 
  1906. $this->a_formatting[] = self::MARKER; 
  1907.  
  1908. /** Insert an HTML element for the token, then switch the 
  1909. insertion mode to "in caption". */ 
  1910. $this->insertElement($token); 
  1911. $this->mode = self::IN_CAPTION; 
  1912.  
  1913. /** A start tag whose tag name is "colgroup" */ 
  1914. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  1915. $token['name'] === 'colgroup') { 
  1916. /** Clear the stack back to a table context. */ 
  1917. $this->clearStackToTableContext($clear); 
  1918.  
  1919. /** Insert an HTML element for the token, then switch the 
  1920. insertion mode to "in column group". */ 
  1921. $this->insertElement($token); 
  1922. $this->mode = self::IN_COLUMN_GROUP; 
  1923.  
  1924. /** A start tag whose tag name is "col" */ 
  1925. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  1926. $token['name'] === 'col') { 
  1927. $this->emitToken(array( 
  1928. 'name' => 'colgroup',  
  1929. 'type' => HTML5_Tokenizer::STARTTAG,  
  1930. 'attr' => array() 
  1931. )); 
  1932.  
  1933. $this->emitToken($token); 
  1934.  
  1935. /** A start tag whose tag name is one of: "tbody", "tfoot", "thead" */ 
  1936. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],  
  1937. array('tbody', 'tfoot', 'thead'))) { 
  1938. /** Clear the stack back to a table context. */ 
  1939. $this->clearStackToTableContext($clear); 
  1940.  
  1941. /** Insert an HTML element for the token, then switch the insertion 
  1942. mode to "in table body". */ 
  1943. $this->insertElement($token); 
  1944. $this->mode = self::IN_TABLE_BODY; 
  1945.  
  1946. /** A start tag whose tag name is one of: "td", "th", "tr" */ 
  1947. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  1948. in_array($token['name'], array('td', 'th', 'tr'))) { 
  1949. /** Act as if a start tag token with the tag name "tbody" had been 
  1950. seen, then reprocess the current token. */ 
  1951. $this->emitToken(array( 
  1952. 'name' => 'tbody',  
  1953. 'type' => HTML5_Tokenizer::STARTTAG,  
  1954. 'attr' => array() 
  1955. )); 
  1956.  
  1957. $this->emitToken($token); 
  1958.  
  1959. /** A start tag whose tag name is "table" */ 
  1960. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  1961. $token['name'] === 'table') { 
  1962. /** Parse error. Act as if an end tag token with the tag name "table" 
  1963. had been seen, then, if that token wasn't ignored, reprocess the 
  1964. current token. */ 
  1965. $this->emitToken(array( 
  1966. 'name' => 'table',  
  1967. 'type' => HTML5_Tokenizer::ENDTAG 
  1968. )); 
  1969.  
  1970. if (!$this->ignored) $this->emitToken($token); 
  1971.  
  1972. /** An end tag whose tag name is "table" */ 
  1973. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  1974. $token['name'] === 'table') { 
  1975. /** If the stack of open elements does not have an element in table 
  1976. scope with the same tag name as the token, this is a parse error. 
  1977. Ignore the token. (fragment case) */ 
  1978. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  1979. $this->ignored = true; 
  1980.  
  1981. /** Otherwise: */ 
  1982. } else { 
  1983. do { 
  1984. $node = array_pop($this->stack); 
  1985. } while ($node->tagName !== 'table'); 
  1986.  
  1987. /** Reset the insertion mode appropriately. */ 
  1988. $this->resetInsertionMode(); 
  1989.  
  1990. /** An end tag whose tag name is one of: "body", "caption", "col",  
  1991. "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ 
  1992. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],  
  1993. array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',  
  1994. 'tfoot', 'th', 'thead', 'tr'))) { 
  1995. // Parse error. Ignore the token. 
  1996.  
  1997. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  1998. ($token['name'] === 'style' || $token['name'] === 'script')) { 
  1999. $this->processWithRulesFor($token, self::IN_HEAD); 
  2000.  
  2001. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'input' && 
  2002. // assignment is intentional 
  2003. /** If the token does not have an attribute with the name "type", or 
  2004. * if it does, but that attribute's value is not an ASCII 
  2005. * case-insensitive match for the string "hidden", then: act as 
  2006. * described in the "anything else" entry below. */ 
  2007. ($type = $this->getAttr($token, 'type')) && strtolower($type) === 'hidden') { 
  2008. // I.e., if its an input with the type attribute == 'hidden' 
  2009. /** Otherwise */ 
  2010. // parse error 
  2011. $this->insertElement($token); 
  2012. array_pop($this->stack); 
  2013. } elseif ($token['type'] === HTML5_Tokenizer::EOF) { 
  2014. /** If the current node is not the root html element, then this is a parse error. */ 
  2015. if (end($this->stack)->tagName !== 'html') { 
  2016. // Note: It can only be the current node in the fragment case. 
  2017. // parse error 
  2018. /** Stop parsing. */ 
  2019. /** Anything else */ 
  2020. } else { 
  2021. /** Parse error. Process the token as if the insertion mode was "in 
  2022. body", with the following exception: */ 
  2023.  
  2024. $old = $this->foster_parent; 
  2025. $this->foster_parent = true; 
  2026. $this->processWithRulesFor($token, self::IN_BODY); 
  2027. $this->foster_parent = $old; 
  2028. break; 
  2029.  
  2030. case self::IN_TABLE_TEXT: 
  2031. /** A character token */ 
  2032. if($token['type'] === HTML5_Tokenizer::CHARACTER) { 
  2033. /** Append the character token to the pending table 
  2034. * character tokens list. */ 
  2035. $this->pendingTableCharacters .= $token['data']; 
  2036. $this->pendingTableCharactersDirty = true; 
  2037. } elseif ($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  2038. $this->pendingTableCharacters .= $token['data']; 
  2039. /** Anything else */ 
  2040. } else { 
  2041. if ($this->pendingTableCharacters !== '' && is_string($this->pendingTableCharacters)) { 
  2042. /** If any of the tokens in the pending table character tokens list  
  2043. * are character tokens that are not one of U+0009 CHARACTER  
  2044. * TABULATION, U+000A LINE FEED (LF), U+000C FORM FEED (FF), or  
  2045. * U+0020 SPACE, then reprocess those character tokens using the  
  2046. * rules given in the "anything else" entry in the "in table"  
  2047. * insertion mode.*/ 
  2048. if ($this->pendingTableCharactersDirty) { 
  2049. /** Parse error. Process the token using the rules for the  
  2050. * "in body" insertion mode, except that if the current  
  2051. * node is a table, tbody, tfoot, thead, or tr element,  
  2052. * then, whenever a node would be inserted into the current  
  2053. * node, it must instead be foster parented. */ 
  2054. // XERROR 
  2055. $old = $this->foster_parent; 
  2056. $this->foster_parent = true; 
  2057. $text_token = array( 
  2058. 'type' => HTML5_Tokenizer::CHARACTER,  
  2059. 'data' => $this->pendingTableCharacters,  
  2060. ); 
  2061. $this->processWithRulesFor($text_token, self::IN_BODY); 
  2062. $this->foster_parent = $old; 
  2063.  
  2064. /** Otherwise, insert the characters given by the pending table  
  2065. * character tokens list into the current node. */ 
  2066. } else { 
  2067. $this->insertText($this->pendingTableCharacters); 
  2068. $this->pendingTableCharacters = null; 
  2069. $this->pendingTableCharactersNull = null; 
  2070.  
  2071. /** Switch the insertion mode to the original insertion mode and  
  2072. * reprocess the token. 
  2073. */ 
  2074. $this->mode = $this->original_mode; 
  2075. $this->emitToken($token); 
  2076. break; 
  2077.  
  2078. case self::IN_CAPTION: 
  2079. /** An end tag whose tag name is "caption" */ 
  2080. if($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'caption') { 
  2081. /** If the stack of open elements does not have an element in table 
  2082. scope with the same tag name as the token, this is a parse error. 
  2083. Ignore the token. (fragment case) */ 
  2084. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2085. $this->ignored = true; 
  2086. // Ignore 
  2087.  
  2088. /** Otherwise: */ 
  2089. } else { 
  2090. /** Generate implied end tags. */ 
  2091. $this->generateImpliedEndTags(); 
  2092.  
  2093. /** Now, if the current node is not a caption element, then this 
  2094. is a parse error. */ 
  2095. // XERROR: implement 
  2096.  
  2097. /** Pop elements from this stack until a caption element has 
  2098. been popped from the stack. */ 
  2099. do { 
  2100. $node = array_pop($this->stack); 
  2101. } while ($node->tagName !== 'caption'); 
  2102.  
  2103. /** Clear the list of active formatting elements up to the last 
  2104. marker. */ 
  2105. $this->clearTheActiveFormattingElementsUpToTheLastMarker(); 
  2106.  
  2107. /** Switch the insertion mode to "in table". */ 
  2108. $this->mode = self::IN_TABLE; 
  2109.  
  2110. /** A start tag whose tag name is one of: "caption", "col", "colgroup",  
  2111. "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag 
  2112. name is "table" */ 
  2113. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],  
  2114. array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',  
  2115. 'thead', 'tr'))) || ($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2116. $token['name'] === 'table')) { 
  2117. /** Parse error. Act as if an end tag with the tag name "caption" 
  2118. had been seen, then, if that token wasn't ignored, reprocess the 
  2119. current token. */ 
  2120. $this->emitToken(array( 
  2121. 'name' => 'caption',  
  2122. 'type' => HTML5_Tokenizer::ENDTAG 
  2123. )); 
  2124.  
  2125. if (!$this->ignored) $this->emitToken($token); 
  2126.  
  2127. /** An end tag whose tag name is one of: "body", "col", "colgroup",  
  2128. "html", "tbody", "td", "tfoot", "th", "thead", "tr" */ 
  2129. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],  
  2130. array('body', 'col', 'colgroup', 'html', 'tbody', 'tfoot', 'th',  
  2131. 'thead', 'tr'))) { 
  2132. // Parse error. Ignore the token. 
  2133. $this->ignored = true; 
  2134.  
  2135. /** Anything else */ 
  2136. } else { 
  2137. /** Process the token as if the insertion mode was "in body". */ 
  2138. $this->processWithRulesFor($token, self::IN_BODY); 
  2139. break; 
  2140.  
  2141. case self::IN_COLUMN_GROUP: 
  2142. /** A character token that is one of U+0009 CHARACTER TABULATION,  
  2143. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  2144. or U+0020 SPACE */ 
  2145. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  2146. /** Append the character to the current node. */ 
  2147. $this->insertText($token['data']); 
  2148.  
  2149. /** A comment token */ 
  2150. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2151. /** Append a Comment node to the current node with the data 
  2152. attribute set to the data given in the comment token. */ 
  2153. $this->insertToken($token['data']); 
  2154.  
  2155. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  2156. // parse error 
  2157.  
  2158. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  2159. $this->processWithRulesFor($token, self::IN_BODY); 
  2160.  
  2161. /** A start tag whose tag name is "col" */ 
  2162. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'col') { 
  2163. /** Insert a col element for the token. Immediately pop the current 
  2164. node off the stack of open elements. */ 
  2165. $this->insertElement($token); 
  2166. array_pop($this->stack); 
  2167. // XERROR: Acknowledge the token's self-closing flag, if it is set. 
  2168.  
  2169. /** An end tag whose tag name is "colgroup" */ 
  2170. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2171. $token['name'] === 'colgroup') { 
  2172. /** If the current node is the root html element, then this is a 
  2173. parse error, ignore the token. (fragment case) */ 
  2174. if(end($this->stack)->tagName === 'html') { 
  2175. $this->ignored = true; 
  2176.  
  2177. /** Otherwise, pop the current node (which will be a colgroup 
  2178. element) from the stack of open elements. Switch the insertion 
  2179. mode to "in table". */ 
  2180. } else { 
  2181. array_pop($this->stack); 
  2182. $this->mode = self::IN_TABLE; 
  2183.  
  2184. /** An end tag whose tag name is "col" */ 
  2185. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'col') { 
  2186. /** Parse error. Ignore the token. */ 
  2187. $this->ignored = true; 
  2188.  
  2189. /** An end-of-file token */ 
  2190. /** If the current node is the root html element */ 
  2191. } elseif($token['type'] === HTML5_Tokenizer::EOF && end($this->stack)->tagName === 'html') { 
  2192. /** Stop parsing */ 
  2193.  
  2194. /** Anything else */ 
  2195. } else { 
  2196. /** Act as if an end tag with the tag name "colgroup" had been seen,  
  2197. and then, if that token wasn't ignored, reprocess the current token. */ 
  2198. $this->emitToken(array( 
  2199. 'name' => 'colgroup',  
  2200. 'type' => HTML5_Tokenizer::ENDTAG 
  2201. )); 
  2202.  
  2203. if (!$this->ignored) $this->emitToken($token); 
  2204. break; 
  2205.  
  2206. case self::IN_TABLE_BODY: 
  2207. $clear = array('tbody', 'tfoot', 'thead', 'html'); 
  2208.  
  2209. /** A start tag whose tag name is "tr" */ 
  2210. if($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'tr') { 
  2211. /** Clear the stack back to a table body context. */ 
  2212. $this->clearStackToTableContext($clear); 
  2213.  
  2214. /** Insert a tr element for the token, then switch the insertion 
  2215. mode to "in row". */ 
  2216. $this->insertElement($token); 
  2217. $this->mode = self::IN_ROW; 
  2218.  
  2219. /** A start tag whose tag name is one of: "th", "td" */ 
  2220. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2221. ($token['name'] === 'th' || $token['name'] === 'td')) { 
  2222. /** Parse error. Act as if a start tag with the tag name "tr" had 
  2223. been seen, then reprocess the current token. */ 
  2224. $this->emitToken(array( 
  2225. 'name' => 'tr',  
  2226. 'type' => HTML5_Tokenizer::STARTTAG,  
  2227. 'attr' => array() 
  2228. )); 
  2229.  
  2230. $this->emitToken($token); 
  2231.  
  2232. /** An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ 
  2233. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2234. in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { 
  2235. /** If the stack of open elements does not have an element in table 
  2236. scope with the same tag name as the token, this is a parse error. 
  2237. Ignore the token. */ 
  2238. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2239. // Parse error 
  2240. $this->ignored = true; 
  2241.  
  2242. /** Otherwise: */ 
  2243. } else { 
  2244. /** Clear the stack back to a table body context. */ 
  2245. $this->clearStackToTableContext($clear); 
  2246.  
  2247. /** Pop the current node from the stack of open elements. Switch 
  2248. the insertion mode to "in table". */ 
  2249. array_pop($this->stack); 
  2250. $this->mode = self::IN_TABLE; 
  2251.  
  2252. /** A start tag whose tag name is one of: "caption", "col", "colgroup",  
  2253. "tbody", "tfoot", "thead", or an end tag whose tag name is "table" */ 
  2254. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],  
  2255. array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'))) || 
  2256. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) { 
  2257. /** If the stack of open elements does not have a tbody, thead, or 
  2258. tfoot element in table scope, this is a parse error. Ignore the 
  2259. token. (fragment case) */ 
  2260. if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), self::SCOPE_TABLE)) { 
  2261. // parse error 
  2262. $this->ignored = true; 
  2263.  
  2264. /** Otherwise: */ 
  2265. } else { 
  2266. /** Clear the stack back to a table body context. */ 
  2267. $this->clearStackToTableContext($clear); 
  2268.  
  2269. /** Act as if an end tag with the same tag name as the current 
  2270. node ("tbody", "tfoot", or "thead") had been seen, then 
  2271. reprocess the current token. */ 
  2272. $this->emitToken(array( 
  2273. 'name' => end($this->stack)->tagName,  
  2274. 'type' => HTML5_Tokenizer::ENDTAG 
  2275. )); 
  2276.  
  2277. $this->emitToken($token); 
  2278.  
  2279. /** An end tag whose tag name is one of: "body", "caption", "col",  
  2280. "colgroup", "html", "td", "th", "tr" */ 
  2281. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],  
  2282. array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'))) { 
  2283. /** Parse error. Ignore the token. */ 
  2284. $this->ignored = true; 
  2285.  
  2286. /** Anything else */ 
  2287. } else { 
  2288. /** Process the token as if the insertion mode was "in table". */ 
  2289. $this->processWithRulesFor($token, self::IN_TABLE); 
  2290. break; 
  2291.  
  2292. case self::IN_ROW: 
  2293. $clear = array('tr', 'html'); 
  2294.  
  2295. /** A start tag whose tag name is one of: "th", "td" */ 
  2296. if($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2297. ($token['name'] === 'th' || $token['name'] === 'td')) { 
  2298. /** Clear the stack back to a table row context. */ 
  2299. $this->clearStackToTableContext($clear); 
  2300.  
  2301. /** Insert an HTML element for the token, then switch the insertion 
  2302. mode to "in cell". */ 
  2303. $this->insertElement($token); 
  2304. $this->mode = self::IN_CELL; 
  2305.  
  2306. /** Insert a marker at the end of the list of active formatting 
  2307. elements. */ 
  2308. $this->a_formatting[] = self::MARKER; 
  2309.  
  2310. /** An end tag whose tag name is "tr" */ 
  2311. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'tr') { 
  2312. /** If the stack of open elements does not have an element in table 
  2313. scope with the same tag name as the token, this is a parse error. 
  2314. Ignore the token. (fragment case) */ 
  2315. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2316. // Ignore. 
  2317. $this->ignored = true; 
  2318.  
  2319. /** Otherwise: */ 
  2320. } else { 
  2321. /** Clear the stack back to a table row context. */ 
  2322. $this->clearStackToTableContext($clear); 
  2323.  
  2324. /** Pop the current node (which will be a tr element) from the 
  2325. stack of open elements. Switch the insertion mode to "in table 
  2326. body". */ 
  2327. array_pop($this->stack); 
  2328. $this->mode = self::IN_TABLE_BODY; 
  2329.  
  2330. /** A start tag whose tag name is one of: "caption", "col", "colgroup",  
  2331. "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */ 
  2332. } elseif(($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],  
  2333. array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'))) || 
  2334. ($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'table')) { 
  2335. /** Act as if an end tag with the tag name "tr" had been seen, then,  
  2336. if that token wasn't ignored, reprocess the current token. */ 
  2337. $this->emitToken(array( 
  2338. 'name' => 'tr',  
  2339. 'type' => HTML5_Tokenizer::ENDTAG 
  2340. )); 
  2341. if (!$this->ignored) $this->emitToken($token); 
  2342.  
  2343. /** An end tag whose tag name is one of: "tbody", "tfoot", "thead" */ 
  2344. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2345. in_array($token['name'], array('tbody', 'tfoot', 'thead'))) { 
  2346. /** If the stack of open elements does not have an element in table 
  2347. scope with the same tag name as the token, this is a parse error. 
  2348. Ignore the token. */ 
  2349. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2350. $this->ignored = true; 
  2351.  
  2352. /** Otherwise: */ 
  2353. } else { 
  2354. /** Otherwise, act as if an end tag with the tag name "tr" had 
  2355. been seen, then reprocess the current token. */ 
  2356. $this->emitToken(array( 
  2357. 'name' => 'tr',  
  2358. 'type' => HTML5_Tokenizer::ENDTAG 
  2359. )); 
  2360.  
  2361. $this->emitToken($token); 
  2362.  
  2363. /** An end tag whose tag name is one of: "body", "caption", "col",  
  2364. "colgroup", "html", "td", "th" */ 
  2365. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],  
  2366. array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'))) { 
  2367. /** Parse error. Ignore the token. */ 
  2368. $this->ignored = true; 
  2369.  
  2370. /** Anything else */ 
  2371. } else { 
  2372. /** Process the token as if the insertion mode was "in table". */ 
  2373. $this->processWithRulesFor($token, self::IN_TABLE); 
  2374. break; 
  2375.  
  2376. case self::IN_CELL: 
  2377. /** An end tag whose tag name is one of: "td", "th" */ 
  2378. if($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2379. ($token['name'] === 'td' || $token['name'] === 'th')) { 
  2380. /** If the stack of open elements does not have an element in table 
  2381. scope with the same tag name as that of the token, then this is a 
  2382. parse error and the token must be ignored. */ 
  2383. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2384. $this->ignored = true; 
  2385.  
  2386. /** Otherwise: */ 
  2387. } else { 
  2388. /** Generate implied end tags, except for elements with the same 
  2389. tag name as the token. */ 
  2390. $this->generateImpliedEndTags(array($token['name'])); 
  2391.  
  2392. /** Now, if the current node is not an element with the same tag 
  2393. name as the token, then this is a parse error. */ 
  2394. // XERROR: Implement parse error code 
  2395.  
  2396. /** Pop elements from this stack until an element with the same 
  2397. tag name as the token has been popped from the stack. */ 
  2398. do { 
  2399. $node = array_pop($this->stack); 
  2400. } while ($node->tagName !== $token['name']); 
  2401.  
  2402. /** Clear the list of active formatting elements up to the last 
  2403. marker. */ 
  2404. $this->clearTheActiveFormattingElementsUpToTheLastMarker(); 
  2405.  
  2406. /** Switch the insertion mode to "in row". (The current node 
  2407. will be a tr element at this point.) */ 
  2408. $this->mode = self::IN_ROW; 
  2409.  
  2410. /** A start tag whose tag name is one of: "caption", "col", "colgroup",  
  2411. "tbody", "td", "tfoot", "th", "thead", "tr" */ 
  2412. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && in_array($token['name'],  
  2413. array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',  
  2414. 'thead', 'tr'))) { 
  2415. /** If the stack of open elements does not have a td or th element 
  2416. in table scope, then this is a parse error; ignore the token. 
  2417. (fragment case) */ 
  2418. if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) { 
  2419. // parse error 
  2420. $this->ignored = true; 
  2421.  
  2422. /** Otherwise, close the cell (see below) and reprocess the current 
  2423. token. */ 
  2424. } else { 
  2425. $this->closeCell(); 
  2426. $this->emitToken($token); 
  2427.  
  2428. /** An end tag whose tag name is one of: "body", "caption", "col",  
  2429. "colgroup", "html" */ 
  2430. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],  
  2431. array('body', 'caption', 'col', 'colgroup', 'html'))) { 
  2432. /** Parse error. Ignore the token. */ 
  2433. $this->ignored = true; 
  2434.  
  2435. /** An end tag whose tag name is one of: "table", "tbody", "tfoot",  
  2436. "thead", "tr" */ 
  2437. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && in_array($token['name'],  
  2438. array('table', 'tbody', 'tfoot', 'thead', 'tr'))) { 
  2439. /** If the stack of open elements does not have a td or th element 
  2440. in table scope, then this is a parse error; ignore the token. 
  2441. (innerHTML case) */ 
  2442. if(!$this->elementInScope(array('td', 'th'), self::SCOPE_TABLE)) { 
  2443. // Parse error 
  2444. $this->ignored = true; 
  2445.  
  2446. /** Otherwise, close the cell (see below) and reprocess the current 
  2447. token. */ 
  2448. } else { 
  2449. $this->closeCell(); 
  2450. $this->emitToken($token); 
  2451.  
  2452. /** Anything else */ 
  2453. } else { 
  2454. /** Process the token as if the insertion mode was "in body". */ 
  2455. $this->processWithRulesFor($token, self::IN_BODY); 
  2456. break; 
  2457.  
  2458. case self::IN_SELECT: 
  2459. /** Handle the token as follows: */ 
  2460.  
  2461. /** A character token */ 
  2462. if( 
  2463. $token['type'] === HTML5_Tokenizer::CHARACTER || 
  2464. $token['type'] === HTML5_Tokenizer::SPACECHARACTER 
  2465. ) { 
  2466. /** Append the token's character to the current node. */ 
  2467. $this->insertText($token['data']); 
  2468.  
  2469. /** A comment token */ 
  2470. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2471. /** Append a Comment node to the current node with the data 
  2472. attribute set to the data given in the comment token. */ 
  2473. $this->insertComment($token['data']); 
  2474.  
  2475. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  2476. // parse error 
  2477.  
  2478. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  2479. $this->processWithRulesFor($token, self::IN_BODY); 
  2480.  
  2481. /** A start tag token whose tag name is "option" */ 
  2482. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2483. $token['name'] === 'option') { 
  2484. /** If the current node is an option element, act as if an end tag 
  2485. with the tag name "option" had been seen. */ 
  2486. if(end($this->stack)->tagName === 'option') { 
  2487. $this->emitToken(array( 
  2488. 'name' => 'option',  
  2489. 'type' => HTML5_Tokenizer::ENDTAG 
  2490. )); 
  2491.  
  2492. /** Insert an HTML element for the token. */ 
  2493. $this->insertElement($token); 
  2494.  
  2495. /** A start tag token whose tag name is "optgroup" */ 
  2496. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2497. $token['name'] === 'optgroup') { 
  2498. /** If the current node is an option element, act as if an end tag 
  2499. with the tag name "option" had been seen. */ 
  2500. if(end($this->stack)->tagName === 'option') { 
  2501. $this->emitToken(array( 
  2502. 'name' => 'option',  
  2503. 'type' => HTML5_Tokenizer::ENDTAG 
  2504. )); 
  2505.  
  2506. /** If the current node is an optgroup element, act as if an end tag 
  2507. with the tag name "optgroup" had been seen. */ 
  2508. if(end($this->stack)->tagName === 'optgroup') { 
  2509. $this->emitToken(array( 
  2510. 'name' => 'optgroup',  
  2511. 'type' => HTML5_Tokenizer::ENDTAG 
  2512. )); 
  2513.  
  2514. /** Insert an HTML element for the token. */ 
  2515. $this->insertElement($token); 
  2516.  
  2517. /** An end tag token whose tag name is "optgroup" */ 
  2518. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2519. $token['name'] === 'optgroup') { 
  2520. /** First, if the current node is an option element, and the node 
  2521. immediately before it in the stack of open elements is an optgroup 
  2522. element, then act as if an end tag with the tag name "option" had 
  2523. been seen. */ 
  2524. $elements_in_stack = count($this->stack); 
  2525.  
  2526. if($this->stack[$elements_in_stack - 1]->tagName === 'option' && 
  2527. $this->stack[$elements_in_stack - 2]->tagName === 'optgroup') { 
  2528. $this->emitToken(array( 
  2529. 'name' => 'option',  
  2530. 'type' => HTML5_Tokenizer::ENDTAG 
  2531. )); 
  2532.  
  2533. /** If the current node is an optgroup element, then pop that node 
  2534. from the stack of open elements. Otherwise, this is a parse error,  
  2535. ignore the token. */ 
  2536. if(end($this->stack)->tagName === 'optgroup') { 
  2537. array_pop($this->stack); 
  2538. } else { 
  2539. // parse error 
  2540. $this->ignored = true; 
  2541.  
  2542. /** An end tag token whose tag name is "option" */ 
  2543. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2544. $token['name'] === 'option') { 
  2545. /** If the current node is an option element, then pop that node 
  2546. from the stack of open elements. Otherwise, this is a parse error,  
  2547. ignore the token. */ 
  2548. if(end($this->stack)->tagName === 'option') { 
  2549. array_pop($this->stack); 
  2550. } else { 
  2551. // parse error 
  2552. $this->ignored = true; 
  2553.  
  2554. /** An end tag whose tag name is "select" */ 
  2555. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2556. $token['name'] === 'select') { 
  2557. /** If the stack of open elements does not have an element in table 
  2558. scope with the same tag name as the token, this is a parse error. 
  2559. Ignore the token. (fragment case) */ 
  2560. if(!$this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2561. $this->ignored = true; 
  2562. // parse error 
  2563.  
  2564. /** Otherwise: */ 
  2565. } else { 
  2566. /** Pop elements from the stack of open elements until a select 
  2567. element has been popped from the stack. */ 
  2568. do { 
  2569. $node = array_pop($this->stack); 
  2570. } while ($node->tagName !== 'select'); 
  2571.  
  2572. /** Reset the insertion mode appropriately. */ 
  2573. $this->resetInsertionMode(); 
  2574.  
  2575. /** A start tag whose tag name is "select" */ 
  2576. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'select') { 
  2577. /** Parse error. Act as if the token had been an end tag with the 
  2578. tag name "select" instead. */ 
  2579. $this->emitToken(array( 
  2580. 'name' => 'select',  
  2581. 'type' => HTML5_Tokenizer::ENDTAG 
  2582. )); 
  2583.  
  2584. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2585. ($token['name'] === 'input' || $token['name'] === 'keygen' || $token['name'] === 'textarea')) { 
  2586. // parse error 
  2587. $this->emitToken(array( 
  2588. 'name' => 'select',  
  2589. 'type' => HTML5_Tokenizer::ENDTAG 
  2590. )); 
  2591. $this->emitToken($token); 
  2592.  
  2593. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'script') { 
  2594. $this->processWithRulesFor($token, self::IN_HEAD); 
  2595.  
  2596. } elseif($token['type'] === HTML5_Tokenizer::EOF) { 
  2597. // XERROR: If the current node is not the root html element, then this is a parse error. 
  2598. /** Stop parsing */ 
  2599.  
  2600. /** Anything else */ 
  2601. } else { 
  2602. /** Parse error. Ignore the token. */ 
  2603. $this->ignored = true; 
  2604. break; 
  2605.  
  2606. case self::IN_SELECT_IN_TABLE: 
  2607.  
  2608. if($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2609. in_array($token['name'], array('caption', 'table', 'tbody',  
  2610. 'tfoot', 'thead', 'tr', 'td', 'th'))) { 
  2611. // parse error 
  2612. $this->emitToken(array( 
  2613. 'name' => 'select',  
  2614. 'type' => HTML5_Tokenizer::ENDTAG,  
  2615. )); 
  2616. $this->emitToken($token); 
  2617.  
  2618. /** An end tag whose tag name is one of: "caption", "table", "tbody",  
  2619. "tfoot", "thead", "tr", "td", "th" */ 
  2620. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2621. in_array($token['name'], array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'))) { 
  2622. /** Parse error. */ 
  2623. // parse error 
  2624.  
  2625. /** If the stack of open elements has an element in table scope with 
  2626. the same tag name as that of the token, then act as if an end tag 
  2627. with the tag name "select" had been seen, and reprocess the token. 
  2628. Otherwise, ignore the token. */ 
  2629. if($this->elementInScope($token['name'], self::SCOPE_TABLE)) { 
  2630. $this->emitToken(array( 
  2631. 'name' => 'select',  
  2632. 'type' => HTML5_Tokenizer::ENDTAG 
  2633. )); 
  2634.  
  2635. $this->emitToken($token); 
  2636. } else { 
  2637. $this->ignored = true; 
  2638. } else { 
  2639. $this->processWithRulesFor($token, self::IN_SELECT); 
  2640. break; 
  2641.  
  2642. case self::IN_FOREIGN_CONTENT: 
  2643. if ($token['type'] === HTML5_Tokenizer::CHARACTER || 
  2644. $token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  2645. $this->insertText($token['data']); 
  2646. } elseif ($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2647. $this->insertComment($token['data']); 
  2648. } elseif ($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  2649. // XERROR: parse error 
  2650. } elseif ($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2651. $token['name'] === 'script' && end($this->stack)->tagName === 'script' && 
  2652. // XDOM 
  2653. end($this->stack)->namespaceURI === self::NS_SVG) { 
  2654. array_pop($this->stack); 
  2655. // a bunch of script running mumbo jumbo 
  2656. } elseif ( 
  2657. ($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2658. (( 
  2659. $token['name'] !== 'mglyph' && 
  2660. $token['name'] !== 'malignmark' && 
  2661. // XDOM 
  2662. end($this->stack)->namespaceURI === self::NS_MATHML && 
  2663. in_array(end($this->stack)->tagName, array('mi', 'mo', 'mn', 'ms', 'mtext')) 
  2664. ) || 
  2665. $token['name'] === 'svg' && 
  2666. // XDOM 
  2667. end($this->stack)->namespaceURI === self::NS_MATHML && 
  2668. end($this->stack)->tagName === 'annotation-xml' 
  2669. ) || 
  2670. // XDOM 
  2671. end($this->stack)->namespaceURI === self::NS_SVG && 
  2672. in_array(end($this->stack)->tagName, array('foreignObject', 'desc', 'title')) 
  2673. ) || 
  2674. // XSKETCHY && XDOM 
  2675. end($this->stack)->namespaceURI === self::NS_HTML 
  2676. )) 
  2677. ) || $token['type'] === HTML5_Tokenizer::ENDTAG 
  2678. ) { 
  2679. $this->processWithRulesFor($token, $this->secondary_mode); 
  2680. /** If, after doing so, the insertion mode is still "in foreign  
  2681. * content", but there is no element in scope that has a namespace  
  2682. * other than the HTML namespace, switch the insertion mode to the  
  2683. * secondary insertion mode. */ 
  2684. if ($this->mode === self::IN_FOREIGN_CONTENT) { 
  2685. $found = false; 
  2686. // this basically duplicates elementInScope() 
  2687. for ($i = count($this->stack) - 1; $i >= 0; $i--) { 
  2688. // XDOM 
  2689. $node = $this->stack[$i]; 
  2690. if ($node->namespaceURI !== self::NS_HTML) { 
  2691. $found = true; 
  2692. break; 
  2693. } elseif (in_array($node->tagName, array('table', 'html',  
  2694. 'applet', 'caption', 'td', 'th', 'button', 'marquee',  
  2695. 'object')) || ($node->tagName === 'foreignObject' && 
  2696. $node->namespaceURI === self::NS_SVG)) { 
  2697. break; 
  2698. if (!$found) { 
  2699. $this->mode = $this->secondary_mode; 
  2700. } elseif ($token['type'] === HTML5_Tokenizer::EOF || ( 
  2701. $token['type'] === HTML5_Tokenizer::STARTTAG && 
  2702. (in_array($token['name'], array('b', "big", "blockquote", "body", "br",  
  2703. "center", "code", "dc", "dd", "div", "dl", "ds", "dt", "em", "embed", "h1", "h2",  
  2704. "h3", "h4", "h5", "h6", "head", "hr", "i", "img", "li", "listing",  
  2705. "menu", "meta", "nobr", "ol", "p", "pre", "ruby", "s", "small",  
  2706. "span", "strong", "strike", "sub", "sup", "table", "tt", "u", "ul",  
  2707. "var")) || ($token['name'] === 'font' && ($this->getAttr($token, 'color') || 
  2708. $this->getAttr($token, 'face') || $this->getAttr($token, 'size')))))) { 
  2709. // XERROR: parse error 
  2710. do { 
  2711. $node = array_pop($this->stack); 
  2712. // XDOM 
  2713. } while ($node->namespaceURI !== self::NS_HTML); 
  2714. $this->stack[] = $node; 
  2715. $this->mode = $this->secondary_mode; 
  2716. $this->emitToken($token); 
  2717. } elseif ($token['type'] === HTML5_Tokenizer::STARTTAG) { 
  2718. static $svg_lookup = array( 
  2719. 'altglyph' => 'altGlyph',  
  2720. 'altglyphdef' => 'altGlyphDef',  
  2721. 'altglyphitem' => 'altGlyphItem',  
  2722. 'animatecolor' => 'animateColor',  
  2723. 'animatemotion' => 'animateMotion',  
  2724. 'animatetransform' => 'animateTransform',  
  2725. 'clippath' => 'clipPath',  
  2726. 'feblend' => 'feBlend',  
  2727. 'fecolormatrix' => 'feColorMatrix',  
  2728. 'fecomponenttransfer' => 'feComponentTransfer',  
  2729. 'fecomposite' => 'feComposite',  
  2730. 'feconvolvematrix' => 'feConvolveMatrix',  
  2731. 'fediffuselighting' => 'feDiffuseLighting',  
  2732. 'fedisplacementmap' => 'feDisplacementMap',  
  2733. 'fedistantlight' => 'feDistantLight',  
  2734. 'feflood' => 'feFlood',  
  2735. 'fefunca' => 'feFuncA',  
  2736. 'fefuncb' => 'feFuncB',  
  2737. 'fefuncg' => 'feFuncG',  
  2738. 'fefuncr' => 'feFuncR',  
  2739. 'fegaussianblur' => 'feGaussianBlur',  
  2740. 'feimage' => 'feImage',  
  2741. 'femerge' => 'feMerge',  
  2742. 'femergenode' => 'feMergeNode',  
  2743. 'femorphology' => 'feMorphology',  
  2744. 'feoffset' => 'feOffset',  
  2745. 'fepointlight' => 'fePointLight',  
  2746. 'fespecularlighting' => 'feSpecularLighting',  
  2747. 'fespotlight' => 'feSpotLight',  
  2748. 'fetile' => 'feTile',  
  2749. 'feturbulence' => 'feTurbulence',  
  2750. 'foreignobject' => 'foreignObject',  
  2751. 'glyphref' => 'glyphRef',  
  2752. 'lineargradient' => 'linearGradient',  
  2753. 'radialgradient' => 'radialGradient',  
  2754. 'textpath' => 'textPath',  
  2755. ); 
  2756. // XDOM 
  2757. $current = end($this->stack); 
  2758. if ($current->namespaceURI === self::NS_MATHML) { 
  2759. $token = $this->adjustMathMLAttributes($token); 
  2760. if ($current->namespaceURI === self::NS_SVG && 
  2761. isset($svg_lookup[$token['name']])) { 
  2762. $token['name'] = $svg_lookup[$token['name']]; 
  2763. if ($current->namespaceURI === self::NS_SVG) { 
  2764. $token = $this->adjustSVGAttributes($token); 
  2765. $token = $this->adjustForeignAttributes($token); 
  2766. $this->insertForeignElement($token, $current->namespaceURI); 
  2767. if (isset($token['self-closing'])) { 
  2768. array_pop($this->stack); 
  2769. // XERROR: acknowledge self-closing flag 
  2770. break; 
  2771.  
  2772. case self::AFTER_BODY: 
  2773. /** Handle the token as follows: */ 
  2774.  
  2775. /** A character token that is one of one of U+0009 CHARACTER TABULATION,  
  2776. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  2777. or U+0020 SPACE */ 
  2778. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  2779. /** Process the token as it would be processed if the insertion mode 
  2780. was "in body". */ 
  2781. $this->processWithRulesFor($token, self::IN_BODY); 
  2782.  
  2783. /** A comment token */ 
  2784. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2785. /** Append a Comment node to the first element in the stack of open 
  2786. elements (the html element), with the data attribute set to the 
  2787. data given in the comment token. */ 
  2788. // XDOM 
  2789. $comment = $this->dom->createComment($token['data']); 
  2790. $this->stack[0]->appendChild($comment); 
  2791.  
  2792. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  2793. // parse error 
  2794.  
  2795. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  2796. $this->processWithRulesFor($token, self::IN_BODY); 
  2797.  
  2798. /** An end tag with the tag name "html" */ 
  2799. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && $token['name'] === 'html') { 
  2800. /** If the parser was originally created as part of the HTML 
  2801. * fragment parsing algorithm, this is a parse error; ignore 
  2802. * the token. (fragment case) */ 
  2803. $this->ignored = true; 
  2804. // XERROR: implement this 
  2805.  
  2806. $this->mode = self::AFTER_AFTER_BODY; 
  2807.  
  2808. } elseif($token['type'] === HTML5_Tokenizer::EOF) { 
  2809. /** Stop parsing */ 
  2810.  
  2811. /** Anything else */ 
  2812. } else { 
  2813. /** Parse error. Set the insertion mode to "in body" and reprocess 
  2814. the token. */ 
  2815. $this->mode = self::IN_BODY; 
  2816. $this->emitToken($token); 
  2817. break; 
  2818.  
  2819. case self::IN_FRAMESET: 
  2820. /** Handle the token as follows: */ 
  2821.  
  2822. /** A character token that is one of one of U+0009 CHARACTER TABULATION,  
  2823. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  2824. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ 
  2825. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  2826. /** Append the character to the current node. */ 
  2827. $this->insertText($token['data']); 
  2828.  
  2829. /** A comment token */ 
  2830. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2831. /** Append a Comment node to the current node with the data 
  2832. attribute set to the data given in the comment token. */ 
  2833. $this->insertComment($token['data']); 
  2834.  
  2835. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  2836. // parse error 
  2837.  
  2838. /** A start tag with the tag name "frameset" */ 
  2839. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2840. $token['name'] === 'frameset') { 
  2841. $this->insertElement($token); 
  2842.  
  2843. /** An end tag with the tag name "frameset" */ 
  2844. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2845. $token['name'] === 'frameset') { 
  2846. /** If the current node is the root html element, then this is a 
  2847. parse error; ignore the token. (fragment case) */ 
  2848. if(end($this->stack)->tagName === 'html') { 
  2849. $this->ignored = true; 
  2850. // Parse error 
  2851.  
  2852. } else { 
  2853. /** Otherwise, pop the current node from the stack of open 
  2854. elements. */ 
  2855. array_pop($this->stack); 
  2856.  
  2857. /** If the parser was not originally created as part of the HTML  
  2858. * fragment parsing algorithm (fragment case), and the current  
  2859. * node is no longer a frameset element, then switch the  
  2860. * insertion mode to "after frameset". */ 
  2861. $this->mode = self::AFTER_FRAMESET; 
  2862.  
  2863. /** A start tag with the tag name "frame" */ 
  2864. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2865. $token['name'] === 'frame') { 
  2866. /** Insert an HTML element for the token. */ 
  2867. $this->insertElement($token); 
  2868.  
  2869. /** Immediately pop the current node off the stack of open elements. */ 
  2870. array_pop($this->stack); 
  2871.  
  2872. // XERROR: Acknowledge the token's self-closing flag, if it is set. 
  2873.  
  2874. /** A start tag with the tag name "noframes" */ 
  2875. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2876. $token['name'] === 'noframes') { 
  2877. /** Process the token using the rules for the "in head" insertion mode. */ 
  2878. $this->processwithRulesFor($token, self::IN_HEAD); 
  2879.  
  2880. } elseif($token['type'] === HTML5_Tokenizer::EOF) { 
  2881. // XERROR: If the current node is not the root html element, then this is a parse error. 
  2882. /** Stop parsing */ 
  2883. /** Anything else */ 
  2884. } else { 
  2885. /** Parse error. Ignore the token. */ 
  2886. $this->ignored = true; 
  2887. break; 
  2888.  
  2889. case self::AFTER_FRAMESET: 
  2890. /** Handle the token as follows: */ 
  2891.  
  2892. /** A character token that is one of one of U+0009 CHARACTER TABULATION,  
  2893. U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),  
  2894. U+000D CARRIAGE RETURN (CR), or U+0020 SPACE */ 
  2895. if($token['type'] === HTML5_Tokenizer::SPACECHARACTER) { 
  2896. /** Append the character to the current node. */ 
  2897. $this->insertText($token['data']); 
  2898.  
  2899. /** A comment token */ 
  2900. } elseif($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2901. /** Append a Comment node to the current node with the data 
  2902. attribute set to the data given in the comment token. */ 
  2903. $this->insertComment($token['data']); 
  2904.  
  2905. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE) { 
  2906. // parse error 
  2907.  
  2908. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html') { 
  2909. $this->processWithRulesFor($token, self::IN_BODY); 
  2910.  
  2911. /** An end tag with the tag name "html" */ 
  2912. } elseif($token['type'] === HTML5_Tokenizer::ENDTAG && 
  2913. $token['name'] === 'html') { 
  2914. $this->mode = self::AFTER_AFTER_FRAMESET; 
  2915.  
  2916. /** A start tag with the tag name "noframes" */ 
  2917. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && 
  2918. $token['name'] === 'noframes') { 
  2919. $this->processWithRulesFor($token, self::IN_HEAD); 
  2920.  
  2921. } elseif($token['type'] === HTML5_Tokenizer::EOF) { 
  2922. /** Stop parsing */ 
  2923.  
  2924. /** Anything else */ 
  2925. } else { 
  2926. /** Parse error. Ignore the token. */ 
  2927. $this->ignored = true; 
  2928. break; 
  2929.  
  2930. case self::AFTER_AFTER_BODY: 
  2931. /** A comment token */ 
  2932. if($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2933. /** Append a Comment node to the Document object with the data 
  2934. attribute set to the data given in the comment token. */ 
  2935. // XDOM 
  2936. $comment = $this->dom->createComment($token['data']); 
  2937. $this->dom->appendChild($comment); 
  2938.  
  2939. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE || 
  2940. $token['type'] === HTML5_Tokenizer::SPACECHARACTER || 
  2941. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) { 
  2942. $this->processWithRulesFor($token, self::IN_BODY); 
  2943.  
  2944. /** An end-of-file token */ 
  2945. } elseif($token['type'] === HTML5_Tokenizer::EOF) { 
  2946. /** OMG DONE!! */ 
  2947. } else { 
  2948. // parse error 
  2949. $this->mode = self::IN_BODY; 
  2950. $this->emitToken($token); 
  2951. break; 
  2952.  
  2953. case self::AFTER_AFTER_FRAMESET: 
  2954. /** A comment token */ 
  2955. if($token['type'] === HTML5_Tokenizer::COMMENT) { 
  2956. /** Append a Comment node to the Document object with the data 
  2957. attribute set to the data given in the comment token. */ 
  2958. // XDOM 
  2959. $comment = $this->dom->createComment($token['data']); 
  2960. $this->dom->appendChild($comment); 
  2961.  
  2962. } elseif($token['type'] === HTML5_Tokenizer::DOCTYPE || 
  2963. $token['type'] === HTML5_Tokenizer::SPACECHARACTER || 
  2964. ($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'html')) { 
  2965. $this->processWithRulesFor($token, self::IN_BODY); 
  2966.  
  2967. /** An end-of-file token */ 
  2968. } elseif($token['type'] === HTML5_Tokenizer::EOF) { 
  2969. /** OMG DONE!! */ 
  2970. } elseif($token['type'] === HTML5_Tokenizer::STARTTAG && $token['name'] === 'nofrmaes') { 
  2971. $this->processWithRulesFor($token, self::IN_HEAD); 
  2972. } else { 
  2973. // parse error 
  2974. break; 
  2975. // end funky indenting 
  2976.  
  /**
   * Builds a DOM element in the HTML namespace from a tag token and pushes it
   * onto the stack of open elements.
   *
   * Attributes are copied from $token['attr'] in order. An attribute is
   * skipped when the element already has one of that name (so the first
   * occurrence wins) or when its name does not start with an ASCII letter,
   * an underscore or a colon.
   *
   * @param array $token  Tag token; reads 'name' and, when present, 'attr'
   *                      (a list of arrays with 'name' and 'value' keys).
   * @param bool  $append When true the element is also attached to the tree
   *                      through appendToRealParent(); when false it is only
   *                      pushed onto the stack.
   * @return DOMElement The newly created element.
   */
  private function insertElement($token, $append = true) {
      $element = $this->dom->createElementNS(self::NS_HTML, $token['name']);

      $attributes = empty($token['attr']) ? array() : $token['attr'];
      foreach ($attributes as $attribute) {
          $attr_name = $attribute['name'];

          // Keep the first value seen for a repeated attribute name.
          if ($element->hasAttribute($attr_name)) {
              continue;
          }
          // Only names with an acceptable first character are copied.
          if (!preg_match("/^[a-zA-Z_:]/", $attr_name)) {
              continue;
          }

          $element->setAttribute($attr_name, $attribute['value']);
      }

      if ($append) {
          $this->appendToRealParent($element);
      }

      // The element becomes the current node whether or not it was appended.
      $this->stack[] = $element;

      return $element;
  }
  2989.  
  /**
   * Appends character data to the tree as a new text node.
   *
   * Nothing is inserted for an empty string. While $this->ignore_lf_token is
   * set (the tree builder wants the next line feed dropped), a single leading
   * "\n" is removed first; if that leaves no data, nothing is inserted.
   *
   * @param string $data Character data taken from the token.
   * @return void
   */
  private function insertText($data) {
      if ($data === '') {
          return;
      }

      if ($this->ignore_lf_token && $data[0] === "\n") {
          $data = substr($data, 1);
          // substr() returns false before PHP 8.0 but '' from PHP 8.0 onwards
          // when nothing is left. Treat both the same so that no empty text
          // node is created on newer PHP versions.
          if ($data === false || $data === '') {
              return;
          }
      }

      $text = $this->dom->createTextNode($data);
      $this->appendToRealParent($text);
  }
  2998.  
  /**
   * Creates a comment node holding $data and attaches it to the tree through
   * appendToRealParent().
   *
   * @param string $data Text of the comment.
   * @return void
   */
  private function insertComment($data) {
      $this->appendToRealParent($this->dom->createComment($data));
  }
  3002.  
  /**
   * Attaches a node to the tree, diverting to fosterParent() when required.
   *
   * The node is normally appended to the current node (the last entry of
   * $this->stack). When $this->foster_parent is enabled and the current node
   * is a table, tbody, tfoot, thead or tr element, the node is handed to
   * fosterParent() instead of being appended there.
   *
   * @param DOMNode $node Node to attach.
   * @return void
   */
  private function appendToRealParent($node) {
      $current = end($this->stack);

      // Diversion only applies while foster parenting is switched on, and
      // only when the current node is one of the table-structure elements.
      if ($this->foster_parent && in_array($current->tagName,
          array('table', 'tbody', 'tfoot', 'thead', 'tr'))) {
          $this->fosterParent($node);
          return;
      }

      $current->appendChild($node);
  }
  3013.  
  /**
   * Tells whether an element with the given tag name is in scope on the
   * stack of open elements.
   *
   * @param string|array $el    A tag name, or a list of tag names of which
   *                            any one being in scope is sufficient.
   * @param mixed        $scope self::SCOPE (default), self::SCOPE_TABLE or
   *                            self::SCOPE_LISTITEM.
   * @return bool|null True on a match, false when a scope boundary is hit
   *                   first; null if the whole stack is walked without
   *                   hitting either.
   */
  private function elementInScope($el, $scope = self::SCOPE) {
      if (is_array($el)) {
          foreach ($el as $candidate) {
              if ($this->elementInScope($candidate, $scope)) {
                  return true;
              }
          }

          return false;
      }

      // Boundaries for "in scope" and "in list item scope" (not table scope).
      $general_boundaries = array('applet', 'caption', 'td', 'th', 'button', 'marquee', 'object');
      // Additional boundaries for "in list item scope".
      $list_boundaries = array('ol', 'ul');

      // Walk from the current node (end of the stack) towards the root.
      for ($idx = count($this->stack) - 1; $idx >= 0; $idx--) {
          $node = $this->stack[$idx];
          $tag = $node->tagName;

          // The target itself: terminate in a match state.
          if ($tag === $el) {
              return true;
          }

          // Boundaries common to every kind of scope.
          if ($tag === 'table' || $tag === 'html') {
              return false;
          }

          if ($scope !== self::SCOPE_TABLE) {
              if (in_array($tag, $general_boundaries)) {
                  return false;
              }
              if ($tag === 'foreignObject' && $node->namespaceURI === self::NS_SVG) {
                  return false;
              }
          }

          if ($scope === self::SCOPE_LISTITEM && in_array($tag, $list_boundaries)) {
              return false;
          }

          // Otherwise move on to the previous entry in the stack.
      }
  }
  3057.  
  /**
   * Reconstructs the active formatting elements: every entry after the last
   * marker / still-open element in $this->a_formatting is cloned, inserted at
   * the current insertion point, pushed onto the stack of open elements, and
   * substituted for the original entry in the list.
   *
   * @return false|null False when there is nothing to reconstruct, otherwise
   *                    null once the clones have been inserted.
   */
  private function reconstructActiveFormattingElements() {
      /** 1. If there are no entries in the list of active formatting
      elements, then there is nothing to reconstruct; stop this algorithm. */
      $total = count($this->a_formatting);

      if ($total === 0) {
          return false;
      }

      /** 2. If the last (most recently added) entry in the list of active
      formatting elements is a marker, or if it is an element that is in the
      stack of open elements, then there is nothing to reconstruct; stop this
      algorithm. */
      /** 3. Let entry be the last (most recently added) element in the list
      of active formatting elements. */
      $entry = end($this->a_formatting);

      if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
          return false;
      }

      $a = $total - 1;
      // Whether step 7 (advance) has to run before the next clone is made.
      $advance = false;

      /** 4. If there are no entries before entry in the list of active
      formatting elements, then jump to step 8. */
      while ($a > 0) {
          /** 5. Let entry be the entry one earlier than entry in the list of
          active formatting elements. */
          $a--;
          $entry = $this->a_formatting[$a];

          /** 6. If entry is neither a marker nor an element that is also in
          the stack of open elements, go to step 4. */
          if ($entry === self::MARKER || in_array($entry, $this->stack, true)) {
              // The rewind stopped on an entry that must NOT be cloned, so
              // step 7 has to move forward to the first entry after it.
              $advance = true;
              break;
          }
      }

      while (true) {
          /** 7. Let entry be the element one later than entry in the list of
          active formatting elements. */
          if ($advance) {
              $a++;
              $entry = $this->a_formatting[$a];
          }

          /** 8. Perform a shallow clone of the element entry to obtain clone. */
          $clone = $entry->cloneNode();

          /** 9. Append clone to the current node and push it onto the stack
          of open elements so that it is the new current node. */
          $this->appendToRealParent($clone);
          $this->stack[] = $clone;

          /** 10. Replace the entry for entry in the list with an entry for
          clone. */
          $this->a_formatting[$a] = $clone;

          /** 11. If the entry for clone in the list of active formatting
          elements is not the last entry in the list, return to step 7. */
          if (end($this->a_formatting) === $clone) {
              break;
          }
          $advance = true;
      }
  }
  3119.  
  /**
   * Clears the list of active formatting elements up to the last marker:
   * entries are removed from the end of $this->a_formatting until a marker
   * has been removed.
   *
   * If the list contains no marker it is simply emptied; the loop stops once
   * nothing is left to remove instead of spinning forever.
   *
   * @return void
   */
  private function clearTheActiveFormattingElementsUpToTheLastMarker() {
      while (!empty($this->a_formatting)) {
          /** 1. Let entry be the last (most recently added) entry in the list
          of active formatting elements. */
          /** 2. Remove entry from the list of active formatting elements. */
          $entry = array_pop($this->a_formatting);

          /** 3. If entry was a marker, then stop the algorithm at this point.
          The list has been cleared up to the last marker. */
          if ($entry === self::MARKER) {
              break;
          }
      }
  }
  3137.  
  /**
   * Generates implied end tags: pops the current node off the stack of open
   * elements for as long as its tag name is one of dc, dd, ds, dt, li, p,
   * td, th or tr and is not listed in $exclude.
   *
   * NOTE(review): the specification text this was written against also
   * names option, optgroup, rp and rt and does not name td, th or tr; the
   * list below is kept exactly as it was - confirm which is intended.
   *
   * @param array $exclude Tag names that must not be popped.
   * @return void
   */
  private function generateImpliedEndTags($exclude = array()) {
      $implied = array_diff(array('dc', 'dd', 'ds', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude);

      // Stop on an empty stack rather than dereferencing end()'s false.
      while (!empty($this->stack) && in_array(end($this->stack)->tagName, $implied, true)) {
          array_pop($this->stack);
      }
  }
  3149.  
  /**
   * Classifies an element by tag name, testing the special, scoping and
   * formatting lists in that order; anything else is phrasing.
   *
   * @param mixed $node Element whose tagName is looked up.
   * @return mixed One of self::SPECIAL, self::SCOPING, self::FORMATTING or
   *               self::PHRASING.
   */
  private function getElementCategory($node) {
      // Debugging aid: dump a backtrace when something other than an object
      // was passed in.
      if (!is_object($node)) {
          debug_print_backtrace();
      }

      $tag = $node->tagName;

      if (in_array($tag, $this->special)) {
          return self::SPECIAL;
      }
      if (in_array($tag, $this->scoping)) {
          return self::SCOPING;
      }
      if (in_array($tag, $this->formatting)) {
          return self::FORMATTING;
      }

      return self::PHRASING;
  }
  3164.  
  /**
   * Clears the stack of open elements back to a table context: elements are
   * popped until the current node's tag name is one of $elements.
   *
   * Popping also stops when the stack runs empty, so a stack that holds none
   * of the requested tag names can no longer cause an endless loop.
   *
   * @param array $elements Tag names at which popping stops.
   * @return void
   */
  private function clearStackToTableContext($elements) {
      while (!empty($this->stack)) {
          $name = end($this->stack)->tagName;

          if (in_array($name, $elements)) {
              break;
          }

          array_pop($this->stack);
      }
  }
  3177.  
  /**
   * Resets the insertion mode appropriately by walking the stack of open
   * elements from the current node towards the root and switching
   * $this->mode according to the first node that decides it.
   *
   * When the walk reaches the first node in the stack, that node is replaced
   * by $context. If $context is null nothing is dereferenced and the mode
   * falls through to "in body" (step 16).
   *
   * @param mixed $context Context element for the fragment case, or null.
   * @return void
   */
  private function resetInsertionMode($context = null) {
      // Steps 4-10: tag names tested before the foreign-namespace check.
      $before_foreign = array(
          'select'   => self::IN_SELECT,        // 4. (fragment case)
          'td'       => self::IN_CELL,          // 5.
          'th'       => self::IN_CELL,          // 5.
          'tr'       => self::IN_ROW,           // 6.
          'tbody'    => self::IN_TABLE_BODY,    // 7.
          'thead'    => self::IN_TABLE_BODY,    // 7.
          'tfoot'    => self::IN_TABLE_BODY,    // 7.
          'caption'  => self::IN_CAPTION,       // 8.
          'colgroup' => self::IN_COLUMN_GROUP,  // 9. (innerHTML case)
          'table'    => self::IN_TABLE,         // 10.
      );
      // Steps 12-14: tag names tested after the foreign-namespace check.
      $after_foreign = array(
          'head'     => self::IN_BODY,          // 12. "in body", not "in head"!
          'body'     => self::IN_BODY,          // 13.
          'frameset' => self::IN_FRAMESET,      // 14. (fragment case)
      );

      /** 1. Let last be false. */
      $last = false;

      for ($n = count($this->stack) - 1; $n >= 0; $n--) {
          /** 2. Let node be the last node in the stack of open elements. */
          $node = $this->stack[$n];

          /** 3. If node is the first node in the stack of open elements, then
           * set last to true and set node to the context element. (fragment
           * case) */
          if ($this->stack[0]->isSameNode($node)) {
              $last = true;
              $node = $context;
          }

          // $context may be null outside the fragment case; treat that as a
          // node that matches none of the steps below.
          $tag = ($node === null) ? null : $node->tagName;
          $namespace = ($node === null) ? null : $node->namespaceURI;

          if (is_string($tag) && array_key_exists($tag, $before_foreign)) {
              $this->mode = $before_foreign[$tag];
              return;
          }

          /** 11. If node is an element from the MathML namespace or the SVG
           * namespace, then switch the insertion mode to "in foreign
           * content", let the secondary insertion mode be "in body", and
           * abort these steps. */
          if ($namespace === self::NS_SVG || $namespace === self::NS_MATHML) {
              $this->mode = self::IN_FOREIGN_CONTENT;
              $this->secondary_mode = self::IN_BODY;
              return;
          }

          if (is_string($tag) && array_key_exists($tag, $after_foreign)) {
              $this->mode = $after_foreign[$tag];
              return;
          }

          /** 15. If node is an html element, then: if the head element
          pointer is null, switch the insertion mode to "before head",
          otherwise, switch the insertion mode to "after head". In either
          case, abort these steps. (fragment case) */
          if ($tag === 'html') {
              $this->mode = ($this->head_pointer === null)
                  ? self::BEFORE_HEAD
                  : self::AFTER_HEAD;
              return;
          }

          /** 16. If last is true, then set the insertion mode to "in body"
          and abort these steps. (fragment case) */
          if ($last) {
              $this->mode = self::IN_BODY;
              return;
          }
      }
  }
  3281.  
  /**
   * Closes the open table cell: if a td element - or failing that a th
   * element - is in table scope, an end tag token with that name is emitted.
   * Nothing happens when neither is in table scope.
   *
   * @return void
   */
  private function closeCell() {
      if ($this->elementInScope('td', self::SCOPE_TABLE)) {
          $cell = 'td';
      } elseif ($this->elementInScope('th', self::SCOPE_TABLE)) {
          $cell = 'th';
      } else {
          return;
      }

      // Act as if an end tag token with that tag name had been seen.
      $end_tag = array(
          'name' => $cell,
          'type' => HTML5_Tokenizer::ENDTAG
      );
      $this->emitToken($end_tag);
  }
  3293.  
  /**
   * Processes a token "using the rules for" another insertion mode by
   * handing both the token and that mode to emitToken(). Per the
   * specification wording, the insertion mode itself stays unchanged unless
   * the rules of that mode switch it.
   *
   * @param array $token Token to process.
   * @param mixed $mode  Insertion mode whose rules should be applied.
   * @return mixed Whatever emitToken() returns.
   */
  private function processWithRulesFor($token, $mode) {
      $result = $this->emitToken($token, $mode);

      return $result;
  }
  3301.  
  /**
   * Inserts an element for the token and switches to CDATA handling: the
   * current insertion mode is remembered in $this->original_mode, the mode
   * becomes IN_CDATA_RCDATA and the content model becomes CDATA.
   *
   * @param array $token Start tag token of the element.
   * @return void
   */
  private function insertCDATAElement($token) {
      $this->insertElement($token);

      // Remember where to come back to before changing the mode.
      $previous_mode = $this->mode;
      $this->content_model = HTML5_Tokenizer::CDATA;
      $this->original_mode = $previous_mode;
      $this->mode = self::IN_CDATA_RCDATA;
  }
  3307.  
  /**
   * Inserts an element for the token and switches to RCDATA handling: the
   * current insertion mode is remembered in $this->original_mode, the mode
   * becomes IN_CDATA_RCDATA and the content model becomes RCDATA.
   *
   * @param array $token Start tag token of the element.
   * @return void
   */
  private function insertRCDATAElement($token) {
      $this->insertElement($token);

      // Remember where to come back to before changing the mode.
      $previous_mode = $this->mode;
      $this->content_model = HTML5_Tokenizer::RCDATA;
      $this->original_mode = $previous_mode;
      $this->mode = self::IN_CDATA_RCDATA;
  }
  3313.  
  /**
   * Looks up the value of an attribute on a token.
   *
   * @param array  $token Token; its 'attr' entry, when set, is a list of
   *                      arrays with 'name' and 'value' keys.
   * @param string $key   Attribute name to look for (exact match).
   * @return mixed The value of the last attribute with that name, or false
   *               when the token has no 'attr' entry or no such attribute.
   */
  private function getAttr($token, $key) {
      $value = false;

      if (isset($token['attr'])) {
          foreach ($token['attr'] as $pair) {
              // Later duplicates overwrite earlier ones.
              if ($pair['name'] === $key) {
                  $value = $pair['value'];
              }
          }
      }

      return $value;
  }
  3320.  
  /**
   * Returns the current table: the last table element in the stack of open
   * elements, or - when the stack holds no table element (fragment case) -
   * the first element in the stack (the html element).
   *
   * @return mixed The matching stack entry.
   */
  private function getCurrentTable() {
      $idx = count($this->stack);

      // Search from the current node down towards the root.
      while ($idx-- > 0) {
          $candidate = $this->stack[$idx];
          if ($candidate->tagName === 'table') {
              return $candidate;
          }
      }

      return $this->stack[0];
  }
  3330.  
  /**
   * Determines the foster parent element.
   *
   * - If the stack of open elements holds a table element whose parent is an
   *   element, that parent is the foster parent.
   * - If there is no table element in the stack (innerHTML case), the foster
   *   parent is the first element in the stack (the html element).
   * - If the last table element has no parent, or its parent is not an
   *   element, the foster parent is the element before that table in the
   *   stack.
   *
   * The parent's node type is now tested before the parent is returned, so
   * the third case is actually reachable for non-element parents.
   *
   * @return mixed The foster parent node.
   */
  private function getFosterParent() {
      for ($n = count($this->stack) - 1; $n >= 0; $n--) {
          if ($this->stack[$n]->tagName !== 'table') {
              continue;
          }

          // $this->stack[$n] is the last table element in the stack.
          $parent = $this->stack[$n]->parentNode;

          if ($parent !== null && $parent->nodeType === XML_ELEMENT_NODE) {
              return $parent;
          }

          // No parent, or the parent is not an element: use the entry just
          // before the table. NOTE(review): this presumes the table is never
          // the first stack entry - confirm.
          return $this->stack[$n - 1];
      }

      // No table element in the stack at all.
      return $this->stack[0];
  }
  3357.  
  /**
   * Foster-parents a node.
   *
   * If the foster parent element is the parent of the last table element in
   * the stack of open elements, the node is inserted immediately before that
   * table; otherwise it is appended to the foster parent element.
   *
   * @param mixed $node Node to insert.
   * @return void
   */
  public function fosterParent($node) {
      $foster_parent = $this->getFosterParent();
      // Almost equivalent to the last table element, except it can be html.
      $table = $this->getCurrentTable();
      $table_parent = $table->parentNode;

      // A table without a parent cannot have the foster parent as its parent;
      // checking for null first avoids calling isSameNode() on null.
      $is_parent_of_table = $table->tagName === 'table'
          && $table_parent !== null
          && $table_parent->isSameNode($foster_parent);

      if ($is_parent_of_table) {
          $foster_parent->insertBefore($node, $table);
      } else {
          $foster_parent->appendChild($node);
      }
  }
  3372.  
  /**
   * Debugging helper: echoes the tag names on the stack of open elements,
   * first entry first, on a single line.
   *
   * @return void
   */
  private function printStack() {
      $labels = array();

      foreach ($this->stack as $open_element) {
          $labels[] = $open_element->tagName;
      }

      $listing = implode(', ', $labels);
      echo " -> stack [" . $listing . "]\n";
  }
  3381.  
  /**
   * Debugging helper: echoes the list of active formatting elements on a
   * single line, showing markers as "MARKER". Prints nothing when the list
   * is empty.
   *
   * @return void
   */
  private function printActiveFormattingElements() {
      if (!$this->a_formatting) {
          return;
      }

      $labels = array();

      foreach ($this->a_formatting as $entry) {
          $labels[] = ($entry === self::MARKER) ? 'MARKER' : $entry->tagName;
      }

      $listing = implode(', ', $labels);
      echo " -> active formatting [" . $listing . "]\n";
  }
  3392.  
  /**
   * Tells whether the current table carries a non-empty "tainted" property.
   *
   * @return bool
   */
  public function currentTableIsTainted() {
      $table = $this->getCurrentTable();

      return !empty($table->tainted);
  }
  3395.  
  /**
   * Sets up the tree constructor for building a fragment.
   *
   * The fragment flag is always raised. When a context tag name is given, a
   * context element is created from it, the tokenizer content model is
   * chosen to suit it, a fresh html root is installed as the only stack
   * entry, the insertion mode is reset and the form element pointer is set.
   *
   * @param string|null $context Tag name of the context element, if any.
   * @return void
   */
  public function setupContext($context = null) {
      $this->fragment = true;

      if (!$context) {
          return;
      }

      $context = $this->dom->createElementNS(self::NS_HTML, $context);

      /** 4.1. Set the HTML parser's tokenization stage's content model
       * flag according to the context element, as follows: */
      $content_models = array(
          'title'     => HTML5_Tokenizer::RCDATA,
          'textarea'  => HTML5_Tokenizer::RCDATA,
          'style'     => HTML5_Tokenizer::CDATA,
          'script'    => HTML5_Tokenizer::CDATA,
          'xmp'       => HTML5_Tokenizer::CDATA,
          'iframe'    => HTML5_Tokenizer::CDATA,
          'noembed'   => HTML5_Tokenizer::CDATA,
          'noframes'  => HTML5_Tokenizer::CDATA,
          // XSCRIPT: assuming scripting is enabled
          'noscript'  => HTML5_Tokenizer::CDATA,
          'plaintext' => HTML5_Tokenizer::PLAINTEXT,
      );
      if (array_key_exists($context->tagName, $content_models)) {
          $this->content_model = $content_models[$context->tagName];
      }

      /** 4.2. Let root be a new html element with no attributes. */
      $this->root = $this->dom->createElementNS(self::NS_HTML, 'html');

      /** 4.3 Append the element root to the Document node created above. */
      $this->dom->appendChild($this->root);

      /** 4.4 Set up the parser's stack of open elements so that it
       * contains just the single element root. */
      $this->stack = array($this->root);

      /** 4.5 Reset the parser's insertion mode appropriately. */
      $this->resetInsertionMode($context);

      /** 4.6 Set the parser's form element pointer to the nearest node
       * to the context element that is a form element (going straight up
       * the ancestor chain, and including the element itself, if it is a
       * form element), or, if there is no such form element, to null. */
      for ($node = $context; $node; $node = $node->parentNode) {
          if ($node->tagName === 'form') {
              $this->form_pointer = $node;
              break;
          }
      }
  }
  3440.  
  /**
   * Adjusts MathML attributes on a token: an attribute named
   * "definitionurl" is renamed to "definitionURL".
   *
   * Attributes are rewritten by index, so no reference into the token
   * outlives the loop, and a token without attributes is returned untouched.
   *
   * @param array $token Token whose 'attr' entry (list of arrays with a
   *                     'name' key) is adjusted.
   * @return array The adjusted token.
   */
  public function adjustMathMLAttributes($token) {
      if (empty($token['attr'])) {
          return $token;
      }

      foreach ($token['attr'] as $idx => $kp) {
          if ($kp['name'] === 'definitionurl') {
              $token['attr'][$idx]['name'] = 'definitionURL';
          }
      }

      return $token;
  }
  3446.  
  /**
   * Adjusts SVG attributes on a token: attribute names found in the lookup
   * table (all-lowercase spellings) are replaced by their mixed-case SVG
   * spelling.
   *
   * Attributes are rewritten by index, so no reference into the token
   * outlives the loop, and a token without attributes is returned untouched.
   *
   * @param array $token Token whose 'attr' entry (list of arrays with a
   *                     'name' key) is adjusted.
   * @return array The adjusted token.
   */
  public function adjustSVGAttributes($token) {
      static $lookup = array(
          'attributename' => 'attributeName',
          'attributetype' => 'attributeType',
          'basefrequency' => 'baseFrequency',
          'baseprofile' => 'baseProfile',
          'calcmode' => 'calcMode',
          'clippathunits' => 'clipPathUnits',
          'contentscripttype' => 'contentScriptType',
          'contentstyletype' => 'contentStyleType',
          'diffuseconstant' => 'diffuseConstant',
          'edgemode' => 'edgeMode',
          'externalresourcesrequired' => 'externalResourcesRequired',
          'filterres' => 'filterRes',
          'filterunits' => 'filterUnits',
          'glyphref' => 'glyphRef',
          'gradienttransform' => 'gradientTransform',
          'gradientunits' => 'gradientUnits',
          'kernelmatrix' => 'kernelMatrix',
          'kernelunitlength' => 'kernelUnitLength',
          'keypoints' => 'keyPoints',
          'keysplines' => 'keySplines',
          'keytimes' => 'keyTimes',
          'lengthadjust' => 'lengthAdjust',
          'limitingconeangle' => 'limitingConeAngle',
          'markerheight' => 'markerHeight',
          'markerunits' => 'markerUnits',
          'markerwidth' => 'markerWidth',
          'maskcontentunits' => 'maskContentUnits',
          'maskunits' => 'maskUnits',
          'numoctaves' => 'numOctaves',
          'pathlength' => 'pathLength',
          'patterncontentunits' => 'patternContentUnits',
          'patterntransform' => 'patternTransform',
          'patternunits' => 'patternUnits',
          'pointsatx' => 'pointsAtX',
          'pointsaty' => 'pointsAtY',
          'pointsatz' => 'pointsAtZ',
          'preservealpha' => 'preserveAlpha',
          'preserveaspectratio' => 'preserveAspectRatio',
          'primitiveunits' => 'primitiveUnits',
          'refx' => 'refX',
          'refy' => 'refY',
          'repeatcount' => 'repeatCount',
          'repeatdur' => 'repeatDur',
          'requiredextensions' => 'requiredExtensions',
          'requiredfeatures' => 'requiredFeatures',
          'specularconstant' => 'specularConstant',
          'specularexponent' => 'specularExponent',
          'spreadmethod' => 'spreadMethod',
          'startoffset' => 'startOffset',
          'stddeviation' => 'stdDeviation',
          'stitchtiles' => 'stitchTiles',
          'surfacescale' => 'surfaceScale',
          'systemlanguage' => 'systemLanguage',
          'tablevalues' => 'tableValues',
          'targetx' => 'targetX',
          'targety' => 'targetY',
          'textlength' => 'textLength',
          'viewbox' => 'viewBox',
          'viewtarget' => 'viewTarget',
          'xchannelselector' => 'xChannelSelector',
          'ychannelselector' => 'yChannelSelector',
          'zoomandpan' => 'zoomAndPan',
      );

      if (empty($token['attr'])) {
          return $token;
      }

      foreach ($token['attr'] as $idx => $kp) {
          if (isset($lookup[$kp['name']])) {
              $token['attr'][$idx]['name'] = $lookup[$kp['name']];
          }
      }

      return $token;
  }
  3516.  
  /**
   * Adjusts foreign attributes on a token: attribute names found in the
   * lookup table are replaced by an array of
   * (prefix, local name, namespace URI).
   *
   * Attributes are rewritten by index, so no reference into the token
   * outlives the loop, and a token without attributes is returned untouched.
   *
   * @param array $token Token whose 'attr' entry (list of arrays with a
   *                     'name' key) is adjusted.
   * @return array The adjusted token.
   */
  public function adjustForeignAttributes($token) {
      static $lookup = array(
          'xlink:actuate' => array('xlink', 'actuate', self::NS_XLINK),
          'xlink:arcrole' => array('xlink', 'arcrole', self::NS_XLINK),
          'xlink:href' => array('xlink', 'href', self::NS_XLINK),
          'xlink:role' => array('xlink', 'role', self::NS_XLINK),
          'xlink:show' => array('xlink', 'show', self::NS_XLINK),
          'xlink:title' => array('xlink', 'title', self::NS_XLINK),
          'xlink:type' => array('xlink', 'type', self::NS_XLINK),
          'xml:base' => array('xml', 'base', self::NS_XML),
          'xml:lang' => array('xml', 'lang', self::NS_XML),
          'xml:space' => array('xml', 'space', self::NS_XML),
          'xmlns' => array(null, 'xmlns', self::NS_XMLNS),
          'xmlns:xlink' => array('xmlns', 'xlink', self::NS_XMLNS),
      );

      if (empty($token['attr'])) {
          return $token;
      }

      foreach ($token['attr'] as $idx => $kp) {
          if (isset($lookup[$kp['name']])) {
              $token['attr'][$idx]['name'] = $lookup[$kp['name']];
          }
      }

      return $token;
  }
  3536.  
  /**
   * Creates an element in the given namespace for a tag token, copies the
   * token's attributes onto it, inserts it at the current insertion point
   * and pushes it onto the stack of open elements.
   *
   * An attribute name may be a plain string or an array whose index 1 is the
   * local name and index 2 the namespace URI (the shape produced by
   * adjustForeignAttributes()).
   *
   * @param array  $token        Tag token ('name' and optional 'attr').
   * @param string $namespaceURI Namespace the element is created in.
   * @return void
   */
  public function insertForeignElement($token, $namespaceURI) {
      $el = $this->dom->createElementNS($namespaceURI, $token['name']);
      $pairs = empty($token['attr']) ? array() : $token['attr'];

      foreach ($pairs as $kp) {
          $local = $kp['name'];
          $ns = self::NS_HTML;
          if (is_array($local)) {
              $ns = $local[2];
              $local = $local[1];
          }

          // Attributes that are already present are left alone.
          if ($el->hasAttributeNS($ns, $local)) {
              continue;
          }

          if ($ns === self::NS_XLINK) {
              // XSKETCHY: work around godawful libxml bug
              $el->setAttribute('xlink:'.$local, $kp['value']);
          } elseif ($ns === self::NS_HTML) {
              // Another godawful libxml bug
              $el->setAttribute($local, $kp['value']);
          } else {
              $el->setAttributeNS($ns, $local, $kp['value']);
          }
      }

      $this->appendToRealParent($el);
      $this->stack[] = $el;
      // XERROR: see below
      /** If the newly created element has an xmlns attribute in the XMLNS
       * namespace whose value is not exactly the same as the element's
       * namespace, that is a parse error. Similarly, if the newly created
       * element has an xmlns:xlink attribute in the XMLNS namespace whose
       * value is not the XLink Namespace, that is a parse error. */
  }
  3564.  
  /**
   * Normalizes the document and returns the parse result.
   *
   * @return mixed $this->dom itself when not parsing a fragment; otherwise
   *               the child nodes of the fragment root if one was set up, or
   *               of the document if not.
   */
  public function save() {
      $this->dom->normalize();

      if (!$this->fragment) {
          return $this->dom;
      }

      return $this->root
          ? $this->root->childNodes
          : $this->dom->childNodes;
  }