JSTokenizer

The W3 Total Cache JSTokenizer class.

Defined (1)

The class is defined in the following location(s).

/lib/Minify/JSMinPlus.php  
  /**
   * Tokenizer for JavaScript source text.
   *
   * Tokens are produced on demand by get() and stored in a four-slot ring
   * ($tokens, indexed with "& 3"), which allows up to three tokens to be
   * pushed back with unget() before a lookahead overflow is reported.
   *
   * The TOKEN_* / OP_* constants and the JSToken class are not defined in this
   * class; they are expected to be provided by the surrounding file.
   */
  class JSTokenizer
  {
      /** Byte offset into $source of the next unread character. */
      private $cursor = 0;

      /** The complete source text being tokenized. */
      private $source;

      /** Ring of token objects; only indexes 0..3 are ever used. */
      public $tokens = array();

      /** Index in $tokens of the current token. */
      public $tokenIndex = 0;

      /** Number of tokens that were pushed back with unget(). */
      public $lookahead = 0;

      /** When true, a "\n" is reported as TOKEN_NEWLINE instead of being skipped. */
      public $scanNewlines = false;

      /**
       * When true, "/" may start a regular expression literal and "+" / "-"
       * are reported as their unary variants.
       */
      public $scanOperand = true;

      /** Name used in error messages; '[inline]' when none was given to init(). */
      public $filename;

      /** Current line number, advanced for every "\n" that is consumed. */
      public $lineno;

      /** Identifiers that are reported using their own text as the token type. */
      private $keywords = array(
          'break',
          'case', 'catch', 'const', 'continue',
          'debugger', 'default', 'delete', 'do',
          'else', 'enum',
          'false', 'finally', 'for', 'function',
          'if', 'in', 'instanceof',
          'new', 'null',
          'return',
          'switch',
          'this', 'throw', 'true', 'try', 'typeof',
          'var', 'void',
          'while', 'with'
      );

      /**
       * Operator spellings used to build $opRegExp. Order matters: the regexp
       * is an alternation, so longer operators must precede their prefixes
       * ('===' before '==' before '=').
       */
      private $opTypeNames = array(
          ';', ',', '?', ':', '||', '&&', '|', '^',
          '&', '===', '==', '=', '!==', '!=', '<<', '<=',
          '<', '>>>', '>>', '>=', '>', '++', '--', '+',
          '-', '*', '/', '%', '!', '~', '.', '[',
          ']', '{', '}', '(', ')', '@*/'
      );

      /** Operators that form a compound assignment when directly followed by '='. */
      private $assignOps = array('|', '^', '&', '<<', '>>', '>>>', '+', '-', '*', '/', '%');

      /** Anchored regexp matching any entry of $opTypeNames; built in the constructor. */
      private $opRegExp;

      /**
       * Builds the operator regexp from $opTypeNames.
       */
      public function __construct()
      {
          $quoted = array_map('preg_quote', $this->opTypeNames);
          $this->opRegExp = '#^(' . implode('|', $quoted) . ')#';
      }

      /**
       * Resets the tokenizer and points it at a new source text.
       *
       * @param string $source   JavaScript source to tokenize
       * @param string $filename name used in error messages; '[inline]' if empty
       * @param int    $lineno   line number of the first line of $source
       */
      public function init($source, $filename = '', $lineno = 1)
      {
          $this->source = $source;
          $this->filename = $filename ? $filename : '[inline]';
          $this->lineno = $lineno;

          $this->cursor = 0;
          $this->tokens = array();
          $this->tokenIndex = 0;
          $this->lookahead = 0;
          $this->scanNewlines = false;
          $this->scanOperand = true;
      }

      /**
       * Returns unread source text starting at the cursor.
       *
       * @param int|null $chunksize maximum number of bytes to return; a falsy
       *                            value returns everything up to the end
       * @return string|false result of substr()
       */
      public function getInput($chunksize)
      {
          if ($chunksize) {
              return substr($this->source, $this->cursor, $chunksize);
          }

          return substr($this->source, $this->cursor);
      }

      /**
       * Tells whether the next token is the end-of-input token.
       *
       * @return bool
       */
      public function isDone()
      {
          return $this->peek() == TOKEN_END;
      }

      /**
       * Consumes the next token if it has the given type.
       *
       * On a mismatch the token is pushed back; unget() returns nothing, so
       * the whole expression evaluates to false in that case.
       *
       * @param mixed $tt expected token type
       * @return bool true when the token matched and was consumed
       */
      public function match($tt)
      {
          return $this->get() == $tt || $this->unget();
      }

      /**
       * Consumes the next token, which must have the given type.
       *
       * @param mixed $tt expected token type
       * @return object|null the token that was consumed
       * @throws Exception when the next token has a different type
       */
      public function mustMatch($tt)
      {
          if (!$this->match($tt)) {
              throw $this->newSyntaxError('Unexpected token; token ' . $tt . ' expected');
          }

          return $this->currentToken();
      }

      /**
       * Returns the type of the next token without consuming it.
       *
       * @return mixed token type
       */
      public function peek()
      {
          if ($this->lookahead) {
              // A pushed-back token is available; inspect it in place.
              $next = $this->tokens[($this->tokenIndex + $this->lookahead) & 3];
              if ($this->scanNewlines && $next->lineno != $this->lineno) {
                  // The buffered token sits on another line, so a newline lies in between.
                  $tt = TOKEN_NEWLINE;
              } else {
                  $tt = $next->type;
              }
          } else {
              $tt = $this->get();
              $this->unget();
          }

          return $tt;
      }

      /**
       * Like peek(), but with newline scanning switched on for the duration
       * of the call, so a line break is reported as TOKEN_NEWLINE.
       *
       * @return mixed token type
       */
      public function peekOnSameLine()
      {
          $this->scanNewlines = true;
          $tt = $this->peek();
          $this->scanNewlines = false;

          return $tt;
      }

      /**
       * Returns the token at the current ring position.
       *
       * @return object|null the current token, or null when no token has been
       *                     produced since the last init()
       */
      public function currentToken()
      {
          if (!empty($this->tokens)) {
              return $this->tokens[$this->tokenIndex];
          }

          return null;
      }

      /**
       * Consumes the next token and returns its type.
       *
       * Pushed-back tokens are replayed first. Otherwise whitespace and
       * comments are skipped and one token is scanned from the source. Input
       * is first examined in chunks of $chunksize bytes; when a chunk is not
       * enough to decide (unmatched comment, unterminated string) the scan is
       * retried against the complete remaining input.
       *
       * @param int|null $chunksize bytes to fetch per attempt; null means all
       * @return mixed token type
       * @throws Exception on an unterminated string literal or an illegal token
       */
      public function get($chunksize = 1000)
      {
          while ($this->lookahead) {
              $this->lookahead--;
              $this->tokenIndex = ($this->tokenIndex + 1) & 3;
              $token = $this->tokens[$this->tokenIndex];
              if ($token->type != TOKEN_NEWLINE || $this->scanNewlines) {
                  return $token->type;
              }
          }

          $conditional_comment = false;

          // Only the operator branch below assigns a real value to $op.
          $op = null;

          // strip whitespace and comments
          while (true) {
              $input = $this->getInput($chunksize);

              // whitespace handling; gobble up \r as well (effectively we don't have support for MAC newlines!)
              $re = $this->scanNewlines ? '/^[ \r\t]+/' : '/^\s+/';
              if (preg_match($re, $input, $match)) {
                  $spaces = $match[0];
                  $spacelen = strlen($spaces);
                  $this->cursor += $spacelen;
                  if (!$this->scanNewlines) {
                      $this->lineno += substr_count($spaces, "\n");
                  }

                  if ($spacelen == $chunksize) {
                      continue; // complete chunk contained whitespace
                  }

                  $input = $this->getInput($chunksize);
                  // Loose comparison on purpose: substr() may yield false at end of input.
                  if ($input == '' || $input[0] != '/') {
                      break;
                  }
              }

              // Comments
              if (!preg_match('/^\/(?:\*(@(?:cc_on|if|elif|else|end))?.*?\*\/|\/[^\n]*)/s', $input, $match)) {
                  if (!$chunksize) {
                      break;
                  }

                  // retry with a full chunk fetch; this also prevents breakage of long regular expressions (which will never match a comment)
                  $chunksize = null;
                  continue;
              }

              // check if this is a conditional (JScript) comment
              if (!empty($match[1])) {
                  // Only the opening "/*@keyword" part becomes the token.
                  $match[0] = '/*' . $match[1];
                  $conditional_comment = true;
                  break;
              }

              // Ordinary comment: skip it and keep the line count in sync.
              $this->cursor += strlen($match[0]);
              $this->lineno += substr_count($match[0], "\n");
          }

          if ($input == '') {
              $tt = TOKEN_END;
              $match = array('');
          } elseif ($conditional_comment) {
              $tt = TOKEN_CONDCOMMENT_START;
          } else {
              switch ($input[0]) {
                  case '0':
                      // hexadecimal; isset() guards a lone "0" at the very end of the input
                      if (isset($input[1]) && ($input[1] == 'x' || $input[1] == 'X') && preg_match('/^0x[0-9a-f]+/i', $input, $match)) {
                          $tt = TOKEN_NUMBER;
                          break;
                      }
                      // FALL THROUGH

                  case '1': case '2': case '3': case '4': case '5':
                  case '6': case '7': case '8': case '9':
                      // should always match
                      preg_match('/^\d+(?:\.\d*)?(?:[eE][-+]?\d+)?/', $input, $match);
                      $tt = TOKEN_NUMBER;
                      break;

                  case "'":
                      if (preg_match('/^\'(?:[^\\\\\'\r\n]++|\\\\(?:.|\r?\n))*\'/', $input, $match)) {
                          $tt = TOKEN_STRING;
                      } else {
                          if ($chunksize) {
                              return $this->get(null); // retry with a full chunk fetch
                          }

                          throw $this->newSyntaxError('Unterminated string literal');
                      }
                      break;

                  case '"':
                      if (preg_match('/^"(?:[^\\\\"\r\n]++|\\\\(?:.|\r?\n))*"/', $input, $match)) {
                          $tt = TOKEN_STRING;
                      } else {
                          if ($chunksize) {
                              return $this->get(null); // retry with a full chunk fetch
                          }

                          throw $this->newSyntaxError('Unterminated string literal');
                      }
                      break;

                  case '/':
                      if ($this->scanOperand && preg_match('/^\/((?:\\\\.|\[(?:\\\\.|[^\]])*\]|[^\/])+)\/([gimy]*)/', $input, $match)) {
                          $tt = TOKEN_REGEXP;
                          break;
                      }
                      // FALL THROUGH

                  case '|':
                  case '^':
                  case '&':
                  case '<':
                  case '>':
                  case '+':
                  case '-':
                  case '*':
                  case '%':
                  case '=':
                  case '!':
                      // should always match
                      preg_match($this->opRegExp, $input, $match);
                      $op = $match[0];
                      // substr() instead of a direct offset: the operator may be the last character.
                      if (in_array($op, $this->assignOps, true) && substr($input, strlen($op), 1) === '=') {
                          // Compound assignment; $op keeps the base operator for the token.
                          $tt = OP_ASSIGN;
                          $match[0] .= '=';
                      } else {
                          $tt = $op;
                          if ($this->scanOperand) {
                              if ($op == OP_PLUS) {
                                  $tt = OP_UNARY_PLUS;
                              } elseif ($op == OP_MINUS) {
                                  $tt = OP_UNARY_MINUS;
                              }
                          }
                          $op = null;
                      }
                      break;

                  case '.':
                      if (preg_match('/^\.\d+(?:[eE][-+]?\d+)?/', $input, $match)) {
                          $tt = TOKEN_NUMBER;
                          break;
                      }
                      // FALL THROUGH

                  case ';':
                  case ',':
                  case '?':
                  case ':':
                  case '~':
                  case '[':
                  case ']':
                  case '{':
                  case '}':
                  case '(':
                  case ')':
                      // these are all single
                      $match = array($input[0]);
                      $tt = $input[0];
                      break;

                  case '@':
                      // check end of conditional comment
                      if (substr($input, 0, 3) == '@*/') {
                          $match = array('@*/');
                          $tt = TOKEN_CONDCOMMENT_END;
                      } else {
                          throw $this->newSyntaxError('Illegal token');
                      }
                      break;

                  case "\n":
                      if ($this->scanNewlines) {
                          $match = array("\n");
                          $tt = TOKEN_NEWLINE;
                      } else {
                          throw $this->newSyntaxError('Illegal token');
                      }
                      break;

                  default:
                      // FIXME: add support for unicode and unicode escape sequence \uHHHH
                      if (preg_match('/^[$\w]+/', $input, $match)) {
                          $tt = in_array($match[0], $this->keywords, true) ? $match[0] : TOKEN_IDENTIFIER;
                      } else {
                          throw $this->newSyntaxError('Illegal token');
                      }
              }
          }

          // Advance the ring and (re)use the token object stored in that slot.
          $this->tokenIndex = ($this->tokenIndex + 1) & 3;

          if (!isset($this->tokens[$this->tokenIndex])) {
              $this->tokens[$this->tokenIndex] = new JSToken();
          }

          $token = $this->tokens[$this->tokenIndex];
          $token->type = $tt;

          if ($tt == OP_ASSIGN) {
              $token->assignOp = $op;
          }

          $token->start = $this->cursor;

          $token->value = $match[0];
          $this->cursor += strlen($match[0]);

          $token->end = $this->cursor;
          $token->lineno = $this->lineno;

          return $tt;
      }

      /**
       * Pushes the current token back so the next get() returns it again.
       *
       * Returns nothing; match() relies on that falsy result.
       *
       * @throws Exception when a fourth token in a row is pushed back
       */
      public function unget()
      {
          if (++$this->lookahead == 4) {
              throw $this->newSyntaxError('PANIC: too much lookahead!');
          }

          $this->tokenIndex = ($this->tokenIndex - 1) & 3;
      }

      /**
       * Creates (does not throw) a parse-error exception that carries the
       * current file name and line number.
       *
       * @param string $m error description
       * @return Exception
       */
      public function newSyntaxError($m)
      {
          return new Exception('Parse error: ' . $m . ' in file \'' . $this->filename . '\' on line ' . $this->lineno);
      }
  }