Markdown_Parser

The Markdown parser class shipped with Pods - Custom Content Types and Fields.

Defined (1)

The class is defined in the following location(s).

/components/Markdown.php  
  1. class Markdown_Parser { 
  2.  
  ### Configuration Variables ###

  # Suffix that closes empty elements (used for <hr>, <br> and <img>).
  # Change to ">" for HTML output.
  var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX;

  # Tab width in spaces. Block-level patterns accept up to
  # ($tab_width - 1) leading spaces; code blocks need exactly $tab_width.
  var $tab_width = MARKDOWN_TAB_WIDTH;

  # Change to `true` to disallow markup or entities.
  var $no_markup = false;
  var $no_entities = false;

  # Predefined urls and titles for reference links and images.
  # setup() copies these into $urls / $titles before each transformation.
  var $predef_urls = array();
  var $predef_titles = array();


  ### Parser Implementation ###

  # Regex to match balanced [brackets].
  # Needed to insert a maximum bracket depth while converting to PHP.
  # The *_re members are built by the constructor from the depths.
  var $nested_brackets_depth = 6;
  var $nested_brackets_re;

  # Same construction for balanced (parentheses) inside inline URLs.
  var $nested_url_parenthesis_depth = 4;
  var $nested_url_parenthesis_re;

  # Table of hash values for escaped characters:
  # $escape_chars_re is a character class built from $escape_chars.
  var $escape_chars = '\`*_{}[]()>#+-.!';
  var $escape_chars_re;
  32.  
  function __construct() {
      # Constructor. Initializes the member variables that are derived
      # from the configuration above.
      #
      # PHP 8 no longer treats a method named after its class as the
      # constructor, so the real work lives in __construct();
      # Markdown_Parser() below is kept as a thin wrapper for code that
      # still calls the old-style constructor explicitly.
      $this->_initDetab();
      $this->prepareItalicsAndBold();

      # Balanced-bracket matcher, nested up to $nested_brackets_depth.
      $this->nested_brackets_re =
          str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
          str_repeat('\])*', $this->nested_brackets_depth);

      # Balanced-parenthesis matcher for URLs.
      $this->nested_url_parenthesis_re =
          str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
          str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

      $this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

      # Sort document, block, and span gamut in ascendent priority order.
      asort($this->document_gamut);
      asort($this->block_gamut);
      asort($this->span_gamut);
  }

  function Markdown_Parser() {
      # Backward-compatible alias of the constructor (PHP 4 naming).
      $this->__construct();
  }
  52.  
  53.  
  # Internal hashes used during transformation.
  # $urls / $titles map lower-cased link ids to their URL and title;
  # $html_hashes maps hash tokens to the text they stand for.
  var $urls = array();
  var $titles = array();
  var $html_hashes = array();

  # Status flag to avoid invalid nesting.
  # Set by doAnchors() while it is processing link text.
  var $in_anchor = false;
  61.  
  62.  
  function setup() {
      # Called before the transformation process starts to setup parser
      # states: the link tables are reset to the predefined entries and
      # the hash table and anchor flag are cleared.
      $this->urls = $this->predef_urls;
      $this->titles = $this->predef_titles;
      $this->html_hashes = array();

      $this->in_anchor = false;
  }
  72.  
  function teardown() {
      # Called after the transformation process to clear any variable
      # which may be taking up memory unnecessarily.
      $this->urls = array();
      $this->titles = array();
      $this->html_hashes = array();
  }
  79.  
  80.  
  function transform($text) {
      # Main function. Performs some preprocessing on the input text
      # and passes it through the document gamut.
      #
      # $text    Markdown source.
      # Returns  the transformed text followed by a single newline.
      $this->setup();

      # Remove UTF-8 BOM and marker character in input, if present.
      # (\x1A is the character hashPart() uses inside its tokens.)
      $text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

      # Standardize line endings:
      #   DOS to Unix and Mac to Unix
      $text = preg_replace('{\r\n?}', "\n", $text);

      # Make sure $text ends with a couple of newlines:
      $text .= "\n\n";

      # Convert all tabs to spaces.
      $text = $this->detab($text);

      # Turn block-level HTML blocks into hash entries
      $text = $this->hashHTMLBlocks($text);

      # Strip any lines consisting only of spaces and tabs.
      # This makes subsequent regexen easier to write, because we can
      # match consecutive blank lines with /\n+/ instead of something
      # contorted like /[ ]*\n+/ .
      $text = preg_replace('/^[ ]+$/m', '', $text);

      # Run document gamut methods (the constructor sorted them by priority).
      foreach ($this->document_gamut as $method => $priority) {
          $text = $this->$method($text);
      }

      $this->teardown();

      return $text . "\n";
  }
  115.  
  # Method name => priority. transform() calls each method in
  # ascending priority order.
  var $document_gamut = array(
      # Strip link definitions, store in hashes.
      "stripLinkDefinitions" => 20,

      "runBasicBlockGamut"   => 30,
      );
  122.  
  123.  
  function stripLinkDefinitions($text) {
      # Strips link definitions from text, stores the URLs and titles in
      # hash references.
      #
      # $text    document text.
      # Returns  the text with every link definition removed.
      $less_than_tab = $this->tab_width - 1;

      # Link defs are in the form: ^[id]: url "optional title"
      # The quantifier must be written without a space ("{0,n}"),
      # otherwise PCRE reads it as literal text.
      $text = preg_replace_callback('{
          ^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:  # id = $1
            [ ]*
            \n?                 # maybe *one* newline
            [ ]*
          (?:
            <(.+?)>             # url = $2
          |
            (\S+?)              # url = $3
          )
            [ ]*
            \n?                 # maybe one newline
            [ ]*
          (?:
              (?<=\s)           # lookbehind for whitespace
              ["(]
              (.*?)             # title = $4
              [")]
              [ ]*
          )?                    # title is optional
          (?:\n+|\Z)
          }xm',
          array(&$this, '_stripLinkDefinitions_callback'),
          $text);
      return $text;
  }
  function _stripLinkDefinitions_callback($matches) {
      # Records one link definition matched by stripLinkDefinitions().
      #
      # $matches  [1] link id, [2] <bracketed> url or [3] bare url,
      #           [4] optional title (absent when no title was given).
      # Returns   the empty string, so the definition is removed.
      $link_id = strtolower($matches[1]);
      $url = $matches[2] == '' ? $matches[3] : $matches[2];
      $this->urls[$link_id] = $url;
      # A missing title is stored as null, so isset() checks on
      # $this->titles report "no title" for this id.
      $this->titles[$link_id] = isset($matches[4]) ? $matches[4] : null;
      return ''; # String that will replace the block
  }
  159.  
  160.  
  function hashHTMLBlocks($text) {
      # Replaces block-level HTML found in $text with hash tokens so that
      # later passes leave it untouched. Returns $text unchanged when
      # markup is disallowed ($no_markup).
      if ($this->no_markup)  return $text;

      $less_than_tab = $this->tab_width - 1;

      # Hashify HTML blocks:
      # We only want to do this for block-level HTML tags, such as headers,
      # lists, and tables. That's because we still want to wrap <p>s around
      # "paragraphs" that are wrapped in non-block-level tags, such as anchors,
      # phrase emphasis, and spans. The list of tags we're looking for is
      # hard-coded:
      # *  List "a" is made of tags which can be both inline or block-level.
      #    These will be treated block-level when the start tag is alone on
      #    its line, otherwise they're not matched here and will be taken as
      #    inline later.
      # *  List "b" is made of tags which are always block-level;
      $block_tags_a_re = 'ins|del';
      $block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
                         'script|noscript|form|fieldset|iframe|math|svg|'.
                         'article|section|nav|aside|hgroup|header|footer|'.
                         'figure';

      # Regular expression for the content of a block tag.
      $nested_tags_level = 4;
      $attr = '
          (?>               # optional tag attributes
            \s              # starts with whitespace
            (?>
              [^>"/]+       # text outside quotes
            |
              /+(?!>)       # slash not followed by ">"
            |
              "[^"]*"       # text inside double quotes (tolerate ">")
            |
              \'[^\']*\'    # text inside single quotes (tolerate ">")
            )*
          )?
          ';
      # The first str_repeat opens $nested_tags_level nested groups and the
      # second one closes them, so the two halves must stay in step.
      $content =
          str_repeat('
              (?>
                [^<]+           # content without tag
              |
                <\2             # nested opening tag
                  '.$attr.'     # attributes
                  (?>
                    />
                  |
                    >', $nested_tags_level).    # end of opening tag
                    '.*?'.                      # last level nested tag content
          str_repeat('
                    </\2\s*>    # closing nested tag
                  )
                |
                  <(?!/\2\s*>   # other tags with a different name
                )
              )*',
              $nested_tags_level);
      # Same expression for the "a" tag list, whose tag name is captured in $3.
      $content2 = str_replace('\2', '\3', $content);

      # First, look for nested blocks, e.g.:
      #   <div>
      #       <div>
      #       tags for inner block must be indented.
      #       </div>
      #   </div>
      #
      # The outermost tags must start at the left margin for this to match, and
      # the inner nested divs must be indented.
      # We need to do this before the next, more liberal match, because the next
      # match will start at the first `<div>` and stop at the first `</div>`.
      $text = preg_replace_callback('{(?>
          (?>
              (?<=\n\n)       # Starting after a blank line
              |               # or
              \A\n?           # the beginning of the doc
          )
          (                       # save in $1

            # Match from `\n<tag>` to `</tag>\n`, handling nested tags
            # in between.

                      [ ]{0,'.$less_than_tab.'}
                      <('.$block_tags_b_re.')# start tag = $2
                      '.$attr.'>          # attributes followed by > and \n
                      '.$content.'        # content, support nesting
                      </\2>               # the matching end tag
                      [ ]*                # trailing spaces/tabs
                      (?=\n+|\Z)  # followed by a newline or end of document

          | # Special version for tags of group a.

                      [ ]{0,'.$less_than_tab.'}
                      <('.$block_tags_a_re.')# start tag = $3
                      '.$attr.'>[ ]*\n    # attributes followed by >
                      '.$content2.'       # content, support nesting
                      </\3>               # the matching end tag
                      [ ]*                # trailing spaces/tabs
                      (?=\n+|\Z)  # followed by a newline or end of document

          | # Special case just for <hr />. It was easier to make a special
            # case than to make the other regex more complicated.

                      [ ]{0,'.$less_than_tab.'}
                      <(hr)               # start tag = $2
                      '.$attr.'           # attributes
                      /?>                 # the matching end tag
                      [ ]*
                      (?=\n{2,}|\Z)       # followed by a blank line or end of document

          | # Special case for standalone HTML comments:

                  [ ]{0,'.$less_than_tab.'}
                  (?s:
                      <!-- .*? -->
                  )
                  [ ]*
                  (?=\n{2,}|\Z)       # followed by a blank line or end of document

          | # PHP and ASP-style processor instructions (<? and <%)

                  [ ]{0,'.$less_than_tab.'}
                  (?s:
                      <([?%])         # $2
                      .*?
                      \2>
                  )
                  [ ]*
                  (?=\n{2,}|\Z)       # followed by a blank line or end of document

          )
          )}Sxmi',
          array(&$this, '_hashHTMLBlocks_callback'),
          $text);

      return $text;
  }
  function _hashHTMLBlocks_callback($matches) {
      # Swaps one HTML block ($matches[1]) for its block-level hash token,
      # set off by blank lines on both sides.
      $text = $matches[1];
      $key  = $this->hashBlock($text);
      return "\n\n$key\n\n";
  }
  288.  
  289.  
  function hashPart($text, $boundary = 'X') {
      # Called whenever a tag must be hashed when a function inserts an atomic
      # element in the text stream. Passing $text through this function gives
      # a unique text-token which will be reverted back when calling unhash.
      #
      # The $boundary argument specifies what character should be used to
      # surround the token. By convention, "B" is used for block elements that
      # need not be wrapped into paragraph tags at the end, ":" is used for
      # elements that are word separators and "X" is used in the general case.
      #
      # Returns the token that stands in for $text.

      # Swap back any tag hash found in $text so we do not have to `unhash`
      # multiple times at the end.
      $text = $this->unhash($text);

      # Then hash the block. The counter is a static local, so token numbers
      # keep increasing across calls instead of restarting per document.
      static $i = 0;
      $key = "$boundary\x1A" . ++$i . $boundary;
      $this->html_hashes[$key] = $text;
      return $key; # String that will replace the tag.
  }
  307.  
  308.  
  function hashBlock($text) {
      # Shortcut function for hashPart with block-level boundaries.
      return $this->hashPart($text, 'B');
  }
  312.  
  313.  
  # Method name => priority; runBasicBlockGamut() calls each method in
  # ascending priority order.
  var $block_gamut = array(
      # These are all the transformations that form block-level
      # tags like paragraphs, headers, and list items.
      "doHeaders"         => 10,
      "doHorizontalRules" => 20,

      "doLists"           => 40,
      "doCodeBlocks"      => 50,
      "doBlockQuotes"     => 60,
      );
  324.  
  function runBlockGamut($text) {
      # Run block gamut transformations.
      #
      # We need to escape raw HTML in Markdown source before doing anything
      # else. This needs to be done for each block, and not only at the
      # beginning in the Markdown function since hashed blocks can be part of
      # list items and could have been indented. Indented blocks would have
      # been seen as a code block in a previous pass of hashHTMLBlocks.
      $text = $this->hashHTMLBlocks($text);

      return $this->runBasicBlockGamut($text);
  }
  335.  
  function runBasicBlockGamut($text) {
      # Run block gamut transformations, without hashing HTML blocks. This is
      # useful when HTML blocks are known to be already hashed, like in the
      # first whole-document pass.
      foreach ($this->block_gamut as $method => $priority) {
          $text = $this->$method($text);
      }

      # Finally form paragraph and restore hashed blocks.
      $text = $this->formParagraphs($text);

      return $text;
  }
  347.  
  348.  
  function doHorizontalRules($text) {
      # Do Horizontal Rules: a line made of three or more "-", "*" or "_"
      # markers (optionally separated by up to two spaces) becomes a hashed
      # <hr> element.
      #
      # Quantifiers are written without inner spaces ("{0,3}", "{2,}");
      # PCRE reads "{0, 3}" as literal text and the rule never matches.
      return preg_replace(
          '{
              ^[ ]{0,3}       # Leading space
              ([-*_])         # $1: First marker
              (?>             # Repeated marker group
                  [ ]{0,2}    # Zero, one, or two spaces.
                  \1          # Marker character
              ){2,}           # Group repeated at least twice
              [ ]*            # Tailing spaces
              $               # End of line.
          }mx',
          "\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
          $text);
  }
  364.  
  365.  
  # Method name => priority; runSpanGamut() calls each method in
  # ascending priority order.
  var $span_gamut = array(
      # These are all the transformations that occur *within* block-level
      # tags like paragraphs, headers, and list items.

      # Process character escapes, code spans, and inline HTML
      # in one shot.
      "parseSpan"           => -30,

      # Process anchor and image tags. Images must come first,
      # because ![foo][f] looks like an anchor.
      "doImages"            =>  10,
      "doAnchors"           =>  20,

      # Make links out of things like `<http://example.com/>`
      # Must come after doAnchors, because you can use < and >
      # delimiters in inline links like [this](<url>).
      "doAutoLinks"         =>  30,
      "encodeAmpsAndAngles" =>  40,

      "doItalicsAndBold"    =>  50,
      "doHardBreaks"        =>  60,
      );
  387.  
  function runSpanGamut($text) {
      # Run span gamut transformations: every method listed in $span_gamut
      # is applied to $text in turn, and the final text is returned.
      foreach ($this->span_gamut as $method => $priority) {
          $text = $this->$method($text);
      }

      return $text;
  }
  394.  
  395.  
  function doHardBreaks($text) {
      # Do hard breaks: two or more spaces at the end of a line become a
      # hashed <br> element.
      #
      # This pattern has no "x" flag, so the leading space is significant
      # and the quantifier must be exactly "{2,}".
      return preg_replace_callback('/ {2,}\n/',
          array(&$this, '_doHardBreaks_callback'), $text);
  }
  function _doHardBreaks_callback($matches) {
      # Returns the hash token for a <br> element followed by a newline;
      # $matches is not used.
      return $this->hashPart("<br$this->empty_element_suffix\n");
  }
  402.  
  403.  
  function doAnchors($text) {
      # Turn Markdown link shortcuts into XHTML <a> tags.
      #
      # Returns $text untouched when already inside an anchor ($in_anchor),
      # which prevents links from being nested in link text.
      if ($this->in_anchor) return $text;
      $this->in_anchor = true;

      # First, handle reference-style links: [link text] [id]
      $text = preg_replace_callback('{
          (                   # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.')   # link text = $2
            \]

            [ ]?              # one optional space
            (?:\n[ ]*)?       # one optional newline followed by spaces

            \[
              (.*?)           # id = $3
            \]
          )
          }xs',
          array(&$this, '_doAnchors_reference_callback'), $text);

      # Next, inline-style links: [link text](url "optional title")
      $text = preg_replace_callback('{
          (                   # wrap whole match in $1
            \[
              ('.$this->nested_brackets_re.')   # link text = $2
            \]
            \(                # literal paren
              [ \n]*
              (?:
                  <(.+?)>     # href = $3
              |
                  ('.$this->nested_url_parenthesis_re.')    # href = $4
              )
              [ \n]*
              (               # $5
                ([\'"])       # quote char = $6
                (.*?)         # Title = $7
                \6            # matching quote
                [ \n]*        # ignore any spaces/tabs between closing quote and )
              )?              # title is optional
            \)
          )
          }xs',
          array(&$this, '_doAnchors_inline_callback'), $text);

      # Last, handle reference-style shortcuts: [link text]
      # These must come last in case you've also got [link text][1]
      # or [link text](/foo)
      $text = preg_replace_callback('{
          (                   # wrap whole match in $1
            \[
              ([^\[\]]+)      # link text = $2; can\'t contain [ or ]
            \]
          )
          }xs',
          array(&$this, '_doAnchors_reference_callback'), $text);

      $this->in_anchor = false;
      return $text;
  }
  function _doAnchors_reference_callback($matches) {
      # Builds the <a> element for a reference-style link.
      #
      # $matches  [1] whole match, [2] link text, [3] link id (missing for
      #           the [text] shortcut form, empty for [text][]).
      # Returns   the hash token of the generated anchor, or the original
      #           text when the id has no stored URL.
      $whole_match = $matches[1];
      $link_text   = $matches[2];
      $link_id     = isset($matches[3]) ? $matches[3] : '';

      if ($link_id == "") {
          # for shortcut links like [this][] or [this].
          $link_id = $link_text;
      }

      # lower-case and turn embedded newlines into spaces
      $link_id = strtolower($link_id);
      $link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

      if (isset($this->urls[$link_id])) {
          $url = $this->urls[$link_id];
          $url = $this->encodeAttribute($url);

          $result = "<a href=\"$url\"";
          if ( isset( $this->titles[$link_id] ) ) {
              $title = $this->titles[$link_id];
              $title = $this->encodeAttribute($title);
              $result .=  " title=\"$title\"";
          }

          $link_text = $this->runSpanGamut($link_text);
          $result .= ">$link_text</a>";
          $result = $this->hashPart($result);
      }
      else {
          # Unknown id: leave the source text as it was.
          $result = $whole_match;
      }
      return $result;
  }
  function _doAnchors_inline_callback($matches) {
      # Builds the <a> element for an inline link.
      #
      # $matches  [2] link text, [3] <bracketed> url or [4] bare url,
      #           [7] optional title (absent when none was given).
      # Returns   the hash token of the generated anchor.
      $link_text = $this->runSpanGamut($matches[2]);
      $url       = $matches[3] == '' ? $matches[4] : $matches[3];
      $title     = isset($matches[7]) ? $matches[7] : null;

      $url = $this->encodeAttribute($url);

      $result = "<a href=\"$url\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $result .=  " title=\"$title\"";
      }

      # NOTE(review): the link text already went through runSpanGamut()
      # above; this second pass is kept to preserve existing behaviour but
      # looks redundant -- confirm before removing.
      $link_text = $this->runSpanGamut($link_text);
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  506.  
  507.  
  function doImages($text) {
      # Turn Markdown image shortcuts into <img> tags.
      # Returns $text with every recognised image replaced by a hash token.

      # First, handle reference-style labeled images: ![alt text][id]
      $text = preg_replace_callback('{
          (               # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.')       # alt text = $2
            \]

            [ ]?              # one optional space
            (?:\n[ ]*)?       # one optional newline followed by spaces

            \[
              (.*?)           # id = $3
            \]

          )
          }xs',
          array(&$this, '_doImages_reference_callback'), $text);

      # Next, handle inline images:  ![alt text](url "optional title")
      # Don't forget: encode * and _
      $text = preg_replace_callback('{
          (               # wrap whole match in $1
            !\[
              ('.$this->nested_brackets_re.')       # alt text = $2
            \]
            \s?           # One optional whitespace character
            \(            # literal paren
              [ \n]*
              (?:
                  <(\S*)> # src url = $3
              |
                  ('.$this->nested_url_parenthesis_re.')    # src url = $4
              )
              [ \n]*
              (           # $5
                ([\'"])   # quote char = $6
                (.*?)     # title = $7
                \6        # matching quote
                [ \n]*
              )?          # title is optional
            \)
          )
          }xs',
          array(&$this, '_doImages_inline_callback'), $text);

      return $text;
  }
  function _doImages_reference_callback($matches) {
      # Builds the <img> element for a reference-style image.
      #
      # $matches  [1] whole match, [2] alt text, [3] link id (may be empty).
      # Returns   the hash token of the generated element, or the original
      #           text when the id has no stored URL.
      $whole_match = $matches[1];
      $alt_text    = $matches[2];
      $link_id     = strtolower($matches[3]);

      if ($link_id == "") {
          $link_id = strtolower($alt_text); # for shortcut links like ![this][].
      }

      $alt_text = $this->encodeAttribute($alt_text);
      if (isset($this->urls[$link_id])) {
          $url = $this->encodeAttribute($this->urls[$link_id]);
          $result = "<img src=\"$url\" alt=\"$alt_text\"";
          if (isset($this->titles[$link_id])) {
              $title = $this->titles[$link_id];
              $title = $this->encodeAttribute($title);
              $result .=  " title=\"$title\"";
          }
          $result .= $this->empty_element_suffix;
          $result = $this->hashPart($result);
      }
      else {
          # If there's no such link ID, leave intact:
          $result = $whole_match;
      }

      return $result;
  }
  function _doImages_inline_callback($matches) {
      # Builds the <img> element for an inline image.
      #
      # $matches  [2] alt text, [3] <bracketed> url or [4] bare url,
      #           [7] optional title (absent when none was given).
      # Returns   the hash token of the generated element.
      $alt_text = $matches[2];
      $url      = $matches[3] == '' ? $matches[4] : $matches[3];
      $title    = isset($matches[7]) ? $matches[7] : null;

      $alt_text = $this->encodeAttribute($alt_text);
      $url = $this->encodeAttribute($url);
      $result = "<img src=\"$url\" alt=\"$alt_text\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $result .=  " title=\"$title\""; # $title already quoted
      }
      $result .= $this->empty_element_suffix;

      return $this->hashPart($result);
  }
  590.  
  591.  
  function doHeaders($text) {
      # Converts Setext-style and atx-style headers found in $text into
      # hashed <h1>..<h6> blocks and returns the resulting text.

      # Setext-style headers:
      #     Header 1
      #     ========
      #
      #     Header 2
      #     --------
      #
      $text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
          array(&$this, '_doHeaders_callback_setext'), $text);

      # atx-style headers:
      #   # Header 1
      #   ## Header 2
      #   ## Header 2 with closing hashes ##
      #   ...
      #   ###### Header 6
      #
      $text = preg_replace_callback('{
              ^(\#{1,6})  # $1 = string of #\'s
              [ ]*
              (.+?)       # $2 = Header text
              [ ]*
              \#*         # optional closing #\'s (not counted)
              \n+
          }xm',
          array(&$this, '_doHeaders_callback_atx'), $text);

      return $text;
  }
  function _doHeaders_callback_setext($matches) {
      # Builds a level 1 ("=" underline) or level 2 ("-" underline) header.
      #
      # $matches  [0] whole match, [1] header text, [2] underline.
      # Returns   the hashed header block, or the unmodified match when the
      #           text turns out to be an empty list item.

      # Terrible hack to check we haven't found an empty list item.
      if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
          return $matches[0];

      # Square brackets: the curly-brace string offset form is a parse
      # error on PHP 8.
      $level = $matches[2][0] == '=' ? 1 : 2;
      $block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  function _doHeaders_callback_atx($matches) {
      # Builds an atx header whose level equals the number of leading "#"
      # characters ($matches[1]); $matches[2] is the header text.
      $level = strlen($matches[1]);
      $block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  630.  
  631.  
  function doLists($text) {
      # Form HTML ordered (numbered) and unordered (bulleted) lists.
      # Returns $text with every list replaced by a hashed <ul>/<ol> block.
      $less_than_tab = $this->tab_width - 1;

      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul_re  = '[*+-]';
      $marker_ol_re  = '\d+[\.]';

      # Each list kind is paired with the other kind, which ends it.
      $markers_relist = array(
          $marker_ul_re => $marker_ol_re,
          $marker_ol_re => $marker_ul_re,
          );

      foreach ($markers_relist as $marker_re => $other_marker_re) {
          # Re-usable pattern to match any entire ul or ol list:
          $whole_list_re = '
              (                               # $1 = whole list
                (                             # $2
                  ([ ]{0,'.$less_than_tab.'}) # $3 = number of spaces
                  ('.$marker_re.')            # $4 = first list item marker
                  [ ]+
                )
                (?s:.+?)
                (                             # $5
                    \z
                  |
                    \n{2,}
                    (?=\S)
                    (?!                       # Negative lookahead for another list item marker
                      [ ]*
                      '.$marker_re.'[ ]+
                    )
                  |
                    (?=                       # Lookahead for another kind of list
                      \n
                      \3                      # Must have the same indentation
                      '.$other_marker_re.'[ ]+
                    )
                )
              )
          '; // mx

          # We use a different prefix before nested lists than top-level lists.
          # See extended comment in processListItems().

          if ($this->list_level) {
              # Inside a list: a sub-list may start at any line start.
              $text = preg_replace_callback('{
                      ^
                      '.$whole_list_re.'
                  }mx',
                  array(&$this, '_doLists_callback'), $text);
          }
          else {
              # Top level: a list must follow a blank line or start the text.
              $text = preg_replace_callback('{
                      (?:(?<=\n)\n|\A\n?) # Must eat the newline
                      '.$whole_list_re.'
                  }mx',
                  array(&$this, '_doLists_callback'), $text);
          }
      }

      return $text;
  }
  function _doLists_callback($matches) {
      # Converts one whole list matched by doLists() into a hashed
      # <ul> or <ol> block.
      #
      # $matches  [1] whole list text, [4] first list item marker.

      # Re-usable patterns to match list item bullets and number markers:
      $marker_ul_re  = '[*+-]';
      $marker_ol_re  = '\d+[\.]';

      $list = $matches[1];
      $list_type = preg_match("/$marker_ul_re/", $matches[4]) ? "ul" : "ol";

      # Only markers of the list's own kind separate its items.
      $marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

      $list .= "\n";
      $result = $this->processListItems($list, $marker_any_re);

      $result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
      return "\n". $result ."\n\n";
  }
  700.  
  # List nesting depth: incremented on entry to processListItems() and
  # decremented on exit; zero means "not inside a list".
  var $list_level = 0;
  702.  
  function processListItems($list_str, $marker_any_re) {
      # Process the contents of a single ordered or unordered list, splitting
      # it into individual list items.
      #
      # $list_str       text of the whole list.
      # $marker_any_re  regex fragment matching this list's item markers.
      # Returns         the list items as a run of <li> elements.
      #
      # The $this->list_level global keeps track of when we're inside a list.
      # Each time we enter a list, we increment it; when we leave a list,
      # we decrement. If it's zero, we're not in a list anymore.
      #
      # We do this because when we're not inside a list, we want to treat
      # something like this:
      #
      #     I recommend upgrading to version
      #     8. Oops, now this line is treated
      #     as a sub-list.
      #
      # As a single paragraph, despite the fact that the second line starts
      # with a digit-period-space sequence.
      #
      # Whereas when we're inside a list (or sub-list), that line will be
      # treated as the start of a sub-list. What a kludge, huh? This is
      # an aspect of Markdown's syntax that's hard to parse perfectly
      # without resorting to mind-reading. Perhaps the solution is to
      # change the syntax rules such that sub-lists must start with a
      # starting cardinal number; e.g. "1." or "a.".

      $this->list_level++;

      # trim trailing blank lines:
      $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      $list_str = preg_replace_callback('{
          (\n)?                           # leading line = $1
          (^[ ]*)                         # leading whitespace = $2
          ('.$marker_any_re.'             # list marker and space = $3
              (?:[ ]+|(?=\n))     # space only required if item is not empty
          )
          ((?s:.*?))                      # list item text   = $4
          (?:(\n+(?=\n))|\n)              # tailing blank line = $5
          (?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
          }xm',
          array(&$this, '_processListItems_callback'), $list_str);

      $this->list_level--;
      return $list_str;
  }
  function _processListItems_callback($matches) {
      # Converts one list item into an <li> element.
      #
      # $matches  [1] leading blank line, [2] leading whitespace,
      #           [3] marker and following space, [4] item text,
      #           [5] tailing blank line.
      $item               = $matches[4];
      $leading_line       = isset($matches[1]) ? $matches[1] : '';
      $leading_space      = isset($matches[2]) ? $matches[2] : '';
      $marker_space       = $matches[3];
      $tailing_blank_line = isset($matches[5]) ? $matches[5] : '';

      if ($leading_line || $tailing_blank_line ||
          preg_match('/\n{2,}/', $item))
      {
          # Item is surrounded by or contains blank lines: run it through
          # the block gamut.
          # Replace marker with the appropriate whitespace indentation
          $item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
          $item = $this->runBlockGamut($this->outdent($item)."\n");
      }
      else {
          # Recursion for sub-lists:
          $item = $this->doLists($this->outdent($item));
          $item = preg_replace('/\n+$/', '', $item);
          $item = $this->runSpanGamut($item);
      }

      return "<li>" . $item . "</li>\n";
  }
  760.  
  761.  
  function doCodeBlocks($text) {
      # Process Markdown `<pre><code>` blocks: runs of lines indented by
      # $tab_width spaces are handed to _doCodeBlocks_callback().
      $text = preg_replace_callback('{
              (?:\n\n|\A\n?)
              (               # $1 = the code block -- one or more lines, starting with a space/tab
                (?>
                  [ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
                  .*\n+
                )+
              )
              ((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z) # Lookahead for non-space at line-start, or end of doc
          }xm',
          array(&$this, '_doCodeBlocks_callback'), $text);

      return $text;
  }
  function _doCodeBlocks_callback($matches) {
      # Turns one indented code block ($matches[1]) into a hashed
      # <pre><code> element set off by blank lines.
      $codeblock = $matches[1];

      $codeblock = $this->outdent($codeblock);
      # Escape &, < and >; quotes are left alone (ENT_NOQUOTES).
      $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

      # trim leading newlines and trailing newlines
      $codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

      $codeblock = "<pre><code>$codeblock\n</code></pre>";
      return "\n\n".$this->hashBlock($codeblock)."\n\n";
  }
  787.  
  788.  
  function makeCodeSpan($code) {
      # Create a code span markup for $code. Called from handleSpanToken.
      # The code is trimmed and HTML-escaped (quotes untouched), then the
      # <code> element is returned as a hash token.
      $code = htmlspecialchars(trim($code), ENT_NOQUOTES);
      return $this->hashPart("<code>$code</code>");
  }
  793.  
  794.  
  # Emphasis token expressions, keyed by the marker that is currently
  # open: the '' entry matches an opening marker, the other entries match
  # the closing marker for that key. prepareItalicsAndBold() combines the
  # three tables into $em_strong_prepared_relist.
  var $em_relist = array(
      ''  => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S|$)(?![\.,:;]\s)',
      '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
      '_' => '(?<=\S|^)(?<!_)_(?!_)',
      );
  var $strong_relist = array(
      ''   => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S|$)(?![\.,:;]\s)',
      '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
      '__' => '(?<=\S|^)(?<!_)__(?!_)',
      );
  var $em_strong_relist = array(
      ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S|$)(?![\.,:;]\s)',
      '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
      '___' => '(?<=\S|^)(?<!_)___(?!_)',
      );
  var $em_strong_prepared_relist;
  811.  
  function prepareItalicsAndBold() {
      # Prepare regular expressions for searching emphasis tokens in any
      # context. For every combination of open em / strong marker, one
      # master expression is stored in $em_strong_prepared_relist under the
      # key "$em$strong".
      foreach ($this->em_relist as $em => $em_re) {
          foreach ($this->strong_relist as $strong => $strong_re) {
              # Construct list of allowed token expressions.
              # The three-char expression goes first so it is tried
              # before the shorter markers.
              $token_relist = array();
              if (isset($this->em_strong_relist["$em$strong"])) {
                  $token_relist[] = $this->em_strong_relist["$em$strong"];
              }
              $token_relist[] = $em_re;
              $token_relist[] = $strong_re;

              # Construct master expression from list.
              $token_re = '{('. implode('|', $token_relist) .')}';
              $this->em_strong_prepared_relist["$em$strong"] = $token_re;
          }
      }
  }
  827.  
  function doItalicsAndBold($text) {
      #
      # Convert emphasis markers in $text into <em>/<strong> markup.
      #
      # Params:
      #    $text - span-level text to scan for emphasis tokens.
      # Returns:
      #    The text with completed emphasis spans replaced by the result
      #    of hashPart(); unmatched markers are put back as literal text.
      #
      # Two parallel stacks are kept: $token_stack holds the currently
      # open markers (innermost first) and $text_stack the text gathered
      # inside each of them. Index 0 is always the innermost level; the
      # bottom entry of $token_stack is the '' sentinel.
      #
      $token_stack = array('');
      $text_stack = array('');
      $em = '';
      $strong = '';
      $tree_char_em = false;

      while (1) {
          #
          # Get prepared regular expression for searching emphasis tokens
          # in current context.
          #
          $token_re = $this->em_strong_prepared_relist["$em$strong"];

          #
          # Each loop iteration searches for the next emphasis token and
          # appends the text that precedes it to the innermost level.
          #
          $parts = preg_split($token_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE);
          $text_stack[0] .= $parts[0];
          # References avoid undefined-offset notices when nothing matched.
          $token =& $parts[1];
          $text =& $parts[2];

          if (empty($token)) {
              # Reached end of text span: empty stack without emitting
              # any more emphasis.
              while ($token_stack[0]) {
                  $text_stack[1] .= array_shift($token_stack);
                  $text_stack[0] .= array_shift($text_stack);
              }
              break;
          }

          $token_len = strlen($token);
          if ($tree_char_em) {
              # Reached closing marker while inside a three-char emphasis.
              if ($token_len == 3) {
                  # Three-char closing marker, close em and strong.
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong><em>$span</em></strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $em = '';
                  $strong = '';
              } else {
                  # Other closing marker: close one em or strong and
                  # change current token state to match the other.
                  # ($token[0] replaces the `{0}` offset syntax, which is
                  # a fatal error since PHP 8.)
                  $token_stack[0] = str_repeat($token[0], 3 - $token_len);
                  $tag = $token_len == 2 ? "strong" : "em";
                  $span = $text_stack[0];
                  $span = $this->runSpanGamut($span);
                  $span = "<$tag>$span</$tag>";
                  $text_stack[0] = $this->hashPart($span);
                  $$tag = ''; # $$tag stands for $em or $strong
              }
              $tree_char_em = false;
          } else if ($token_len == 3) {
              if ($em) {
                  # Reached closing marker for both em and strong:
                  # close the two innermost levels, whichever order
                  # they were opened in.
                  for ($i = 0; $i < 2; ++$i) {
                      $shifted_token = array_shift($token_stack);
                      $tag = strlen($shifted_token) == 2 ? "strong" : "em";
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<$tag>$span</$tag>";
                      $text_stack[0] .= $this->hashPart($span);
                      $$tag = ''; # $$tag stands for $em or $strong
                  }
              } else {
                  # Reached opening three-char emphasis marker. Push on token
                  # stack; will be handled by the special condition above.
                  $em = $token[0];
                  $strong = "$em$em";
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $tree_char_em = true;
              }
          } else if ($token_len == 2) {
              if ($strong) {
                  # Unwind any dangling emphasis marker:
                  if (strlen($token_stack[0]) == 1) {
                      $text_stack[1] .= array_shift($token_stack);
                      $text_stack[0] .= array_shift($text_stack);
                  }
                  # Closing strong marker:
                  array_shift($token_stack);
                  $span = array_shift($text_stack);
                  $span = $this->runSpanGamut($span);
                  $span = "<strong>$span</strong>";
                  $text_stack[0] .= $this->hashPart($span);
                  $strong = '';
              } else {
                  # Opening strong marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $strong = $token;
              }
          } else {
              # Here $token_len == 1
              if ($em) {
                  if (strlen($token_stack[0]) == 1) {
                      # Closing emphasis marker:
                      array_shift($token_stack);
                      $span = array_shift($text_stack);
                      $span = $this->runSpanGamut($span);
                      $span = "<em>$span</em>";
                      $text_stack[0] .= $this->hashPart($span);
                      $em = '';
                  } else {
                      # A strong span is innermost: keep the marker as text.
                      $text_stack[0] .= $token;
                  }
              } else {
                  # Opening emphasis marker.
                  array_unshift($token_stack, $token);
                  array_unshift($text_stack, '');
                  $em = $token;
              }
          }
      }
      return $text_stack[0];
  }
  933.  
  934.  
  function doBlockQuotes($text) {
      #
      # Find every run of ">"-quoted lines in $text and replace it with
      # the result of _doBlockQuotes_callback().
      #
      # Params:
      #    $text - block-level text.
      # Returns:
      #    $text with each blockquote run replaced.
      #
      $text = preg_replace_callback('/
            (                   # Wrap whole match in $1
              (?>
                ^[ ]*>[ ]?      # ">" at the start of a line
                  .+\n          # rest of the first line
                (.+\n)*         # subsequent consecutive lines
                \n*             # blanks
              )+
            )
          /xm',
          array($this, '_doBlockQuotes_callback'), $text);

      return $text;
  }
  function _doBlockQuotes_callback($matches) {
      #
      # preg_replace_callback handler for doBlockQuotes().
      #
      # Params:
      #    $matches - regex match; $matches[1] is the whole quoted run.
      # Returns:
      #    The hashed <blockquote> element surrounded by newlines.
      #
      $bq = $matches[1];
      # trim one level of quoting - trim whitespace-only lines
      $bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
      $bq = $this->runBlockGamut($bq); # recurse

      # Indent every line of the rendered content.
      $bq = preg_replace('/^/m', " ", $bq);
      # These leading spaces cause problem with <pre> content,
      # so we need to fix that:
      $bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
          array($this, '_doBlockQuotes_callback2'), $bq);

      return "\n". $this->hashBlock("<blockquote>\n$bq\n</blockquote>")."\n\n";
  }
  function _doBlockQuotes_callback2($matches) {
      #
      # Undo the blockquote indentation inside a <pre> element.
      #
      # Params:
      #    $matches - regex match; $matches[1] is the <pre>...</pre> text
      #               including any whitespace in front of it.
      # Returns:
      #    That text with one leading space removed from every line
      #    that starts with a space.
      #
      return preg_replace('/^ /m', '', $matches[1]);
  }
  965.  
  966.  
  function formParagraphs($text) {
      #
      # Params:
      #    $text - string to process with html <p> tags
      # Returns:
      #    The paragraphs and blocks of $text joined by blank lines:
      #    ordinary paragraphs are run through the span gamut, wrapped
      #    in <p>...</p> and unhashed; block placeholders are swapped
      #    for their stored HTML.
      #
      # Strip leading and trailing lines:
      $text = preg_replace('/\A\n+|\n+\z/', '', $text);

      # Split on runs of two or more newlines. The quantifier must be
      # written `{2,}` with no space: older PCRE versions treat
      # `{2, }` as literal text and nothing would be split.
      $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

      #
      # Wrap <p> tags and unhashify HTML blocks
      #
      foreach ($grafs as $key => $value) {
          if (!preg_match('/^B\x1A[0-9]+B$/', $value)) {
              # Is a paragraph.
              $value = $this->runSpanGamut($value);
              # Replace any leading spaces with the opening tag.
              $value = preg_replace('/^([ ]*)/', "<p>", $value);
              $value .= "</p>";
              $grafs[$key] = $this->unhash($value);
          }
          else {
              # Is a block: the whole paragraph is a single block
              # placeholder, so swap in the stored HTML as-is.
              # (A disabled experiment that re-processed the contents
              # of <div markdown="1"> blocks used to sit here as
              # commented-out code; it was never active.)
              $grafs[$key] = $this->html_hashes[$value];
          }
      }

      return implode("\n\n", $grafs);
  }
  1027.  
  1028.  
  function encodeAttribute($text) {
      #
      # Encode text for a double-quoted HTML attribute. This function
      # is *not* suitable for attributes enclosed in single quotes.
      #
      # Params:
      #    $text - raw attribute value.
      # Returns:
      #    The value with ampersands and "<" encoded by
      #    encodeAmpsAndAngles() and every double quote as &quot;.
      #
      $text = $this->encodeAmpsAndAngles($text);
      # The replacement must be the entity: replacing '"' with itself
      # would let a quote in the value terminate the attribute.
      $text = str_replace('"', '&quot;', $text);
      return $text;
  }
  1035.  
  1036.  
  function encodeAmpsAndAngles($text) {
      #
      # Smart processing for ampersands and angle brackets that need to
      # be encoded. Valid character entities are left alone unless the
      # no-entities mode is set.
      #
      # Params:
      #    $text - text to encode.
      # Returns:
      #    $text with "&" turned into &amp; (all of them in no-entities
      #    mode, otherwise only those not starting an entity) and every
      #    "<" turned into &lt;.
      #
      if ($this->no_entities) {
          $text = str_replace('&', '&amp;', $text);
      } else {
          # Ampersand-encoding based entirely on Nat Irons's Amputator
          # MT plugin: <http://bumppo.net/projects/amputator/>
          $text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
              '&amp;', $text);
      }
      # Encode remaining <'s
      $text = str_replace('<', '&lt;', $text);

      return $text;
  }
  1052.  
  1053.  
  function doAutoLinks($text) {
      #
      # Turn <url>, <address@domain> and <tel:number> into links.
      #
      # Params:
      #    $text - span-level text.
      # Returns:
      #    $text with each automatic link replaced by the value of the
      #    matching _doAutoLinks_*_callback().
      #
      # URLs: <scheme:...> for http, https, ftp and dict.
      $text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
          array($this, '_doAutoLinks_url_callback'), $text);

      # Email addresses: <address@domain.foo>
      # $1 is the address without the optional "mailto:" prefix.
      $text = preg_replace_callback('{
          <
          (?:mailto:)?
          (
              (?:
                  [-!#$%&\'*+/=?^_`.{|}~\w\x80-\xFF]+
              |
                  ".*?"
              )
              \@
              (?:
                  [-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
              |
                  \[[\d.a-fA-F:]+\]   # IPv4 & IPv6
              )
          )
          >
          }xi',
          array($this, '_doAutoLinks_email_callback'), $text);

      # Telephone links: <tel:...>; $1 is the full URI, $2 the number.
      $text = preg_replace_callback('{<(tel:([^\'">\s]+))>}i',
          array($this, '_doAutoLinks_tel_callback'), $text);

      return $text;
  }
  function _doAutoLinks_tel_callback($matches) {
      #
      # Build the anchor for a <tel:...> automatic link.
      #
      # Params:
      #    $matches - regex match; [1] is the full "tel:" URI and
      #               [2] the part after "tel:".
      # Returns:
      #    The anchor markup passed through hashPart().
      #
      $href = $this->encodeAttribute($matches[1]);
      $label = $this->encodeAttribute($matches[2]);
      return $this->hashPart("<a href=\"$href\">$label</a>");
  }
  function _doAutoLinks_url_callback($matches) {
      #
      # Build the anchor for a <url> automatic link; the encoded URL is
      # used both as the href and as the link text.
      #
      # Params:
      #    $matches - regex match; [1] is the URL.
      # Returns:
      #    The anchor markup passed through hashPart().
      #
      $url = $this->encodeAttribute($matches[1]);
      return $this->hashPart("<a href=\"$url\">$url</a>");
  }
  function _doAutoLinks_email_callback($matches) {
      #
      # Build the obfuscated mailto link for an <address@domain>
      # automatic link.
      #
      # Params:
      #    $matches - regex match; [1] is the e-mail address.
      # Returns:
      #    The output of encodeEmailAddress() passed through hashPart().
      #
      $anchor = $this->encodeEmailAddress($matches[1]);
      return $this->hashPart($anchor);
  }
  1086.  
  1087.  
  function encodeEmailAddress($addr) {
      #
      # Input: an email address, e.g. "foo@example.com"
      #
      # Output: the email address as a mailto link, with each ASCII
      # character of "mailto:" + address written either raw, as a
      # decimal entity (&#NNN;) or as a hex entity (&#xHH;), in the
      # hopes of foiling most address harvesting spam bots. The link
      # text is the same encoded string without the "mailto:" part.
      # The choice per character is deterministic for a given address.
      #
      # Based by a filter by Matthew Wickline, posted to BBEdit-Talk.
      # With some optimizations by Milian Wolff.
      #
      $addr = "mailto:" . $addr;
      # Split between every pair of bytes.
      $chars = preg_split('/(?<!^)(?!$)/', $addr);
      $seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

      foreach ($chars as $key => $char) {
          $ord = ord($char);
          # Ignore non-ascii chars.
          if ($ord < 128) {
              $r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
              # roughly 10% raw, 45% hex, 45% dec
              # '@' *must* be encoded. I insist.
              if ($r > 90 && $char != '@') {
                  # Leave this character as it is.
              } else if ($r < 45) {
                  $chars[$key] = '&#x'.dechex($ord).';';
              } else {
                  $chars[$key] = '&#'.$ord.';';
              }
          }
      }

      $addr = implode('', $chars);
      $text = implode('', array_slice($chars, 7)); # text without `mailto:`
      $addr = "<a href=\"$addr\">$text</a>";

      return $addr;
  }
  1119.  
  1120.  
  function parseSpan($str) {
      #
      # Take the string $str and parse it into tokens, hashing embedded
      # HTML, escaped characters and handling code spans.
      #
      # Params:
      #    $str - span-level text.
      # Returns:
      #    The text with every token replaced by the value of
      #    handleSpanToken(); text between tokens is copied unchanged.
      #
      $output = '';

      # One capture group wraps all alternatives so that preg_split()
      # hands back the matched token as $parts[1]. The markup
      # alternatives are left out when $this->no_markup is set.
      $span_re = '{
              (
                  \\\\'.$this->escape_chars_re.'
              |
                  (?<![`\\\\])
                  `+                      # code span marker
          '.( $this->no_markup ? '' : '
              |
                  <!--    .*?     -->     # comment
              |
                  <\?.*?\?> | <%.*?%>     # processing instruction
              |
                  <[!$]?[-a-zA-Z0-9:_]+   # regular tags
                  (?>
                      \s
                      (?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
                  )?
                  >
              |
                  <[-a-zA-Z0-9:_]+\s*/> # xml-style empty tag
              |
                  </[-a-zA-Z0-9:_]+\s*> # closing tag
          ').'
              )
              }xs';

      while (1) {
          #
          # Each loop iteration searches for either the next tag, the
          # next opening code span marker, or the next escaped character.
          # Each token is then passed to handleSpanToken.
          #
          $parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

          # Create token from text preceding tag.
          if ($parts[0] != "") {
              $output .= $parts[0];
          }

          # Check if we reach the end.
          if (isset($parts[1])) {
              # handleSpanToken() takes the remaining text by reference
              # and may consume part of it (code spans).
              $output .= $this->handleSpanToken($parts[1], $parts[2]);
              $str = $parts[2];
          }
          else {
              break;
          }
      }

      return $output;
  }
  1161.  
  1162.  
  function handleSpanToken($token, &$str) {
      #
      # Handle $token provided by parseSpan by determining its nature and
      # returning the corresponding value that should replace it.
      #
      # Params:
      #    $token - the matched token (escape sequence, backtick run or
      #             markup).
      #    $str   - text following the token, by reference; when a code
      #             span is closed, it is shortened to what follows the
      #             closing marker.
      # Returns:
      #    The replacement for the token.
      #
      # $token[0] / $token[1] replace the `{0}` / `{1}` offset syntax,
      # which is a fatal error since PHP 8.
      switch ($token[0]) {
          case "\\":
              # Backslash escape: emit the escaped character as a
              # decimal entity.
              return $this->hashPart("&#". ord($token[1]). ";");
          case "`":
              # Search for end marker in remaining text.
              if (preg_match('/^(.*?[^`])'.preg_quote($token).'(?!`)(.*)$/sm',
                  $str, $matches))
              {
                  $str = $matches[2];
                  $codespan = $this->makeCodeSpan($matches[1]);
                  return $this->hashPart($codespan);
              }
              return $token; // return as text since no ending marker found.
          default:
              # Anything else is markup matched by parseSpan.
              return $this->hashPart($token);
      }
  }
  1179.  
  1180.  
  function outdent($text) {
      #
      # Remove one level of line-leading tabs or spaces.
      #
      # Params:
      #    $text - text whose lines are to be outdented.
      # Returns:
      #    $text with one tab, or up to $this->tab_width spaces, removed
      #    from the start of every line.
      #
      # The quantifier is written `{1,N}` with no space: older PCRE
      # versions treat `{1, N}` as literal text.
      return preg_replace('/^(\t|[ ]{1,'.$this->tab_width.'})/m', '', $text);
  }
  1184.  
  1185.  
  1186. # String length function for detab. `_initDetab` will create a function to 
  1187. # handle UTF-8 if the default function does not exist. 
  1188. var $utf8_strlen = 'mb_strlen'; 
  1189.  
  function detab($text) {
      #
      # Replace tabs with the appropriate amount of space.
      #
      # Params:
      #    $text - text that may contain tab characters.
      # Returns:
      #    $text with every line containing a tab rewritten by
      #    _detab_callback().
      #
      # For each line we separate the line in blocks delimited by
      # tab characters. Then we reconstruct every line by adding the
      # appropriate number of spaces between each block.
      return preg_replace_callback('/^.*\t.*$/m',
          array($this, '_detab_callback'), $text);
  }
  function _detab_callback($matches) {
      #
      # Expand the tabs of one line to the next tab stop.
      #
      # Params:
      #    $matches - regex match; [0] is a line containing tabs.
      # Returns:
      #    The line with each tab replaced by 1 to $this->tab_width
      #    spaces, so that the following text starts on a multiple of
      #    $this->tab_width characters.
      #
      $line = $matches[0];
      $strlen = $this->utf8_strlen; # strlen function for UTF-8.

      # Split in blocks.
      $blocks = explode("\t", $line);
      # Add each block to the line.
      $line = $blocks[0];
      unset($blocks[0]); # Do not add first block twice.
      foreach ($blocks as $block) {
          # Calculate amount of space, insert spaces, insert block.
          $amount = $this->tab_width -
              $strlen($line, 'UTF-8') % $this->tab_width;
          $line .= str_repeat(" ", $amount) . $block;
      }
      return $line;
  }
  function _initDetab() {
      #
      # Check for the availability of the function in the `utf8_strlen`
      # property (initially `mb_strlen`). If the function is not
      # available, install a fallback that loosely counts the number of
      # UTF-8 characters with a regular expression.
      #
      # The fallback is a closure because create_function() was removed
      # in PHP 8. It is invoked as $strlen($line, 'UTF-8'); the extra
      # encoding argument is ignored.
      #
      if (function_exists($this->utf8_strlen)) return;

      $this->utf8_strlen = function ($text) {
          # One match per single byte below 0xC0, or per lead byte
          # followed by its continuation bytes.
          return preg_match_all(
              '/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
              $text, $m);
      };
  }
  1224.  
  1225.  
  function unhash($text) {
      #
      # Swap back in all the tags hashed by _HashHTMLBlocks.
      #
      # Params:
      #    $text - text containing hash placeholders.
      # Returns:
      #    $text with every placeholder (one character, \x1A, digits,
      #    the same character again) replaced by the value of
      #    _unhash_callback().
      #
      return preg_replace_callback('/(.)\x1A[0-9]+\1/',
          array($this, '_unhash_callback'), $text);
  }
  function _unhash_callback($matches) {
      #
      # preg_replace_callback handler for unhash().
      #
      # Params:
      #    $matches - regex match; [0] is the whole placeholder.
      # Returns:
      #    The entry of $this->html_hashes stored under that placeholder.
      #
      return $this->html_hashes[$matches[0]];
  }
  1232.