MarkdownExtra_Parser

The Jetpack by WordPress.com MarkdownExtra Parser class.

Defined (1)

The class is defined in the following location(s).

/_inc/lib/markdown/extra.php  
  1. class MarkdownExtra_Parser extends Markdown_Parser { 
  2.  
  3. ### Configuration Variables ### 
  4.  
  5. # Prefix for footnote ids. 
  6. public $fn_id_prefix = ""; 
  7.  
  8. # Optional title attribute for footnote links and backlinks. 
  9. public $fn_link_title = MARKDOWN_FN_LINK_TITLE; 
  10. public $fn_backlink_title = MARKDOWN_FN_BACKLINK_TITLE; 
  11.  
  12. # Optional class attribute for footnote links and backlinks. 
  13. public $fn_link_class = MARKDOWN_FN_LINK_CLASS; 
  14. public $fn_backlink_class = MARKDOWN_FN_BACKLINK_CLASS; 
  15.  
  16. # Optional class prefix for fenced code block. 
  17. public $code_class_prefix = MARKDOWN_CODE_CLASS_PREFIX; 
  18. # Class attribute for code blocks goes on the `code` tag; 
  19. # setting this to true will put attributes on the `pre` tag instead. 
  20. public $code_attr_on_pre = MARKDOWN_CODE_ATTR_ON_PRE; 
  21.  
  22. # Predefined abbreviations. 
  23. public $predef_abbr = array(); 
  24.  
  25.  
  26. ### Parser Implementation ### 
  27.  
  function __construct() {
      # Constructor. Registers the Extra-specific escapable characters and
      # transformations, then hands over to the parent constructor.

      # ':' and '|' have to be appended before parent::__construct() runs,
      # because the parent constructor initializes the escape table.
      $this->escape_chars = $this->escape_chars . ':|';

      # Extra transformations, as method name => number pairs.
      # The parent constructor will do the sorting.
      $extra_document_gamut = array(
          "doFencedCodeBlocks" => 5,
          "stripFootnotes"     => 15,
          "stripAbbreviations" => 25,
          "appendFootnotes"    => 50,
      );
      $extra_block_gamut = array(
          "doFencedCodeBlocks" => 5,
          "doTables"           => 15,
          "doDefLists"         => 45,
      );
      $extra_span_gamut = array(
          "doFootnotes"     => 5,
          "doAbbreviations" => 70,
      );

      # Array union: keys already present in a gamut keep their current value;
      # only missing keys are added.
      $this->document_gamut = $this->document_gamut + $extra_document_gamut;
      $this->block_gamut    = $this->block_gamut + $extra_block_gamut;
      $this->span_gamut     = $this->span_gamut + $extra_span_gamut;

      parent::__construct();
  }
  53.  
  54.  
  55. # Extra variables used during extra transformations. 
  56. public $footnotes = array(); 
  57. public $footnotes_ordered = array(); 
  58. public $footnotes_ref_count = array(); 
  59. public $footnotes_numbers = array(); 
  60. public $abbr_desciptions = array(); 
  61. public $abbr_word_re = ''; 
  62.  
  63. # Give the current footnote number. 
  64. public $footnote_counter = 1; 
  65.  
  66.  
  function setup() {
      # Prepare the Extra-specific variables: run the parent setup, reset the
      # footnote and abbreviation state, then load the predefined abbreviations.
      parent::setup();

      # Footnote state.
      $this->footnotes = array();
      $this->footnotes_ordered = array();
      $this->footnotes_ref_count = array();
      $this->footnotes_numbers = array();
      $this->footnote_counter = 1;

      # Abbreviation state.
      $this->abbr_desciptions = array();
      $this->abbr_word_re = '';

      # Build a '|'-separated alternation of the quoted abbreviation words
      # and remember the trimmed description of each word.
      foreach ($this->predef_abbr as $abbr_word => $abbr_desc) {
          $separator = $this->abbr_word_re ? '|' : '';
          $this->abbr_word_re .= $separator . preg_quote($abbr_word);
          $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
      }
  }
  84.  
  function teardown() {
      # Release the Extra-specific variables, then let the parent class do
      # its own teardown.

      # Every footnote and abbreviation table goes back to an empty array
      # (arrays are copied by value, so the properties stay independent).
      $this->footnotes
          = $this->footnotes_ordered
          = $this->footnotes_ref_count
          = $this->footnotes_numbers
          = $this->abbr_desciptions
          = array();
      $this->abbr_word_re = '';

      parent::teardown();
  }
  95.  
  96.  
  97. ### Extra Attribute Parser ### 
  98.  
  # Expression to use to catch attributes (includes the braces).
  # Matches "{" + one or more "#id" / ".class" tokens + "}" and captures the
  # token list. The quantifier must be written `{1,}` with no whitespace:
  # `{1, }` is not recognized as a quantifier by older PCRE versions, and a
  # space before it would be a literal space in patterns used without /x.
  public $id_class_attr_catch_re = '\{((?:[ ]*[#.][-_:a-zA-Z0-9]+){1,})[ ]*\}';
  # Expression to use when parsing in a context when no capture is desired
  # (same pattern as above without the capturing group).
  public $id_class_attr_nocatch_re = '\{(?:[ ]*[#.][-_:a-zA-Z0-9]+){1,}[ ]*\}';
  103.  
  function doExtraAttributes($tag_name, $attr) {
      # Parse attributes caught by the $this->id_class_attr_catch_re expression
      # and return the HTML-formatted list of attributes.
      # Currently supported attributes are .class and #id.
      #
      #   $tag_name  Name of the tag that receives the attributes (not used
      #              by this implementation).
      #   $attr      Raw attribute text, e.g. "#intro .note .wide".
      #
      # Returns a string such as ' id="intro" class="note wide"' (with a
      # leading space), or "" when $attr is empty or holds no #id/.class token.
      if (empty($attr)) return "";

      # Split on components
      preg_match_all('/[#.][-_:a-zA-Z0-9]+/', $attr, $matches);
      $elements = $matches[0];

      # handle classes and ids (only first id taken into account)
      $classes = array();
      $id = false;
      foreach ($elements as $element) {
          # Square-bracket string offsets: the `$str{0}` form is a parse
          # error since PHP 8.0.
          if ($element[0] == '.') {
              $classes[] = substr($element, 1);
          } else if ($element[0] == '#') {
              if ($id === false) $id = substr($element, 1);
          }
      }

      # compose attributes as string
      $attr_str = "";
      # Compare against false rather than using empty(), so that the id "0"
      # is not silently dropped.
      if ($id !== false) {
          $attr_str .= ' id="'.$id.'"';
      }
      if (!empty($classes)) {
          $attr_str .= ' class="'.implode(" ", $classes).'"';
      }
      return $attr_str;
  }
  130.  
  131.  
  132. function stripLinkDefinitions($text) { 
  133. # Strips link definitions from text, stores the URLs and titles in 
  134. # hash references. 
  135. $less_than_tab = $this->tab_width - 1; 
  136.  
  137. # Link defs are in the form: ^[id]: url "optional title" 
  138. $text = preg_replace_callback('{ 
  139. ^[ ]{0, '.$less_than_tab.'}\[(.+)\][ ]?: # id = $1 
  140. [ ]* 
  141. \n? # maybe *one* newline 
  142. [ ]* 
  143. (?: 
  144. <(.+?)> # url = $2 
  145. (\S+?) # url = $3 
  146. [ ]* 
  147. \n? # maybe one newline 
  148. [ ]* 
  149. (?: 
  150. (?<=\s) # lookbehind for whitespace 
  151. ["(] 
  152. (.*?) # title = $4 
  153. [")] 
  154. [ ]* 
  155. )? # title is optional 
  156. (?:[ ]* '.$this->id_class_attr_catch_re.' )? # $5 = extra id & class attr 
  157. (?:\n+|\Z) 
  158. }xm',  
  159. array(&$this, '_stripLinkDefinitions_callback'),  
  160. $text); 
  161. return $text; 
  function _stripLinkDefinitions_callback($matches) {
      # Callback for stripLinkDefinitions(): stores the URL, title and extra
      # attributes of one link definition under its lower-cased id and
      # removes the definition from the text.
      $link_id = strtolower($matches[1]);

      # The URL comes from group 2 when it is non-empty, otherwise from group 3.
      if ($matches[2] == '') {
          $this->urls[$link_id] = $matches[3];
      } else {
          $this->urls[$link_id] = $matches[2];
      }

      # Reference binding: a group that is missing from $matches becomes
      # null instead of raising an undefined-offset notice.
      $this->titles[$link_id] =& $matches[4];
      $this->ref_attr[$link_id] = $this->doExtraAttributes("", $dummy =& $matches[5]);

      # String that will replace the block
      return '';
  }
  169.  
  170.  
  171. ### HTML Block Parser ### 
  172.  
  173. # Tags that are always treated as block tags: 
  174. public $block_tags_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption'; 
  175.  
  176. # Tags treated as block tags only if the opening tag is alone on its line: 
  177. public $context_block_tags_re = 'script|noscript|ins|del|iframe|object|source|track|param|math|svg|canvas|audio|video'; 
  178.  
  179. # Tags where markdown="1" default to span mode: 
  180. public $contain_span_tags_re = 'p|h[1-6]|li|dd|dt|td|th|legend|address'; 
  181.  
  182. # Tags which must not have their contents modified, no matter where 
  183. # they appear: 
  184. public $clean_tags_re = 'script|math|svg'; 
  185.  
  186. # Tags that do not need to be closed. 
  187. public $auto_close_tags_re = 'hr|img|param|source|track'; 
  188.  
  189.  
  function hashHTMLBlocks($text) {
      # Hashify HTML blocks and "clean tags".
      #
      # Only block-level HTML tags (headers, lists, tables, ...) are hashed,
      # because "paragraphs" wrapped in non-block-level tags such as anchors,
      # phrase emphasis and spans must still get <p>s around them. The list
      # of tags looked for is hard-coded.
      #
      # The work is done by _hashHTMLBlocks_inMarkdown, which calls
      # _hashHTMLBlocks_inHTML when it encounters block tags. When the
      # markdown="1" attribute is found within a tag, _hashHTMLBlocks_inHTML
      # calls back _hashHTMLBlocks_inMarkdown to handle the Markdown syntax
      # within the tag. The two functions call each other recursively.
      if ($this->no_markup) {
          return $text;
      }

      # Call the HTML-in-Markdown hasher; it returns
      # ( processed text , remaining text ) and only the first is wanted.
      $result = $this->_hashHTMLBlocks_inMarkdown($text);

      return $result[0];
  }
  208. function _hashHTMLBlocks_inMarkdown($text, $indent = 0,  
  209. $enclosing_tag_re = '', $span = false) 
  210. # Parse markdown text, calling _HashHTMLBlocks_InHTML for block tags. 
  211. # * $indent is the number of space to be ignored when checking for code 
  212. # blocks. This is important because if we don't take the indent into 
  213. # account, something like this (which looks right) won't work as expected: 
  214. # <div> 
  215. # <div markdown="1"> 
  216. # Hello World. <-- Is this a Markdown code block or text? 
  217. # </div> <-- Is this a Markdown code block or a real tag? 
  218. # <div> 
  219. # If you don't like this, just don't indent the tag on which 
  220. # you apply the markdown="1" attribute. 
  221. # * If $enclosing_tag_re is not empty, stops at the first unmatched closing 
  222. # tag with that name. Nested tags supported. 
  223. # * If $span is true, text inside must be treated as span. So any double 
  224. # newline will be replaced by a single newline so that it does not create 
  225. # paragraphs. 
  226. # Returns an array of that form: ( processed text , remaining text ) 
  227. if ($text === '') return array('', ''); 
  228.  
  229. # Regex to check for the presence of newlines around a block tag. 
  230. $newline_before_re = '/(?:^\n?|\n\n)*$/'; 
  231. $newline_after_re = 
  232. '{ 
  233. ^ # Start of text following the tag. 
  234. (?>[ ]*<!--.*?-->)? # Optional comment. 
  235. [ ]*\n # Must be followed by newline. 
  236. }xs'; 
  237.  
  238. # Regex to match any tag. 
  239. $block_tag_re = 
  240. '{ 
  241. ( # $2: Capture whole tag. 
  242. </? # Any opening or closing tag. 
  243. (?> # Tag name. 
  244. '.$this->block_tags_re.' | 
  245. '.$this->context_block_tags_re.' | 
  246. '.$this->clean_tags_re.' | 
  247. (?!\s)'.$enclosing_tag_re.' 
  248. (?: 
  249. (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 
  250. (?> 
  251. ".*?" | # Double quotes (can contain `>`) 
  252. \'.*?\' | # Single quotes (can contain `>`) 
  253. .+? # Anything but quotes and `>`. 
  254. )*? 
  255. )? 
  256. > # End of tag. 
  257. <!-- .*? --> # HTML Comment 
  258. <\?.*?\?> | <%.*?%> # Processing instruction 
  259. <!\[CDATA\[.*?\]\]> # CData Block 
  260. '. ( !$span ? ' # If not in span. 
  261. # Indented code block 
  262. (?: ^[ ]*\n | ^ | \n[ ]*\n ) 
  263. [ ]{'.($indent+4).'}[^\n]* \n 
  264. (?> 
  265. (?: [ ]{'.($indent+4).'}[^\n]* | [ ]* ) \n 
  266. )* 
  267. # Fenced code block marker 
  268. (?<= ^ | \n ) 
  269. [ ]{0, '.($indent+3).'}(?:~{3, }|`{3, }) 
  270. [ ]* 
  271. (?: 
  272. \.?[-_:a-zA-Z0-9]+ # standalone class name 
  273. '.$this->id_class_attr_nocatch_re.' # extra attributes 
  274. )? 
  275. [ ]* 
  276. (?= \n ) 
  277. ' : '' ). ' # End (if not is span). 
  278. # Code span marker 
  279. # Note, this regex needs to go after backtick fenced 
  280. # code blocks but it should also be kept outside of the 
  281. # "if not in span" condition adding backticks to the parser 
  282. `+ 
  283. }xs'; 
  284.  
  285.  
  286. $depth = 0; # Current depth inside the tag tree. 
  287. $parsed = ""; # Parsed text that will be returned. 
  288.  
  289. # Loop through every tag until we find the closing tag of the parent 
  290. # or loop until reaching the end of text if no parent tag specified. 
  291. do { 
  292. # Split the text using the first $tag_match pattern found. 
  293. # Text before pattern will be first in the array, text after 
  294. # pattern will be at the end, and between will be any catches made 
  295. # by the pattern. 
  296. $parts = preg_split($block_tag_re, $text, 2,  
  297. PREG_SPLIT_DELIM_CAPTURE); 
  298.  
  299. # If in Markdown span mode, add an empty-string span-level hash 
  300. # after each newline to prevent triggering any block element. 
  301. if ($span) { 
  302. $void = $this->hashPart("", ':'); 
  303. $newline = "$void\n"; 
  304. $parts[0] = $void . str_replace("\n", $newline, $parts[0]) . $void; 
  305.  
  306. $parsed .= $parts[0]; # Text before current tag. 
  307.  
  308. # If end of $text has been reached. Stop loop. 
  309. if (count($parts) < 3) { 
  310. $text = ""; 
  311. break; 
  312.  
  313. $tag = $parts[1]; # Tag to handle. 
  314. $text = $parts[2]; # Remaining text after current tag. 
  315. $tag_re = preg_quote($tag); # For use in a regular expression. 
  316.  
  317. # Check for: Fenced code block marker. 
  318. # Note: need to recheck the whole tag to disambiguate backtick 
  319. # fences from code spans 
  320. if (preg_match('{^\n?([ ]{0, '.($indent+3).'})(~{3, }|`{3, })[ ]*(?:\.?[-_:a-zA-Z0-9]+|'.$this->id_class_attr_nocatch_re.')?[ ]*\n?$}', $tag, $capture)) { 
  321. # Fenced code block marker: find matching end marker. 
  322. $fence_indent = strlen($capture[1]); # use captured indent in re 
  323. $fence_re = $capture[2]; # use captured fence in re 
  324. if (preg_match('{^(?>.*\n)*?[ ]{'.($fence_indent).'}'.$fence_re.'[ ]*(?:\n|$)}', $text,  
  325. $matches)) 
  326. # End marker found: pass text unchanged until marker. 
  327. $parsed .= $tag . $matches[0]; 
  328. $text = substr($text, strlen($matches[0])); 
  329. else { 
  330. # No end marker: just skip it. 
  331. $parsed .= $tag; 
  332. # Check for: Indented code block. 
  333. else if ($tag{0} == "\n" || $tag{0} == " ") { 
  334. # Indented code block: pass it unchanged, will be handled 
  335. # later. 
  336. $parsed .= $tag; 
  337. # Check for: Code span marker 
  338. # Note: need to check this after backtick fenced code blocks 
  339. else if ($tag{0} == "`") { 
  340. # Find corresponding end marker. 
  341. $tag_re = preg_quote($tag); 
  342. if (preg_match('{^(?>.+?|\n(?!\n))*?(?<!`)'.$tag_re.'(?!`)}',  
  343. $text, $matches)) 
  344. # End marker found: pass text unchanged until marker. 
  345. $parsed .= $tag . $matches[0]; 
  346. $text = substr($text, strlen($matches[0])); 
  347. else { 
  348. # Unmatched marker: just skip it. 
  349. $parsed .= $tag; 
  350. # Check for: Opening Block level tag or 
  351. # Opening Context Block tag (like ins and del) 
  352. # used as a block tag (tag is alone on its line). 
  353. else if (preg_match('{^<(?:'.$this->block_tags_re.')\b}', $tag) || 
  354. ( preg_match('{^<(?:'.$this->context_block_tags_re.')\b}', $tag) && 
  355. preg_match($newline_before_re, $parsed) && 
  356. preg_match($newline_after_re, $text) ) 
  357. # Need to parse tag and following text using the HTML parser. 
  358. list($block_text, $text) = 
  359. $this->_hashHTMLBlocks_inHTML($tag . $text, "hashBlock", true); 
  360.  
  361. # Make sure it stays outside of any paragraph by adding newlines. 
  362. $parsed .= "\n\n$block_text\n\n"; 
  363. # Check for: Clean tag (like script, math) 
  364. # HTML Comments, processing instructions. 
  365. else if (preg_match('{^<(?:'.$this->clean_tags_re.')\b}', $tag) || 
  366. $tag{1} == '!' || $tag{1} == '?') 
  367. # Need to parse tag and following text using the HTML parser. 
  368. # (don't check for markdown attribute) 
  369. list($block_text, $text) = 
  370. $this->_hashHTMLBlocks_inHTML($tag . $text, "hashClean", false); 
  371.  
  372. $parsed .= $block_text; 
  373. # Check for: Tag with same name as enclosing tag. 
  374. else if ($enclosing_tag_re !== '' && 
  375. # Same name as enclosing tag. 
  376. preg_match('{^</?(?:'.$enclosing_tag_re.')\b}', $tag)) 
  377. # Increase/decrease nested tag count. 
  378. if ($tag{1} == '/') $depth--; 
  379. else if ($tag{strlen($tag)-2} != '/') $depth++; 
  380.  
  381. if ($depth < 0) { 
  382. # Going out of parent element. Clean up and break so we 
  383. # return to the calling function. 
  384. $text = $tag . $text; 
  385. break; 
  386.  
  387. $parsed .= $tag; 
  388. else { 
  389. $parsed .= $tag; 
  390. } while ($depth >= 0); 
  391.  
  392. return array($parsed, $text); 
  393. function _hashHTMLBlocks_inHTML($text, $hash_method, $md_attr) { 
  394. # Parse HTML, calling _HashHTMLBlocks_InMarkdown for block tags. 
  395. # * Calls $hash_method to convert any blocks. 
  396. # * Stops when the first opening tag closes. 
  397. # * $md_attr indicate if the use of the `markdown="1"` attribute is allowed. 
  398. # (it is not inside clean tags) 
  399. # Returns an array of that form: ( processed text , remaining text ) 
  400. if ($text === '') return array('', ''); 
  401.  
  402. # Regex to match `markdown` attribute inside of a tag. 
  403. $markdown_attr_re = ' 
  404. \s* # Eat whitespace before the `markdown` attribute 
  405. markdown 
  406. \s*=\s* 
  407. (?> 
  408. (["\']) # $1: quote delimiter 
  409. (.*?) # $2: attribute value 
  410. \1 # matching delimiter 
  411. ([^\s>]*) # $3: unquoted attribute value 
  412. () # $4: make $3 always defined (avoid warnings) 
  413. }xs'; 
  414.  
  415. # Regex to match any tag. 
  416. $tag_re = '{ 
  417. ( # $2: Capture whole tag. 
  418. </? # Any opening or closing tag. 
  419. [\w:$]+ # Tag name. 
  420. (?: 
  421. (?=[\s"\'/a-zA-Z0-9]) # Allowed characters after tag name. 
  422. (?> 
  423. ".*?" | # Double quotes (can contain `>`) 
  424. \'.*?\' | # Single quotes (can contain `>`) 
  425. .+? # Anything but quotes and `>`. 
  426. )*? 
  427. )? 
  428. > # End of tag. 
  429. <!-- .*? --> # HTML Comment 
  430. <\?.*?\?> | <%.*?%> # Processing instruction 
  431. <!\[CDATA\[.*?\]\]> # CData Block 
  432. }xs'; 
  433.  
  434. $original_text = $text; # Save original text in case of faliure. 
  435.  
  436. $depth = 0; # Current depth inside the tag tree. 
  437. $block_text = ""; # Temporary text holder for current text. 
  438. $parsed = ""; # Parsed text that will be returned. 
  439.  
  440. # Get the name of the starting tag. 
  441. # (This pattern makes $base_tag_name_re safe without quoting.) 
  442. if (preg_match('/^<([\w:$]*)\b/', $text, $matches)) 
  443. $base_tag_name_re = $matches[1]; 
  444.  
  445. # Loop through every tag until we find the corresponding closing tag. 
  446. do { 
  447. # Split the text using the first $tag_match pattern found. 
  448. # Text before pattern will be first in the array, text after 
  449. # pattern will be at the end, and between will be any catches made 
  450. # by the pattern. 
  451. $parts = preg_split($tag_re, $text, 2, PREG_SPLIT_DELIM_CAPTURE); 
  452.  
  453. if (count($parts) < 3) { 
  454. # End of $text reached with unbalanced tag(s). 
  455. # In that case, we return original text unchanged and pass the 
  456. # first character as filtered to prevent an infinite loop in the 
  457. # parent function. 
  458. return array($original_text{0}, substr($original_text, 1)); 
  459.  
  460. $block_text .= $parts[0]; # Text before current tag. 
  461. $tag = $parts[1]; # Tag to handle. 
  462. $text = $parts[2]; # Remaining text after current tag. 
  463.  
  464. # Check for: Auto-close tag (like <hr/>) 
  465. # Comments and Processing Instructions. 
  466. if (preg_match('{^</?(?:'.$this->auto_close_tags_re.')\b}', $tag) || 
  467. $tag{1} == '!' || $tag{1} == '?') 
  468. # Just add the tag to the block as if it was text. 
  469. $block_text .= $tag; 
  470. else { 
  471. # Increase/decrease nested tag count. Only do so if 
  472. # the tag's name match base tag's. 
  473. if (preg_match('{^</?'.$base_tag_name_re.'\b}', $tag)) { 
  474. if ($tag{1} == '/') $depth--; 
  475. else if ($tag{strlen($tag)-2} != '/') $depth++; 
  476.  
  477. # Check for `markdown="1"` attribute and handle it. 
  478. if ($md_attr && 
  479. preg_match($markdown_attr_re, $tag, $attr_m) && 
  480. preg_match('/^1|block|span$/', $attr_m[2] . $attr_m[3])) 
  481. # Remove `markdown` attribute from opening tag. 
  482. $tag = preg_replace($markdown_attr_re, '', $tag); 
  483.  
  484. # Check if text inside this tag must be parsed in span mode. 
  485. $this->mode = $attr_m[2] . $attr_m[3]; 
  486. $span_mode = $this->mode == 'span' || $this->mode != 'block' && 
  487. preg_match('{^<(?:'.$this->contain_span_tags_re.')\b}', $tag); 
  488.  
  489. # Calculate indent before tag. 
  490. if (preg_match('/(?:^|\n)( *?)(?! ).*?$/', $block_text, $matches)) { 
  491. $strlen = $this->utf8_strlen; 
  492. $indent = $strlen($matches[1], 'UTF-8'); 
  493. } else { 
  494. $indent = 0; 
  495.  
  496. # End preceding block with this tag. 
  497. $block_text .= $tag; 
  498. $parsed .= $this->$hash_method($block_text); 
  499.  
  500. # Get enclosing tag name for the ParseMarkdown function. 
  501. # (This pattern makes $tag_name_re safe without quoting.) 
  502. preg_match('/^<([\w:$]*)\b/', $tag, $matches); 
  503. $tag_name_re = $matches[1]; 
  504.  
  505. # Parse the content using the HTML-in-Markdown parser. 
  506. list ($block_text, $text) 
  507. = $this->_hashHTMLBlocks_inMarkdown($text, $indent,  
  508. $tag_name_re, $span_mode); 
  509.  
  510. # Outdent markdown text. 
  511. if ($indent > 0) { 
  512. $block_text = preg_replace("/^[ ]{1, $indent}/m", "",  
  513. $block_text); 
  514.  
  515. # Append tag content to parsed text. 
  516. if (!$span_mode) $parsed .= "\n\n$block_text\n\n"; 
  517. else $parsed .= "$block_text"; 
  518.  
  519. # Start over with a new block. 
  520. $block_text = ""; 
  521. else $block_text .= $tag; 
  522.  
  523. } while ($depth > 0); 
  524.  
  525. # Hash last block text that wasn't processed inside the loop. 
  526. $parsed .= $this->$hash_method($block_text); 
  527.  
  528. return array($parsed, $text); 
  529.  
  530.  
  function hashClean($text) {
      # Called whenever a tag must be hashed when a function inserts a
      # "clean" tag in $text: the text passes through this function and is
      # automatically escaped, blocking invalid nested overlap.
      $hashed = $this->hashPart($text, 'C');
      return $hashed;
  }
  536.  
  537.  
  538. function doAnchors($text) { 
  539. # Turn Markdown link shortcuts into XHTML <a> tags. 
  540. if ($this->in_anchor) return $text; 
  541. $this->in_anchor = true; 
  542.  
  543. # First, handle reference-style links: [link text] [id] 
  544. $text = preg_replace_callback('{ 
  545. ( # wrap whole match in $1 
  546. \[ 
  547. ('.$this->nested_brackets_re.') # link text = $2 
  548. \] 
  549.  
  550. [ ]? # one optional space 
  551. (?:\n[ ]*)? # one optional newline followed by spaces 
  552.  
  553. \[ 
  554. (.*?) # id = $3 
  555. \] 
  556. }xs',  
  557. array(&$this, '_doAnchors_reference_callback'), $text); 
  558.  
  559. # Next, inline-style links: [link text](url "optional title") 
  560. $text = preg_replace_callback('{ 
  561. ( # wrap whole match in $1 
  562. \[ 
  563. ('.$this->nested_brackets_re.') # link text = $2 
  564. \] 
  565. \( # literal paren 
  566. [ \n]* 
  567. (?: 
  568. <(.+?)> # href = $3 
  569. ('.$this->nested_url_parenthesis_re.') # href = $4 
  570. [ \n]* 
  571. ( # $5 
  572. ([\'"]) # quote char = $6 
  573. (.*?) # Title = $7 
  574. \6 # matching quote 
  575. [ \n]* # ignore any spaces/tabs between closing quote and ) 
  576. )? # title is optional 
  577. \) 
  578. (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes 
  579. }xs',  
  580. array(&$this, '_doAnchors_inline_callback'), $text); 
  581.  
  582. # Last, handle reference-style shortcuts: [link text] 
  583. # These must come last in case you've also got [link text][1] 
  584. # or [link text](/foo) 
  585. $text = preg_replace_callback('{ 
  586. ( # wrap whole match in $1 
  587. \[ 
  588. ([^\[\]]+) # link text = $2; can\'t contain [ or ] 
  589. \] 
  590. }xs',  
  591. array(&$this, '_doAnchors_reference_callback'), $text); 
  592.  
  593. $this->in_anchor = false; 
  594. return $text; 
  function _doAnchors_reference_callback($matches) {
      # Callback for reference-style links ([text][id], [text][] and [text]).
      # Returns the hashed <a> tag when the id is a known link definition,
      # otherwise the matched text unchanged.
      $whole_match = $matches[1];
      $link_text   = $matches[2];

      # Group 3 is absent for the [text] shortcut form.
      $link_id = isset($matches[3]) ? $matches[3] : "";
      if ($link_id == "") {
          # for shortcut links like [this][] or [this].
          $link_id = $link_text;
      }

      # lower-case and turn embedded newlines into spaces
      $link_id = preg_replace('{[ ]?\n}', ' ', strtolower($link_id));

      # Unknown id: leave the text as it was written.
      if (!isset($this->urls[$link_id])) {
          return $whole_match;
      }

      $url = $this->encodeAttribute($this->urls[$link_id]);
      $result = "<a href=\"$url\"";

      if (isset($this->titles[$link_id])) {
          $title = $this->encodeAttribute($this->titles[$link_id]);
          $result .= " title=\"$title\"";
      }
      if (isset($this->ref_attr[$link_id])) {
          $result .= $this->ref_attr[$link_id];
      }

      $link_text = $this->runSpanGamut($link_text);
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  function _doAnchors_inline_callback($matches) {
      # Callback for inline-style links: [text](url "title"){#id .class}.
      # Returns the hashed <a> tag.
      $link_text = $this->runSpanGamut($matches[2]);

      # The URL comes from group 3 when it is non-empty, otherwise from group 4.
      if ($matches[3] == '') {
          $url = $matches[4];
      } else {
          $url = $matches[3];
      }

      # Reference binding: a missing group becomes null instead of raising
      # an undefined-offset notice.
      $title =& $matches[7];
      $attr = $this->doExtraAttributes("a", $dummy =& $matches[8]);

      $result = "<a href=\"" . $this->encodeAttribute($url) . "\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $result .= " title=\"$title\"";
      }
      $result .= $attr;

      # NOTE(review): the link text goes through runSpanGamut() a second
      # time here; kept as in the original -- confirm whether it is intended.
      $link_text = $this->runSpanGamut($link_text);
      $result .= ">$link_text</a>";

      return $this->hashPart($result);
  }
  646.  
  647.  
  648. function doImages($text) { 
  649. # Turn Markdown image shortcuts into <img> tags. 
  650. # First, handle reference-style labeled images: ![alt text][id] 
  651. $text = preg_replace_callback('{ 
  652. ( # wrap whole match in $1 
  653. !\[ 
  654. ('.$this->nested_brackets_re.') # alt text = $2 
  655. \] 
  656.  
  657. [ ]? # one optional space 
  658. (?:\n[ ]*)? # one optional newline followed by spaces 
  659.  
  660. \[ 
  661. (.*?) # id = $3 
  662. \] 
  663.  
  664. }xs',  
  665. array(&$this, '_doImages_reference_callback'), $text); 
  666.  
  667. # Next, handle inline images: ![alt text](url "optional title") 
  668. # Don't forget: encode * and _ 
  669. $text = preg_replace_callback('{ 
  670. ( # wrap whole match in $1 
  671. !\[ 
  672. ('.$this->nested_brackets_re.') # alt text = $2 
  673. \] 
  674. \s? # One optional whitespace character 
  675. \( # literal paren 
  676. [ \n]* 
  677. (?: 
  678. <(\S*)> # src url = $3 
  679. ('.$this->nested_url_parenthesis_re.') # src url = $4 
  680. [ \n]* 
  681. ( # $5 
  682. ([\'"]) # quote char = $6 
  683. (.*?) # title = $7 
  684. \6 # matching quote 
  685. [ \n]* 
  686. )? # title is optional 
  687. \) 
  688. (?:[ ]? '.$this->id_class_attr_catch_re.' )? # $8 = id/class attributes 
  689. }xs',  
  690. array(&$this, '_doImages_inline_callback'), $text); 
  691.  
  692. return $text; 
  function _doImages_reference_callback($matches) {
      # Callback for reference-style images: ![alt text][id].
      # Returns the hashed <img> tag when the id is a known link definition,
      # otherwise the matched text unchanged.
      $whole_match = $matches[1];
      $alt_text    = $matches[2];

      $link_id = strtolower($matches[3]);
      if ($link_id == "") {
          # for shortcut links like ![this][].
          $link_id = strtolower($alt_text);
      }

      $alt_text = $this->encodeAttribute($alt_text);

      # If there's no such link ID, leave intact:
      if (!isset($this->urls[$link_id])) {
          return $whole_match;
      }

      $url = $this->encodeAttribute($this->urls[$link_id]);
      $result = "<img src=\"$url\" alt=\"$alt_text\"";

      if (isset($this->titles[$link_id])) {
          $title = $this->encodeAttribute($this->titles[$link_id]);
          $result .= " title=\"$title\"";
      }
      if (isset($this->ref_attr[$link_id])) {
          $result .= $this->ref_attr[$link_id];
      }
      $result .= $this->empty_element_suffix;

      return $this->hashPart($result);
  }
  function _doImages_inline_callback($matches) {
      # Callback for inline images: ![alt text](url "title"){#id .class}.
      # Returns the hashed <img> tag.
      $alt_text = $matches[2];

      # The URL comes from group 3 when it is non-empty, otherwise from group 4.
      if ($matches[3] == '') {
          $url = $matches[4];
      } else {
          $url = $matches[3];
      }

      # Reference binding: a missing group becomes null instead of raising
      # an undefined-offset notice.
      $title =& $matches[7];
      $attr = $this->doExtraAttributes("img", $dummy =& $matches[8]);

      $alt_text = $this->encodeAttribute($alt_text);
      $url = $this->encodeAttribute($url);

      $result = "<img src=\"$url\" alt=\"$alt_text\"";
      if (isset($title)) {
          $title = $this->encodeAttribute($title);
          $result .= " title=\"$title\"";
      }
      $result .= $attr . $this->empty_element_suffix;

      return $this->hashPart($result);
  }
  735.  
  736.  
  function doHeaders($text) {
      # Redefined to add id and class attribute support.
      #
      # Takes the text to transform and returns it with every header replaced
      # by the result of the matching callback.
      #
      # Setext-style headers:
      #    Header 1  {#header1}
      #    ========
      #
      #    Header 2  {#header2 .class1 .class2}
      #    --------
      #
      $text = preg_replace_callback(
          '{
              (^.+?)                              # $1: Header text
              (?:[ ]+ '.$this->id_class_attr_catch_re.' )?    # $2: id/class attributes
              [ ]*\n(=+|-+)[ ]*\n+                # $3: Header footer
          }mx',
          array(&$this, '_doHeaders_callback_setext'), $text);

      # atx-style headers:
      #   # Header 1        {#header1}
      #   ## Header 2       {#header2}
      #   ## Header 2 with closing hashes ##  {#header3.class1.class2}
      #   ...
      #   ###### Header 6   {.class2}
      #
      # The quantifier is written {1,6} with no whitespace: a space inside
      # the braces keeps older PCRE versions from reading it as a quantifier.
      $text = preg_replace_callback('{
              ^(\#{1,6})  # $1 = string of #\'s
              [ ]*
              (.+?)       # $2 = Header text
              [ ]*
              \#*         # optional closing #\'s (not counted)
              (?:[ ]+ '.$this->id_class_attr_catch_re.' )?    # $3 = id/class attributes
              [ ]*
              \n+
          }xm',
          array(&$this, '_doHeaders_callback_atx'), $text);

      return $text;
  }
  function _doHeaders_callback_setext($matches) {
      # Callback for setext-style headers.
      #   $matches[1]  header text
      #   $matches[2]  optional id/class attributes (may be missing)
      #   $matches[3]  underline made of "=" (level 1) or "-" (level 2)
      # Returns the hashed <h1>/<h2> block surrounded by newlines, or the
      # matched text unchanged in the special case below.

      # A single "-" under a line starting with "- " is left alone
      # (presumably so that a list item is not turned into a header).
      if ($matches[3] == '-' && preg_match('{^- }', $matches[1]))
          return $matches[0];

      # Square-bracket string offset: the `$str{0}` form is a parse error
      # since PHP 8.0.
      $level = $matches[3][0] == '=' ? 1 : 2;

      # Reference binding: a missing group 2 becomes null instead of raising
      # an undefined-offset notice.
      $attr = $this->doExtraAttributes("h$level", $dummy =& $matches[2]);
      $block = "<h$level$attr>".$this->runSpanGamut($matches[1])."</h$level>";
      return "\n" . $this->hashBlock($block) . "\n\n";
  }
  function _doHeaders_callback_atx($matches) {
      # Callback for atx-style headers.
      #   $matches[1]  the leading run of "#" (its length is the level)
      #   $matches[2]  header text
      #   $matches[3]  optional id/class attributes (may be missing)
      # Returns the hashed <hN> block surrounded by newlines.
      $level = strlen($matches[1]);
      $tag = "h$level";

      # Reference binding: a missing group 3 becomes null instead of raising
      # an undefined-offset notice.
      $attr = $this->doExtraAttributes($tag, $dummy =& $matches[3]);
      $inner = $this->runSpanGamut($matches[2]);

      return "\n" . $this->hashBlock("<$tag$attr>" . $inner . "</$tag>") . "\n\n";
  }
  783.  
  784.  
  function doTables($text) {
      #
      # Form HTML tables.
      #
      # Runs two passes over $text: one for tables whose rows start with a
      # pipe, one for tables without a leading pipe. Each match is handed
      # to a callback that returns the replacement text.
      # Returns the transformed text.
      #
      $less_than_tab = $this->tab_width - 1;

      #
      # Find tables with leading pipe.
      #
      #   | Header 1 | Header 2
      #   | -------- | --------
      #   | Cell 1   | Cell 2
      #   | Cell 3   | Cell 4
      #
      # Note: the quantifier must be written without inner spaces;
      # a spaced form is not reliably parsed as a quantifier by PCRE.
      $text = preg_replace_callback('{
              ^                               # Start of a line
              [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
              [|]                             # Optional leading pipe (present)
              (.+) \n                         # $1: Header row (at least one pipe)

              [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
              [|] ([ ]*[-:]+[-| :]*) \n       # $2: Header underline

              (                               # $3: Cells
                  (?>
                      [ ]*                    # Allowed whitespace.
                      [|] .* \n               # Row content.
                  )*
              )
              (?=\n|\Z)                       # Stop at final double newline.
          }xm',
          array($this, '_doTable_leadingPipe_callback'), $text);

      #
      # Find tables without leading pipe.
      #
      #   Header 1 | Header 2
      #   -------- | --------
      #   Cell 1   | Cell 2
      #   Cell 3   | Cell 4
      #
      $text = preg_replace_callback('{
              ^                               # Start of a line
              [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
              (\S.*[|].*) \n                  # $1: Header row (at least one pipe)

              [ ]{0,'.$less_than_tab.'}       # Allowed whitespace.
              ([-:]+[ ]*[|][-| :]*) \n        # $2: Header underline

              (                               # $3: Cells
                  (?>
                      .* [|] .* \n            # Row content
                  )*
              )
              (?=\n|\Z)                       # Stop at final double newline.
          }xm',
          array($this, '_doTable_callback'), $text);

      return $text;
  }
  function _doTable_leadingPipe_callback($matches) {
      # Callback for tables written with a leading pipe: strips that pipe
      # from every body row, then delegates to _doTable_callback() with
      # the same (whole match, head, underline, content) layout.
      $head      = $matches[1];
      $underline = $matches[2];
      $content   = $matches[3];

      # Remove leading pipe for each row.
      $content = preg_replace('/^ *[|]/m', '', $content);

      return $this->_doTable_callback(array($matches[0], $head, $underline, $content));
  }
  function _doTable_callback($matches) {
      # Builds the HTML for one table.
      #   $matches[1] - header row
      #   $matches[2] - header underline (carries column alignment)
      #   $matches[3] - body rows, one per line
      # Returns the hashed <table> block followed by a newline.
      $head      = $matches[1];
      $underline = $matches[2];
      $content   = $matches[3];

      # Remove any trailing pipes for each line.
      $head      = preg_replace('/[|] *$/m', '', $head);
      $underline = preg_replace('/[|] *$/m', '', $underline);
      $content   = preg_replace('/[|] *$/m', '', $content);

      # Reading alignment from header underline.
      $attr       = array();
      $separators = preg_split('/ *[|] */', $underline);
      foreach ($separators as $n => $s) {
          if (preg_match('/^ *-+: *$/', $s)) {
              $attr[$n] = ' align="right"';
          } else if (preg_match('/^ *:-+: *$/', $s)) {
              $attr[$n] = ' align="center"';
          } else if (preg_match('/^ *:-+ *$/', $s)) {
              $attr[$n] = ' align="left"';
          } else {
              $attr[$n] = '';
          }
      }

      # Parsing span elements, including code spans, character escapes,
      # and inline HTML tags, so that pipes inside those get ignored.
      $head      = $this->parseSpan($head);
      $headers   = preg_split('/ *[|] */', $head);
      $col_count = count($headers);
      # Make sure every header column has an (possibly empty) attribute.
      $attr      = array_pad($attr, $col_count, '');

      # Write column headers.
      $text  = "<table>\n";
      $text .= "<thead>\n";
      $text .= "<tr>\n";
      foreach ($headers as $n => $header) {
          $text .= " <th$attr[$n]>" . $this->runSpanGamut(trim($header)) . "</th>\n";
      }
      $text .= "</tr>\n";
      $text .= "</thead>\n";

      # Split content by row.
      $rows = explode("\n", trim($content, "\n"));

      $text .= "<tbody>\n";
      foreach ($rows as $row) {
          # Parsing span elements, including code spans, character escapes,
          # and inline HTML tags, so that pipes inside those get ignored.
          $row = $this->parseSpan($row);

          # Split row by cell; the limit folds surplus cells into the last
          # column and the padding fills in missing ones.
          $row_cells = preg_split('/ *[|] */', $row, $col_count);
          $row_cells = array_pad($row_cells, $col_count, '');

          $text .= "<tr>\n";
          foreach ($row_cells as $n => $cell) {
              $text .= " <td$attr[$n]>" . $this->runSpanGamut(trim($cell)) . "</td>\n";
          }
          $text .= "</tr>\n";
      }
      $text .= "</tbody>\n";
      $text .= "</table>";

      return $this->hashBlock($text) . "\n";
  }
  897.  
  898.  
  function doDefLists($text) {
      #
      # Form HTML definition lists.
      #
      # Finds each whole definition list at the start of the text or after
      # a blank line and replaces it through _doDefLists_callback().
      # Returns the transformed text.
      #
      $less_than_tab = $this->tab_width - 1;

      # Re-usable pattern to match any entire dl list. Quantifiers are
      # written without inner spaces so PCRE parses them as quantifiers.
      $whole_list_re = '(?>
          (                               # $1 = whole list
            (                             # $2
              [ ]{0,'.$less_than_tab.'}
              ((?>.*\S.*\n)+)             # $3 = defined term
              \n?
              [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
            )
            (?s:.+?)
            (                             # $4
                \z
              |
                \n{2,}
                (?=\S)
                (?!                       # Negative lookahead for another term
                  [ ]{0,'.$less_than_tab.'}
                  (?: \S.*\n )+?          # defined term
                  \n?
                  [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                )
                (?!                       # Negative lookahead for another definition
                  [ ]{0,'.$less_than_tab.'}:[ ]+ # colon starting definition
                )
            )
          )
      )'; // mx

      $text = preg_replace_callback('{
              (?>\A\n?|(?<=\n\n))
              '.$whole_list_re.'
          }mx',
          array($this, '_doDefLists_callback'), $text);

      return $text;
  }
  function _doDefLists_callback($matches) {
      # Callback for doDefLists(): $matches[1] holds the whole list text.
      $list = $matches[1];

      # Convert the terms and definitions, wrap them in a <dl> element and
      # hash the result so later passes leave it alone.
      $result = trim($this->processDefListItems($list));
      $result = "<dl>\n" . $result . "\n</dl>";
      return $this->hashBlock($result) . "\n\n";
  }
  941.  
  942.  
  function processDefListItems($list_str) {
      #
      # Process the contents of a single definition list, splitting it
      # into individual term and definition list items.
      # Returns the list text with terms and definitions replaced by the
      # output of the two callbacks.
      #
      $less_than_tab = $this->tab_width - 1;

      # Trim trailing blank lines. No /x flag here, so the quantifier must
      # not contain a space or it would be matched literally.
      $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

      # Process definition terms.
      $list_str = preg_replace_callback('{
          (?>\A\n?|\n\n+)                     # leading line
          (                                   # definition terms = $1
              [ ]{0,'.$less_than_tab.'}       # leading whitespace
              (?!\:[ ]|[ ])                   # negative lookahead for a definition
                                              #   mark (colon) or more whitespace.
              (?> \S.* \n)+?                  # actual term (not whitespace).
          )
          (?=\n?[ ]{0,'.$less_than_tab.'}:[ ]) # lookahead for following line feed
                                              #   with a definition mark.
          }xm',
          array($this, '_processDefListItems_callback_dt'), $list_str);

      # Process actual definitions.
      $list_str = preg_replace_callback('{
          \n(\n+)?                            # leading line = $1
          (                                   # marker space = $2
              [ ]{0,'.$less_than_tab.'}       # whitespace before colon
              \:[ ]+                          # definition mark (colon)
          )
          ((?s:.+?))                          # definition text = $3
          (?= \n+                             # stop at next definition mark,
              (?:                             # next term or end of text
                  [ ]{0,'.$less_than_tab.'} \:[ ] |
                  <dt> | \z
              )
          )
          }xm',
          array($this, '_processDefListItems_callback_dd'), $list_str);

      return $list_str;
  }
  function _processDefListItems_callback_dt($matches) {
      # Turns a group of term lines ($matches[1], one term per line) into
      # one <dt> element per term, each preceded by a newline.
      $terms = explode("\n", trim($matches[1]));
      $text  = '';
      foreach ($terms as $term) {
          $term  = $this->runSpanGamut(trim($term));
          $text .= "\n<dt>" . $term . "</dt>";
      }
      # The return must follow the loop so every term is emitted.
      return $text . "\n";
  }
  function _processDefListItems_callback_dd($matches) {
      # Turns one definition into a <dd> element.
      #   $matches[1] - blank line(s) before the definition, if any
      #   $matches[2] - indentation plus colon marker
      #   $matches[3] - definition text
      $leading_line = $matches[1];
      $marker_space = $matches[2];
      $def          = $matches[3];

      # A definition preceded by a blank line, or containing one, is run
      # through the block gamut; otherwise only the span gamut is applied.
      # The quantifier has no inner space: this pattern has no /x flag, so
      # a spaced form would be matched as literal text and never succeed.
      if ($leading_line || preg_match('/\n{2,}/', $def)) {
          # Replace marker with the appropriate whitespace indentation
          $def = str_repeat(' ', strlen($marker_space)) . $def;
          $def = $this->runBlockGamut($this->outdent($def . "\n\n"));
          $def = "\n" . $def . "\n";
      } else {
          $def = rtrim($def);
          $def = $this->runSpanGamut($this->outdent($def));
      }

      return "\n<dd>" . $def . "</dd>\n";
  }
  1001.  
  1002.  
  function doFencedCodeBlocks($text) {
      #
      # Adding the fenced code block syntax to regular Markdown:
      #
      # ~~~
      # Code block
      # ~~~
      #
      # Each fenced block is replaced through _doFencedCodeBlocks_callback().
      # Returns the transformed text.
      #
      # Capture groups 1 and 4 must be explicit: the closing marker is
      # matched with a back-reference to group 1, and the callback reads
      # groups 2, 3 and 4.
      $text = preg_replace_callback('{
              (?:\n|\A)
              # 1: Opening marker
              (
                  (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
              )
              [ ]*
              (?:
                  \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
              |
                  '.$this->id_class_attr_catch_re.' # 3: Extra attributes
              )?
              [ ]* \n # Whitespace and newline following marker.

              # 4: Content
              (
                  (?>
                      (?!\1 [ ]* \n)  # Not a closing marker.
                      .*\n+
                  )+
              )

              # Closing marker.
              \1 [ ]* (?= \n )
          }xm',
          array($this, '_doFencedCodeBlocks_callback'), $text);

      return $text;
  }
  function _doFencedCodeBlocks_callback($matches) {
      # Builds the <pre><code> element for one fenced code block.
      #   $matches[2] - standalone class name, if given
      #   $matches[3] - extra attribute block, if given
      #   $matches[4] - code content
      # References avoid notices for groups that did not take part in the
      # match (they read as null).
      $classname =& $matches[2];
      $attrs     =& $matches[3];
      $codeblock = $matches[4];

      $codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);
      # Leading blank lines are replaced by explicit line-break elements.
      $codeblock = preg_replace_callback('/^\n+/',
          array($this, '_doFencedCodeBlocks_newlines'), $codeblock);

      if ($classname != "") {
          # Square-bracket offset replaces the curly-brace form removed
          # in PHP 8.
          if ($classname[0] == '.') {
              $classname = substr($classname, 1);
          }
          $attr_str = ' class="' . $this->code_class_prefix . $classname . '"';
      } else {
          $attr_str = $this->doExtraAttributes($this->code_attr_on_pre ? "pre" : "code", $attrs);
      }

      # Attributes go on exactly one of the two tags.
      $pre_attr_str  = $this->code_attr_on_pre ? $attr_str : '';
      $code_attr_str = $this->code_attr_on_pre ? '' : $attr_str;
      $codeblock     = "<pre$pre_attr_str><code$code_attr_str>$codeblock</code></pre>";

      return "\n\n" . $this->hashBlock($codeblock) . "\n\n";
  }
  function _doFencedCodeBlocks_newlines($matches) {
      # Returns one "<br" + empty_element_suffix element for every
      # character of the matched run ($matches[0]).
      return str_repeat("<br$this->empty_element_suffix",
          strlen($matches[0]));
  }
  1055.  
  1056.  
  # Redefining emphasis markers so that emphasis by underscore does not
  # work in the middle of a word.
  #
  # In each table the '' entry is the pattern for an opening marker and
  # the other entries, keyed by the marker itself, are the patterns for
  # the corresponding closing marker. The punctuation class carries no
  # space: the preceding (?=\S|$) already rules out a following space.
  public $em_relist = array(
      ''  => '(?:(?<!\*)\*(?!\*)|(?<![a-zA-Z0-9_])_(?!_))(?=\S|$)(?![\.,:;]\s)',
      '*' => '(?<=\S|^)(?<!\*)\*(?!\*)',
      '_' => '(?<=\S|^)(?<!_)_(?![a-zA-Z0-9_])',
      );
  public $strong_relist = array(
      ''   => '(?:(?<!\*)\*\*(?!\*)|(?<![a-zA-Z0-9_])__(?!_))(?=\S|$)(?![\.,:;]\s)',
      '**' => '(?<=\S|^)(?<!\*)\*\*(?!\*)',
      '__' => '(?<=\S|^)(?<!_)__(?![a-zA-Z0-9_])',
      );
  public $em_strong_relist = array(
      ''    => '(?:(?<!\*)\*\*\*(?!\*)|(?<![a-zA-Z0-9_])___(?!_))(?=\S|$)(?![\.,:;]\s)',
      '***' => '(?<=\S|^)(?<!\*)\*\*\*(?!\*)',
      '___' => '(?<=\S|^)(?<!_)___(?![a-zA-Z0-9_])',
      );
  1074.  
  1075.  
  function formParagraphs($text) {
      #
      # Params:
      #   $text - string to process with html <p> tags
      #
      # Returns the text with each paragraph span-processed and wrapped in
      # <p> tags, and with tag hashes replaced via unhash().
      #
      # Strip leading and trailing lines:
      $text = preg_replace('/\A\n+|\n+\z/', '', $text);

      # Split on blank lines. The quantifier has no inner space: without
      # the /x flag a spaced form is literal text and would never split.
      $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

      #
      # Wrap <p> tags and unhashify HTML blocks
      #
      foreach ($grafs as $key => $value) {
          $value = trim($this->runSpanGamut($value));

          # Check if this should be enclosed in a paragraph.
          # Clean tag hashes & block tag hashes are left alone.
          $is_p = !preg_match('/^B\x1A[0-9]+B|^C\x1A[0-9]+C$/', $value);

          if ($is_p) {
              $value = "<p>$value</p>";
          }
          $grafs[$key] = $value;
      }

      # Join grafs in one text, then unhash HTML tags.
      $text = implode("\n\n", $grafs);

      # Finish by removing any tag hashes still present in $text.
      $text = $this->unhash($text);

      return $text;
  }
  1103.  
  1104.  
  1105. ### Footnotes 
  1106.  
  function stripFootnotes($text) {
      #
      # Strips footnote definitions from text; each one is handed to
      # _stripFootnotes_callback(), which stores it and removes it.
      # Returns the text without the definitions.
      #
      $less_than_tab = $this->tab_width - 1;

      # Footnote defs are in the form: [^id]: text of the note
      $text = preg_replace_callback('{
          ^[ ]{0,'.$less_than_tab.'}\[\^(.+?)\][ ]?:  # note_id = $1
            [ ]*
            \n?                   # maybe *one* newline
          (                       # text = $2 (no blank lines allowed)
              (?:
                  .+              # actual text
              |
                  \n              # newlines but
                  (?!\[\^.+?\]:\s)# negative lookahead for footnote marker.
                  (?!\n+[ ]{0,3}\S)# ensure line is not blank and followed
                                  # by non-indented content
              )*
          )
          }xm',
          array($this, '_stripFootnotes_callback'),
          $text);
      return $text;
  }
  function _stripFootnotes_callback($matches) {
      # Stores the outdented footnote text ($matches[2]) under its
      # prefixed id ($matches[1]) and removes the definition from the text.
      $note_id = $this->fn_id_prefix . $matches[1];
      $this->footnotes[$note_id] = $this->outdent($matches[2]);
      return ''; # String that will replace the block
  }
  1133.  
  1134.  
  function doFootnotes($text) {
      #
      # Replace footnote references in $text [^id] with a special text-token
      # which will be replaced by the actual footnote marker in appendFootnotes.
      # Nothing is replaced while the in_anchor flag is set.
      #
      if (!$this->in_anchor) {
          $text = preg_replace('{\[\^(.+?)\]}', "F\x1Afn:\\1\x1A:", $text);
      }
      return $text;
  }
  1141.  
  1142.  
  function appendFootnotes($text) {
      #
      # Append footnote list to text.
      #
      # First turns the footnote tokens left by doFootnotes() into markers,
      # then, if any footnote was referenced, appends a
      # <div class="footnotes"> block with one <li> per footnote.
      # Returns the resulting text.
      #
      $text = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
          array($this, '_appendFootnotes_callback'), $text);

      if (!empty($this->footnotes_ordered)) {
          $text .= "\n\n";
          $text .= "<div class=\"footnotes\">\n";
          $text .= "<hr" . $this->empty_element_suffix . "\n";
          $text .= "<ol>\n\n";

          # Attribute template shared by all backlinks; a "%%" inside it
          # is replaced by the footnote number for each footnote.
          $attr = " rev=\"footnote\"";
          if ($this->fn_backlink_class != "") {
              $class = $this->fn_backlink_class;
              $class = $this->encodeAttribute($class);
              $attr .= " class=\"$class\"";
          }
          if ($this->fn_backlink_title != "") {
              $title = $this->fn_backlink_title;
              $title = $this->encodeAttribute($title);
              $attr .= " title=\"$title\"";
          }
          $num = 0;

          # The list may grow while it is consumed: a footnote body can
          # itself reference further footnotes.
          while (!empty($this->footnotes_ordered)) {
              $footnote = reset($this->footnotes_ordered);
              $note_id = key($this->footnotes_ordered);
              unset($this->footnotes_ordered[$note_id]);
              $ref_count = $this->footnotes_ref_count[$note_id];
              unset($this->footnotes_ref_count[$note_id]);
              unset($this->footnotes[$note_id]);

              $footnote .= "\n"; # Need to append newline before parsing.
              $footnote = $this->runBlockGamut("$footnote\n");
              $footnote = preg_replace_callback('{F\x1Afn:(.*?)\x1A:}',
                  array($this, '_appendFootnotes_callback'), $footnote);

              # Substitute into a per-footnote copy; overwriting the
              # template would leave every later footnote with number 1.
              $note_attr = str_replace("%%", (string) ++$num, $attr);
              $note_id = $this->encodeAttribute($note_id);

              # Prepare backlink, multiple backlinks if multiple references.
              # Numeric character references keep the output independent
              # of this file's encoding.
              $backlink = "<a href=\"#fnref:$note_id\"$note_attr>&#8617;</a>";
              for ($ref_num = 2; $ref_num <= $ref_count; ++$ref_num) {
                  $backlink .= " <a href=\"#fnref$ref_num:$note_id\"$note_attr>&#8617;</a>";
              }
              # Add backlink to last paragraph; create new paragraph if needed.
              if (preg_match('{</p>$}', $footnote)) {
                  $footnote = substr($footnote, 0, -4) . "&#160;$backlink</p>";
              } else {
                  $footnote .= "\n\n<p>$backlink</p>";
              }

              $text .= "<li id=\"fn:$note_id\">\n";
              $text .= $footnote . "\n";
              $text .= "</li>\n\n";
          }

          $text .= "</ol>\n";
          $text .= "</div>";
      }
      return $text;
  }
  function _appendFootnotes_callback($matches) {
      # Turns one footnote token into its <sup> marker. $matches[1] is the
      # footnote id as written in the text. When no footnote with that id
      # is defined, the original "[^id]" text is returned instead.
      $node_id = $this->fn_id_prefix . $matches[1];

      # Create footnote marker only if it has a corresponding footnote *and*
      # the footnote hasn't been used by another marker.
      if (isset($this->footnotes[$node_id])) {
          $num =& $this->footnotes_numbers[$node_id];
          if (!isset($num)) {
              # Transfer footnote content to the ordered list and give it its
              # number
              $this->footnotes_ordered[$node_id] = $this->footnotes[$node_id];
              $this->footnotes_ref_count[$node_id] = 1;
              $num = $this->footnote_counter++;
              $ref_count_mark = '';
          } else {
              # Repeated reference: bump the count and use it in the id.
              $ref_count_mark = $this->footnotes_ref_count[$node_id] += 1;
          }

          $attr = " rel=\"footnote\"";
          if ($this->fn_link_class != "") {
              $class = $this->fn_link_class;
              $class = $this->encodeAttribute($class);
              $attr .= " class=\"$class\"";
          }
          if ($this->fn_link_title != "") {
              $title = $this->fn_link_title;
              $title = $this->encodeAttribute($title);
              $attr .= " title=\"$title\"";
          }

          # "%%" in the class or title is replaced by the footnote number.
          $attr = str_replace("%%", (string) $num, $attr);
          $node_id = $this->encodeAttribute($node_id);

          return
              "<sup id=\"fnref$ref_count_mark:$node_id\">".
              "<a href=\"#fn:$node_id\"$attr>$num</a>".
              "</sup>";
      }

      return "[^" . $matches[1] . "]";
  }
  1234.  
  1235.  
  1236. ### Abbreviations ### 
  1237.  
  function stripAbbreviations($text) {
      #
      # Strips abbreviation definitions from text; each one is handed to
      # _stripAbbreviations_callback(), which stores it and removes it.
      # Returns the text without the definitions.
      #
      $less_than_tab = $this->tab_width - 1;

      # Abbreviation defs are in the form: *[id]: description
      $text = preg_replace_callback('{
          ^[ ]{0,'.$less_than_tab.'}\*\[(.+?)\][ ]?:  # abbr_id = $1
          (.*)                    # text = $2 (no blank lines allowed)
          }xm',
          array($this, '_stripAbbreviations_callback'),
          $text);
      return $text;
  }
  function _stripAbbreviations_callback($matches) {
      # Registers one abbreviation: appends the quoted word ($matches[1])
      # to the alternation in abbr_word_re and stores its trimmed
      # description ($matches[2]). The definition is removed from the text.
      $abbr_word = $matches[1];
      $abbr_desc = $matches[2];

      # Compare against the empty string rather than testing truthiness:
      # an alternation consisting only of "0" would otherwise count as
      # empty and the "|" separator would be skipped.
      if ((string) $this->abbr_word_re !== '') {
          $this->abbr_word_re .= '|';
      }
      $this->abbr_word_re .= preg_quote($abbr_word);
      $this->abbr_desciptions[$abbr_word] = trim($abbr_desc);
      return ''; # String that will replace the block
  }
  1258.  
  1259.  
  function doAbbreviations($text) {
      #
      # Find defined abbreviations in text and wrap them in <abbr> elements.
      # Returns the text unchanged when no abbreviation is registered.
      #
      # Empty-string comparison instead of a truthiness test, so that an
      # alternation consisting only of "0" is still applied.
      if ((string) $this->abbr_word_re !== '') {
          // cannot use the /x modifier because abbr_word_re may
          // contain significant spaces:
          $text = preg_replace_callback('{'.
              '(?<![\w\x1A])'.
              '(?:'.$this->abbr_word_re.')'.
              '(?![\w\x1A])'.
              '}',
              array($this, '_doAbbreviations_callback'), $text);
      }
      return $text;
  }
  function _doAbbreviations_callback($matches) {
      # Wraps one matched abbreviation ($matches[0]) in a hashed <abbr>
      # element, with a title attribute when a description is stored.
      # A match with no stored entry is returned unchanged.
      $abbr = $matches[0];
      if (!isset($this->abbr_desciptions[$abbr])) {
          return $matches[0];
      }

      $desc = $this->abbr_desciptions[$abbr];
      # Compare against the empty string: empty() would also discard a
      # description of "0".
      if ((string) $desc === '') {
          return $this->hashPart("<abbr>$abbr</abbr>");
      }

      $desc = $this->encodeAttribute($desc);
      return $this->hashPart("<abbr title=\"$desc\">$abbr</abbr>");
  }
  1283.