/_inc/lib/markdown/gfm.php

  1. <?php 
  2. /** 
  3. * GitHub-Flavoured Markdown. Inspired by Evan's plugin, but modified. 
  4. * 
  5. * @author Evan Solomon 
  6. * @author Matt Wiebe <wiebe@automattic.com> 
  7. * @link https://github.com/evansolomon/wp-github-flavored-markdown-comments 
  8. * 
  9. * Add a few extras from GitHub's Markdown implementation. Must be used in a WordPress environment. 
  10. */ 
  11.  
  12. class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser { 
  13.  
  14. /** 
  15. * Hooray somewhat arbitrary numbers that are fearful of 1.0.x. 
  16. */ 
  17. const WPCOM_GHF_MARDOWN_VERSION = '0.9.0'; 
  18.  
  19. /** 
  20. * Use a [code] shortcode when encountering a fenced code block 
  21. * @var boolean 
  22. */ 
  23. public $use_code_shortcode = true; 
  24.  
  25. /** 
  26. * Preserve shortcodes, untouched by Markdown. 
  27. * This requires use within a WordPress installation. 
  28. * @var boolean 
  29. */ 
  30. public $preserve_shortcodes = true; 
  31.  
  32. /** 
  33. * Preserve the legacy $latex your-latex-code-here$ style 
  34. * LaTeX markup 
  35. */ 
  36. public $preserve_latex = true; 
  37.  
  38. /** 
  39. * Preserve single-line <code> blocks. 
  40. * @var boolean 
  41. */ 
  42. public $preserve_inline_code_blocks = true; 
  43.  
  44. /** 
  45. * Strip paragraphs from the output. This is the right default for WordPress,  
  46. * which generally wants to create its own paragraphs with `wpautop` 
  47. * @var boolean 
  48. */ 
  49. public $strip_paras = true; 
  50.  
  51. // Will run through sprintf - you can supply your own syntax if you want 
  52. public $shortcode_start = '[code lang=%s]'; 
  53. public $shortcode_end = '[/code]'; 
  54.  
  55. // Stores shortcodes we remove and then replace 
  56. protected $preserve_text_hash = array(); 
  57.  
  58. /** 
  59. * Set environment defaults based on presence of key functions/classes. 
  60. */ 
  61. public function __construct() { 
  62. $this->use_code_shortcode = class_exists( 'SyntaxHighlighter' ); 
  63. $this->preserve_shortcodes = function_exists( 'get_shortcode_regex' ); 
  64. $this->preserve_latex = function_exists( 'latex_markup' ); 
  65. $this->strip_paras = function_exists( 'wpautop' ); 
  66.  
  67. parent::__construct(); 
  68.  
  69. /** 
  70. * Overload to specify heading styles only if the hash has space(s) after it. This is actually in keeping with 
  71. * the documentation and eases the semantic overload of the hash character. 
  72. * #Will Not Produce a Heading 1 
  73. * # This Will Produce a Heading 1 
  74. * 
  75. * @param string $text Markdown text 
  76. * @return string HTML-transformed text 
  77. */ 
  78. public function transform( $text ) { 
  79. // Preserve anything inside a single-line <code> element 
  80. if ( $this->preserve_inline_code_blocks ) { 
  81. $text = $this->single_line_code_preserve( $text ); 
  82. // Remove all shortcodes so their interiors are left intact 
  83. if ( $this->preserve_shortcodes ) { 
  84. $text = $this->shortcode_preserve( $text ); 
  85. // Remove legacy LaTeX so it's left intact 
  86. if ( $this->preserve_latex ) { 
  87. $text = $this->latex_preserve( $text ); 
  88.  
  89. // escape line-beginning # chars that do not have a space after them. 
  90. $text = preg_replace_callback( '|^#{1, 6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text ); 
  91.  
  92. // run through core Markdown 
  93. $text = parent::transform( $text ); 
  94.  
  95. // Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs. 
  96. $text = str_replace( "<p><</p>\n\n<p>p>", '<p>', $text ); 
  97.  
  98. // put start-of-line # chars back in place 
  99. $text = $this->restore_leading_hash( $text ); 
  100.  
  101. // Strip paras if set 
  102. if ( $this->strip_paras ) { 
  103. $text = $this->unp( $text ); 
  104.  
  105. // Restore preserved things like shortcodes/LaTeX 
  106. $text = $this->do_restore( $text ); 
  107.  
  108. return $text; 
  109.  
  110. /** 
  111. * Prevents blocks like <code>__this__</code> from turning into <code><strong>this</strong></code> 
  112. * @param string $text Text that may need preserving 
  113. * @return string Text that was preserved if needed 
  114. */ 
  115. public function single_line_code_preserve( $text ) { 
  116. return preg_replace_callback( '|<code\b[^>]*>(.*?)</code>|', array( $this, 'do_single_line_code_preserve' ), $text ); 
  117.  
  118. /** 
  119. * Regex callback for inline code presevation 
  120. * @param array $matches Regex matches 
  121. * @return string Hashed content for later restoration 
  122. */ 
  123. public function do_single_line_code_preserve( $matches ) { 
  124. return '<code>' . $this->hash_block( $matches[1] ) . '</code>'; 
  125.  
  126. /** 
  127. * Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping. 
  128. * @param string $text Markdown/HTML content 
  129. * @return string Markdown/HTML content with escaped code blocks 
  130. */ 
  131. public function codeblock_preserve( $text ) { 
  132. return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_preserve' ), $text ); 
  133.  
  134. /** 
  135. * Regex callback for code block preservation. 
  136. * @param array $matches Regex matches 
  137. * @return string Codeblock with escaped interior 
  138. */ 
  139. public function do_codeblock_preserve( $matches ) { 
  140. $block = stripslashes( $matches[3] ); 
  141. $block = esc_html( $block ); 
  142. $block = str_replace( '\\', '\\\\', $block ); 
  143. $open = $matches[1] . $matches[2] . "\n"; 
  144. return $open . $block . $matches[4]; 
  145.  
  146. /** 
  147. * Restore previously preserved (i.e. escaped) code block contents. 
  148. * @param string $text Markdown/HTML content with escaped code blocks 
  149. * @return string Markdown/HTML content 
  150. */ 
  151. public function codeblock_restore( $text ) { 
  152. return preg_replace_callback( "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m", array( $this, 'do_codeblock_restore' ), $text ); 
  153.  
  154. /** 
  155. * Regex callback for code block restoration (unescaping). 
  156. * @param array $matches Regex matches 
  157. * @return string Codeblock with unescaped interior 
  158. */ 
  159. public function do_codeblock_restore( $matches ) { 
  160. $block = html_entity_decode( $matches[3], ENT_QUOTES ); 
  161. $open = $matches[1] . $matches[2] . "\n"; 
  162. return $open . $block . $matches[4]; 
  163.  
  164. /** 
  165. * Called to preserve legacy LaTeX like $latex some-latex-text $ 
  166. * @param string $text Text in which to preserve LaTeX 
  167. * @return string Text with LaTeX replaced by a hash that will be restored later 
  168. */ 
  169. protected function latex_preserve( $text ) { 
  170. // regex from latex_remove() 
  171. $regex = '% 
  172. \$latex(?:=\s*|\s+) 
  173. ((?: 
  174. [^$]+ # Not a dollar 
  175. (?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash 
  176. )+) 
  177. (?<!\\\\)\$ # Dollar preceded by zero slashes 
  178. %ix'; 
  179. $text = preg_replace_callback( $regex, array( $this, '_doRemoveText'), $text ); 
  180. return $text; 
  181.  
  182. /** 
  183. * Called to preserve WP shortcodes from being formatted by Markdown in any way. 
  184. * @param string $text Text in which to preserve shortcodes 
  185. * @return string Text with shortcodes replaced by a hash that will be restored later 
  186. */ 
  187. protected function shortcode_preserve( $text ) { 
  188. $text = preg_replace_callback( $this->get_shortcode_regex(), array( $this, '_doRemoveText' ), $text ); 
  189. return $text; 
  190.  
  191. /** 
  192. * Restores any text preserved by $this->hash_block() 
  193. * @param string $text Text that may have hashed preservation placeholders 
  194. * @return string Text with hashed preseravtion placeholders replaced by original text 
  195. */ 
  196. protected function do_restore( $text ) { 
  197. foreach( $this->preserve_text_hash as $hash => $value ) { 
  198. $placeholder = $this->hash_maker( $hash ); 
  199. $text = str_replace( $placeholder, $value, $text ); 
  200. // reset the hash 
  201. $this->preserve_text_hash = array(); 
  202. return $text; 
  203.  
  204. /** 
  205. * Regex callback for text preservation 
  206. * @param array $m Regex $matches array 
  207. * @return string A placeholder that will later be replaced by the original text 
  208. */ 
  209. protected function _doRemoveText( $m ) { 
  210. return $this->hash_block( $m[0] ); 
  211.  
  212. /** 
  213. * Call this to store a text block for later restoration. 
  214. * @param string $text Text to preserve for later 
  215. * @return string Placeholder that will be swapped out later for the original text 
  216. */ 
  217. protected function hash_block( $text ) { 
  218. $hash = md5( $text ); 
  219. $this->preserve_text_hash[ $hash ] = $text; 
  220. $placeholder = $this->hash_maker( $hash ); 
  221. return $placeholder; 
  222.  
  223. /** 
  224. * Less glamorous than the Keymaker 
  225. * @param string $hash An md5 hash 
  226. * @return string A placeholder hash 
  227. */ 
  228. protected function hash_maker( $hash ) { 
  229. return 'MARKDOWN_HASH' . $hash . 'MARKDOWN_HASH'; 
  230.  
  231. /** 
  232. * Remove bare <p> elements. <p>s with attributes will be preserved. 
  233. * @param string $text HTML content 
  234. * @return string <p>-less content 
  235. */ 
  236. public function unp( $text ) { 
  237. return preg_replace( "#<p>(.*?)</p>(\n|$)#ums", '$1$2', $text ); 
  238.  
  239. /** 
  240. * A regex of all shortcodes currently registered by the current 
  241. * WordPress installation 
  242. * @uses get_shortcode_regex() 
  243. * @return string A regex for grabbing shortcodes. 
  244. */ 
  245. protected function get_shortcode_regex() { 
  246. $pattern = get_shortcode_regex(); 
  247.  
  248. // don't match markdown link anchors that could be mistaken for shortcodes. 
  249. $pattern .= '(?!\()'; 
  250.  
  251. return "/$pattern/s"; 
  252.  
  253. /** 
  254. * Since we escape unspaced #Headings, put things back later. 
  255. * @param string $text text with a leading escaped hash 
  256. * @return string text with leading hashes unescaped 
  257. */ 
  258. protected function restore_leading_hash( $text ) { 
  259. return preg_replace( "/^(<p>)?(#|\\\\#)/um", "$1#", $text ); 
  260.  
  261. /** 
  262. * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8 
  263. * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks 
  264. */ 
  265. public function doFencedCodeBlocks( $text ) { 
  266. // If we're at least at 1.2.8, native fenced code blocks are in. 
  267. // Below is just copied from it in case we somehow got loaded on 
  268. // top of someone else's Markdown Extra 
  269. if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) 
  270. return parent::doFencedCodeBlocks( $text ); 
  271.  
  272. # Adding the fenced code block syntax to regular Markdown: 
  273. # ~~~ 
  274. # Code block 
  275. # ~~~ 
  276. $less_than_tab = $this->tab_width; 
  277.  
  278. $text = preg_replace_callback('{ 
  279. (?:\n|\A) 
  280. # 1: Opening marker 
  281. (?:~{3, }|`{3, }) # 3 or more tildes/backticks. 
  282. [ ]* 
  283. (?: 
  284. \.?([-_:a-zA-Z0-9]+) # 2: standalone class name 
  285. '.$this->id_class_attr_catch_re.' # 3: Extra attributes 
  286. )? 
  287. [ ]* \n # Whitespace and newline following marker. 
  288.  
  289. # 4: Content 
  290. (?> 
  291. (?!\1 [ ]* \n) # Not a closing marker. 
  292. .*\n+ 
  293. )+ 
  294.  
  295. # Closing marker. 
  296. \1 [ ]* (?= \n ) 
  297. }xm',  
  298. array($this, '_doFencedCodeBlocks_callback'), $text); 
  299.  
  300. return $text; 
  301.  
  302. /** 
  303. * Callback for pre-processing start of line hashes to slyly escape headings that don't 
  304. * have a leading space 
  305. * @param array $m preg_match matches 
  306. * @return string possibly escaped start of line hash 
  307. */ 
  308. public function _doEscapeForHashWithoutSpacing( $m ) { 
  309. if ( ! isset( $m[1] ) ) 
  310. $m[0] = '\\' . $m[0]; 
  311. return $m[0]; 
  312.  
  313. /** 
  314. * Overload to support Viper's [code] shortcode. Because awesome. 
  315. */ 
  316. public function _doFencedCodeBlocks_callback( $matches ) { 
  317. // in case we have some escaped leading hashes right at the start of the block 
  318. $matches[4] = $this->restore_leading_hash( $matches[4] ); 
  319. // just MarkdownExtra_Parser if we're not going ultra-deluxe 
  320. if ( ! $this->use_code_shortcode ) { 
  321. return parent::_doFencedCodeBlocks_callback( $matches ); 
  322.  
  323. // default to a "text" class if one wasn't passed. Helps with encoding issues later. 
  324. if ( empty( $matches[2] ) ) { 
  325. $matches[2] = 'text'; 
  326.  
  327. $classname =& $matches[2]; 
  328. $codeblock = preg_replace_callback('/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] ); 
  329.  
  330. if ( $classname{0} == '.' ) 
  331. $classname = substr( $classname, 1 ); 
  332.  
  333. $codeblock = esc_html( $codeblock ); 
  334. $codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end; 
  335. return "\n\n" . $this->hashBlock( $codeblock ). "\n\n"; 
  336.  
.