WPCom_GHF_Markdown_Parser

GitHub-Flavoured Markdown.

Defined (1)

The class is defined in the following location(s).

/_inc/lib/markdown/gfm.php  
  1. class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser { 
  2.  
  3. /** 
  4. * Hooray somewhat arbitrary numbers that are fearful of 1.0.x. 
  5. */ 
  6. const WPCOM_GHF_MARDOWN_VERSION = '0.9.0'; 
  7.  
  8. /** 
  9. * Use a [code] shortcode when encountering a fenced code block 
  10. * @var boolean 
  11. */ 
  12. public $use_code_shortcode = true; 
  13.  
  14. /** 
  15. * Preserve shortcodes, untouched by Markdown. 
  16. * This requires use within a WordPress installation. 
  17. * @var boolean 
  18. */ 
  19. public $preserve_shortcodes = true; 
  20.  
  21. /** 
  22. * Preserve the legacy $latex your-latex-code-here$ style 
  23. * LaTeX markup 
  24. */ 
  25. public $preserve_latex = true; 
  26.  
  27. /** 
  28. * Preserve single-line <code> blocks. 
  29. * @var boolean 
  30. */ 
  31. public $preserve_inline_code_blocks = true; 
  32.  
  33. /** 
  34. * Strip paragraphs from the output. This is the right default for WordPress,  
  35. * which generally wants to create its own paragraphs with `wpautop` 
  36. * @var boolean 
  37. */ 
  38. public $strip_paras = true; 
  39.  
  40. // Will run through sprintf - you can supply your own syntax if you want 
  41. public $shortcode_start = '[code lang=%s]'; 
  42. public $shortcode_end = '[/code]'; 
  43.  
  44. // Stores shortcodes we remove and then replace 
  45. protected $preserve_text_hash = array(); 
  46.  
  /**
   * Detect the host environment and switch each optional feature on or off.
   *
   * Every flag below is overwritten with the result of an existence check, so
   * the property defaults declared on the class only hold until construction.
   */
  public function __construct() {
      // [code] shortcodes are only emitted when the SyntaxHighlighter class exists.
      $this->use_code_shortcode = class_exists( 'SyntaxHighlighter' );

      // Shortcode preservation needs the get_shortcode_regex() function.
      $this->preserve_shortcodes = function_exists( 'get_shortcode_regex' );

      // Legacy "$latex ...$" preservation is tied to the presence of latex_markup().
      $this->preserve_latex = function_exists( 'latex_markup' );

      // Paragraph stripping is tied to the presence of wpautop().
      $this->strip_paras = function_exists( 'wpautop' );

      parent::__construct();
  }
  57.  
  /**
   * Convert Markdown text to HTML, with heading and preservation tweaks.
   *
   * Overloaded so that a heading is only produced when the hash is followed by
   * a space. This is in keeping with the documentation and eases the semantic
   * overload of the hash character:
   *
   *     #Will Not Produce a Heading 1
   *     # This Will Produce a Heading 1
   *
   * Depending on the instance flags, inline <code> contents, shortcodes and
   * legacy LaTeX are swapped for placeholders before the parent transform
   * runs, and are put back at the end.
   *
   * @param string $text Markdown text
   * @return string HTML-transformed text
   */
  public function transform( $text ) {
      // Preserve anything inside a single-line <code> element
      if ( $this->preserve_inline_code_blocks ) {
          $text = $this->single_line_code_preserve( $text );
      }

      // Remove all shortcodes so their interiors are left intact
      if ( $this->preserve_shortcodes ) {
          $text = $this->shortcode_preserve( $text );
      }

      // Remove legacy LaTeX so it's left intact
      if ( $this->preserve_latex ) {
          $text = $this->latex_preserve( $text );
      }

      // Escape line-beginning runs of 1-6 # chars that do not have a space after them.
      // The quantifier is written without whitespace on purpose: many PCRE
      // versions read "{1, 6}" as literal text rather than as a repeat count,
      // which would stop this escape from ever firing.
      $text = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );

      // run through core Markdown
      $text = parent::transform( $text );

      // Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
      // NOTE(review): the needle holds a raw "<"; confirm against upstream whether
      // it was meant to be the entity form before relying on this cleanup.
      $text = str_replace( "<p><</p>\n\n<p>p>", '<p>', $text );

      // put start-of-line # chars back in place
      $text = $this->restore_leading_hash( $text );

      // Strip paras if set
      if ( $this->strip_paras ) {
          $text = $this->unp( $text );
      }

      // Restore preserved things like shortcodes/LaTeX
      $text = $this->do_restore( $text );

      return $text;
  }
  97.  
  /**
   * Shield the contents of single-line <code> elements from Markdown, so that
   * <code>__this__</code> does not become <code><strong>this</strong></code>.
   *
   * The pattern has no "s" flag, so "." stops at a newline and only elements
   * that open and close on the same line are matched.
   *
   * @param string $text Text that may need preserving
   * @return string Text with matching <code> interiors replaced by placeholders
   */
  public function single_line_code_preserve( $text ) {
      $inline_code = '|<code\b[^>]*>(.*?)</code>|';
      $handler     = array( $this, 'do_single_line_code_preserve' );

      return preg_replace_callback( $inline_code, $handler, $text );
  }
  105.  
  /**
   * Regex callback for inline code preservation.
   *
   * Only the element's interior (capture group 1) is stored; the result is
   * wrapped in a bare <code> tag, so attributes on the matched opening tag
   * are not carried over.
   *
   * @param array $matches Regex matches
   * @return string <code> element whose interior is a placeholder for later restoration
   */
  public function do_single_line_code_preserve( $matches ) {
      $placeholder = $this->hash_block( $matches[1] );

      return "<code>{$placeholder}</code>";
  }
  113.  
  /**
   * HTML-encode the interior of fenced code blocks. Useful before the content
   * reaches KSES stripping.
   *
   * A block is matched when a line starts with three backtick/tilde
   * characters, optionally followed by text up to the end of the line, then a
   * body containing neither backticks nor tildes, then the same three
   * characters again.
   *
   * @param string $text Markdown/HTML content
   * @return string Markdown/HTML content with escaped code blocks
   */
  public function codeblock_preserve( $text ) {
      $fenced_block = "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m";
      $handler      = array( $this, 'do_codeblock_preserve' );

      return preg_replace_callback( $fenced_block, $handler, $text );
  }
  121.  
  /**
   * Regex callback for code block preservation.
   *
   * The block body (group 3) is unslashed, HTML-escaped, and then has every
   * backslash doubled. The opening fence (group 1), the text following it
   * (group 2) and the closing fence (group 4) are passed through unchanged.
   *
   * @param array $matches Regex matches
   * @return string Codeblock with escaped interior
   */
  public function do_codeblock_preserve( $matches ) {
      $escaped = esc_html( stripslashes( $matches[3] ) );
      $escaped = str_replace( '\\', '\\\\', $escaped );

      return $matches[1] . $matches[2] . "\n" . $escaped . $matches[4];
  }
  133.  
  /**
   * Undo the escaping of fenced code block interiors.
   *
   * Uses the same fence pattern as the preserving pass and hands every match
   * to do_codeblock_restore().
   *
   * @param string $text Markdown/HTML content with escaped code blocks
   * @return string Markdown/HTML content
   */
  public function codeblock_restore( $text ) {
      $fenced_block = "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m";
      $handler      = array( $this, 'do_codeblock_restore' );

      return preg_replace_callback( $fenced_block, $handler, $text );
  }
  141.  
  /**
   * Regex callback for code block restoration (unescaping).
   *
   * Decodes HTML entities, including both quote styles, in the block body
   * (group 3). The fences and the text after the opening fence are left as
   * they were matched.
   *
   * @param array $matches Regex matches
   * @return string Codeblock with unescaped interior
   */
  public function do_codeblock_restore( $matches ) {
      $decoded = html_entity_decode( $matches[3], ENT_QUOTES );

      return $matches[1] . $matches[2] . "\n" . $decoded . $matches[4];
  }
  151.  
  /**
   * Called to preserve legacy LaTeX like $latex some-latex-text $
   *
   * Every match is replaced, via _doRemoveText(), by a placeholder that is
   * swapped back for the original text later.
   *
   * @param string $text Text in which to preserve LaTeX
   * @return string Text with LaTeX replaced by a hash that will be restored later
   */
  protected function latex_preserve( $text ) {
      // regex from latex_remove()
      //
      // The body is a repetition of two ALTERNATIVES: a run of non-dollar
      // characters, or a dollar escaped by exactly one backslash. The "|"
      // between them is required; without it every run of text would have to
      // be followed by an escaped dollar and ordinary LaTeX would never match.
      $regex = '%
          \$latex(?:=\s*|\s+)
          ((?:
              [^$]+ # Not a dollar
          |
              (?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
          )+)
          (?<!\\\\)\$ # Dollar preceded by zero slashes
      %ix';

      return preg_replace_callback( $regex, array( $this, '_doRemoveText' ), $text );
  }
  169.  
  /**
   * Keep WP shortcodes from being formatted by Markdown in any way.
   *
   * Each match of this class's shortcode regex is swapped, whole, for a
   * placeholder by _doRemoveText().
   *
   * @param string $text Text in which to preserve shortcodes
   * @return string Text with shortcodes replaced by a hash that will be restored later
   */
  protected function shortcode_preserve( $text ) {
      $shortcode_pattern = $this->get_shortcode_regex();
      $handler           = array( $this, '_doRemoveText' );

      return preg_replace_callback( $shortcode_pattern, $handler, $text );
  }
  178.  
  /**
   * Restores any text preserved by $this->hash_block()
   *
   * Placeholders are restored newest-first. A block preserved later may
   * contain the placeholder of a block preserved earlier (for example a
   * shortcode wrapping an already-hashed inline <code> element). Restoring
   * the outer block first exposes the inner placeholder so that it is
   * replaced as well.
   *
   * The stored map is emptied afterwards, so each call starts from a clean
   * slate.
   *
   * @param string $text Text that may have hashed preservation placeholders
   * @return string Text with hashed preservation placeholders replaced by original text
   */
  protected function do_restore( $text ) {
      // Reverse insertion order (keys kept) so nested placeholders get restored.
      $preserved = array_reverse( $this->preserve_text_hash, true );

      foreach ( $preserved as $hash => $value ) {
          $placeholder = $this->hash_maker( $hash );
          $text        = str_replace( $placeholder, $value, $text );
      }

      // reset the hash
      $this->preserve_text_hash = array();

      return $text;
  }
  191.  
  /**
   * Regex callback for text preservation.
   *
   * Stores the complete match (index 0) and hands back its placeholder.
   *
   * @param array $m Regex $matches array
   * @return string A placeholder that will later be replaced by the original text
   */
  protected function _doRemoveText( $m ) {
      $whole_match = $m[0];

      return $this->hash_block( $whole_match );
  }
  199.  
  /**
   * Call this to store a text block for later restoration.
   *
   * The text is stored under its md5 digest, so identical text always maps to
   * the same entry and the same placeholder.
   *
   * @param string $text Text to preserve for later
   * @return string Placeholder that will be swapped out later for the original text
   */
  protected function hash_block( $text ) {
      $key = md5( $text );

      $this->preserve_text_hash[ $key ] = $text;

      return $this->hash_maker( $key );
  }
  210.  
  /**
   * Less glamorous than the Keymaker.
   *
   * Wraps the hash in a fixed marker on both sides to form the placeholder
   * token.
   *
   * @param string $hash An md5 hash
   * @return string A placeholder hash
   */
  protected function hash_maker( $hash ) {
      $marker = 'MARKDOWN_HASH';

      return $marker . $hash . $marker;
  }
  218.  
  /**
   * Remove bare <p> elements. <p>s with attributes will be preserved.
   *
   * Only a literal "<p>" opening tag is matched, and the closing tag must be
   * followed by a newline or the end of a line/the text. The paragraph's
   * interior and that trailing newline (if any) are kept.
   *
   * @param string $text HTML content
   * @return string <p>-less content
   */
  public function unp( $text ) {
      $bare_paragraph = "#<p>(.*?)</p>(\n|$)#ums";
      $keep_interior  = '$1$2';

      return preg_replace( $bare_paragraph, $keep_interior, $text );
  }
  226.  
  /**
   * A regex of all shortcodes currently registered by the current
   * WordPress installation.
   *
   * The pattern from the global get_shortcode_regex() is extended with a
   * negative lookahead for "(", so markdown link anchors such as
   * "[text](url)" are not mistaken for shortcodes. The result is wrapped in
   * "/" delimiters with the "s" flag.
   *
   * @uses get_shortcode_regex()
   * @return string A regex for grabbing shortcodes.
   */
  protected function get_shortcode_regex() {
      // don't match markdown link anchors that could be mistaken for shortcodes.
      $not_a_link_anchor = '(?!\()';

      $pattern = get_shortcode_regex() . $not_a_link_anchor;

      return "/$pattern/s";
  }
  240.  
  /**
   * Since we escape unspaced #Headings, put things back later.
   *
   * At the start of each line, an optional "<p>" followed by either "#" or
   * "\#" is rewritten to that same optional "<p>" followed by a plain "#".
   *
   * @param string $text text with a leading escaped hash
   * @return string text with leading hashes unescaped
   */
  protected function restore_leading_hash( $text ) {
      $leading_hash = "/^(<p>)?(#|\\\\#)/um";
      $unescaped    = "$1#";

      return preg_replace( $leading_hash, $unescaped, $text );
  }
  248.  
  /**
   * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
   * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
   *
   * Adds the fenced code block syntax to regular Markdown:
   *
   *     ~~~
   *     Code block
   *     ~~~
   *
   * @param string $text Markdown text
   * @return string Text with each fenced block replaced by the result of
   *                _doFencedCodeBlocks_callback()
   */
  public function doFencedCodeBlocks( $text ) {
      // If we're at least at 1.2.8, native fenced code blocks are in.
      // Below is just copied from it in case we somehow got loaded on
      // top of someone else's Markdown Extra
      if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
          return parent::doFencedCodeBlocks( $text );
      }

      // Capture groups, as consumed by the callback:
      //   1 - opening marker (back-referenced to find the matching closing marker)
      //   2 - standalone class name
      //   3 - extra attributes (presumably captured inside id_class_attr_catch_re,
      //       which is defined by the parent class - confirm there)
      //   4 - block content
      // Quantifiers are written without inner whitespace ("{3,}") so that every
      // PCRE version treats them as repeat counts.
      $text = preg_replace_callback(
          '{
              (?:\n|\A)
              # 1: Opening marker
              (
                  (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
              )
              [ ]*
              (?:
                  \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
              |
                  ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
              )?
              [ ]* \n # Whitespace and newline following marker.

              # 4: Content
              (
                  (?>
                      (?!\1 [ ]* \n) # Not a closing marker.
                      .*\n+
                  )+
              )

              # Closing marker.
              \1 [ ]* (?= \n )
          }xm',
          array( $this, '_doFencedCodeBlocks_callback' ),
          $text
      );

      return $text;
  }
  289.  
  /**
   * Callback for pre-processing start of line hashes to slyly escape headings
   * that don't have a leading space.
   *
   * When the optional group (index 1) is present in the match array, the
   * match is returned untouched. When it is absent, the match is returned
   * with a backslash in front.
   *
   * @param array $m preg_match matches
   * @return string possibly escaped start of line hash
   */
  public function _doEscapeForHashWithoutSpacing( $m ) {
      $hashes = $m[0];

      return isset( $m[1] ) ? $hashes : '\\' . $hashes;
  }
  300.  
  /**
   * Overload to support Viper's [code] shortcode. Because awesome.
   *
   * When code shortcodes are disabled, the match is handed to the parent
   * implementation. Otherwise the block content is HTML-escaped, wrapped in
   * the configured shortcode start/end strings, and passed through
   * hashBlock().
   *
   * @param array $matches Regex matches: [2] is the class name, [4] the block content
   * @return string Replacement for the fenced block
   */
  public function _doFencedCodeBlocks_callback( $matches ) {
      // in case we have some escaped leading hashes right at the start of the block
      $matches[4] = $this->restore_leading_hash( $matches[4] );

      // just MarkdownExtra_Parser if we're not going ultra-deluxe
      if ( ! $this->use_code_shortcode ) {
          return parent::_doFencedCodeBlocks_callback( $matches );
      }

      // default to a "text" class if one wasn't passed. Helps with encoding issues later.
      $classname = empty( $matches[2] ) ? 'text' : $matches[2];

      // Drop a leading dot. Square-bracket offsets are used because the
      // curly-brace form ($classname{0}) is a parse error on PHP 8.
      if ( '.' === $classname[0] ) {
          $classname = substr( $classname, 1 );
      }

      $codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );
      $codeblock = esc_html( $codeblock );
      $codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;

      return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
  }
  324.