WPCom_GHF_Markdown_Parser

GitHub-Flavoured Markdown.

Defined (1)

The class is defined in the following location(s).

/_inc/lib/markdown/gfm.php  
  1. class WPCom_GHF_Markdown_Parser extends MarkdownExtra_Parser { 
  2.  
  /**
   * Parser version string. The numbering is somewhat arbitrary and stays shy of 1.0.x.
   * The constant's spelling ("MARDOWN") is left untouched so existing references keep working.
   */
  const WPCOM_GHF_MARDOWN_VERSION = '0.9.0';

  /**
   * When true, a fenced code block is emitted as a [code] shortcode.
   *
   * @var boolean
   */
  public $use_code_shortcode = true;

  /**
   * When true, shortcodes are kept exactly as written and never touched by Markdown.
   * This requires use within a WordPress installation.
   *
   * @var boolean
   */
  public $preserve_shortcodes = true;

  /**
   * When true, the legacy `$latex your-latex-code-here$` LaTeX markup is kept as written.
   *
   * @var boolean
   */
  public $preserve_latex = true;

  /**
   * When true, the contents of single-line <code> blocks are kept as written.
   *
   * @var boolean
   */
  public $preserve_inline_code_blocks = true;

  /**
   * When true, paragraphs are stripped from the output. This is the right default for
   * WordPress, which generally wants to create its own paragraphs with `wpautop`.
   *
   * @var boolean
   */
  public $strip_paras = true;

  /**
   * Opening syntax of the code shortcode. It is run through sprintf() with the
   * language name as its argument, so you can supply your own syntax if you want.
   *
   * @var string
   */
  public $shortcode_start = '[code lang=%s]';

  /**
   * Closing syntax of the code shortcode, appended after the code block's content.
   *
   * @var string
   */
  public $shortcode_end = '[/code]';

  /**
   * Text that was removed ahead of Markdown processing and is put back afterwards,
   * keyed by the md5 hash of that text.
   *
   * @var array
   */
  protected $preserve_text_hash = array();
  46.  
  /**
   * Derive the feature switches from what the host environment offers, then
   * hand over to the parent parser's constructor.
   *
   * - [code] shortcodes are used only when the SyntaxHighlighter class exists.
   * - Shortcodes are preserved only when the filter allows it and get_shortcode_regex() exists.
   * - Legacy LaTeX is preserved only when latex_markup() exists.
   * - Paragraphs are stripped only when wpautop() exists.
   */
  public function __construct() {
    $this->use_code_shortcode = class_exists( 'SyntaxHighlighter' );

    /**
     * Allow processing shortcode contents.
     * @module markdown
     * @since 4.4.0
     * @param boolean $preserve_shortcodes Defaults to $this->preserve_shortcodes.
     */
    $keep_shortcodes = apply_filters( 'jetpack_markdown_preserve_shortcodes', $this->preserve_shortcodes );
    $this->preserve_shortcodes = $keep_shortcodes && function_exists( 'get_shortcode_regex' );

    $this->preserve_latex = function_exists( 'latex_markup' );
    $this->strip_paras = function_exists( 'wpautop' );

    parent::__construct();
  }
  63.  
  /**
   * Overload to specify heading styles only if the hash has space(s) after it. This is
   * actually in keeping with the documentation and eases the semantic overload of the
   * hash character.
   *
   *   #Will Not Produce a Heading 1
   *   # This Will Produce a Heading 1
   *
   * Processing order: protect inline <code>, shortcodes, legacy LaTeX and any custom
   * patterns behind placeholders; escape unspaced leading hashes; run the parent
   * Markdown transform; then undo the escaping, optionally strip paragraphs, and
   * finally put the protected text back.
   *
   * @param string $text Markdown text
   * @return string HTML-transformed text
   */
  public function transform( $text ) {
    // Preserve anything inside a single-line <code> element
    if ( $this->preserve_inline_code_blocks ) {
      $text = $this->single_line_code_preserve( $text );
    }

    // Remove all shortcodes so their interiors are left intact
    if ( $this->preserve_shortcodes ) {
      $text = $this->shortcode_preserve( $text );
    }

    // Remove legacy LaTeX so it's left intact
    if ( $this->preserve_latex ) {
      $text = $this->latex_preserve( $text );
    }

    // Escape line-beginning # chars that do not have a space after them.
    // The quantifier must be written without whitespace ({1,6}); with a space inside
    // the braces PCRE has traditionally treated it as literal text, so no heading
    // marker would ever match.
    $text = preg_replace_callback( '|^#{1,6}( )?|um', array( $this, '_doEscapeForHashWithoutSpacing' ), $text );

    /**
     * Allow third-party plugins to define custom patterns that won't be processed by Markdown.
     * @module markdown
     * @since 3.9.2
     * @param array $custom_patterns Array of custom patterns to be ignored by Markdown.
     */
    $custom_patterns = apply_filters( 'jetpack_markdown_preserve_pattern', array() );
    if ( is_array( $custom_patterns ) && ! empty( $custom_patterns ) ) {
      foreach ( $custom_patterns as $pattern ) {
        $text = preg_replace_callback( $pattern, array( $this, '_doRemoveText' ), $text );
      }
    }

    // run through core Markdown
    $text = parent::transform( $text );

    // Occasionally Markdown Extra chokes on a para structure, producing odd paragraphs.
    $text = str_replace( "<p><</p>\n\n<p>p>", '<p>', $text );

    // put start-of-line # chars back in place
    $text = $this->restore_leading_hash( $text );

    // Strip paras if set
    if ( $this->strip_paras ) {
      $text = $this->unp( $text );
    }

    // Restore preserved things like shortcodes/LaTeX
    $text = $this->do_restore( $text );

    return $text;
  }
  114.  
  /**
   * Shields the contents of single-line <code> elements from Markdown, so that
   * <code>__this__</code> does not become <code><strong>this</strong></code>.
   *
   * @param string $text Text that may need preserving
   * @return string Text in which each single-line <code> interior was swapped for a placeholder
   */
  public function single_line_code_preserve( $text ) {
    // No "s" modifier: "." stops at newlines, which limits matches to one line.
    $inline_code = '|<code\b[^>]*>(.*?)</code>|';
    $handler = array( $this, 'do_single_line_code_preserve' );

    return preg_replace_callback( $inline_code, $handler, $text );
  }
  122.  
  /**
   * Regex callback for inline code preservation.
   *
   * The interior of the element is stored via hash_block() and replaced by its
   * placeholder. The result is always wrapped in a bare <code> tag.
   *
   * @param array $matches Regex matches; index 1 holds the element's interior
   * @return string Hashed content for later restoration
   */
  public function do_single_line_code_preserve( $matches ) {
    $placeholder = $this->hash_block( $matches[1] );

    return '<code>' . $placeholder . '</code>';
  }
  130.  
  /**
   * Preserve code block contents by HTML encoding them. Useful before getting to KSES stripping.
   *
   * A block is recognised as a line starting with three backticks or tildes, an
   * optional remainder of that line, a body free of backticks and tildes, and the
   * same three-character marker again.
   *
   * @param string $text Markdown/HTML content
   * @return string Markdown/HTML content with escaped code blocks
   */
  public function codeblock_preserve( $text ) {
    $fenced_block = "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m";
    $handler = array( $this, 'do_codeblock_preserve' );

    return preg_replace_callback( $fenced_block, $handler, $text );
  }
  138.  
  /**
   * Regex callback for code block preservation.
   *
   * The block body is unslashed, HTML-escaped, and then has every backslash doubled;
   * the opening line and closing marker are passed through unchanged.
   *
   * @param array $matches Regex matches: 1 = opening marker, 2 = rest of the opening line,
   *                       3 = block body, 4 = closing marker
   * @return string Codeblock with escaped interior
   */
  public function do_codeblock_preserve( $matches ) {
    $escaped = esc_html( stripslashes( $matches[3] ) );
    $escaped = str_replace( '\\', '\\\\', $escaped );

    return $matches[1] . $matches[2] . "\n" . $escaped . $matches[4];
  }
  150.  
  /**
   * Restore previously preserved (i.e. escaped) code block contents.
   *
   * Uses the same block pattern as codeblock_preserve(): a three-character
   * backtick/tilde marker, an optional rest of line, a body without backticks or
   * tildes, and the matching closing marker.
   *
   * @param string $text Markdown/HTML content with escaped code blocks
   * @return string Markdown/HTML content
   */
  public function codeblock_restore( $text ) {
    $fenced_block = "/^([`~]{3})([^`\n]+)?\n([^`~]+)(\\1)/m";
    $handler = array( $this, 'do_codeblock_restore' );

    return preg_replace_callback( $fenced_block, $handler, $text );
  }
  158.  
  /**
   * Regex callback for code block restoration (unescaping).
   *
   * HTML entities in the block body, including both quote styles, are decoded;
   * the opening line and closing marker are passed through unchanged.
   *
   * @param array $matches Regex matches: 1 = opening marker, 2 = rest of the opening line,
   *                       3 = block body, 4 = closing marker
   * @return string Codeblock with unescaped interior
   */
  public function do_codeblock_restore( $matches ) {
    $decoded = html_entity_decode( $matches[3], ENT_QUOTES );

    return $matches[1] . $matches[2] . "\n" . $decoded . $matches[4];
  }
  168.  
  /**
   * Called to preserve legacy LaTeX like $latex some-latex-text $
   *
   * Every match is stored and replaced by a placeholder so Markdown leaves it alone.
   *
   * @param string $text Text in which to preserve LaTeX
   * @return string Text with LaTeX replaced by a hash that will be restored later
   */
  protected function latex_preserve( $text ) {
    // regex from latex_remove()
    // The body is a repetition of EITHER a run of non-dollar characters OR a dollar
    // escaped by exactly one backslash. The "|" between the two branches is required:
    // without it each repetition would need both in sequence, and ordinary
    // "$latex x^2$" would never match.
    $regex = '%
      \$latex(?:=\s*|\s+)
      ((?:
        [^$]+ # Not a dollar
      |
        (?<=(?<!\\\\)\\\\)\$ # Dollar preceded by exactly one slash
      )+)
      (?<!\\\\)\$ # Dollar preceded by zero slashes
    %ix';
    $text = preg_replace_callback( $regex, array( $this, '_doRemoveText' ), $text );
    return $text;
  }
  186.  
  /**
   * Called to preserve WP shortcodes from being formatted by Markdown in any way.
   *
   * Each shortcode matched by this class's get_shortcode_regex() is stored and
   * replaced by a placeholder.
   *
   * @param string $text Text in which to preserve shortcodes
   * @return string Text with shortcodes replaced by a hash that will be restored later
   */
  protected function shortcode_preserve( $text ) {
    $shortcode_pattern = $this->get_shortcode_regex();
    $handler = array( $this, '_doRemoveText' );

    return preg_replace_callback( $shortcode_pattern, $handler, $text );
  }
  195.  
  /**
   * Restores any text preserved by $this->hash_block()
   *
   * Placeholders are restored newest-first. A block preserved later can contain the
   * placeholder of a block preserved earlier (for example inline code inside a
   * shortcode); restoring in insertion order would re-introduce that inner
   * placeholder after its turn had already passed and leave it in the output.
   *
   * The store of preserved text is emptied afterwards.
   *
   * @param string $text Text that may have hashed preservation placeholders
   * @return string Text with hashed preservation placeholders replaced by original text
   */
  protected function do_restore( $text ) {
    // Reverse the store (keeping its hash keys) so outer blocks are expanded first.
    $stored = array_reverse( $this->preserve_text_hash, true );
    foreach ( $stored as $hash => $value ) {
      $placeholder = $this->hash_maker( $hash );
      $text = str_replace( $placeholder, $value, $text );
    }
    // reset the hash
    $this->preserve_text_hash = array();
    return $text;
  }
  208.  
  /**
   * Regex callback for text preservation: stores the entire match and hands back
   * its placeholder.
   *
   * @param array $m Regex $matches array
   * @return string A placeholder that will later be replaced by the original text
   */
  protected function _doRemoveText( $m ) {
    $whole_match = $m[0];

    return $this->hash_block( $whole_match );
  }
  216.  
  /**
   * Call this to store a text block for later restoration.
   *
   * The text is filed under its md5 hash, so storing identical text twice reuses
   * the same entry and yields the same placeholder.
   *
   * @param string $text Text to preserve for later
   * @return string Placeholder that will be swapped out later for the original text
   */
  protected function hash_block( $text ) {
    $key = md5( $text );
    $this->preserve_text_hash[ $key ] = $text;

    return $this->hash_maker( $key );
  }
  227.  
  /**
   * Less glamorous than the Keymaker: builds the placeholder for a hash by
   * wrapping it in the same marker on both sides.
   *
   * @param string $hash An md5 hash
   * @return string A placeholder hash
   */
  protected function hash_maker( $hash ) {
    $marker = 'MARKDOWN_HASH';

    return $marker . $hash . $marker;
  }
  235.  
  /**
   * Remove bare <p> elements. <p>s with attributes will be preserved.
   *
   * Only a paragraph whose closing tag is followed by a newline or the end of a
   * line/the text is unwrapped; its content and that trailing newline are kept.
   *
   * @param string $text HTML content
   * @return string <p>-less content
   */
  public function unp( $text ) {
    $bare_paragraph = "#<p>(.*?)</p>(\n|$)#ums";
    $content_only = '$1$2';

    return preg_replace( $bare_paragraph, $content_only, $text );
  }
  243.  
  /**
   * A regex of all shortcodes currently registered by the current
   * WordPress installation, wrapped in "/" delimiters with the "s" modifier.
   *
   * @uses get_shortcode_regex()
   * @return string A regex for grabbing shortcodes.
   */
  protected function get_shortcode_regex() {
    // This calls the global WordPress function, not this method.
    $registered = get_shortcode_regex();

    // don't match markdown link anchors that could be mistaken for shortcodes.
    $not_before_paren = '(?!\()';

    return '/' . $registered . $not_before_paren . '/s';
  }
  257.  
  /**
   * Since we escape unspaced #Headings, put things back later.
   *
   * Two escaped forms are turned back into a plain "#" when they open a line
   * (optionally right after an opening <p>):
   *  - "&#35;", the numeric entity PHP Markdown emits for a backslash-escaped hash;
   *  - "\#", the raw escape as it still appears in text Markdown has not rewritten,
   *    such as fenced code block content.
   *
   * @param string $text text with a leading escaped hash
   * @return string text with leading hashes unescaped
   */
  protected function restore_leading_hash( $text ) {
    return preg_replace( "/^(<p>)?(&#35;|\\\\#)/um", "$1#", $text );
  }
  265.  
  /**
   * Overload to support ```-fenced code blocks for pre-Markdown Extra 1.2.8
   * https://help.github.com/articles/github-flavored-markdown#fenced-code-blocks
   *
   * From Markdown Extra 1.2.8 on, the parent implementation is used as is. For older
   * versions the pattern below finds the blocks and passes each one to
   * _doFencedCodeBlocks_callback().
   *
   * @param string $text Markdown text
   * @return string Text with fenced code blocks replaced by the callback's output
   */
  public function doFencedCodeBlocks( $text ) {
    // If we're at least at 1.2.8, native fenced code blocks are in.
    // Below is just copied from it in case we somehow got loaded on
    // top of someone else's Markdown Extra
    if ( version_compare( MARKDOWNEXTRA_VERSION, '1.2.8', '>=' ) ) {
      return parent::doFencedCodeBlocks( $text );
    }

    # Adding the fenced code block syntax to regular Markdown:
    # ~~~
    # Code block
    # ~~~
    //
    // Capture groups, as relied on by the callback and the \1 back-references:
    //   1 = opening marker, 2 = standalone class name, 3 = extra attributes, 4 = content.
    // Quantifiers are written without whitespace ({3,}) so they are read as quantifiers.
    $text = preg_replace_callback( '{
        (?:\n|\A)
        # 1: Opening marker
        (
          (?:~{3,}|`{3,}) # 3 or more tildes/backticks.
        )
        [ ]*
        (?:
          \.?([-_:a-zA-Z0-9]+) # 2: standalone class name
        |
          ' . $this->id_class_attr_catch_re . ' # 3: Extra attributes
        )?
        [ ]* \n # Whitespace and newline following marker.

        # 4: Content
        (
          (?>
            (?!\1 [ ]* \n) # Not a closing marker.
            .*\n+
          )+
        )

        # Closing marker.
        \1 [ ]* (?= \n )
      }xm',
      array( $this, '_doFencedCodeBlocks_callback' ),
      $text
    );

    return $text;
  }
  306.  
  /**
   * Callback for pre-processing start of line hashes to slyly escape headings that don't
   * have a leading space.
   *
   * When the optional space group (index 1) is absent from the match, the run of
   * hashes gets a backslash in front; otherwise the match is returned untouched.
   *
   * @param array $m preg_match matches
   * @return string possibly escaped start of line hash
   */
  public function _doEscapeForHashWithoutSpacing( $m ) {
    $hashes = $m[0];

    if ( isset( $m[1] ) ) {
      return $hashes;
    }

    return '\\' . $hashes;
  }
  317.  
  /**
   * Overload to support Viper's [code] shortcode. Because awesome.
   *
   * When the [code] shortcode is not in use, the parent callback handles the block.
   * Otherwise the block content is HTML-escaped, wrapped in the configured shortcode
   * start/end syntax, and hashed via hashBlock().
   *
   * @param array $matches Regex matches; index 2 is the class (language) name and
   *                       index 4 the block content
   * @return string Replacement for the fenced code block
   */
  public function _doFencedCodeBlocks_callback( $matches ) {
    // in case we have some escaped leading hashes right at the start of the block
    $matches[4] = $this->restore_leading_hash( $matches[4] );

    // just MarkdownExtra_Parser if we're not going ultra-deluxe
    if ( ! $this->use_code_shortcode ) {
      return parent::_doFencedCodeBlocks_callback( $matches );
    }

    // default to a "text" class if one wasn't passed. Helps with encoding issues later.
    if ( empty( $matches[2] ) ) {
      $matches[2] = 'text';
    }

    $classname = $matches[2];
    $codeblock = preg_replace_callback( '/^\n+/', array( $this, '_doFencedCodeBlocks_newlines' ), $matches[4] );

    // Square-bracket offset: the curly-brace form ($classname{0}) is a parse error on PHP 8.
    // $classname cannot be empty here because of the "text" default above.
    if ( '.' === $classname[0] ) {
      $classname = substr( $classname, 1 );
    }

    $codeblock = esc_html( $codeblock );
    $codeblock = sprintf( $this->shortcode_start, $classname ) . "\n{$codeblock}" . $this->shortcode_end;
    return "\n\n" . $this->hashBlock( $codeblock ) . "\n\n";
  }
  341.