WXR_Parser_XML

WXR Parser that makes use of the XML Parser PHP extension.

Defined (1)

The class is defined in the following location(s).

/parsers.php  
  /**
   * WXR parser that makes use of the XML Parser PHP extension.
   *
   * The parser is event driven: tag_open(), cdata() and tag_close() are
   * registered as handlers and incrementally fill the state properties below
   * while parse() feeds the file contents to the XML parser.
   */
  class WXR_Parser_XML {
      /**
       * Elements whose text is stored on the current record ($data) under the
       * tag name with its first three characters ("wp:") removed.
       */
      var $wp_tags = array(
          'wp:post_id', 'wp:post_date', 'wp:post_date_gmt', 'wp:comment_status', 'wp:ping_status', 'wp:attachment_url',
          'wp:status', 'wp:post_name', 'wp:post_parent', 'wp:menu_order', 'wp:post_type', 'wp:post_password',
          'wp:is_sticky', 'wp:term_id', 'wp:category_nicename', 'wp:category_parent', 'wp:cat_name', 'wp:category_description',
          'wp:tag_slug', 'wp:tag_name', 'wp:tag_description', 'wp:term_taxonomy', 'wp:term_parent',
          'wp:term_name', 'wp:term_description', 'wp:author_id', 'wp:author_login', 'wp:author_email', 'wp:author_display_name',
          'wp:author_first_name', 'wp:author_last_name',
      );

      /**
       * Elements whose text is stored on the current sub-record ($sub_data)
       * under the tag name with its first three characters ("wp:") removed.
       */
      var $wp_sub_tags = array(
          'wp:comment_id', 'wp:comment_author', 'wp:comment_author_email', 'wp:comment_author_url',
          'wp:comment_author_IP', 'wp:comment_date', 'wp:comment_date_gmt', 'wp:comment_content',
          'wp:comment_approved', 'wp:comment_type', 'wp:comment_parent', 'wp:comment_user_id',
      );

      // Parser state. Declared explicitly so that no property is created
      // dynamically at runtime (deprecated as of PHP 8.2).
      var $wxr_version = false;   // text of <wp:wxr_version>, false until seen
      var $base_url    = '';      // text of <wp:base_site_url>, '' until seen
      var $in_post     = false;   // set to true once an <item> has been opened
      var $in_tag      = false;   // key in $data the next closing tag writes to
      var $in_sub_tag  = false;   // key in $sub_data the next closing tag writes to
      var $cdata       = false;   // text accumulated for the current element
      var $data        = array(); // record being built (post, term, author, ...)
      var $sub_data    = array(); // nested record being built (comment, meta, term ref)
      var $authors     = array();
      var $posts       = array();
      var $term        = array();
      var $category    = array();
      var $tag         = array();

      /**
       * Parse a WXR file.
       *
       * @param string $file Path handed to file_get_contents().
       * @return array|WP_Error Array with the keys 'authors', 'posts',
       *                        'categories', 'tags', 'terms', 'base_url' and
       *                        'version'; a WP_Error with code 'XML_parse_error'
       *                        when the XML parser reports a failure, or
       *                        'WXR_parse_error' when the version is not of the
       *                        form "<digits>.<digits>".
       */
      function parse( $file ) {
          // Reset everything so one instance can parse several files without
          // leaking state (base_url included) from a previous run.
          $this->wxr_version = $this->in_post = $this->cdata = $this->in_tag = $this->in_sub_tag = false;
          $this->data = $this->sub_data = array();
          $this->authors = $this->posts = $this->term = $this->category = $this->tag = array();
          $this->base_url = '';

          $xml = xml_parser_create( 'UTF-8' );
          xml_parser_set_option( $xml, XML_OPTION_SKIP_WHITE, 1 );
          xml_parser_set_option( $xml, XML_OPTION_CASE_FOLDING, 0 );
          // Array callables bind the handlers to this object directly, so
          // xml_set_object() (deprecated in PHP 8.4) is not needed.
          xml_set_character_data_handler( $xml, array( $this, 'cdata' ) );
          xml_set_element_handler( $xml, array( $this, 'tag_open' ), array( $this, 'tag_close' ) );

          // Collect the error details before releasing the parser; they cannot
          // be queried afterwards.
          $error_data = null;
          if ( ! xml_parse( $xml, file_get_contents( $file ), true ) ) {
              $current_line   = xml_get_current_line_number( $xml );
              $current_column = xml_get_current_column_number( $xml );
              $error_code     = xml_get_error_code( $xml );
              $error_string   = xml_error_string( $error_code );
              $error_data     = array( $current_line, $current_column, $error_string );
          }

          // Release the parser on both the success and the failure path.
          // Only resource-based parsers (PHP < 8) need the explicit call; on
          // PHP 8+ the parser is an object and xml_parser_free() has no effect.
          if ( is_resource( $xml ) ) {
              xml_parser_free( $xml );
          }

          if ( null !== $error_data ) {
              return new WP_Error( 'XML_parse_error', 'There was an error when reading this WXR file', $error_data );
          }

          if ( ! preg_match( '/^\d+\.\d+$/', (string) $this->wxr_version ) ) {
              return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
          }

          return array(
              'authors'    => $this->authors,
              'posts'      => $this->posts,
              'categories' => $this->category,
              'tags'       => $this->tag,
              'terms'      => $this->term,
              'base_url'   => $this->base_url,
              'version'    => $this->wxr_version,
          );
      }

      /**
       * Start-element handler: decides where the text of the element that was
       * just opened will be stored once it closes.
       *
       * @param mixed  $parse Parser handle passed in by the XML extension (unused).
       * @param string $tag   Element name as it appears in the document.
       * @param array  $attr  Element attributes.
       */
      function tag_open( $parse, $tag, $attr ) {
          if ( in_array( $tag, $this->wp_tags, true ) ) {
              $this->in_tag = substr( $tag, 3 );
              return;
          }

          if ( in_array( $tag, $this->wp_sub_tags, true ) ) {
              $this->in_sub_tag = substr( $tag, 3 );
              return;
          }

          switch ( $tag ) {
              case 'category':
                  if ( isset( $attr['domain'], $attr['nicename'] ) ) {
                      $this->sub_data['domain'] = $attr['domain'];
                      $this->sub_data['slug']   = $attr['nicename'];
                  }
                  break;
              case 'item':
                  $this->in_post = true;
                  // No break: falls through to 'title' exactly as the
                  // original code does.
              case 'title':
                  if ( $this->in_post ) {
                      $this->in_tag = 'post_title';
                  }
                  break;
              case 'guid':
                  $this->in_tag = 'guid';
                  break;
              case 'dc:creator':
                  $this->in_tag = 'post_author';
                  break;
              case 'content:encoded':
                  $this->in_tag = 'post_content';
                  break;
              case 'excerpt:encoded':
                  $this->in_tag = 'post_excerpt';
                  break;

              case 'wp:term_slug':
                  $this->in_tag = 'slug';
                  break;
              case 'wp:meta_key':
                  $this->in_sub_tag = 'key';
                  break;
              case 'wp:meta_value':
                  $this->in_sub_tag = 'value';
                  break;
          }
      }

      /**
       * Character-data handler: appends a chunk of text to the buffer of the
       * current element. Chunks consisting only of whitespace are ignored.
       *
       * @param mixed  $parser Parser handle passed in by the XML extension (unused).
       * @param string $cdata  Text chunk.
       */
      function cdata( $parser, $cdata ) {
          // Compare against '' rather than testing truthiness: the string "0"
          // is falsy in PHP but is real content (e.g. a menu order of 0).
          if ( '' === trim( $cdata ) ) {
              return;
          }

          if ( false !== $this->in_tag || false !== $this->in_sub_tag ) {
              // Text of a tracked element is kept verbatim.
              $this->cdata .= $cdata;
          } else {
              $this->cdata .= trim( $cdata );
          }
      }

      /**
       * End-element handler: stores the buffered text, or files the finished
       * record into its collection, then clears the text buffer.
       *
       * @param mixed  $parser Parser handle passed in by the XML extension (unused).
       * @param string $tag    Element name as it appears in the document.
       */
      function tag_close( $parser, $tag ) {
          switch ( $tag ) {
              case 'wp:comment':
                  unset( $this->sub_data['key'], $this->sub_data['value'] ); // remove meta sub_data
                  if ( ! empty( $this->sub_data ) ) {
                      $this->data['comments'][] = $this->sub_data;
                  }
                  $this->sub_data = array();
                  break;
              case 'wp:commentmeta':
                  // isset() guards keep a meta element that lacks a key or a
                  // value from raising an undefined-index warning.
                  $this->sub_data['commentmeta'][] = array(
                      'key'   => isset( $this->sub_data['key'] ) ? $this->sub_data['key'] : null,
                      'value' => isset( $this->sub_data['value'] ) ? $this->sub_data['value'] : null,
                  );
                  break;
              case 'category':
                  // sub_data is only populated when the opening tag carried
                  // both the domain and nicename attributes.
                  if ( ! empty( $this->sub_data ) ) {
                      $this->sub_data['name']  = $this->cdata;
                      $this->data['terms'][]   = $this->sub_data;
                  }
                  $this->sub_data = array();
                  break;
              case 'wp:postmeta':
                  if ( ! empty( $this->sub_data ) ) {
                      $this->data['postmeta'][] = $this->sub_data;
                  }
                  $this->sub_data = array();
                  break;
              case 'item':
                  $this->posts[] = $this->data;
                  $this->data    = array();
                  break;
              case 'wp:category':
              case 'wp:tag':
              case 'wp:term':
                  // 'wp:category' -> $this->category, 'wp:tag' -> $this->tag,
                  // 'wp:term' -> $this->term.
                  $n = substr( $tag, 3 );
                  array_push( $this->$n, $this->data );
                  $this->data = array();
                  break;
              case 'wp:author':
                  if ( ! empty( $this->data['author_login'] ) ) {
                      $this->authors[ $this->data['author_login'] ] = $this->data;
                  }
                  $this->data = array();
                  break;
              case 'wp:base_site_url':
                  $this->base_url = $this->cdata;
                  break;
              case 'wp:wxr_version':
                  $this->wxr_version = $this->cdata;
                  break;

              default:
                  // The buffer is false when the element had no text; any
                  // string, including "0", is stored unchanged.
                  $text = is_string( $this->cdata ) ? $this->cdata : '';
                  if ( false !== $this->in_sub_tag ) {
                      $this->sub_data[ $this->in_sub_tag ] = $text;
                      $this->in_sub_tag = false;
                  } else if ( false !== $this->in_tag ) {
                      $this->data[ $this->in_tag ] = $text;
                      $this->in_tag = false;
                  }
          }

          $this->cdata = false;
      }
  }