WXR_Parser_SimpleXML

WXR Parser that makes use of the SimpleXML PHP extension.

Defined (1)

The class is defined in the following location(s).

/parsers.php  
  /**
   * WXR parser that makes use of the SimpleXML PHP extension.
   *
   * The file is loaded through DOMDocument first (so that documents carrying a
   * DOCTYPE can be rejected) and then handed to SimpleXML for extraction.
   */
  class WXR_Parser_SimpleXML {
      /**
       * Parse a WXR export file into plain PHP arrays.
       *
       * libxml's internal error collection is switched on for the duration of
       * the parse and put back to its previous setting before returning, so the
       * call does not leave libxml's global error handling changed.
       *
       * @param string $file Path to the WXR file.
       * @return array|WP_Error On success an array with the keys 'authors',
       *                        'posts', 'categories', 'tags', 'terms',
       *                        'base_url' and 'version'. On failure a WP_Error
       *                        with code 'SimpleXML_parse_error' (file could not
       *                        be read or parsed, or contains a DOCTYPE; error
       *                        data is the list of libxml errors) or
       *                        'WXR_parse_error' (missing/invalid WXR version).
       */
      function parse( $file ) {
          $internal_errors = libxml_use_internal_errors( true );

          // Any libxml errors are copied into the WP_Error inside the helper,
          // so restoring the previous mode afterwards does not lose them.
          $result = $this->parse_document( $file );

          libxml_use_internal_errors( $internal_errors );

          return $result;
      }

      /**
       * Do the actual loading and extraction for parse().
       *
       * Expects libxml internal errors to be enabled by the caller.
       *
       * @param string $file Path to the WXR file.
       * @return array|WP_Error See parse().
       */
      private function parse_document( $file ) {
          $authors = $posts = $categories = $tags = $terms = array();

          // Read the file up front: DOMDocument::loadXML() throws a ValueError
          // on PHP 8 when given an empty source, so an unreadable or empty file
          // must be turned into the regular parse error here.
          $contents = file_get_contents( $file );
          if ( false === $contents || '' === $contents ) {
              return new WP_Error( 'SimpleXML_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ), libxml_get_errors() );
          }

          $dom       = new DOMDocument;
          $old_value = null;
          // libxml_disable_entity_loader() is deprecated as of PHP 8.0, so it is
          // only called on older versions to avoid a deprecation notice.
          if ( function_exists( 'libxml_disable_entity_loader' ) && PHP_VERSION_ID < 80000 ) {
              $old_value = libxml_disable_entity_loader( true );
          }
          $success = $dom->loadXML( $contents );
          if ( ! is_null( $old_value ) ) {
              libxml_disable_entity_loader( $old_value );
          }

          // Documents that declare a DOCTYPE are rejected outright.
          if ( ! $success || isset( $dom->doctype ) ) {
              return new WP_Error( 'SimpleXML_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ), libxml_get_errors() );
          }

          $xml = simplexml_import_dom( $dom );
          unset( $dom );

          // halt if loading produces an error
          if ( ! $xml ) {
              return new WP_Error( 'SimpleXML_parse_error', __( 'There was an error when reading this WXR file', 'wordpress-importer' ), libxml_get_errors() );
          }

          $wxr_version = $xml->xpath( '/rss/channel/wp:wxr_version' );
          if ( ! $wxr_version ) {
              return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
          }

          $wxr_version = trim( (string) $wxr_version[0] );
          // confirm that we are dealing with the correct file format
          if ( ! preg_match( '/^\d+\.\d+$/', $wxr_version ) ) {
              return new WP_Error( 'WXR_parse_error', __( 'This does not appear to be a WXR file, missing/invalid WXR version number', 'wordpress-importer' ) );
          }

          // The base site URL element is optional; fall back to an empty string
          // instead of reading a non-existent first match.
          $base_url = $xml->xpath( '/rss/channel/wp:base_site_url' );
          $base_url = isset( $base_url[0] ) ? trim( (string) $base_url[0] ) : '';

          // Use the namespace URIs declared by the document, with defaults for
          // the two prefixes this parser needs.
          $namespaces = $xml->getDocNamespaces();
          if ( ! isset( $namespaces['wp'] ) ) {
              $namespaces['wp'] = 'http://wordpress.org/export/1.1/';
          }
          if ( ! isset( $namespaces['excerpt'] ) ) {
              $namespaces['excerpt'] = 'http://wordpress.org/export/1.1/excerpt/';
          }

          // grab authors (keyed by login)
          foreach ( $xml->xpath( '/rss/channel/wp:author' ) as $author_arr ) {
              $a               = $author_arr->children( $namespaces['wp'] );
              $login           = (string) $a->author_login;
              $authors[$login] = array(
                  'author_id'           => (int) $a->author_id,
                  'author_login'        => $login,
                  'author_email'        => (string) $a->author_email,
                  'author_display_name' => (string) $a->author_display_name,
                  'author_first_name'   => (string) $a->author_first_name,
                  'author_last_name'    => (string) $a->author_last_name
              );
          }

          // grab cats, tags and terms
          foreach ( $xml->xpath( '/rss/channel/wp:category' ) as $term_arr ) {
              $t        = $term_arr->children( $namespaces['wp'] );
              $category = array(
                  'term_id'              => (int) $t->term_id,
                  'category_nicename'    => (string) $t->category_nicename,
                  'category_parent'      => (string) $t->category_parent,
                  'cat_name'             => (string) $t->cat_name,
                  'category_description' => (string) $t->category_description
              );

              // The 'termmeta' key is only present when the term has meta.
              foreach ( $t->termmeta as $meta ) {
                  $category['termmeta'][] = array(
                      'key'   => (string) $meta->meta_key,
                      'value' => (string) $meta->meta_value
                  );
              }

              $categories[] = $category;
          }

          foreach ( $xml->xpath( '/rss/channel/wp:tag' ) as $term_arr ) {
              $t   = $term_arr->children( $namespaces['wp'] );
              $tag = array(
                  'term_id'         => (int) $t->term_id,
                  'tag_slug'        => (string) $t->tag_slug,
                  'tag_name'        => (string) $t->tag_name,
                  'tag_description' => (string) $t->tag_description
              );

              foreach ( $t->termmeta as $meta ) {
                  $tag['termmeta'][] = array(
                      'key'   => (string) $meta->meta_key,
                      'value' => (string) $meta->meta_value
                  );
              }

              $tags[] = $tag;
          }

          foreach ( $xml->xpath( '/rss/channel/wp:term' ) as $term_arr ) {
              $t    = $term_arr->children( $namespaces['wp'] );
              $term = array(
                  'term_id'          => (int) $t->term_id,
                  'term_taxonomy'    => (string) $t->term_taxonomy,
                  'slug'             => (string) $t->term_slug,
                  'term_parent'      => (string) $t->term_parent,
                  'term_name'        => (string) $t->term_name,
                  'term_description' => (string) $t->term_description
              );

              foreach ( $t->termmeta as $meta ) {
                  $term['termmeta'][] = array(
                      'key'   => (string) $meta->meta_key,
                      'value' => (string) $meta->meta_value
                  );
              }

              $terms[] = $term;
          }

          // grab posts
          foreach ( $xml->channel->item as $item ) {
              $post = array(
                  'post_title' => (string) $item->title,
                  'guid'       => (string) $item->guid,
              );

              $dc                  = $item->children( 'http://purl.org/dc/elements/1.1/' );
              $post['post_author'] = (string) $dc->creator;

              $content              = $item->children( 'http://purl.org/rss/1.0/modules/content/' );
              $excerpt              = $item->children( $namespaces['excerpt'] );
              $post['post_content'] = (string) $content->encoded;
              $post['post_excerpt'] = (string) $excerpt->encoded;

              $wp                     = $item->children( $namespaces['wp'] );
              $post['post_id']        = (int) $wp->post_id;
              $post['post_date']      = (string) $wp->post_date;
              $post['post_date_gmt']  = (string) $wp->post_date_gmt;
              $post['comment_status'] = (string) $wp->comment_status;
              $post['ping_status']    = (string) $wp->ping_status;
              $post['post_name']      = (string) $wp->post_name;
              $post['status']         = (string) $wp->status;
              $post['post_parent']    = (int) $wp->post_parent;
              $post['menu_order']     = (int) $wp->menu_order;
              $post['post_type']      = (string) $wp->post_type;
              $post['post_password']  = (string) $wp->post_password;
              $post['is_sticky']      = (int) $wp->is_sticky;

              if ( isset( $wp->attachment_url ) ) {
                  $post['attachment_url'] = (string) $wp->attachment_url;
              }

              // Only <category> elements carrying a nicename attribute are
              // recorded as terms of the post.
              foreach ( $item->category as $c ) {
                  $att = $c->attributes();
                  if ( isset( $att['nicename'] ) ) {
                      $post['terms'][] = array(
                          'name'   => (string) $c,
                          'slug'   => (string) $att['nicename'],
                          'domain' => (string) $att['domain']
                      );
                  }
              }

              foreach ( $wp->postmeta as $meta ) {
                  $post['postmeta'][] = array(
                      'key'   => (string) $meta->meta_key,
                      'value' => (string) $meta->meta_value
                  );
              }

              foreach ( $wp->comment as $comment ) {
                  // Unlike term/post meta, 'commentmeta' is always present
                  // (an empty array when the comment has no meta).
                  $meta = array();
                  if ( isset( $comment->commentmeta ) ) {
                      foreach ( $comment->commentmeta as $m ) {
                          $meta[] = array(
                              'key'   => (string) $m->meta_key,
                              'value' => (string) $m->meta_value
                          );
                      }
                  }

                  $post['comments'][] = array(
                      'comment_id'           => (int) $comment->comment_id,
                      'comment_author'       => (string) $comment->comment_author,
                      'comment_author_email' => (string) $comment->comment_author_email,
                      'comment_author_IP'    => (string) $comment->comment_author_IP,
                      'comment_author_url'   => (string) $comment->comment_author_url,
                      'comment_date'         => (string) $comment->comment_date,
                      'comment_date_gmt'     => (string) $comment->comment_date_gmt,
                      'comment_content'      => (string) $comment->comment_content,
                      'comment_approved'     => (string) $comment->comment_approved,
                      'comment_type'         => (string) $comment->comment_type,
                      'comment_parent'       => (string) $comment->comment_parent,
                      'comment_user_id'      => (int) $comment->comment_user_id,
                      'commentmeta'          => $meta,
                  );
              }

              $posts[] = $post;
          }

          return array(
              'authors'    => $authors,
              'posts'      => $posts,
              'categories' => $categories,
              'tags'       => $tags,
              'terms'      => $terms,
              'base_url'   => $base_url,
              'version'    => $wxr_version
          );
      }
  }