Snoopy

The WordPress Core Snoopy class.

Defined (1)

The class is defined in the following location(s).

/wp-includes/class-snoopy.php  
  1. class Snoopy 
  2. /**** Public variables ****/
  3.  
  4. /** user definable vars */
  5.  
  6. var $host = "www.php.net"; // host name we are connecting to; fetch()/submit() overwrite it from the URI
  7. var $port = 80; // port we are connecting to; only overwritten when the URI names a port
  8. var $proxy_host = ""; // proxy host to use
  9. var $proxy_port = ""; // proxy port to use
  10. var $proxy_user = ""; // proxy user to use
  11. var $proxy_pass = ""; // proxy password to use
  12.  
  13. var $agent = "Snoopy v1.2.4"; // User-Agent header value we masquerade as
  14. var $referer = ""; // Referer header value to pass
  15. var $cookies = array(); // array of cookies to pass (name => value)
  16. // $cookies["username"]="joe";
  17. var $rawheaders = array(); // array of raw headers to send (name => value)
  18. // $rawheaders["Content-type"]="text/html";
  19.  
  20. var $maxredirs = 5; // maximum number of redirects to follow. 0 = follow none
  21. var $lastredirectaddr = ""; // address of the most recent redirect that was followed
  22. var $offsiteok = true; // allows redirection to a different host
  23. var $maxframes = 0; // maximum frame depth to fetch. 0 = do not fetch frames
  24. var $expandlinks = true; // expand links to fully qualified URLs.
  25. // this only applies to fetchlinks()
  26. // submitlinks(), and submittext()
  27. var $passcookies = true; // pass set cookies back through redirects
  28. // NOTE: this currently does not respect
  29. // dates, domains or paths.
  30.  
  31. var $user = ""; // user for http authentication
  32. var $pass = ""; // password for http authentication
  33.  
  34. // value of the Accept request header
  35. var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";
  36.   
  37. var $results = ""; // where the content is put (string, or array when frames are fetched)
  38.   
  39. var $error = ""; // error messages sent here
  40. var $response_code = ""; // raw HTTP status line returned from server
  41. var $headers = array(); // headers returned from server sent here
  42. var $maxlength = 500000; // documented as max body length; _httprequest() uses it as the fread() chunk size
  43. var $read_timeout = 0; // timeout on read operations, in seconds
  44. // supported only since PHP 4 Beta 4
  45. // 0 = no read timeout is applied
  46. var $timed_out = false; // if a read operation timed out
  47. var $status = 0; // http status code parsed from the status line; -100 on read timeout
  48.   
  49. var $temp_dir = "/tmp"; // temporary directory that the webserver
  50. // has permission to write to.
  51. // under Windows, this should be C:\temp
  52.   
  53. var $curl_path = "/usr/local/bin/curl";
  54. // Snoopy will use cURL for fetching
  55. // SSL content if a full system path to
  56. // the cURL binary is supplied here.
  57. // set to false if you do not have
  58. // cURL installed. See http://curl.haxx.se
  59. // for details on installing cURL.
  60. // Snoopy does *not* use the cURL
  61. // library functions built into php,
  62. // as these functions are not stable
  63. // as of this Snoopy release.
  64.   
  65. /**** Private variables ****/
  66.  
  67. var $_maxlinelen = 4096; // max line length (headers), passed to fgets()
  68.  
  69. var $_httpmethod = "GET"; // default http request method
  70. var $_httpversion = "HTTP/1.0"; // default http request version
  71. var $_submit_method = "POST"; // default submit method
  72. var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  73. var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  74. var $_redirectaddr = false; // will be set if page fetched is a redirect
  75. var $_redirectdepth = 0; // increments on an http redirect
  76. var $_frameurls = array(); // frame src urls collected from the last response
  77. var $_framedepth = 0; // increments on frame depth
  78.  
  79. var $_isproxy = false; // set if using a proxy server
  80. var $_fp_timeout = 30; // timeout for socket connection
  81.  
  /**======================================================================*\
      Function:   fetch
      Purpose:    fetch the contents of a web page over http or https,
                  following redirects (up to $this->maxredirs) and framed
                  documents (up to $this->maxframes)
      Input:      $URI    the location of the page to fetch
      Output:     true once the request has been issued; false when the
                  scheme is not http/https, when $this->_connect() fails,
                  or (https) when no executable cURL binary is configured
                  $this->results  the output text from the fetch
                  $this->error    set when the scheme is not supported
  \*======================================================================*/

  function fetch($URI)
  {
      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URLs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();
      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';

      // a URI without a scheme falls through to the "invalid protocol" branch
      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

      switch (strtolower($scheme))
      {
          case "http":
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if (!$this->_connect($fp))
                  return false;

              if ($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httprequest($URI, $fp, $URI, $this->_httpmethod);
              }
              else
              {
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  // no proxy, send only the path
                  $this->_httprequest($path, $fp, $URI, $this->_httpmethod);
              }

              $this->_disconnect($fp);

              if ($this->_redirectaddr)
              {
                  /* url was redirected, check if we've hit the max depth */
                  if ($this->maxredirs > $this->_redirectdepth)
                  {
                      // only follow redirect if it's on this site, or offsiteok is true
                      if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
                      {
                          /* follow the redirect */
                          $this->_redirectdepth++;
                          $this->lastredirectaddr = $this->_redirectaddr;
                          $this->fetch($this->_redirectaddr);
                      }
                  }
              }

              if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
              {
                  // work on a copy: the nested fetch() calls refill $this->_frameurls
                  $frameurls = $this->_frameurls;
                  $this->_frameurls = array();

                  // foreach instead of each(), which was removed in PHP 8
                  foreach ($frameurls as $frameurl)
                  {
                      if ($this->_framedepth >= $this->maxframes)
                          break;
                      $this->fetch($frameurl);
                      $this->_framedepth++;
                  }
              }
              return true;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable"))
                  if (!is_executable($this->curl_path))
                      return false;
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if ($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httpsrequest($URI, $URI, $this->_httpmethod);
              }
              else
              {
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  // no proxy, send only the path
                  $this->_httpsrequest($path, $URI, $this->_httpmethod);
              }

              if ($this->_redirectaddr)
              {
                  /* url was redirected, check if we've hit the max depth */
                  if ($this->maxredirs > $this->_redirectdepth)
                  {
                      // only follow redirect if it's on this site, or offsiteok is true
                      if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
                      {
                          /* follow the redirect */
                          $this->_redirectdepth++;
                          $this->lastredirectaddr = $this->_redirectaddr;
                          $this->fetch($this->_redirectaddr);
                      }
                  }
              }

              if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
              {
                  $frameurls = $this->_frameurls;
                  $this->_frameurls = array();

                  foreach ($frameurls as $frameurl)
                  {
                      if ($this->_framedepth >= $this->maxframes)
                          break;
                      $this->fetch($frameurl);
                      $this->_framedepth++;
                  }
              }
              return true;

          default:
              // not a valid protocol; "\n" must be double-quoted to be a newline
              $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
              return false;
      }
  }
  189.  
  /**======================================================================*\
      Function:   submit
      Purpose:    submit an http form, following redirects (up to
                  $this->maxredirs) and framed documents (up to
                  $this->maxframes)
      Input:      $URI        the location to post the data
                  $formvars   the formvars to use.
                              format: $formvars["var"] = "val";
                  $formfiles  an array of files to submit
                              format: $formfiles["var"] = "/dir/filename.ext";
      Output:     true once the request has been issued; false when the
                  scheme is not http/https, when $this->_connect() fails,
                  or (https) when no executable cURL binary is configured
                  $this->results  the text output from the post
                  $this->error    set when the scheme is not supported
  \*======================================================================*/

  function submit($URI, $formvars="", $formfiles="")
  {
      $postdata = $this->_prepare_post_body($formvars, $formfiles);

      $URI_PARTS = parse_url($URI);
      // parse_url() returns false for seriously malformed URLs
      if (!is_array($URI_PARTS))
          $URI_PARTS = array();
      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];
      if (empty($URI_PARTS["query"]))
          $URI_PARTS["query"] = '';
      if (empty($URI_PARTS["path"]))
          $URI_PARTS["path"] = '';

      // a URI without a scheme falls through to the "invalid protocol" branch
      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';

      switch (strtolower($scheme))
      {
          case "http":
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if (!$this->_connect($fp))
                  return false;

              if ($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httprequest($URI, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }
              else
              {
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  // no proxy, send only the path
                  $this->_httprequest($path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }

              $this->_disconnect($fp);

              if ($this->_redirectaddr)
              {
                  /* url was redirected, check if we've hit the max depth */
                  if ($this->maxredirs > $this->_redirectdepth)
                  {
                      // make a redirect target without our scheme absolute first
                      if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
                          $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

                      // only follow redirect if it's on this site, or offsiteok is true
                      if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
                      {
                          /* follow the redirect */
                          $this->_redirectdepth++;
                          $this->lastredirectaddr = $this->_redirectaddr;
                          if (strpos($this->_redirectaddr, "?") > 0)
                              $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                          else
                              $this->submit($this->_redirectaddr, $formvars, $formfiles);
                      }
                  }
              }

              if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
              {
                  // work on a copy: the nested fetch() calls refill $this->_frameurls
                  $frameurls = $this->_frameurls;
                  $this->_frameurls = array();

                  // foreach instead of each(), which was removed in PHP 8
                  foreach ($frameurls as $frameurl)
                  {
                      if ($this->_framedepth >= $this->maxframes)
                          break;
                      $this->fetch($frameurl);
                      $this->_framedepth++;
                  }
              }
              return true;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable"))
                  if (!is_executable($this->curl_path))
                      return false;
              $this->host = $URI_PARTS["host"];
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if ($this->_isproxy)
              {
                  // using proxy, send entire URI
                  $this->_httpsrequest($URI, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }
              else
              {
                  $path = $URI_PARTS["path"].($URI_PARTS["query"] ? "?".$URI_PARTS["query"] : "");
                  // no proxy, send only the path
                  $this->_httpsrequest($path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              }

              if ($this->_redirectaddr)
              {
                  /* url was redirected, check if we've hit the max depth */
                  if ($this->maxredirs > $this->_redirectdepth)
                  {
                      // make a redirect target without our scheme absolute first
                      if (!preg_match("|^".$URI_PARTS["scheme"]."://|", $this->_redirectaddr))
                          $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $URI_PARTS["scheme"]."://".$URI_PARTS["host"]);

                      // only follow redirect if it's on this site, or offsiteok is true
                      if (preg_match("|^http://".preg_quote($this->host)."|i", $this->_redirectaddr) || $this->offsiteok)
                      {
                          /* follow the redirect */
                          $this->_redirectdepth++;
                          $this->lastredirectaddr = $this->_redirectaddr;
                          if (strpos($this->_redirectaddr, "?") > 0)
                              $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
                          else
                              $this->submit($this->_redirectaddr, $formvars, $formfiles);
                      }
                  }
              }

              if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
              {
                  $frameurls = $this->_frameurls;
                  $this->_frameurls = array();

                  foreach ($frameurls as $frameurl)
                  {
                      if ($this->_framedepth >= $this->maxframes)
                          break;
                      $this->fetch($frameurl);
                      $this->_framedepth++;
                  }
              }
              return true;

          default:
              // not a valid protocol; "\n" must be double-quoted to be a newline
              $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
              return false;
      }
  }
  315.  
  /**======================================================================*\
      Function:   fetchlinks
      Purpose:    fetch a page and reduce it to the hyperlinks it contains
      Input:      $URI    where you are fetching from
      Output:     true when fetch() succeeded, false otherwise
                  $this->results  the URLs found (expanded to fully
                                  qualified URLs when $this->expandlinks
                                  is set)
  \*======================================================================*/

  function fetchlinks($URI)
  {
      if (!$this->fetch($URI))
          return false;

      // relative links are resolved against the last redirect target, if any
      $base_uri = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

      if (!is_array($this->results))
      {
          $this->results = $this->_striplinks($this->results);
      }
      else
      {
          // one entry per fetched (framed) document
          $i = 0;
          while ($i < count($this->results))
          {
              $this->results[$i] = $this->_striplinks($this->results[$i]);
              $i++;
          }
      }

      if ($this->expandlinks)
          $this->results = $this->_expandlinks($this->results, $base_uri);

      return true;
  }
  338.  
  /**======================================================================*\
      Function:   fetchform
      Purpose:    fetch a page and reduce it to its form elements
      Input:      $URI    where you are fetching from
      Output:     true when fetch() succeeded, false otherwise
                  $this->results  the resulting html form markup
  \*======================================================================*/

  function fetchform($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          $this->results = $this->_stripform($this->results);
      }
      else
      {
          // one entry per fetched (framed) document
          $i = 0;
          while ($i < count($this->results))
          {
              $this->results[$i] = $this->_stripform($this->results[$i]);
              $i++;
          }
      }

      return true;
  }
  359.  
  360.  
  /**======================================================================*\
      Function:   fetchtext
      Purpose:    fetch a page and reduce it to plain text (markup removed)
      Input:      $URI    where you are fetching from
      Output:     true when fetch() succeeded, false otherwise
                  $this->results  the text from the web page
  \*======================================================================*/

  function fetchtext($URI)
  {
      if (!$this->fetch($URI))
          return false;

      if (!is_array($this->results))
      {
          $this->results = $this->_striptext($this->results);
      }
      else
      {
          // one entry per fetched (framed) document
          $i = 0;
          while ($i < count($this->results))
          {
              $this->results[$i] = $this->_striptext($this->results[$i]);
              $i++;
          }
      }

      return true;
  }
  378.  
  /**======================================================================*\
      Function:   submitlinks
      Purpose:    submit a form and reduce the response to its hyperlinks
      Input:      $URI        where you are submitting to
                  $formvars   form variables, passed through to submit()
                  $formfiles  files to upload, passed through to submit()
      Output:     true when submit() succeeded, false otherwise
                  $this->results  the links from the post (expanded to
                                  fully qualified URLs when
                                  $this->expandlinks is set)
  \*======================================================================*/

  function submitlinks($URI, $formvars="", $formfiles="")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      // relative links are resolved against the last redirect target, if any
      $base_uri = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

      if (!is_array($this->results))
      {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $base_uri);
          return true;
      }

      // one entry per fetched (framed) document
      $i = 0;
      while ($i < count($this->results))
      {
          $this->results[$i] = $this->_striplinks($this->results[$i]);
          if ($this->expandlinks)
              $this->results[$i] = $this->_expandlinks($this->results[$i], $base_uri);
          $i++;
      }

      return true;
  }
  402.  
  /**======================================================================*\
      Function:   submittext
      Purpose:    submit a form and reduce the response to plain text
      Input:      $URI        where you are submitting to
                  $formvars   form variables, passed through to submit()
                  $formfiles  files to upload, passed through to submit()
      Output:     true when submit() succeeded, false otherwise
                  $this->results  the text from the web page (run through
                                  _expandlinks() when $this->expandlinks
                                  is set)
  \*======================================================================*/

  function submittext($URI, $formvars = "", $formfiles = "")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      // relative links are resolved against the last redirect target, if any
      $base_uri = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

      if (!is_array($this->results))
      {
          $this->results = $this->_striptext($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $base_uri);
          return true;
      }

      // one entry per fetched (framed) document
      $i = 0;
      while ($i < count($this->results))
      {
          $this->results[$i] = $this->_striptext($this->results[$i]);
          if ($this->expandlinks)
              $this->results[$i] = $this->_expandlinks($this->results[$i], $base_uri);
          $i++;
      }

      return true;
  }
  426.  
  427.  
  428.  
  /**======================================================================*\
      Function:   set_submit_multipart
      Purpose:    Make subsequent form submissions use the
                  multipart/form-data content type
      Output:     $this->_submit_type is updated; nothing is returned
  \*======================================================================*/
  function set_submit_multipart()
  {
      $this->_submit_type = "multipart/form-data";
  }
  436.  
  437.  
  /**======================================================================*\
      Function:   set_submit_normal
      Purpose:    Make subsequent form submissions use the
                  application/x-www-form-urlencoded content type
      Output:     $this->_submit_type is updated; nothing is returned
  \*======================================================================*/
  function set_submit_normal()
  {
      $this->_submit_type = "application/x-www-form-urlencoded";
  }
  445.  
  446.  
  447.  
  448.  
  449. /**======================================================================*\ 
  450. Private functions 
  451. \*======================================================================*/ 
  452.  
  453.  
  /**======================================================================*\
      Function:   _striplinks
      Purpose:    extract the href targets of the <a> tags in an html
                  document
      Input:      $document   document to strip.
      Output:     $match      an array of the links: all quoted href
                              values first, then all unquoted ones.
                              An empty array when no link is found.
  \*======================================================================*/

  function _striplinks($document)
  {
      preg_match_all("'<\s*a\s.*?href\s*=\s*          # find <a href=
                      ([\"\'])?                       # find single or double quote
                      (?(1) (.*?)\\1 | ([^\s\>]+))    # if quote found, match up to next matching
                                                      # quote, otherwise match up to next space
                      'isx", $document, $links);

      // always return an array, even when the document holds no links
      $match = array();

      // catenate the non-empty matches from the conditional subpattern:
      // group 2 holds quoted values, group 3 unquoted ones
      foreach (array($links[2], $links[3]) as $candidates)
      {
          foreach ($candidates as $val)
          {
              if (!empty($val))
                  $match[] = $val;
          }
      }

      // return the links
      return $match;
  }
  481.  
  /**======================================================================*\
      Function:   _stripform
      Purpose:    keep only the form-related tags (form, input, select,
                  textarea, option) of an html document
      Input:      $document   document to strip.
      Output:     a string holding the matched elements, joined with
                  "\r\n"
  \*======================================================================*/

  function _stripform($document)
  {
      $form_tag_pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

      preg_match_all($form_tag_pattern, $document, $found);

      // index 0 holds the full text of every match
      return implode("\r\n", $found[0]);
  }
  497.  
  498.  
  499.  
  /**======================================================================*\
      Function:   _striptext
      Purpose:    reduce an html document to text: remove script blocks
                  and tags, collapse whitespace after line breaks, and
                  decode a fixed list of common html entities to
                  single-byte characters
      Input:      $document   document to strip.
      Output:     $text       the resulting text
  \*======================================================================*/

  function _striptext($document)
  {
      // I didn't use preg eval (//e) since that is only available in PHP 4.0.
      // so, list your entities one by one here. I included some of the
      // more common ones.
      //
      // $search[n] is replaced by $replace[n]; the two lists must stay aligned.

      $search = array("'<script[^>]*?>.*?</script>'si",  // strip out javascript
                      "'<[\/\!]*?[^<>]*?>'si",           // strip out html tags
                      "'([\r\n])[\s]+'",                 // strip out white space
                      "'&(quot|#34|#034|#x22);'i",       // replace html entities
                      "'&(amp|#38|#038|#x26);'i",        // added hexadecimal values
                      "'&(lt|#60|#060|#x3c);'i",
                      "'&(gt|#62|#062|#x3e);'i",
                      "'&(nbsp|#160|#xa0);'i",
                      "'&(iexcl|#161);'i",
                      "'&(cent|#162);'i",
                      "'&(pound|#163);'i",
                      "'&(copy|#169);'i",
                      "'&(reg|#174);'i",
                      "'&(deg|#176);'i",
                      "'&(#39|#039|#x27);'",
                      "'&(euro|#8364);'i",               // europe
                      "'&a(uml|UML);'",                  // german
                      "'&o(uml|UML);'",
                      "'&u(uml|UML);'",
                      "'&A(uml|UML);'",
                      "'&O(uml|UML);'",
                      "'&U(uml|UML);'",
                      // match the entity, like every other row, not the
                      // literal character (which would mangle UTF-8 input)
                      "'&szlig;'i",
                      );
      $replace = array("",
                       "",
                       "\\1",
                       "\"",
                       "&",
                       "<",
                       ">",
                       " ",
                       chr(161),
                       chr(162),
                       chr(163),
                       chr(169),
                       chr(174),
                       chr(176),
                       chr(39),
                       chr(128),
                       chr(0xE4), // ANSI &auml;
                       chr(0xF6), // ANSI &ouml;
                       chr(0xFC), // ANSI &uuml;
                       chr(0xC4), // ANSI &Auml;
                       chr(0xD6), // ANSI &Ouml;
                       chr(0xDC), // ANSI &Uuml;
                       chr(0xDF), // ANSI &szlig;
                       );

      $text = preg_replace($search, $replace, $document);

      return $text;
  }
  565.  
  /**======================================================================*\
      Function:   _expandlinks
      Purpose:    expand each link into a fully qualified URL
      Input:      $links  the links to qualify (a string or an array of
                          strings; handed to preg_replace() as the subject)
                  $URI    the full URI to get the base from
      Output:     the expanded links, in the same shape as $links
  \*======================================================================*/

  function _expandlinks($links, $URI)
  {
      // everything in front of the query string
      preg_match("/^[^\?]+/", $URI, $uri_match);

      // base directory: drop a trailing "name.ext" segment, then a trailing slash
      $base = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $uri_match[0]);
      $base = preg_replace("|/$|", "", $base);

      // scheme://host of the base, used for root-relative links
      $base_parts = parse_url($base);
      $root = $base_parts["scheme"]."://".$base_parts["host"];

      // the rules are applied in order to every link
      $patterns = array(
          "|^http://".preg_quote($this->host)."|i",  // strip a leading http://<current host>
          "|^(\/)|i",                                // root-relative link
          "|^(?!http://)(?!mailto:)|i",              // anything not starting with http:// or mailto:
          "|/\./|",                                  // collapse "/./"
          "|/[^\/]+/\.\./|"                          // collapse "/dir/../"
      );

      $substitutes = array(
          "",
          $root."/",
          $base."/",
          "/",
          "/"
      );

      return preg_replace($patterns, $substitutes, $links);
  }
  601.  
  /**======================================================================*\
      Function:   _httprequest
      Purpose:    send one http request over an open socket and read the
                  response headers and body
      Input:      $url            the request target to put on the request
                                  line (path, or full URI when proxying)
                  $fp             the current open file pointer
                  $URI            the full URI
                  $http_method    request method for the request line
                  $content_type   Content-type header value, if any
                  $body           body contents to send if any (POST)
      Output:     true on success; false (with $this->status = -100) when
                  a read timed out
                  $this->headers        response header lines
                  $this->response_code  the raw status line
                  $this->status         status code parsed from it
                  $this->_redirectaddr  redirect target, or false
                  $this->results        response body (appended as an
                                        array entry for framed content)
                  $this->_frameurls     frame urls still to be fetched
  \*======================================================================*/

  function _httprequest($url, $fp, $URI, $http_method, $content_type="", $body="")
  {
      if ($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $URI_PARTS = parse_url($URI);
      if (empty($url))
          $url = "/";

      // ---- build the request header block ----
      $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
      if (!empty($this->agent))
          $headers .= "User-Agent: ".$this->agent."\r\n";
      // a Host entry in $this->rawheaders takes precedence over the generated one
      if (!empty($this->host) && !isset($this->rawheaders['Host']))
      {
          $headers .= "Host: ".$this->host;
          if (!empty($this->port) && $this->port != 80)
              $headers .= ":".$this->port;
          $headers .= "\r\n";
      }
      if (!empty($this->accept))
          $headers .= "Accept: ".$this->accept."\r\n";
      if (!empty($this->referer))
          $headers .= "Referer: ".$this->referer."\r\n";
      if (!empty($this->cookies))
      {
          if (!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;

          reset($this->cookies);
          $cookie_pairs = array();
          foreach ($this->cookies as $cookieKey => $cookieVal)
              $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);
          $headers .= "Cookie: ".implode("; ", $cookie_pairs)."\r\n";
      }
      if (!empty($this->rawheaders))
      {
          if (!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() was removed in PHP 8 and, because
          // the array pointer was never reset, it dropped the raw headers on
          // every request after the first (e.g. when following a redirect)
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers .= $headerKey.": ".$headerVal."\r\n";
      }
      if (!empty($content_type))
      {
          $headers .= "Content-type: $content_type";
          if ($content_type == "multipart/form-data")
              $headers .= "; boundary=".$this->_mime_boundary;
          $headers .= "\r\n";
      }
      if (!empty($body))
          $headers .= "Content-length: ".strlen($body)."\r\n";
      if (!empty($this->user) || !empty($this->pass))
          $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

      //add proxy auth headers
      if (!empty($this->proxy_user))
          $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

      // blank line terminates the header block
      $headers .= "\r\n";

      // set the read timeout if needed
      if ($this->read_timeout > 0)
          socket_set_timeout($fp, $this->read_timeout);
      $this->timed_out = false;

      fwrite($fp, $headers.$body, strlen($headers.$body));

      // ---- read the response headers ----
      $this->_redirectaddr = false;
      // reset to an empty array (not unset) so the property stays defined
      // even when the server sends no header line at all
      $this->headers = array();

      while ($currentHeader = fgets($fp, $this->_maxlinelen))
      {
          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
          {
              $this->status = -100;
              return false;
          }

          // an empty line ends the header block
          if ($currentHeader == "\r\n")
              break;

          // if a header begins with Location: or URI:, set the redirect.
          // whitespace after the colon is optional, so accept zero or more
          if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches))
          {
              // look for :// in the Location header to see if hostname is included
              if (!preg_match("|\:\/\/|", $matches[2]))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if (!preg_match("|^/|", $matches[2]))
                      $this->_redirectaddr .= "/".$matches[2];
                  else
                      $this->_redirectaddr .= $matches[2];
              }
              else
                  $this->_redirectaddr = $matches[2];
          }

          if (preg_match("|^HTTP/|", $currentHeader))
          {
              if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
              {
                  $this->status = $status[1];
              }
              $this->response_code = $currentHeader;
          }

          $this->headers[] = $currentHeader;
      }

      // ---- read the body until the stream yields no more data ----
      $results = '';
      do {
          $_data = fread($fp, $this->maxlength);
          if ($_data === false || strlen($_data) == 0) {
              break;
          }
          $results .= $_data;
      } while (true);

      if ($this->read_timeout > 0 && $this->_check_timeout($fp))
      {
          $this->status = -100;
          return false;
      }

      // check if there is a redirect meta tag
      if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
      {
          // $this->results starts out as a string; "[]" on a string is a
          // fatal error since PHP 7.1, so switch to an array first
          if (!is_array($this->results))
              $this->results = array();
          $this->results[] = $results;
          foreach ($match[1] as $frame_src)
              $this->_frameurls[] = $this->_expandlinks($frame_src, $URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif (is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  736.  
  737. /**======================================================================*\ 
  738. Function: _httpsrequest 
  739. Purpose: go get the https data from the server using curl 
  740. Input: $url the url to fetch 
  741. $URI the full URI 
  742. $body body contents to send if any (POST) 
  743. Output: 
  744. \*======================================================================*/ 
  745.  
  function _httpsrequest($url, $URI, $http_method, $content_type="", $body="")
  {
      // Fetches $URI by running the external curl binary ($this->curl_path).
      // Request headers are assembled from the object's settings, curl dumps
      // the response headers to a temp file, and those are scanned for a
      // redirect target and the status line. The body is stored in
      // $this->results (appended when results is already an array of frames).
      //
      // Returns true on success; on a non-zero curl exit status sets
      // $this->error and returns false.
      //
      // $url and $http_method are not used here: curl derives the request
      // line from $URI and the presence of a -d body.

      if ($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $URI_PARTS = parse_url($URI);

      // ---- request headers, passed to curl one -H at a time ----
      $headers = array();

      if (!empty($this->agent))
          $headers[] = "User-Agent: ".$this->agent;
      if (!empty($this->host)) {
          if (!empty($this->port))
              $headers[] = "Host: ".$this->host.":".$this->port;
          else
              $headers[] = "Host: ".$this->host;
      }
      if (!empty($this->accept))
          $headers[] = "Accept: ".$this->accept;
      if (!empty($this->referer))
          $headers[] = "Referer: ".$this->referer;

      if (!empty($this->cookies)) {
          if (!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;

          $cookie_pairs = array();
          foreach ($this->cookies as $cookieKey => $cookieVal)
              $cookie_pairs[] = $cookieKey."=".urlencode($cookieVal);

          // an empty object casts to an empty array: send no Cookie header then
          if (count($cookie_pairs) > 0)
              $headers[] = 'Cookie: '.implode("; ", $cookie_pairs);
      }

      if (!empty($this->rawheaders)) {
          if (!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers[] = $headerKey.": ".$headerVal;
      }

      if (!empty($content_type)) {
          if ($content_type == "multipart/form-data")
              $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
          else
              $headers[] = "Content-type: $content_type";
      }
      if (!empty($body))
          $headers[] = "Content-length: ".strlen($body);
      if (!empty($this->user) || !empty($this->pass))
          $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

      // ---- curl command line ----
      $headerfile = tempnam( $this->temp_dir, "sno" );

      // NOTE(review): -k makes curl skip TLS certificate verification, so the
      // peer is not authenticated. Left as-is to preserve behaviour; confirm
      // whether that is still acceptable for callers.
      $cmdline_params = '-k -D ' . escapeshellarg( $headerfile );

      foreach ( $headers as $header ) {
          $cmdline_params .= ' -H ' . escapeshellarg( $header );
      }

      if ( ! empty( $body ) ) {
          $cmdline_params .= ' -d ' . escapeshellarg( $body );
      }

      if ( $this->read_timeout > 0 ) {
          $cmdline_params .= ' -m ' . escapeshellarg( $this->read_timeout );
      }

      $results = array();
      exec( $this->curl_path . ' ' . $cmdline_params . ' ' . escapeshellarg( $URI ), $results, $return );

      if ($return) {
          // remove the temp header file on failure too, so it is not leaked
          unlink("$headerfile");
          $this->error = "Error: cURL could not retrieve the document, error $return.";
          return false;
      }

      $results = implode("\r\n", $results);

      // read the dumped headers, then drop the temp file straight away
      $result_headers = file("$headerfile");
      unlink("$headerfile");
      if (!is_array($result_headers))
          $result_headers = array();

      // ---- response headers ----
      $this->_redirectaddr = false;
      // reset to an empty array (not unset) so the property always exists
      $this->headers = array();

      foreach ($result_headers as $header_line) {
          // A Location:/URI: header sets the redirect. One case-insensitive
          // pattern both detects the header and captures its value, so the
          // capture can no longer fail after the detection succeeded.
          if (preg_match("/^(?:Location|URI):\s*(.*)/i", chop($header_line), $matches)) {
              $target = $matches[1];

              // look for :// in the value to see if a hostname is included
              if (!preg_match("|\:\/\/|", $target)) {
                  // no host in the path, so prepend scheme://host[:port]
                  $base = $URI_PARTS["scheme"]."://".$this->host;
                  if (!empty($this->port))
                      $base .= ":".$this->port;
                  // avoid a double slash / ensure exactly one separator
                  if (!preg_match("|^/|", $target))
                      $base .= "/";
                  $this->_redirectaddr = $base.$target;
              } else {
                  $this->_redirectaddr = $target;
              }
          }

          if (preg_match("|^HTTP/|", $header_line))
              $this->response_code = $header_line;

          $this->headers[] = $header_line;
      }

      // ---- response body ----

      // check if there is a redirect meta tag
      if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match)) {
          $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match)) {
          $this->results[] = $results;
          for ($x = 0; $x < count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif (is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  863.  
  /**======================================================================*\
      Function:   setcookies()
      Purpose:    set cookies for a redirection: copy every
                  "Set-Cookie: name=value" found in $this->headers
                  into $this->cookies (value is url-decoded)
  \*======================================================================*/

  function setcookies()
  {
      // headers may be missing entirely if no response has been parsed yet
      $header_lines = isset($this->headers) ? (array)$this->headers : array();

      foreach ($header_lines as $header_line) {
          // Stored header lines can still carry their line ending, so the
          // value stops at ';' OR at CR/LF; otherwise a cookie without
          // attributes would be saved with "\r\n" glued to its value.
          if (preg_match('/^set-cookie:\s*([^=]+)=([^;\r\n]+)/i', $header_line, $match))
              $this->cookies[$match[1]] = urldecode($match[2]);
      }
  }
  873.  
  874.  
  /**======================================================================*\
      Function:   _check_timeout
      Purpose:    report whether the stream has timed out
      Input:      $fp     file pointer
      Output:     true (and $this->timed_out set to true) when a read
                  timeout is configured and the stream status says
                  "timed_out"; false otherwise
  \*======================================================================*/

  function _check_timeout($fp)
  {
      // no read timeout configured: nothing to check
      if (!($this->read_timeout > 0))
          return false;

      $stream_status = socket_get_status($fp);
      if (!$stream_status["timed_out"])
          return false;

      $this->timed_out = true;
      return true;
  }
  888.  
  /**======================================================================*\
      Function:   _connect
      Purpose:    make a socket connection, to the proxy when both
                  proxy_host and proxy_port are set, otherwise to
                  host:port
      Input:      $fp     file pointer (by reference; receives the
                          fsockopen() result)
      Output:     true on success; false on failure, with
                  $this->status set to the fsockopen error number and
                  $this->error set to a description
  \*======================================================================*/

  function _connect(&$fp)
  {
      if (!empty($this->proxy_host) && !empty($this->proxy_port)) {
          $this->_isproxy = true;

          $host = $this->proxy_host;
          $port = $this->proxy_port;
      } else {
          $host = $this->host;
          $port = $this->port;
      }

      $this->status = 0;

      $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
      if ($fp) {
          // socket connection succeeded
          return true;
      }

      // socket connection failed
      $this->status = $errno;
      switch ($errno) {
          // each case needs its own break: without it every branch fell
          // through to default and the specific messages were overwritten
          case -3:
              $this->error = "socket creation failed (-3)";
              break;
          case -4:
              $this->error = "dns lookup failure (-4)";
              break;
          case -5:
              $this->error = "connection refused or timed out (-5)";
              break;
          default:
              $this->error = "connection failed (".$errno.")";
              break;
      }
      return false;
  }
  /**======================================================================*\
      Function:   _disconnect
      Purpose:    disconnect a socket connection
      Input:      $fp     file pointer
      Output:     result of fclose(); false when $fp is not an open
                  resource (e.g. false from a failed connect, or a
                  handle that was already closed)
  \*======================================================================*/

  function _disconnect($fp)
  {
      // fclose() on a non-resource warns (and throws on PHP 8), so guard it
      if (!is_resource($fp))
          return false;

      return fclose($fp);
  }
  938.  
  939.  
  /**======================================================================*\
      Function:   _prepare_post_body
      Purpose:    Prepare post body according to encoding type
                  ($this->_submit_type)
      Input:      $formvars  - form variables (name => value, or
                               name => array/object of values)
                  $formfiles - form upload files (field name => file
                               path or array of file paths)
      Output:     post body string; nothing (NULL) when both inputs
                  are empty; '' for an unrecognised submit type
      Note:       for multipart/form-data a new $this->_mime_boundary
                  is generated on every call
  \*======================================================================*/

  function _prepare_post_body($formvars, $formfiles)
  {
      settype($formvars, "array");
      settype($formfiles, "array");
      $postdata = '';

      // nothing to encode
      if (count($formvars) == 0 && count($formfiles) == 0)
          return;

      // foreach is used throughout instead of while(list() = each()):
      // each() was removed in PHP 8.
      switch ($this->_submit_type) {
          case "application/x-www-form-urlencoded":
              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      // multi-valued field: key[]=v1&key[]=v2&...
                      foreach ($val as $cur_val) {
                          $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                      }
                  } else {
                      $postdata .= urlencode($key)."=".urlencode($val)."&";
                  }
              }
              break;

          case "multipart/form-data":
              $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          // Concatenate the "[]" suffix: the former "$key\[\]"
                          // kept the backslashes, sending name="key\[\]".
                          $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                          $postdata .= "$cur_val\r\n";
                      }
                  } else {
                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                      $postdata .= "$val\r\n";
                  }
              }

              foreach ($formfiles as $field_name => $file_names) {
                  settype($file_names, "array");
                  foreach ($file_names as $file_name) {
                      if (!is_readable($file_name)) continue;

                      // file_get_contents() is binary-safe and copes with
                      // zero-length files, unlike fread($fp, filesize()).
                      $file_content = file_get_contents($file_name);
                      if ($file_content === false) continue;
                      $base_name = basename($file_name);

                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                      $postdata .= "$file_content\r\n";
                  }
              }
              // closing delimiter
              $postdata .= "--".$this->_mime_boundary."--\r\n";
              break;
      }

      return $postdata;
  }