PHP curl获取网络中的cookie

风之舞 2023-05-08 17:38:05 137
php 
简介: 使用 PHP curl 请求目标地址并返回响应头,再从响应头的 Set-Cookie 字段中提取 cookie
// Example: request a page with the response headers included, then collect the cookies the
// server set and keep a copy of the raw response on disk.
// NOTE(review): curl_request_with_param() is not defined in this excerpt; the function
// curl_request_with_param_bak() below takes the same arguments - confirm which one is intended.
$url = "https://www.test.com";
$res = curl_request_with_param($url,array("ret_header"=>1));
// Capture only the name=value part of each Set-Cookie line: everything up to the first ';'
// or the end of the line, so cookies sent without any attribute are not skipped.
preg_match_all('/^Set-Cookie:[ \t]*([^;\r\n]*)/mi',(string)$res,$matches);
// Drop surrounding whitespace and empty captures, then join in Cookie-header form.
$cookies = implode('; ',array_filter(array_map('trim',$matches[1]),'strlen'));
// The raw response (headers + body) is what gets saved, not the extracted cookie string.
file_put_contents("./file.txt",$res);

/**
 * Perform one HTTP(S) request with cURL and return the raw response.
 *
 * Recognised keys of $arr_param (all optional):
 *   timeout        int     CURLOPT_TIMEOUT in seconds; 10 when missing or empty
 *   ua             string  User-Agent; a built-in mobile UA plus a random number when missing
 *   referer        string  Referer to send
 *   proxy          string  proxy address, e.g. '8.8.8.8:12880'
 *   cookie         string  Cookie header value
 *   post_json_str  string  when non-empty the request becomes a POST with this body
 *   header_arr     array   extra request header lines, e.g. array('Accept-Encoding:gzip')
 *   ret_header     bool    include the response headers in the returned string
 *   ret_nobody     bool    do not download the body
 *   FOLLOWLOCATION bool    follow redirects
 *
 * @param string $url       Target URL; spaces are rewritten to %20.
 * @param array  $arr_param Options described above.
 * @return string|false|null With ret_header set: whatever curl_exec() returned (false on a
 *                           failed transfer). Otherwise the body only for HTTP 200 without a
 *                           cURL error and with a non-empty body; null in every other case.
 */
function curl_request_with_param_bak($url,$arr_param=array()){
    // Give every supported option a value so none of the reads below touches an undefined key.
    $arr_param = (array)$arr_param + array(
        'timeout'=>null, 'ua'=>null, 'referer'=>null, 'proxy'=>null, 'cookie'=>null,
        'post_json_str'=>null, 'header_arr'=>null,
        'ret_header'=>null, 'ret_nobody'=>null, 'FOLLOWLOCATION'=>null,
    );
    if(!$arr_param['timeout']) $arr_param['timeout']=10;  // default timeout: 10 seconds
    if(!$arr_param['ua'])  $arr_param['ua']='Mozilla/5.0 (Linux; Android 9; V1913A Build/P00610; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36 VivoBrowser web doman version/9.9.70.0'.rand(1,99999); // default UA

    $curl = curl_init();
    if(!$curl) return null;  // no handle could be created: report it like any other failure

    // A URL containing spaces makes the request fail, so encode them.
    curl_setopt($curl, CURLOPT_URL, str_replace(' ','%20',(string)$url));
    curl_setopt($curl, CURLOPT_HEADER, $arr_param['ret_header']?true:false);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_TIMEOUT, $arr_param['timeout']);
    // NOTE(review): certificate and host-name verification are switched OFF here, which
    // leaves HTTPS requests open to interception. Kept as-is for existing callers.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($curl, CURLOPT_USERAGENT, $arr_param['ua']);

    if($arr_param['proxy'])          curl_setopt($curl, CURLOPT_PROXY, $arr_param['proxy']);
    if($arr_param['cookie'])         curl_setopt($curl, CURLOPT_COOKIE, $arr_param['cookie']);
    if($arr_param['referer'])        curl_setopt($curl, CURLOPT_REFERER, $arr_param['referer']);
    if($arr_param['ret_nobody'])     curl_setopt($curl, CURLOPT_NOBODY, true);
    if($arr_param['FOLLOWLOCATION']) curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    if($arr_param['post_json_str']){
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $arr_param['post_json_str']);
    }
    if($arr_param['header_arr']){
        curl_setopt($curl, CURLOPT_HTTPHEADER, $arr_param['header_arr']);
        // If the caller asks the server for gzip, let cURL decode it. Matching the header
        // line itself accepts both 'Accept-Encoding:gzip' and 'Accept-Encoding: gzip, deflate'.
        foreach((array)$arr_param['header_arr'] as $header_line){
            if(preg_match('/^\s*Accept-Encoding\s*:.*gzip/i',(string)$header_line)){
                curl_setopt($curl, CURLOPT_ENCODING, "gzip");
                break;
            }
        }
    }

    $res = curl_exec($curl);
    $http_code = curl_getinfo($curl, CURLINFO_HTTP_CODE);
    $curl_error = curl_error($curl);
    curl_close($curl);

    // When headers were requested the raw result is always handed back, whatever the status.
    if($arr_param['ret_header']) return $res;

    // Otherwise a response is only reported for HTTP 200, no cURL error and a non-empty body.
    if($http_code==200 && !$curl_error && is_string($res) && $res!=='') return $res;

    return null;
}

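//Concurrent requests (curl multi)
//NOTE: curl multi combined with HTTPS requests is reported to leak memory (only when both conditions hold at the same time)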
/**
 * Fetch many URLs concurrently with curl_multi, in batches.
 *
 * Each element of $arr_url is an array with a 'url' key and, optionally, its own
 * 'post_json_str' (turns that request into a POST) and 'cookie' (overrides the shared cookie).
 * The response of a request is stored in the 'ret' key of the same element.
 *
 * Recognised keys of $arr_param (all optional):
 *   timeout             int    CURLOPT_TIMEOUT in seconds; 10 when missing or empty
 *   ua                  string User-Agent; a built-in mobile UA plus a random number when missing
 *   curl_multi_max_size int    number of requests per batch; 500 when missing or not positive
 *   cookie              string Cookie header value for rows without their own 'cookie'
 *   referer             string Referer to send
 *   header_arr          array  extra request header lines, e.g. array('Accept-Encoding:gzip')
 *   arrProxy            array  proxy addresses; one is picked at random for every request
 *   ret_header          bool   include response headers in 'ret' (and store 'ret' unconditionally)
 *   ret_nobody          bool   do not download the body
 *   FOLLOWLOCATION      bool   follow redirects
 *
 * Without ret_header, 'ret' is only set for HTTP 200 without a cURL error and with a
 * non-empty body.
 *
 * @param array $arr_url   Request rows; modified in place (passed by reference).
 * @param array $arr_param Options described above.
 * @return array The same $arr_url after the 'ret' keys were filled in.
 */
function curl_multi_request_with_param_ext(&$arr_url,$arr_param=array()){
    // Give every supported option a value so none of the reads below touches an undefined key.
    $arr_param = (array)$arr_param + array(
        'timeout'=>null, 'ua'=>null, 'curl_multi_max_size'=>null, 'cookie'=>null,
        'referer'=>null, 'header_arr'=>null, 'arrProxy'=>null,
        'ret_header'=>null, 'ret_nobody'=>null, 'FOLLOWLOCATION'=>null,
    );
    if(!$arr_param['timeout']) $arr_param['timeout']=10;  // default timeout: 10 seconds
    if(!$arr_param['ua'])  $arr_param['ua']='Mozilla/5.0 (Linux; Android 9; V1913A Build/P00610; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36 VivoBrowser web doman version/9.9.70.0'.rand(1,99999); // default UA

    // 500 per batch suits small pages; pass a smaller value when the pages are large.
    $batch_size = (int)$arr_param['curl_multi_max_size'];
    if($batch_size<1) $batch_size=500;

    // Nothing to fetch (or not a list of rows at all): hand the input back untouched.
    if(!is_array($arr_url) || !$arr_url) return $arr_url;

    // Loop-invariant: does the caller ask the server for gzip? Matching the header line itself
    // accepts both 'Accept-Encoding:gzip' and 'Accept-Encoding: gzip, deflate'.
    $wants_gzip = false;
    if($arr_param['header_arr']){
        foreach((array)$arr_param['header_arr'] as $header_line){
            if(preg_match('/^\s*Accept-Encoding\s*:.*gzip/i',(string)$header_line)){
                $wants_gzip = true;
                break;
            }
        }
    }
    $proxy_pool = (is_array($arr_param['arrProxy']) && $arr_param['arrProxy']) ? $arr_param['arrProxy'] : array();
    $is_cli = empty($_SERVER['HTTP_HOST']);  // no HTTP_HOST is taken to mean a command-line run

    $batch_count = (int)ceil(count($arr_url)/$batch_size);
    for($i=0;$i<$batch_count;$i++){
        // array_slice() stops at the end of the array by itself; keys are preserved so the
        // results can be written back to the matching rows of $arr_url.
        $batch = array_slice($arr_url,$i*$batch_size,$batch_size,true);

        $mh = curl_multi_init();
        $handles = array();
        foreach($batch as $k=>$row_url){
            if(!is_array($row_url)) continue;  // not a request row: there is nowhere to store 'ret'

            $target = isset($row_url['url']) ? (string)$row_url['url'] : '';
            // NOTE(review): baidu.com URLs are rerouted through this hard-coded relay and the
            // original URL is appended without URL-encoding - confirm this is still wanted.
            if(strpos($target,"baidu.com")!==false){
                $target = "http://images.goodxxxxx.top:82/?url=".$target;
            }
            $target = str_replace(' ','%20',$target);  // a URL containing spaces makes the request fail

            $ch = curl_init($target);
            if(!$ch) continue;

            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_HEADER, $arr_param['ret_header']?true:false);
            curl_setopt($ch, CURLOPT_TIMEOUT, $arr_param['timeout']);
            // NOTE(review): certificate and host-name verification are switched OFF here, which
            // leaves HTTPS requests open to interception. Kept as-is for existing callers.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
            curl_setopt($ch, CURLOPT_USERAGENT, $arr_param['ua']);

            // A cookie given on the row wins over the shared one.
            if(!empty($row_url['cookie']))
                curl_setopt($ch, CURLOPT_COOKIE, $row_url['cookie']);
            elseif($arr_param['cookie'])
                curl_setopt($ch, CURLOPT_COOKIE, $arr_param['cookie']);
            if($arr_param['referer'])        curl_setopt($ch, CURLOPT_REFERER, $arr_param['referer']);
            if($arr_param['ret_nobody'])     curl_setopt($ch, CURLOPT_NOBODY, true);
            if($arr_param['FOLLOWLOCATION']) curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            if(!empty($row_url['post_json_str'])){
                curl_setopt($ch, CURLOPT_POST, 1);
                curl_setopt($ch, CURLOPT_POSTFIELDS, $row_url['post_json_str']);
            }
            if($arr_param['header_arr']){
                curl_setopt($ch, CURLOPT_HTTPHEADER, $arr_param['header_arr']);
                if($wants_gzip) curl_setopt($ch, CURLOPT_ENCODING, "gzip");
            }
            if($proxy_pool){
                // array_rand() picks an existing key, so non-sequential keys work as well.
                $proxy = $proxy_pool[array_rand($proxy_pool)];
                if($proxy) curl_setopt($ch, CURLOPT_PROXY, $proxy);
            }

            curl_multi_add_handle($mh,$ch);
            $handles[$k] = $ch;
        }
        if($is_cli) echo "##fetch ".($i+1).'/'.$batch_count." install ".count($batch)." ok.start fetch...##\n"; // progress line for command-line runs

        // Drive all transfers of this batch to completion.
        $active = null;
        do {
            do {
                $mrc = curl_multi_exec($mh,$active);
            } while ($mrc == CURLM_CALL_MULTI_PERFORM);

            if($active && $mrc == CURLM_OK){
                // Wait for activity instead of polling. When select reports nothing (-1 on
                // failure, 0 on timeout / nothing to wait on) pause briefly and still loop back
                // to curl_multi_exec(), so a -1 can never turn into an endless spin.
                if(curl_multi_select($mh) < 1){
                    usleep(50000);
                }
            }
        } while ($active && $mrc == CURLM_OK);

        foreach($handles as $k=>$ch){
            $body = curl_multi_getcontent($ch);
            $http_code = curl_getinfo($ch, CURLINFO_HTTP_CODE);
            $curl_error = curl_error($ch);
            // With ret_header the result is always stored; otherwise only an error-free
            // HTTP 200 with a non-empty body is recorded.
            if($arr_param['ret_header'] || ($http_code==200 && !$curl_error && is_string($body) && $body!=='')){
                $arr_url[$k]['ret'] = $body;
            }
            curl_multi_remove_handle($mh,$ch);
            curl_close($ch);
        }
        curl_multi_close($mh);
        // Release the per-batch data before the next batch is built.
        $mh = null; $handles = null; $batch = null;
    }
    return $arr_url;
}