PHP curl获取网络中的cookie
简介:
PHP curl获取网络中的cookie
// Example: request a page, pull the cookies out of its Set-Cookie response
// headers, and dump the raw response to a file.
$url = "https://www.test.com";
// NOTE(review): curl_request_with_param() is not defined in this snippet; it is
// presumably the live counterpart of curl_request_with_param_bak() below, where
// ret_header=1 makes the response headers part of the returned string - confirm.
$res = curl_request_with_param($url,array("ret_header"=>1));
// One match per Set-Cookie header line. Only the leading "name=value" pair is
// captured (everything up to the first ';' or the end of the line), so cookies
// sent without attributes are no longer skipped. The (string) cast keeps
// preg_match_all() quiet when the request failed and $res is false/null.
preg_match_all('/^Set-Cookie:[ \t]*([^;\r\n]*)/mi', (string)$res, $matches);
// Join in the "a=1; b=2" form that CURLOPT_COOKIE / the Cookie header expects.
$cookies = implode('; ', array_filter(array_map('trim', $matches[1]), 'strlen'));
// NOTE(review): the raw response (headers + body) is what gets written here;
// $cookies is computed but not used any further in this snippet.
file_put_contents("./file.txt",$res);
/**
 * Perform a single HTTP(S) request with cURL and return the raw response.
 *
 * Recognised keys of $arr_param (all optional):
 *   - timeout        total timeout in seconds; 10 is used when missing or falsy
 *   - ua             User-Agent string; a built-in mobile UA with a random numeric suffix is used when missing
 *   - referer        value for the Referer header
 *   - proxy          proxy address, e.g. '8.8.8.8:12880'
 *   - cookie         cookie string passed to CURLOPT_COOKIE
 *   - post_json_str  when non-empty the request becomes a POST with this string as body
 *   - header_arr     list of raw request header lines, e.g. array('Accept-Encoding:gzip')
 *   - ret_header     truthy: include the response headers in the returned string
 *   - ret_nobody     truthy: do not download the body
 *   - FOLLOWLOCATION truthy: follow redirects
 *   - ssl_verify     truthy: verify the server certificate and host name (off by default,
 *                    which matches the historical behaviour of this helper)
 *
 * Example:
 *   curl_request_with_param_bak($url, array('timeout'=>10,'ua'=>'Googlebot/2.1;','referer'=>'http://www.baidu.com'));
 *   curl_request_with_param_bak($url, array('ret_header'=>1,'ret_nobody'=>1,'FOLLOWLOCATION'=>1));
 *
 * @param string $url       Target URL; spaces are replaced with %20.
 * @param array  $arr_param Options, see above.
 * @return string|false|null With ret_header set: whatever curl_exec() returned (false on failure).
 *                           Otherwise: the body when the status is 200, cURL reported no error and
 *                           the body is non-empty; null in every other case.
 */
function curl_request_with_param_bak($url, $arr_param = array())
{
    // Fill in defaults without touching undefined keys.
    if (empty($arr_param['timeout'])) {
        $arr_param['timeout'] = 10; // default timeout: 10 seconds
    }
    if (empty($arr_param['ua'])) {
        // default UA; the random suffix makes the UA differ between calls
        $arr_param['ua'] = 'Mozilla/5.0 (Linux; Android 9; V1913A Build/P00610; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36 VivoBrowser web doman version/9.9.70.0'.rand(1,99999);
    }
    $want_header = !empty($arr_param['ret_header']);
    $verify_tls = !empty($arr_param['ssl_verify']);

    $curl = curl_init();
    if ($curl === false) {
        return null;
    }

    $url = str_replace(' ', '%20', $url); // cURL rejects URLs that contain spaces
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_HEADER, $want_header);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_TIMEOUT, $arr_param['timeout']);
    // SECURITY: certificate and host verification stay DISABLED unless the caller
    // opts in with 'ssl_verify'; without it the connection is open to MITM.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, $verify_tls);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, $verify_tls ? 2 : 0);
    curl_setopt($curl, CURLOPT_USERAGENT, $arr_param['ua']);

    if (!empty($arr_param['proxy'])) {
        curl_setopt($curl, CURLOPT_PROXY, $arr_param['proxy']);
    }
    if (!empty($arr_param['cookie'])) {
        curl_setopt($curl, CURLOPT_COOKIE, $arr_param['cookie']);
    }
    if (!empty($arr_param['referer'])) {
        curl_setopt($curl, CURLOPT_REFERER, $arr_param['referer']);
    }
    if (!empty($arr_param['ret_nobody'])) {
        curl_setopt($curl, CURLOPT_NOBODY, true);
    }
    if (!empty($arr_param['FOLLOWLOCATION'])) {
        curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    }
    if (!empty($arr_param['post_json_str'])) {
        curl_setopt($curl, CURLOPT_POST, 1);
        curl_setopt($curl, CURLOPT_POSTFIELDS, $arr_param['post_json_str']);
    }
    if (!empty($arr_param['header_arr'])) {
        curl_setopt($curl, CURLOPT_HTTPHEADER, $arr_param['header_arr']);
        // If the caller asks for gzip, let cURL decode the response transparently.
        // Matches both "Accept-Encoding:gzip" and "Accept-Encoding: gzip".
        if (preg_match('/:\s*gzip/i', print_r($arr_param['header_arr'], true)) === 1) {
            curl_setopt($curl, CURLOPT_ENCODING, "gzip");
        }
    }

    $res = curl_exec($curl);

    $return = null;
    if ($want_header) {
        // Header mode: hand back whatever cURL produced, regardless of status.
        $return = $res;
    } elseif (
        (int)curl_getinfo($curl, CURLINFO_HTTP_CODE) === 200
        && curl_error($curl) === ''
        && is_string($res)
        && $res !== ''
    ) {
        // Body mode: only a clean, non-empty 200 response counts as a result.
        $return = $res;
    }

    curl_close($curl);
    return $return;
}
// Concurrent ("multi") variant.
// Original author's note: curl_multi combined with HTTPS requests was observed
// to leak memory (only when both conditions hold at the same time).
/**
 * Fetch many URLs concurrently with curl_multi, in batches.
 *
 * Each element of $arr_url is an array describing one request:
 *   - url            (required) target URL; spaces are replaced with %20
 *   - cookie         optional per-request cookie string (overrides $arr_param['cookie'])
 *   - post_json_str  optional; when non-empty the request becomes a POST with this body
 * On success the response is stored back into the same element under the key 'ret'.
 * Elements that are not arrays or have no 'url' are skipped.
 *
 * Recognised keys of $arr_param (all optional):
 *   - timeout             per-request timeout in seconds; 10 when missing or falsy
 *   - ua                  User-Agent; a built-in mobile UA with a random suffix when missing
 *   - curl_multi_max_size number of requests per batch; 500 when missing or falsy
 *                         (lower it when the pages are large)
 *   - cookie, referer, header_arr, ret_header, ret_nobody, FOLLOWLOCATION
 *                         same meaning as the matching cURL options
 *   - arrProxy            array of proxy addresses; one is picked at random per request
 *   - ssl_verify          truthy: verify the server certificate and host name (off by default,
 *                         which matches the historical behaviour of this helper)
 *
 * With ret_header set, 'ret' is always written (headers + body, whatever the outcome).
 * Otherwise 'ret' is written only for a 200 response without cURL error and with a non-empty body.
 *
 * @param array $arr_url   Requests keyed by anything; modified in place (passed by reference).
 * @param array $arr_param Options, see above.
 * @return array The (modified) $arr_url.
 */
function curl_multi_request_with_param_ext(&$arr_url, $arr_param = array())
{
    if (!is_array($arr_url) || !$arr_url) {
        return $arr_url;
    }

    // Fill in defaults without touching undefined keys.
    if (empty($arr_param['timeout'])) {
        $arr_param['timeout'] = 10; // default timeout: 10 seconds
    }
    if (empty($arr_param['ua'])) {
        // default UA; the random suffix makes the UA differ between calls
        $arr_param['ua'] = 'Mozilla/5.0 (Linux; Android 9; V1913A Build/P00610; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/87.0.4280.141 Mobile Safari/537.36 VivoBrowser web doman version/9.9.70.0'.rand(1,99999);
    }
    if (empty($arr_param['curl_multi_max_size'])) {
        $arr_param['curl_multi_max_size'] = 500;
    }

    // Loop-invariant settings, computed once.
    $batch_size = max(1, (int)$arr_param['curl_multi_max_size']);
    $want_header = !empty($arr_param['ret_header']);
    $verify_tls = !empty($arr_param['ssl_verify']);
    $has_headers = !empty($arr_param['header_arr']);
    // Matches both "Accept-Encoding:gzip" and "Accept-Encoding: gzip".
    $want_gzip = $has_headers && preg_match('/:\s*gzip/i', print_r($arr_param['header_arr'], true)) === 1;
    $proxies = (!empty($arr_param['arrProxy']) && is_array($arr_param['arrProxy'])) ? $arr_param['arrProxy'] : array();

    // Split the work into key-preserving batches so results can be written back by key.
    $batches = array_chunk($arr_url, $batch_size, true);
    $batch_total = count($batches);

    foreach ($batches as $i => $batch) {
        $mh = curl_multi_init();
        $handles = array();

        foreach ($batch as $k => $row_url) {
            if (!is_array($row_url) || empty($row_url['url'])) {
                continue;
            }
            $target = $row_url['url'];
            // NOTE(review): baidu.com URLs are silently routed through a hard-coded
            // third-party relay, and the original URL is appended without encoding.
            // Kept as-is for compatibility - confirm this is still wanted.
            if (strpos($target, "baidu.com") !== false) {
                $target = "http://images.goodxxxxx.top:82/?url=".$target;
            }
            $target = str_replace(' ', '%20', $target); // cURL rejects URLs that contain spaces

            $ch = curl_init($target);
            if ($ch === false) {
                continue;
            }
            curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
            curl_setopt($ch, CURLOPT_HEADER, $want_header);
            curl_setopt($ch, CURLOPT_TIMEOUT, $arr_param['timeout']);
            // SECURITY: certificate and host verification stay DISABLED unless the
            // caller opts in with 'ssl_verify'.
            curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, $verify_tls);
            curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, $verify_tls ? 2 : 0);
            curl_setopt($ch, CURLOPT_USERAGENT, $arr_param['ua']);

            // A per-request cookie wins over the shared one.
            if (!empty($row_url['cookie'])) {
                curl_setopt($ch, CURLOPT_COOKIE, $row_url['cookie']);
            } elseif (!empty($arr_param['cookie'])) {
                curl_setopt($ch, CURLOPT_COOKIE, $arr_param['cookie']);
            }
            if (!empty($arr_param['referer'])) {
                curl_setopt($ch, CURLOPT_REFERER, $arr_param['referer']);
            }
            if (!empty($arr_param['ret_nobody'])) {
                curl_setopt($ch, CURLOPT_NOBODY, true);
            }
            if (!empty($arr_param['FOLLOWLOCATION'])) {
                curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
            }
            if (!empty($row_url['post_json_str'])) {
                curl_setopt($ch, CURLOPT_POST, 1);
                curl_setopt($ch, CURLOPT_POSTFIELDS, $row_url['post_json_str']);
            }
            if ($has_headers) {
                curl_setopt($ch, CURLOPT_HTTPHEADER, $arr_param['header_arr']);
                if ($want_gzip) {
                    curl_setopt($ch, CURLOPT_ENCODING, "gzip");
                }
            }
            if ($proxies) {
                $proxy = $proxies[array_rand($proxies)]; // pick a random proxy
                if ($proxy) {
                    curl_setopt($ch, CURLOPT_PROXY, $proxy);
                }
            }

            curl_multi_add_handle($mh, $ch);
            $handles[$k] = $ch;
        }

        // Progress line, shown only when there is no HTTP host (i.e. command-line runs).
        if (empty($_SERVER['HTTP_HOST'])) {
            echo "##fetch ".($i+1).'/'.$batch_total." install ".count($batch)." ok.start fetch...##\n";
        }

        // Drive all transfers of this batch to completion. curl_multi_exec() is
        // called on every iteration - also when curl_multi_select() fails with -1 -
        // so the loop can never spin forever; select() blocks until there is
        // activity, which keeps the CPU idle while waiting.
        $active = 0;
        do {
            $mrc = curl_multi_exec($mh, $active);
            if ($active && $mrc === CURLM_OK && curl_multi_select($mh, 1.0) === -1) {
                usleep(100000); // select() could not wait; back off briefly instead of busy-looping
            }
        } while ($active && ($mrc === CURLM_OK || $mrc === CURLM_CALL_MULTI_PERFORM));

        // Drain the message queue: this is what records each transfer's result
        // on its easy handle, so that curl_error() below reports real failures.
        while (curl_multi_info_read($mh) !== false) {
            // nothing to do per message
        }

        foreach ($handles as $k => $ch) {
            $content = curl_multi_getcontent($ch);
            if ($want_header) {
                // Header mode: store whatever came back, regardless of status.
                $arr_url[$k]['ret'] = $content;
            } elseif (
                (int)curl_getinfo($ch, CURLINFO_HTTP_CODE) === 200
                && curl_error($ch) === ''
                && is_string($content)
                && $content !== ''
            ) {
                // Body mode: only a clean, non-empty 200 response is recorded.
                $arr_url[$k]['ret'] = $content;
            }
            curl_multi_remove_handle($mh, $ch);
            curl_close($ch);
        }

        // Release everything belonging to this batch before starting the next one.
        curl_multi_close($mh);
        unset($handles, $mh);
    }

    return $arr_url;
}