2017-04-19 61 views
0

我试图使用 cURL 函数获取数据,但不幸的是,对大多数网站它都返回空数据。我的代码如下(问题:multi cURL 返回空数据):

// One request, keyed by site name; the per-request 'opts' entry is passed
// alongside the URL.
$requests = [
    'blocket' => [
        'url'  => 'http://blocket.se',
        'opts' => [CURLOPT_RETURNTRANSFER => true],
    ],
];
$responses = multi($requests);
// Dump the raw response structure for inspection.
print_r($responses);


/**
 * Run several cURL requests in parallel and collect their results.
 *
 * @param array $requests Map of key => request description. Each request is an array with:
 *                        - 'url'        (required) URL to fetch
 *                        - 'opts'       (optional) cURL options that override $opts for this request
 *                        - 'post_data'  (optional) when set, the request is sent as a POST
 *                        - 'post_array' (optional) POST fields; 'post_data' is used when absent
 *                        - 'scraper'    (optional) value copied unchanged into the response
 * @param array $opts     cURL options applied to every request. Defaults: 3 s connect
 *                        timeout, 3 s total timeout, CURLOPT_RETURNTRANSFER enabled.
 *
 * @return array Map of the same keys => ['data', 'info', 'error', 'scraper']. When a
 *               transfer fails, 'data' and 'info' are null and 'error' holds the message.
 */
function multi(array $requests, array $opts = []) {
    // easy handles, per-request scraper values and final responses, all keyed like $requests
    $chs = [];
    $scraper = [];
    $responses = [];
    // merge general curl options with defaults (caller-supplied values win)
    $opts += [CURLOPT_CONNECTTIMEOUT => 3, CURLOPT_TIMEOUT => 3, CURLOPT_RETURNTRANSFER => 1];
    $mh = curl_multi_init();
    $running = null;

    foreach ($requests as $key => $request) {
        $chs[$key] = curl_init();
        curl_setopt($chs[$key], CURLOPT_URL, $request['url']);
        // 'scraper' is optional; reading it unconditionally raised an undefined-index notice
        $scraper[$key] = isset($request['scraper']) ? $request['scraper'] : null;
        if (isset($request['post_data'])) {
            curl_setopt($chs[$key], CURLOPT_POST, 1);
            // The original tested 'post_data' but sent 'post_array'. Keep honouring
            // 'post_array' when given, otherwise send the 'post_data' payload itself.
            $postFields = isset($request['post_array']) ? $request['post_array'] : $request['post_data'];
            curl_setopt($chs[$key], CURLOPT_POSTFIELDS, $postFields);
        }
        // per-request options take precedence over the shared ones
        curl_setopt_array($chs[$key], (isset($request['opts']) ? $request['opts'] + $opts : $opts));
        curl_multi_add_handle($mh, $chs[$key]);
    }

    do {
        $status = curl_multi_exec($mh, $running);
        // CURLM_CALL_MULTI_PERFORM only means "call again"; anything else non-OK is fatal
        $ok = ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM);
        if ($ok && $running > 0 && curl_multi_select($mh) === -1) {
            // select can fail immediately on some platforms; back off instead of spinning
            usleep(1000);
        }
    } while ($ok && $running > 0);

    // With the multi interface the per-transfer result code is delivered through
    // curl_multi_info_read(); curl_errno() alone can still report 0 for a failed
    // transfer (e.g. an SSL certificate problem), which looked like an empty success.
    $results = [];
    while (($done = curl_multi_info_read($mh)) !== false) {
        $doneKey = array_search($done['handle'], $chs, true);
        if ($doneKey !== false) {
            $results[$doneKey] = $done['result'];
        }
    }

    foreach ($chs as $key => $ch) {
        $code = isset($results[$key]) ? $results[$key] : curl_errno($ch);
        if ($code !== CURLE_OK) {
            $message = curl_error($ch);
            if ($message === '') {
                $message = 'cURL error ' . $code;
            }
            $responses[$key] = ['data' => null, 'info' => null, 'error' => $message, 'scraper' => $scraper[$key]];
        } else {
            // save successful response
            $responses[$key] = ['data' => curl_multi_getcontent($ch), 'info' => curl_getinfo($ch), 'error' => null, 'scraper' => $scraper[$key]];
        }
        // detach the easy handle from the multi handle
        curl_multi_remove_handle($mh, $ch);
    }
    curl_multi_close($mh);
    return $responses;
}

结果

Array ([blocket] => Array ([data] => [info] => Array ([url] => http://blocket.se/ [content_type] => [http_code] => 302 [header_size] => 119 [request_size] => 49 [filetime] => -1 [ssl_verify_result] => 0 [redirect_count] => 0 [total_time] => 0.328 [namelookup_time] => 0 [connect_time] => 0.172 [pretransfer_time] => 0.172 [size_upload] => 0 [size_download] => 0 [speed_download] => 0 [speed_upload] => 0 [download_content_length] => 0 [upload_content_length] => -1 [starttransfer_time] => 0.328 [redirect_time] => 0 [redirect_url] => https://www.blocket.se [primary_ip] => 185.49.132.3 [certinfo] => Array () [primary_port] => 80 [local_ip] => 192.168.0.135 [local_port] => 58357) [error] =>)) 

正如你在结果中看到的,[data] 为空。

UPDATE

与 @Sahil 讨论后发现,上面的代码对不使用 SSL 的网站工作正常,但对使用 SSL 的网站会失败。所以我尝试了 CURLOPT_SSL_VERIFYPEER、CURLOPT_SSL_VERIFYHOST 以及 CURLOPT_FOLLOWLOCATION,但到目前为止这些都没有帮助。

回答

0

您的代码一切正常,唯一的问题是您的网址:您当前的网址会返回 302 重定向响应。请尝试以下做法。

更改您的网址:

http://www.blocket.se/ 

改为:

https://www.blocket.se/ 

enter image description here PHP代码:

<?php 

// Show PHP errors in the output while experimenting.
ini_set('display_errors', 1);

// One HTTPS request, keyed by site name, with its own cURL options.
$requests = [
    'blocket' => [
        'url'  => 'https://www.blocket.se/',
        'opts' => [CURLOPT_RETURNTRANSFER => true],
    ],
];
$responses = multi($requests);
// Dump the raw response structure for inspection.
print_r($responses);

/**
 * Run several cURL requests in parallel and collect their results.
 *
 * @param array $requests Map of key => request description. Each request is an array with:
 *                        - 'url'        (required) URL to fetch
 *                        - 'opts'       (optional) cURL options that override $opts for this request
 *                        - 'post_data'  (optional) when set, the request is sent as a POST
 *                        - 'post_array' (optional) POST fields; 'post_data' is used when absent
 *                        - 'scraper'    (optional) value copied unchanged into the response
 * @param array $opts     cURL options applied to every request. Defaults: 3 s connect
 *                        timeout, 3 s total timeout, CURLOPT_RETURNTRANSFER enabled.
 *
 * @return array Map of the same keys => ['data', 'info', 'error', 'scraper']. When a
 *               transfer fails, 'data' and 'info' are null and 'error' holds the message.
 */
function multi(array $requests, array $opts = [])
{
    // Easy handles, scraper values and responses are all keyed like $requests.
    $chs = [];
    $scraper = [];
    $responses = [];

    // Caller-supplied options win over these defaults.
    $opts += [CURLOPT_CONNECTTIMEOUT => 3, CURLOPT_TIMEOUT => 3, CURLOPT_RETURNTRANSFER => 1];

    $mh = curl_multi_init();
    $running = null;

    foreach ($requests as $key => $request)
    {
        $chs[$key] = curl_init();
        curl_setopt($chs[$key], CURLOPT_URL, $request['url']);
        // 'scraper' is optional; reading it unconditionally raised an undefined-index notice.
        $scraper[$key] = isset($request['scraper']) ? $request['scraper'] : null;
        if (isset($request['post_data']))
        {
            curl_setopt($chs[$key], CURLOPT_POST, 1);
            // The original tested 'post_data' but sent 'post_array'. Keep honouring
            // 'post_array' when given, otherwise send the 'post_data' payload itself.
            $postFields = isset($request['post_array']) ? $request['post_array'] : $request['post_data'];
            curl_setopt($chs[$key], CURLOPT_POSTFIELDS, $postFields);
        }
        // Per-request options take precedence over the shared ones.
        curl_setopt_array($chs[$key], (isset($request['opts']) ? $request['opts'] + $opts : $opts));
        curl_multi_add_handle($mh, $chs[$key]);
    }

    do
    {
        $status = curl_multi_exec($mh, $running);
        // CURLM_CALL_MULTI_PERFORM only means "call again"; anything else non-OK is fatal.
        $ok = ($status === CURLM_OK || $status === CURLM_CALL_MULTI_PERFORM);
        if ($ok && $running > 0 && curl_multi_select($mh) === -1)
        {
            // select can fail immediately on some platforms; back off instead of spinning.
            usleep(1000);
        }
    } while ($ok && $running > 0);

    // With the multi interface the per-transfer result code is delivered through
    // curl_multi_info_read(); curl_errno() alone can still report 0 for a failed
    // transfer (e.g. an SSL certificate problem), which looked like an empty success.
    $results = [];
    while (($done = curl_multi_info_read($mh)) !== false)
    {
        $doneKey = array_search($done['handle'], $chs, true);
        if ($doneKey !== false)
        {
            $results[$doneKey] = $done['result'];
        }
    }

    foreach ($chs as $key => $ch)
    {
        $code = isset($results[$key]) ? $results[$key] : curl_errno($ch);
        if ($code !== CURLE_OK)
        {
            $message = curl_error($ch);
            if ($message === '')
            {
                $message = 'cURL error ' . $code;
            }
            $responses[$key] = ['data' => null, 'info' => null, 'error' => $message, 'scraper' => $scraper[$key]];
        } else
        {
            $responses[$key] = ['data' => curl_multi_getcontent($ch), 'info' => curl_getinfo($ch), 'error' => null, 'scraper' => $scraper[$key]];
        }
        // Detach the easy handle from the multi handle.
        curl_multi_remove_handle($mh, $ch);
    }
    curl_multi_close($mh);
    return $responses;
}
+0

按你的建议我尝试了 https://www.blocket.se/,但仍然没有返回任何数据,不过 http_code 变成了 0。请看下面的结果: Array([blocket] => Array([data] => [info] => Array([url] => https://www.blocket.se/ [content_type] => [http_code] => 0 [header_size] => 0 –

+0

您是否已将网址更改为 'https://www.blocket.se/'? –

+0

@SaadBashir只需复制粘贴我的代码,然后重试。 –

1

正如 @Sahil 指出的,代码本身没有问题。问题基本上在于 cURL 无法访问 HTTPS 网站,这是因为 php.ini 中没有配置 CA 根证书。

如果您遇到类似的问题,请访问 http://curl.haxx.se/docs/caextract.html 并下载证书,将其保存到您想要的位置,然后在 php.ini 中定义该文件的绝对路径,例如:

curl.cainfo = c:\wamp\cacert.pem 

不要像许多网站建议的那样设置 CURLOPT_SSL_VERIFYPEER = false,因为这会使您的网站容易受到攻击。