我认为以下内容应该会有所帮助——您需要调整 XPath 查询,以便定位特定的表格和特定的单元格内容,但主体代码似乎可以正常工作。我怀疑原始代码的问题在于 URL 使用的是 https,而在发起 cURL 请求时,https 通常需要额外的配置。_curlrequest 函数中有一些设置是可以删除的,我只是把它们从另一个脚本中原样复制了过来。
请将 $cacert 的路径改为您系统上 cacert.pem 副本的位置,或者改为 curl.haxx.se 上的在线版本(live version)。
/* Address of the page to scrape; handed to _curlrequest() further down */
$url = 'https://www.sanita.puglia.it/monitorpo/aslfg/monitorps-web/monitorps/monitorPSperASL.do?codNazionale=160115';
/**
 * Fetch a URL with cURL and collect the body, transfer info, error text and verbose log.
 *
 * @param string|null $url     Address to request. When null no request is made and the
 *                             default result (http_code 100) is returned.
 * @param array|null  $options Extra CURLOPT_* => value pairs. They are applied after the
 *                             built-in defaults and therefore override them.
 *
 * @return object Object with the properties:
 *                - response: body string, false when curl_exec() fails, null when no request ran
 *                - info:     curl_getinfo() cast to an object (only http_code=100 when no request ran)
 *                - headers:  always null; present so every result has the same shape
 *                - errors:   curl_error() text, null when no request ran
 *                - verbose:  cURL verbose log captured via CURLOPT_STDERR, null when unavailable
 */
function _curlrequest($url=null, $options=null){
    /* CA bundle used to verify the peer certificate - change to a path valid on your system */
    $cacert='c:/wwwroot/cacert.pem';

    /* Default result. `info` is an object so that $res->info->http_code works on every path */
    $res=(object)array(
        'response' => null,
        'verbose'  => null,
        'info'     => (object)array('http_code' => 100),
        'headers'  => null,
        'errors'   => null
    );
    if(is_null($url)) return $res;

    /* Write and close any open session before the (potentially slow) request starts */
    session_write_close();

    /* Initialise curl request object */
    $curl=curl_init();
    if($curl===false){
        $res->errors='Unable to initialise cURL';
        return $res;
    }

    /* In-memory stream that receives the verbose log; opened only once a request will really run */
    $vbh=fopen('php://temp', 'w+');

    /* Use the same trimmed URL for the scheme test and for the request itself */
    $url=trim($url);

    if(strtolower((string)parse_url($url,PHP_URL_SCHEME))==='https'){
        curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, true);
        curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, 2);
        /* Only point cURL at the bundle when it is readable; otherwise leave cURL's configured default CA store in place */
        if(is_readable($cacert)) curl_setopt($curl, CURLOPT_CAINFO, $cacert);
    }

    /* Define standard options */
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_AUTOREFERER, true);
    curl_setopt($curl, CURLOPT_FOLLOWLOCATION, true);
    curl_setopt($curl, CURLOPT_FAILONERROR, true);
    curl_setopt($curl, CURLOPT_HEADER, false);
    curl_setopt($curl, CURLINFO_HEADER_OUT, false);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    /* CURLOPT_BINARYTRANSFER intentionally dropped: it is a deprecated no-op */
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 20);
    curl_setopt($curl, CURLOPT_TIMEOUT, 60);
    curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36');
    curl_setopt($curl, CURLOPT_MAXREDIRS, 10);
    curl_setopt($curl, CURLOPT_ENCODING, '');
    curl_setopt($curl, CURLOPT_NOPROGRESS, true);

    /* Capture the verbose log only when the temp stream could be opened */
    if(is_resource($vbh)){
        curl_setopt($curl, CURLOPT_VERBOSE, true);
        curl_setopt($curl, CURLOPT_STDERR, $vbh);
    }

    /* Assign runtime parameters as options */
    if(isset($options) && is_array($options)){
        foreach($options as $param => $value) curl_setopt($curl, $param, $value);
    }

    /* Execute the request and store responses (exec first, then the info/error it produced) */
    $res->response=curl_exec($curl);
    $res->info=(object)curl_getinfo($curl);
    $res->errors=curl_error($curl);

    if(is_resource($vbh)){
        rewind($vbh);
        $res->verbose=stream_get_contents($vbh);
        fclose($vbh);
    }

    /* curl_close() has no effect from PHP 8.0 onwards, so only call it where it still matters */
    if(PHP_VERSION_ID < 80000) curl_close($curl);

    return $res;
}
/**
 * Parse an HTML string into a DOMDocument, tolerating malformed markup.
 *
 * @param string|false $data  HTML source. A falsy value prints an error message and yields null.
 * @param bool         $debug When true and libxml reported parse errors, the list of
 *                            errors from libxml_get_errors() is returned instead of the document.
 *
 * @return DOMDocument|array|null The parsed document, the libxml error list (debug mode only),
 *                                or null when no data was supplied or an Exception was caught.
 */
function getdom($data=false, $debug=false){
    if(!$data){
        echo 'No data passed whilst trying to invoke DOMDocument';
        return null;
    }

    /* Collect parse errors internally instead of emitting warnings; remember the previous mode */
    $previous=libxml_use_internal_errors(true);
    try{
        $dom = new DOMDocument();
        $dom->validateOnParse=false;
        $dom->standalone=true;
        $dom->strictErrorChecking=false;
        $dom->recover=true;
        $dom->formatOutput=false;
        $dom->loadHTML($data);

        $errors=libxml_get_errors();
        libxml_clear_errors();

        return !empty($errors) && $debug ? $errors : $dom;
    }catch(Exception $e){
        echo $e->getMessage();
        return null;
    }finally{
        /* Restore the caller's libxml error mode so this helper does not leak global state */
        libxml_use_internal_errors($previous);
    }
}
/* Fetch the page and print the text of every matching cell, one per line */
$obj=_curlrequest($url);

if(isset($obj->info->http_code) && (int)$obj->info->http_code===200){
    $dom=getdom($obj->response);

    /* getdom() may hand back null or a list of libxml errors, so only query a real document */
    if($dom instanceof DOMDocument){
        $xp=new DOMXPath($dom);
        $query='//div[ contains(@class,"cRiga3 boxtriageS") ]';
        $col=$xp->query($query);

        /* query() yields false for an invalid expression, hence the emptiness check */
        if(!empty($col) && $col->length > 0){
            foreach($col as $node){
                /* The text comes from a remote page: escape it before writing it into HTML */
                echo htmlspecialchars($node->nodeValue, ENT_QUOTES, 'UTF-8') . '<br />';
            }
        }
    }
}
输出结果如下:
2
20
37
>1h
1
2
24
10
5
7
32
29
0
3
25
5
0
0
6
2