2017-08-26 103 views
1

试图获取C:\ xampp \ htdocs \ college-project \ index中的非对象的属性.PHP第16行尝试在C: xampp htdocs college-project index.php中获取非对象的属性,在第16行

<?php 
    $start="http://localhost/college-project/test.html"; 

    $already_crawled=array(); 

    function get_details($url) 
    { 
     $options = array('http'=>array('method'=>'GET', 'headers'=>"User-Agent:howBot/0.1\n")); 

     $context=stream_context_create($options); 

     $doc = new DOMDocument(); 
     @$doc->loadHTML(@file_get_contents($url, false, $context)); 

     $title=$doc->getElementsByTagName("title"); 
     $title=$title->item(0)->nodeValue; 

     echo $title."\n"; 
    } 

    function follow_links($url) 
    { 
     global $already_crawled; 

     $options = array('http'=>array('method'=>'GET', 'headers'=>"User-Agent:howBot/0.1\n")); 

     $context=stream_context_create($options); 

     $doc = new DOMDocument(); 
     $doc->loadHTML(file_get_contents($url, false, $context)); 

     $linklist=$doc->getElementsByTagName("a"); 

     foreach($linklist as $link) 
     { 
      $l=$link->getAttribute("href"); 

      if(substr($l,0,1)=="/" && substr($l,0,2)!="//") 
      { 
       $l=parse_url($url)["scheme"]."://".parse_url($url)["host"].$l; 
      } 
      else if(substr($l,0,2)=="//") 
      { 
       $l = parse_url($url)["scheme"].":".$l; 
      } 
      else if(substr($l,0,2)=="./") 
      { 
       $l = parse_url($url)["scheme"]."://".parse_url($url)["host"].dirname(parse_url($url)["path"]).substr($l,1); 
      } 
      else if(substr($l,0,1)=="#") 
      { 
       $l = parse_url($url)["scheme"]."://".parse_url($url)["host"].parse_url($url)["path"].$l; 
      } 
      else if(substr($l,0,3)=="../") 
      { 
       $l = parse_url($url)["scheme"]."://".parse_url($url)["host"]."/".$l; 
      } 
      else if(substr($l,0,11)=="javascript:") 
      { 
       continue; 
      } 
      else if(substr($l,0,5)=="https" && (substr($l,0,4)=="http")) 
      { 
       $l=parse_url["scheme"].".//".parse_url($url)["host"]."/".$l; 
      } 
      if(!in_array($l,$already_crawled)) 
      { 
       $already_crawled[]=$l; 
       echo get_details($l); 
       //echo $l."\n"; 
      } 
     } 

    } 
    follow_links($start); 
    print_r($already_crawled); 
?> 
+1

请test.html的内容 –

+0

所以你的test.html文件没有标题,你会得到PHP错误 –

回答

0

您可以检查是否存在之前尝试呼应

function get_details($url) 
{ 
    $options = array('http'=>array('method'=>'GET', 'headers'=>"User-Agent:howBot/0.1\n")); 

    $context=stream_context_create($options); 

    $doc = new DOMDocument(); 
    @$doc->loadHTML(@file_get_contents($url, false, $context)); 

    $title=$doc->getElementsByTagName("title"); 
    if($title->length AND is_object($title)) 
    { 
     $title=$title->item(0)->nodeValue; 
    }else{ 
     $title=''; 
    } 


    echo $title."\n"; 
} 
+0

我认为这可能会失败,因为项目(0)将不会返回任何内容。 $ title将是一个没有项目的nodeList – Andreas

+0

@Andreas:更新了标题长度检查。感谢您指出这一点 –

+0

它无法正常工作 –

相关问题