2012-08-08 105 views
1

我想抓取谷歌图片搜索返回的结果。Google 为此提供了哪些工具?我正在构建一个需要针对各种主题进行训练的对象识别系统。如何抓取谷歌网页图片?

+0

如果滚动到图像API页面的顶部,你会看到,它已被弃用。 – 2012-08-08 04:25:42

回答

2

这可能对你有用,因为谷歌已经弃用了他们的搜索API:

谷歌自定义搜索让您可以对单个网站或一组网站进行搜索。利用Google的力量创建适合您的需求和兴趣的搜索引擎,并在您的网站上展示结果。您的自定义搜索引擎可以根据您指定的网站对搜索结果进行优先排序或限制。

https://developers.google.com/custom-search/

2

您可以使用谷歌的图像API来做到这一点。
例子:

// Query the (deprecated) Google Image Search JSON API and decode the response.
$url = "https://ajax.googleapis.com/ajax/services/search/images?v=1.0&q=stackoverflow";

// Value sent as the Referer header: replace with the URL of your own site.
$referer = "http://www.example.com/";

// sendRequest
// note how referer is set manually
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body as a string instead of printing it
curl_setopt($ch, CURLOPT_REFERER, $referer);
$body = curl_exec($ch);
// curl_exec returns false on failure; read the error before the handle is closed
$error = ($body === false) ? curl_error($ch) : null;
curl_close($ch);

if ($error !== null) {
    die("Request failed: " . $error);
}

// now, process the JSON string
$json = json_decode($body);
if ($json === null) {
    // json_decode returns null when the body cannot be decoded
    die("Response could not be decoded as JSON");
}
// now have some fun with the results...

更多信息:https://developers.google.com/image-search/v1/jsondevguide#json_snippets_php

+0

滚动到该页面的顶部,您会看到它已被弃用。 – 2012-08-08 04:24:52

+0

页面上并没有说它已被弃用……对我来说,代码运行得很好 – Julien 2012-08-08 04:32:12

+0

“截至2011年5月26日,Google图片搜索API已被正式弃用。它将继续按照我们的弃用政策进行工作,但您每天可能提出的请求数量可能会受到限制,我们建议您升级到现在支持图像搜索的自定义搜索API。” - Google https://developers.google.com/image-search/ – 2012-08-08 04:33:50

0
package GoogleImageDownload; 

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashSet;
import java.util.Set;

import javax.net.ssl.HttpsURLConnection;
import org.w3c.dom.*;

/**
 * Small crawler demo: downloads a Google image-search results page, saves every image the page
 * references whose declared width and height both exceed 10 pixels, and then follows the page's
 * links a bounded number of hops.
 *
 * <p>Relies on two project helpers that are not defined in this file:
 * {@code light_html2xml.Html2Xml} and {@code DocumentObjectClass.convertStringToDocument}.
 */
public class HttpURLConnectionExample {

    private static final String USER_AGENT = "Chrome/44.0.2403.157";

    /** An image is saved only if both declared dimensions are strictly greater than this. */
    private static final int MIN_IMAGE_SIZE_PX = 10;

    /** How many link hops away from the start page the crawl may go (0 = start page only). */
    private static final int MAX_LINK_DEPTH = 2;

    /** URLs already requested, so that cyclic links are not fetched again. */
    private final Set<String> visitedUrls = new HashSet<>();

    /** Running number used as the file name, so pages do not overwrite each other's images. */
    private int savedImageCount;

    /**
     * Runs the demo against a fixed image-search URL.
     *
     * @param args unused
     * @throws Exception if the start page cannot be fetched or converted
     */
    public static void main(String[] args) throws Exception {
        HttpURLConnectionExample http = new HttpURLConnectionExample();

        System.out.println("Testing 1 - Send Http GET request");
        String url = "https://www.google.co.in/search?tbm=isch&q=test";
        http.sendGet(url);

        System.out.println("\nTesting 2 - Send Http POST request");
        // No POST request is implemented in this example.
    }

    /**
     * Fetches {@code url} with an HTTP GET, saves the images it references and follows its links.
     *
     * @param url absolute URL of the page to start from
     * @throws Exception if this page cannot be fetched or converted; failures on images or on
     *     linked pages are reported on stderr and do not abort the crawl
     */
    private void sendGet(String url) throws Exception {
        crawl(url, 0);
    }

    /** Processes one page at the given hop distance from the start page, at most once per URL. */
    private void crawl(String url, int depth) throws Exception {
        if (!visitedUrls.add(url)) {
            return; // already requested; avoids endless loops on cyclic links
        }
        String xml = light_html2xml.Html2Xml(fetch(url));
        Document page = DocumentObjectClass.convertStringToDocument(xml);

        saveImages(page);
        if (depth < MAX_LINK_DEPTH) {
            followLinks(page, depth + 1);
        }
    }

    /** Issues the GET request and returns the response body with its lines concatenated. */
    private String fetch(String url) throws IOException {
        // HttpURLConnection covers both http:// and https:// (HttpsURLConnection extends it).
        HttpURLConnection con = (HttpURLConnection) new URL(url).openConnection();
        try {
            // optional, default is GET
            con.setRequestMethod("GET");
            con.setRequestProperty("User-Agent", USER_AGENT);

            int responseCode = con.getResponseCode();
            System.out.println("\nSending 'GET' request to URL : " + url);
            System.out.println("Response Code : " + responseCode);

            StringBuilder response = new StringBuilder();
            // NOTE(review): decodes with the platform default charset, as before; confirm whether
            // the charset announced by the response should be used instead.
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(con.getInputStream()))) {
                String inputLine;
                while ((inputLine = in.readLine()) != null) {
                    response.append(inputLine);
                }
            }
            return response.toString();
        } finally {
            con.disconnect();
        }
    }

    /** Saves every {@code img} element of the page that declares a large enough size. */
    private void saveImages(Document page) {
        NodeList images = page.getElementsByTagName("img");
        for (int i = 0; i < images.getLength(); i++) {
            Node node = images.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element elem = (Element) node;
            if (parsePixels(elem.getAttribute("height")) <= MIN_IMAGE_SIZE_PX
                    || parsePixels(elem.getAttribute("width")) <= MIN_IMAGE_SIZE_PX) {
                continue;
            }

            String src = elem.getAttribute("src");
            System.out.println(src);
            try {
                saveImage(src, String.valueOf(savedImageCount));
                savedImageCount++;
            } catch (Exception e) {
                // best effort: one bad image must not stop the rest of the page
                System.err.println(e.getMessage());
            }
        }
    }

    /** Crawls the target of every {@code a} element of the page that has a non-empty href. */
    private void followLinks(Document page, int depth) {
        NodeList anchors = page.getElementsByTagName("a");
        for (int i = 0; i < anchors.getLength(); i++) {
            Node node = anchors.item(i);
            if (node.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // getAttribute yields an empty string, not null, when the attribute is absent
            String href = ((Element) node).getAttribute("href");
            if (href == null || href.isEmpty()) {
                continue;
            }
            try {
                crawl(href, depth);
            } catch (Exception e) {
                // best effort: relative or unsupported links are reported and skipped
                System.err.println(e.getMessage());
            }
        }
    }

    /** Parses a size attribute such as {@code "120"} or {@code "120px"}; -1 if absent or invalid. */
    private static int parsePixels(String value) {
        if (value == null) {
            return -1;
        }
        try {
            return Integer.parseInt(value.replace("px", ""));
        } catch (NumberFormatException e) {
            return -1;
        }
    }

    /**
     * Downloads {@code imageUrl} into {@code <working directory>/<name>.jpg}.
     *
     * @param imageUrl absolute URL of the image
     * @param name file name without extension
     * @throws IOException if the URL is malformed or the download or write fails
     */
    public static void saveImage(String imageUrl, String name) throws IOException {
        URL url = new URL(imageUrl);

        String destName = new File(".").getAbsolutePath() + "/" + name + ".jpg";
        System.out.println(destName);

        // Both streams are closed even when a read or write fails part-way through.
        try (InputStream is = url.openStream();
                OutputStream os = new FileOutputStream(destName)) {
            byte[] b = new byte[2048];
            int length;
            while ((length = is.read(b)) != -1) {
                os.write(b, 0, length);
            }
        }
    }
}