2016-04-26 50 views
0

我创建了一个网页报废器,该报废器从页面中取出数据并将其存储在.csv文件中。我有多个页面执行此程序,但是,有一个页面,当我执行我的程序与该链接时,它给出了一个错误“java.net.SocketTimeoutException:读取超时”在我已创建连接的线jsoup库。我不明白为什么它在该特定页面上发生错误。我的代码和日志在下面提到。
注意:我使用的是jsoup HTML解析器,java 1.7,Netbeans。在一个特定页面上给出“java.net.SocketTimeoutException:读取超时”

public class ComOpen_end_fund { 

    boolean writeCSVToConsole = true; 
    boolean writeCSVToFile = true; 
    boolean sortTheList = true; 
    boolean writeToConsole; 
    boolean writeToFile; 
    public static Document doc = null; 
    public static Elements tbodyElements = null; 
    public static Elements elements = null; 
    public static Elements tdElements = null; 
    public static Elements trElement2 = null; 
    public static String Dcomma = ","; 
    public static String line = ""; 
    public static ArrayList<Elements> sampleList = new ArrayList<Elements>(); 

    public static void createConnection() throws IOException { 
     System.setProperty("http.proxyHost", "191.1.1.202"); 
     System.setProperty("http.proxyPort", "8080"); 
     String tempUrl = "http://mufap.com.pk/nav-report.php?tab=01&fname=&amc=&cat=&strdate=&endate=&submitted=&mnt=&yrs=&s="; 
     doc = Jsoup.connect(tempUrl).get(); //this is line number 42 
    } 

    public static void parsingHTML() throws Exception { 
     for (Element table : doc.getElementsByTag("table")) { 

      for (Element trElement : table.getElementsByTag("tr")) { 
       trElement2 = trElement.getElementsByTag("tr"); 
       tdElements = trElement.getElementsByTag("td"); 
       File fold = new File("C:\\open-end-fund.csv"); 
       fold.delete(); 
       File fnew = new File("C:\\open-end-fund.csv"); 
       FileWriter sb = new FileWriter(fnew, true); 
       if (trElement.hasClass("tab-data")) { 
        for (Iterator<Element> it = tdElements.iterator(); it.hasNext();) { 
         if (it.hasNext()) { 
          sb.append("\r\n"); 

         } 

         for (Iterator<Element> it2 = trElement2.iterator(); it.hasNext();) { 
          Element tdElement2 = it.next(); 
          final String content = tdElement2.text(); 
          if (it2.hasNext()) { 

           sb.append(formatData(content)); 
           sb.append(" , "); 

          } 
         } 

         System.out.println(sb.toString()); 
         sb.flush(); 
         sb.close(); 
        } 
       } 
       System.out.println(sampleList.add(tdElements)); 

      } 
     } 
    } 
    private static final SimpleDateFormat FORMATTER_MMM_d_yyyy = new SimpleDateFormat("MMM d, yyyy", Locale.US); 
    private static final SimpleDateFormat FORMATTER_dd_MMM_yyyy = new SimpleDateFormat("dd-MMM-YYYY", Locale.US); 

    public static String formatData(String text) { 
     String tmp = null; 

     try { 
      Date d = FORMATTER_MMM_d_yyyy.parse(text); 
      tmp = FORMATTER_dd_MMM_yyyy.format(d); 
     } catch (ParseException pe) { 
      tmp = text; 
     } 

     return tmp; 
    } 

    public static void main(String[] args) throws IOException, Exception { 
     createConnection(); //this is line number 100 
     parsingHTML(); 

    } 

} 

,这里是日志猫

Exception in thread "main" java.net.SocketTimeoutException: Read timed out 
    at java.net.SocketInputStream.socketRead0(Native Method) 
    at java.net.SocketInputStream.socketRead(SocketInputStream.java:116) 
    at java.net.SocketInputStream.read(SocketInputStream.java:170) 
    at java.net.SocketInputStream.read(SocketInputStream.java:141) 
    at java.io.BufferedInputStream.fill(BufferedInputStream.java:246) 
    at java.io.BufferedInputStream.read1(BufferedInputStream.java:286) 
    at java.io.BufferedInputStream.read(BufferedInputStream.java:345) 
    at sun.net.www.http.HttpClient.parseHTTPHeader(HttpClient.java:704) 
    at sun.net.www.http.HttpClient.parseHTTP(HttpClient.java:647) 
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream0(HttpURLConnection.java:1536) 
    at sun.net.www.protocol.http.HttpURLConnection.getInputStream(HttpURLConnection.java:1441) 
    at java.net.HttpURLConnection.getResponseCode(HttpURLConnection.java:480) 
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:516) 
    at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:493) 
    at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:205) 
    at org.jsoup.helper.HttpConnection.get(HttpConnection.java:194) 
    at com.open_end_fund.ComOpen_end_fund.createConnection(ComOpen_end_fund.java:42) 
    at com.open_end_fund.ComOpen_end_fund.main(ComOpen_end_fund.java:100) 
C:\Users\talha\AppData\Local\NetBeans\Cache\8.1\executor-snippets\run.xml:53: Java returned: 1 
BUILD FAILED (total time: 3 seconds) 

当我在http://www.mufap.com.pk/nav_returns_performance.php?tab=01
这个链接它工作正常运行这段代码。

回答