2017-02-22 78 views
0

我正在制作一个多次访问网站的命令行工具。我同时使用多个线程访问页面,每个线程通过循环重复访问网站。该工具工作正常,也能按需访问网站,但我遇到的唯一问题是:它打开网站后几秒钟就将其关闭,因此每次访问的会话持续时间只有 3 到 4 秒。我需要把会话持续时间增加到至少 60 秒。以下是我的代码。
HtmlUnit WebClient Session Duration

package directUrl; 

import java.io.IOException; 
import java.net.MalformedURLException; 
import java.net.URL; 

import com.gargoylesoftware.htmlunit.BrowserVersion; 
import com.gargoylesoftware.htmlunit.FailingHttpStatusCodeException; 
import com.gargoylesoftware.htmlunit.WebClient; 

/**
 * Worker thread that repeatedly opens one URL with a fresh HtmlUnit
 * {@link WebClient} per visit.
 *
 * <p>Each visit loads the page, prints a success message, keeps the client
 * open for {@link #VISIT_DURATION_MILLIS} and then closes it. The client is
 * managed with try-with-resources so it is closed even when loading the page
 * fails or the thread is interrupted while waiting.
 */
public class ThreadDirectUrl extends Thread {

    /** Time each visit keeps its client open after the page has loaded, in milliseconds. */
    private static final long VISIT_DURATION_MILLIS = 60000L;

    private final String url;
    private final String paramUserAgent;
    private final String paramReferer;
    private final int loopSize;

    /**
     * Creates a worker.
     *
     * @param url            address to open; must include the protocol
     * @param paramUserAgent user agent choice, one of "1" to "4"; any other
     *                       value results in an empty user agent string
     * @param paramReferer   value sent in the {@code Referer} request header
     * @param loopSize       number of visits this worker performs
     */
    public ThreadDirectUrl(String url, String paramUserAgent, String paramReferer, int loopSize) {
        this.url = url;
        this.paramUserAgent = paramUserAgent;
        this.paramReferer = paramReferer;
        this.loopSize = loopSize;
    }

    /**
     * Performs {@code loopSize} visits one after another. The first failure
     * (bad URL, I/O error, interruption) is reported on standard output and
     * ends the remaining visits of this worker.
     */
    @Override
    public void run() {
        BrowserVersion bv = new BrowserVersion("Netscape", "Version", resolveUserAgent(paramUserAgent), 0);

        try {
            URL openUrl = new URL(url);
            for (int i = 1; i <= loopSize; i++) {
                // try-with-resources: the client is released on every exit path,
                // not only after a successful visit.
                try (WebClient webClient = new WebClient(bv)) {
                    webClient.addRequestHeader("Accept-Encoding", "compress, gzip");
                    webClient.addRequestHeader("Referer", paramReferer);
                    webClient.getOptions().setPrintContentOnFailingStatusCode(true);
                    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
                    webClient.getOptions().setThrowExceptionOnScriptError(false);
                    webClient.getOptions().setJavaScriptEnabled(true);
                    webClient.getOptions().setCssEnabled(false);
                    webClient.getOptions().setPopupBlockerEnabled(true);
                    webClient.getOptions().setMaxInMemory(3);
                    webClient.getPage(openUrl);

                    System.out.println(Thread.currentThread().getName() + "----" + i + "----\nSuccess!\nUser Agent: "
                            + bv.getUserAgent() + "\n\n");
                    Thread.sleep(VISIT_DURATION_MILLIS);
                    webClient.getCurrentWindow().getJobManager().removeAllJobs();
                }
            }
            System.out.println(Thread.currentThread().getName() + "COMPLETED");

        } catch (FailingHttpStatusCodeException e) {
            System.out.println("Error!");
        } catch (MalformedURLException e) {
            System.out.println("Error - Use URL with \"http://\" or \"https://\"!");
        } catch (IOException e) {
            System.out.println("Error!");
        } catch (ArrayIndexOutOfBoundsException e) {
            System.out.println("Error!");
        } catch (InterruptedException e) {
            System.out.println(Thread.currentThread().getName() + "Interrupted");
            // Restore the flag so code further up the stack can still see the interrupt.
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Maps the command-line user agent choice to a user agent string.
     *
     * @param choice the choice as given on the command line; may be {@code null}
     * @return the matching user agent string, or an empty string when the
     *         choice is {@code null} or not one of "1" to "4"
     */
    private static String resolveUserAgent(String choice) {
        if (choice == null) {
            return "";
        }
        switch (choice) {
            case "1":
                return "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:17.0) Gecko/17.0 Firefox/17.0";
            case "2":
                return "Mozilla/5.0 (Linux; U; Android 2.2; en-us; Nexus One Build/FRF91) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1";
            case "3":
                return "Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_0 like Mac OS X; en-us) AppleWebKit/532.9 (KHTML, like Gecko) Version/4.0.5 Mobile/8A293 Safari/6531.22.7";
            case "4":
                return "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/21.0.1180.79 Safari/537.1";
            default:
                return "";
        }
    }
}

主类如下:

package directUrl; 

import java.util.logging.Level; 
import java.util.logging.Logger; 

/**
 * Command-line entry point that starts several {@link ThreadDirectUrl}
 * workers against the same URL.
 */
public class DirectUrl {

    /** Number of positional arguments {@link #main(String[])} reads. */
    private static final int REQUIRED_ARG_COUNT = 5;

    /**
     * Parses the command line and starts the worker threads.
     *
     * <p>Expected arguments, in order: URL, user agent choice, referrer URL,
     * loop size (visits per thread), thread count. When arguments are missing
     * or the two counts are not integers, a usage message is printed to
     * standard error and the process exits with status 1.
     *
     * @param args the command-line arguments
     */
    public static void main(String[] args) {
        if (args.length < REQUIRED_ARG_COUNT) {
            exitWithUsage("Expected " + REQUIRED_ARG_COUNT + " arguments but got " + args.length + ".");
            return;
        }

        // Production Variables
        String url = args[0]; // URL
        String paramUserAgent = args[1]; // User Agent Choice
        String paramReferer = args[2]; // Referrer URL
        int loopSize; // Loop Size
        int threadSize; // Counts of threads
        try {
            loopSize = Integer.parseInt(args[3]);
            threadSize = Integer.parseInt(args[4]);
        } catch (NumberFormatException e) {
            exitWithUsage("Loop size and thread count must be integers. " + e.getMessage());
            return;
        }

        // Turn off the root java.util.logging logger.
        Logger logger = Logger.getLogger("");
        logger.setLevel(Level.OFF);

        // Create Multiple Threads
        for (int i = 1; i <= threadSize; i++) {
            ThreadDirectUrl aThread = new ThreadDirectUrl(url, paramUserAgent, paramReferer, loopSize);
            aThread.setName("thread" + i);
            aThread.start();
        }
    }

    /**
     * Reports a command-line problem together with the expected usage and
     * terminates the process with a non-zero exit status.
     *
     * @param problem description of what was wrong with the arguments
     */
    private static void exitWithUsage(String problem) {
        System.err.println(problem);
        System.err.println("Usage: DirectUrl <url> <userAgentChoice> <refererUrl> <loopSize> <threadCount>");
        System.exit(1);
    }

}

在 ThreadDirectUrl 类中,我在调用 getPage() 方法之后使用了:

Thread.sleep(60000); 

但它不起作用。请给出建议。

+0

在 `sleep` 之后你遇到了什么错误?你可以通过 `LogManager.getLogger("org.apache.http.wire").setLevel(org.apache.log4j.Level.ALL);` 查看请求头和 Cookie。请发布一个示例 URL。 –

+0

@AhmedAshour没有错误。 –

+0

那么,你期望什么?你得到一个页面(隐含会话),接下来应该做什么?什么'不工作'。 –

回答

0

如果希望服务器看到会话寿命更长,那么请与客户端进行一些操作。

例如,再次加载相同页面:

webClient.getPage(openUrl); 

Thread.sleep(60000); 

// then get the same page again 
webClient.getPage(openUrl); 
+0

在发布答案之前,我做了同样的事情,并且在做了一些关于服务器如何查看会话持续时间的研究之后,我做了同样的工作。但我会标记你的答案是正确的。 –

+0

谢谢,也尝试使用刚刚发布的最新版本2.25。 –