0

我正在尝试从 AliExpress 抓取某些特定商品的信息,但当代码处理到某个商品时(完全不确定是哪一个),parseItems 方法中的 urlelement 会随机失效(stale),导致该方法抛出异常。使用 Selenium 进行 Web 抓取:代码随机抛出 StaleElementReferenceException

代码:

package com.ardilgulez.seleniumweb; 

import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;

import java.util.List;
import java.util.concurrent.TimeUnit;

public class App { 

    private static WebDriver firefoxDriver = new FirefoxDriver(); 

    public static boolean parseItems throws StaleElementReferenceException (List<WebElement> items){ 
     System.out.println(items.size()); 
     if(items.size() > 0){ 
      items.forEach((item) -> { 
       WebElement urlelement = item.findElement(By.cssSelector(".detail>h3>a")); 
       String href = urlelement.getAttribute("href"); 
       System.out.println(href); 
       String title = urlelement.getAttribute("title"); 
       System.out.println(title); 
      }); 
     } 
     return true; 
    } 

    public static void main(String[] args) { 
     firefoxDriver.get("https://www.aliexpress.com/"); 
     firefoxDriver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS); 

     WebElement questionElement = firefoxDriver.findElement(By.xpath("//input[@name='SearchText']")); 
     questionElement.sendKeys("ESP8266"); 
     questionElement.submit(); 

     while (true) { 
      try { 
       (new WebDriverWait(firefoxDriver, 10)) 
        .until((WebDriver webDriver) -> ((JavascriptExecutor) webDriver).executeScript("return document.readyState").equals("complete")); 

       (new WebDriverWait(firefoxDriver, 10)) 
        .until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//ul[@id='hs-list-items']"))); 

       (new WebDriverWait(firefoxDriver, 10)) 
        .until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//div[@id='hs-below-list-items']"))); 

       System.out.println("WAIT1"); 

       (new WebDriverWait(firefoxDriver, 20)) 
         .until((WebDriver webDriver) -> { 
          WebElement listItemsUL = (new WebDriverWait(webDriver, 10)) 
           .until(ExpectedConditions.presenceOfElementLocated(By.xpath("//ul[@id='hs-list-items']"))); 

          List<WebElement> items = listItemsUL.findElements(By.tagName("li")); 
          return parseItems(items); 
         }); 

       (new WebDriverWait(firefoxDriver, 20)) 
         .until((WebDriver webDriver) -> { 
          WebElement belowListItemsDiv = (new WebDriverWait(webDriver, 10)) 
           .until(ExpectedConditions.presenceOfElementLocated(By.xpath("//div[@id='hs-below-list-items']"))); 

          WebElement belowListItemsUL = belowListItemsDiv.findElement(By.tagName("ul")); 
          List<WebElement> items = belowListItemsUL.findElements(By.tagName("li")); 
          return parseItems(items); 
         }); 

       System.out.println("WAIT2"); 

       WebElement nextElement = (new WebDriverWait(firefoxDriver, 10)) 
        .until(ExpectedConditions.presenceOfElementLocated(By.xpath("//a[@class='page-next ui-pagination-next']"))); 

       System.out.println(nextElement.toString()); 
       System.out.println("CLICK CLICK"); 
       nextElement.click(); 

      } catch (Exception e) { 
       e.printStackTrace(); 
       break; 
      } 
     } 
    } 
} 

有时甚至是在代码已经获取到元素的 href 之后、获取其 title 之前抛出异常。

我不知道我的代码是怎么回事。它实际上工作正常,直到它随机决定不工作,我不知道为什么。

回答

1

看起来在翻页时,你没有等待下一页加载就绪,因此列表中可能仍包含上一页的元素。

为了确保上一页已不再存在,可以在点击分页按钮之后等待,直到列表中的某个元素变为陈旧(stale),如下所示:

// Click "next", then block until an element of the old page goes stale,
// which signals that the previous page's DOM has been replaced.
nextElement.click();
new WebDriverWait(firefoxDriver, 20).until(ExpectedConditions.stalenessOf(someElementFromTheList));
+0

你真是太棒了,@Renato,非常感谢。 – ardilgulez

相关问题