0
标题:使用 Selenium 进行 Web 抓取:代码随机抛出 StaleElementReferenceException。我正在尝试从 AliExpress 抓取某些特定商品,但当代码处理到某个商品时(具体是哪一个完全不确定),parseItems 方法中的 urlelement 会随机失效(变为 stale),该方法随即抛出异常。
代码:
package com.ardilgulez.seleniumweb;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.StaleElementReferenceException;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.ExpectedConditions;
import org.openqa.selenium.support.ui.WebDriverWait;
public class App {
private static WebDriver firefoxDriver = new FirefoxDriver();
public static boolean parseItems throws StaleElementReferenceException (List<WebElement> items){
System.out.println(items.size());
if(items.size() > 0){
items.forEach((item) -> {
WebElement urlelement = item.findElement(By.cssSelector(".detail>h3>a"));
String href = urlelement.getAttribute("href");
System.out.println(href);
String title = urlelement.getAttribute("title");
System.out.println(title);
});
}
return true;
}
public static void main(String[] args) {
firefoxDriver.get("https://www.aliexpress.com/");
firefoxDriver.manage().timeouts().implicitlyWait(5, TimeUnit.SECONDS);
WebElement questionElement = firefoxDriver.findElement(By.xpath("//input[@name='SearchText']"));
questionElement.sendKeys("ESP8266");
questionElement.submit();
while (true) {
try {
(new WebDriverWait(firefoxDriver, 10))
.until((WebDriver webDriver) -> ((JavascriptExecutor) webDriver).executeScript("return document.readyState").equals("complete"));
(new WebDriverWait(firefoxDriver, 10))
.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//ul[@id='hs-list-items']")));
(new WebDriverWait(firefoxDriver, 10))
.until(ExpectedConditions.visibilityOfElementLocated(By.xpath("//div[@id='hs-below-list-items']")));
System.out.println("WAIT1");
(new WebDriverWait(firefoxDriver, 20))
.until((WebDriver webDriver) -> {
WebElement listItemsUL = (new WebDriverWait(webDriver, 10))
.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//ul[@id='hs-list-items']")));
List<WebElement> items = listItemsUL.findElements(By.tagName("li"));
return parseItems(items);
});
(new WebDriverWait(firefoxDriver, 20))
.until((WebDriver webDriver) -> {
WebElement belowListItemsDiv = (new WebDriverWait(webDriver, 10))
.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//div[@id='hs-below-list-items']")));
WebElement belowListItemsUL = belowListItemsDiv.findElement(By.tagName("ul"));
List<WebElement> items = belowListItemsUL.findElements(By.tagName("li"));
return parseItems(items);
});
System.out.println("WAIT2");
WebElement nextElement = (new WebDriverWait(firefoxDriver, 10))
.until(ExpectedConditions.presenceOfElementLocated(By.xpath("//a[@class='page-next ui-pagination-next']")));
System.out.println(nextElement.toString());
System.out.println("CLICK CLICK");
nextElement.click();
} catch (Exception e) {
e.printStackTrace();
break;
}
}
}
}
有时甚至会在代码已经获取到元素的 href 之后、获取它的 title 之前抛出异常。
我不知道我的代码是怎么回事。它实际上工作正常,直到它随机决定不工作,我不知道为什么。
@Renato 你真是太棒了,非常感谢。 – ardilgulez