目标是贯穿约10,000条链接。确定哪些页面编号> 3并突出显示第一列。我已经完成了所有这些工作,但问题是它需要Url Fetch时间过长,我遇到了最大运行时错误。无论如何,我可以加快这个代码,所以我可以通过10,000行?加速UrlFetch Google App脚本?
function readColumns() {
//program is going to run through column 3 by going through the amount of rows, truncating last three characters to see if pdf, then highlighting first column
var sheet = SpreadsheetApp.getActiveSheet();
var columns = sheet.getDataRange();
var rowNum = columns.getNumRows();
var values = columns.getValues();
var html;
var htmlString;
for(var i = 1; i <= rowNum; i++){
var columnLogger = values[i][2];
try{
html = UrlFetchApp.fetch(values[i][2],
{
muteHttpExceptions: true,
}
);
}catch(e){
Logger.log("Error at line " + i);
var error = true;
}
htmlString = html.getContentText();
var index = htmlString.indexOf("Pages") + 6;
var pageNumber = parseInt(htmlString.charAt(index),10);
var lastChars = "" + columnLogger.charAt(columnLogger.length-3) + columnLogger.charAt(columnLogger.length-2) + columnLogger.charAt(columnLogger.length-1);
if((error) || (!lastChars.equals("pdf") && values[i][6].equals("") && !pageNumber >= 3)){
//goes back to first column and highlights yellow
var cellRange = sheet.getRange(1, 1, rowNum, 3)
var cell = cellRange.getCell(i+1, 1)
cell.setBackground("yellow");
}
}
}
编辑 - 短脚本:有了这个
var lastChars = "" + columnLogger.charAt(columnLogger.length-3) + columnLogger.charAt(columnLogger.length-2) + columnLogger.charAt(columnLogger.length-1);
:
function foreverCall(){
var start = 1480;
for(;;){
readColumns(start);
start = start + 100;
}
}
function readColumns(start) {
//program is going to run through column 3 by going through the amount of rows, truncating last three characters to see if pdf, then highlighting first column
var sheet = SpreadsheetApp.getActiveSheet();
var columns = sheet.getDataRange();
var rowNum = columns.getNumRows();
var values = columns.getValues();
var html;
var htmlString;
var error;
for(var i = start; i < start+100; i++){
if(loop(values, error, html, htmlString, rowNum, sheet, columns, i)){
var cellRange = sheet.getRange(1, 1, rowNum, 3)
var cell = cellRange.getCell(i, 1)
cell.setBackground("yellow");
}
}
}
function loop(values, error, html, htmlString, rowNum, sheet, columns, i){
var columnLogger = values[i][2];
var lastChars = columnLogger.slice(-4);
if(!lastChars.equals(".pdf") && values[i][6].equals("")){
return true;
}else{
try{
error = false
html = UrlFetchApp.fetch(values[i][2].toString());
if(html == null){
error = true;
}
}catch(e){
Logger.log("Error at line " + i);
error = true;
}
if(!error){
htmlString = html.getContentText();
var index = htmlString.indexOf("Pages") + 6;
var pageNumber = parseInt(htmlString.charAt(index),10);
}
//goes back to first column and highlights yellow
if(error || !pageNumber >= 3){
return true;
}
}
return false;
}
感谢您的回答。所以,我真的只需要这个URL抓取应用程序的前50个字符,但它必须每次加载整个页面。无论如何,为了加快执行时间,使它只加载少量的html?也许某种超时(即20毫秒)会让它跳过这个命令?这将希望截断html。 –
我真的不知道,但我想知道如果你也许可以禁用JavaScript和或cookie的抓取。您可能会得到明显不同的页面,但您可能能够获取所需的文本。真的,这是我的总猜测。 – Cooper
好吧,没关系。这是你建议将它保持在脚本限制之下吗?我编辑了问题 –