2017-08-13 77 views
0

目标是遍历约10,000条链接,确定哪些页面的页数 > 3,并高亮显示第一列。这些功能我都已经实现了,但问题是 UrlFetch 耗时过长,导致我遇到了超出最大运行时间的错误。有没有办法加快这段代码,让我能够处理完全部10,000行?如何加速 Google Apps Script 中的 UrlFetch?

/**
 * Walks the data rows of the active sheet, fetches the URL held in the third
 * column of each row, and paints the first-column cell yellow when either:
 *   - the fetch (or reading its body) throws, or
 *   - the URL does not end in "pdf", the seventh column is an empty string,
 *     and the digit found right after "Pages " in the response is not >= 3.
 *
 * The first entry of the values array (index 0) is skipped, as in the
 * original loop which started at 1.
 *
 * @returns {void}
 */
function readColumns() {
    var sheet = SpreadsheetApp.getActiveSheet();
    var dataRange = sheet.getDataRange();
    var rowNum = dataRange.getNumRows();
    var values = dataRange.getValues();

    // values is 0-based and holds exactly rowNum entries, so the last valid
    // index is rowNum - 1.
    for (var i = 1; i < rowNum; i++) {
        var url = String(values[i][2]);
        // Both are reset for every row so that one failed fetch neither flags
        // all later rows nor leaves a stale response behind.
        var error = false;
        var pageNumber = NaN;

        try {
            var response = UrlFetchApp.fetch(url, { muteHttpExceptions: true });
            var htmlString = response.getContentText();
            var marker = htmlString.indexOf("Pages");
            if (marker !== -1) {
                // The page count is read as the single character 6 positions
                // after the start of "Pages".
                pageNumber = parseInt(htmlString.charAt(marker + 6), 10);
            }
        } catch (e) {
            Logger.log("Error at line " + i);
            error = true;
        }

        var isPdf = url.slice(-3) === "pdf";
        var flagIsEmpty = values[i][6] === "";

        // NaN fails ">= 3", so a missing or unreadable page count counts as
        // "fewer than 3 pages".
        if (error || (!isPdf && flagIsEmpty && !(pageNumber >= 3))) {
            // values[i] corresponds to sheet row i + 1 (sheet rows are 1-based).
            sheet.getRange(i + 1, 1).setBackground("yellow");
        }
    }
}

编辑 - 短脚本:有了这个

// Concatenates the characters of columnLogger at positions length-3, length-2 and length-1 (its last three characters) into lastChars.
var lastChars = "" + columnLogger.charAt(columnLogger.length-3) + columnLogger.charAt(columnLogger.length-2) + columnLogger.charAt(columnLogger.length-1); 

/**
 * Drives readColumns over the active sheet in consecutive batches of 100
 * rows, stopping once the batch start passes the last row of the data range
 * instead of looping without end.
 *
 * @param {number} [startRow=1480] Index at which the first batch starts.
 *     Non-numeric arguments (for example an event object, if this function is
 *     invoked by a trigger) are ignored and the default is used.
 * @returns {void}
 */
function foreverCall(startRow) {
    // Step between batches; readColumns handles 100 rows per call.
    var BATCH_SIZE = 100;
    var start = (typeof startRow === "number") ? startRow : 1480;
    var rowNum = SpreadsheetApp.getActiveSheet().getDataRange().getNumRows();

    for (; start < rowNum; start += BATCH_SIZE) {
        readColumns(start);
    }
}


/**
 * Processes one batch of up to 100 rows of the active sheet, beginning at
 * index `start` of the sheet's values array. Each row is handed to loop();
 * when loop() returns a truthy value the row's first-column cell is painted
 * yellow.
 *
 * @param {number} start Zero-based index into the values array at which the
 *     batch begins.
 * @returns {void}
 */
function readColumns(start) {
    var sheet = SpreadsheetApp.getActiveSheet();
    var columns = sheet.getDataRange();
    var rowNum = columns.getNumRows();
    var values = columns.getValues();
    // Passed through to loop() only because its signature expects them.
    var html;
    var htmlString;
    var error;

    // Clamp the batch to the rows that exist so the final, partial batch
    // does not index past the end of values.
    var end = Math.min(start + 100, rowNum);

    for (var i = start; i < end; i++) {
        if (loop(values, error, html, htmlString, rowNum, sheet, columns, i)) {
            // values is 0-based while sheet rows are 1-based, so values[i]
            // lives on sheet row i + 1.
            sheet.getRange(i + 1, 1).setBackground("yellow");
        }
    }
}

/**
 * Decides whether row `i` of `values` should be highlighted.
 *
 * Returns true when:
 *   - the URL in the third column does not end in ".pdf" and the seventh
 *     column is an empty string (no fetch is made in this case), or
 *   - fetching the URL throws or yields no response, or
 *   - the digit right after "Pages " in the response body is not >= 3
 *     (including when no such digit can be read).
 * Returns false otherwise.
 *
 * @param {Array<Array<*>>} values Sheet values; row i must have at least 7 columns.
 * @param {*} error Unused; kept so existing callers keep working.
 * @param {*} html Unused; kept so existing callers keep working.
 * @param {*} htmlString Unused; kept so existing callers keep working.
 * @param {*} rowNum Unused; kept so existing callers keep working.
 * @param {*} sheet Unused; kept so existing callers keep working.
 * @param {*} columns Unused; kept so existing callers keep working.
 * @param {number} i Index of the row in `values` to examine.
 * @returns {boolean} Whether the row should be highlighted.
 */
function loop(values, error, html, htmlString, rowNum, sheet, columns, i) {
    // String() guards against cells that hold a non-string value.
    var url = String(values[i][2]);
    var isPdf = url.slice(-4) === ".pdf";

    if (!isPdf && values[i][6] === "") {
        return true;
    }

    var response;
    try {
        response = UrlFetchApp.fetch(url);
    } catch (e) {
        Logger.log("Error at line " + i);
        return true;
    }
    if (response == null) {
        return true;
    }

    var text = response.getContentText();
    var marker = text.indexOf("Pages");
    // The page count is read as the single character 6 positions after the
    // start of "Pages"; without the marker there is nothing to parse.
    var pageNumber = (marker === -1) ? NaN : parseInt(text.charAt(marker + 6), 10);

    // Parenthesised on purpose: "!pageNumber >= 3" would negate pageNumber
    // first and always be false. NaN fails ">= 3" and is therefore flagged.
    return !(pageNumber >= 3);
}

回答

2

您可以把那一行替换为:

var lastChars = columnLogger.slice(-3); 

您也可以从 html 侧边栏或对话框启动抓取脚本,每次只运行一小批,然后返回到成功处理程序(success handler),再根据返回值启动下一批。返回值也可以用来指定下一批从哪一行开始。这样总的运行时间实际上会更长,但只要每批足够小,就能保持在脚本的执行时间限制之内。

enter image description here

+0

感谢您的回答。我其实只需要 UrlFetchApp 返回内容的前50个字符,但它每次都必须加载整个页面。有没有办法只加载一小部分 html,从而加快执行速度?也许可以设置某种超时(例如20毫秒)让它提前中止这次请求?这样有望把 html 截断。 –

+0

我不太确定,但我在想,也许可以在抓取时禁用 JavaScript 和/或 cookie。这样得到的页面可能会明显不同,但您或许仍然能够获取所需的文本。说实话,这完全是我的猜测。 – Cooper

+0

好的,没关系。这就是你建议的、让脚本保持在执行限制之内的做法吗?我已经编辑了问题 –

0

可以将该行替换为:

var lastChars = columnLogger.slice(-3);

+0

能否在每行代码前加四个空格来格式化代码,使其在浏览器中更易读,并且解释一下为什么这样做有效? – PaSTE