2017-04-16 73 views
2

我目前在学习Node.js,javascript等等。我来自C++。Javascript文本数组解析

我需要解析如下所示的文本(数组):

====================================================================================================== 
No. Name     Cask    Current   Latest   Auto-Update State 
====================================================================================================== 
1/38 5KPlayer    5kplayer   latest   latest 
2/38 Adobe Photoshop CC  adobe-photoshop-cc 16    16 
3/38 Alfred     alfred    3.3.1_806  3.3.2_818  Y   ignored 
4/38 AppCleaner    appcleaner   3.4    3.4    Y   ignored 
5/38 Github Atom   atom    1.15.0   1.15.0   Y   ignored 
6/38 BetterZipQL   betterzipql   latest   latest 
7/38 Boom     boom    1.6,1490693621 1.6,1490693621 
8/38 CheatSheet    cheatsheet   1.2.7   1.2.7 
9/38 Cyberduck    cyberduck   5.4.0.23761  5.4.0.23761 
10/38 Dropbox    dropbox    21.4.25   latest   Y   ignored 

这是 Mac 上已安装应用的列表,每个应用占一行。

如果该应用程序已过期('current'!='latest'),我会保留该行并为其做后期处理。

我想出了一个虽然粗糙、但可以工作的解决方案:

/**
 * Collects the rows of a column-aligned application table whose "Current"
 * and "Latest" columns differ (i.e. the applications that are outdated).
 *
 * The first three lines (ruler, column titles, ruler) are skipped. Every
 * remaining line is split on runs of two or more spaces; a line is kept when
 * its 4th and 5th cells are not equal.
 *
 * @param {string} array - The whole table as one newline-separated string.
 * @returns {string[][]} The split cells of every outdated row, in input order.
 */
function parseBrewCUArray(array) {
    const toUpdate = [];

    // Drop the three header lines before looking at the data rows.
    const lines = array.split('\n').slice(3);

    for (const line of lines) {
        // Declared locally: the original assigned to an undeclared variable,
        // which leaks a global in sloppy mode and throws in strict mode.
        const splittedLine = line.split(/[ ]{2,}/);
        if (splittedLine[3] !== splittedLine[4]) {
            toUpdate.push(splittedLine);
            console.log(splittedLine);
        }
    }

    // Hand the collected rows back so the caller can post-process them.
    return toUpdate;
}

但是,肯定存在更好的解决方案!有人能帮忙优化一下,让这段代码更优雅吗?

回答

1

你的代码可以简化为如下形式:

/**
 * Filters a column-aligned table given as a newline-separated string.
 *
 * The first three lines (the header) are always kept. Any later line is kept
 * only when, after splitting it on runs of two or more spaces, its 4th and
 * 5th cells differ.
 *
 * @param {string} str - The table text.
 * @returns {string} The surviving lines, joined with "\n" again.
 */
function parseBrewCUArray(str) {
    const headerLineCount = 3;
    const kept = [];
    const rows = str.split('\n');

    for (let index = 0; index < rows.length; index++) {
        const row = rows[index];

        // Header lines pass through untouched.
        if (index < headerLineCount) {
            kept.push(row);
            continue;
        }

        const cols = row.split(/ {2,}/);
        if (cols[3] !== cols[4]) {
            kept.push(row);
        }
    }

    return kept.join("\n");
}
 

 
var s = `====================================================================================================== 
 
No. Name     Cask    Current   Latest   Auto-Update State 
 
====================================================================================================== 
 
1/38 5KPlayer    5kplayer   latest   latest 
 
2/38 Adobe Photoshop CC  adobe-photoshop-cc 16    16 
 
3/38 Alfred     alfred    3.3.1_806  3.3.2_818  Y   ignored 
 
4/38 AppCleaner    appcleaner   3.4    3.4    Y   ignored 
 
5/38 Github Atom   atom    1.15.0   1.15.0   Y   ignored 
 
6/38 BetterZipQL   betterzipql   latest   latest 
 
7/38 Boom     boom    1.6,1490693621 1.6,1490693621 
 
8/38 CheatSheet    cheatsheet   1.2.7   1.2.7 
 
9/38 Cyberduck    cyberduck   5.4.0.23761  5.4.0.23761 
 
10/38 Dropbox    dropbox    21.4.25   latest   Y   ignored`; 
 

 
// Demo: print the result of filtering the sample table `s`.
console.log(parseBrewCUArray(s));

但通常我更愿意先把字符串解析成某种可用的数据结构,然后再基于它继续处理:

// first the utilities:

/**
 * Casts a value to a string, turning null and undefined into the empty
 * string rather than "null" / "undefined".
 */
var string = (value) => {
    if (value === null || value === undefined) {
        return "";
    }
    return String(value);
};

/**
 * Builds a reusable replacer: the returned function casts its argument with
 * string() and applies String#replace with the given pattern and replacement
 * (the replacement defaults to "", i.e. the match is removed).
 */
var replace = (pattern, replacement = "") => {
    return (value) => string(value).replace(pattern, replacement);
};

/**
 * Prefixes every character that has a special meaning in a regular
 * expression with a backslash, so the text can be matched literally.
 */
var escapeForRegex = replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
 

 
/**
 * Collects every match of `needle` in `haystack`, hiding the RegExp#exec()
 * loop boilerplate.
 *
 * - `haystack` is cast with string(), so null/undefined behave like "".
 * - A non-RegExp `needle` is escaped with escapeForRegex() and searched for
 *   literally, everywhere in the text.
 * - A RegExp without the global or sticky flag yields at most one match.
 *
 * The search runs on a private copy of the regex, so a leftover `lastIndex`
 * on the caller's regex can neither make matches disappear nor be changed by
 * this call. Zero-length matches advance the search position by one
 * character instead of ending the search, so later matches are still found.
 *
 * @param {*} haystack - The text to search.
 * @param {RegExp|*} needle - A regular expression, or a value matched literally.
 * @returns {Array} The exec() result arrays (with `index` and `input`), in order.
 */
function matchAll(haystack, needle){
    const str = string(haystack);
    const regex = needle instanceof RegExp
        ? new RegExp(needle.source, needle.flags)
        : new RegExp(escapeForRegex(needle), "g");
    const results = [];

    // Without /g or /y exec() never advances, so there is exactly one attempt.
    if (!regex.global && !regex.sticky) {
        const only = regex.exec(str);
        return only === null ? [] : [only];
    }

    let match;
    while ((match = regex.exec(str)) !== null) {
        results.push(match);

        // An empty match leaves lastIndex where it was; step over one
        // character (a whole surrogate pair in unicode mode) to avoid looping
        // forever on the same position.
        if (match[0] === "") {
            const isWide = (regex.unicode || regex.unicodeSets)
                && str.codePointAt(regex.lastIndex) > 0xFFFF;
            regex.lastIndex += isWide ? 2 : 1;
        }
    }

    return results;
}
 

 

 
/**
 * Parses a table whose columns are defined by alignment rather than by a
 * separator character.
 *
 * - Rows that do not contain at least one letter or digit (rulers, blank
 *   lines) are dropped.
 * - The first remaining row supplies the column names and their start
 *   offsets; trailing dots, colons and whitespace are stripped from names.
 * - Each following row becomes an object whose properties are the trimmed
 *   slices of the row between consecutive column offsets (the last column
 *   runs to the end of the row).
 * - Tabs are not handled, because there is no single standard tab width.
 *
 * @param {*} str - The table text (cast with string()).
 * @param {boolean} [allowSingleSpacesInTitle=false] - When true, a column
 *   title may contain single spaces; titles are then separated by 2+ spaces.
 * @returns {Object[]} One object per data row; [] when no usable row exists.
 */
function parseTableByAlignment(str, allowSingleSpacesInTitle=false){
    const rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g);

    // String#match returns null when nothing matches; without this guard an
    // empty or ruler-only input crashed on rows.shift().
    if (rows === null) {
        return [];
    }

    const removeTrailingDots = replace(/[\.:\s]+$/, "");
    const titlePattern = allowSingleSpacesInTitle ? /\S+(?: \S+)*/g : /\S+/g;

    // Walk the titles right-to-left so each column knows where the next one
    // starts; `end` is still undefined for the last column, which makes
    // substring() read to the end of the row.
    let end;
    const properties = matchAll(rows.shift(), titlePattern)
        .reduceRight((acc, match) => {
            const property = JSON.stringify(removeTrailingDots(match[0])) + ": row.substring(" + match.index + ", " + end + ").trim()";
            end = match.index;
            return " " + property + ",\n" + acc;
        }, "}");

    // NOTE(review): the row parser is generated as source text and compiled
    // with the Function constructor. Column names are embedded through
    // JSON.stringify and the offsets are numbers, but evaluating generated
    // code is still discouraged; prefer a closure-based parser where possible.
    const parseRow = new Function("row", "return {\n" + properties);

    return rows.map(row => parseRow(row));
}
 

 
var s = ` 
 
====================================================================================================== 
 
No. Name     Cask    Current   Latest   Auto-Update State 
 
====================================================================================================== 
 
1/38 5KPlayer    5kplayer   latest   latest 
 
2/38 Adobe Photoshop CC  adobe-photoshop-cc 16    16 
 
3/38 Alfred     alfred    3.3.1_806  3.3.2_818  Y   ignored 
 
4/38 AppCleaner    appcleaner   3.4    3.4    Y   ignored 
 
5/38 Github Atom   atom    1.15.0   1.15.0   Y   ignored 
 
6/38 BetterZipQL   betterzipql   latest   latest 
 
7/38 Boom     boom    1.6,1490693621 1.6,1490693621 
 
8/38 CheatSheet    cheatsheet   1.2.7   1.2.7 
 
9/38 Cyberduck    cyberduck   5.4.0.23761  5.4.0.23761 
 
10/38 Dropbox    dropbox    21.4.25   latest   Y   ignored 
 
`; 
 

 
// Parse the sample table into row objects, then log only the rows whose
// Current and Latest values differ.
var data = parseTableByAlignment(s);

console.log(data.filter(({ Current, Latest }) => Current !== Latest));

使用 Function 构造函数并把字符串当作代码来执行……嗯,大多数人对此会持否定态度。所以我添加了 parseTableByAlignment() 的第二个实现,它不使用 Function 构造函数。结果仍然相同:

/**
 * Parses a table whose columns are defined by alignment rather than by a
 * separator character, without generating code at runtime.
 *
 * - Rows that do not contain at least one letter or digit are dropped.
 * - The first remaining row supplies the column names and start offsets;
 *   trailing dots, colons and whitespace are stripped from the names.
 * - Each following row becomes an object mapping column name to the trimmed
 *   slice of the row between that column's start and the next column's start
 *   (the last column runs to the end of the row).
 *
 * @param {*} str - The table text (cast with string()).
 * @param {boolean} [allowSingleSpacesInTitle=false] - When true, a column
 *   title may contain single spaces; titles are then separated by 2+ spaces.
 * @returns {Object[]} One object per data row; [] when no usable row exists.
 */
function parseTableByAlignment(str, allowSingleSpacesInTitle=false){
    const rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g);

    // String#match returns null when nothing matches; without this guard an
    // empty or ruler-only input crashed on rows.shift().
    if (rows === null) {
        return [];
    }

    const removeTrailingDots = replace(/[\.:\s]+$/, "");
    const titlePattern = allowSingleSpacesInTitle ? /\S+(?: \S+)*/g : /\S+/g;
    const titles = matchAll(rows.shift(), titlePattern);

    // A column ends where the next title starts; the last column has no end,
    // so substring() reads to the end of the row.
    const columns = titles.map((match, i) => ({
        name: removeTrailingDots(match[0]),
        start: match.index,
        end: i + 1 < titles.length ? titles[i + 1].index : undefined,
    }));

    return rows.map(row => columns.reduce((obj, column) => {
        obj[column.name] = row.substring(column.start, column.end).trim();
        return obj;
    }, {}));
}
+0

非常感谢这个答案,超出了我的预期。我非常喜欢JSON化的方法。 – deadbird