2016-11-11 89 views
0

我发现来自Apache的这个代码删除变音符号,但它不为我工作(尽管它似乎对其他大多数人的工作)删除变音符号不工作

/* 
Licensed under the Apache License, Version 2.0 (the "License"); 
you may not use this file except in compliance with the License. 
You may obtain a copy of the License at 

http://www.apache.org/licenses/LICENSE-2.0 

Unless required by applicable law or agreed to in writing, software 
distributed under the License is distributed on an "AS IS" BASIS, 
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
See the License for the specific language governing permissions and 
limitations under the License. 
*/ 
var defaultDiacriticsRemovalMap = [ 
    {'base':'A', 'letters':'\u0041\u24B6\uFF21\u00C0\u00C1\u00C2\u1EA6\u1EA4\u1EAA\u1EA8\u00C3\u0100\u0102\u1EB0\u1EAE\u1EB4\u1EB2\u0226\u01E0\u00C4\u01DE\u1EA2\u00C5\u01FA\u01CD\u0200\u0202\u1EA0\u1EAC\u1EB6\u1E00\u0104\u023A\u2C6F'}, 
    {'base':'AA','letters':'\uA732'}, 
    {'base':'AE','letters':'\u00C6\u01FC\u01E2'}, 
    {'base':'AO','letters':'\uA734'}, 
    {'base':'AU','letters':'\uA736'}, 
    {'base':'AV','letters':'\uA738\uA73A'}, 
    {'base':'AY','letters':'\uA73C'}, 
    {'base':'B', 'letters':'\u0042\u24B7\uFF22\u1E02\u1E04\u1E06\u0243\u0182\u0181'}, 
    {'base':'C', 'letters':'\u0043\u24B8\uFF23\u0106\u0108\u010A\u010C\u00C7\u1E08\u0187\u023B\uA73E'}, 
    {'base':'D', 'letters':'\u0044\u24B9\uFF24\u1E0A\u010E\u1E0C\u1E10\u1E12\u1E0E\u0110\u018B\u018A\u0189\uA779'}, 
    {'base':'DZ','letters':'\u01F1\u01C4'}, 
    {'base':'Dz','letters':'\u01F2\u01C5'}, 
    {'base':'E', 'letters':'\u0045\u24BA\uFF25\u00C8\u00C9\u00CA\u1EC0\u1EBE\u1EC4\u1EC2\u1EBC\u0112\u1E14\u1E16\u0114\u0116\u00CB\u1EBA\u011A\u0204\u0206\u1EB8\u1EC6\u0228\u1E1C\u0118\u1E18\u1E1A\u0190\u018E'}, 
    {'base':'F', 'letters':'\u0046\u24BB\uFF26\u1E1E\u0191\uA77B'}, 
    {'base':'G', 'letters':'\u0047\u24BC\uFF27\u01F4\u011C\u1E20\u011E\u0120\u01E6\u0122\u01E4\u0193\uA7A0\uA77D\uA77E'}, 
    {'base':'H', 'letters':'\u0048\u24BD\uFF28\u0124\u1E22\u1E26\u021E\u1E24\u1E28\u1E2A\u0126\u2C67\u2C75\uA78D'}, 
    {'base':'I', 'letters':'\u0049\u24BE\uFF29\u00CC\u00CD\u00CE\u0128\u012A\u012C\u0130\u00CF\u1E2E\u1EC8\u01CF\u0208\u020A\u1ECA\u012E\u1E2C\u0197'}, 
    {'base':'J', 'letters':'\u004A\u24BF\uFF2A\u0134\u0248'}, 
    {'base':'K', 'letters':'\u004B\u24C0\uFF2B\u1E30\u01E8\u1E32\u0136\u1E34\u0198\u2C69\uA740\uA742\uA744\uA7A2'}, 
    {'base':'L', 'letters':'\u004C\u24C1\uFF2C\u013F\u0139\u013D\u1E36\u1E38\u013B\u1E3C\u1E3A\u0141\u023D\u2C62\u2C60\uA748\uA746\uA780'}, 
    {'base':'LJ','letters':'\u01C7'}, 
    {'base':'Lj','letters':'\u01C8'}, 
    {'base':'M', 'letters':'\u004D\u24C2\uFF2D\u1E3E\u1E40\u1E42\u2C6E\u019C'}, 
    {'base':'N', 'letters':'\u004E\u24C3\uFF2E\u01F8\u0143\u00D1\u1E44\u0147\u1E46\u0145\u1E4A\u1E48\u0220\u019D\uA790\uA7A4'}, 
    {'base':'NJ','letters':'\u01CA'}, 
    {'base':'Nj','letters':'\u01CB'}, 
    {'base':'O', 'letters':'\u004F\u24C4\uFF2F\u00D2\u00D3\u00D4\u1ED2\u1ED0\u1ED6\u1ED4\u00D5\u1E4C\u022C\u1E4E\u014C\u1E50\u1E52\u014E\u022E\u0230\u00D6\u022A\u1ECE\u0150\u01D1\u020C\u020E\u01A0\u1EDC\u1EDA\u1EE0\u1EDE\u1EE2\u1ECC\u1ED8\u01EA\u01EC\u00D8\u01FE\u0186\u019F\uA74A\uA74C'}, 
    {'base':'OI','letters':'\u01A2'}, 
    {'base':'OO','letters':'\uA74E'}, 
    {'base':'OU','letters':'\u0222'}, 
    {'base':'OE','letters':'\u008C\u0152'}, 
    {'base':'oe','letters':'\u009C\u0153'}, 
    {'base':'P', 'letters':'\u0050\u24C5\uFF30\u1E54\u1E56\u01A4\u2C63\uA750\uA752\uA754'}, 
    {'base':'Q', 'letters':'\u0051\u24C6\uFF31\uA756\uA758\u024A'}, 
    {'base':'R', 'letters':'\u0052\u24C7\uFF32\u0154\u1E58\u0158\u0210\u0212\u1E5A\u1E5C\u0156\u1E5E\u024C\u2C64\uA75A\uA7A6\uA782'}, 
    {'base':'S', 'letters':'\u0053\u24C8\uFF33\u1E9E\u015A\u1E64\u015C\u1E60\u0160\u1E66\u1E62\u1E68\u0218\u015E\u2C7E\uA7A8\uA784'}, 
    {'base':'T', 'letters':'\u0054\u24C9\uFF34\u1E6A\u0164\u1E6C\u021A\u0162\u1E70\u1E6E\u0166\u01AC\u01AE\u023E\uA786'}, 
    {'base':'TZ','letters':'\uA728'}, 
    {'base':'U', 'letters':'\u0055\u24CA\uFF35\u00D9\u00DA\u00DB\u0168\u1E78\u016A\u1E7A\u016C\u00DC\u01DB\u01D7\u01D5\u01D9\u1EE6\u016E\u0170\u01D3\u0214\u0216\u01AF\u1EEA\u1EE8\u1EEE\u1EEC\u1EF0\u1EE4\u1E72\u0172\u1E76\u1E74\u0244'}, 
    {'base':'V', 'letters':'\u0056\u24CB\uFF36\u1E7C\u1E7E\u01B2\uA75E\u0245'}, 
    {'base':'VY','letters':'\uA760'}, 
    {'base':'W', 'letters':'\u0057\u24CC\uFF37\u1E80\u1E82\u0174\u1E86\u1E84\u1E88\u2C72'}, 
    {'base':'X', 'letters':'\u0058\u24CD\uFF38\u1E8A\u1E8C'}, 
    {'base':'Y', 'letters':'\u0059\u24CE\uFF39\u1EF2\u00DD\u0176\u1EF8\u0232\u1E8E\u0178\u1EF6\u1EF4\u01B3\u024E\u1EFE'}, 
    {'base':'Z', 'letters':'\u005A\u24CF\uFF3A\u0179\u1E90\u017B\u017D\u1E92\u1E94\u01B5\u0224\u2C7F\u2C6B\uA762'}, 
    {'base':'a', 'letters':'\u0061\u24D0\uFF41\u1E9A\u00E0\u00E1\u00E2\u1EA7\u1EA5\u1EAB\u1EA9\u00E3\u0101\u0103\u1EB1\u1EAF\u1EB5\u1EB3\u0227\u01E1\u00E4\u01DF\u1EA3\u00E5\u01FB\u01CE\u0201\u0203\u1EA1\u1EAD\u1EB7\u1E01\u0105\u2C65\u0250'}, 
    {'base':'aa','letters':'\uA733'}, 
    {'base':'ae','letters':'\u00E6\u01FD\u01E3'}, 
    {'base':'ao','letters':'\uA735'}, 
    {'base':'au','letters':'\uA737'}, 
    {'base':'av','letters':'\uA739\uA73B'}, 
    {'base':'ay','letters':'\uA73D'}, 
    {'base':'b', 'letters':'\u0062\u24D1\uFF42\u1E03\u1E05\u1E07\u0180\u0183\u0253'}, 
    {'base':'c', 'letters':'\u0063\u24D2\uFF43\u0107\u0109\u010B\u010D\u00E7\u1E09\u0188\u023C\uA73F\u2184'}, 
    {'base':'d', 'letters':'\u0064\u24D3\uFF44\u1E0B\u010F\u1E0D\u1E11\u1E13\u1E0F\u0111\u018C\u0256\u0257\uA77A'}, 
    {'base':'dz','letters':'\u01F3\u01C6'}, 
    {'base':'e', 'letters':'\u0065\u24D4\uFF45\u00E8\u00E9\u00EA\u1EC1\u1EBF\u1EC5\u1EC3\u1EBD\u0113\u1E15\u1E17\u0115\u0117\u00EB\u1EBB\u011B\u0205\u0207\u1EB9\u1EC7\u0229\u1E1D\u0119\u1E19\u1E1B\u0247\u025B\u01DD'}, 
    {'base':'f', 'letters':'\u0066\u24D5\uFF46\u1E1F\u0192\uA77C'}, 
    {'base':'g', 'letters':'\u0067\u24D6\uFF47\u01F5\u011D\u1E21\u011F\u0121\u01E7\u0123\u01E5\u0260\uA7A1\u1D79\uA77F'}, 
    {'base':'h', 'letters':'\u0068\u24D7\uFF48\u0125\u1E23\u1E27\u021F\u1E25\u1E29\u1E2B\u1E96\u0127\u2C68\u2C76\u0265'}, 
    {'base':'hv','letters':'\u0195'}, 
    {'base':'i', 'letters':'\u0069\u24D8\uFF49\u00EC\u00ED\u00EE\u0129\u012B\u012D\u00EF\u1E2F\u1EC9\u01D0\u0209\u020B\u1ECB\u012F\u1E2D\u0268\u0131'}, 
    {'base':'j', 'letters':'\u006A\u24D9\uFF4A\u0135\u01F0\u0249'}, 
    {'base':'k', 'letters':'\u006B\u24DA\uFF4B\u1E31\u01E9\u1E33\u0137\u1E35\u0199\u2C6A\uA741\uA743\uA745\uA7A3'}, 
    {'base':'l', 'letters':'\u006C\u24DB\uFF4C\u0140\u013A\u013E\u1E37\u1E39\u013C\u1E3D\u1E3B\u017F\u0142\u019A\u026B\u2C61\uA749\uA781\uA747'}, 
    {'base':'lj','letters':'\u01C9'}, 
    {'base':'m', 'letters':'\u006D\u24DC\uFF4D\u1E3F\u1E41\u1E43\u0271\u026F'}, 
    {'base':'n', 'letters':'\u006E\u24DD\uFF4E\u01F9\u0144\u00F1\u1E45\u0148\u1E47\u0146\u1E4B\u1E49\u019E\u0272\u0149\uA791\uA7A5'}, 
    {'base':'nj','letters':'\u01CC'}, 
    {'base':'o', 'letters':'\u006F\u24DE\uFF4F\u00F2\u00F3\u00F4\u1ED3\u1ED1\u1ED7\u1ED5\u00F5\u1E4D\u022D\u1E4F\u014D\u1E51\u1E53\u014F\u022F\u0231\u00F6\u022B\u1ECF\u0151\u01D2\u020D\u020F\u01A1\u1EDD\u1EDB\u1EE1\u1EDF\u1EE3\u1ECD\u1ED9\u01EB\u01ED\u00F8\u01FF\u0254\uA74B\uA74D\u0275'}, 
    {'base':'oi','letters':'\u01A3'}, 
    {'base':'ou','letters':'\u0223'}, 
    {'base':'oo','letters':'\uA74F'}, 
    {'base':'p','letters':'\u0070\u24DF\uFF50\u1E55\u1E57\u01A5\u1D7D\uA751\uA753\uA755'}, 
    {'base':'q','letters':'\u0071\u24E0\uFF51\u024B\uA757\uA759'}, 
    {'base':'r','letters':'\u0072\u24E1\uFF52\u0155\u1E59\u0159\u0211\u0213\u1E5B\u1E5D\u0157\u1E5F\u024D\u027D\uA75B\uA7A7\uA783'}, 
    {'base':'s','letters':'\u0073\u24E2\uFF53\u00DF\u015B\u1E65\u015D\u1E61\u0161\u1E67\u1E63\u1E69\u0219\u015F\u023F\uA7A9\uA785\u1E9B'}, 
    {'base':'t','letters':'\u0074\u24E3\uFF54\u1E6B\u1E97\u0165\u1E6D\u021B\u0163\u1E71\u1E6F\u0167\u01AD\u0288\u2C66\uA787'}, 
    {'base':'tz','letters':'\uA729'}, 
    {'base':'u','letters': '\u0075\u24E4\uFF55\u00F9\u00FA\u00FB\u0169\u1E79\u016B\u1E7B\u016D\u00FC\u01DC\u01D8\u01D6\u01DA\u1EE7\u016F\u0171\u01D4\u0215\u0217\u01B0\u1EEB\u1EE9\u1EEF\u1EED\u1EF1\u1EE5\u1E73\u0173\u1E77\u1E75\u0289'}, 
    {'base':'v','letters':'\u0076\u24E5\uFF56\u1E7D\u1E7F\u028B\uA75F\u028C'}, 
    {'base':'vy','letters':'\uA761'}, 
    {'base':'w','letters':'\u0077\u24E6\uFF57\u1E81\u1E83\u0175\u1E87\u1E85\u1E98\u1E89\u2C73'}, 
    {'base':'x','letters':'\u0078\u24E7\uFF58\u1E8B\u1E8D'}, 
    {'base':'y','letters':'\u0079\u24E8\uFF59\u1EF3\u00FD\u0177\u1EF9\u0233\u1E8F\u00FF\u1EF7\u1E99\u1EF5\u01B4\u024F\u1EFF'}, 
    {'base':'z','letters':'\u007A\u24E9\uFF5A\u017A\u1E91\u017C\u017E\u1E93\u1E95\u01B6\u0225\u0240\u2C6C\uA763'} 
]; 

var diacriticsMap = {}; 
for (var i=0; i < defaultDiacriticsRemovalMap .length; i++){ 
    var letters = defaultDiacriticsRemovalMap [i].letters; 
    for (var j=0; j < letters.length ; j++){ 
     diacriticsMap[letters[j]] = defaultDiacriticsRemovalMap [i].base; 
    } 
} 

// "what?" version ... http://jsperf.com/diacritics/12 
function _removeDiacritics (str) { 
    return str.replace(/[^\u0000-\u007E]/g, function(a){ 
     return diacriticsMap[a] || a; 
    }); 
} 

这被称为:

var destination = file.name.splice(file.name.lastIndexOf('.'), 0, '_' + moment().format()).replaceAll('[^a-zA-Z0-9]', ''); 
destination = _removeDiacritics(destination); 

前:

文件/非项目文件/ NATPKG LAJE阿根廷撰稿人 PIÑEIRO_2016-11-09T184839- 0300_2016-11-11T11:13:00-05:00.mp4

后:

文件/非项目文件/ NATPKG LAJE阿根廷撰稿人 PIÑEIRO_2016-11-09T184839-0300_2016-11- 11T11:13:00-05:00.mp4

我在这里错过了什么?

回答

1

Apache解决方案适用于单个代码序列,其中例如,\ u00D1由Ñ表示。但是,也可以使用两个字符:N(\ u004E)和组合代字符(\ u0303)来创建一个Ñ,就像在字符串中一样。

维基百科: https://en.wikipedia.org/wiki/Unicode_equivalence

被定义为规范等价代码点序列 假设为具有相同的外观和含义印刷或 显示时。例如,代码点U + 006E(拉丁语小写 “n”)后跟U + 0303(组合代字符“◌”)由 定义,Unicode与单个代码点U + 00F1 (西班牙字母的小写字母“ñ”)。


一个天真的解决方案将是一个空字符串的映射添加到组合变音符号:

var defaultDiacriticsRemovalMap = [ 
    {'base':'','letters':'\u0300\u0301\u0302\u0303\u0304\u0305\u0306\u0307\u0308\u0309\u030a'}, // etc. up to \u036f 
... 
] 

更新_removeDiacritics以及

function _removeDiacritics (str) { 
    return str.replace(/[^\u0000-\u007E]/g, function(a){ 
     var mappedChar = diacriticsMap[a]; 
     return mappedChar !== undefined ? mappedChar : a; 
    }); 
} 
+0

会不会有另一种解决方案覆盖这个?在这里列表: http://stackoverflow.com/questions/990904/remove-accents-diacritics-in-a-string-in-javascript 而没有任何工作 –

+0

添加一种可能的方式来删除组合变音符号。 –