2017-04-15 51 views
0

我有一个数组和一个匹配数组的数组。每个数组都有唯一的id值。查找最佳阵列交点

MatchingArray = [1,2,3,4,5,6]

A1 = [1,4,6]

A2 = [2,3,5]

A3 = [1,5]

A4 = [4]

A5 = [1,6]

需要找到 “最佳匹配数”。最佳匹配是来自A1-A5的具有最小长度的子集阵列,其应与匹配阵列具有最大可能的交集。对于这个例子,有两个可能的匹配与最大交集:M1 = [[2,3,5],[1,4,6]]和M2 = [[1,5],[4] ,[1,6]]。但M1.length < M2.length,所以算法应输出M1。

+0

如果需要更多的描述,请留下评论根据回答 –

+0

也许问题描述不好,但算法的全部含义是找到M1。我定义了M1和M2来解释最优解,并不是因为它们在算法中是已知的。 – VahagnNikoghosian

+0

你需要找到更多的'A1-A5'与'MatchingArray'相交的数组? –

回答

1

你可以使用集(或哈希值,无论语言称他们),以优化的时间效率。

将目标数组转换为一个集合,然后从中减去所选的源(即删除公共值)。继续递归直到目标集合为空。跟踪最佳结果(尽可能使用最少的源数组)。如果正在使用的源数组的数量超过了当时已经找到的最佳解决方案的长度,则返回。

这里是在Python代码:

def find_optimal_coverage(target, sources): 
    max_size = len(target) 
    best = None 

    def recurse(target, sources, selected): 
     nonlocal max_size, best 
     if len(target) == 0: 
      best = selected 
      max_size = len(best) - 1 
      return True 
     if len(selected) == max_size: 
      return None 
     for i, source in enumerate(sources): 
      result = recurse(target - set(source), sources[i+1:], 
          selected + [list(source)]) 
      if result: 
       return True 

    target = set(target) # convert to set for faster lookup 
    # limit the source lists to elements that occur in the target 
    sources = list(map(target.intersection, sources)) 
    # limit target to elements that occur in at least one source 
    target = set.union(*sources) 
    # sort sources by decreasing length to maximise probability of 
    # finding optimal solution sooner 
    sources.sort(key = len, reverse = True) 
    if recurse(target, sources, []): 
     return best 

result = find_optimal_coverage(
    [1, 2, 3, 4, 5, 6, 8], 
    [ 
     [1, 4, 6, 7], 
     [2, 3, 5], 
     [1, 5], 
     [4], 
     [1, 6] 
    ] 
) 
print(result) 

看到它在repl.it

运行在JavaScript:

function subtractArray(s, arr) { 
 
    return arr.reduce((s, v) => (s.delete(v), s), new Set(s)); 
 
} 
 

 
function findOptimalCoverage(target, sources) { 
 
    var maxSize = target.size; 
 
    var best = null; 
 
    
 
    function recurse(target, sources, selected) { 
 
     if (target.size == 0) { 
 
      best = selected; 
 
      maxSize = best.length - 1; 
 
      return true; 
 
     } 
 
     if (selected.length == maxSize) return; 
 
     return sources.some((source, i) => 
 
      recurse(subtractArray(target, source), sources.slice(i+1), 
 
        selected.concat([source])) 
 
     ); 
 
    } 
 
    target = new Set(target) // convert to set for faster lookup 
 
    // limit the source arrays to elements that occur in the target 
 
    sources = sources.map(source => source.filter(target.has.bind(target))); 
 
    // limit target to elements that occur in at least one source 
 
    target = new Set([].concat(...sources)); 
 
    // sort sources by decreasing length to maximise probability of 
 
    // finding optimal solution sooner 
 
    sources.sort((a,b) => b.length - a.length); 
 
    if (recurse(target, sources, [])) return best; 
 
} 
 

 
var result = findOptimalCoverage(
 
    [1, 2, 3, 4, 5, 6, 8], 
 
    [ 
 
     [1, 4, 6, 7], 
 
     [2, 3, 5], 
 
     [1, 5], 
 
     [4], 
 
     [1, 6] 
 
    ] 
 
); 
 
console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }

+0

好的答案,经过一些调整后,这个算法给了我真正想要的。谢谢。 – VahagnNikoghosian

+0

@VahagnNikoghosian这个algrorithm在数组中不能正常工作,它包含非交集元素并且具有相同数量的intesection元素,是吗?例如,如果你尝试测试数组''[1,4,6,7]'' –

1

实施的算法在javascript:阵列的单独

var matchingArray = [1, 2, 3, 4, 5, 6]; 
 

 
var A1 = [1, 4, 6], 
 
    A2 = [2, 3, 5], 
 
    A3 = [1, 5], 
 
    A4 = [4], 
 
    A5 = [1, 6]; 
 

 

 
var M = [A1, A2, A3, A4, A5]; 
 

 
function compareArrays(M, machingArray) { 
 
    var intersections = [] 
 
    M.forEach(function(A) { 
 
    var partOfItersections; 
 
    if (A.length > 0) { 
 
     var intersectionsCount = getIntersectionCount(A, machingArray); 
 
     partOfItersections = intersectionsCount/A.length; 
 
    } else { 
 
     partOfItersections = 0 
 
    } 
 

 
    intersections.push({ 
 
     length: A.length, 
 
     partOfItersections: partOfItersections 
 
    }); 
 
    }); 
 

 

 
    //alert(JSON.stringify(intersections)); 
 

 
    var maxLength = 0, 
 
    maxPartOfItersections = 0, 
 
    optimalArrays = []; 
 

 
    intersections.forEach(function(arrayData, index) { 
 
    var currentArr = M[index]; 
 
    var currentArrLength = currentArr.length; 
 
    if (maxPartOfItersections < arrayData.partOfItersections) { 
 

 
     setCurrentOptimalArr(arrayData.partOfItersections, currentArr); 
 
    } else if (maxPartOfItersections === arrayData.partOfItersections) { 
 
     if (maxLength < currentArrLength) { 
 
     setCurrentOptimalArr(arrayData.partOfItersections, currentArr); 
 
     } else if (maxLength === currentArrLength) { 
 
     optimalArrays.push(currentArr); 
 
     } 
 
    } 
 
    }); 
 

 
    //alert(JSON.stringify(optimalArrays)); 
 

 
    return optimalArrays; 
 

 
    function setCurrentOptimalArr(intersectionsCount, currentArr) { 
 
    optimalArrays = [currentArr]; 
 
    maxLength = currentArr.length; 
 
    maxPartOfItersections = intersectionsCount; 
 
    } 
 

 
    function getIntersectionCount(A, machingArray) { 
 
    var intersectionCount = 0; 
 

 
    A.forEach(function(elem) { 
 
     if (machingArray.indexOf(elem) != -1) { 
 
     intersectionCount++; 
 
     } 
 
    }); 
 

 

 
    return intersectionCount; 
 
    } 
 
} 
 

 
alert(JSON.stringify(compareArrays(M, matchingArray)));

  1. 计数交集。
  2. 包含更多部分交点的返回数组。

代码更新

+0

尝试使用拼写检查器。 – greybeard

+0

对不起我'分开'。我忘记它是如何正确写入的。您可以编辑它并修复。 –