2010-09-29 70 views
2

我有一个时髦的错误,驱使我坚果。任何人都可以帮我找到它吗?尝试用两个不同的单词来区分一个缺失的最后一个字符(“garble”vs“garbl”)。该函数返回0而不是预期的1.它应该返回1,对吧?任何人都可以在我的Damerau-Levenshtein Distance实现中发现错误吗?

我试着摆弄数组边界,但这只是造成IndexOutOfRangeExceptions

public static class FuzzyStringMatcher 
{ 
    private const int DELETION = 0; 
    private const int INSERTION = 1; 
    private const int SUBSTITUTION = 2; 
    private const int TRANSPOSITION = 3; 

    private const int COST_OF_DELETION = 1; 
    private const int COST_OF_INSERTION = 1; 
    private const int COST_OF_TRANSPOSITION = 1; 
    private const int COST_OF_SUBSTITUTION = 1; 

    public static int Compute_DamerauLevenshtein_Distance(string a, string b) 
    { 
     int[,] rows = new int[a.Length + 1, b.Length + 1]; 
     int cost_ratio; 
     int[] calculations = new int[4]; 

     // 
     // Init the array 
     // 
     for (int i = 0; i < rows.GetUpperBound(0); i++) 
      rows[i, 0] = i; 

     for (int i = 0; i < rows.GetUpperBound(1); i++) 
      rows[0, i] = i; 


     for (int aidx = 1; aidx < rows.GetUpperBound(0); aidx++) 
     { 
      for (int bidx = 1; bidx < rows.GetUpperBound(1); bidx++) 
      { 
       if (a[aidx - 1] == b[bidx - 1]) 
        cost_ratio = 0; 
       else 
        cost_ratio = 1; 

       calculations[DELETION] = rows[aidx - 1, bidx] + COST_OF_DELETION; 
       calculations[INSERTION] = rows[aidx, bidx - 1] + COST_OF_INSERTION; 
       calculations[SUBSTITUTION] = rows[aidx - 1, bidx - 1] + cost_ratio * COST_OF_SUBSTITUTION; 
       calculations[TRANSPOSITION] = int.MaxValue; 

       if (aidx > 1 && bidx > 1 && a[aidx] == b[bidx - 1] && a[aidx - 1] == b[bidx]) 
        calculations[TRANSPOSITION] = rows[aidx - 2, bidx - 2] + cost_ratio * COST_OF_TRANSPOSITION; 

       rows[aidx, bidx] = calculations.Min(); 
      } 
     } 

     int score = rows[rows.GetUpperBound(0) - 1, rows.GetUpperBound(1) - 1]; 
     if (a.Contains(b) || b.Contains(a)) 
      score = score/2; 
     return score; 
    } 
} 

我的实现是基于关闭的维基百科页面给出的Damerau-Levenshtein-Distance

+0

http://code.google.com/p/google-diff-match-patch/可能有助于(通过比较你的代码的方式) – 2010-09-29 02:57:49

回答

2

算法这不是维基百科的文章:

if (a.Contains(b) || b.Contains(a)) 
     score = score/2; 

因为它是你的真实例子 - 和整数除以1/2 == 0,那就可以了。

3

+1 Lou Louco。但除此之外,您似乎有很多索引问题(请注意,wiki样本中的所有4 for个周期都是包含的,并且当从aidx/bidx中减去1时,实际上需要减去2,因为在wiki中样本中的索引起始于1)。我的版本:

public static int Compute_DamerauLevenshtein_Distance2(string a, string b) 
    { 
     int[,] rows = new int[a.Length + 1, b.Length + 1]; 
     int cost_ratio; 
     int[] calculations = new int[4]; 

     for(int i = 0; i <= rows.GetUpperBound(0); i++) 
      rows[i, 0] = i; 

     for(int i = 1; i <= rows.GetUpperBound(1); i++) 
      rows[0, i] = i; 


     for(int aidx = 1; aidx <= rows.GetUpperBound(0); aidx++) 
     { 
      for(int bidx = 1; bidx <= rows.GetUpperBound(1); bidx++) 
      { 
       if(a[aidx - 1] == b[bidx - 1]) 
        cost_ratio = 0; 
       else 
        cost_ratio = 1; 

       calculations[DELETION] = rows[aidx - 1, bidx] + COST_OF_DELETION; 
       calculations[INSERTION] = rows[aidx, bidx - 1] + COST_OF_INSERTION; 
       calculations[SUBSTITUTION] = rows[aidx - 1, bidx - 1] + cost_ratio * COST_OF_SUBSTITUTION; 
       calculations[TRANSPOSITION] = int.MaxValue; 

       if(aidx > 1 && bidx > 1 && a[aidx - 1] == b[bidx - 2] && a[aidx - 2] == b[bidx - 1]) 
        calculations[TRANSPOSITION] = rows[aidx - 2, bidx - 2] + cost_ratio * COST_OF_TRANSPOSITION; 

       rows[aidx, bidx] = calculations.Min(); 
      } 
     } 

     int score = rows[rows.GetUpperBound(0), rows.GetUpperBound(1)]; 
     return score; 
    } 
+0

感谢的人。这非常有帮助! – Amy 2010-09-29 03:40:18

相关问题