2014-11-24 77 views
1

我不知道我将如何攻击我的霍夫曼树的遍历。树是正确的,我很难弄清楚如何以一种好的方式来遍历它。出于某种原因,我的穿越方法没有给出结果...霍夫曼树:遍历

UPDATE: - 清理代码,使其更加面向对象

Node类:

public class Node 
{ 
    public int frekvens; //Frequency 
    public char tegn; //Symbol 
    public Node venstre; //Left child 
    public Node høyre; //Right child 
    public string s; //result string 
    public string resultat; 
    public Node (char c) // Node constructor containing symbol. 
    { 
     frekvens = 1; 
     tegn = c; 
    } 

    public Node (int f, Node venstre, Node høyre) // Node Constructor containing frequency and children 
     { 
      frekvens = f; 
      this.venstre = venstre; 
      this.høyre = høyre; 
     } 

    public Node (Node node) // Node constructor containing a node 
     { 
      frekvens = node.frekvens; 
      tegn = node.tegn; 
      this.venstre = venstre; 
      this.høyre = høyre; 
     } 

    public void ØkMed1() // Inkrement frequency by one 
    { 
     frekvens = frekvens + 1; 
    } 


    public char getVenstreTegn() 
    { 
     return venstre.tegn; 
    } 

    public char getHøyreTegn() 
    { 
     return venstre.tegn; 
    } 

    public int getVenstreFrekvens() 
    { 
     return venstre.frekvens; 
    } 

    public int getHøyreFrekvens() 
    { 
     return høyre.frekvens; 
    } 

    public int getFrekvens() 
    { 
     return frekvens; 
    } 


    public bool ErTegn(char c) 
    { 
     if (c == tegn) 
     { 
      return false; 
     } 
     else 
     { 
      return true; 
     } 
    } 

    //Pretty sure this does not work as intended 
    public string traverser (Node n) //Traverse the tree 
    { 
     if (n.tegn != '\0') //If the node containes a symbol --> a leaf 
     { 
      resultat += s; 
     } 
     else 
     { 
      if (n.getVenstreTegn() == '\0') //If left child does not have a symbol 
      { 
       s += "0"; 
       traverser(n.venstre); 
      } 
      if (n.getHøyreTegn() == '\0') //If right child does not have a symbol 
      { 
       s += "1"; 
       traverser(n.høyre); 
      } 
     } 
     return resultat; 
    } 
    public string Resultat() //Used priviously to check if i got the correct huffman tree 
    { 
     string resultat; 
     resultat = "Tegn: " + Convert.ToString(tegn) +" frekvens: " + Convert.ToString(frekvens) + "\n"; 
     return resultat; 
    } 
} 

Huffman_Tree类:

public class Huffman_Tre 
{ 
    string treString; 
    List<Node> noder = new List<Node>(); 
    public Node rot; 
    public void bygg (string input) 
    { 
     bool funnet; //Found 
     char karakter; //character 

     for (int i = 0; i < input.Length;i++) //Loops through string and sets character 
      //with coresponding freqeuncy in the node list 
     { 
      karakter = input[i]; 
      funnet = false; //default 
      for (int j = 0; j< noder.Count; j++) 
      { 
       if (noder[j].ErTegn(karakter) == false) //if the character already exists 
       { 
        noder[j].ØkMed1(); //inkrement frequency by one 
        funnet = true; 
        break; 
       } 
      } 
      if (!funnet) //if the character does not exist 
      { 
       noder.Add(new Node(karakter)); //add the character to list 
      } 
     } 
     //Sorting node list acending by frequency 
     var sortertListe = noder.OrderBy(c => c.frekvens).ToList(); 

     noder = sortertListe; 

     do 
     { 
      noder.Add(new Node((noder[0].frekvens + noder[1].frekvens), noder[0],noder[1])); 

      //Remove the leaf nodes 
      noder.RemoveAt(0); 
      noder.RemoveAt(0); 
     } while(noder.Count >= 2); 

    } 

    public Node getRot() 
    { 
     return rot; 
    } 

    public string visTre() 
    { 

     foreach (Node node in noder) 
     { 
      treString += node.Resultat(); 
     } 
     return treString; 
    } 
    public bool erNull() 
    { 
     if (noder[0].tegn == '\0') 
     { 
      return true; 
     } 
     else 
      return false; 
    } 
} 

主程序:

private void btnKomprimer_Click(object sender, System.Windows.RoutedEventArgs e) 
    { 
     string input; //The string input I want to compress 
     input = txtInput.Text; //initialize input to text input 
     input = input.ToLower(); 
     txtOutput.Text = ""; 

     Huffman_Tre tre = new Huffman_Tre(); 

     tre.bygg(input); 

     Node rot = new Node(tre.getRot()); 

     txtOutput.Text += rot.traverser(rot); 
    } 
} 
+2

[您是否尝试调试?](http://ericlippert.com/2014/03/05/how-to-debug-small-programs/) – 2014-11-24 12:11:59

+0

Sindre,也许它可能会更方便使用二进制树先。由于huffman的算法将根据字符的频率或形成根的频率总和来确定二进制数字(1或0)。如果你仍然需要更多的提示,只是大喊 – RvdV79 2015-01-14 21:18:26

+0

我忽略了你已经在使用二叉树的事实。我很抱歉。尽管如此,它引发了我建立一个遍历课程的霍夫曼算法,我已将它发布在我的答案中。 – RvdV79 2015-01-16 16:31:09

回答

9

因为我还剩下一点时间,所以我制作了一个霍夫曼树的例子,同时玩C#6.0。它没有被优化(甚至不是很好!),但它作为一个例子很好。它会帮助你看看你的'挑战'可能出现在哪里。由于我的英语远比我的斯堪的纳维亚知识好,所以我用英语命名,我希望你不介意。

首先,让我们从保持频率的类开始。

public sealed class HuffmanFrequencyTable 
{  
    #region Properties 
    /// <summary> 
    /// Holds the characters and their corresponding frequencies 
    /// </summary> 
    public Dictionary<char, int> FrequencyTable { get; set; } = new Dictionary<char, int>(); 

    #endregion 

    #region Methods 

    /// <summary> 
    /// Clears the internal frequency table 
    /// </summary> 
    public void Clear() 
    { 
     FrequencyTable?.Clear();    
    } 

    /// <summary> 
    /// Accepts and parses a new line (string) which is then 
    /// merged with the existing dictionary or frequency table 
    /// </summary> 
    /// <param name="line">The line to parse</param> 
    public void Accept(string line) 
    { 
     if (!string.IsNullOrEmpty(line)) 
     { 
      line.GroupBy(ch => ch). 
       ToDictionary(g => g.Key, g => g.Count()). 
       ToList(). 
       ForEach(x => FrequencyTable[x.Key] = x.Value); 
     } 
    } 

    /// <summary> 
    /// Performs a dump of the frequency table, ordering all characters, lowest frequency first. 
    /// </summary> 
    /// <returns>The frequency table in the format 'character [frequency]'</returns> 
    public override string ToString() 
    {    
     return FrequencyTable?.PrintFrequencies(); 
    } 
    #endregion 
} 

请注意,ToString()方法使用一种扩展方法,其能够“转储”的使用的词典的内容。扩展坐落在一个叫帮手,看起来像这样静态类:现在

/// <summary> 
/// Extension method that helps to write the contents of a generic Dictionary to a string, ordered by it's values and 
/// printing the key and it's value between brackets. 
/// </summary> 
/// <typeparam name="TKey">Generic key</typeparam> 
/// <typeparam name="TValue">Generic value type</typeparam> 
/// <param name="dictionary">The dictionary</param> 
/// <exception cref="ArgumentNullException">Throws an argument null exception if the provided dictionary is null</exception> 
/// <returns></returns> 
public static string PrintFrequencies<TKey, TValue>(this IDictionary<TKey, TValue> dictionary) 
{ 
    if (dictionary == null) 
     throw new ArgumentNullException("dictionary"); 

    var items = from kvp in dictionary 
       orderby kvp.Value 
       select kvp.Key + " [" + kvp.Value + "]"; 

    return string.Join(Environment.NewLine, items); 
} 

,本FrequencyTable的地方,我们可以开始寻找关于如何建立起来的节点。 Huffman使用二叉树,因此最好生成一个具有左右子节点的Node类。我也冒昧地在这里执行遍历算法。该类构建如下:

public sealed class HuffmanNode 
{ 
    #region Properties 

    /// <summary> 
    /// Holds the left node, if applicable, otherwise null 
    /// </summary> 
    public HuffmanNode Left { get; set; } = null; 

    /// <summary> 
    /// Holds the right node, if applicable, otherwise null 
    /// </summary> 
    public HuffmanNode Right { get; set; } = null; 

    /// <summary> 
    /// Holds the Character (or null) for this particular node 
    /// </summary> 
    public char? Character { get; set; } = null; 

    /// <summary> 
    /// Holds the frequency for this particular node, defaulted to 0 
    /// </summary> 
    public int Frequency { get; set; } = default(int); 

    #endregion 

    #region Methods 

    /// <summary> 
    /// Traverses all nodes recursively returning the binary 
    /// path for the corresponding character that has been found. 
    /// </summary> 
    /// <param name="character">The character to find</param> 
    /// <param name="data">The datapath (containing '1's and '0's)</param> 
    /// <returns>The complete binary path for a character within a node</returns> 
    public List<bool> Traverse(char? character, List<bool> data) 
    { 
     //Check the leafs for existing characters 
     if (null == Left && null == Right) 
     { 
      //We're at an endpoint of our 'tree', so return it's data or nothing when the symbol 
      //characters do not match 
      return (bool)character?.Equals(Character) ? data : null; 
     } 
     else 
     { 
      List<bool> left = null; 
      List<bool> right = null; 

      //TODO: If possible refactor with proper C# 6.0 features 
      if (null != Left) 
      { 
       List<bool> leftPath = new List<bool>(data);      
       leftPath.Add(false); //Add a '0' 
       left = Left.Traverse(character, leftPath); //Recursive traversal for child nodes within this left node. 
      } 

      if (null != Right) 
      { 
       List<bool> rightPath = new List<bool>(data);      
       rightPath.Add(true); //Add a '1' 
       right = Right.Traverse(character, rightPath); //Recursive traversal for childnodes within this right node 
      } 

      return (null != left) ? left : right; 
     } 
    }   

    #endregion 
} 

我使用HuffmanTree类中的Node类。从逻辑上讲,从节点构建一棵树。相应HuffmanTree是这样写的:

public sealed class HuffmanTree 
{ 
    #region Fields 
    /// <summary> 
    /// Field for keeping the Huffman nodes in. Internally used. 
    /// </summary> 
    private List<HuffmanNode> nodes = new List<HuffmanNode>();   
    #endregion 

    #region Properties 

    /// <summary> 
    /// Holds the Huffman tree 
    /// </summary> 
    public HuffmanNode Root { get; set; } = null; 

    /// <summary> 
    /// Holds the frequency table for all parsed characters 
    /// </summary> 
    public HuffmanFrequencyTable Frequencies { get; private set; } = new HuffmanFrequencyTable() 

    /// <summary> 
    /// Holds the amount of bits after encoding the tree. 
    /// Primary usable for decoding. 
    /// </summary> 
    public int BitCountForTree { get; private set; } = default(int); 

    #endregion 

    #region Methods 

    /// <summary> 
    /// Builds the Huffman tree 
    /// </summary> 
    /// <param name="source">The source to build the Hufftree from</param> 
    /// <exception cref="ArgumentNullException">Thrown when source is null or empty</exception> 
    public void BuildTree(string source) 
    { 
     nodes.Clear(); //As we build a new tree, first make sure it's clean :) 

     if (string.IsNullOrEmpty(source)) 
      throw new ArgumentNullException("source"); 
     else 
     { 
      Frequencies.Accept(source); 

      foreach (KeyValuePair<char, int> symbol in Frequencies.FrequencyTable) 
      { 
       nodes.Add(new HuffmanNode() { Character = symbol.Key, Frequency = symbol.Value }); 
      } 

      while (nodes.Count > 1) 
      { 
       List<HuffmanNode> orderedNodes = nodes.OrderBy(node => node.Frequency).ToList(); 

       if (orderedNodes.Count >= 2) 
       { 
        List<HuffmanNode> takenNodes = orderedNodes.Take(2).ToList(); 

        HuffmanNode parent = new HuffmanNode() 
        { 
         Character = null, 
         Frequency = takenNodes[0].Frequency + takenNodes[1].Frequency, 
         Left = takenNodes[0], 
         Right = takenNodes[1] 
        }; 

        //Remove the childnodes from the original node list and add the new parent node 
        nodes.Remove(takenNodes[0]); 
        nodes.Remove(takenNodes[1]); 
        nodes.Add(parent); 
       } 
      } 

      Root = nodes.FirstOrDefault(); 
     } 
    } 

    /// <summary> 
    /// Encodes a given string to the corresponding huffman encoding path 
    /// </summary> 
    /// <param name="source">The source to encode</param> 
    /// <returns>The binary huffman representation of the source</returns> 
    public BitArray Encode(string source) 
    { 
     if (!string.IsNullOrEmpty(source)) 
     { 
      List<bool> encodedSource = new List<bool>(); 
      //Traverse the tree for each character in the passed source (string) and add the binary path to the encoded source 
      encodedSource.AddRange(source.SelectMany(character => 
             Root.Traverse(character, new List<bool>()) 
            ).ToList() 
           ); 
      //For decoding, we might need the amount of bits to skip trailing bits. 
      BitCountForTree = encodedSource.Count; 
      return new BitArray(encodedSource.ToArray()); 
     } 
     else return null; 
    } 

    /// <summary> 
    /// Decodes a given binary path to represent it's string value 
    /// </summary> 
    /// <param name="bits">BitArray for traversing the tree</param> 
    /// <returns></returns> 
    public string Decode(BitArray bits) 
    { 
     HuffmanNode current = Root; 
     string decodedString = string.Empty; 

     foreach (bool bit in bits) 
     { 
      //Find the correct current node depending on the bit set or not set. 
      current = (bit ? current.Right ?? current : current.Left ?? current);    

      if (current.IsLeaf()) 
      { 
       decodedString += current.Character; 
       current = Root; 
      } 
     } 

     return decodedString; 
    } 

    #endregion 
} 

什么在此代码是有趣的是,我决定使用BitArrays将保持树二进制路径时,它的建立。这里的public BitArray Encode(string source)方法包含一个肮脏的黑客攻击。我跟踪用于编码的总位数,并将其存储在BitCountForTree属性中。在执行解码时,我将使用此属性来删除可能出现的任何尾随位。有一种更好的方式来执行此操作,但我会将其打开以供您查找。

此外,此类使用为HuffmanNode编写的扩展方法。这是一个简单的,但:

/// <summary> 
    /// Determines whether a given Huffman node is a leaf or not. 
    /// A node is considered to be a leaf when it has no childnodes 
    /// </summary> 
    /// <param name="node">A huffman node</param> 
    /// <returns>True if no children are left, false otherwise</returns> 
    public static bool IsLeaf(this HuffmanNode node) 
    { 
     return (null == node.Left && null == node.Right); 
    } 

这个扩展方法很方便地确定给定节点是否实际上是叶节点。一个叶子是没有子节点的节点,因此二叉树的末端(或者更好的是该树的一个分支)。

现在有趣的部分,我该如何在这里工作。我已经构建了一个具有3个文本框的Windows窗体应用程序。一个用于实际输入,一个用于二进制(编码)输出,最后一个用于显示压缩结果。 我还放置了两个简单的按钮,一个用于执行霍夫曼编码,另一个用于霍夫曼解码。

霍夫曼编码方法被写为以下的(只是在编码按钮的事件处理程序):

string input = tbInput.Text; 
Tree.BuildTree(input); //Build the huffman tree 

BitArray encoded = Tree.Encode(input); //Encode the tree 

//First show the generated binary output 
tbBinaryOutput.Text = string.Join(string.Empty, encoded.Cast<bool>().Select(bit => bit ? "1" : "0")); 

    //Next, convert the binary output to the new characterized output string.  
    byte[] bytes = new byte[(encoded.Length/8) + 1]; 
    encoded.CopyTo(bytes, 0); 

    tbOutput.Text = Encoding.Default.GetString(bytes); //Write the compressed output to the textbox. 

注意,编码的二进制字符串没有任何拖尾比特。我将把它留给C#的编码机制。这个缺点是,我必须在解码时跟踪它。

现在解码也不算太困难。尽管在这个例子中,我正在使用上面编码代码生成的压缩输出。另外,我假设霍夫曼树(和它的频率表!!!)已经建成。通常情况下,频率表存储在压缩文件中,以便重建。

//First convert the compressed output to a bit array again again and skip trailing bits.    
bool[] boolAr = new BitArray(Encoding.Default.GetBytes(tbOutput.Text)).Cast<bool>().Take(Tree.BitCountForTree).ToArray(); 
BitArray encoded = new BitArray(boolAr); 

string decoded = Tree.Decode(encoded); 
MessageBox.Show(decoded, "Decoded result: ", MessageBoxButtons.OK, MessageBoxIcon.Information); 

请注意肮脏的黑客我创建,为Encoding.Default.GetBytes(tbOutput.Text)肯定生成的字节数组,它可能包含尾随其不需要待解码的位。因此,我只根据重建树来获取实际需要的位数。

按下“哈夫编码”按钮后:

所以运行时,使用了“世界著名的句子”“敏捷的棕色狐狸跳过懒惰的程序员”当我的例子提供了以下输出,

HuffEncode

并按下 “哈夫解码” 按钮后:

HuffDecode

现在这段代码可以真正使用一些优化,因为你可能会考虑使用数组而不是字典。还有更多,但由你来考虑。

+0

很好的答案!随着我在编程语言方面的知识的提高,我越来越尊重优秀的代码。我会尽量不要太看重你的代码,而是阅读你的帮助描述,因为我确实想从头开始重新制作这个应用程序。现在应该会变得更加容易:D 这是一个非常有趣的程序,我希望你在写作时也有一些乐趣;) – Woksin 2015-01-16 20:07:13

+0

我真的这样做了,不用担心:-)。我很高兴它有帮助! – RvdV79 2015-01-16 20:09:27