2013-04-27 146 views
1

嗨,我正在编写我们自己的霍夫曼编码项目。我目前无法将二进制1和0写入输出文件。它适用于较小的输入文件,但对于非常大的文件,它不会将任何内容写入输出文件。负责书写的方法是compress方法。任何帮助,将不胜感激。谢谢!写大二进制数据到文件

package proj3; 

import java.io.File; 
import java.io.FileInputStream; 
import java.io.FileNotFoundException; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.util.ArrayList; 
import java.util.Collections; 
import java.util.LinkedHashMap; 
import java.util.Map; 
import java.util.Scanner; 
import java.util.PriorityQueue; 

public class Project3 { 

//variables for PriorityQueue and Huffman Tree 
private static PriorityQueue<BinaryNode<Character>> queue; 
private static BinaryNode<Character> huffTree; 
private static Map<Character, String> table = new LinkedHashMap<Character, String>(); 

/** 
* Method for creating Huffman Tree 
* @param counts Map that contains all characters and their frequencies 
* @return the Huffman Tree 
*/ 
public static BinaryNode<Character> makeTree(Map<Character, Integer> counts) 
{ 
    queue = new PriorityQueue<BinaryNode<Character>>(); 

    for(Character c : counts.keySet()) 
    { 
     BinaryNode<Character> tree = new BinaryNode<Character>(c, counts.get(c), null, null); 
     queue.add(tree); 
    } 

    while(!queue.isEmpty()) 
    { 
     if(queue.size() >= 2) 
     { 
      BinaryNode<Character> n1 = queue.remove(); 
      BinaryNode<Character> n2 = queue.remove(); 
      Integer weight = n1.getFreq() + n2.getFreq(); 
      huffTree = new BinaryNode<Character>(null, weight, n1, n2); 
      queue.add(huffTree); 
     } 
     if(queue.size() == 1) 
     { 
      return queue.remove(); 
     } 
    } 
    return huffTree; 
} 

public static void encode(BinaryNode<Character> node, String s) 
{ 
    if(!node.isLeaf()) 
    { 
     encode(node.getLeft(), s + "0"); 
     encode(node.getRight(), s + "1"); 
    } 
    else 
    { 
     table.put(node.getElement(), s); 
    } 
} 

public static void compress(String in, String out) throws IOException 
{ 
    try 
    { 
     File outFile = new File(out); 
     FileOutputStream compressedFile = new FileOutputStream(outFile); 
     Scanner infile = new Scanner(new FileInputStream(in)); 
     while(infile.hasNext()) 
     { 
      infile.useDelimiter(""); 
      String str = infile.next(); 
      Character character = str.charAt(0); 
      for(Character c : table.keySet()) 
      { 
       if(c == character){ 
        compressedFile.write(table.get(c).getBytes()); 
        compressedFile.flush(); 
       } 
      } 
     } 
     for(Byte b : table.get('^').getBytes()) 
     { 
      compressedFile.write(b); 
     } 
     infile.close(); 
     compressedFile.close(); 
    } 
    catch (FileNotFoundException e) 
    { 
     System.err.println("File not found."); 
     e.printStackTrace(); 
    } 
} 

public static void decompress(String s) 
{ 

} 

public static void printEncodings(Map<Character, String> m) 
{ 
    ArrayList<Character> chars = new ArrayList<Character>(); 

    System.out.println("Character Encodings"); 
    System.out.println("---------------------"); 
    for(Character c : m.keySet()) 
    { 
     chars.add(c); 
     Collections.sort(chars); 
    } 
    for(Character c : chars) 
    { 
     System.out.print(c + "\t" + m.get(c) + "\n"); 
    } 
    System.out.println(); 
    System.out.println("Total Characters: " + chars.size()); 
} 

/** 
* Method for creating map with character and its frequencies 
* @param s the file name to be opened 
* @return the Map containing characters and frequencies 
*/ 
public static Map<Character, Integer> charCount(String s){ 

    Map<Character, Integer> counts = new LinkedHashMap<Character, Integer>(); 
    ArrayList<Character> chars = new ArrayList<Character>(); 

    try { 
     Scanner file = new Scanner(new FileInputStream(s)); 
     while(file.hasNext()){ 
      file.useDelimiter(""); 
      String str = file.next(); 
      Character c = str.charAt(0); 
      if(counts.containsKey(c)){ 
       counts.put(c, counts.get(c) + 1); 
      } 
      else{ 
       counts.put(c, 1); 
      } 
     } 
     counts.put('^', 1); 
     System.out.println("Character Frequencies"); 
     System.out.println("---------------------"); 
     for(Character c : counts.keySet()) 
     { 
      chars.add(c); 
      Collections.sort(chars); 
     } 
     for(Character c : chars){ 
      System.out.println(c + "\t" + counts.get(c)); 
     } 
     System.out.println(); 
     System.out.println("Total characters: " + chars.size() + "\n"); 
     file.close(); 
    } 
    catch (FileNotFoundException e) { 
     System.err.println("File not found."); 
     System.exit(0); 
    } 
    return counts; 
} 

public static void main(String[] args){ 

    if(args.length != 3) 
    { 
     throw new IllegalArgumentException("Invalid number of arguments."); 
    } 
    encode(makeTree(charCount(args[0])), ""); 
    printEncodings(table); 
    try { 
     compress(args[0], args[1]); 
    } catch (IOException e) { 
     e.printStackTrace(); 
    } 
} 

} 
+1

方案是否结束?你是否已经通过该计划来了解它的功能? – 2013-04-27 20:56:55

+0

是的,它打印出所有正确的字符频率和编码表 – somtingwong 2013-04-27 21:00:23

+0

大数据写入时它是空文件吗? – Kowser 2013-04-27 21:21:01

回答

0

我很确定你正在面对compress()方法中的性能/内存问题。编码打印在你的主要方法中,但我相信文件输出会卡住。我可以看到优化至少三种可能在你的代码:

  1. 您使用的是Scanner读取由字符输入文件字符,但您不使用由扫描仪提供的任何解析功能。改为使用InputStreamReader

  2. 您正在循环查找Huffman表中的一组键并检查相等性。您可以简单地使用当前字符来获取它映射到的字符串并省略循环。

  3. 您不需要循环字节数组将其写入输出文件。 write()方法FileOutputStream可以将整个字节数组作为参数。

凭借我卑微的Java技能,我宁愿以下面的方式实现它;请注意,这是未经测试的代码,因为我没有你BinaryNode类:

public static void compress(String in, String out) throws IOException 
{ 
    try 
     { 
      File outFile = new File(out); 
      FileOutputStream compressedFile = new FileOutputStream(outFile); 

      // 1. Use a Reader instead of a Scanner; 
      // make sure to use the correct charset 
      FileInputStream fis= new FileInputStream(in); 
      Reader reader = new InputStreamReader(fis,   
       Charset.forName("US-ASCII")); 

      // use BufferedReader for even better performance 
      Reader bufferedReader = new BufferedReader(reader); 

      int r; 
      while ((r = bufferedReader.read()) != -1) { 
       char ch= (char) r; 

       // 2. Get the string for this character directly instead of 
       // looping the keySet and checking for equivalence 
       String s= table.get(ch); 
       if (s != null) { 
        // 3. Write entire array of bytes instead of 
        // looping and writing bytes one by one 
        compressedFile.write(s.getBytes()); 
       } 
      } 
      fis.close(); 
      compressedFile.close(); 
     } 
    ... 
+0

我发誓我已经把什么东西你已经写了,但它仍然不工作.....是否有可能我的代码的其他部分是如此低效,以至于它不适用于大文件? – somtingwong 2013-04-27 22:31:36

+0

我认为不,其余部分应该足够高效。检查以下内容:1)当args.length!= 3时您正在投掷,但只在您的代码中使用2个参数。 2)args [0]是否指向现有文件?是否存在args [1]中的所有目录? 3)是否在修改代码后清理/重新编译项目? 4)BinaryNode是否实现java.lang.comparable(PriorityQueue必需)? 5)你是否在调试模式下运行代码并检查任何日志或控制台输出是否有错误/堆栈跟踪? 6)输出文件是以0 KB写入还是缺失? 7)你的代码是否终止? – Marcellus 2013-04-28 11:14:41

0

很可能需要调用compressedFile.flush()

提高它像这样

  if(c == character){ 
       compressedFile.write(table.get(c).getBytes()); 
       compressedFile.flush(); 
      } 

另外考虑使用try/catch。将提高执行的完整性。

+0

没有对不起,它仍然没有写任何东西:( – somtingwong 2013-04-27 21:13:55

+0

@ user1813076:再次更新 – Kowser 2013-04-27 21:23:48

+0

对不起,但我相信我更新完全按照你写的,但它仍然无法正常工作.... – somtingwong 2013-04-27 21:31:57