2011-05-09 41 views

回答

4
/**
 * A {@link BufferedReader} that remembers every line it has handed out and
 * replaces any repeated line with the empty string.
 *
 * <p>Callers distinguish three results from {@link #readLine()}: a non-empty
 * string (a line seen for the first time), {@code ""} (a repeated line, or a
 * blank line in the input) and {@code null} (end of stream).
 */
public class UniqueLineReader extends BufferedReader {
    /** Lines already returned by {@link #readLine()}. */
    final Set<String> lines = new HashSet<String>();

    /**
     * @param arg0 the underlying reader to pull lines from
     */
    public UniqueLineReader(Reader arg0) {
        super(arg0);
    }

    /**
     * Reads the next line and reports whether it is new.
     *
     * @return the line if it has not been returned before, {@code ""} if it is a
     *         repeat, or {@code null} once the end of the stream is reached
     * @throws IOException if the underlying reader fails
     */
    @Override
    public String readLine() throws IOException {
        String line = super.readLine();
        if (line == null) {
            // End of stream: never record null in the set, otherwise a later call
            // would be treated as a "duplicate" and return "" instead of null.
            return null;
        }
        return lines.add(line) ? line : "";
    }

    /** Demo: prints the distinct non-empty lines of {@code test.txt}. */
    public static void main(String args[]) {
        // try-with-resources closes the reader (and the file underneath it) even
        // when reading fails part-way through.
        try (UniqueLineReader br = new UniqueLineReader(
                new InputStreamReader(new FileInputStream("test.txt")))) {
            String strLine;
            while ((strLine = br.readLine()) != null) {
                // "" marks a suppressed duplicate; compare by content, not by reference.
                if (!strLine.isEmpty()) {
                    System.out.println(strLine);
                }
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }
}

修改的版本:

/**
 * A {@link BufferedReader} whose {@link #readLine()} silently skips any line
 * that it has already returned, so callers only ever see each distinct line once.
 */
public class UniqueLineReader extends BufferedReader {
    /** Lines already returned by {@link #readLine()}. */
    final Set<String> lines = new HashSet<String>();

    /**
     * @param arg0 the underlying reader to pull lines from
     */
    public UniqueLineReader(Reader arg0) {
        super(arg0);
    }

    /**
     * Reads forward until a line that has not been returned before is found.
     *
     * @return the next previously unseen line, or {@code null} once the end of
     *         the stream is reached (on every call, not just the first)
     * @throws IOException if the underlying reader fails
     */
    @Override
    public String readLine() throws IOException {
        String line;
        do {
            line = super.readLine();
            // Stop on null explicitly: storing null in the set would make any
            // later call spin forever, because add(null) would keep failing.
        } while (line != null && !lines.add(line));
        return line;
    }

    /** Demo: prints the distinct lines of a fixed file in their original order. */
    public static void main(String args[]) {
        // try-with-resources closes the reader (and the file underneath it) even
        // when reading fails part-way through.
        try (UniqueLineReader br = new UniqueLineReader(
                new InputStreamReader(new FileInputStream("/home/emil/Desktop/ff.txt")))) {
            String strLine;
            while ((strLine = br.readLine()) != null) {
                System.out.println(strLine);
            }
        } catch (IOException e) {
            System.err.println("Error: " + e.getMessage());
        }
    }
}
1

使用BufferedReader读取文本文件并将其存储在LinkedHashSet中。打印出来。

下面是一个例子:

/** Removes repeated lines from a block of text while keeping first-seen order. */
public class DuplicateRemover {

    /**
     * Splits the text on {@code "\n"} and rebuilds it from the first occurrence
     * of each piece.
     *
     * @param aHunk the text to process; split on {@code "\n"}
     * @return every distinct piece in order of first appearance, each followed
     *         by {@code "\n"}
     */
    public String stripDuplicates(String aHunk) {
        Set<String> seen = new LinkedHashSet<String>();
        StringBuilder out = new StringBuilder();

        for (String piece : aHunk.split("\n")) {
            // add() is false for a piece that was already emitted.
            if (seen.add(piece)) {
                out.append(piece).append('\n');
            }
        }

        return out.toString();
    }

}

这里的一些单元测试,以验证(忽略我的邪恶复制粘贴;)):

import org.junit.Test; 
import static org.junit.Assert.*; 

/** Checks that {@code DuplicateRemover.stripDuplicates} drops repeated lines. */
public class DuplicateRemoverTest {

    /** Input that happens to be in alphabetical order. */
    @Test
    public void removesDuplicateLines() {
        assertStripped("a\nb\nc\nd\n", "a\nb\nc\nb\nd\n");
    }

    /** Input that is not in alphabetical order. */
    @Test
    public void removesDuplicateLinesUnalphabetized() {
        assertStripped("z\nb\nc\n", "z\nb\nc\nb\nz\n");
    }

    /** Runs a fresh remover over {@code input} and compares against {@code expected}. */
    private static void assertStripped(String expected, String input) {
        DuplicateRemover remover = new DuplicateRemover();
        assertEquals(expected, remover.stripDuplicates(input));
    }

}
+0

嗯排序在文件中的内容,让我。不知道。 – Mike 2011-05-09 02:03:30

2

如果你把各行放入LinkedHashSet,它会忽略重复的行(因为它是一个集合),同时保留原有顺序(因为它是链接的)。如果你只是想知道某一行之前是否出现过,可以在读取过程中把各行放入一个普通的Set,并跳过Set中已经包含的行。

0

这里我用一个HashSet来存储已经见过的行

Scanner scan;//input 
// Lines already emitted. Used only for the membership test and never iterated,
// so HashSet's unspecified iteration order does not affect the output.
Set<String> lines = new HashSet<String>(); 
// Collects the unique lines in the order they were read.
StringBuilder strb = new StringBuilder(); 
while(scan.hasNextLine()){ 
    String line = scan.nextLine(); 
    // add() returns false for a line seen before. nextLine() strips the line
    // terminator, so put one back to keep the kept lines from running together.
    if(lines.add(line)) {
        strb.append(line).append('\n'); 
    }
} 
+0

但是我们可以确保输入行和输出行的顺序与散列保持一致吗? – 2011-05-09 01:50:59

+0

我也将它们添加到一个字符串生成器中,当你遍历整个文本时,你抛弃了这个集合并保留'strb.toString()' – 2011-05-09 01:53:48

+0

当你添加到一个集合时,你不需要检查如果它已经在那里。另外,HashSets不保证顺序。 – Kal 2011-05-09 01:55:31

1

这里是另一种解决方案。让我们来使用UNIX!

# Keep the first occurrence of every line, in order (uniq alone only drops
# adjacent repeats). Write to a temporary file first: redirecting straight back
# onto the input would truncate it before it is read.
awk '!seen[$0]++' MyFile.java > MyFile.java.tmp && mv MyFile.java.tmp MyFile.java

编辑:哦等等,我重新读了这个话题。这是一个合法的解决方案,因为我设法成为语言不可知论者?

+0

我想你可以在这里使用类似的解决方案:http://stackoverflow.com/questions/1088113/is-there-a-java-library-of-unix-functions。不过,如果你在UNIX系统上,我会尝试为脚本编写钩子。 – Mike 2011-05-09 02:13:43

1

使用新的Java Stream API,可以轻松地删除文本或文件中的重复行。Stream支持多种聚合操作,例如排序(sorted)和去重(distinct),并且可以配合Java现有的数据结构及其方法使用。下面的例子演示了如何使用Stream API删除文件中的重复行,或对其内容排序。

package removeword; 

import java.io.IOException; 
import java.nio.file.Files; 
import java.nio.file.OpenOption; 
import java.nio.file.Path; 
import java.nio.file.Paths; 
import java.util.Arrays; 
import java.util.Scanner; 
import java.util.stream.Stream; 
import static java.nio.file.StandardOpenOption.*; 
import static java.util.stream.Collectors.joining; 

/**
 * Copies a text file to a second file, keeping only the first occurrence of
 * each line.
 */
public class Java8UniqueWords {

    /**
     * Reads {@code C:/Users/source.txt}, removes repeated lines, prints the
     * result and writes it to {@code C:/Users/removedDouplicate_file.txt}.
     *
     * @param args unused
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    public static void main(String[] args) throws IOException {
        Path sourcePath = Paths.get("C:/Users/source.txt");
        Path changedPath = Paths.get("C:/Users/removedDouplicate_file.txt");

        // The stream returned by Files.lines holds the file open, so it is managed
        // by try-with-resources. Optional extra stages: insert
        // .map(String::toLowerCase) before distinct() to compare case-insensitively,
        // or .sorted() after it to sort the surviving lines.
        try (Stream<String> lines = Files.lines(sourcePath).distinct()) {
            final String uniqueWords = lines.collect(joining("\n"));
            System.out.println("Final Output:" + uniqueWords);
            // Files.lines decodes as UTF-8, so encode the same way on the way out.
            // CREATE lets the write succeed when the target does not exist yet.
            Files.write(changedPath,
                    uniqueWords.getBytes(java.nio.charset.StandardCharsets.UTF_8),
                    CREATE, WRITE, TRUNCATE_EXISTING);
        }
    }
}