2017-03-03 91 views
1

我正在开发一个 VB.NET 应用程序。我有多个文本文件,需要根据某个标识符(重复出现的词)把每个文件拆分成多条记录。我刚接触 VB.NET,不知道该怎么做,能否请您帮帮我?到目前为止,我写了下面这段用来拆分文本文件的代码:

If (Directory.Exists(filePath)) Then 
      'search file in the input path by their search pattern 
      For Each File As String In Directory.GetFiles(filePath, "*.txt", SearchOption.TopDirectoryOnly) 

       Console.WriteLine("Reading the current file " + Path.GetFileName(File)) 
       Using sr As StreamReader = New StreamReader(File) 
        Dim Currentline As String 
        Dim Identifier As String 
        Dim statementDate As String 
        Dim currenttext As String 

        'getting the unique identifier from the files and removing the white spaces 
        Identifier = sr.ReadLine.Substring(69, 8) 
        'checks until the EOF 
        While Not sr.EndOfStream 

         currenttext = sr.ReadLine() 
         'loop through until identified not repeated 
         Do Until currenttext.Contains(Identifier) 

          Currentline = sr.ReadLine() 
          Console.WriteLine(Currentline) 


         Loop 
         Console.WriteLine("=========================== Records Ends") 

        End While 
       End Using 

另外,这是需要拆分的文本文件的截图:Text file snap shot

在此先感谢。

回答

0

这段代码应该能满足你的需求……

Imports System.IO 
Imports System.Text 

Sub Main() 
    If (Directory.Exists(filePath)) Then 
     For Each File As String In Directory.GetFiles(filePath, "*.txt", SearchOption.TopDirectoryOnly) 
      Dim Record As New StringBuilder 
      Dim Identifier As String = String.Empty 

      Debug.Print("Reading the current file {0}", Path.GetFileName(File)) 
      Using sr As StreamReader = New StreamReader(File) 
       While Not sr.EndOfStream 
        Dim ThisLine As String = sr.ReadLine.Trim 

        Select Case True 
         Case ThisLine.Length = 0 
          ' Skip blank lines 
         Case Identifier.Length = 0 
          ' We need to set the Identifier 
          Identifier = ThisLine 
         Case ThisLine = Identifier 
          ' We have the whole record 
          ProcessRecord(Record.ToString.Trim) 

          ' Reset for next record 
          Record.Clear() 
         Case Else 
          ' Add this line to the current record 
          Record.AppendLine(ThisLine) 
        End Select 
       End While 

       ' Process last record in file 
       ProcessRecord(Record.ToString.Trim) 
      End Using 

      Debug.Print("=========================== File Ends") 
     Next 
    End If 
End Sub 

Sub ProcessRecord(Record As String) 
    If Record.Length > 0 Then 
     Debug.Print(Record) 
     Debug.Print("=========================== Record Ends") 
    End If 
End Sub 

以下是我最初的回答:

If (Directory.Exists(filePath)) Then 
    For Each File As String In Directory.GetFiles(filePath, "*.txt", SearchOption.TopDirectoryOnly) 
     Dim AllLines() As String = IO.File.ReadAllLines(File) 
     Dim Identifier As String = AllLines.First 
     Dim Records() As String = Split(Join(AllLines, Environment.NewLine), Identifier) 

     For Each Rec As String In Records 
      Debug.Print(Rec) 
      Debug.Print("=========================== Record Ends") 
     Next 
    Next 

    Debug.Print("=========================== File Ends") 
End If 
+0

我不想使用 ReadAllLines,因为它会把整个文件加载到内存中,可能会造成内存问题。能否改用 StreamReader 来实现? –

+0

@VirenderThakur 我已经修改了我的回答。 – MrGadget

0

在这个例子中,我把每条记录分别写入了单独的文本文件。希望能有所帮助。

p.s. 请把代码中的 Identifier = Mid(sr.ReadLine, 1, 5) 改为 Identifier = Mid(sr.ReadLine, 69, 8)

再见

If (Directory.Exists(filePath)) Then 

     Try 
      'search file in the input path by their search pattern 
      For Each File As String In Directory.GetFiles(filePath, "*.txt", SearchOption.TopDirectoryOnly) 

       Console.WriteLine("Reading the current file " + Path.GetFileName(File)) 
       Using sr As StreamReader = New StreamReader(File) 
        Dim Currentline As String = "" 
        Dim Identifier As String = "" 
        Dim currenttext As String = "" 
        Dim Prog As Integer = 0 
        Dim flg As Boolean = True 

        While Not sr.EndOfStream 

         'getting the unique identifier from the files and removing the white spaces 
         Identifier = Mid(sr.ReadLine, 1, 5) 

         Do While Not sr.EndOfStream 

          Do While flg = True 
           Currentline = sr.ReadLine() 
           If Identifier = Currentline.Trim Then 
            Exit Do 
           ElseIf sr.EndOfStream Then 
            currenttext = currenttext + Currentline + vbCrLf 
            Exit Do 
           End If 
           currenttext = currenttext + Currentline + vbCrLf 
          Loop 

          currenttext = currenttext + "=========================== Records Ends" 

          Prog += 1 
          Dim objWriter As New System.IO.StreamWriter(filePath + "\" + Path.GetFileName(File) + "_" + Prog.ToString + ".txt") 
          objWriter.WriteLine(currenttext) 
          objWriter.Close() 
          currenttext = "" 
         Loop 

        End While 

       End Using 

      Next 

      MessageBox.Show("end") 

     Catch ex As Exception 
      MessageBox.Show(ex.Message) 
     End Try 

    End If