我写了一个小程序来重新格式化 CSV 文件:把字段内部的双引号加倍,并把字段内部的换行符替换为类似 '\n' 的字符串。
一旦数据导入完成,我们就可以把 '\n' 替换回换行符。
我之所以需要这样做,是因为我必须使用的那个处理 CSV 的应用程序无法正确处理换行符。
欢迎随意使用和修改。
Python 版本:
import sys
def ProcessCSV(filename):
    """Rewrite a quoted CSV file so that every record occupies one line.

    Reads *filename* and writes the result to ``filename + '.out'``.
    Within each field, embedded double quotes are doubled (``"`` becomes
    ``""``) and embedded line breaks are replaced by the two-character
    text ``\\n`` so that the consumer can restore them later.

    Fields are recognised by the literal separator ``","``, so the input
    is expected to have every field wrapped in double quotes.  A last
    field that does not end with a double quote is treated as continuing
    on the next physical line.  Blank lines between records are dropped.

    Args:
        filename: Path of the CSV file to read.
    """
    separator = '","'
    filename2 = filename + '.out'
    # ``with`` guarantees both files are closed even if reading or writing
    # raises part-way through.
    with open(filename, 'r') as file1, open(filename2, 'w') as file2:
        # Single-argument print() works under both Python 2 and Python 3.
        print('Reformatting {0} to {1}...'.format(filename, filename2))
        line1 = file1.readline()
        while line1:
            line1 = line1.rstrip('\r\n')
            fields = []
            count = 0
            # An empty physical line carries no fields at all.
            lastField = not line1
            while not lastField:
                field, found, line1 = line1.partition(separator)
                lastField = not found
                count += 1
                hasStart = False
                hasEnd = False
                if count == 1 and field[:1] == '"':
                    # Drop the opening quote of the record's first field.
                    field = field[1:]
                    hasStart = True
                elif count > 1:
                    # The separator already consumed this field's opening quote.
                    hasStart = True
                while True:
                    if lastField and field[-1:] == '"':
                        # Drop the closing quote of the record's final field.
                        field = field[:-1]
                        hasEnd = True
                    elif not lastField:
                        # The separator already consumed the closing quote.
                        hasEnd = True
                    if not lastField or hasEnd:
                        break
                    # The field is still open: it continues on the next line.
                    line1 = file1.readline()
                    if not line1:
                        # End of file inside an unterminated field.
                        break
                    line1 = line1.rstrip('\r\n')
                    piece, found, line1 = line1.partition(separator)
                    lastField = not found
                    field = field + '\\n' + piece
                field = field.replace('"', '""')
                fields.append(
                    ('"' if hasStart else '') + field + ('"' if hasEnd else '')
                )
            line2 = ','.join(fields)
            if line2:
                file2.write(line2 + '\n')
            line1 = file1.readline()
    print('Done')
def iif(st, v1, v2):
    """Return *v1* when *st* is truthy, otherwise *v2*.

    Both *v1* and *v2* are evaluated by the caller before the call, so
    unlike a conditional expression this helper does not short-circuit.
    """
    return v1 if st else v2
# Script entry: the first command-line argument is the CSV file to convert.
# Guard the lookup so a missing argument yields the usage message instead of
# an IndexError.
filename = sys.argv[1] if len(sys.argv) > 1 else ''
if len(filename) == 0:
    print('You must specify the input file')
else:
    ProcessCSV(filename)
VB.NET 版本:
Module Module1
    ' Entry point: takes the input CSV path from the command line and
    ' converts it with ProcessCSV.
    Sub Main()
        Dim FileName As String
        ' Command() returns the raw argument text, so a path that was quoted
        ' on the command line still carries its surrounding quotes; remove
        ' them together with any stray whitespace.
        FileName = Command().Trim().Trim(""""c)
        If FileName.Length = 0 Then
            Console.WriteLine("You must specify the input file")
        Else
            ProcessCSV(FileName)
        End If
    End Sub

    ' Rewrites FileName into FileName & ".out" so that every record occupies
    ' a single line: double quotes inside a field are doubled and line breaks
    ' inside a field are replaced by the two-character text \n.
    ' Fields are recognised by the literal separator "," (quote, comma,
    ' quote), so every field is expected to be wrapped in double quotes.
    ' Any error is reported on the console instead of being propagated.
    Sub ProcessCSV(ByVal FileName As String)
        Const Separator As String = ""","""
        Dim File1 As Integer, File2 As Integer
        Dim Line1 As String, Line2 As String
        Dim Field As String, Count As Long
        Dim HasStart As Boolean, HasEnd As Boolean
        Dim FileName2 As String, LastField As Boolean

        Try
            File1 = FreeFile()
            FileOpen(File1, FileName, OpenMode.Input, OpenAccess.Read)
            FileName2 = FileName & ".out"
            File2 = FreeFile()
            FileOpen(File2, FileName2, OpenMode.Output)
            Console.WriteLine("Reformatting {0} to {1}...", FileName, FileName2)

            Do Until EOF(File1)
                Line1 = LineInput(File1)
                Line2 = ""
                Count = 0
                ' An empty physical line carries no fields at all.
                LastField = (Len(Line1) = 0)

                Do Until LastField
                    LastField = (InStr(Line1, Separator) = 0)
                    Field = Strip(Line1, Separator)
                    Count = Count + 1
                    HasStart = False
                    HasEnd = False

                    If (Count = 1) AndAlso (Left(Field, 1) = """") Then
                        ' Drop the opening quote of the record's first field.
                        Field = Mid(Field, 2)
                        HasStart = True
                    ElseIf Count > 1 Then
                        ' The separator already consumed the opening quote.
                        HasStart = True
                    End If

                    ' Keep appending physical lines until the field is closed.
                    Do
                        If LastField AndAlso (Right(Field, 1) = """") Then
                            ' Drop the closing quote of the final field.
                            Field = Left(Field, Len(Field) - 1)
                            HasEnd = True
                        ElseIf Not LastField Then
                            ' The separator already consumed the closing quote.
                            HasEnd = True
                        End If

                        If (Not LastField) OrElse HasEnd OrElse EOF(File1) Then
                            Exit Do
                        End If

                        ' The field is still open: it continues on the next line.
                        Line1 = LineInput(File1)
                        LastField = (InStr(Line1, Separator) = 0)
                        Field = Field & "\n" & Strip(Line1, Separator)
                    Loop

                    Field = Replace(Field, """", """""")

                    If Count > 1 Then Line2 = Line2 & ","
                    If HasStart Then Line2 = Line2 & """"
                    Line2 = Line2 & Field
                    If HasEnd Then Line2 = Line2 & """"
                Loop

                If Len(Line2) > 0 Then
                    PrintLine(File2, Line2)
                End If
            Loop

            ' Close before reporting success so a failed flush is reported
            ' as an error rather than as "Done".
            FileClose(File1, File2)
            Console.WriteLine("Done")
        Catch ex As Exception
            Console.WriteLine("Error: " & ex.Message)
        Finally
            ' Release any handle still open after an error; this does
            ' nothing when both files were already closed above.
            FileClose()
        End Try
    End Sub

    ' Returns the part of Text that precedes the first occurrence of
    ' Separator and removes that part, together with the separator, from
    ' Text. When Separator does not occur, the whole of Text is returned and
    ' Text becomes empty.
    Function Strip(ByRef Text As String, ByRef Separator As String) As String
        Dim nPos As Integer
        nPos = InStr(Text, Separator)
        If nPos > 0 Then
            Strip = Left(Text, nPos - 1)
            Text = Mid(Text, nPos + Len(Separator))
        Else
            Strip = Text
            Text = ""
        End If
    End Function
End Module
这些看起来都不像是格式错误的 CSV。**1** 只有在转义字符替换了逗号或引号时才会有问题,但你似乎并不认为是这种情况。**2** 完全没问题,那只是另一个字段。**3** 是合法的 CSV——字段可以用引号包裹。**4** 同样,只要引号被转义(`""`),就是合法的 CSV。**5** 没问题,与 **4** 相同。**6** 只有在你试图一次性读取或解析全部内容时才会有问题。所以,只要你的解析器能处理长度不同的行,就应该没问题。你有没有一个实际上无效的 CSV 示例?(该链接显示引号有问题) – Kobi 2012-07-31 06:10:09
顺便说一句,快速搜索一下就找到了这个:http://www.fec.gov/support/DataConversionTools.shtml (是的,我花了 20 分钟才想到“可能已经有人做过这件事了”) – Kobi 2012-07-31 06:30:20
我们能否看到Ruby的CSV解析器无法处理的片段? – 2012-07-31 15:54:03