2017-06-16 30 views
0

下面是我目前的程序:它检查目录中的多个文件以查找特定关键字,然后把每个文件中的人名、地址、角色等值分别存入5个列表。将多个列表写入电子表格中的多列,并设置列标题。

我现在想做的是:把这些列表拆分成各自的值,并将每个列表中的值垂直保存到各自的列中,从而使被扫描目录中的每个文件在电子表格中占一行。我还需要在第0行为每一列写入列标题。

from os import walk 
import os 
import os, re, string 
from os import walk 
from xlutils.copy import copy  
from xlrd import open_workbook 
import xlwt 


# Script under discussion: walks `folderpath`, scans every file line by line
# for five labelled fields (person, aut-num, organisation, role, address),
# collects the matches in five module-level lists and then writes the
# de-duplicated values to SHEET_1.
folderpath = "./filepath"      #Path for the files 
book = open_workbook("spreadsheetname.xlsx")  #Name of the spreadsheet 
OUTPUT_NAME = ("spreadsheetname.xlsx") 

peopleList = []          #List for People Column 
asnList = []          #List for ASN Column 
organizationList = []        #List for Organization Column 
roleList = []          #List for Role Column 
addressList = []         #List for Address Column 


# `worksheet` is the writable copy of `book`; SHEET_1 is its first sheet.
# NAME is assigned an empty tuple and is not used again in this script.
worksheet = copy(book); NAME =(); SHEET_1 = worksheet.get_sheet(0) 
# NOTE(review): bookS is a separate, new workbook; nothing below writes to it.
bookS = xlwt.Workbook() 

# Write cursor shared by all five output loops at the bottom of the script.
row = 0 
column = 0 

for(path, dirs, files) in os.walk(folderpath, topdown=True): 

    for filename in files: 
     print(filename) 
     # NOTE(review): joins with the fixed root "./filepath" instead of `path`,
     # so a file found in a sub-directory resolves to the wrong location.
     filepath = os.path.join("./filepath", filename) 

     with open(filepath, 'r') as currentfile: 
      for line in currentfile: 

       # Each pattern captures the rest of the line after its label.
       people = re.findall(r"person: (.*)",line) 
       asn = re.findall(r"aut-num: (.*)",line) 
       organization = re.findall(r"organisation: (.*)",line) 
       role = re.findall(r"role: (.*)",line) 
       address = re.findall(r"address: (.*)",line) 

       # Trim surrounding whitespace from every captured value.
       people = map(lambda x: x.strip(), people) 
       asn = map(lambda x: x.strip(), asn) 
       organization = map(lambda x: x.strip(), organization) 
       role = map(lambda x: x.strip(), role) 
       address = map(lambda x: x.strip(), address) 

       # Accumulate across all files; which file a value came from is not
       # recorded.
       for person in people: 
        peopleList.append(person) 
       for asn_c in asn: 
        asnList.append(asn_c) 
       for organ_c in organization: 
        organizationList.append(organ_c) 
       for role_c in role: 
        roleList.append(role_c) 
       for address_c in address: 
        addressList.append(address_c) 



# Output phase. `column` is never changed and `row` keeps counting across the
# five loops, so every value lands in column 0, one list below the other.
# NOTE(review): set(sorted(...)) removes duplicates, but a set is unordered,
# so the sort performed first has no effect on the iteration order.
for person in set(sorted(peopleList)): 
    SHEET_1.write(row, column, person) 
    row += 1 

for asn_c in set(sorted(asnList)): 
    SHEET_1.write(row, column, asn_c) 
    row += 1 

for organ_c in set(sorted(organizationList)): 
    SHEET_1.write(row, column, organ_c) 
    row += 1 

for role_c in set(sorted(roleList)): 
    SHEET_1.write(row, column, role_c) 
    row += 1 

for address_c in set(sorted(addressList)): 
    SHEET_1.write(row, column, address_c) 
    row += 1 

# NOTE(review): this saves bookS, not `worksheet` (the workbook SHEET_1
# belongs to), so the cells written above are not part of what is saved.
bookS.save(OUTPUT_NAME) 
+0

我没用过xlwt——也许你受某些限制而必须使用它——但作为另一种选择,我之前用过Python的csv库,发现用它做类似的事情很容易:https://docs.python.org/2/library/csv.html。 – knoight

回答

0
from os import walk 
import os 
import os, re, string 
from os import walk 
from xlutils.copy import copy  
from xlrd import open_workbook 
import xlwt 


# Build one spreadsheet row per scanned file.
#
# Every file below `folderpath` is read line by line and searched for the five
# labelled fields ("person:", "aut-num:", "organisation:", "role:",
# "address:").  All values found for one field in one file are joined into a
# single cell, so each file ends up as exactly one row below the header row.

folderpath = "./filepath"      # Path for the files
book = open_workbook("spreadsheetname.xlsx")  # Name of the spreadsheet
# NOTE(review): the result is saved through xlwt, which writes the legacy
# .xls format -- confirm that an ".xlsx" file name is really what is wanted.
OUTPUT_NAME = "spreadsheetname.xlsx"

# xlutils.copy() turns the read-only xlrd book into a writable workbook;
# SHEET_1 is the first sheet of that copy.
worksheet = copy(book)
SHEET_1 = worksheet.get_sheet(0)

headerList = ["People", "Asn", "Organization", "Role", "Address"]

# (record key, pattern) pairs, listed in the same order as headerList so the
# column position of a field never depends on dict iteration order.
fieldPatterns = [
    ("people", re.compile(r"person: (.*)")),
    ("asn", re.compile(r"aut-num: (.*)")),
    ("organization", re.compile(r"organisation: (.*)")),
    ("role", re.compile(r"role: (.*)")),
    ("address", re.compile(r"address: (.*)")),
]

# Separator used when a file holds several values for the same field.
VALUE_SEPARATOR = "; "

# One dict per scanned file, keyed like fieldPatterns.
finalList = []

for (path, dirs, files) in os.walk(folderpath, topdown=True):
    for filename in files:
        print(filename)
        # Join with the directory currently being walked, otherwise files in
        # sub-directories would be looked up in the top-level folder.
        filepath = os.path.join(path, filename)

        # A single line carries at most one of the fields, so the matches
        # have to be gathered over the whole file before a row can be built.
        found = {key: [] for key, _ in fieldPatterns}
        with open(filepath, 'r') as currentfile:
            for line in currentfile:
                for key, pattern in fieldPatterns:
                    found[key].extend(
                        value.strip() for value in pattern.findall(line))

        finalList.append({
            key: VALUE_SEPARATOR.join(values)
            for key, values in found.items()
        })

# Row 0: column titles.
for col, title in enumerate(headerList):
    SHEET_1.write(0, col, title)

# Rows 1..n: one row per file; starting at 1 keeps the header row intact.
for row, record in enumerate(finalList, 1):
    for col, (key, _) in enumerate(fieldPatterns):
        SHEET_1.write(row, col, record[key])

# Save the workbook that owns SHEET_1; a fresh xlwt.Workbook() would contain
# none of the cells written above.
worksheet.save(OUTPUT_NAME)

然而,这并不会去除重复项,也不会排序。这可以通过numpy、pandas之类的Python库来完成。

或者,你也可以这样做:

# Drop-in replacement for the five `for ... in set(sorted(...)):` write loops
# of the original script, which put every list into the same column.
# Expects SHEET_1, headerList and the five *List variables to exist already.

# Row 0 holds the column titles.
for i, title in enumerate(headerList):
    SHEET_1.write(0, i, title)

# De-duplicate first and sort afterwards: a set has no order, so sorting
# before the set() conversion would be thrown away.
peopleList = sorted(set(peopleList))
asnList = sorted(set(asnList))
organizationList = sorted(set(organizationList))
roleList = sorted(set(roleList))
addressList = sorted(set(addressList))

# One column per list, values running downwards from row 1.  Each column is
# written on its own so that lists of different lengths are not cut down to
# the shortest one, which is what zip() over all five lists would do.
columns = [peopleList, asnList, organizationList, roleList, addressList]
for listIndex, columnValues in enumerate(columns):
    for index, listItem in enumerate(columnValues):
        SHEET_1.write(index + 1, listIndex, listItem)

# NOTE: SHEET_1 belongs to `worksheet`, so the result has to be saved with
# worksheet.save(OUTPUT_NAME); saving any other workbook object will not
# include these cells.
+0

这些方法我似乎都没能运行成功 –