2017-07-04 135 views
1

试图将.xlsx文件导入Python。我有谷歌和谷歌。我有这个代码编写和为.csv工作,但它需要为.xlsx文件。所以我基本拼凑在一起,并希望能够奏效。任何和所有的帮助,非常感谢!使用xlrd将xlsx文件导入Python使用

**添加了我的整个代码和数据文件的一个片段。我需要它导入.xlsx并执行数据。

import xlrd 

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx') 

worksheet = workbook.get_sheet(0, update_pos=True) 

header = [] 
report_card = [] 

# Count Stmnts for Required Data 
valid_count = 0 
app_req_count = 0 
intr_req_count = 0 
oe_intr_req_count = 0 
part_img_req_count = 0 
upc_req_count = 0 
unspsc_req_count = 0 
msds_req_count = 0 

# Count Stmts for Missing Data 
missing_app_count = 0 
missing_intr_count = 0 
missing_oe_intr_count = 0 
missing_mpcc_count = 0 
missing_attr_values_count = 0 
missing_part_img_count = 0 
missing_upc_count = 0 
missing_warr_text_count = 0 
missing_warr_pdf_count = 0 
missing_unspsc_count = 0 
missing_msds_count = 0 


for row_num, row in enumerate(worksheet): 
    if row_num <= 4: 
     # print(row) # Print out the header 
     header.append([row[0], row[2]]) 
    else: 
     hq_line, part_no, part_class, appl_req, appl_count, intr_req, 
     intr_count, oe_intr_req, has_oe_intr, has_attr_editor, 
     has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, 
     has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, 
     has_unspsc, attr_count, attrval_count, valid_part = row 

    if valid_part == 'YES': 
     valid_count += 1 

     # Required Parts Count 
     if appl_req == 'YES': 
      app_req_count += 1 
     if intr_req == 'YES': 
      intr_req_count += 1 
     if oe_intr_req == 'YES': 
      oe_intr_req_count += 1 
     if part_img_req == 'YES': 
      part_img_req_count += 1 
     if upc_req == 'YES': 
      upc_req_count += 1 
     if msds_req == 'YES': 
      msds_req_count += 1 

     # Missing Data Counts 
     if appl_req == 'YES' and appl_count == '0': 
      missing_app_count += 1 
     if intr_req == 'YES' and intr_count == '0': 
      missing_intr_count += 1 
     if oe_intr_req == 'YES' and has_oe_intr == '0': 
      missing_oe_intr_count += 1 
     if has_mpcc == 'NO': 
      missing_mpcc_count += 1 
     if has_attr_values == 'NO': 
      missing_attr_values_count += 1 
     if has_part_img == 'NO': 
      missing_part_img_count += 1 
     if upc_req == 'YES' and has_upc == '0': 
      missing_upc_count += 1 
     if warr_req == 'YES' and has_warr_txt == 'NO': 
      missing_warr_text_count += 1 
     if warr_req == 'YES' and has_warr_pdf == 'NO': 
      missing_warr_pdf_count += 1 
     if has_unspsc == 'NO': 
      missing_unspsc_count += 1 
     if msds_req == 'YES' and has_msds == 'NO': 
      missing_msds_count += 1 

# Statements for Required Counts 
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count)) 
application_required = ('Application Records Required: ', 
'{:,}'.format(app_req_count)) 
interchange_required = ('Interchange Records Required: ', 
'{:,}'.format(intr_req_count)) 
oe_interchange_required = ('OE Interchange Records Required: ', 
'{:,}'.format(oe_intr_req_count)) 
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count)) # Every valid part 
requires a MPCC 
attributes = ('Attributes Required: ', '{:,}'.format(valid_count)) # Every 
valid part requires attributes 
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count)) 
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count)) 
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count)) # 
Require warranty text/pdf on all parts 
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count)) # Require 
UNSPSC Codes for all parts 
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count)) 

# Statements for Missing Counts 
missing_applications = ('Missing Applications: ', 
'{:,}'.format(missing_app_count)) 
missing_interchange = ('Missing Interchange: ', 
'{:,}'.format(missing_intr_count)) 
missing_oe_interchange = ('Missing OE Interchange: ', 
'{:,}'.format(missing_oe_intr_count)) 
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count)) 
missing_attributes = ('Missing Attributes: ', 
'{:,}'.format(missing_attr_values_count)) 
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count)) 
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count)) 
missing_warranty_text = ('Missing Warranty Text: ', 
'{:,}'.format(missing_warr_text_count)) 
missing_warranty_pdf = ('Missing Warranty PDF: ', 
'{:,}'.format(missing_warr_pdf_count)) 
missing_unspsc = ('Missing UNSPSC Code: ', 
'{:,}'.format(missing_unspsc_count)) 
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count)) 

# CSV Output 
report_card.append(valid_parts) 
report_card.append(application_required) 
report_card.append(interchange_required) 
report_card.append(oe_interchange_required) 
report_card.append(mpcc) 
report_card.append(attributes) 
report_card.append(image_required) 
report_card.append(upc) 
report_card.append(warranties) 
report_card.append(unspsc) 
report_card.append(msds) 
report_card.append(missing_applications) 
report_card.append(missing_interchange) 
report_card.append(missing_oe_interchange) 
report_card.append(missing_mpcc) 
report_card.append(missing_attributes) 
report_card.append(missing_image) 
report_card.append(missing_UPC) 
report_card.append(missing_warranty_text) 
report_card.append(missing_warranty_pdf) 
report_card.append(missing_unspsc) 
report_card.append(missing_msds) 

for row in header: 
    print(row) 

for x in report_card: 
    print(x) 



with open('Report_Card.csv', 'w', newline='') as f: 
    writer = csv.writer(f) 
    writer.writerows(header) 
    writer.writerows(report_card) 

CSV文件:

Supplier Line:,,Gates Rubber - Denver (GAT),,,,,,,,,,,,,,,,,,,,,, 
Summary:,,Parts HQ Abbr,,,,,,,,,,,,,,,,,,,,,, 
ACCT No:,,40013586,,,,,,,,,,,,,,,,,,,,,, 
RecCount:,,10221,,,,,,,,,,,,,,,,,,,,,, 
Applicable Date:,,"June 14, 2017 (Wednesday)",,,,,,,,,,,,,,,,,,,,,, 
,,,,,,,,,,,,,,,,,,,,,,,, 
HQ Line,Part No,Part Class,Appl Req,Appl Count ,Intr Req,Intr Count ,OE Intr Req,Has OE Intr,Has Attr Editor, Has Attr Values,Part IMG Req,Has Part IMG,Has MPCC,Warr Req,Has Warr TXT,Has Warr PDF,MSDS Req,Has MSDS,UPC Req,Has UPC,Has UNSPSC,Attr Count ,AttrVal Count ,Valid Part 
GAT,'27210',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27211',O,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,14,YES 
GAT,'27212',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27213',S,NO,0,YES,1,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,30,13,YES 
GAT,'27220',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES 
GAT,'27221',S,NO,0,YES,2,YES,NO,YES,YES,YES,YES,YES,YES,YES,YES,NO,NO,YES,YES,YES,35,20,YES 
+0

答:我知道我糟蹋它错了,因为枚举为.csv导入工作,但现在它不认可书作为迭代表。 –

+0

如果您编辑问题以包含来自XLSX文件的小样本,这将有所帮助。 –

+0

如果一切都失败了,请将您的xlsx文件保存为CSV格式,然后使用它。 – boardrider

回答

0

因为它是一个Excel工作簿对象,而不是只是一个.csv文件你的书对象由张。所以首先你必须抓住一张你想要的工作表,然后你需要通过调用sheet.get_rows()获得行。

下面的代码应该可以工作,我在本地测试它。

import xlrd 

book = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx') 

header = [] 
report_card = [] 

# Count Stmnts for Required Data 
valid_count = 0 
app_req_count = 0 
intr_req_count = 0 

# Count Stmts for Missing Data 
missing_app_count = 0 
missing_intr_count = 0 
missing_oe_intr_count = 0 

sheet = book.sheets()[0] 
for row_num, row in enumerate(sheet.get_rows()): 
    if row_num <= 4: 
     # print(row) # Print out the header 
     header.append([row[0], row[2]]) 

print(header) 
+0

已发布我的整个代码和一个数据样本 –

0

您访问xlrd库代码需要一些修正如下:

import xlrd 

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx') 
worksheet = workbook.sheet_by_index(0) 

header = [] 
report_card = [] 

# Count Stmnts for Required Data 
valid_count = 0 
app_req_count = 0 
intr_req_count = 0 
oe_intr_req_count = 0 
part_img_req_count = 0 
upc_req_count = 0 
unspsc_req_count = 0 
msds_req_count = 0 

# Count Stmts for Missing Data 
missing_app_count = 0 
missing_intr_count = 0 
missing_oe_intr_count = 0 
missing_mpcc_count = 0 
missing_attr_values_count = 0 
missing_part_img_count = 0 
missing_upc_count = 0 
missing_warr_text_count = 0 
missing_warr_pdf_count = 0 
missing_unspsc_count = 0 
missing_msds_count = 0 

for row_num in range(worksheet.nrows): 
    row = worksheet.row_values(row_num) 

    if row_num <= 4: 
     # print(row) # Print out the header 
     header.append([row[0], row[2]]) 
    else: 
     hq_line, part_no, part_class, appl_req, appl_count, intr_req, \ 
     intr_count, oe_intr_req, has_oe_intr, has_attr_editor, \ 
     has_attr_values, part_img_req, has_part_img, has_mpcc, warr_req, \ 
     has_warr_txt, has_warr_pdf, msds_req, has_msds, upc_req, has_upc, \ 
     has_unspsc, attr_count, attrval_count, valid_part = row 

     if valid_part == 'YES': 
      valid_count += 1 

      # Required Parts Count 
      if appl_req == 'YES': 
       app_req_count += 1 
      if intr_req == 'YES': 
       intr_req_count += 1 
      if oe_intr_req == 'YES': 
       oe_intr_req_count += 1 
      if part_img_req == 'YES': 
       part_img_req_count += 1 
      if upc_req == 'YES': 
       upc_req_count += 1 
      if msds_req == 'YES': 
       msds_req_count += 1 

      # Missing Data Counts 
      if appl_req == 'YES' and appl_count == '0': 
       missing_app_count += 1 
      if intr_req == 'YES' and intr_count == '0': 
       missing_intr_count += 1 
      if oe_intr_req == 'YES' and has_oe_intr == '0': 
       missing_oe_intr_count += 1 
      if has_mpcc == 'NO': 
       missing_mpcc_count += 1 
      if has_attr_values == 'NO': 
       missing_attr_values_count += 1 
      if has_part_img == 'NO': 
       missing_part_img_count += 1 
      if upc_req == 'YES' and has_upc == '0': 
       missing_upc_count += 1 
      if warr_req == 'YES' and has_warr_txt == 'NO': 
       missing_warr_text_count += 1 
      if warr_req == 'YES' and has_warr_pdf == 'NO': 
       missing_warr_pdf_count += 1 
      if has_unspsc == 'NO': 
       missing_unspsc_count += 1 
      if msds_req == 'YES' and has_msds == 'NO': 
       missing_msds_count += 1 

# Statements for Required Counts 
valid_parts = ('Number of Valid Parts: ', '{:,}'.format(valid_count)) 
application_required = ('Application Records Required: ', '{:,}'.format(app_req_count)) 
interchange_required = ('Interchange Records Required: ', '{:,}'.format(intr_req_count)) 
oe_interchange_required = ('OE Interchange Records Required: ', '{:,}'.format(oe_intr_req_count)) 
mpcc = ('MPCC Required: ', '{:,}'.format(valid_count)) # Every valid part requires a MPCC 
attributes = ('Attributes Required: ', '{:,}'.format(valid_count)) # Every valid part requires attributes 
image_required = ('Image Required: ', '{:,}'.format(part_img_req_count)) 
upc = ('UPC Requited: ', '{:,}'.format(upc_req_count)) 
warranties = ('Warranty Text/PDF Required: ', '{:,}'.format(valid_count)) # Require warranty text/pdf on all parts 
unspsc = ('UNSPSC Code Required: ', '{:,}'.format(valid_count)) # Require UNSPSC Codes for all parts 
msds = ('MSDS Required: ', '{:,}'.format(msds_req_count)) 

# Statements for Missing Counts 
missing_applications = ('Missing Applications: ', '{:,}'.format(missing_app_count)) 
missing_interchange = ('Missing Interchange: ', '{:,}'.format(missing_intr_count)) 
missing_oe_interchange = ('Missing OE Interchange: ', '{:,}'.format(missing_oe_intr_count)) 
missing_mpcc = ('Missing MPCC: ', '{:,}'.format(missing_mpcc_count)) 
missing_attributes = ('Missing Attributes: ', '{:,}'.format(missing_attr_values_count)) 
missing_image = ('Missing Image: ', '{:,}'.format(missing_part_img_count)) 
missing_UPC = ('Missing UPC: ', '{:,}'.format(missing_upc_count)) 
missing_warranty_text = ('Missing Warranty Text: ', '{:,}'.format(missing_warr_text_count)) 
missing_warranty_pdf = ('Missing Warranty PDF: ', '{:,}'.format(missing_warr_pdf_count)) 
missing_unspsc = ('Missing UNSPSC Code: ', '{:,}'.format(missing_unspsc_count)) 
missing_msds = ('Missing MSDS: ', '{:,}'.format(missing_msds_count)) 

# CSV Output 
report_card.append(valid_parts) 
report_card.append(application_required) 
report_card.append(interchange_required) 
report_card.append(oe_interchange_required) 
report_card.append(mpcc) 
report_card.append(attributes) 
report_card.append(image_required) 
report_card.append(upc) 
report_card.append(warranties) 
report_card.append(unspsc) 
report_card.append(msds) 
report_card.append(missing_applications) 
report_card.append(missing_interchange) 
report_card.append(missing_oe_interchange) 
report_card.append(missing_mpcc) 
report_card.append(missing_attributes) 
report_card.append(missing_image) 
report_card.append(missing_UPC) 
report_card.append(missing_warranty_text) 
report_card.append(missing_warranty_pdf) 
report_card.append(missing_unspsc) 
report_card.append(missing_msds) 

for row in header: 
    print(row) 

for x in report_card: 
    print(x) 

with open('Report_Card.csv', 'w', newline='') as f: 
    writer = csv.writer(f) 
    writer.writerows(header) 
    writer.writerows(report_card)   

对于您所提供的数据,这给你一个Report_Card.csv文件,如下所示:

Supplier Line: Gates Rubber - Denver (GAT) 
Summary: Parts HQ Abbr 
ACCT No: 40013586 
RecCount: 10221 
Applicable Date: June 14, 2017 (Wednesday) 
Number of Valid Parts: 6 
Application Records Required: 0 
Interchange Records Required: 6 
OE Interchange Records Required: 6 
MPCC Required: 6 
Attributes Required: 6 
Image Required:  6 
UPC Requited: 6 
Warranty Text/PDF Required:  6 
UNSPSC Code Required: 6 
MSDS Required: 0 
Missing Applications: 0 
Missing Interchange: 0 
Missing OE Interchange:  0 
Missing MPCC: 0 
Missing Attributes:  0 
Missing Image: 0 
Missing UPC: 0 
Missing Warranty Text: 0 
Missing Warranty PDF: 0 
Missing UNSPSC Code: 0 
Missing MSDS: 0 

你的代码当然可以重新思考。而不是有太多的变量,即每列一个,请考虑使用类似Python Counter()这样的工具,就像字典一样。使用列表或词典可以让你的代码更简单。

0

该脚本可以让你的Excel数据表转换成词典列表:

import xlrd 

workbook = xlrd.open_workbook('GAT_US_PartReview_2017-06-23.xlsx', on_demand = True) 
worksheet = workbook.sheet_by_index(0) 
first_row = [] # The row where we stock the name of the column 
for col in range(worksheet.ncols): 
    first_row.append(worksheet.cell_value(0,col)) 
# transform the workbook to a list of dictionaries 
data =[] 
for row in range(1, worksheet.nrows): 
    elm = {} 
    for col in range(worksheet.ncols): 
     elm[first_row[col]]=worksheet.cell_value(row,col) 
    data.append(elm) 
print data 

您还可以使用Pandas

from pandas import * 
xls = ExcelFile('GAT_US_PartReview_2017-06-23.xlsx') 
df = xls.parse(xls.sheet_names[0]) 
print df.to_dict()