2016-04-22 82 views
1

我想读取一批格式相同的 Word docx 文件,并把其中的数据提取到数据库中。读取文本没有任何问题,但复选框让我很头疼。需要说明的是,我是 docx4j 的新手,已经为这个问题苦苦挣扎了四天。非常希望能得到一些帮助或建议。(标题:Docx4j - 如何获取 docx 复选框的状态)

我附上了正在读取的文档(test.docx)。第一个复选框是我自己用 Word 插入的,它能被我的代码检测到,并在第一次遍历时以 CTSdtCell 的形式出现,但其他复选框检测不到。它们在文件中似乎是以另一种方式表示的,用到了 CTObject、CTShape、CTImageData 和 CTControl,而我找不到从这些对象(或其中任何一个)取得复选框状态的方法。

/**
 * Loads {@code test.docx} from the working directory and walks the content of
 * its main document part with a {@link Finder}, which prints what it visits.
 *
 * @param args unused
 * @throws Exception if loading the package or reading its content fails
 */
public static void main(String[] args) throws Exception {
    java.io.File input = new java.io.File("test.docx");
    WordprocessingMLPackage pkg = WordprocessingMLPackage.load(input);
    MainDocumentPart mainPart = pkg.getMainDocumentPart();

    Finder callback = new Finder(FldChar.class);

    // The TraversalUtil instance is not kept: constructing it is what drives
    // the callback over the document content here.
    new TraversalUtil(mainPart.getContent(), callback);
}

/**
 * Traversal callback that dumps the visited object tree to standard output and
 * reports the state of checkbox content controls.
 *
 * <p>For every visited object it prints the runtime class name. In addition:
 * <ul>
 *   <li>for {@code CTSdtCell}, {@code SdtRun} and {@code SdtBlock} it scans the
 *       structured-document-tag properties for a {@code w14} checkbox and
 *       prints its value;</li>
 *   <li>for {@code Text} it prints the text value;</li>
 *   <li>objects whose exact class equals {@link #typeToFind} are appended to
 *       {@link #results}.</li>
 * </ul>
 *
 * <p>Only checkbox content controls are recognised. Legacy ActiveX checkboxes
 * (a {@code w:object} holding a {@code w:control}) carry no checkbox element
 * in the document XML and are therefore not reported by this class.
 */
public static class Finder extends CallbackImpl {

    /** Exact class whose instances are collected into {@link #results}. */
    protected Class<?> typeToFind;

    /** Objects encountered during traversal whose class equals {@link #typeToFind}. */
    public List<Object> results = new ArrayList<Object>();

    /**
     * @param typeToFind exact class (no subclass matching) to collect
     */
    protected Finder(Class<?> typeToFind) {
        this.typeToFind = typeToFind;
    }

    /**
     * Inspects one visited object; see the class description for what is printed.
     *
     * @param o the object currently visited by the traversal
     * @return always {@code null}; matches are accumulated in {@link #results}
     */
    @Override
    public List<Object> apply(Object o) {
        System.out.println(o.getClass().getName());

        // The three content-control flavours are handled separately because each
        // is cast to its own type; the properties object is null-checked so a
        // control without properties is skipped instead of throwing.
        if (o instanceof org.docx4j.wml.CTSdtCell) {
            org.docx4j.wml.CTSdtCell cell = (org.docx4j.wml.CTSdtCell) o;
            if (cell.getSdtPr() != null) {
                findCheckbox(cell.getSdtPr().getRPrOrAliasOrLock());
            }
        }

        if (o instanceof org.docx4j.wml.SdtRun) {
            org.docx4j.wml.SdtRun run = (org.docx4j.wml.SdtRun) o;
            if (run.getSdtPr() != null) {
                findCheckbox(run.getSdtPr().getRPrOrAliasOrLock());
            }
        }

        if (o instanceof org.docx4j.wml.SdtBlock) {
            org.docx4j.wml.SdtBlock block = (org.docx4j.wml.SdtBlock) o;
            if (block.getSdtPr() != null) {
                findCheckbox(block.getSdtPr().getRPrOrAliasOrLock());
            }
        }

        if (o instanceof org.docx4j.wml.Text) {
            System.out.println("  Text Value : " + ((org.docx4j.wml.Text) o).getValue());
        }

        // Adapt as required
        if (o.getClass().equals(typeToFind)) {
            results.add(o);
        }
        return null;
    }

    /**
     * Prints the value of every {@code w14} checkbox found in the given
     * property list.
     *
     * @param objs property objects of a content control; {@code null} is
     *             treated as empty
     */
    private static void findCheckbox(List<Object> objs) {
        if (objs == null) {
            return;
        }
        for (Object obj : objs) {
            if (!(obj instanceof javax.xml.bind.JAXBElement)) {
                continue;
            }
            // Test the wrapped value itself rather than comparing class-name
            // strings; this also skips elements whose value is null.
            Object value = ((javax.xml.bind.JAXBElement<?>) obj).getValue();
            if (!(value instanceof org.docx4j.w14.CTSdtCheckbox)) {
                continue;
            }
            org.docx4j.w14.CTOnOff checked = ((org.docx4j.w14.CTSdtCheckbox) value).getChecked();
            // A checkbox without a <w14:checked> child yields null here; print
            // "null" rather than failing, and leave the interpretation of a
            // missing state to the reader of the output.
            String state = (checked == null) ? null : checked.getVal();
            System.out.println("  CheckBox found with value=" + state);
        }
    }
}

输出结果如下:

org.docx4j.wml.Tbl 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : WORK INSTRUCTION # 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Inline 
org.docx4j.dml.CTBlip 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : A 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : STEP BY STEP 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : - 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : WORK INSTRUCTION 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Inline 
org.docx4j.dml.CTBlip 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 1234567 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : TASK 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : Chlorine drum change 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : DATE 
org.docx4j.wml.CTSdtCell 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 12/07/2015 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : MACHINE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : ORIGINATOR 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : D.GROVE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CLOCK NUMBER 
org.docx4j.wml.CTSdtCell 
     CheckBox found with value=1 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : ? 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : AREA 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CHLORINE HOUSE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CHECKED 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : (EXPERT) 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : J Clarke 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CLOCK NUMBER 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 4985 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : PPE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : EYE 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : EAR 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : FOOT 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : HEAD 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Drawing 
org.docx4j.dml.wordprocessingDrawing.Anchor 
org.docx4j.dml.CTBlip 
org.docx4j.dml.CTColorChangeEffect 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : HAND 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShapetype 
org.docx4j.vml.CTStroke 
org.docx4j.vml.CTFormulas 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTF 
org.docx4j.vml.CTPath 
org.docx4j.vml.officedrawing.CTLock 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : COSHH 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : SPECIAL PPE REQUIREMENTS 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : *SITE 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : R/A NUMBER 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CONSIDERATION 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : PRODUCTS 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : B.A. EQUIPMENT 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : 12668 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.CTObject 
org.docx4j.vml.CTShape 
org.docx4j.vml.CTImageData 
org.docx4j.wml.CTControl 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : CHLORINE 
org.docx4j.wml.R 
org.docx4j.wml.Text 
     Text Value : GAS 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tr 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.Tc 
org.docx4j.wml.P 
org.docx4j.wml.P 
org.docx4j.wml.CTBookmark 
org.docx4j.wml.CTMarkupRange 

我现在补充了 MainDocumentPart.getXML() 输出中的一段内容,即包含其中一个难以检测的复选框的那个单元格。我看不出其中有任何内容能告诉我它的值。有谁能告诉我我遗漏了什么吗?

<w:tc> 
     <w:tcPr> 
      <w:tcW w:w="1015" w:type="dxa"/> 
      <w:tcBorders> 
       <w:left w:val="single" w:color="auto" w:sz="24" w:space="0"/> 
       <w:bottom w:val="single" w:color="auto" w:sz="24" w:space="0"/> 
       <w:right w:val="single" w:color="auto" w:sz="24" w:space="0"/> 
      </w:tcBorders> 
      <w:vAlign w:val="center"/> 
     </w:tcPr> 
     <w:p w:rsidRPr="00A7008C" w:rsidR="00F909A4" w:rsidP="00017AE9" w:rsidRDefault="000F5760"> 
      <w:pPr> 
       <w:jc w:val="center"/> 
       <w:rPr> 
        <w:b/> 
        <w:color w:val="FFFFFF" w:themeColor="background1"/> 
       </w:rPr> 
      </w:pPr> 
      <w:r> 
       <w:rPr> 
        <w:b/> 
        <w:color w:val="FFFFFF" w:themeColor="background1"/> 
        <w:sz w:val="36"/> 
       </w:rPr> 
       <w:object w:dxaOrig="225" w:dyaOrig="225"> 
        <v:shape type="#_x0000_t75" style="width:12pt;height:29.25pt" id="_x0000_i1063" o:ole=""> 
         <v:imagedata o:title="" r:id="rId17"/> 
        </v:shape> 
        <w:control w:name="CheckBox11" w:shapeid="_x0000_i1063" r:id="rId18"/> 
       </w:object> 
      </w:r> 
      <w:bookmarkEnd w:id="0"/> 
     </w:p> 
    </w:tc> 
+0

我已经添加了包含难以捉摸复选框的单元格的xml。为什么没有显示值? – Richard

回答

0

我已经破解了它! CTImageData指向可以通过文档关系访问的图像。这些图像包含勾号或未勾选的框。通过检查图像的大小,我可以知道它是什么。

我对 Word 的了解仅限于表面的使用,也不知道这些“复选框”是怎么创建出来的,但它们看起来不是用我创建测试复选框的那种方式创建的。因此,我不知道当组织升级 MS Office 软件并再次编辑、保存这些文档时,这些图像是否会发生变化。不过,对我这个软件的需求在初始加载之后很快就会发生变化,所以这种风险对我来说影响不大。

+0

旧式 ActiveX 控件是通过“开发工具”菜单 > “旧式工具”创建的。 – JasonPlutext

0

现有的复选框是传统的ActiveX控件:

  <w:object w:dxaOrig="225" w:dyaOrig="225"> 
      <v:shapetype id="_x0000_t75" coordsize="21600,21600" o:spt="75" o:preferrelative="t" path="m@4@5l@4@11@9@11@9@5xe" filled="f" stroked="f"> 
       <v:stroke joinstyle="miter"/> 
       <v:formulas> 
       : 
       </v:formulas> 
       <v:path o:extrusionok="f" gradientshapeok="t" o:connecttype="rect"/> 
       <o:lock v:ext="edit" aspectratio="t"/> 
      </v:shapetype> 
      <v:shape id="_x0000_i1025" type="#_x0000_t75" style="width:12pt;height:29.25pt" o:ole=""> 
       <v:imagedata r:id="rId15" o:title=""/> 
      </v:shape> 
      <w:control r:id="rId16" w:name="CheckBox" w:shapeid="_x0000_i1025"/> 
      </w:object> 

而您自己创建的那个,是新式的、对 XML 友好的复选框内容控件。

还有复选框字符和复选框表单域...