2016-04-27 126 views
0

我是 docx4j 的新手，需要帮助：如何在 Java 中使用 docx4j 根据某个字符串拆分 docx 文件，并把结果写入多个输出文件？

我曾尝试用 Apache POI 实现同样的功能，并且得到了输出文件；但是在把输出转换成 HTML 时遇到了样式缺失的问题。之后我补充了复制样式的代码，仍然遇到同样的问题。

下面是使用 Apache POI 的代码：

// Index of the element most recently appended to a destination document;
// main overwrites it after every paragraph or table it creates.
public static int pos = 0; 
    // Not referenced by any of the code shown alongside these fields.
    public static int posc = 0; 
    // Split-state flag: "n" until main meets a paragraph containing
    // "CONSTRUCTION DETAILS:", then "y". It selects the destination document.
    public static String ind = "n"; 
    // Not referenced by any of the code shown alongside these fields.
    final static int DEFAULT_FONT_SIZE = 10; 

    /**
     * Splits every input document into a "Product" part and a "Component" part.
     *
     * <p>Each entry {@code name} of the directory configured as {@code INPUT_DIR}
     * is expected to contain {@code name/name.docx}. Paragraphs and tables that
     * precede the first paragraph containing "CONSTRUCTION DETAILS:" are written
     * to {@code name/name-Product.docx}; that paragraph and everything after it
     * are written to {@code name/name-Component.docx}.
     *
     * <p>Every directory entry is processed (not only the last one listed), and
     * entries without the expected source file are reported and skipped.
     *
     * @param args unused
     * @throws FileNotFoundException if an output file cannot be created
     * @throws IOException if a document cannot be read or written
     * @throws XmlException kept in the signature for backward compatibility
     */
    public static void main(String[] args) throws FileNotFoundException,
            IOException, XmlException {

        File dir = new File(PropertyUtils.getProperty("INPUT_DIR"));
        String[] files = dir.list();

        // File.list() returns null when the path is not a readable directory.
        if (files == null || files.length == 0) {
            System.out.println("The directory is empty");
            return;
        }

        for (String aFile : files) {
            System.out.println(aFile);

            String prefix = PropertyUtils.getProperty("INPUT_DIR") + aFile
                    + "/" + aFile;
            File file = new File(prefix + ".docx");
            if (!file.isFile()) {
                System.out.println("Skipping " + aFile + ": " + file
                        + " not found");
                continue;
            }

            splitDocument(file, new File(prefix + "-Product.docx"),
                    new File(prefix + "-Component.docx"));
        }
    }

    /**
     * Reads one source document and writes its two halves.
     *
     * @param file source docx
     * @param outfilep receives the content before the marker paragraph
     * @param outfilec receives the marker paragraph and everything after it
     * @throws IOException if reading or writing fails
     */
    private static void splitDocument(File file, File outfilep, File outfilec)
            throws IOException {

        // The flag is static state; reset it so each document starts in the
        // "product" half regardless of what was processed before.
        ind = "n";

        try (FileInputStream in = new FileInputStream(file);
                XWPFDocument doc = new XWPFDocument(in);
                XWPFDocument destDoc = new XWPFDocument();
                XWPFDocument destDocc = new XWPFDocument()) {

            copyLayout(doc, destDoc);

            for (IBodyElement bodyElement : doc.getBodyElements()) {
                BodyElementType elementType = bodyElement.getElementType();

                if (elementType == BodyElementType.PARAGRAPH) {
                    XWPFParagraph pr = (XWPFParagraph) bodyElement;

                    if (pr.getText().contains("CONSTRUCTION DETAILS:")) {
                        ind = "y";
                        System.out.println("ind is Y++++++++++++");
                    }

                    appendParagraph(doc, "n".equals(ind) ? destDoc : destDocc,
                            pr);
                } else if (elementType == BodyElementType.TABLE) {
                    appendTable(doc, "n".equals(ind) ? destDoc : destDocc,
                            (XWPFTable) bodyElement);
                }
            }

            // Open the outputs only once both documents are fully built, and
            // close them even if writing fails.
            try (OutputStream out = new FileOutputStream(outfilep);
                    OutputStream outc = new FileOutputStream(outfilec)) {
                destDoc.write(out);
                destDocc.write(outc);
            }
        }
    }

    /** Looks up a style in the source document; null when it has no styles or no match. */
    private static XWPFStyle findStyle(XWPFDocument doc, String styleId) {
        return doc.getStyles() == null ? null : doc.getStyles().getStyle(styleId);
    }

    /** Appends a source paragraph (and its style) to the end of the destination document. */
    private static void appendParagraph(XWPFDocument doc, XWPFDocument dest,
            XWPFParagraph pr) {
        copyStyle(doc, dest, findStyle(doc, pr.getStyleID()));

        // Create a placeholder paragraph at the end, then overwrite that slot
        // with the source paragraph.
        dest.createParagraph().createRun();
        pos = dest.getParagraphs().size() - 1;

        // Normalise spacing on the paragraph before it is copied: no space
        // before/after, automatic line rule with a line value of 240.
        CTPPr ppr = pr.getCTP().getPPr();
        if (ppr == null) {
            ppr = pr.getCTP().addNewPPr();
        }
        CTSpacing spacing = ppr.isSetSpacing() ? ppr.getSpacing()
                : ppr.addNewSpacing();
        spacing.setAfter(BigInteger.valueOf(0));
        spacing.setBefore(BigInteger.valueOf(0));
        spacing.setLineRule(STLineSpacingRule.AUTO);
        spacing.setLine(BigInteger.valueOf(240));

        dest.setParagraph(pr, pos);
    }

    /** Appends a source table (and its style) to the end of the destination document. */
    private static void appendTable(XWPFDocument doc, XWPFDocument dest,
            XWPFTable table) {
        copyStyle(doc, dest, findStyle(doc, table.getStyleID()));

        // Create a placeholder table at the end, then overwrite that slot
        // with the source table.
        dest.createTable();
        pos = dest.getTables().size() - 1;
        dest.setTable(pos, table);
    }

    /**
     * Copies a table or paragraph style into the destination document.
     *
     * <p>The styles copied are those returned by
     * {@code srcDoc.getStyles().getUsedStyleList(style)}. A style whose id is
     * already present in the destination is skipped, so calling this once per
     * paragraph or table does not pile up duplicate style definitions.
     *
     * @param srcDoc document the style comes from
     * @param destDoc document to receive the styles; its styles part is created
     *        when missing
     * @param style style to copy; nothing happens when it is null
     */
    private static void copyStyle(XWPFDocument srcDoc, XWPFDocument destDoc,
            XWPFStyle style) {
        if (srcDoc == null || destDoc == null || style == null) {
            return;
        }

        // A source document without a styles part has nothing to copy.
        if (srcDoc.getStyles() == null) {
            return;
        }

        if (destDoc.getStyles() == null) {
            destDoc.createStyles();
        }

        List<XWPFStyle> usedStyleList = srcDoc.getStyles().getUsedStyleList(
                style);
        for (XWPFStyle xwpfStyle : usedStyleList) {
            String styleId = xwpfStyle.getStyleId();
            if (styleId != null && destDoc.getStyles().styleExist(styleId)) {
                continue;
            }
            destDoc.getStyles().addStyle(xwpfStyle);
        }
    }

     /**
      * Copies page margins and page size from the source document's body-level
      * section properties to the destination document.
      *
      * <p>The destination's section properties element is reused when present and
      * created only once otherwise, so margins and page size end up in the same
      * element. Missing section properties, margins or page size in the source
      * are skipped instead of causing a NullPointerException.
      *
      * @param srcDoc document whose layout is read
      * @param destDoc document whose layout is written
      */
     private static void copyLayout(XWPFDocument srcDoc, XWPFDocument destDoc)
     {
      org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr srcSectPr =
        srcDoc.getDocument().getBody().getSectPr();
      if (srcSectPr == null) {
       return;
      }

      org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSectPr destSectPr =
        destDoc.getDocument().getBody().isSetSectPr()
          ? destDoc.getDocument().getBody().getSectPr()
          : destDoc.getDocument().getBody().addNewSectPr();

      // Copying the whole element carries over exactly the attributes the
      // source defines (bottom, footer, gutter, header, left, right, top).
      if (srcSectPr.isSetPgMar()) {
       CTPageMar pgMar = srcSectPr.getPgMar();
       destSectPr.setPgMar(pgMar);
      }

      // Likewise for the page size attributes (code, h, orient, w).
      if (srcSectPr.isSetPgSz()) {
       CTPageSz pgSzSrc = srcSectPr.getPgSz();
       destSectPr.setPgSz(pgSzSrc);
      }
     }
+0

也许你可以分享一些你已经试过的代码，这样也许有人可以帮你指出问题出在哪里。 – gottlieb76

+0

如果你不发布任何代码或错误信息，别人就无法帮助你。 – QoP

回答

0

我希望这能解决问题。

/**
 * Splits docx files in two at the "CONSTRUCTION DETAILS:" paragraph using docx4j.
 */
public class SplitUsingDocx4j {

    /** Paragraph text (compared case-insensitively) that ends the first part. */
    private static final String SPLIT_MARKER = "CONSTRUCTION DETAILS:";

    /**
     * Splits {@code name/name.docx} for every entry {@code name} of the directory
     * configured as {@code INPUT_DIR}.
     *
     * <p>Every entry is processed (not only the last one listed); entries
     * without the expected source file are reported and skipped.
     *
     * @param args unused
     * @throws Docx4JException if a document cannot be loaded or saved
     * @throws FileNotFoundException kept in the signature for backward compatibility
     */
    public static void main(String[] args) throws Docx4JException,
            FileNotFoundException {
        File dir = new File(PropertyUtils.getProperty("INPUT_DIR"));
        String[] files = dir.list();

        // File.list() returns null when the path is not a readable directory.
        if (files == null || files.length == 0) {
            System.out.println("The directory is empty");
            return;
        }

        for (String aFile : files) {
            System.out.println(aFile);

            File file = new File(PropertyUtils.getProperty("INPUT_DIR") + aFile
                    + "/" + aFile + ".docx");
            if (!file.isFile()) {
                System.out.println("Skipping " + aFile + ": " + file
                        + " not found");
                continue;
            }
            split(file);
        }
    }

    /**
     * Writes {@code <name>-1.docx} and {@code <name>-2.docx} for one source file.
     *
     * <p>Content up to and including the marker paragraph goes to the first
     * file; everything after it goes to the second. Output paths are relative,
     * i.e. resolved against the working directory.
     *
     * @param file source docx
     * @throws Docx4JException if the document cannot be loaded or saved
     */
    private static void split(File file) throws Docx4JException {
        // Creating new documents
        WordprocessingMLPackage doc1 = WordprocessingMLPackage.createPackage();
        WordprocessingMLPackage doc2 = WordprocessingMLPackage.createPackage();

        // loading existing document
        WordprocessingMLPackage wordMLPackage = WordprocessingMLPackage
                .load(file);
        MainDocumentPart tempDocPart = wordMLPackage.getMainDocumentPart();
        List<Object> obj = tempDocPart.getContent();

        // for copying styles from existing doc to new docs; a source without a
        // styles part leaves the defaults of the new packages untouched
        StyleDefinitionsPart sdp = tempDocPart.getStyleDefinitionsPart();
        if (sdp != null) {
            Styles tempStyle = sdp.getJaxbElement();
            doc1.getMainDocumentPart().getStyleDefinitionsPart()
                    .setJaxbElement(tempStyle);
            doc2.getMainDocumentPart().getStyleDefinitionsPart()
                    .setJaxbElement(tempStyle);
        }

        boolean flag = false;
        for (Object object : obj) {
            if (!flag) {
                if (object.toString().equalsIgnoreCase(SPLIT_MARKER)) {
                    flag = true;
                }
                // The marker paragraph itself still belongs to the first part.
                doc1.getMainDocumentPart().addObject(object);
            } else {
                doc2.getMainDocumentPart().addObject(object);
            }
        }

        String fileName = file.getName().replace(".docx", "");
        doc1.save(new File(fileName + "-1.docx"));
        doc2.save(new File(fileName + "-2.docx"));
    }
}
+0

谢谢 Amrutha……它可以正常工作。 –

1

用蛮力的方式拆分 docx 很容易：删除你不需要的内容(段落等)，然后保存结果。

这样,原始关系将保持不变,但您的docx容器可能比必要的更大,因为它可能具有不再使用的图像等。

使用这种方式时，仍然有一些需要注意的问题：

  • 拆分点落在书签的开始标记和结束标记之间(批注同理)
  • 自动编号的起始值可能不正确，除非你显式设置起始编号

显然你可以编写代码来解决这些问题。

或者，使用我们的商业版 docx4j 企业版，你可以通过它的“合并”代码指定需要段落 X 到 Y，它会生成一个只包含这些内容的 docx(即 docx 容器中没有无关的图像，被拆开的书签也会得到处理等)。

+0

我的文档中只有段落和表格。除了 docx4j 的商业企业版之外，还有其他方法吗？我试过 com.plutext.merge.DocumentBuilder，但它属于需要企业授权的功能。 –

+0

是的，如上所述：克隆 docx，然后从内容列表中删除所有不想要的部分，再保存。这是一个你可以自己动手实现的简单方法。 – JasonPlutext