如下图的word目录大纲该怎么获取呢?
曾尝试用 jacob 的方式读取,确实可以直接拿到大纲的编号,但 jacob 似乎不支持在 Linux 上调用(未经确认:网上普遍说不支持,我自己目前也没有找到在 Linux 上调用的办法)。没办法,只好用 POI 硬编码实现了一版,目前只支持读取 4 个层级以内的大纲。注意:大纲的编号(即 1.、1.1.、…… 这些编号)必须是 Word 自动生成的,而不是手动写上去的,这是规范。基本原理是:读取每个大纲标题的级别,再根据标题级别来构造层级关系。
话不多说上才艺:
maven依赖
<!--
  Apache POI artifacts used by ReadWordTest.java to read .docx (XWPF) files.
  NOTE(review): ReadWordTest.java also imports org.apache.commons.lang3.StringUtils,
  and none of the artifacts below is commons-lang3. Unless it reaches the classpath
  some other way, an org.apache.commons:commons-lang3 dependency is presumably
  needed as well; confirm against the real pom.
-->
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
</dependency>
ReadWordTest.java
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import java.io.File;
import java.io.FileInputStream;
import java.util.*;
public class ReadWordTest {
/**
* Word中的大纲级别,可以通过getPPr().getOutlineLvl()直接提取,但需要注意,Word中段落级别,通过如下三种方式定义:
* 1、直接对段落进行定义;
* 2、对段落的样式进行定义;
* 3、对段落样式的基础样式进行定义。
* 因此,在通过“getPPr().getOutlineLvl()”提取时,需要依次在如上三处读取。
*
* @param doc
* @param para
* @return
*/
private static String getTitleLvl(XWPFDocument doc, XWPFParagraph para) {
String titleLvl = "";
try {
//判断该段落是否设置了大纲级别
if (para.getCTP().getPPr().getOutlineLvl() != null) {
return String.valueOf(para.getCTP().getPPr().getOutlineLvl().getVal());
}
} catch (Exception e) {
}
try {
//判断该段落的样式是否设置了大纲级别
if (doc.getStyles().getStyle(para.getStyle()).getCTStyle().getPPr().getOutlineLvl() != null) {
return String.valueOf(doc.getStyles().getStyle(para.getStyle()).getCTStyle().getPPr().getOutlineLvl().getVal());
}
} catch (Exception e) {
}
try {
//判断该段落的样式的基础样式是否设置了大纲级别
if (doc.getStyles().getStyle(doc.getStyles().getStyle(para.getStyle()).getCTStyle().getBasedOn().getVal())
.getCTStyle().getPPr().getOutlineLvl() != null) {
String styleName = doc.getStyles().getStyle(para.getStyle()).getCTStyle().getBasedOn().getVal();
return String.valueOf(doc.getStyles().getStyle(styleName).getCTStyle().getPPr().getOutlineLvl().getVal());
}
} catch (Exception e) {
}
try {
if (para.getStyleID() != null) {
return para.getStyleID();
}
} catch (Exception e) {
}
return titleLvl;
}
public static void main(String[] args) throws Exception {
File file = new File("C:\\Users\\lenovo\\Desktop\\test5.docx");
FileInputStream fis = new FileInputStream(file);
XWPFDocument xdoc = new XWPFDocument(fis);
List<XWPFParagraph> paragraphs = xdoc.getParagraphs();
List<ReadDto> readDtos = new ArrayList<>();
for (XWPFParagraph paragraph : paragraphs) {
String text = paragraph.getText();
String titleLvl = getTitleLvl(xdoc, paragraph);
if (StringUtils.isNotEmpty(titleLvl)) {
int level = Integer.valueOf(titleLvl);
// System.out.println("text: " + text + ", titleLvl: " + titleLvl);
ReadDto readDto = new ReadDto();
readDto.setText(text);
readDto.setTitleLevel(level);
readDtos.add(readDto);
}
}
int zeroCount = 0;//0出现的次数
int oneCount = 0;//1出现的次数
int twoCount = 0;//2出现的次数
int threeCount = 0;//3出现的次数
int curPoint = 0;//当前指针值
for (int i = 0; i < readDtos.size(); i++) {
int curLevel = readDtos.get(i).getTitleLevel();
if (curLevel > 4) {
throw new RuntimeException("暂不支持目录层级超过4层!!!");
}
if (curPoint == 0) {
zeroCount++;
curPoint = 1;
readDtos.get(i).setPrefix(zeroCount + ".");
} else if (curPoint == 1) {
if (curLevel == 0) {
zeroCount++;
oneCount = 0;
twoCount = 0;
threeCount = 0;
curPoint = 1;
readDtos.get(i).setPrefix(zeroCount + ".");
}
if (curLevel == 1) {
curPoint++;
oneCount++;
readDtos.get(i).setPrefix(zeroCount + "." + "1.");
}
} else if (curPoint == 2) {
if (curLevel == 0) {
zeroCount++;
oneCount = 0;
twoCount = 0;
threeCount = 0;
curPoint = 1;
readDtos.get(i).setPrefix(zeroCount + ".");
} else if (curLevel == 1) {
oneCount++;
twoCount = 0;
curPoint = 2;
readDtos.get(i).setPrefix(zeroCount + "." + oneCount + ".");
} else if (curLevel == 2) {
curPoint = 3;
twoCount++;
threeCount = 0;
readDtos.get(i).setPrefix(zeroCount + "." + oneCount + "." + twoCount + ".");
}
} else if (curPoint == 3) {
if (curLevel == 0) {
zeroCount++;
oneCount = 0;
twoCount = 0;
threeCount = 0;
curPoint = 1;
readDtos.get(i).setPrefix(zeroCount + ".");
} else if (curLevel == 1) {
oneCount++;
curPoint = 2;
twoCount = 0;
readDtos.get(i).setPrefix(zeroCount + "." + oneCount + ".");
} else if (curLevel == 2) {
curPoint = 3;
twoCount++;
threeCount = 0;
readDtos.get(i).setPrefix(zeroCount + "." + oneCount + "." + twoCount + ".");
} else if (curLevel == 3) {
threeCount++;
if (i < readDtos.size() - 1) {
int nextLevel = readDtos.get(i + 1).getTitleLevel();
if (nextLevel > 3) {
throw new RuntimeException("暂不支持目录层级超过4层!!!");
}
if (nextLevel == 3) {
curPoint = 3;
} else if (nextLevel < 3) {
curPoint = nextLevel + 1;
}
}
readDtos.get(i).setPrefix(zeroCount + "." + oneCount + "." + twoCount + "." + threeCount + ".");
}
}
}
for (ReadDto dto : readDtos) {
System.out.println("text:" + dto.getPrefix() + dto.getText() + ",level:" + dto.getTitleLevel());
}
}
}
ReadDto.java
/**
 * Mutable holder for one heading of the document outline: its text, its outline level and
 * the number prefix generated for it.
 */
public class ReadDto {

    /** Generated number prefix such as "1.2."; {@code null} until it is set. */
    private String prefix;

    /** Text of the heading paragraph; {@code null} until it is set. */
    private String text;

    /** Outline level of the heading; 0 until it is set. */
    private int titleLevel;

    // ---- readers ----

    /** @return the generated number prefix, or {@code null} if none has been set */
    public String getPrefix() {
        return this.prefix;
    }

    /** @return the heading text, or {@code null} if none has been set */
    public String getText() {
        return this.text;
    }

    /** @return the outline level of the heading */
    public int getTitleLevel() {
        return this.titleLevel;
    }

    // ---- writers ----

    /** @param prefix the generated number prefix to store */
    public void setPrefix(String prefix) {
        this.prefix = prefix;
    }

    /** @param text the heading text to store */
    public void setText(String text) {
        this.text = text;
    }

    /** @param titleLevel the outline level to store */
    public void setTitleLevel(int titleLevel) {
        this.titleLevel = titleLevel;
    }
}
输出如下:
text:1.第一章,level:0
text:1.1.模块一,level:1
text:1.1.1.顶顶顶顶,level:2
text:1.2.模块二,level:1
text:1.2.1.功能一,level:2
text:1.2.2.功能二,level:2
text:1.3.模块三,level:1
text:1.3.1.功能一,level:2
text:2.第二章,level:0
text:2.1.模块一一,level:1
text:2.2.模块二二,level:1
目前的实现写得比较丑陋,后续有空再看看能不能实现一个更优雅、且不限制层级的版本吧>_<
原网址: 访问
创建于: 2023-05-08 16:40:49
目录: default
标签: 无
未标明原创文章均为采集,版权归作者所有,转载无需和我联系,请注明原出处,南摩阿彌陀佛,知识,不只知道,要得到
最新评论