Java将富文本内容转为WORD，富文本内容中间附带图片（有核心代码）

天天被催着要把富文本的内容导出来。以前的做法就是把标签一去，只导出纯文本；现在不一样了，正文、标题、图片都要以各自的样式显示出来，其中最复杂的还是图片的处理。废话不多说，开搞。

主要使用的依赖

	implementation 'org.jsoup:jsoup:1.+'
    implementation 'org.apache.poi:poi-ooxml:5.2.+'

代码干货

1. 导出方法

    /**
     * Exports a news item as a Word (.docx) download.
     *
     * <p>The document starts with the centred title, followed by the rich-text body rendered by
     * {@code HTWConverter.processHtmlContent}. The download name is the first 10 characters of the
     * title, or "未命名新闻" when the title is empty.
     *
     * @param demo the news item; its title and HTML content are read
     * @return a 200 response whose body is the .docx bytes, sent as an attachment
     */
    public ResponseEntity<byte[]> exportNews(Demo demo) {
        String title = demo.getTitle();
        boolean hasTitle = StringUtils.hasLength(title);

        byte[] docBytes;
        // XWPFDocument is Closeable; release it once the bytes have been produced.
        try (XWPFDocument document = new XWPFDocument()) {
            // Margins are given in centimetres and converted by pageMarConvert.
            HTWConverter.pageMar(document,
                    HTWConverter.pageMarConvert(2.8),
                    HTWConverter.pageMarConvert(2.6),
                    HTWConverter.pageMarConvert(3.7),
                    HTWConverter.pageMarConvert(3.5));

            XWPFParagraph titleParagraph = document.createParagraph();
            titleParagraph.setAlignment(CENTER);
            XWPFRun titleRun = titleParagraph.createRun();
            titleRun.setFontFamily("小标宋");
            titleRun.setFontSize(22);
            titleRun.setText(hasTitle ? title : "");

            HTWConverter.processHtmlContent(document, demo.getContent());
            docBytes = HTWConverter.readDoc(document);
        } catch (java.io.IOException e) {
            throw new java.io.UncheckedIOException("Failed to close the exported document", e);
        }

        String fileName;
        if (hasTitle) {
            // Keep the download name short: at most the first 10 characters of the title.
            fileName = title.length() > 10 ? title.substring(0, 10) : title;
        } else {
            fileName = "未命名新闻";
        }
        // URLEncoder produces form encoding, where a space becomes '+'; a header value needs
        // percent-encoding, so turn those back into %20.
        String encodedName = URLEncoder.encode(fileName + ".docx", StandardCharsets.UTF_8)
                .replace("+", "%20");

        // The declared return type is a plain ResponseEntity, so it is returned directly.
        return ResponseEntity.ok()
                .contentType(MediaType.APPLICATION_OCTET_STREAM)
                .header(HttpHeaders.CONTENT_DISPOSITION,
                        "attachment; filename=\"" + encodedName + "\"; filename*=UTF-8''" + encodedName)
                .body(docBytes);
    }

2. 核心工具

/**
 * HTW html to word
 * html富文本内容转word
 *
 * @author <a href="https://github.com/motcs">motcs</a>
 * @since 2024-03-26 星期二
 */
@Log4j2
@Service
public class HTWConverter {

    /**
     * 读取文档
     *
     * @param document 写好的文档
     * @return 返回文档的字节码
     */
    public static byte[] readDoc(XWPFDocument document) {
        try {
            DefaultDataBuffer dataBuffer = new DefaultDataBufferFactory()
                    .allocateBuffer(1024);
            document.write(dataBuffer.asOutputStream());
            return dataBuffer.asInputStream().readAllBytes();
        } catch (IOException e) {
            log.info("文档转换流失败，读取字节发生错误：{}", e.getMessage());
            throw new RuntimeException("文档转换流失败，读取字节发生错误：" + e.getMessage());
        }
    }


    /**
     * 设置文档的页边距，单位为厘米是需要使用pageMarConvert 方法转换
     * pageMarConvert(3.8)  转换的结果，等于实际文档的3.8厘米
     *
     * @param document 文档
     * @param left     左边距
     * @param right    右边距
     * @param top      上边距
     * @param bottom   下边距
     */
    public static void pageMar(XWPFDocument document, long left, long right, long top, long bottom) {
        CTPageMar pageMar = document.getDocument().getBody().addNewSectPr().addNewPgMar();
        pageMar.setLeft(BigInteger.valueOf(left));
        pageMar.setRight(BigInteger.valueOf(right));
        pageMar.setTop(BigInteger.valueOf(top));
        pageMar.setBottom(BigInteger.valueOf(bottom));
    }

    /**
     * 页边距单位换算，换算后单位厘米
     *
     * @param value 边距
     * @return 返回换算后的数值
     */
    public static long pageMarConvert(double value) {
        return (long) ((value / 2.54) * 1440);
    }

    /**
     * 解析数据
     *
     * @param document    插入的文档
     * @param htmlContent 需要解析的内容
     */
    public static void processHtmlContent(XWPFDocument document, String htmlContent) {
        Document doc = Jsoup.parse(htmlContent);
        Elements elements = doc.select("p, img, h1");
        for (Element element : elements) {
            if (element.tagName().equals("p")) {
                addParagraphToDocument(document, element.text());
            } else if (element.tagName().equals("img")) {
                addImageToDocument(document, ImageUrl.builder().url(element.attr("src")).build());
            } else if (element.tagName().equals("h1")) {
                addParagraphH1ToDocument(document, element.text());
            }
        }
    }

    /**
     * 插入正文，字体 仿宋GB2312 字号： 三号
     *
     * @param document 文档
     * @param text     正文内容
     */
    public static void addParagraphToDocument(XWPFDocument document, String text) {
        XWPFParagraph paragraph = document.createParagraph();
        XWPFRun run = paragraph.createRun();
        run.setText(text);
        run.setFontSize(16);
        run.setFontFamily("仿宋GB2312");
        Double fontSizeAsDouble = run.getFontSizeAsDouble();
        paragraph.setIndentationFirstLine(ObjectUtils.isEmpty(fontSizeAsDouble) ?
                420 : fontSizeAsDouble.intValue() * 2 * 20);
    }

    /**
     * 设置标题
     *
     * @param document 文档
     * @param text     标题
     */
    public static void addParagraphH1ToDocument(XWPFDocument document, String text) {
        XWPFParagraph paragraph = document.createParagraph();
        XWPFRun run = paragraph.createRun();
        run.setText(text);
        run.setFontSize(16);
        run.setFontFamily("黑体");
    }

    /**
     * 文档增加图片
     *
     * @param document 文档
     * @param image1   图片信息
     */
    public static void addImageToDocument(XWPFDocument document, ImageUrl image1) {
        String imageUrl = image1.getUrl();
        try {
            BufferedImage image = ImageIO.read(new URL(imageUrl));
            String fileExtension = imageUrl.substring(imageUrl.lastIndexOf('.') + 1);
            log.info("图片后缀: {}", fileExtension);
            ByteArrayOutputStream os = new ByteArrayOutputStream();
            ImageIO.write(image, fileExtension, os);

            try (InputStream is = new ByteArrayInputStream(os.toByteArray())) {
                XWPFParagraph paragraph = document.createParagraph();
                XWPFRun run = paragraph.createRun();
                run.addPicture(is, PICTURE_TYPE_JPEG, "image." + fileExtension,
                        (int) Math.rint(image1.getWidth() * Units.EMU_PER_CENTIMETER),
                        (int) Math.rint(image1.getHeight() * Units.EMU_PER_CENTIMETER));
            } catch (InvalidFormatException e) {
                log.info("图片路径：{}，解析失败：{}", imageUrl, e.getMessage());
                addParagraphToDocument(document,"");
                addParagraphToDocument(document, "解析图片失败!"+imageUrl);
                addParagraphToDocument(document,"");
            }
        } catch (Exception e) {
            log.info("下载图片失败:{}", e.getMessage());
            addParagraphToDocument(document,"");
            addParagraphToDocument(document, "下载图片失败!"+imageUrl);
            addParagraphToDocument(document,"");
        }
    }

}

3. 使用到的工具类

/**
 * Location and display size of an image to embed in the exported document.
 *
 * <p>The size getters never return a non-positive value: when a dimension is unset or not
 * positive, its default is returned instead.
 *
 * @author <a href="https://github.com/motcs">motcs</a>
 * @since 2024-03-26 (Tuesday)
 */
@Data
@Builder
public class ImageUrl implements Serializable {

    /** Address the image is loaded from. */
    private String url;

    /** Image width; defaults to 16.2 when not positive. */
    private double width;

    /** Image height; defaults to 10.01 when not positive. */
    private double height;

    /**
     * @return the configured width, or 16.2 if it is not greater than zero
     */
    public double getWidth() {
        if (width > 0) {
            return width;
        }
        return 16.2;
    }

    /**
     * @return the configured height, or 10.01 if it is not greater than zero
     */
    public double getHeight() {
        if (height > 0) {
            return height;
        }
        return 10.01;
    }

}

好啦，到这里基本差不多了，样式那些就自己调整了。还可以有更多的扩展，这里仅仅只是做一个简单的示例。

Java将富文本内容转为WORD，富文本内容中间附带图片（有核心代码）

主要使用的依赖

代码干货

1. 导出方法

2. 核心工具

3. 使用到的工具类

悦读