private final static String tempPath = "C:\\Users\\xxx\\Desktop\\Word2Html\\src\\test\\";// Directory where extracted images and related files are saved
public static void main(String argv[]) {
try {
JFileChooser fileChooser = new JFileChooser();
fileChooser.setDialogTitle("Select a Word Document");
fileChooser.addChoosableFileFilter(new javax.swing.filechooser.FileNameExtensionFilter("Word Documents", "doc", "docx"));
int returnValue = fileChooser.showOpenDialog(null);
if (returnValue == JFileChooser.APPROVE_OPTION) {
File inputFile = fileChooser.getSelectedFile();
String fileName = inputFile.getAbsolutePath();
String defaultOutputDir = System.getProperty("user.home") + "\\Desktop\\";
String outputFileName = defaultOutputDir + inputFile.getName().replaceFirst("[.][^.]+$", "") + ".html";
if (fileName.endsWith(".doc")) {
doc2Html(fileName, outputFileName);
} else if (fileName.endsWith(".docx")) {
docx2Html(fileName, outputFileName);
} catch (Exception e) {
* doc转换为html
* @param fileName
* @param outPutFile
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException {
long startTime = System.currentTimeMillis();
HWPFDocument wordDocument = new HWPFDocument(new FileInputStream(fileName));
WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
// 图片保存路径设置
wordToHtmlConverter.setPicturesManager(new PicturesManager() {
public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {
String picturePath = "images" + File.separator + suggestedName;
// 检查并创建图片文件夹
File imageFolder = new File(tempPath + "images");
if (!imageFolder.exists()) {
boolean created = imageFolder.mkdirs(); // 创建文件夹
if (created) {
System.out.println("Images folder created at: " + imageFolder.getAbsolutePath());
} else {
System.out.println("Failed to create images folder.");
// 写入图片数据,确保每次写入
try {
File pictureFile = new File(tempPath + picturePath);
try (FileOutputStream fos = new FileOutputStream(pictureFile)) {
fos.write(content); // 写入图片数据
System.out.println("Image saved to: " + pictureFile.getAbsolutePath());
} catch (IOException e) {
return picturePath; // 返回相对路径
Document htmlDocument = wordToHtmlConverter.getDocument();
ByteArrayOutputStream out = new ByteArrayOutputStream();
DOMSource domSource = new DOMSource(htmlDocument);
StreamResult streamResult = new StreamResult(out);
TransformerFactory tf = TransformerFactory.newInstance();
Transformer serializer = tf.newTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
serializer.setOutputProperty(OutputKeys.INDENT, "yes");
serializer.setOutputProperty(OutputKeys.METHOD, "html");
serializer.transform(domSource, streamResult);
String htmlContent = new String(out.toByteArray());
htmlContent = htmlContent.replaceAll("TOC \\\\o \"1-3\" \\\\h \\\\z \\\\u", "");
writeFile(htmlContent, outPutFile);
System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");
* 写文件
* @param content
* @param path
public static void writeFile(String content, String path) {
FileOutputStream fos = null;
BufferedWriter bw = null;
try {
File file = new File(path);
fos = new FileOutputStream(file);
bw = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"));
} catch (FileNotFoundException fnfe) {
} catch (IOException ioe) {
} finally {
try {
if (bw != null) bw.close();
if (fos != null) fos.close();
} catch (IOException e) {
* docx格式word转换为html
* @param fileName
* @param outPutFile
* @throws TransformerException
* @throws IOException
* @throws ParserConfigurationException
public static void docx2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException {
long startTime = System.currentTimeMillis();
XWPFDocument document = new XWPFDocument(new FileInputStream(fileName));
// 提取目录
StringBuilder toc = new StringBuilder();
toc.append("<div id='toc'>\n<h2>Table of Contents</h2>\n<ul>\n");
// 遍历文档中的段落,查找标题并构建目录
List<XWPFParagraph> paragraphs = document.getParagraphs();
for (XWPFParagraph paragraph : paragraphs) {
String style = paragraph.getStyle(); // 获取段落样式
if (style != null && (style.equals("Heading 1") || style.equals("Heading 2") || style.equals("Heading 3"))) {
String text = paragraph.getText();
// 根据标题级别构建目录项
toc.append("<li><a href='#" + text.hashCode() + "'>" + text + "</a></li>\n");
// 设置XHTMLOptions
XHTMLOptions options = XHTMLOptions.create().indent(4);
File imageFolder = new File(tempPath);
options.setExtractor(new FileImageExtractor(imageFolder));
options.URIResolver(new FileURIResolver(imageFolder));
File outFile = new File(outPutFile);
OutputStream out = new FileOutputStream(outFile);
// Convert docx to XHTML
XHTMLConverter.getInstance().convert(document, out, options);
System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");
// 获取转换后的HTML内容
String htmlContent = new String(((ByteArrayOutputStream) out).toByteArray(), "UTF-8");
// 将TOC插入到HTML的开头
htmlContent = toc + htmlContent;
// 手动添加表格样式(边框)
htmlContent = htmlContent.replaceAll("<table>", "<table style='border: 1px solid black; border-collapse: collapse;'>");
htmlContent = htmlContent.replaceAll("<td>", "<td style='border: 1px solid black; padding: 5px;'>");
htmlContent = htmlContent.replaceAll("<th>", "<th style='border: 1px solid black; padding: 5px;'>");
// 写入到输出文件
writeFile(htmlContent, outPutFile);
<project xmlns="" xmlns:xsi=""