java实现word转html(支持docx及doc文件)

news/2025/1/19 3:56:10/
htmledit_views">
html" title=java>java">private final static String tempPath = "C:\\Users\\xxx\\Desktop\\Word2Html\\src\\test\\";//图片及相关文件保存的路径public static void main(String argv[]) {try {JFileChooser fileChooser = new JFileChooser();fileChooser.setDialogTitle("Select a Word Document");fileChooser.setAcceptAllFileFilterUsed(false);fileChooser.addChoosableFileFilter(new html" title=java>javax.swing.filechooser.FileNameExtensionFilter("Word Documents", "doc", "docx"));int returnValue = fileChooser.showOpenDialog(null);if (returnValue == JFileChooser.APPROVE_OPTION) {File inputFile = fileChooser.getSelectedFile();String fileName = inputFile.getAbsolutePath();String defaultOutputDir = System.getProperty("user.home") + "\\Desktop\\";String outputFileName = defaultOutputDir + inputFile.getName().replaceFirst("[.][^.]+$", "") + ".html";if (fileName.endsWith(".doc")) {doc2Html(fileName, outputFileName);} else if (fileName.endsWith(".docx")) {docx2Html(fileName, outputFileName);}}} catch (Exception e) {e.printStackTrace();}}/*** doc转换为html** @param fileName* @param outPutFile* @throws TransformerException* @throws IOException* @throws ParserConfigurationException*/public static void doc2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException {long startTime = System.currentTimeMillis();HWPFDocument html" title=word>wordDocument = new HWPFDocument(new FileInputStream(fileName));WordToHtmlConverter html" title=word>wordToHtmlConverter = new WordToHtmlConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());// 图片保存路径设置html" title=word>wordToHtmlConverter.setPicturesManager(new PicturesManager() {public String savePicture(byte[] content, PictureType pictureType, String suggestedName, float widthInches, float heightInches) {String picturePath = "images" + File.separator + suggestedName;// 检查并创建图片文件夹File imageFolder = new File(tempPath + "images");if (!imageFolder.exists()) {boolean created = imageFolder.mkdirs(); // 创建文件夹if (created) {System.out.println("Images folder created at: " + imageFolder.getAbsolutePath());} else {System.out.println("Failed to create images folder.");}}// 写入图片数据,确保每次写入try {File pictureFile = new File(tempPath + picturePath);try (FileOutputStream fos = new FileOutputStream(pictureFile)) {fos.write(content);  // 写入图片数据System.out.println("Image saved to: " + pictureFile.getAbsolutePath());}} catch (IOException e) {e.printStackTrace();}return picturePath; // 返回相对路径}});html" title=word>wordToHtmlConverter.processDocument(html" title=word>wordDocument);Document htmlDocument = html" title=word>wordToHtmlConverter.getDocument();ByteArrayOutputStream out = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(out);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);out.close();String htmlContent = new String(out.toByteArray());htmlContent = htmlContent.replaceAll("TOC \\\\o \"1-3\" \\\\h \\\\z \\\\u", "");writeFile(htmlContent, outPutFile);System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");}/*** 写文件** @param content* @param path*/public static void writeFile(String content, String path) {FileOutputStream fos = null;BufferedWriter bw = null;try {File file = new File(path);fos = new FileOutputStream(file);bw = new BufferedWriter(new OutputStreamWriter(fos, "utf-8"));bw.write(content);} catch (FileNotFoundException fnfe) {fnfe.printStackTrace();} catch (IOException ioe) {ioe.printStackTrace();} finally {try {if (bw != null) bw.close();if (fos != null) fos.close();} catch (IOException e) {}}}/*** docx格式html" title=word>word转换为html** @param fileName* @param outPutFile* @throws TransformerException* @throws IOException* @throws ParserConfigurationException*/public static void docx2Html(String fileName, String outPutFile) throws TransformerException, IOException, ParserConfigurationException {long startTime = System.currentTimeMillis();XWPFDocument document = new XWPFDocument(new FileInputStream(fileName));// 提取目录StringBuilder toc = new StringBuilder();toc.append("<div id='toc'>\n<h2>Table of Contents</h2>\n<ul>\n");// 遍历文档中的段落,查找标题并构建目录List<XWPFParagraph> paragraphs = document.getParagraphs();for (XWPFParagraph paragraph : paragraphs) {String style = paragraph.getStyle();  // 获取段落样式if (style != null && (style.equals("Heading 1") || style.equals("Heading 2") || style.equals("Heading 3"))) {String text = paragraph.getText();// 根据标题级别构建目录项toc.append("<li><a href='#" + text.hashCode() + "'>" + text + "</a></li>\n");}}toc.append("</ul>\n</div>\n");// 设置XHTMLOptionsXHTMLOptions options = XHTMLOptions.create().indent(4);File imageFolder = new File(tempPath);options.setExtractor(new FileImageExtractor(imageFolder));options.URIResolver(new FileURIResolver(imageFolder));File outFile = new File(outPutFile);outFile.getParentFile().mkdirs();OutputStream out = new FileOutputStream(outFile);// Convert docx to XHTMLXHTMLConverter.getInstance().convert(document, out, options);System.out.println("Generate " + outPutFile + " with " + (System.currentTimeMillis() - startTime) + " ms.");// 获取转换后的HTML内容String htmlContent = new String(((ByteArrayOutputStream) out).toByteArray(), "UTF-8");// 将TOC插入到HTML的开头htmlContent = toc + htmlContent;// 手动添加表格样式(边框)htmlContent = htmlContent.replaceAll("<table>", "<table style='border: 1px solid black; border-collapse: collapse;'>");htmlContent = htmlContent.replaceAll("<td>", "<td style='border: 1px solid black; padding: 5px;'>");htmlContent = htmlContent.replaceAll("<th>", "<th style='border: 1px solid black; padding: 5px;'>");// 写入到输出文件writeFile(htmlContent, outPutFile);}

pom文件

html" title=java>java"><project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"><modelVersion>4.0.0</modelVersion><groupId>fxma</groupId><artifactId>Word2Html</artifactId><version>0.0.1-SNAPSHOT</version><packaging>jar</packaging><name>Word2Html</name><url>http://maven.apache.org</url><properties><project.build.sourceEncoding>UTF-8</project.build.sourceEncoding></properties><dependencies><dependency><groupId>junit</groupId><artifactId>junit</artifactId><version>3.8.1</version><scope>test</scope></dependency><dependency><groupId>commons-io</groupId><artifactId>commons-io</artifactId><version>2.4</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi</artifactId><version>3.8</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-ooxml</artifactId><version>3.8</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>poi-scratchpad</artifactId><version>3.8</version></dependency><dependency><groupId>fr.opensagres.xdocreport</groupId><artifactId>xdocreport</artifactId><version>1.0.4</version></dependency><dependency><groupId>org.apache.poi</groupId><artifactId>ooxml-schemas</artifactId><version>1.1</version></dependency></dependencies>
</project>

 

 


http://www.ppmy.cn/news/1564299.html

相关文章

git操作(Windows中GitHub)

使用git控制GitHub中的仓库版本&#xff0c;并在Windows桌面中创建与修改代码&#xff0c;与GitHub仓库进行同步。 创建自己的GitHub仓库 创建一个gen_code实验性仓库用来学习和验证git在Windows下的使用方法&#xff1a; gen_code仓库 注意&#xff0c;创建仓库时不要设置…

Object常用的方法及开发中的使用场景

在前端开发中&#xff0c;Object 对象提供了许多常用的方法&#xff0c;这些方法帮助我们操作对象的属性和结构。以下是常用的 Object 方法及其功能简要说明&#xff1a; 对象常用的方法 1. 创建对象 Object.create(proto[, propertiesObject]) 创建一个具有指定原型对象和属性…

linux 下 Doris 单点部署

目录 1. Doris 下载 2. 环境准备 2.1 Linux 操作系统版本需求 2.2 部署依赖 3. Doris 部署 3.1 修改系统配置 3.1.1 修改系统句柄数 3.1.2 关闭swap分区 3.1.3 修改最大内存映射区域数量 3.2 开放端口 3.3 fe 部署 3.4 be 部署 3.5 be添加到Doris集群 4 验证 4.…

【机器学习:二十、拆分原始训练集】

1. 如何改进模型 模型的改进需求 在机器学习任务中&#xff0c;模型性能的提升通常受限于训练数据、模型架构、优化方法及超参数设置等。模型改进的目标是在测试数据上表现更优&#xff0c;避免过拟合或欠拟合。 常见的改进方向 增大训练数据集&#xff1a;通过数据增强或获…

阿里云轻量应用服务器全新升级,通用型实例峰值带宽高达200Mbps

火伞云1月13日消息&#xff0c;阿里云推出首款全新升级的轻量应用服务器“通用型实例”。新的服务器实例面向中小企业和开发者创新设计&#xff0c;升级支持4vCPU实例规格满足更多通用计算小算力场景需求&#xff0c;标配200Mbps峰值带宽可有效应对突发业务流量&#xff0c;同时…

Vue.js组件开发-如何实现表头搜索

在Vue.js组件开发中&#xff0c;实现表头搜索通常涉及在表格组件的表头添加输入框&#xff0c;并让用户能够输入搜索关键字来过滤表格数据。 以下是一个使用Element UI的el-table组件实现表头搜索的示例&#xff1a; 一、准备阶段 ‌确保Element UI已安装‌&#xff1a; 确保…

第8篇:从入门到精通:掌握Python异常处理

第8篇&#xff1a;异常处理 内容简介 本篇文章将深入探讨Python中的异常处理机制。您将学习异常的基本概念与类型&#xff0c;掌握使用try-except块处理异常的方法&#xff0c;了解finally语句的作用&#xff0c;以及如何抛出和定义自定义异常。通过丰富的代码示例&#xff0…

OpenCV相机标定与3D重建(60)用于立体校正的函数stereoRectify()的使用

操作系统&#xff1a;ubuntu22.04 OpenCV版本&#xff1a;OpenCV4.9 IDE:Visual Studio Code 编程语言&#xff1a;C11 算法描述 为已校准的立体相机的每个头计算校正变换。 cv::stereoRectify 是 OpenCV 中用于立体校正的函数&#xff0c;它基于已知的相机参数和相对位置&am…