java 基于 openOffice 实现在线预览功能

1 原理

将 office 文档转换为 pdf ，返回文件流给前端实现预览

2 controller 代码

    /**
     * Online-preview endpoint, mapped to GET {@code /onlinePreview}.
     *
     * <p>The handler holds no logic of its own; it forwards both arguments to
     * {@code attachmentsService.onlinePreview}.
     *
     * @param url      value of the {@code url} request parameter
     * @param response servlet response passed through to the service
     * @throws Exception anything raised by the service call
     */
    @ApiOperation(value = "系统文件在线预览接口")
    @GetMapping("/onlinePreview")
    public void onlinePreview(
            @RequestParam("url") String url,
            HttpServletResponse response) throws Exception {
        attachmentsService.onlinePreview(url, response);
    }

3 service 代码

public class AttachmentsService {public void onlinePreview(String url, HttpServletResponse response) throws Exception             {//获取文件类型String[] str = SmartStringUtil.split(url, "\\.");if (str.length == 0) {throw new Exception("文件格式不正确");}String suffix = str[str.length - 1];if (!suffix.equals("txt") && !suffix.equals("doc") && !suffix.equals("docx") && !suffix.equals("xls")&& !suffix.equals("xlsx") && !suffix.equals("ppt") && !suffix.equals("pptx")) {throw new Exception("文件格式不支持预览");}InputStream in = FileConvertUtil.convertNetFile(url, suffix);OutputStream outputStream = response.getOutputStream();//创建存放文件内容的数组byte[] buff = new byte[1024];//所读取的内容使用n来接收int n;//当没有读取完时,继续读取,循环while ((n = in.read(buff)) != -1) {//将字节数组的数据全部写入到输出流中outputStream.write(buff, 0, n);}//强制将缓存区的数据进行输出outputStream.flush();//关流outputStream.close();in.close();}}

4 文件格式转换工具类 FileConvertUtil

package com.eccom.business.utils;import com.artofsolving.jodconverter.DefaultDocumentFormatRegistry;
import com.artofsolving.jodconverter.DocumentConverter;
import com.artofsolving.jodconverter.DocumentFormat;
import com.artofsolving.jodconverter.openoffice.connection.OpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.connection.SocketOpenOfficeConnection;
import com.artofsolving.jodconverter.openoffice.converter.StreamOpenOfficeDocumentConverter;
import com.eccom.common.exception.CustomException;
import com.eccom.common.utils.StringUtils;
import org.apache.commons.io.FilenameUtils;import java.io.*;
import java.net.*;
import java.util.Base64;
import java.util.Date;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;/*** 文件格式转换工具类*/
public class FileConvertUtil {//    @Value("${baseplatform.file.preview-max-size}")private static int previewMaxSize = 100;/*** 默认转换后文件后缀*/private static final String DEFAULT_SUFFIX = "pdf";/*** openoffice_port*/private static final Integer OPENOFFICE_PORT = 8100;/*** 【office文档转换为PDF(处理本地文件) 】** @param sourcePath: 源文件路径* @param suffix:     源文件后缀* @return java.io.InputStream 转换后文件输入流*/public static InputStream convertLocaleFile(String sourcePath, String suffix) throws Exception {File inputFile = new File(sourcePath);InputStream inputStream = new FileInputStream(inputFile);return covertCommonByStream(inputStream, suffix);}/*** 【office文档转换为PDF(处理网络文件)】** @param netFileUrl: 网络文件路径* @param suffix:     文件后缀* @return java.io.InputStream 转换后文件输入流*/public static InputStream convertNetFile(String netFileUrl, String suffix) throws Exception {// 创建URLnetFileUrl = getEncodeUrl(netFileUrl).replaceAll("\\+", "%20");URL url = new URL(netFileUrl);// 试图连接并取得返回状态码URLConnection urlconn = url.openConnection();urlconn.connect();HttpURLConnection httpconn = (HttpURLConnection) urlconn;int httpResult = httpconn.getResponseCode();if (httpResult == HttpURLConnection.HTTP_OK) {InputStream inputStream = urlconn.getInputStream();//根据响应获取文件大小(M)int size = urlconn.getContentLength() / 1024 / 1024;if (size > previewMaxSize) {throw new CustomException("文件太大，请下载查看");}return covertCommonByStream(inputStream, suffix);}return null;}/*** 【将文件以流的形式转换】** @param inputStream: 源文件输入流* @param suffix:      源文件后缀* @return java.io.InputStream 转换后文件输入流*/public static InputStream covertCommonByStream(InputStream inputStream, String suffix) throws Exception {ByteArrayOutputStream out = new ByteArrayOutputStream();OpenOfficeConnection connection = new SocketOpenOfficeConnection(OPENOFFICE_PORT);connection.connect();DocumentConverter converter = new StreamOpenOfficeDocumentConverter(connection);DefaultDocumentFormatRegistry formatReg = new DefaultDocumentFormatRegistry();DocumentFormat targetFormat = 
formatReg.getFormatByFileExtension(DEFAULT_SUFFIX);DocumentFormat sourceFormat = formatReg.getFormatByFileExtension(suffix);converter.convert(inputStream, sourceFormat, out, targetFormat);connection.disconnect();return outputStreamConvertInputStream(out);}/*** 【outputStream转inputStream】** @param out:* @return java.io.ByteArrayInputStream*/public static ByteArrayInputStream outputStreamConvertInputStream(final OutputStream out) {ByteArrayOutputStream baos = (ByteArrayOutputStream) out;return new ByteArrayInputStream(baos.toByteArray());}/*** 【文件压缩】网络文件** @param filePath:* @param zipOut:* @return void*/public static void fileToZip(String filePath, ZipOutputStream zipOut) throws IOException {filePath = getEncodeUrl(filePath).replaceAll("\\+", "%20");// 需要压缩的文件File file = new File(filePath);// 获取文件名称,为解决压缩时重复名称问题，对文件名加时间戳处理String fileName = FilenameUtils.getBaseName(URLDecoder.decode(file.getName(), "UTF-8")) + "-"+ String.valueOf(new Date().getTime()) + "."+ FilenameUtils.getExtension(file.getName());InputStream fileInput = getInputStream(filePath);// 缓冲byte[] bufferArea = new byte[1024 * 10];BufferedInputStream bufferStream = new BufferedInputStream(fileInput, 1024 * 10);// 将当前文件作为一个zip实体写入压缩流,fileName代表压缩文件中的文件名称zipOut.putNextEntry(new ZipEntry(fileName));int length = 0;// 最常规IO操作,不必紧张while ((length = bufferStream.read(bufferArea, 0, 1024 * 10)) != -1) {zipOut.write(bufferArea, 0, length);}//关闭流fileInput.close();// 需要注意的是缓冲流必须要关闭流,否则输出无效bufferStream.close();// 压缩流不必关闭,使用完后再关}/*** 【获取网络文件的输入流】** @param filePath: 网络文件路径* @return java.io.InputStream*/public static InputStream getInputStream(String filePath) throws IOException {InputStream inputStream = null;// 创建URLURL url = new URL(filePath);// 试图连接并取得返回状态码URLConnection urlconn = url.openConnection();urlconn.connect();HttpURLConnection httpconn = (HttpURLConnection) urlconn;int httpResult = httpconn.getResponseCode();if (httpResult == HttpURLConnection.HTTP_OK) {inputStream = urlconn.getInputStream();}return 
inputStream;}/*** 判断汉字的方法,只要编码在\u4e00到\u9fa5之间的都是汉字,中文符号，空格，+** @param c:* @return boolean*/public static boolean isChineseChar(char c) {return String.valueOf(c).matches("[\u4e00-\u9fa5\u3002\uff1b\uff0c\uff1a\u201c\u201d\uff08\uff09\u3001\uff1f\u300a\u300b\\s\\+]");}/*** 得到中文转码后的 url，只转换 url 中的中文字符** @param url:* @return java.lang.String*/public static String getEncodeUrl(String url) throws UnsupportedEncodingException {String resultURL = StringUtils.EMPTY;for (int i = 0; i < url.length(); i++) {char charAt = url.charAt(i);//只对汉字处理if (isChineseChar(charAt)) {String encode = URLEncoder.encode(charAt + "", "UTF-8");resultURL += encode;} else {resultURL += charAt;}}return resultURL;}public static void main(String[] args) {
//        convertNetFile("http://192.168.161.159:8080/profile/upload/2022/03/28/a9b1c5bb-f7a3-478d-afa9-ece99e5b02c6.docx", ".pdf");
//        convert("c:/Users/admin/Desktop/2.pdf", "c:/Users/admin/Desktop/3.pdf");}
}

5 pom 引入依赖

        <!-- openoffice -->
        <dependency>
            <groupId>com.artofsolving</groupId>
            <artifactId>jodconverter</artifactId>
            <version>2.2.1</version>
        </dependency>

        <!-- jxls poi -->
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-ooxml</artifactId>
            <version>3.17</version>
        </dependency>
        <dependency>
            <groupId>org.apache.poi</groupId>
            <artifactId>poi-scratchpad</artifactId>
            <version>3.17</version>
        </dependency>

        <!-- https://mvnrepository.com/artifact/net.sf.jxls/jxls-core -->
        <dependency>
            <groupId>net.sf.jxls</groupId>
            <artifactId>jxls-core</artifactId>
            <version>1.0.6</version>
        </dependency>

6 常见问题

6.1 解决jodconverter 2.2.1 版本不支持docx、xlsx、pptx 转换成PDF格式异常

方案一

解决jodconverter 2.2.1 版本不支持docx、xlsx、pptx 转换成PDF格式异常_gblfy的博客-CSDN博客

在 com.artofsolving.jodconverter 包下新建 BasicDocumentFormatRegistry 类，实现 DocumentFormatRegistry 接口，用来覆盖 jar 包中的同名类

package com.artofsolving.jodconverter;import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;/*** 【重写 BasicDocumentFormatRegistry 文档格式】** @author weipeng1* @date 2022-04-01*/
public class BasicDocumentFormatRegistry implements DocumentFormatRegistry {private List/* <DocumentFormat> */ documentFormats = new ArrayList();public void addDocumentFormat(DocumentFormat documentFormat) {documentFormats.add(documentFormat);}protected List/* <DocumentFormat> */ getDocumentFormats() {return documentFormats;}/*** @param extension the file extension* @return the DocumentFormat for this extension, or null if the extension* is not mapped*/@Overridepublic DocumentFormat getFormatByFileExtension(String extension) {if (extension == null) {return null;}//将文件名后缀统一转化if (extension.indexOf("doc") >= 0) {extension = "doc";}if (extension.indexOf("ppt") >= 0) {extension = "ppt";}if (extension.indexOf("xls") >= 0) {extension = "xls";}String lowerExtension = extension.toLowerCase();for (Iterator it = documentFormats.iterator(); it.hasNext(); ) {DocumentFormat format = (DocumentFormat) it.next();if (format.getFileExtension().equals(lowerExtension)) {return format;}}return null;}@Overridepublic DocumentFormat getFormatByMimeType(String mimeType) {for (Iterator it = documentFormats.iterator(); it.hasNext(); ) {DocumentFormat format = (DocumentFormat) it.next();if (format.getMimeType().equals(mimeType)) {return format;}}return null;}
}

方案二

Apache OpenOffice-java调用时的问题总结

目前 maven 公网仓库中没有 jodconverter-2.2.2.jar，只有 jodconverter-2.2.1.jar。而要支持新版 office 格式和 html 格式的转换，需要 2.2.2 版本，这个版本需要自己下载，并维护到 maven 私服上。步骤如下：

（1）下载

https://sourceforge.net/projects/jodconverter/?source=typ_redirect 去这个地址下载即可。

（2）上传maven私服

通过步骤1下载下来的是一个.zip的包，解压后在jodconverter-2.2.2\lib目录下可以看到，如下图：

我们可以看到jodconverter-2.2.2.jar包，把这个jar包上传maven私服即可。而这个包里面含有对新版office（.docx .xlsx .pptx）的支持，HTML的支持等。对于juh-3.0.1.jar、ridl-3.0.1.jar、unoil-3.0.1.jar这三个包在maven公网仓库中是可以查到的，所以直接在pom.xml中添加即可。

6.2 乱码问题

网上有很多攻略，大致做法是：在 JDK 和 linux 系统的字体目录中添加文档所用到的字体（例如中文字体）即可。

6.3 OpenOfficeDocumentConverter和StreamOpenOfficeDocumentConverter的区别

（1）最终生成文件权限问题

当Apache OpenOffice服务和java程序部署在同一台服务器上时，可以使用OpenOfficeDocumentConverter，但是同时需要注意，使用该类转换出的PDF文件，其用户权限为Apache OpenOffice启动用户的权限。例如，java服务使用权限较低的用户worker启动，而Apache OpenOffice使用root用户启动，那么生成的PDF文件的属主也为root；java程序如果后续要对生成的PDF文件进行读写等操作，由于java服务只有worker权限，会出现读取不到文件流的问题。而使用StreamOpenOfficeDocumentConverter时，是由OpenOffice生成完PDF后，把PDF的流传给java服务，再由java服务生成PDF文件，所以不会产生用户权限的问题。

（2）性能问题

OpenOfficeDocumentConverter直接由OpenOffice生成PDF文件，而StreamOpenOfficeDocumentConverter是由OpenOffice把PDF流回传给java服务，由java服务生成文件。所以OpenOfficeDocumentConverter在性能上更快。但是OpenOfficeDocumentConverter的问题是，不能实现java服务和OpenOffice的高可用部署（因为两个服务必须部署在同一台物理机上）。

（3）实现java服务与OpenOffice服务的高可用部署

java服务与OpenOffice服务在同一台物理机上时，可以使用OpenOfficeDocumentConverter，因为它转换得更快。如果两个服务不在同一台物理机上，可以使用StreamOpenOfficeDocumentConverter类。