POI 实现word(doc/docx)与excel(xls/xlsx)浏览器预览
一、环境准备
1.jdk:1.8
2.maven:3.6
3.springboot:2.2.2
二、MAVEN主要依赖
<!-- Keep every org.apache.poi artifact on the same version (4.1.0 here). -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>fr.opensagres.xdocreport</groupId>
    <artifactId>xdocreport</artifactId>
    <version>2.0.2</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>4.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>ooxml-schemas</artifactId>
    <version>1.4</version>
</dependency>
三、具体实现
1.docToHtml(doc格式)
/**
 * Previews a fixed sample {@code .doc} file in the browser by converting it to HTML.
 *
 * <p>The file location is hard-coded for demonstration purposes.
 *
 * @param response servlet response that receives the generated HTML
 */
@RequestMapping("/wordToHtml")
public void wordToHtml(HttpServletResponse response) {
    final String path = "C:\\usr\\local\\";
    final String file = "5页.doc";
    // try-with-resources: the file stream is closed whether or not the conversion succeeds.
    try (InputStream input = new FileInputStream(path + file)) {
        docToHtml(input, response);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the client instead of answering 200 with an empty body.
        if (!response.isCommitted()) {
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }
}
public void docToHtml(InputStream input, HttpServletResponse response) throws Exception{HWPFDocumentCore wordDocument = WordToHtmlUtils.loadDoc(input);WordToHtmlConverter wordToHtmlConverter = new ImageConverter(DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());wordToHtmlConverter.processDocument(wordDocument);Document htmlDocument = wordToHtmlConverter.getDocument();ByteArrayOutputStream outStream = new ByteArrayOutputStream();DOMSource domSource = new DOMSource(htmlDocument);StreamResult streamResult = new StreamResult(outStream);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");serializer.setOutputProperty(OutputKeys.INDENT, "yes");serializer.setOutputProperty(OutputKeys.METHOD, "html");serializer.transform(domSource, streamResult);outStream.close();// 清空responseresponse.reset();OutputStream toClient = new BufferedOutputStream(response.getOutputStream());response.setContentType("text/html");response.setCharacterEncoding("UTF-8");toClient.write(outStream.toByteArray());toClient.flush();toClient.close();}
//图片处理
public class ImageConverter extends WordToHtmlConverter{public ImageConverter(Document document) {super(document);}@Overrideprotected void processImageWithoutPicturesManager(Element currentBlock, boolean inlined, Picture picture){Element imgNode = currentBlock.getOwnerDocument().createElement("img");StringBuffer sb = new StringBuffer();sb.append(Base64.getMimeEncoder().encodeToString(picture.getRawContent()));sb.insert(0, "data:" + picture.getMimeType() + ";base64,");imgNode.setAttribute("src", sb.toString());currentBlock.appendChild(imgNode);}
}
2.docxToHtml(docx格式)
/**
 * Previews a fixed sample {@code .docx} file in the browser by converting it to HTML.
 *
 * <p>The file location is hard-coded for demonstration purposes.
 *
 * @param response servlet response that receives the generated HTML
 */
@RequestMapping("/wordToHtml")
public void wordToHtml(HttpServletResponse response) {
    final String path = "C:\\usr\\local\\";
    final String file = "3.docx";
    // try-with-resources: the file stream is closed whether or not the conversion succeeds.
    try (InputStream input = new FileInputStream(path + file)) {
        docxToHtml(input, response);
    } catch (Exception e) {
        e.printStackTrace();
        // Report the failure to the client instead of answering 200 with an empty body.
        if (!response.isCommitted()) {
            response.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        }
    }
}
public void docxToHtml(InputStream inputStream, HttpServletResponse response) throws IOException {XWPFDocument docxDocument = new XWPFDocument(inputStream);XHTMLOptions options = XHTMLOptions.create();//图片转base64options.setImageManager(new Base64EmbedImgManager());// 转换htm1ByteArrayOutputStream htmlStream = new ByteArrayOutputStream();XHTMLConverter.getInstance().convert(docxDocument, htmlStream, options);// 清空responseresponse.reset();OutputStream toClient = new BufferedOutputStream(response.getOutputStream());response.setContentType("text/html");response.setCharacterEncoding("UTF-8");toClient.write(htmlStream.toByteArray());toClient.flush();toClient.close();}
3.xlsToHtml(xls格式)
/**
 * Previews an {@code .xls} workbook from the download directory as HTML.
 *
 * <p>Pictures contained in the workbook are additionally written next to the source file
 * (best effort); the generated HTML itself is sent to the client as UTF-8.
 *
 * @param fileUrl  name of the file, relative to the download directory; requests that are
 *                 empty or resolve outside that directory are answered with HTTP 400
 * @param response servlet response that receives the HTML
 * @throws Exception if the workbook cannot be read, converted or written to the response
 */
@RequestMapping("/xlsTest")
public void xlsTest(String fileUrl, HttpServletResponse response) throws Exception {
    final String path = "D:\\apache-tomcat-8\\apache-tomcat-8.5.77\\webapps\\download\\file\\incorrupt\\";

    if (fileUrl == null || fileUrl.isEmpty()) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST, "fileUrl is required");
        return;
    }
    // fileUrl is caller-supplied: refuse anything ("..", absolute paths) that would leave
    // the download directory.
    final java.nio.file.Path baseDir = java.nio.file.Paths.get(path).toAbsolutePath().normalize();
    final java.nio.file.Path source = baseDir.resolve(fileUrl).normalize();
    if (!source.startsWith(baseDir)) {
        response.sendError(HttpServletResponse.SC_BAD_REQUEST,
                "fileUrl must name a file inside the download directory");
        return;
    }

    final byte[] html;
    try (InputStream input = new FileInputStream(source.toFile());
         HSSFWorkbook excelBook = new HSSFWorkbook(input)) {
        ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter(
                DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
        excelToHtmlConverter.processWorkbook(excelBook);

        // HSSFWorkbook#getAllPictures returns HSSFPictureData elements; casting them to the
        // HWPF (Word) Picture class fails with ClassCastException at runtime.
        List<org.apache.poi.hssf.usermodel.HSSFPictureData> pics = excelBook.getAllPictures();
        for (int i = 0; i < pics.size(); i++) {
            org.apache.poi.hssf.usermodel.HSSFPictureData pic = pics.get(i);
            java.nio.file.Path target = baseDir.resolve(
                    source.getFileName() + "_picture" + i + "." + pic.suggestFileExtension());
            try (OutputStream out = new FileOutputStream(target.toFile())) {
                out.write(pic.getData());
            } catch (java.io.IOException e) {
                // Saving pictures is best effort; the HTML preview is still produced.
                e.printStackTrace();
            }
        }

        Document htmlDocument = excelToHtmlConverter.getDocument();
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, "utf-8");
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        html = outStream.toByteArray();
    }

    // Clear the response (buffer, status, headers) before sending the preview.
    response.reset();
    response.setContentType("text/html");
    response.setCharacterEncoding("UTF-8");
    try (OutputStream toClient = new BufferedOutputStream(response.getOutputStream())) {
        toClient.write(html);
        toClient.flush();
    }
}
4.xlsxToHtml(xlsx格式,先用下文的 Transform 工具类转换为 xls 再转 HTML)
import org.apache.poi.hssf.converter.ExcelToHtmlConverter;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.w3c.dom.Document;import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import java.io.*;
import java.util.List;public class TestDemo {final static String path = "D:\\apache-tomcat-8\\apache-tomcat-8.5.77\\webapps\\download\\file\\incorrupt\\";final static String file = "d1f028f4-8b80-4ff0-9756-8d386f157306_工作簿2.xlsx";// private static final String EXCEL_XLS = "xls";
// private static final String EXCEL_XLSX = "xlsx"; public static void main(String[] args) {try{InputStream input = new FileInputStream(path +"/"+ file); HSSFWorkbook excelBook = new HSSFWorkbook();
// //判断Excel文件将07+版本转换为03版本
// if(file.endsWith(EXCEL_XLS)){ //Excel 2003
// excelBook = new HSSFWorkbook(input);
// }
// else if(file.endsWith(EXCEL_XLSX)){ // Excel 2007/2010 Transform xls = new Transform(); XSSFWorkbook workbookOld = new XSSFWorkbook(input); xls.transformXSSF(workbookOld, excelBook);//}ExcelToHtmlConverter excelToHtmlConverter = new ExcelToHtmlConverter (DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument() );excelToHtmlConverter.processWorkbook(excelBook);List pics = excelBook.getAllPictures();if (pics != null) {for (int i = 0; i < pics.size(); i++) {Picture pic = (Picture) pics.get (i);try {pic.writeImageContent (new FileOutputStream(path + pic.suggestFullFileName() ) );} catch (FileNotFoundException e) {e.printStackTrace();}}}Document htmlDocument =excelToHtmlConverter.getDocument();ByteArrayOutputStream outStream = new ByteArrayOutputStream();DOMSource domSource = new DOMSource (htmlDocument);StreamResult streamResult = new StreamResult (outStream);TransformerFactory tf = TransformerFactory.newInstance();Transformer serializer = tf.newTransformer();serializer.setOutputProperty (OutputKeys.ENCODING, "utf-8");serializer.setOutputProperty (OutputKeys.INDENT, "yes");serializer.setOutputProperty (OutputKeys.METHOD, "html");serializer.transform (domSource, streamResult);outStream.close();//Excel转换成HtmlString content = new String(outStream.toByteArray()); System.out.println(content);}catch(Exception e) {e.printStackTrace(); }}
}
xlsx格式转xls格式的工具类
import org.apache.poi.hssf.usermodel.*;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.DataFormat;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.util.CellRangeAddress;
import org.apache.poi.xssf.usermodel.*;import java.util.HashMap;public class Transform { private int lastColumn = 0; private HashMap<Integer, HSSFCellStyle> styleMap = new HashMap(); public void transformXSSF(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew) { HSSFSheet sheetNew; XSSFSheet sheetOld; workbookNew.setMissingCellPolicy(workbookOld.getMissingCellPolicy()); for (int i = 0; i < workbookOld.getNumberOfSheets(); i++) { sheetOld = workbookOld.getSheetAt(i); sheetNew = workbookNew.getSheet(sheetOld.getSheetName()); sheetNew = workbookNew.createSheet(sheetOld.getSheetName()); this.transform(workbookOld, workbookNew, sheetOld, sheetNew); } } private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew, XSSFSheet sheetOld, HSSFSheet sheetNew) { sheetNew.setDisplayFormulas(sheetOld.isDisplayFormulas()); sheetNew.setDisplayGridlines(sheetOld.isDisplayGridlines()); sheetNew.setDisplayGuts(sheetOld.getDisplayGuts()); sheetNew.setDisplayRowColHeadings(sheetOld.isDisplayRowColHeadings()); sheetNew.setDisplayZeros(sheetOld.isDisplayZeros()); sheetNew.setFitToPage(sheetOld.getFitToPage()); sheetNew.setHorizontallyCenter(sheetOld.getHorizontallyCenter()); sheetNew.setMargin(Sheet.BottomMargin, sheetOld.getMargin(Sheet.BottomMargin)); sheetNew.setMargin(Sheet.FooterMargin, sheetOld.getMargin(Sheet.FooterMargin)); sheetNew.setMargin(Sheet.HeaderMargin, sheetOld.getMargin(Sheet.HeaderMargin)); sheetNew.setMargin(Sheet.LeftMargin, sheetOld.getMargin(Sheet.LeftMargin)); sheetNew.setMargin(Sheet.RightMargin, sheetOld.getMargin(Sheet.RightMargin)); sheetNew.setMargin(Sheet.TopMargin, sheetOld.getMargin(Sheet.TopMargin)); sheetNew.setPrintGridlines(sheetNew.isPrintGridlines()); sheetNew.setRightToLeft(sheetNew.isRightToLeft()); sheetNew.setRowSumsBelow(sheetNew.getRowSumsBelow()); sheetNew.setRowSumsRight(sheetNew.getRowSumsRight()); sheetNew.setVerticallyCenter(sheetOld.getVerticallyCenter()); HSSFRow rowNew; for (Row row : sheetOld) { rowNew = 
sheetNew.createRow(row.getRowNum()); if (rowNew != null) this.transform(workbookOld, workbookNew, (XSSFRow) row, rowNew); } for (int i = 0; i < this.lastColumn; i++) { sheetNew.setColumnWidth(i, sheetOld.getColumnWidth(i)); sheetNew.setColumnHidden(i, sheetOld.isColumnHidden(i)); } for (int i = 0; i < sheetOld.getNumMergedRegions(); i++) { CellRangeAddress merged = sheetOld.getMergedRegion(i); sheetNew.addMergedRegion(merged); } } private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew, XSSFRow rowOld, HSSFRow rowNew) { HSSFCell cellNew; rowNew.setHeight(rowOld.getHeight()); for (Cell cell : rowOld) { cellNew = rowNew.createCell(cell.getColumnIndex(), cell.getCellType()); if (cellNew != null) this.transform(workbookOld, workbookNew, (XSSFCell) cell, cellNew); } this.lastColumn = Math.max(this.lastColumn, rowOld.getLastCellNum()); } private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew, XSSFCell cellOld, HSSFCell cellNew) { cellNew.setCellComment(cellOld.getCellComment()); Integer hash = cellOld.getCellStyle().hashCode(); if (this.styleMap != null && !this.styleMap.containsKey(hash)) { this.transform(workbookOld, workbookNew, hash, cellOld.getCellStyle(), (HSSFCellStyle) workbookNew.createCellStyle()); } cellNew.setCellStyle(this.styleMap.get(hash));switch (cellOld.getCellType().toString()) {case "BLANK":break;case "BOOLEAN":cellNew.setCellValue(cellOld.getBooleanCellValue());break;case "ERROR":cellNew.setCellValue(cellOld.getErrorCellValue());break;case "FORMULA":cellNew.setCellValue(cellOld.getCellFormula());break;case "NUMERIC":cellNew.setCellValue(cellOld.getNumericCellValue());break;case "STRING":cellNew.setCellValue(cellOld.getStringCellValue());break;default:System.out.println("transform: Unbekannter Zellentyp "+ cellOld.getCellType());}} private void transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew, Integer hash, XSSFCellStyle styleOld, HSSFCellStyle styleNew) { styleNew.setAlignment(styleOld.getAlignment()); 
styleNew.setBorderBottom(styleOld.getBorderBottom()); styleNew.setBorderLeft(styleOld.getBorderLeft()); styleNew.setBorderRight(styleOld.getBorderRight()); styleNew.setBorderTop(styleOld.getBorderTop()); //styleNew.setDataFormat(this.transform(workbookOld, workbookNew, // styleOld.getDataFormat())); styleNew.setFillBackgroundColor(styleOld.getFillBackgroundColor()); styleNew.setFillForegroundColor(styleOld.getFillForegroundColor()); styleNew.setFillPattern(styleOld.getFillPattern()); styleNew.setFont(this.transform(workbookNew, (XSSFFont) styleOld.getFont())); styleNew.setHidden(styleOld.getHidden()); styleNew.setIndention(styleOld.getIndention()); styleNew.setLocked(styleOld.getLocked()); styleNew.setVerticalAlignment(styleOld.getVerticalAlignment()); styleNew.setWrapText(styleOld.getWrapText()); this.styleMap.put(hash, styleNew); } private short transform(XSSFWorkbook workbookOld, HSSFWorkbook workbookNew, short index) { DataFormat formatOld = workbookOld.createDataFormat(); DataFormat formatNew = workbookNew.createDataFormat(); return formatNew.getFormat(formatOld.getFormat(index)); } private HSSFFont transform(HSSFWorkbook workbookNew, XSSFFont fontOld) { HSSFFont fontNew = workbookNew.createFont(); //fontNew.setBoldweight(fontOld.getBoldweight());fontNew.setCharSet(fontOld.getCharSet()); fontNew.setColor(fontOld.getColor()); fontNew.setFontName(fontOld.getFontName()); fontNew.setFontHeight(fontOld.getFontHeight()); fontNew.setItalic(fontOld.getItalic()); fontNew.setStrikeout(fontOld.getStrikeout()); fontNew.setTypeOffset(fontOld.getTypeOffset()); fontNew.setUnderline(fontOld.getUnderline()); return fontNew; } }
四、总结
1.主要几个maven包的依赖版本需要一致
2.文档必须是标准的word文档。举个例子,从boss直聘上下载下来的简历不能预览,因为文件内容实际是html格式,转换时会出现异常:
Document is really HTML File。解决办法是用office打开后另存为标准word格式。
3.不能直接修改文件后缀名预览,虽然office能打开,但是不是标准word格式,需要另存为你想要的格式(doc,docx),否则会出现异常java.lang.IllegalArgumentException: The document is really a OOXML file
4.尝试过spire.doc,用的是免费版,文档超过三页不能预览,这一方面官网给出了解释,最终选定poi这个方案
版权声明:本文为linanqi_java原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接和本声明。
本文链接:https://blog.csdn.net/linanqi_java/article/details/109291562