Java实现pdf文件压缩
时间换空间,实现pdf文件压缩(图片会按质量参数重新压缩,因此属于有损压缩,文字等其他内容不受影响)。
1、依赖准备
市面上操作pdf文件的组件有spire.pdf.free、itextpdf、openpdf、pdfbox等,它们各有千秋。我们主要完成的场景为压缩,即减小文件大小,以优化存储、传输等。
在这里选取的组件为aspose-pdf和itextpdf,原因是:spire.pdf.free压缩代码比较直观和简单,但是只能免费压缩前10页;itextpdf压缩代码较为复杂、开发难度大,更适合用来去水印;而openpdf和pdfbox也有开发难度较大的问题。
1、aspose-pdf依赖
aspose-pdf可能比较冷门,阿里云maven仓库等没有对应的依赖,无法通过gav坐标添加!因此我们需要到中央仓库下载jar包!
地址为https://mvnrepository.com/artifact/com.aspose/aspose-pdf
建议选择低版本(高版本难以去除版权水印),如这里选择21.11版本。
将jar引入工程
这里可以参考这篇文章
https://blog.csdn.net/m0_46357847/article/details/140749772
如果是gradle工程,可参考下图
2、itextpdf依赖
这里主要用于去除aspose-pdf的版权水印,直接添加即可。
<!-- https://mvnrepository.com/artifact/com.itextpdf/itextpdf -->
<dependency><groupId>com.itextpdf</groupId><artifactId>itextpdf</artifactId><version>5.5.13</version>
</dependency>
<!-- https://mvnrepository.com/artifact/com.itextpdf/itext-asian -->
<dependency><groupId>com.itextpdf</groupId><artifactId>itext-asian</artifactId><version>5.2.0</version>
</dependency>
2、压缩代码实现
PdfCompression.java
压缩逻辑与去除水印的逻辑都在这个类上。
java">/*** TODO** @Description* @Author laizhenghua* @Date 2025/2/25 11:28**/
public class PdfCompression {private final static Logger log = LoggerFactory.getLogger(PdfCompression.class);/*** 水印字体常量*/private static final String WATERMARK_TEXT = "Evaluation Only. Created with Aspose.PDF. Copyright 2002-2021 Aspose Pty Ltd.";/*** 压缩比0-100可选 越低压缩比越大*/private int imageQuality = 40;public PdfCompression() {}public PdfCompression(int imageQuality) {this.imageQuality = imageQuality;}public void start(String fileName, String src, String dest) {InputStream inputStream = null;OutputStream outputStream = null;try {File srcFile = new File(src);inputStream = new FileInputStream(srcFile);File destFile = new File(dest);if (!destFile.exists()) {destFile.createNewFile();}outputStream = new FileOutputStream(destFile);start(fileName, inputStream, outputStream);} catch (IOException ex) {log.error(ex.getMessage());ex.printStackTrace();} finally {IoUtil.close(inputStream);IoUtil.close(outputStream);}}public void start(String fileName, InputStream inputStream, OutputStream outputStream) {long startTime = System.currentTimeMillis();int sourceSize = 0;long compressionSize = 0;OutputStream tempOutputStream = null;InputStream tempInputStream = null;try {// 创建临时文件// File tempFile = PathUtil.getDistTempFile(fileName);// 使用 hutool 工具类创建临时文件File tempFile = FileUtil.createTempFile("temp", ".pdf", new File("src/main/resources/static/"), true);tempOutputStream = new FileOutputStream(tempFile);Locale locale = new Locale("zh", "cn");Locale.setDefault(locale);// 记录原始大小单位为MBsourceSize = inputStream.available() / (1024 * 1024);// 读取pdf文档Document document = new Document(inputStream);// 设置压缩属性OptimizationOptions options = new OptimizationOptions();// 删除PDF不必要的对象options.setRemoveUnusedObjects(true);// 链接重复流options.setLinkDuplcateStreams(false);// 删除未使用的流options.setRemoveUnusedStreams(false);// 删除不必要的字体options.setUnembedFonts(true);// 压缩PDF中的图片options.getImageCompressionOptions().setCompressImages(true);// 图片压缩比 0-100可选 
越低压缩比越大options.getImageCompressionOptions().setImageQuality(imageQuality);document.optimizeResources(options);// 优化web的PDF文档document.optimize();// 先输出到临时文件方便后续去除水印document.save(tempOutputStream);// 关闭文档-此时 aspose-pdf 使命已达document.close();// tempOutputStream.flush();// 重新记录压缩后的大小compressionSize = tempFile.length() / (1024 * 1024);// 使用 itext-pdf 去除水印// ================== 去除水印 ==================List<MatchItem> matchItemList = new ArrayList<>();// itext-pdf readertempInputStream = new FileInputStream(tempFile);PdfReader reader = new PdfReader(tempInputStream);PdfReaderContentParser parser = new PdfReaderContentParser(reader);// pdf页数int pageSize = reader.getNumberOfPages();for (int pageNum = 1; pageNum <= pageSize; pageNum++) {Rectangle rectangle = reader.getPageSize(pageNum);// 匹配监听KeyWordPositionListener listener = new KeyWordPositionListener();listener.setKeyword(WATERMARK_TEXT);listener.setPageNumber(pageNum);listener.setCurPageSize(rectangle);parser.processContent(pageNum, listener);// 先判断本页中是否存在关键词List<MatchItem> allItems = listener.getAllItems();StringBuilder sbTemp = new StringBuilder();// 将一页中所有的块内容连接起来组成一个字符串for (MatchItem item : allItems) {sbTemp.append(item.getContent());}List<MatchItem> matches = listener.getMatches();// 第一种情况:关键词与块内容完全匹配的项直接返回if (!sbTemp.toString().contains(WATERMARK_TEXT) || matches.size() > 0) {matchItemList.addAll(matches);continue;}// 第二种情况:多个块内容拼成一个关键词,则一个一个来匹配,组装成一个关键词sbTemp = new StringBuilder();List<MatchItem> tempItems = new ArrayList<>();for (MatchItem item : allItems) {if (WATERMARK_TEXT.contains(item.getContent())) {tempItems.add(item);sbTemp.append(item.getContent());// 如果暂存的字符串和关键词 不再匹配时if (!WATERMARK_TEXT.contains(sbTemp.toString())) {sbTemp = new StringBuilder(item.getContent());tempItems.clear();tempItems.add(item);}// 暂存的字符串正好匹配到关键词时if (sbTemp.toString().equalsIgnoreCase(WATERMARK_TEXT)) {// 得到匹配的项matches.add(tempItems.get(0));// 清空暂存的字符串sbTemp = new StringBuilder();// 清空暂存的LISTtempItems.clear();// 继续查找}} else {// 
如果找不到则清空sbTemp = new StringBuilder();tempItems.clear();}}matchItemList.addAll(matches);}PdfStamper stamper = new PdfStamper(reader, outputStream);PdfContentByte canvas = null;Map<Integer, List<MatchItem>> mapItem = new HashMap<>();List<MatchItem> itemList = null;for (MatchItem item : matchItemList) {Integer pageNum = item.getPageNum();if (mapItem.containsKey(pageNum)) {itemList = mapItem.get(pageNum);itemList.add(item);} else {itemList = new ArrayList<>();itemList.add(item);mapItem.put(pageNum, itemList);}}// 遍历每一页去修改for (Integer page : mapItem.keySet()) {List<MatchItem> items = mapItem.get(page);// 遍历每一页中的匹配项for (MatchItem item : items) {canvas = stamper.getOverContent(page);float x = item.getX();float y = item.getY();float fontWidth = item.getFontWidth();canvas.saveState();canvas.setColorFill(BaseColor.WHITE);canvas.rectangle(x, y, fontWidth * WATERMARK_TEXT.length(), fontWidth + 2);canvas.fill();canvas.restoreState();// 开始写入文本canvas.beginText();BaseFont bf = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.EMBEDDED);Font font = new Font(bf, fontWidth, Font.BOLD);// 设置字体和大小canvas.setFontAndSize(font.getBaseFont(), fontWidth);// 设置字体的输出位置canvas.setTextMatrix(x, y + fontWidth / 10 + 0.5f);// 要输出的textcanvas.showText("");canvas.endText();}}stamper.close();reader.close();// 使用 hutool 工具类删除临时文件FileUtil.del(tempFile);} catch (Exception ex) {ex.printStackTrace();} finally {IoUtil.close(tempOutputStream);IoUtil.close(tempInputStream);}long endTime = System.currentTimeMillis();long duration = endTime - startTime;log.info("[{}] 压缩成功:[{}MB -> {}MB] 耗时为 {}s", fileName, sourceSize, compressionSize, duration / 1000);}
}
其他新增的辅助类:
MatchItem.java
/**
 * Position and size information for one piece of text found on a PDF page.
 *
 * <p>Originally written 2025/2/25.
 *
 * @author laizhenghua
 */
public class MatchItem {

    /** Page number the text was found on. */
    private Integer pageNum;

    /** X coordinate of the text. */
    private Float x;

    /** Y coordinate of the text. */
    private Float y;

    /** Width of the page. */
    private Float pageWidth;

    /** Height of the page. */
    private Float pageHeight;

    /** The matched text. */
    private String content;

    /** Font width. */
    private float fontWidth;

    /** Font height; defaults to 12. */
    private float fontHeight = 12;

    public Integer getPageNum() {
        return pageNum;
    }

    public void setPageNum(Integer pageNum) {
        this.pageNum = pageNum;
    }

    public Float getX() {
        return x;
    }

    public void setX(Float x) {
        this.x = x;
    }

    public Float getY() {
        return y;
    }

    public void setY(Float y) {
        this.y = y;
    }

    public Float getPageWidth() {
        return pageWidth;
    }

    public void setPageWidth(Float pageWidth) {
        this.pageWidth = pageWidth;
    }

    public Float getPageHeight() {
        return pageHeight;
    }

    public void setPageHeight(Float pageHeight) {
        this.pageHeight = pageHeight;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public float getFontWidth() {
        return fontWidth;
    }

    public void setFontWidth(float fontWidth) {
        this.fontWidth = fontWidth;
    }

    public float getFontHeight() {
        return fontHeight;
    }

    public void setFontHeight(float fontHeight) {
        this.fontHeight = fontHeight;
    }
}
KeyWordPositionListener.java
java">/*** TODO** @Description 用来匹配pdf的关键词-监听类* @Author laizhenghua* @Date 2025/2/25 10:01**/
public class KeyWordPositionListener implements RenderListener {// 存放匹配上的字符信息private final List<MatchItem> matches = new ArrayList<>();// 存放所有的字符信息private List<MatchItem> allItems = new ArrayList<>();private Rectangle curPageSize;/*** 匹配的关键字*/private String keyword;/*** 匹配的当前页*/private Integer pageNumber;@Overridepublic void beginTextBlock() {// do nothing}@Overridepublic void renderText(TextRenderInfo renderInfo) {// 获取字符String content = renderInfo.getText();Rectangle2D.Float textRectangle = renderInfo.getDescentLine().getBoundingRectange();MatchItem item = new MatchItem();item.setContent(content);item.setPageNum(pageNumber);item.setFontHeight(textRectangle.height == 0 ? 12 : textRectangle.height); // 默认12item.setFontWidth(textRectangle.width);item.setPageHeight(curPageSize.getHeight());item.setPageWidth(curPageSize.getWidth());item.setX((float) textRectangle.getX());item.setY((float) textRectangle.getY());// 若keyword是单个字符,匹配上的情况if (content.equalsIgnoreCase(keyword)) {matches.add(item);}// 保存所有的项allItems.add(item);}@Overridepublic void endTextBlock() {// do nothing}@Overridepublic void renderImage(ImageRenderInfo renderInfo) {//do nothing}/*** 设置需要匹配的当前页** @param pageNumber*/public void setPageNumber(Integer pageNumber) {this.pageNumber = pageNumber;}/*** 设置需要匹配的关键字,忽略大小写** @param keyword*/public void setKeyword(String keyword) {this.keyword = keyword;}/*** 返回匹配的结果列表** @return*/public List<MatchItem> getMatches() {return matches;}public void setCurPageSize(Rectangle rect) {this.curPageSize = rect;}public List<MatchItem> getAllItems() {return allItems;}public void setAllItems(List<MatchItem> allItems) {this.allItems = allItems;}
}
3、测试
详见以下代码
java">@Test
public void test4() {ClassPathResource resource = new ClassPathResource("/static/dist.pdf");InputStream inputStream = null;OutputStream outputStream = null;try {File file = new File("src/main/resources/static/output.pdf");if (!file.exists()) {file.createNewFile();}inputStream = resource.getInputStream();outputStream = new FileOutputStream(file);// 创建压缩类PdfCompression pdfCompression = new PdfCompression();// 调用start()方法开始压缩pdfCompression.start(resource.getFilename(), inputStream, outputStream);} catch (IOException ex) {ex.printStackTrace();} finally {IoUtil.close(inputStream);IoUtil.close(outputStream);}
}
执行代码后输出日志
再来看压缩效果