在网上找过很多方法,都有中文乱码的问题。
这里使用PdfBox方法解决中文的乱码问题
依赖jar包
pdfbox-app 2.0.16
fontbox 2.0.16
<!-- Maven坐标 -->
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/pdfbox-app -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox-app</artifactId>
    <version>2.0.16</version>
</dependency>
<!-- https://mvnrepository.com/artifact/org.apache.pdfbox/fontbox -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>fontbox</artifactId>
    <version>2.0.16</version>
</dependency>
package com.archser.fserver.util;import java.awt.image.BufferedImage;
import java.io.File;import javax.imageio.ImageIO;import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.ImageType;
import org.apache.pdfbox.rendering.PDFRenderer;public class PDFChangeToImage {// 默认图片分辨率按需求调节,该参数影响生成时间public static final float DEFAULT_DPI = 500;// 默认转换的图片格式为jpg// 如果生成将PDF生成一张长图片时,页数过大会抛出异常建议使用PNG格式public static final String DEFAULT_FORMAT = "jpg";/*** 将PDF每一页保存为一个图片* * @param pdfPath PDF文件地址* @param imgPath 保存文件地址*/public static void pdfToImage(String pdfPath, String imgPath) {try {File file = new File(pdfPath);String name = file.getName().replaceAll("\\.pdf", "");// PdfBox生成图像PDDocument pdDocument = PDDocument.load(file);PDFRenderer renderer = new PDFRenderer(pdDocument);// 存储文件StringBuffer imgFilePath;String savePath = imgPath + "\\" + name;// 验证路径是否存在createDirectory(savePath);for (int i = 0, len = pdDocument.getNumberOfPages(); i < len; i++) {imgFilePath = new StringBuffer();imgFilePath.append(savePath);imgFilePath.append("\\" + name);imgFilePath.append("_");imgFilePath.append(String.valueOf(i + 1));imgFilePath.append("." + DEFAULT_FORMAT);File dstFile = new File(imgFilePath.toString());BufferedImage image = renderer.renderImageWithDPI(i, DEFAULT_DPI);ImageIO.write(image, DEFAULT_FORMAT, dstFile);}} catch (Exception e) {e.printStackTrace();}}/*** 将PDF保存为一个长图片* * @param pdfPath PDF文件地址* @param imgPath 保存文件地址,需要实体文件 例如 C:\\file\\1.jpg* @param page_end 需要转换的页数 -1 为转换全部*/public static void pdfToImage(String pdfPath, String imgPath, int page_end) {try {// 宽度int width = 0;// 保存一张图片中的RGB数据int[] singleImgRGB;int shiftHeight = 0;// 保存每张图片的像素值BufferedImage imageResult = null;// 利用PdfBox生成图像PDDocument pdDocument = PDDocument.load(new File(pdfPath));PDFRenderer renderer = new PDFRenderer(pdDocument);// 循环每个页码for (int i = 0, len = pdDocument.getNumberOfPages(); i < len; i++) {if (i == -1 || i == page_end) {BufferedImage image = renderer.renderImageWithDPI(i, DEFAULT_DPI, ImageType.RGB);int imageHeight = image.getHeight();int imageWidth = image.getWidth();// 使用第一张图片宽度;width = imageWidth;// 保存每页图片的像素值imageResult = new BufferedImage(width, imageHeight, 
BufferedImage.TYPE_INT_RGB);// 这里有高度,可以将imageHeight*len,我这里值提取一页所以不需要singleImgRGB = image.getRGB(0, 0, width, imageHeight, null, 0, width);// 写入流中imageResult.setRGB(0, shiftHeight, width, imageHeight, singleImgRGB, 0, width);} else if (i > page_end) {continue;}}pdDocument.close();// 写图片ImageIO.write(imageResult, DEFAULT_FORMAT, new File(imgPath));} catch (Exception e) {e.printStackTrace();}}private static boolean createDirectory(String folder) {File dir = new File(folder);if (dir.exists()) {return true;} else {return dir.mkdirs();}}}