添加PDF转图片功能

This commit is contained in:
wanggeng888 2021-03-05 15:12:43 +08:00
parent aead919175
commit 839555c33e
3 changed files with 378 additions and 0 deletions

View File

@ -246,6 +246,25 @@
<artifactId>javax.mail</artifactId>
</dependency>
<!-- email end -->
<!-- pdf start -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
</dependency>
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
</dependency>
<!-- pdf end -->
</dependencies>
</project>

View File

@ -0,0 +1,336 @@
package com.cm.common.utils;
import com.itextpdf.awt.geom.Rectangle2D;
import com.itextpdf.text.*;
import com.itextpdf.text.pdf.*;
import com.itextpdf.text.pdf.parser.*;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.io.ByteArrayOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * When you feel like quitting. Think about why you started
 *
 * PDF helpers: render PDF pages to PNG images (PDFBox), create a simple
 * title + body PDF, and stamp an image next to a keyword in an existing PDF (iText 5).
 *
 * @ClassName: PDFUtil
 * @Description: pdf
 * @Author: WangGeng
 * @Date: 2020/12/26 21:39
 * @Version: 1.0
 **/
public class PDFUtil {

    private static final Logger LOG = LoggerFactory.getLogger(PDFUtil.class);

    /** Rendering resolution; a higher DPI gives a sharper image but a slower conversion. */
    private static final int RENDER_DPI = 200;

    /** Scale applied to the stamped image, in percent of its original size. */
    private static final float IMAGE_SCALE_PERCENT = 50F;

    /** Horizontal offset of the stamped image relative to the last keyword character. */
    private static final int IMAGE_OFFSET_X = 10;

    /** Vertical offset (subtracted) of the stamped image relative to the last keyword character. */
    private static final int IMAGE_OFFSET_Y = 25;

    /**
     * Converts every page of a PDF into a PNG image.
     *
     * @param pdfBytes the PDF document as a byte array
     * @return one PNG-encoded byte array per page, in page order
     * @throws IOException if the document cannot be loaded, rendered or encoded
     */
    public static List<byte[]> pdfToImage(byte[] pdfBytes) throws IOException {
        List<byte[]> result = new ArrayList<>();
        try (PDDocument document = PDDocument.load(pdfBytes)) {
            PDFRenderer renderer = new PDFRenderer(document);
            int pageCount = document.getNumberOfPages();
            for (int i = 0; i < pageCount; ++i) {
                BufferedImage bufferedImage = renderer.renderImageWithDPI(i, RENDER_DPI);
                ByteArrayOutputStream out = new ByteArrayOutputStream();
                // ImageIO.write reports a missing encoder through its return value, not an exception.
                if (!ImageIO.write(bufferedImage, "png", out)) {
                    throw new IOException("No PNG image writer available for page index " + i);
                }
                result.add(out.toByteArray());
            }
        }
        return result;
    }

    /**
     * Converts every page of a PDF file into a PNG file named {@code <pageIndex>.png}
     * (zero-based) inside the output folder.
     *
     * @param pdfPath   full path of the PDF file
     * @param outFolder output folder; must end with a path separator because the file name
     *                  is appended to it directly
     * @throws IOException if the PDF cannot be read or an image cannot be written
     */
    public static void pdfToImage(String pdfPath, String outFolder) throws IOException {
        byte[] pdfBytes;
        // try-with-resources so the input stream is released even when reading fails
        try (FileInputStream fileInputStream = new FileInputStream(pdfPath);
             ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream()) {
            byte[] buf = new byte[8192];
            int readLength;
            while ((readLength = fileInputStream.read(buf)) > -1) {
                byteArrayOutputStream.write(buf, 0, readLength);
            }
            pdfBytes = byteArrayOutputStream.toByteArray();
        }
        List<byte[]> imageBytesList = pdfToImage(pdfBytes);
        int imageIndex = 0;
        for (byte[] bytes : imageBytesList) {
            try (FileOutputStream fileOutputStream = new FileOutputStream(outFolder + imageIndex + ".png")) {
                fileOutputStream.write(bytes);
            }
            imageIndex++;
        }
    }

    /**
     * Creates a PDF file consisting of a centered red title followed by the body text.
     * Text is rendered with the built-in Chinese font {@code STSong-Light}.
     *
     * @param title       title text
     * @param titleSize   title font size
     * @param content     body text
     * @param contentSize body font size
     * @param ourFile     path of the output file
     * @return {@code true} if the file was generated, {@code false} on any failure
     *         (the failure is logged)
     */
    public static boolean createPdf(String title, int titleSize, String content, int contentSize, String ourFile) {
        try {
            // Fonts are created first so that a font failure does not leave an empty output file behind.
            BaseFont baseFontChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
            Font titleFont = new Font(baseFontChinese, titleSize, Font.NORMAL);
            titleFont.setColor(BaseColor.RED);
            Font contentFont = new Font(baseFontChinese, contentSize, Font.NORMAL);
            try (FileOutputStream out = new FileOutputStream(ourFile)) {
                Document document = new Document();
                PdfWriter.getInstance(document, out);
                document.open();
                try {
                    Paragraph paragraph = new Paragraph(title, titleFont);
                    paragraph.setAlignment(Element.ALIGN_CENTER);
                    document.add(paragraph);
                    document.add(new Paragraph("\n"));
                    document.add(new Paragraph(content, contentFont));
                } finally {
                    // close() may itself throw (e.g. when no page was produced); the outer
                    // catch turns that into a logged "false" instead of letting it escape.
                    document.close();
                }
            }
            return true;
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            return false;
        }
    }

    /**
     * Stamps an image next to the first occurrence of a keyword and writes the result
     * to a new file. The image is scaled to 50% and placed under the page content of the
     * page on which the keyword was found.
     * <p>
     * The output file is only created once the keyword has been located and the image has
     * been loaded, so a failed call does not leave a stray copy of the source behind.
     *
     * @param sourceFile source PDF file
     * @param keyword    keyword to search for
     * @param outFile    output PDF file
     * @param imageFile  image file to stamp
     * @return {@code true} on success, {@code false} if the keyword was not found or any
     *         error occurred (errors are logged)
     */
    public static boolean addImageAfterKeyword(String sourceFile, String keyword, String outFile, String imageFile) {
        PdfReader pdfReader = null;
        try {
            pdfReader = new PdfReader(sourceFile);
            CharInfo lastCharInfo = readPdfAndGetLastKeywordCharInfo(pdfReader, keyword);
            if (lastCharInfo == null) {
                LOG.debug("未在PDF正文中匹配到关键字");
                return false;
            }
            Image image = Image.getInstance(imageFile);
            image.scalePercent(IMAGE_SCALE_PERCENT);
            image.setAbsolutePosition((float) (lastCharInfo.getX() + IMAGE_OFFSET_X),
                    (float) (lastCharInfo.getY() - IMAGE_OFFSET_Y));
            try (FileOutputStream out = new FileOutputStream(outFile)) {
                PdfStamper pdfStamper = new PdfStamper(pdfReader, out);
                try {
                    // Stamp on the page that actually contains the keyword, not always page 1.
                    PdfContentByte underContent = pdfStamper.getUnderContent(lastCharInfo.getPageNumber());
                    underContent.addImage(image);
                } finally {
                    pdfStamper.close();
                }
            }
            return true;
        } catch (Exception e) {
            LOG.error(e.getMessage(), e);
            return false;
        } finally {
            if (pdfReader != null) {
                pdfReader.close();
            }
        }
    }

    /**
     * Scans the PDF page by page and returns the position of the last character of the
     * first occurrence of the keyword.
     * <p>
     * The returned {@link CharInfo} has its page number set to the (1-based) page on which
     * the keyword was found.
     *
     * @param pdfReader reader over the PDF to scan
     * @param keyword   keyword to search for
     * @return information about the last character of the keyword, or {@code null} if the
     *         keyword is {@code null}, empty, or not found on any page
     * @throws Exception if a page's content stream cannot be parsed
     */
    public static CharInfo readPdfAndGetLastKeywordCharInfo(PdfReader pdfReader, String keyword) throws Exception {
        if (keyword == null || keyword.isEmpty()) {
            return null;
        }
        int pageCount = pdfReader.getNumberOfPages();
        for (int pageNum = 1; pageNum <= pageCount; pageNum++) {
            PdfDictionary pageDictionary = pdfReader.getPageN(pageNum);
            PdfDictionary resourcesPdfDictionary = pageDictionary.getAsDict(PdfName.RESOURCES);
            // A fresh listener per page collects only that page's text.
            PdfRenderListener pdfRenderListener = new PdfRenderListener();
            PdfContentStreamProcessor pdfContentStreamProcessor = new PdfContentStreamProcessor(pdfRenderListener);
            pdfContentStreamProcessor.processContent(
                    ContentByteUtils.getContentBytesForPage(pdfReader, pageNum), resourcesPdfDictionary);

            // Search the untrimmed text: its indices line up one-to-one with the char info list.
            int index = pdfRenderListener.getContent().indexOf(keyword);
            if (index < 0) {
                continue;
            }
            List<CharInfo> charInfos = pdfRenderListener.getCharInfos();
            int lastCharIndex = index + keyword.length() - 1;
            if (lastCharIndex >= charInfos.size()) {
                continue;
            }
            CharInfo lastCharInfo = charInfos.get(lastCharIndex);
            lastCharInfo.setPageNumber(pageNum);
            return lastCharInfo;
        }
        return null;
    }

    /**
     * Render listener that records the text of a content stream together with the
     * bounding box of every character. After processing, index {@code i} of
     * {@link #getContent()} corresponds to index {@code i} of {@link #getCharInfos()}.
     * <p>
     * State is kept per instance; use a new listener for each page to be parsed.
     */
    public static class PdfRenderListener implements RenderListener {

        private final List<CharInfo> charInfos = new ArrayList<>();
        private final StringBuilder content = new StringBuilder();

        @Override
        public void beginTextBlock() {
        }

        /**
         * Records each character of the rendered text chunk and its bounding box.
         */
        @Override
        public void renderText(TextRenderInfo textRenderInfo) {
            List<TextRenderInfo> characterRenderInfos = textRenderInfo.getCharacterRenderInfos();
            for (TextRenderInfo characterRenderInfo : characterRenderInfos) {
                String word = characterRenderInfo.getText();
                if (word == null || word.isEmpty()) {
                    // Nothing is appended to the text, so nothing may be added to the list either.
                    continue;
                }
                Rectangle2D.Float boundingRectange = characterRenderInfo.getAscentLine().getBoundingRectange();
                CharInfo charInfo = new CharInfo(boundingRectange.getMinX(), boundingRectange.getMinY(),
                        boundingRectange.getWidth(), boundingRectange.getHeight());
                // One list entry per appended char keeps text indices and list indices aligned
                // even when a single glyph decodes to several characters.
                for (int k = 0; k < word.length(); k++) {
                    charInfos.add(charInfo);
                }
                content.append(word);
            }
        }

        @Override
        public void endTextBlock() {
        }

        /**
         * Images are ignored.
         */
        @Override
        public void renderImage(ImageRenderInfo imageRenderInfo) {
        }

        /**
         * @return the text collected so far, untrimmed
         */
        public String getContent() {
            return content.toString();
        }

        /**
         * @return the character information collected so far, index-aligned with {@link #getContent()}
         */
        public List<CharInfo> getCharInfos() {
            return charInfos;
        }
    }

    /**
     * Page information: the text of a page and the per-character position data.
     */
    public static class PageInfo {

        private String content;
        private List<CharInfo> charInfos;

        public PageInfo(String content, List<CharInfo> charInfos) {
            this.content = content;
            this.charInfos = charInfos;
        }

        /**
         * @return the page text with leading and trailing whitespace removed, or an empty
         *         string if no text was set. Because of the trimming, indices into this
         *         string may not line up with {@link #getCharInfos()}.
         */
        public String getContent() {
            return content == null ? "" : content.trim();
        }

        public void setContent(String content) {
            this.content = content;
        }

        /**
         * @return the character information, or a new empty list if none was set
         */
        public List<CharInfo> getCharInfos() {
            if (charInfos == null) {
                return new ArrayList<>();
            }
            return charInfos;
        }

        public void setCharInfos(List<CharInfo> charInfos) {
            this.charInfos = charInfos;
        }
    }

    /**
     * Character information: position and size of a character's bounding box, plus the
     * 1-based number of the page it belongs to (defaults to 1 until set).
     */
    public static class CharInfo {

        private double x;
        private double y;
        private double charWidth;
        private double charHeight;
        private int pageNumber = 1;

        public CharInfo(double x, double y, double charWidth, double charHeight) {
            this.x = x;
            this.y = y;
            this.charWidth = charWidth;
            this.charHeight = charHeight;
        }

        public double getX() {
            return x;
        }

        public void setX(double x) {
            this.x = x;
        }

        public double getY() {
            return y;
        }

        public void setY(double y) {
            this.y = y;
        }

        public double getCharWidth() {
            return charWidth;
        }

        public void setCharWidth(double charWidth) {
            this.charWidth = charWidth;
        }

        public double getCharHeight() {
            return charHeight;
        }

        public void setCharHeight(double charHeight) {
            this.charHeight = charHeight;
        }

        /**
         * @return the 1-based page number this character was found on
         */
        public int getPageNumber() {
            return pageNumber;
        }

        public void setPageNumber(int pageNumber) {
            this.pageNumber = pageNumber;
        }
    }
}

23
pom.xml
View File

@ -441,6 +441,29 @@
<version>1.56</version>
</dependency>
<!-- bouncycastle end -->
<!-- pdf start -->
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itextpdf</artifactId>
<version>5.5.13.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf.tool</groupId>
<artifactId>xmlworker</artifactId>
<version>5.5.13.2</version>
</dependency>
<dependency>
<groupId>com.itextpdf</groupId>
<artifactId>itext-asian</artifactId>
<version>5.2.0</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>2.0.20</version>
</dependency>
<!-- pdf end -->
</dependencies>
</dependencyManagement>