From 839555c33e0219243c2c0c6d926ff2db9f47c879 Mon Sep 17 00:00:00 2001 From: wanggeng888 <450292408@qq.com> Date: Fri, 5 Mar 2021 15:12:43 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0PDF=E8=BD=AC=E5=9B=BE?= =?UTF-8?q?=E7=89=87=E5=8A=9F=E8=83=BD?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cloud-common/pom.xml | 19 + .../java/com/cm/common/utils/PDFUtil.java | 367 ++++++++++++++++++ pom.xml | 23 ++ 3 files changed, 409 insertions(+) create mode 100644 cloud-common/src/main/java/com/cm/common/utils/PDFUtil.java diff --git a/cloud-common/pom.xml b/cloud-common/pom.xml index 2342a62..e53b452 100644 --- a/cloud-common/pom.xml +++ b/cloud-common/pom.xml @@ -246,6 +246,25 @@ javax.mail + + + + com.itextpdf + itextpdf + + + com.itextpdf.tool + xmlworker + + + com.itextpdf + itext-asian + + + org.apache.pdfbox + pdfbox + + \ No newline at end of file diff --git a/cloud-common/src/main/java/com/cm/common/utils/PDFUtil.java b/cloud-common/src/main/java/com/cm/common/utils/PDFUtil.java new file mode 100644 index 0000000..8139243 --- /dev/null +++ b/cloud-common/src/main/java/com/cm/common/utils/PDFUtil.java @@ -0,0 +1,367 @@
+package com.cm.common.utils;
+
+import com.itextpdf.awt.geom.Rectangle2D;
+import com.itextpdf.text.*;
+import com.itextpdf.text.pdf.*;
+import com.itextpdf.text.pdf.parser.*;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.rendering.PDFRenderer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import javax.imageio.ImageIO;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayOutputStream;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * When you feel like quitting, think about why you started.
+ *
+ * PDF helpers: render pages to PNG images, create a simple titled document, and stamp an
+ * image next to a keyword found in a page's text.
+ *
+ * @author WangGeng
+ * @version 1.0
+ * @since 2020/12/26
+ */
+public class PDFUtil {
+
+    private static final Logger LOG = LoggerFactory.getLogger(PDFUtil.class);
+
+    /** Resolution used when rasterizing pages; a higher DPI is sharper but slower to render. */
+    private static final float RENDER_DPI = 200F;
+
+    /**
+     * Renders every page of a PDF to a PNG image.
+     *
+     * @param pdfBytes the PDF document as a byte array
+     * @return one PNG-encoded byte array per page, in page order
+     * @throws IOException if the document cannot be loaded, rendered, or encoded
+     */
+    public static List<byte[]> pdfToImage(byte[] pdfBytes) throws IOException {
+        List<byte[]> result = new ArrayList<>();
+        try (PDDocument document = PDDocument.load(pdfBytes)) {
+            PDFRenderer renderer = new PDFRenderer(document);
+            int pageCount = document.getNumberOfPages();
+            for (int i = 0; i < pageCount; ++i) {
+                BufferedImage bufferedImage = renderer.renderImageWithDPI(i, RENDER_DPI);
+                ByteArrayOutputStream out = new ByteArrayOutputStream();
+                // ImageIO.write signals a missing encoder by returning false, not by throwing.
+                if (!ImageIO.write(bufferedImage, "png", out)) {
+                    throw new IOException("No PNG writer available for page index " + i);
+                }
+                result.add(out.toByteArray());
+            }
+        }
+        return result;
+    }
+
+    /**
+     * Renders every page of a PDF file to PNG files named 0.png, 1.png, ... in a folder.
+     *
+     * @param pdfPath   full path of the PDF file
+     * @param outFolder output folder; it must end with a path separator because the file
+     *                  name is appended to it directly
+     * @throws IOException if the PDF cannot be read or an image cannot be written
+     */
+    public static void pdfToImage(String pdfPath, String outFolder) throws IOException {
+        ByteArrayOutputStream pdfBuffer = new ByteArrayOutputStream();
+        // try-with-resources: the input stream is closed even when reading fails.
+        try (FileInputStream in = new FileInputStream(pdfPath)) {
+            byte[] buf = new byte[8192];
+            int readLength;
+            while ((readLength = in.read(buf)) > -1) {
+                pdfBuffer.write(buf, 0, readLength);
+            }
+        }
+        List<byte[]> images = pdfToImage(pdfBuffer.toByteArray());
+        for (int imageIndex = 0; imageIndex < images.size(); imageIndex++) {
+            try (FileOutputStream out = new FileOutputStream(outFolder + imageIndex + ".png")) {
+                out.write(images.get(imageIndex));
+            }
+        }
+    }
+
+    /**
+     * Creates a PDF with a red, centered title followed by the body text, using the
+     * STSong-Light font so that Chinese text can be rendered.
+     *
+     * @param title       title text
+     * @param titleSize   title font size
+     * @param content     body text
+     * @param contentSize body font size
+     * @param ourFile     path of the PDF file to write
+     * @return true if the file was generated, false if any step failed (the error is logged)
+     */
+    public static boolean createPdf(String title, int titleSize, String content, int contentSize, String ourFile) {
+        Document document = new Document();
+        // The stream is a managed resource so that it is released even when a later step
+        // (font loading, writer setup, adding content) throws.
+        try (FileOutputStream out = new FileOutputStream(ourFile)) {
+            BaseFont baseFontChinese = BaseFont.createFont("STSong-Light", "UniGB-UCS2-H", BaseFont.NOT_EMBEDDED);
+            Font titleFont = new Font(baseFontChinese, titleSize, Font.NORMAL);
+            titleFont.setColor(BaseColor.RED);
+            Font contentFont = new Font(baseFontChinese, contentSize, Font.NORMAL);
+
+            PdfWriter.getInstance(document, out);
+            document.open();
+            Paragraph paragraph = new Paragraph(title, titleFont);
+            paragraph.setAlignment(Element.ALIGN_CENTER);
+            document.add(paragraph);
+            document.add(new Paragraph("\n"));
+            document.add(new Paragraph(content, contentFont));
+            // Close inside the try so that a failure while finishing the file is reported
+            // as false instead of escaping from a finally block.
+            document.close();
+            return true;
+        } catch (Exception e) {
+            LOG.error(e.getMessage(), e);
+            return false;
+        }
+    }
+
+    /**
+     * Stamps an image just after a keyword in a PDF and writes the result to a new file.
+     * The image is scaled to 50% and placed under the content of the page on which the
+     * keyword was found.
+     *
+     * @param sourceFile path of the source PDF
+     * @param keyword    text to look for
+     * @param outFile    path of the PDF to write
+     * @param imageFile  path of the image to insert
+     * @return true on success; false if the keyword was not found or any step failed
+     */
+    public static boolean addImageAfterKeyword(String sourceFile, String keyword, String outFile, String imageFile) {
+        PdfReader pdfReader = null;
+        try {
+            pdfReader = new PdfReader(sourceFile);
+            try (FileOutputStream out = new FileOutputStream(outFile)) {
+                PdfStamper pdfStamper = new PdfStamper(pdfReader, out);
+                try {
+                    CharInfo lastCharInfo = readPdfAndGetLastKeywordCharInfo(pdfReader, keyword);
+                    if (lastCharInfo == null) {
+                        LOG.debug("未在PDF正文中匹配到关键字");
+                        return false;
+                    }
+                    Image image = Image.getInstance(imageFile);
+                    image.scalePercent(50F);
+                    // Fixed offsets relative to the matched character: +10 on x, -25 on y.
+                    image.setAbsolutePosition((float) (lastCharInfo.getX() + 10), (float) (lastCharInfo.getY() - 25));
+                    // Stamp the page the keyword is on; the coordinates are only valid there.
+                    PdfContentByte underContent = pdfStamper.getUnderContent(lastCharInfo.getPageNum());
+                    underContent.addImage(image);
+                } finally {
+                    // Closed exactly once, on every path; a failure here reaches the catch below.
+                    pdfStamper.close();
+                }
+            }
+            return true;
+        } catch (Exception e) {
+            LOG.error(e.getMessage(), e);
+            return false;
+        } finally {
+            if (pdfReader != null) {
+                pdfReader.close();
+            }
+        }
+    }
+
+    /**
+     * Finds a keyword in the text of a PDF and returns the position of its last character.
+     * Each page is searched separately for the first occurrence; when several pages contain
+     * the keyword, the match on the last such page is returned.
+     *
+     * @param pdfReader reader of the PDF to search
+     * @param keyword   text to look for
+     * @return the last character of the match with its page number set, or null if the
+     *         keyword is null, empty, or not found
+     * @throws Exception if a page's content stream cannot be parsed
+     */
+    public static CharInfo readPdfAndGetLastKeywordCharInfo(PdfReader pdfReader, String keyword) throws Exception {
+        if (keyword == null || keyword.isEmpty()) {
+            return null;
+        }
+        CharInfo lastCharInfo = null;
+        int pageCount = pdfReader.getNumberOfPages();
+        for (int pageNum = 1; pageNum <= pageCount; pageNum++) {
+            PdfDictionary pageDictionary = pdfReader.getPageN(pageNum);
+            PdfDictionary resources = pageDictionary.getAsDict(PdfName.RESOURCES);
+            // A fresh listener per page keeps text and positions of different pages apart.
+            PdfRenderListener listener = new PdfRenderListener();
+            PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
+            processor.processContent(ContentByteUtils.getContentBytesForPage(pdfReader, pageNum), resources);
+
+            // Search the untrimmed text: its indexes line up with the position list.
+            int index = listener.getContent().indexOf(keyword);
+            if (index < 0) {
+                continue;
+            }
+            List<CharInfo> charInfos = listener.getCharInfos();
+            int lastCharIndex = index + keyword.length() - 1;
+            if (lastCharIndex < charInfos.size()) {
+                lastCharInfo = charInfos.get(lastCharIndex);
+                lastCharInfo.setPageNum(pageNum);
+            }
+        }
+        return lastCharInfo;
+    }
+
+    /**
+     * Render listener that records the text of one page together with the position of
+     * every character. State is per instance, so use a new listener for each page.
+     */
+    public static class PdfRenderListener implements RenderListener {
+
+        // Instance fields: static ones would mix the text of every page and every document.
+        private final List<CharInfo> charInfos = new ArrayList<>();
+        private final StringBuilder content = new StringBuilder();
+
+        @Override
+        public void beginTextBlock() {
+        }
+
+        /**
+         * Appends the characters of one text chunk and their bounding boxes.
+         *
+         * @param textRenderInfo the text chunk reported by the content stream processor
+         */
+        @Override
+        public void renderText(TextRenderInfo textRenderInfo) {
+            List<TextRenderInfo> characterRenderInfos = textRenderInfo.getCharacterRenderInfos();
+            for (TextRenderInfo characterRenderInfo : characterRenderInfos) {
+                String word = characterRenderInfo.getText();
+                if (word == null || word.isEmpty()) {
+                    continue;
+                }
+                Rectangle2D.Float bounds = characterRenderInfo.getAscentLine().getBoundingRectange();
+                CharInfo charInfo = new CharInfo(bounds.getMinX(), bounds.getMinY(), bounds.getWidth(), bounds.getHeight());
+                // One entry per appended char keeps list indexes equal to text indexes even
+                // when a glyph decodes to more than one char.
+                for (int k = 0; k < word.length(); k++) {
+                    charInfos.add(charInfo);
+                }
+                content.append(word);
+            }
+        }
+
+        @Override
+        public void endTextBlock() {
+        }
+
+        /** Images are ignored; only text is collected. */
+        @Override
+        public void renderImage(ImageRenderInfo imageRenderInfo) {
+        }
+
+        /** @return the text collected so far, untrimmed */
+        public String getContent() {
+            return content.toString();
+        }
+
+        /** @return the character positions collected so far, index-aligned with the text */
+        public List<CharInfo> getCharInfos() {
+            return charInfos;
+        }
+    }
+
+    /**
+     * Text and character positions of one page.
+     */
+    public static class PageInfo {
+        private String content;
+        private List<CharInfo> charInfos;
+
+        public PageInfo(String content, List<CharInfo> charInfos) {
+            this.content = content;
+            this.charInfos = charInfos;
+        }
+
+        /**
+         * @return the page text, trimmed, or an empty string if none was set; because of the
+         *         trimming its indexes may not line up with {@link #getCharInfos()}
+         */
+        public String getContent() {
+            return content == null ? "" : content.trim();
+        }
+
+        public void setContent(String content) {
+            this.content = content;
+        }
+
+        /** @return the character positions, or a new empty list if none was set */
+        public List<CharInfo> getCharInfos() {
+            if (charInfos == null) {
+                return new ArrayList<>();
+            }
+            return charInfos;
+        }
+
+        public void setCharInfos(List<CharInfo> charInfos) {
+            this.charInfos = charInfos;
+        }
+    }
+
+    /**
+     * Position and size of one character, plus the page it was found on.
+     */
+    public static class CharInfo {
+        private double x;
+        private double y;
+        private double charWidth;
+        private double charHeight;
+        // 1-based page number; defaults to the first page.
+        private int pageNum = 1;
+
+        public CharInfo(double x, double y, double charWidth, double charHeight) {
+            this.x = x;
+            this.y = y;
+            this.charWidth = charWidth;
+            this.charHeight = charHeight;
+        }
+
+        public double getX() {
+            return x;
+        }
+
+        public void setX(double x) {
+            this.x = x;
+        }
+
+        public double getY() {
+            return y;
+        }
+
+        public void setY(double y) {
+            this.y = y;
+        }
+
+        public double getCharWidth() {
+            return charWidth;
+        }
+
+        public void setCharWidth(double charWidth) {
+            this.charWidth = charWidth;
+        }
+
+        public double getCharHeight() {
+            return charHeight;
+        }
+
+        public void setCharHeight(double charHeight) {
+            this.charHeight = charHeight;
+        }
+
+        public int getPageNum() {
+            return pageNum;
+        }
+
+        public void setPageNum(int pageNum) {
+            this.pageNum = pageNum;
+        }
+    }
+
+}
diff --git a/pom.xml b/pom.xml index fb9b8dc..27b298e 100644 --- a/pom.xml +++ b/pom.xml @@ -441,6 +441,29 @@ 1.56 + + + + com.itextpdf + itextpdf + 5.5.13.2 + + + com.itextpdf.tool + xmlworker + 5.5.13.2 + + + com.itextpdf + itext-asian + 5.2.0 + + + org.apache.pdfbox + pdfbox + 2.0.20 + +