From 2b9dc1c6efe0597b92d641e798f5f13864e73cbf Mon Sep 17 00:00:00 2001 From: Xiao Date: Fri, 24 Sep 2021 19:32:16 -0400 Subject: [PATCH] PDFBOX-5284: Refactor PDFTabulaTextStripper to improve test design --- .../apache/pdfbox/text/TestTextStripper.java | 97 ++++++++----------- 1 file changed, 40 insertions(+), 57 deletions(-) diff --git a/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java b/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java index a519fa62bec..b6404e6581a 100644 --- a/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java +++ b/pdfbox/src/test/java/org/apache/pdfbox/text/TestTextStripper.java @@ -59,6 +59,10 @@ import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; +import static org.mockito.Mockito.any; +import static org.mockito.Mockito.doAnswer; +import static org.mockito.Mockito.spy; + /** * Test suite for PDFTextStripper. @@ -108,6 +112,40 @@ class TestTextStripper { + PDFTextStripper mockPDFTextStripper1() throws IOException { + PDFTextStripper mockInstance = spy(PDFTextStripper.class); + doAnswer((stubInvo) -> { + PDFont font = stubInvo.getArgument(0); + BoundingBox bbox = font.getBoundingBox(); + if (bbox.getLowerLeftY() < Short.MIN_VALUE) { + bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536)); + } + float glyphHeight = bbox.getHeight() / 2; + PDFontDescriptor fontDescriptor = font.getFontDescriptor(); + if (fontDescriptor != null) { + float capHeight = fontDescriptor.getCapHeight(); + if (Float.compare(capHeight, 0) != 0 + && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) { + glyphHeight = capHeight; + } + float ascent = fontDescriptor.getAscent(); + float descent = fontDescriptor.getDescent(); + if (ascent > 0 && descent < 0 + && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) { + glyphHeight = (ascent - descent) / 2; + } + } + float height; + if (font instanceof PDType3Font) { + height = 
font.getFontMatrix().transformPoint(0, glyphHeight).y; + } else { + height = glyphHeight / 1000; + } + return height; + }).when(mockInstance).computeFontHeight(any(PDFont.class)); + return mockInstance; + } + /** * Logger instance. */ @@ -597,7 +635,8 @@ void testTabula() throws IOException File expectedOutFile = new File("src/test/resources/input","eu-001.pdf-tabula.txt"); File diffFile = new File("target/test-output","eu-001.pdf-tabula-diff.txt"); PDDocument tabulaDocument = Loader.loadPDF(pdfFile); - PDFTextStripper tabulaStripper = new PDFTabulaTextStripper(); + // Construct mock object + PDFTextStripper tabulaStripper = mockPDFTextStripper1(); try (OutputStream os = new FileOutputStream(outFile)) { @@ -616,60 +655,4 @@ void testTabula() throws IOException assertFalse(bFail); } - private class PDFTabulaTextStripper extends PDFTextStripper - { - PDFTabulaTextStripper() throws IOException - { - // empty - } - - @Override - protected float computeFontHeight(PDFont font) throws IOException - { - BoundingBox bbox = font.getBoundingBox(); - if (bbox.getLowerLeftY() < Short.MIN_VALUE) - { - // PDFBOX-2158 and PDFBOX-3130 - // files by Salmat eSolutions / ClibPDF Library - bbox.setLowerLeftY(-(bbox.getLowerLeftY() + 65536)); - } - // 1/2 the bbox is used as the height todo: why? 
- float glyphHeight = bbox.getHeight() / 2; - - // sometimes the bbox has very high values, but CapHeight is OK - PDFontDescriptor fontDescriptor = font.getFontDescriptor(); - if (fontDescriptor != null) - { - float capHeight = fontDescriptor.getCapHeight(); - if (Float.compare(capHeight, 0) != 0 - && (capHeight < glyphHeight || Float.compare(glyphHeight, 0) == 0)) - { - glyphHeight = capHeight; - } - // PDFBOX-3464, PDFBOX-448: - // sometimes even CapHeight has very high value, but Ascent and Descent are ok - float ascent = fontDescriptor.getAscent(); - float descent = fontDescriptor.getDescent(); - if (ascent > 0 && descent < 0 - && ((ascent - descent) / 2 < glyphHeight || Float.compare(glyphHeight, 0) == 0)) - { - glyphHeight = (ascent - descent) / 2; - } - } - - // transformPoint from glyph space -> text space - float height; - if (font instanceof PDType3Font) - { - height = font.getFontMatrix().transformPoint(0, glyphHeight).y; - } - else - { - height = glyphHeight / 1000; - } - - return height; - } - } - }