M3-APP-03: PDFBox-Extraktion technisch sauber abgegrenzt und
Fehlersemantik korrigiert
This commit is contained in:
@@ -0,0 +1,164 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.pdfextraction;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Tests for {@link PdfTextExtractionPortAdapter}.
|
||||
* <p>
|
||||
* M3-AP-003: Minimal tests validating basic extraction functionality and technical error handling.
|
||||
* In AP-003 scope: all extraction problems are treated as TechnicalError, not ContentError.
|
||||
* No fachliche validation of text content (that is AP-004).
|
||||
* PDFs are created programmatically using PDFBox to avoid external dependencies on test files.
|
||||
*
|
||||
* @since M3-AP-003
|
||||
*/
|
||||
class PdfTextExtractionPortAdapterTest {
|
||||
|
||||
private PdfTextExtractionPortAdapter adapter;
|
||||
|
||||
@TempDir
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
adapter = new PdfTextExtractionPortAdapter();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNullCandidateThrowsNullPointerException() {
|
||||
assertThrows(NullPointerException.class, () -> adapter.extractTextAndPageCount(null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNonExistentFileReturnsTechnicalError() throws Exception {
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"nonexistent.pdf",
|
||||
1,
|
||||
new SourceDocumentLocator("/path/that/does/not/exist.pdf")
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
assertInstanceOf(PdfExtractionTechnicalError.class, result);
|
||||
PdfExtractionTechnicalError error = (PdfExtractionTechnicalError) result;
|
||||
assertTrue(error.errorMessage().contains("not found"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSimplePdfExtractionSuccess() throws Exception {
|
||||
// Create a simple single-page PDF
|
||||
Path pdfFile = tempDir.resolve("simple.pdf");
|
||||
createSimplePdf(pdfFile);
|
||||
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"simple.pdf",
|
||||
Files.size(pdfFile),
|
||||
new SourceDocumentLocator(pdfFile.toAbsolutePath().toString())
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
assertInstanceOf(PdfExtractionSuccess.class, result);
|
||||
PdfExtractionSuccess success = (PdfExtractionSuccess) result;
|
||||
assertEquals(1, success.pageCount().value());
|
||||
assertNotNull(success.extractedText());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testMultiPagePdfExtractionSuccess() throws Exception {
|
||||
// Create a three-page PDF
|
||||
Path pdfFile = tempDir.resolve("multipage.pdf");
|
||||
createMultiPagePdf(pdfFile, 3);
|
||||
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"multipage.pdf",
|
||||
Files.size(pdfFile),
|
||||
new SourceDocumentLocator(pdfFile.toAbsolutePath().toString())
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
assertInstanceOf(PdfExtractionSuccess.class, result);
|
||||
PdfExtractionSuccess success = (PdfExtractionSuccess) result;
|
||||
assertEquals(3, success.pageCount().value());
|
||||
assertNotNull(success.extractedText());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadablePdfWithEmptyTextReturnsSuccess() throws Exception {
|
||||
// Create a PDF with no text content (blank page)
|
||||
// This is a technically readable PDF, so it should succeed
|
||||
Path pdfFile = tempDir.resolve("blank.pdf");
|
||||
createBlankPdf(pdfFile);
|
||||
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"blank.pdf",
|
||||
Files.size(pdfFile),
|
||||
new SourceDocumentLocator(pdfFile.toAbsolutePath().toString())
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
// AP-003: Empty text is SUCCESS, not an error
|
||||
// Fachliche Bewertung of text content happens in AP-004
|
||||
assertInstanceOf(PdfExtractionSuccess.class, result);
|
||||
PdfExtractionSuccess success = (PdfExtractionSuccess) result;
|
||||
assertEquals(1, success.pageCount().value());
|
||||
assertNotNull(success.extractedText()); // May be empty, but not null
|
||||
}
|
||||
|
||||
// --- Helper methods to create test PDFs ---
|
||||
|
||||
/**
|
||||
* Creates a simple single-page PDF.
|
||||
*/
|
||||
private void createSimplePdf(Path filePath) throws Exception {
|
||||
PDDocument document = new PDDocument();
|
||||
PDPage page = new PDPage();
|
||||
document.addPage(page);
|
||||
document.save(filePath.toAbsolutePath().toString());
|
||||
document.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a PDF with multiple blank pages.
|
||||
*/
|
||||
private void createMultiPagePdf(Path filePath, int pageCount) throws Exception {
|
||||
PDDocument document = new PDDocument();
|
||||
for (int i = 0; i < pageCount; i++) {
|
||||
PDPage page = new PDPage();
|
||||
document.addPage(page);
|
||||
}
|
||||
document.save(filePath.toAbsolutePath().toString());
|
||||
document.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a blank PDF with a single page and no text.
|
||||
*/
|
||||
private void createBlankPdf(Path filePath) throws Exception {
|
||||
PDDocument document = new PDDocument();
|
||||
PDPage page = new PDPage();
|
||||
document.addPage(page);
|
||||
document.save(filePath.toAbsolutePath().toString());
|
||||
document.close();
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user