diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 47e5593..8e26047 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -12,7 +12,12 @@ "Bash(mvn -pl pdf-umbenenner-domain clean compile)", "Bash(mvn help:describe -Dplugin=org.apache.pdfbox:pdfbox -Ddetail=false)", "Bash(cd /d D:/Dev/Projects/pdf-umbenenner-parent)", - "Bash(mvn -v)" + "Bash(mvn -v)", + "Bash(grep -E \"\\\\.java$\")", + "Bash(grep \"\\\\.java$\")", + "Bash(mvn -q clean compile -DskipTests)", + "Bash(mvn -q test)", + "Bash(mvn -q clean test)" ] } } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3DocumentProcessingService.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3DocumentProcessingService.java new file mode 100644 index 0000000..0734d3f --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3DocumentProcessingService.java @@ -0,0 +1,80 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; +import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; + +import java.util.Objects; + +/** + * Orchestrates M3 document processing pipeline: extraction → pre-checks → outcome classification. + *
+ * Converts technical extraction results into M3 processing outcomes through this pipeline: + *
+ * This service produces {@link M3DocumentProcessingOutcome}, a sealed interface that covers + * all four M3 document processing outcomes: + *
+ * This service is stateless and thread-safe. + * + * @since M3-AP-006 + */ +public class M3DocumentProcessingService { + + /** + * Processes a document candidate through the complete M3 pipeline. + *
+ * Pipeline: + *
* Returns {@link M3PreCheckPassed} if both checks pass, or {@link M3PreCheckFailed} * with a specific reason if any check fails. + *
+ * Note: Returns {@link M3DocumentProcessingOutcome} to integrate cleanly with the complete + * M3 document processing pipeline. * * @param candidate the source document metadata * @param extraction the successfully extracted PDF content * @param configuration the startup configuration (used for maxPages limit) - * @return the pre-check decision: passed or failed with reason + * @return the pre-check outcome: passed or failed with reason (both implement {@link M3DocumentProcessingOutcome}) * @throws NullPointerException if any parameter is null */ - public static M3ProcessingDecision evaluate( + public static M3DocumentProcessingOutcome evaluate( SourceDocumentCandidate candidate, PdfExtractionSuccess extraction, StartConfiguration configuration) { diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java index 866df46..bfd10c6 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java @@ -13,8 +13,18 @@ * Current services: *
* M3 processing steps per document: *
- * Per-document errors (extraction failure, pre-check failure) do not abort the overall + * Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall * batch run. Each candidate ends controlled regardless of its outcome. *
* M3 processing boundary: no KI call, no persistence, no filename generation, @@ -160,25 +162,36 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase { * @param candidate the candidate to process */ private void processCandidate(SourceDocumentCandidate candidate) { + LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier()); + PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate); + // Log extraction outcome switch (extractionResult) { - case PdfExtractionSuccess success -> { - M3ProcessingDecision decision = M3PreCheckEvaluator.evaluate(candidate, success, configuration); - switch (decision) { - case M3PreCheckPassed passed -> - LOG.info("M3 pre-checks passed for '{}'. Candidate ready for further processing (M4+).", - candidate.uniqueIdentifier()); - case M3PreCheckFailed failed -> - LOG.info("M3 pre-check failed for '{}': {}", - candidate.uniqueIdentifier(), failed.failureReason()); - } - } + case PdfExtractionSuccess success -> + LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.", + candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length()); case PdfExtractionContentError contentError -> - LOG.info("PDF content not extractable for '{}': {}", + LOG.debug("PDF content extraction failed for '{}' (content problem): {}", candidate.uniqueIdentifier(), contentError.reason()); case PdfExtractionTechnicalError technicalError -> - LOG.warn("Technical error extracting PDF '{}': {}", + LOG.debug("PDF extraction technical error for '{}': {}", + candidate.uniqueIdentifier(), technicalError.errorMessage()); + } + + // Process through complete M3 pipeline + var m3Outcome = M3DocumentProcessingService.processDocument(candidate, extractionResult, configuration); + + // Log M3 processing outcome + switch (m3Outcome) { + case M3PreCheckPassed passed -> + LOG.info("M3 pre-checks PASSED for '{}'. 
Candidate ready for further processing (M4+).", + candidate.uniqueIdentifier()); + case M3PreCheckFailed failed -> + LOG.info("M3 pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).", + candidate.uniqueIdentifier(), failed.failureReason()); + case M3TechnicalDocumentError technicalError -> + LOG.warn("M3 processing FAILED for '{}': {} (Technical error – may retry in later run).", candidate.uniqueIdentifier(), technicalError.errorMessage()); } } diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3DocumentProcessingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3DocumentProcessingServiceTest.java new file mode 100644 index 0000000..881287f --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3DocumentProcessingServiceTest.java @@ -0,0 +1,195 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; +import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed; +import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError; +import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.net.URI; +import java.nio.file.Files; +import 
java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for {@link M3DocumentProcessingService}. + *
+ * Verifies that all four M3 document processing outcomes are correctly classified. + */ +class M3DocumentProcessingServiceTest { + + @TempDir + Path tempDir; + + private SourceDocumentCandidate candidate; + private StartConfiguration configuration; + + @BeforeEach + void setUp() throws Exception { + // Create test PDF file + Path pdfFile = tempDir.resolve("document.pdf"); + Files.createFile(pdfFile); + SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString()); + candidate = new SourceDocumentCandidate("document.pdf", 2048L, locator); + + // Create directories and files for configuration + Path sourceDir = Files.createDirectories(tempDir.resolve("source")); + Path targetDir = Files.createDirectories(tempDir.resolve("target")); + Path dbFile = tempDir.resolve("db.sqlite"); + Files.createFile(dbFile); + Path promptFile = tempDir.resolve("prompt.txt"); + Files.createFile(promptFile); + + configuration = new StartConfiguration( + sourceDir, + targetDir, + dbFile, + URI.create("http://localhost:8000"), + "gpt-4", + 30, + 3, + 10, + 5000, + promptFile, + tempDir.resolve("lock"), + tempDir.resolve("logs"), + "INFO", + "test-key" + ); + } + + @Test + void testProcessDocument_WithSuccessfulExtraction_AndPassedPreChecks() { + // Arrange: Successful extraction with valid text + var extraction = new PdfExtractionSuccess("This is valid PDF text", new PdfPageCount(5)); + + // Act + M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument( + candidate, extraction, configuration); + + // Assert: Should produce M3PreCheckPassed + assertInstanceOf(M3PreCheckPassed.class, outcome); + M3PreCheckPassed passed = (M3PreCheckPassed) outcome; + assertEquals(candidate, passed.candidate()); + assertEquals(extraction, passed.extraction()); + } + + @Test + void testProcessDocument_WithSuccessfulExtraction_AndFailedPreCheck_NoUsableText() { + // Arrange: Successful extraction but with only whitespace + var extraction = new PdfExtractionSuccess(" \n 
\t ", new PdfPageCount(1)); + + // Act + M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument( + candidate, extraction, configuration); + + // Assert: Should produce M3PreCheckFailed with appropriate reason + assertInstanceOf(M3PreCheckFailed.class, outcome); + M3PreCheckFailed failed = (M3PreCheckFailed) outcome; + assertEquals(candidate, failed.candidate()); + assertTrue(failed.failureReason().toLowerCase().contains("usable")); + } + + @Test + void testProcessDocument_WithSuccessfulExtraction_AndFailedPreCheck_PageLimitExceeded() { + // Arrange: Successful extraction but exceeds page limit + var extraction = new PdfExtractionSuccess("Valid text content", new PdfPageCount(50)); + + // Act + M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument( + candidate, extraction, configuration); + + // Assert: Should produce M3PreCheckFailed with page limit reason + assertInstanceOf(M3PreCheckFailed.class, outcome); + M3PreCheckFailed failed = (M3PreCheckFailed) outcome; + assertEquals(candidate, failed.candidate()); + assertTrue(failed.failureReason().toLowerCase().contains("page")); + } + + @Test + void testProcessDocument_WithContentError() { + // Arrange: PDF content not extractable (classified as technical document error in M3) + var contentError = new PdfExtractionContentError("PDF is corrupted"); + + // Act + M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument( + candidate, contentError, configuration); + + // Assert: Should produce M3TechnicalDocumentError + assertInstanceOf(M3TechnicalDocumentError.class, outcome); + M3TechnicalDocumentError result = (M3TechnicalDocumentError) outcome; + assertEquals(candidate, result.candidate()); + assertTrue(result.errorMessage().contains("PDF is corrupted")); + } + + @Test + void testProcessDocument_WithTechnicalError() { + // Arrange: Technical error during extraction + var technicalError = new PdfExtractionTechnicalError("I/O error reading 
file", + new RuntimeException("File not found")); + + // Act + M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument( + candidate, technicalError, configuration); + + // Assert: Should produce M3TechnicalDocumentError + assertInstanceOf(M3TechnicalDocumentError.class, outcome); + M3TechnicalDocumentError result = (M3TechnicalDocumentError) outcome; + assertEquals(candidate, result.candidate()); + assertEquals("I/O error reading file", result.errorMessage()); + assertNotNull(result.cause()); + } + + @Test + void testProcessDocument_WithTechnicalError_WithoutCause() { + // Arrange: Technical error without underlying exception + var technicalError = new PdfExtractionTechnicalError("Unknown error", null); + + // Act + M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument( + candidate, technicalError, configuration); + + // Assert + assertInstanceOf(M3TechnicalDocumentError.class, outcome); + M3TechnicalDocumentError result = (M3TechnicalDocumentError) outcome; + assertNull(result.cause()); + } + + @Test + void testProcessDocument_WithNullCandidate_ThrowsException() { + // Arrange + var extraction = new PdfExtractionSuccess("Text", new PdfPageCount(1)); + + // Act & Assert + assertThrows(NullPointerException.class, + () -> M3DocumentProcessingService.processDocument(null, extraction, configuration)); + } + + @Test + void testProcessDocument_WithNullExtractionResult_ThrowsException() { + // Act & Assert + assertThrows(NullPointerException.class, + () -> M3DocumentProcessingService.processDocument(candidate, null, configuration)); + } + + @Test + void testProcessDocument_WithNullConfiguration_ThrowsException() { + // Arrange + var extraction = new PdfExtractionSuccess("Text", new PdfPageCount(1)); + + // Act & Assert + assertThrows(NullPointerException.class, + () -> M3DocumentProcessingService.processDocument(candidate, extraction, null)); + } +} diff --git 
a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java index d0ac30c..794f358 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java @@ -1,10 +1,10 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; +import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason; import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed; -import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; @@ -35,7 +35,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("Some meaningful text", new PdfPageCount(5)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass when text is usable and page count is valid"); M3PreCheckPassed passed = (M3PreCheckPassed) result; @@ -49,7 +49,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(1)); - M3ProcessingDecision result = 
M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckFailed, "Should fail with empty text"); M3PreCheckFailed failed = (M3PreCheckFailed) result; @@ -62,7 +62,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess(" \n\t \r\n ", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckFailed, "Should fail with whitespace-only text"); M3PreCheckFailed failed = (M3PreCheckFailed) result; @@ -75,7 +75,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#$%^&*()_+-=[]{}|;:',.<>?/", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckFailed, "Should fail with special characters only"); M3PreCheckFailed failed = (M3PreCheckFailed) result; @@ -88,7 +88,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("a", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass with single letter"); } @@ -99,7 +99,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new 
PdfExtractionSuccess("5", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass with single digit"); } @@ -110,7 +110,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#a$%^&*", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass when letters/digits are present among special chars"); } @@ -121,7 +121,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess(" meaningful text ", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass when text has meaningful content despite whitespace"); } @@ -132,7 +132,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(5)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass when page count equals limit (not exceeded)"); } @@ -143,7 +143,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new 
PdfExtractionSuccess("Valid text", new PdfPageCount(6)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckFailed, "Should fail when page count exceeds limit"); M3PreCheckFailed failed = (M3PreCheckFailed) result; @@ -156,7 +156,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("Excellent meaningful text with lots of content", new PdfPageCount(100)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckFailed, "Should fail with page limit exceeded even if text is good"); M3PreCheckFailed failed = (M3PreCheckFailed) result; @@ -171,7 +171,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(10)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckFailed, "Should fail when both checks fail"); // The specific order of checks doesn't matter for M3; just verify one reason is returned @@ -216,7 +216,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("Äußerst äöüß Großes", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, 
"Should pass with German umlauts (ÄÖÜß)"); } @@ -227,7 +227,7 @@ class M3PreCheckEvaluatorTest { SourceDocumentCandidate candidate = buildCandidate(); PdfExtractionSuccess extraction = new PdfExtractionSuccess("Αβγδ 中文 καλημέρα", new PdfPageCount(1)); - M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); assertTrue(result instanceof M3PreCheckPassed, "Should pass with Greek, Chinese, and other Unicode letters"); } diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3DocumentProcessingOutcome.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3DocumentProcessingOutcome.java new file mode 100644 index 0000000..2fdf3c1 --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3DocumentProcessingOutcome.java @@ -0,0 +1,33 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +/** + * Sealed interface representing the complete outcome of M3 document processing. + *
+ * This interface models all four possible M3 document outcomes: + *
+ * Design principles: + *
+ * Error classification: + *
+ * This outcome indicates that a document could not be processed due to technical infrastructure failures, + * such as I/O errors, file access problems, or extraction engine failures. + *
+ * These are typically retryable conditions, as they may be transient issues that could succeed + * in a later batch run. + *
+ * Examples: + *
+ * This is distinct from {@link M3ExtractedContentError}, which represents problems with the document + * content itself rather than infrastructure failures. + * + * @param candidate the source document metadata + * @param errorMessage a description of the technical failure + * @param cause the underlying exception, if any (may be null) + * @since M3-AP-006 + */ +public record M3TechnicalDocumentError( + SourceDocumentCandidate candidate, + String errorMessage, + Throwable cause +) implements M3DocumentProcessingOutcome { + /** + * Constructor with validation. + * + * @param candidate must be non-null + * @param errorMessage must be non-null and non-empty + * @param cause may be null + * @throws NullPointerException if candidate or errorMessage is null + * @throws IllegalArgumentException if errorMessage is empty + */ + public M3TechnicalDocumentError { + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + if (errorMessage.isEmpty()) { + throw new IllegalArgumentException("errorMessage must not be empty"); + } + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java index f1041b1..b02e82f 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java @@ -16,12 +16,23 @@ * Additional classes introduced in M3: *
+ * Verifies that all three outcome types are properly created and validated. + */ +class M3DocumentProcessingOutcomeTest { + + @TempDir + Path tempDir; + + private SourceDocumentCandidate candidate; + + @BeforeEach + void setUp() throws Exception { + Path pdfFile = tempDir.resolve("doc.pdf"); + Files.createFile(pdfFile); + SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString()); + candidate = new SourceDocumentCandidate("doc.pdf", 1024L, locator); + } + + + @Test + void testM3TechnicalDocumentError_ValidConstruction() { + // Act + var error = new M3TechnicalDocumentError(candidate, "I/O error", null); + + // Assert + assertEquals(candidate, error.candidate()); + assertEquals("I/O error", error.errorMessage()); + assertNull(error.cause()); + } + + @Test + void testM3TechnicalDocumentError_WithCause() { + // Arrange + var cause = new RuntimeException("File not found"); + + // Act + var error = new M3TechnicalDocumentError(candidate, "I/O error", cause); + + // Assert + assertEquals(cause, error.cause()); + } + + @Test + void testM3TechnicalDocumentError_WithNullCandidate_ThrowsException() { + assertThrows(NullPointerException.class, + () -> new M3TechnicalDocumentError(null, "Error", null)); + } + + @Test + void testM3TechnicalDocumentError_WithNullErrorMessage_ThrowsException() { + assertThrows(NullPointerException.class, + () -> new M3TechnicalDocumentError(candidate, null, null)); + } + + @Test + void testM3TechnicalDocumentError_WithEmptyErrorMessage_ThrowsException() { + assertThrows(IllegalArgumentException.class, + () -> new M3TechnicalDocumentError(candidate, "", null)); + } + + @Test + void testM3TechnicalDocumentError_IsM3DocumentProcessingOutcome() { + // Verify type relationship + var error = new M3TechnicalDocumentError(candidate, "Error", null); + assertInstanceOf(M3DocumentProcessingOutcome.class, error); + } + + @Test + void testM3PreCheckPassed_IsM3DocumentProcessingOutcome() { + // Verify type relationship + var extraction =
new PdfExtractionSuccess("text", new PdfPageCount(1)); + var passed = new M3PreCheckPassed(candidate, extraction); + assertInstanceOf(M3DocumentProcessingOutcome.class, passed); + } + + @Test + void testM3PreCheckFailed_IsM3DocumentProcessingOutcome() { + // Verify type relationship + var failed = new M3PreCheckFailed(candidate, "Test failure reason"); + assertInstanceOf(M3DocumentProcessingOutcome.class, failed); + } + + @Test + void testAllThreeOutcomesAreExhaustive() { + // This test verifies that the three outcome types are the only implementations + // M3 has exactly three outcome types: passed, failed (deterministic), and technical error + + var extraction = new PdfExtractionSuccess("text", new PdfPageCount(1)); + + M3DocumentProcessingOutcome[] outcomes = { + new M3PreCheckPassed(candidate, extraction), + new M3PreCheckFailed(candidate, "Deterministic content failure"), + new M3TechnicalDocumentError(candidate, "Technical extraction error", null) + }; + + for (M3DocumentProcessingOutcome outcome : outcomes) { + assertInstanceOf(M3DocumentProcessingOutcome.class, outcome); + } + } +}