From a703eca5a48e5eacc6a87e37549b3c999384ddcf Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Thu, 2 Apr 2026 15:49:42 +0200 Subject: [PATCH] PreCheckFailed auf strukturierten Fehlergrund umgestellt --- .../service/DocumentProcessingService.java | 3 ++- .../service/PreCheckEvaluator.java | 4 +-- .../DefaultBatchRunProcessingUseCase.java | 3 +-- .../DocumentProcessingServiceTest.java | 7 ++--- .../service/PreCheckEvaluatorTest.java | 11 ++++---- .../domain/model/PreCheckFailed.java | 26 +++++++++++-------- .../domain/model/PreCheckFailureReason.java | 15 ++++++++++- .../model/DocumentProcessingOutcomeTest.java | 4 +-- 8 files changed, 45 insertions(+), 28 deletions(-) diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java index 25fe918..dcb25bc 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java @@ -3,6 +3,7 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; @@ -67,7 +68,7 @@ public class DocumentProcessingService { case PdfExtractionContentError contentError -> // PDF content not extractable: classify as pre-check failed (deterministic content error) - new PreCheckFailed(candidate, "PDF content not extractable: " + contentError.reason()); + new PreCheckFailed(candidate, PreCheckFailureReason.CONTENT_NOT_EXTRACTABLE); case PdfExtractionTechnicalError technicalError -> // Technical failure during extraction: potentially retryable diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluator.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluator.java index e8942b4..cd2fa04 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluator.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluator.java @@ -61,7 +61,7 @@ public class PreCheckEvaluator { if (!hasUsableText(extraction.extractedText())) { return new PreCheckFailed( candidate, - PreCheckFailureReason.NO_USABLE_TEXT.getDescription() + PreCheckFailureReason.NO_USABLE_TEXT ); } @@ -69,7 +69,7 @@ public class PreCheckEvaluator { if (extraction.pageCount().exceedsLimit(configuration.maxPages())) { return new PreCheckFailed( candidate, - PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription() + PreCheckFailureReason.PAGE_LIMIT_EXCEEDED ); } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java index c8edd6d..c61f77a 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java @@ -10,7 +10,6 @@ import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessExcep import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; -import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; @@ -187,7 +186,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa candidate.uniqueIdentifier()); case PreCheckFailed failed -> LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).", - candidate.uniqueIdentifier(), failed.failureReason()); + candidate.uniqueIdentifier(), failed.failureReasonDescription()); case TechnicalDocumentError technicalError -> LOG.warn("Processing FAILED for '{}': {} (Technical error – may retry in later run).", candidate.uniqueIdentifier(), technicalError.errorMessage()); diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java index eeda255..52a4718 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java @@ -3,6 +3,7 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; @@ -98,7 +99,7 @@ class DocumentProcessingServiceTest { assertInstanceOf(PreCheckFailed.class, outcome); PreCheckFailed failed = (PreCheckFailed) outcome; assertEquals(candidate, failed.candidate()); - assertTrue(failed.failureReason().toLowerCase().contains("usable")); + assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason()); } @Test @@ -114,7 +115,7 @@ class DocumentProcessingServiceTest { assertInstanceOf(PreCheckFailed.class, outcome); PreCheckFailed failed = (PreCheckFailed) outcome; assertEquals(candidate, failed.candidate()); - assertTrue(failed.failureReason().toLowerCase().contains("page")); + assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED, failed.failureReason()); } @Test @@ -130,7 +131,7 @@ class DocumentProcessingServiceTest { assertInstanceOf(PreCheckFailed.class, outcome); PreCheckFailed result = (PreCheckFailed) outcome; assertEquals(candidate, result.candidate()); - assertTrue(result.failureReason().contains("PDF is corrupted")); + assertEquals(PreCheckFailureReason.CONTENT_NOT_EXTRACTABLE, result.failureReason()); } @Test diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java index 35bc3f0..8ba1820 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java @@ -53,7 +53,7 @@ class PreCheckEvaluatorTest { assertTrue(result instanceof PreCheckFailed, "Should fail with empty text"); PreCheckFailed failed = (PreCheckFailed) result; - assertEquals(PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); + assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason()); } @Test @@ -66,7 +66,7 @@ class PreCheckEvaluatorTest { assertTrue(result instanceof PreCheckFailed, "Should fail with whitespace-only text"); PreCheckFailed failed = (PreCheckFailed) result; - assertEquals(PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); + assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason()); } @Test @@ -79,7 +79,7 @@ class PreCheckEvaluatorTest { assertTrue(result instanceof PreCheckFailed, "Should fail with special characters only"); PreCheckFailed failed = (PreCheckFailed) result; - assertEquals(PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); + assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason()); } @Test @@ -147,7 +147,7 @@ class PreCheckEvaluatorTest { assertTrue(result instanceof PreCheckFailed, "Should fail when page count exceeds limit"); PreCheckFailed failed = (PreCheckFailed) result; - assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription(), failed.failureReason()); + assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED, failed.failureReason()); } @Test @@ -160,7 +160,7 @@ class PreCheckEvaluatorTest { assertTrue(result instanceof PreCheckFailed, "Should fail with page limit exceeded even if text is good"); PreCheckFailed failed = (PreCheckFailed) result; - assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription(), failed.failureReason()); + assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED, failed.failureReason()); } @Test @@ -177,7 +177,6 @@ class PreCheckEvaluatorTest { // The specific order of checks doesn't matter; just verify one reason is returned PreCheckFailed failed = (PreCheckFailed) result; assertNotNull(failed.failureReason()); - assertFalse(failed.failureReason().isEmpty()); } @Test diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailed.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailed.java index 729a81d..6dbb158 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailed.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailed.java @@ -8,38 +8,42 @@ import java.util.Objects; * This result encapsulates: * *

* Reasons include: *

*

* A document with this decision will not proceed further in the current batch run. * * @param candidate the source document metadata - * @param failureReason a human-readable explanation of the pre-check failure + * @param failureReason the reason for the pre-check failure */ public record PreCheckFailed( SourceDocumentCandidate candidate, - String failureReason + PreCheckFailureReason failureReason ) implements ProcessingDecision, DocumentProcessingOutcome { /** * Constructor with validation. * * @param candidate must be non-null - * @param failureReason must be non-null and non-empty + * @param failureReason must be non-null * @throws NullPointerException if either parameter is null - * @throws IllegalArgumentException if failureReason is empty */ public PreCheckFailed { Objects.requireNonNull(candidate, "candidate must not be null"); Objects.requireNonNull(failureReason, "failureReason must not be null"); - if (failureReason.isEmpty()) { - throw new IllegalArgumentException("failureReason must not be empty"); - } + } + + /** + * Returns a human-readable description of the failure reason. + * + * @return the description of the failure reason + */ + public String failureReasonDescription() { + return failureReason.getDescription(); } } \ No newline at end of file diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java index 21498db..c4c2351 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java @@ -10,6 +10,7 @@ package de.gecheckt.pdf.umbenenner.domain.model; *

*

* Note: Technical extraction failures (I/O errors, PDFBox failures) are not pre-check reasons; @@ -33,7 +34,19 @@ public enum PreCheckFailureReason { *

* Retry logic: exactly 1 retry in a later batch run. */ - PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"); + PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"), + + /** + * The PDF content cannot be extracted due to structural issues in the document. + *

+ * This is a deterministic content error: reprocessing the same file in a later run + * will have the same outcome unless the source file is changed. + *

+ * Examples: Password-protected PDFs, corrupted internal structure, unsupported encryption. + *

+ * Retry logic: exactly 1 retry in a later batch run. + */ + CONTENT_NOT_EXTRACTABLE("PDF content not extractable"); private final String description; diff --git a/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java b/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java index c39464a..68e9256 100644 --- a/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java +++ b/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java @@ -89,7 +89,7 @@ class DocumentProcessingOutcomeTest { @Test void testPreCheckFailed_IsDocumentProcessingOutcome() { // Verify type relationship - var failed = new PreCheckFailed(candidate, "Test failure reason"); + var failed = new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT); assertInstanceOf(DocumentProcessingOutcome.class, failed); } @@ -100,7 +100,7 @@ class DocumentProcessingOutcomeTest { DocumentProcessingOutcome[] outcomes = { new PreCheckPassed(candidate, extraction), - new PreCheckFailed(candidate, "Deterministic content failure"), + new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT), new TechnicalDocumentError(candidate, "Technical extraction error", null) };