1
0

PreCheckFailed auf strukturierten Fehlergrund umgestellt

This commit is contained in:
2026-04-02 15:49:42 +02:00
parent 747f22438d
commit a703eca5a4
8 changed files with 45 additions and 28 deletions

View File

@@ -3,6 +3,7 @@ package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
@@ -67,7 +68,7 @@ public class DocumentProcessingService {
case PdfExtractionContentError contentError -> case PdfExtractionContentError contentError ->
// PDF content not extractable: classify as pre-check failed (deterministic content error) // PDF content not extractable: classify as pre-check failed (deterministic content error)
new PreCheckFailed(candidate, "PDF content not extractable: " + contentError.reason()); new PreCheckFailed(candidate, PreCheckFailureReason.CONTENT_NOT_EXTRACTABLE);
case PdfExtractionTechnicalError technicalError -> case PdfExtractionTechnicalError technicalError ->
// Technical failure during extraction: potentially retryable // Technical failure during extraction: potentially retryable

View File

@@ -61,7 +61,7 @@ public class PreCheckEvaluator {
if (!hasUsableText(extraction.extractedText())) { if (!hasUsableText(extraction.extractedText())) {
return new PreCheckFailed( return new PreCheckFailed(
candidate, candidate,
PreCheckFailureReason.NO_USABLE_TEXT.getDescription() PreCheckFailureReason.NO_USABLE_TEXT
); );
} }
@@ -69,7 +69,7 @@ public class PreCheckEvaluator {
if (extraction.pageCount().exceedsLimit(configuration.maxPages())) { if (extraction.pageCount().exceedsLimit(configuration.maxPages())) {
return new PreCheckFailed( return new PreCheckFailed(
candidate, candidate,
PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription() PreCheckFailureReason.PAGE_LIMIT_EXCEEDED
); );
} }

View File

@@ -10,7 +10,6 @@ import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessExcep
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
@@ -187,7 +186,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
candidate.uniqueIdentifier()); candidate.uniqueIdentifier());
case PreCheckFailed failed -> case PreCheckFailed failed ->
LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error may retry in later run).", LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error may retry in later run).",
candidate.uniqueIdentifier(), failed.failureReason()); candidate.uniqueIdentifier(), failed.failureReasonDescription());
case TechnicalDocumentError technicalError -> case TechnicalDocumentError technicalError ->
LOG.warn("Processing FAILED for '{}': {} (Technical error may retry in later run).", LOG.warn("Processing FAILED for '{}': {} (Technical error may retry in later run).",
candidate.uniqueIdentifier(), technicalError.errorMessage()); candidate.uniqueIdentifier(), technicalError.errorMessage());

View File

@@ -3,6 +3,7 @@ package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
@@ -98,7 +99,7 @@ class DocumentProcessingServiceTest {
assertInstanceOf(PreCheckFailed.class, outcome); assertInstanceOf(PreCheckFailed.class, outcome);
PreCheckFailed failed = (PreCheckFailed) outcome; PreCheckFailed failed = (PreCheckFailed) outcome;
assertEquals(candidate, failed.candidate()); assertEquals(candidate, failed.candidate());
assertTrue(failed.failureReason().toLowerCase().contains("usable")); assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason());
} }
@Test @Test
@@ -114,7 +115,7 @@ class DocumentProcessingServiceTest {
assertInstanceOf(PreCheckFailed.class, outcome); assertInstanceOf(PreCheckFailed.class, outcome);
PreCheckFailed failed = (PreCheckFailed) outcome; PreCheckFailed failed = (PreCheckFailed) outcome;
assertEquals(candidate, failed.candidate()); assertEquals(candidate, failed.candidate());
assertTrue(failed.failureReason().toLowerCase().contains("page")); assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED, failed.failureReason());
} }
@Test @Test
@@ -130,7 +131,7 @@ class DocumentProcessingServiceTest {
assertInstanceOf(PreCheckFailed.class, outcome); assertInstanceOf(PreCheckFailed.class, outcome);
PreCheckFailed result = (PreCheckFailed) outcome; PreCheckFailed result = (PreCheckFailed) outcome;
assertEquals(candidate, result.candidate()); assertEquals(candidate, result.candidate());
assertTrue(result.failureReason().contains("PDF is corrupted")); assertEquals(PreCheckFailureReason.CONTENT_NOT_EXTRACTABLE, result.failureReason());
} }
@Test @Test

View File

@@ -53,7 +53,7 @@ class PreCheckEvaluatorTest {
assertTrue(result instanceof PreCheckFailed, "Should fail with empty text"); assertTrue(result instanceof PreCheckFailed, "Should fail with empty text");
PreCheckFailed failed = (PreCheckFailed) result; PreCheckFailed failed = (PreCheckFailed) result;
assertEquals(PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason());
} }
@Test @Test
@@ -66,7 +66,7 @@ class PreCheckEvaluatorTest {
assertTrue(result instanceof PreCheckFailed, "Should fail with whitespace-only text"); assertTrue(result instanceof PreCheckFailed, "Should fail with whitespace-only text");
PreCheckFailed failed = (PreCheckFailed) result; PreCheckFailed failed = (PreCheckFailed) result;
assertEquals(PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason());
} }
@Test @Test
@@ -79,7 +79,7 @@ class PreCheckEvaluatorTest {
assertTrue(result instanceof PreCheckFailed, "Should fail with special characters only"); assertTrue(result instanceof PreCheckFailed, "Should fail with special characters only");
PreCheckFailed failed = (PreCheckFailed) result; PreCheckFailed failed = (PreCheckFailed) result;
assertEquals(PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); assertEquals(PreCheckFailureReason.NO_USABLE_TEXT, failed.failureReason());
} }
@Test @Test
@@ -147,7 +147,7 @@ class PreCheckEvaluatorTest {
assertTrue(result instanceof PreCheckFailed, "Should fail when page count exceeds limit"); assertTrue(result instanceof PreCheckFailed, "Should fail when page count exceeds limit");
PreCheckFailed failed = (PreCheckFailed) result; PreCheckFailed failed = (PreCheckFailed) result;
assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription(), failed.failureReason()); assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED, failed.failureReason());
} }
@Test @Test
@@ -160,7 +160,7 @@ class PreCheckEvaluatorTest {
assertTrue(result instanceof PreCheckFailed, "Should fail with page limit exceeded even if text is good"); assertTrue(result instanceof PreCheckFailed, "Should fail with page limit exceeded even if text is good");
PreCheckFailed failed = (PreCheckFailed) result; PreCheckFailed failed = (PreCheckFailed) result;
assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription(), failed.failureReason()); assertEquals(PreCheckFailureReason.PAGE_LIMIT_EXCEEDED, failed.failureReason());
} }
@Test @Test
@@ -177,7 +177,6 @@ class PreCheckEvaluatorTest {
// The specific order of checks doesn't matter; just verify one reason is returned // The specific order of checks doesn't matter; just verify one reason is returned
PreCheckFailed failed = (PreCheckFailed) result; PreCheckFailed failed = (PreCheckFailed) result;
assertNotNull(failed.failureReason()); assertNotNull(failed.failureReason());
assertFalse(failed.failureReason().isEmpty());
} }
@Test @Test

View File

@@ -8,38 +8,42 @@ import java.util.Objects;
* This result encapsulates: * This result encapsulates:
* <ul> * <ul>
* <li>The original document candidate metadata (for correlation)</li> * <li>The original document candidate metadata (for correlation)</li>
* <li>A description of why the pre-check failed</li> * <li>A reason why the pre-check failed</li>
* </ul> * </ul>
* <p> * <p>
* Reasons include: * Reasons include:
* <ul> * <ul>
* <li>"No usable text": extraction yielded no meaningful content</li> * <li>{@link PreCheckFailureReason#NO_USABLE_TEXT}: extraction yielded no meaningful content</li>
* <li>"Page limit exceeded": document exceeds the configured page limit</li> * <li>{@link PreCheckFailureReason#PAGE_LIMIT_EXCEEDED}: document exceeds the configured page limit</li>
* <li>"Technical extraction error": I/O or PDFBox failure (may be retryable later)</li>
* </ul> * </ul>
* <p> * <p>
* A document with this decision will not proceed further in the current batch run. * A document with this decision will not proceed further in the current batch run.
* *
* @param candidate the source document metadata * @param candidate the source document metadata
* @param failureReason a human-readable explanation of the pre-check failure * @param failureReason the reason for the pre-check failure
*/ */
public record PreCheckFailed( public record PreCheckFailed(
SourceDocumentCandidate candidate, SourceDocumentCandidate candidate,
String failureReason PreCheckFailureReason failureReason
) implements ProcessingDecision, DocumentProcessingOutcome { ) implements ProcessingDecision, DocumentProcessingOutcome {
/** /**
* Constructor with validation. * Constructor with validation.
* *
* @param candidate must be non-null * @param candidate must be non-null
* @param failureReason must be non-null and non-empty * @param failureReason must be non-null
* @throws NullPointerException if either parameter is null * @throws NullPointerException if either parameter is null
* @throws IllegalArgumentException if failureReason is empty
*/ */
public PreCheckFailed { public PreCheckFailed {
Objects.requireNonNull(candidate, "candidate must not be null"); Objects.requireNonNull(candidate, "candidate must not be null");
Objects.requireNonNull(failureReason, "failureReason must not be null"); Objects.requireNonNull(failureReason, "failureReason must not be null");
if (failureReason.isEmpty()) {
throw new IllegalArgumentException("failureReason must not be empty");
} }
/**
* Returns a human-readable description of the failure reason.
*
* @return the description of the failure reason
*/
public String failureReasonDescription() {
return failureReason.getDescription();
} }
} }

View File

@@ -10,6 +10,7 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* <ul> * <ul>
* <li>{@link #NO_USABLE_TEXT}: The extracted text contains no meaningful content after normalization.</li> * <li>{@link #NO_USABLE_TEXT}: The extracted text contains no meaningful content after normalization.</li>
* <li>{@link #PAGE_LIMIT_EXCEEDED}: The document exceeds the configured page limit.</li> * <li>{@link #PAGE_LIMIT_EXCEEDED}: The document exceeds the configured page limit.</li>
* <li>{@link #CONTENT_NOT_EXTRACTABLE}: The PDF content cannot be extracted due to structural issues.</li>
* </ul> * </ul>
* <p> * <p>
* Note: Technical extraction failures (I/O errors, PDFBox failures) are not pre-check reasons; * Note: Technical extraction failures (I/O errors, PDFBox failures) are not pre-check reasons;
@@ -33,7 +34,19 @@ public enum PreCheckFailureReason {
* <p> * <p>
* Retry logic: exactly 1 retry in a later batch run. * Retry logic: exactly 1 retry in a later batch run.
*/ */
PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"); PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"),
/**
* The PDF content cannot be extracted due to structural issues in the document.
* <p>
* This is a deterministic content error: reprocessing the same file in a later run
* will have the same outcome unless the source file is changed.
* <p>
* Examples: Password-protected PDFs, corrupted internal structure, unsupported encryption.
* <p>
* Retry logic: exactly 1 retry in a later batch run.
*/
CONTENT_NOT_EXTRACTABLE("PDF content not extractable");
private final String description; private final String description;

View File

@@ -89,7 +89,7 @@ class DocumentProcessingOutcomeTest {
@Test @Test
void testPreCheckFailed_IsDocumentProcessingOutcome() { void testPreCheckFailed_IsDocumentProcessingOutcome() {
// Verify type relationship // Verify type relationship
var failed = new PreCheckFailed(candidate, "Test failure reason"); var failed = new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
assertInstanceOf(DocumentProcessingOutcome.class, failed); assertInstanceOf(DocumentProcessingOutcome.class, failed);
} }
@@ -100,7 +100,7 @@ class DocumentProcessingOutcomeTest {
DocumentProcessingOutcome[] outcomes = { DocumentProcessingOutcome[] outcomes = {
new PreCheckPassed(candidate, extraction), new PreCheckPassed(candidate, extraction),
new PreCheckFailed(candidate, "Deterministic content failure"), new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT),
new TechnicalDocumentError(candidate, "Technical extraction error", null) new TechnicalDocumentError(candidate, "Technical extraction error", null)
}; };