PreCheckFailed auf strukturierten Fehlergrund umgestellt

2026-04-02 15:49:42 +02:00
parent 747f22438d
commit a703eca5a4
8 changed files with 45 additions and 28 deletions
@@ -8,38 +8,42 @@ import java.util.Objects;
 * This result encapsulates:
 * <ul>
 *   <li>The original document candidate metadata (for correlation)</li>
- *   <li>A description of why the pre-check failed</li>
+ *   <li>The structured {@link PreCheckFailureReason} explaining why the pre-check failed</li>
 * </ul>
 * <p>
 * Reasons include:
 * <ul>
- *   <li>"No usable text" – extraction yielded no meaningful content</li>
- *   <li>"Page limit exceeded" – document exceeds the configured page limit</li>
- *   <li>"Technical extraction error" – I/O or PDFBox failure (may be retryable later)</li>
+ *   <li>{@link PreCheckFailureReason#NO_USABLE_TEXT} – extraction yielded no meaningful content</li>
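+ *   <li>{@link PreCheckFailureReason#PAGE_LIMIT_EXCEEDED} – document exceeds the configured page limit</li>
+ *   <li>{@link PreCheckFailureReason#CONTENT_NOT_EXTRACTABLE} – PDF content cannot be extracted due to structural issues</li>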
 * </ul>
 * <p>
 * A document with this decision will not proceed further in the current batch run.
 *
 * @param candidate the source document metadata
- * @param failureReason a human-readable explanation of the pre-check failure
+ * @param failureReason the reason for the pre-check failure
 */
 public record PreCheckFailed(
    SourceDocumentCandidate candidate,
-    String failureReason
+    PreCheckFailureReason failureReason
 ) implements ProcessingDecision, DocumentProcessingOutcome {
    /**
     * Constructor with validation.
     *
     * @param candidate must be non-null
-     * @param failureReason must be non-null and non-empty
+     * @param failureReason must be non-null
     * @throws NullPointerException if either parameter is null
-     * @throws IllegalArgumentException if failureReason is empty
     */
    public PreCheckFailed {
        Objects.requireNonNull(candidate, "candidate must not be null");
        Objects.requireNonNull(failureReason, "failureReason must not be null");
-        if (failureReason.isEmpty()) {
-            throw new IllegalArgumentException("failureReason must not be empty");
-        }
+    }
+    
+    /**
+     * Returns a human-readable description of the failure reason.
+     * 
+     * @return the description of the failure reason
+     */
+    public String failureReasonDescription() {
+        return failureReason.getDescription();
    }
 }
@@ -10,6 +10,7 @@ package de.gecheckt.pdf.umbenenner.domain.model;
 * <ul>
 *   <li>{@link #NO_USABLE_TEXT}: The extracted text contains no meaningful content after normalization.</li>
 *   <li>{@link #PAGE_LIMIT_EXCEEDED}: The document exceeds the configured page limit.</li>
+ *   <li>{@link #CONTENT_NOT_EXTRACTABLE}: The PDF content cannot be extracted due to structural issues.</li>
 * </ul>
 * <p>
 * Note: Technical extraction failures (I/O errors, PDFBox failures) are not pre-check reasons;
@@ -33,7 +34,19 @@ public enum PreCheckFailureReason {
     * <p>
     * Retry logic: exactly 1 retry in a later batch run.
     */
-    PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit");
+    PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"),
+
+    /**
+     * The PDF content cannot be extracted due to structural issues in the document.
+     * <p>
+     * This is a deterministic content error: reprocessing the same file in a later run
+     * will have the same outcome unless the source file is changed.
+     * <p>
+     * Examples: password-protected PDFs, corrupted internal structure, unsupported encryption.
+     * <p>
+     * Retry logic: exactly 1 retry in a later batch run.
+     */
+    CONTENT_NOT_EXTRACTABLE("PDF content not extractable");

    private final String description;

@@ -89,7 +89,7 @@ class DocumentProcessingOutcomeTest {
    @Test
    void testPreCheckFailed_IsDocumentProcessingOutcome() {
        // Verify type relationship
-        var failed = new PreCheckFailed(candidate, "Test failure reason");
+        var failed = new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
        assertInstanceOf(DocumentProcessingOutcome.class, failed);
    }

@@ -100,7 +100,7 @@ class DocumentProcessingOutcomeTest {

        DocumentProcessingOutcome[] outcomes = {
                new PreCheckPassed(candidate, extraction),
-                new PreCheckFailed(candidate, "Deterministic content failure"),
+                new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT),
                new TechnicalDocumentError(candidate, "Technical extraction error", null)
        };