From 18f9c33bbb92965fbf677cc833608fe927b8845f Mon Sep 17 00:00:00 2001
From: Marcus van Elst <marcusvebo@gmail.com>
Date: Mon, 4 May 2026 15:08:01 +0200
Subject: [PATCH] #78: NO_USABLE_TEXT (Foto-PDF) finalisiert sofort zu
 FAILED_FINAL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Bisher wurde NO_USABLE_TEXT (kein OCR-Text im PDF) wie alle anderen
deterministischen Inhaltsfehler mit der 1-Retry-Regel behandelt und
landete beim ersten Auftreten in FAILED_RETRYABLE. Da ein Bild-Scan ohne
OCR-Text sich zwischen Läufen nicht verändert, ist ein Wiederholversuch
sinnlos – der Status muss sofort FAILED_FINAL sein.

Geändert: ProcessingOutcomeTransition erkennt NO_USABLE_TEXT als
Sonderfall und liefert ohne Retry-Prüfung FAILED_FINAL. PAGE_LIMIT_EXCEEDED
und CONTENT_NOT_EXTRACTABLE behalten die 1-Retry-Regel.

Tests angepasst: Bestehende Tests, die FAILED_RETRYABLE für NO_USABLE_TEXT
erwarteten, wurden auf das korrekte Verhalten umgestellt oder auf
PAGE_LIMIT_EXCEEDED umgeschrieben. Neue Lifecycle-Tests für NO_USABLE_TEXT
(sofort FAILED_FINAL → SKIPPED_FINAL_FAILURE) hinzugefügt.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../service/ProcessingOutcomeTransition.java  | 20 ++++--
 .../DocumentProcessingCoordinatorTest.java    | 69 +++++++++++++++++--
 .../ProcessingOutcomeTransitionTest.java      | 24 +++++--
 .../domain/model/PreCheckFailureReason.java   |  5 +-
 4 files changed, 100 insertions(+), 18 deletions(-)
diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java
index 8897f00..223c0bd 100644
--- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java
+++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java
@@ -6,6 +6,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
 import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
 import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
 import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
 import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
 import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
 
@@ -26,10 +27,14 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
  *   <li><strong>Naming proposal ready:</strong> Status becomes
  *       {@link ProcessingStatus#PROPOSAL_READY}, counters unchanged,
  *       {@code retryable=false}.</li>
- *   <li><strong>Pre-check content error (first occurrence):</strong>
+ *   <li><strong>Pre-check content error {@link PreCheckFailureReason#NO_USABLE_TEXT}:</strong>
+ *       Status becomes {@link ProcessingStatus#FAILED_FINAL} immediately,
+ *       content error counter incremented by 1, {@code retryable=false}.
+ *       Image-only PDFs without OCR text will not yield usable text on retry.</li>
+ *   <li><strong>Pre-check content error (other reason, first occurrence):</strong>
  *       Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
  *       content error counter incremented by 1, {@code retryable=true}.</li>
- *   <li><strong>Pre-check content error (second or later occurrence):</strong>
+ *   <li><strong>Pre-check content error (other reason, second or later occurrence):</strong>
  *       Status becomes {@link ProcessingStatus#FAILED_FINAL},
  *       content error counter incremented by 1, {@code retryable=false}.</li>
  *   <li><strong>AI functional failure (first occurrence):</strong>
@@ -112,11 +117,16 @@ final class ProcessingOutcomeTransition {
                 );
             }
 
-            case PreCheckFailed ignored2 -> {
-                // Deterministic content error from pre-check: apply the 1-retry rule
+            case PreCheckFailed preCheckFailed -> {
                 FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
-                boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
 
+                if (preCheckFailed.failureReason() == PreCheckFailureReason.NO_USABLE_TEXT) {
+                    // Image-only PDFs without OCR text will not yield usable text on retry.
+                    yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false);
+                }
+
+                // Other deterministic content errors: apply the 1-retry rule
+                boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
                 if (isFirstOccurrence) {
                     yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true);
                 } else {
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
index d9e4507..5907807 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
@@ -154,13 +154,36 @@ class DocumentProcessingCoordinatorTest {
     }
 
     @Test
-    void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
+    void process_newDocument_noUsableText_persistsFailedFinal_contentCounterOne() {
+        // NO_USABLE_TEXT (image-only PDF) finalises immediately — no retry.
         recordRepo.setLookupResult(new DocumentUnknown());
         DocumentProcessingOutcome outcome = new PreCheckFailed(
                 candidate, PreCheckFailureReason.NO_USABLE_TEXT);
 
         processor.process(candidate, fingerprint, outcome, context, attemptStart);
 
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, attempt.status());
+        assertFalse(attempt.retryable());
+
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord record = recordRepo.createdRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
+        assertEquals(1, record.failureCounters().contentErrorCount());
+        assertEquals(0, record.failureCounters().transientErrorCount());
+        assertNotNull(record.lastFailureInstant());
+        assertNull(record.lastSuccessInstant());
+    }
+
+    @Test
+    void process_newDocument_firstPageLimitExceeded_persistsFailedRetryable_contentCounterOne() {
+        recordRepo.setLookupResult(new DocumentUnknown());
+        DocumentProcessingOutcome outcome = new PreCheckFailed(
+                candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+
+        processor.process(candidate, fingerprint, outcome, context, attemptStart);
+
         assertEquals(1, attemptRepo.savedAttempts.size());
         ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
         assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
@@ -1191,17 +1214,18 @@ class DocumentProcessingCoordinatorTest {
     // -------------------------------------------------------------------------
 
     @Test
-    void process_contentErrorLifecycle_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
-        // Run 1: new document, first deterministic content error → FAILED_RETRYABLE
+    void process_contentErrorLifecycle_pageLimitExceeded_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
+        // PAGE_LIMIT_EXCEEDED follows the 1-retry rule: run 1 → FAILED_RETRYABLE, run 2 → FAILED_FINAL, run 3 → skipped.
         recordRepo.setLookupResult(new DocumentUnknown());
         DocumentProcessingOutcome contentError = new PreCheckFailed(
-                candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+                candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
 
+        // Run 1: new document, first content error → FAILED_RETRYABLE
         processor.process(candidate, fingerprint, contentError, context, attemptStart);
 
         DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
         assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(),
-                "First content error must yield FAILED_RETRYABLE");
+                "First PAGE_LIMIT_EXCEEDED must yield FAILED_RETRYABLE");
         assertEquals(1, afterRun1.failureCounters().contentErrorCount());
         assertTrue(attemptRepo.savedAttempts.get(0).retryable(),
                 "First content error attempt must be retryable");
@@ -1236,6 +1260,36 @@ class DocumentProcessingCoordinatorTest {
                 "Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event");
     }
 
+    @Test
+    void process_contentErrorLifecycle_noUsableText_immediatelyFinal_secondRunSkipped() {
+        // NO_USABLE_TEXT (image-only PDF): first run is immediately FAILED_FINAL, second is skipped.
+        recordRepo.setLookupResult(new DocumentUnknown());
+        DocumentProcessingOutcome noTextError = new PreCheckFailed(
+                candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+
+        // Run 1: new document → FAILED_FINAL immediately
+        processor.process(candidate, fingerprint, noTextError, context, attemptStart);
+
+        DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, afterRun1.overallStatus(),
+                "NO_USABLE_TEXT must yield FAILED_FINAL immediately");
+        assertEquals(1, afterRun1.failureCounters().contentErrorCount());
+        assertFalse(attemptRepo.savedAttempts.get(0).retryable());
+
+        // Run 2: terminal FAILED_FINAL → SKIPPED_FINAL_FAILURE; counters must not change
+        recordRepo.setLookupResult(new DocumentTerminalFinalFailure(afterRun1));
+
+        processor.process(candidate, fingerprint, noTextError, context, attemptStart);
+
+        assertEquals(2, attemptRepo.savedAttempts.size());
+        ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(1);
+        assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
+
+        DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0);
+        assertEquals(1, afterRun2.failureCounters().contentErrorCount(),
+                "Content error counter must remain 1 after SKIPPED_FINAL_FAILURE");
+    }
+
     @Test
     void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() {
         // maxRetriesTransient=2: first transient error → FAILED_RETRYABLE, second → FAILED_FINAL
@@ -1594,8 +1648,9 @@ class DocumentProcessingCoordinatorTest {
 
     @Test
     void process_firstContentError_retryDecisionLog_containsFingerprintAndFailedRetryable() {
-        // Proves that the retry decision log for a first deterministic content error contains
+        // Proves that the retry decision log for a first retryable content error contains
         // both the document fingerprint and the FAILED_RETRYABLE classification.
+        // Uses PAGE_LIMIT_EXCEEDED which follows the 1-retry rule.
         MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
         DocumentProcessingCoordinator coordinatorWithCapturing =
                 new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
@@ -1604,7 +1659,7 @@ class DocumentProcessingCoordinatorTest {
         recordRepo.setLookupResult(new DocumentUnknown());
 
         coordinatorWithCapturing.process(candidate, fingerprint,
-                new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT),
+                new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED),
                 context, attemptStart);
 
         assertTrue(capturingLogger.anyWarnContains(FINGERPRINT_HEX),
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
index 1d59db6..66bc40f 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
@@ -103,13 +103,28 @@ class ProcessingOutcomeTransitionTest {
     // -------------------------------------------------------------------------
 
     @Test
-    void forNewDocument_firstPreCheckFailed_returnsFailedRetryable_contentCounterOne() {
+    void forNewDocument_noUsableText_immediatelyFailedFinal_noRetry() {
         PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
 
         ProcessingOutcomeTransition.ProcessingOutcome result =
                 ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
 
-        assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus());
+        assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(),
+                "NO_USABLE_TEXT must finalise immediately without retry");
+        assertFalse(result.retryable());
+        assertEquals(1, result.counters().contentErrorCount());
+        assertEquals(0, result.counters().transientErrorCount());
+    }
+
+    @Test
+    void forNewDocument_firstPageLimitExceeded_returnsFailedRetryable_contentCounterOne() {
+        PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+
+        ProcessingOutcomeTransition.ProcessingOutcome result =
+                ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
+
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus(),
+                "PAGE_LIMIT_EXCEEDED first occurrence must be retryable");
         assertTrue(result.retryable());
         assertEquals(1, result.counters().contentErrorCount());
         assertEquals(0, result.counters().transientErrorCount());
@@ -149,9 +164,10 @@ class ProcessingOutcomeTransitionTest {
 
     @Test
     void forNewDocument_contentError_transientCounterIsIrrelevant() {
-        PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
+        // PAGE_LIMIT_EXCEEDED follows the 1-retry rule. Counters before: 0 content errors (first
+        // occurrence) and 5 transient errors; the transient count must not affect the decision.
+        PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
 
-        // Counter before: 0 content errors (first occurrence), transient ignored
         ProcessingOutcomeTransition.ProcessingOutcome result =
                 ProcessingOutcomeTransition.forKnownDocument(
                         outcome, new FailureCounters(0, 5), LIMIT_1);
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java
index c4c2351..9423b55 100644
--- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java
@@ -21,9 +21,10 @@ public enum PreCheckFailureReason {
      * The extracted PDF text, after normalization, contains no letters or digits.
      * <p>
      * This is a deterministic content error: reprocessing the same file in a later run
-     * will have the same outcome unless the source file is changed.
+     * will have the same outcome unless the source file is changed (e.g. by adding OCR).
      * <p>
-     * Retry logic: exactly 1 retry in a later batch run.
+     * Retry logic: no retry — the document is immediately finalised to
+     * {@link ProcessingStatus#FAILED_FINAL}.
      */
     NO_USABLE_TEXT("No usable text in extracted PDF content"),