diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java
index 8897f00..223c0bd 100644
--- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java
+++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java
@@ -6,6 +6,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
@@ -26,10 +27,14 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
*
Naming proposal ready: Status becomes
* {@link ProcessingStatus#PROPOSAL_READY}, counters unchanged,
* {@code retryable=false}.
- * Pre-check content error (first occurrence):
+ * Pre-check content error {@link PreCheckFailureReason#NO_USABLE_TEXT}:
+ * Status becomes {@link ProcessingStatus#FAILED_FINAL} immediately,
+ * content error counter incremented by 1, {@code retryable=false}.
+ * Image-only PDFs without OCR text will not yield usable text on retry.
+ * Pre-check content error (other reason, first occurrence):
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
* content error counter incremented by 1, {@code retryable=true}.
- * Pre-check content error (second or later occurrence):
+ * Pre-check content error (other reason, second or later occurrence):
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
* content error counter incremented by 1, {@code retryable=false}.
* AI functional failure (first occurrence):
@@ -112,11 +117,16 @@ final class ProcessingOutcomeTransition {
);
}
- case PreCheckFailed ignored2 -> {
- // Deterministic content error from pre-check: apply the 1-retry rule
+ case PreCheckFailed preCheckFailed -> {
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
- boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
+ if (preCheckFailed.failureReason() == PreCheckFailureReason.NO_USABLE_TEXT) {
+ // An image-only PDF without OCR text yields the same result on every retry, so finalise immediately.
+ yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false);
+ }
+
+ // Other deterministic content errors: apply the 1-retry rule
+ boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
if (isFirstOccurrence) {
yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true);
} else {
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
index d9e4507..5907807 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
@@ -154,13 +154,36 @@ class DocumentProcessingCoordinatorTest {
}
@Test
- void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
+ void process_newDocument_noUsableText_persistsFailedFinal_contentCounterOne() {
+ // NO_USABLE_TEXT (image-only PDF) finalises immediately — no retry.
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new PreCheckFailed(
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
processor.process(candidate, fingerprint, outcome, context, attemptStart);
+ assertEquals(1, attemptRepo.savedAttempts.size());
+ ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
+ assertEquals(ProcessingStatus.FAILED_FINAL, attempt.status());
+ assertFalse(attempt.retryable());
+
+ assertEquals(1, recordRepo.createdRecords.size());
+ DocumentRecord record = recordRepo.createdRecords.get(0);
+ assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
+ assertEquals(1, record.failureCounters().contentErrorCount());
+ assertEquals(0, record.failureCounters().transientErrorCount());
+ assertNotNull(record.lastFailureInstant());
+ assertNull(record.lastSuccessInstant());
+ }
+
+ @Test
+ void process_newDocument_firstPageLimitExceeded_persistsFailedRetryable_contentCounterOne() {
+ recordRepo.setLookupResult(new DocumentUnknown());
+ DocumentProcessingOutcome outcome = new PreCheckFailed(
+ candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+
+ processor.process(candidate, fingerprint, outcome, context, attemptStart);
+
assertEquals(1, attemptRepo.savedAttempts.size());
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
@@ -1191,17 +1214,18 @@ class DocumentProcessingCoordinatorTest {
// -------------------------------------------------------------------------
@Test
- void process_contentErrorLifecycle_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
- // Run 1: new document, first deterministic content error → FAILED_RETRYABLE
+ void process_contentErrorLifecycle_pageLimitExceeded_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
+ // PAGE_LIMIT_EXCEEDED follows the 1-retry rule: first run → FAILED_RETRYABLE, second → FAILED_FINAL, third → skipped.
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome contentError = new PreCheckFailed(
- candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+ candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+ // Run 1: new document, first content error → FAILED_RETRYABLE
processor.process(candidate, fingerprint, contentError, context, attemptStart);
DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(),
- "First content error must yield FAILED_RETRYABLE");
+ "First PAGE_LIMIT_EXCEEDED must yield FAILED_RETRYABLE");
assertEquals(1, afterRun1.failureCounters().contentErrorCount());
assertTrue(attemptRepo.savedAttempts.get(0).retryable(),
"First content error attempt must be retryable");
@@ -1236,6 +1260,36 @@ class DocumentProcessingCoordinatorTest {
"Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event");
}
+ @Test
+ void process_contentErrorLifecycle_noUsableText_immediatelyFinal_secondRunSkipped() {
+ // NO_USABLE_TEXT (image-only PDF): first run is immediately FAILED_FINAL, second is skipped.
+ recordRepo.setLookupResult(new DocumentUnknown());
+ DocumentProcessingOutcome noTextError = new PreCheckFailed(
+ candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+
+ // Run 1: new document → FAILED_FINAL immediately
+ processor.process(candidate, fingerprint, noTextError, context, attemptStart);
+
+ DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
+ assertEquals(ProcessingStatus.FAILED_FINAL, afterRun1.overallStatus(),
+ "NO_USABLE_TEXT must yield FAILED_FINAL immediately");
+ assertEquals(1, afterRun1.failureCounters().contentErrorCount());
+ assertFalse(attemptRepo.savedAttempts.get(0).retryable());
+
+ // Run 2: terminal FAILED_FINAL → SKIPPED_FINAL_FAILURE; counters must not change
+ recordRepo.setLookupResult(new DocumentTerminalFinalFailure(afterRun1));
+
+ processor.process(candidate, fingerprint, noTextError, context, attemptStart);
+
+ assertEquals(2, attemptRepo.savedAttempts.size());
+ ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(1);
+ assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
+
+ DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0);
+ assertEquals(1, afterRun2.failureCounters().contentErrorCount(),
+ "Content error counter must remain 1 after SKIPPED_FINAL_FAILURE");
+ }
+
@Test
void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() {
// maxRetriesTransient=2: first transient error → FAILED_RETRYABLE, second → FAILED_FINAL
@@ -1594,8 +1648,9 @@ class DocumentProcessingCoordinatorTest {
@Test
void process_firstContentError_retryDecisionLog_containsFingerprintAndFailedRetryable() {
- // Proves that the retry decision log for a first deterministic content error contains
+ // Proves that the retry decision log for a first retryable content error contains
// both the document fingerprint and the FAILED_RETRYABLE classification.
+ // Uses PAGE_LIMIT_EXCEEDED which follows the 1-retry rule.
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturing =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
@@ -1604,7 +1659,7 @@ class DocumentProcessingCoordinatorTest {
recordRepo.setLookupResult(new DocumentUnknown());
coordinatorWithCapturing.process(candidate, fingerprint,
- new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT),
+ new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED),
context, attemptStart);
assertTrue(capturingLogger.anyWarnContains(FINGERPRINT_HEX),
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
index 1d59db6..66bc40f 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
@@ -103,13 +103,28 @@ class ProcessingOutcomeTransitionTest {
// -------------------------------------------------------------------------
@Test
- void forNewDocument_firstPreCheckFailed_returnsFailedRetryable_contentCounterOne() {
+ void forNewDocument_noUsableText_immediatelyFailedFinal_noRetry() {
PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
ProcessingOutcomeTransition.ProcessingOutcome result =
ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
- assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus());
+ assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(),
+ "NO_USABLE_TEXT must finalise immediately without retry");
+ assertFalse(result.retryable());
+ assertEquals(1, result.counters().contentErrorCount());
+ assertEquals(0, result.counters().transientErrorCount());
+ }
+
+ @Test
+ void forNewDocument_firstPageLimitExceeded_returnsFailedRetryable_contentCounterOne() {
+ PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+
+ ProcessingOutcomeTransition.ProcessingOutcome result =
+ ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
+
+ assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus(),
+ "PAGE_LIMIT_EXCEEDED first occurrence must be retryable");
assertTrue(result.retryable());
assertEquals(1, result.counters().contentErrorCount());
assertEquals(0, result.counters().transientErrorCount());
@@ -149,9 +164,10 @@ class ProcessingOutcomeTransitionTest {
@Test
void forNewDocument_contentError_transientCounterIsIrrelevant() {
- PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
+ // PAGE_LIMIT_EXCEEDED is used here: it follows the 1-retry rule, and a non-zero
+ // transient counter must not influence the content-error decision.
+ PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
- // Counter before: 0 content errors (first occurrence), transient ignored
ProcessingOutcomeTransition.ProcessingOutcome result =
ProcessingOutcomeTransition.forKnownDocument(
outcome, new FailureCounters(0, 5), LIMIT_1);
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java
index c4c2351..9423b55 100644
--- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/PreCheckFailureReason.java
@@ -21,9 +21,10 @@ public enum PreCheckFailureReason {
* The extracted PDF text, after normalization, contains no letters or digits.
*
* This is a deterministic content error: reprocessing the same file in a later run
- * will have the same outcome unless the source file is changed.
+ * will have the same outcome unless the source file is changed (e.g. by adding an OCR text layer).
*
- * Retry logic: exactly 1 retry in a later batch run.
+ * Retry logic: no retry — the document is immediately finalised to
+ * {@link ProcessingStatus#FAILED_FINAL}.
*/
NO_USABLE_TEXT("No usable text in extracted PDF content"),