#78: NO_USABLE_TEXT (Foto-PDF) finalisiert sofort zu FAILED_FINAL
Bisher wurde NO_USABLE_TEXT (kein OCR-Text im PDF) wie alle anderen deterministischen Inhaltsfehler nach der 1-Retry-Regel behandelt und landete beim ersten Auftreten in FAILED_RETRYABLE. Da sich ein Bild-Scan ohne OCR-Text zwischen den Läufen nicht verändert, ist ein Wiederholversuch sinnlos – der Status muss sofort FAILED_FINAL sein.

Geändert: ProcessingOutcomeTransition erkennt NO_USABLE_TEXT als Sonderfall und liefert ohne Retry-Prüfung FAILED_FINAL. PAGE_LIMIT_EXCEEDED und CONTENT_NOT_EXTRACTABLE behalten die 1-Retry-Regel.

Tests angepasst: Bestehende Tests, die FAILED_RETRYABLE für NO_USABLE_TEXT erwarteten, wurden auf das korrekte Verhalten umgestellt oder auf PAGE_LIMIT_EXCEEDED umgeschrieben. Neue Lifecycle-Tests für NO_USABLE_TEXT (sofort FAILED_FINAL → SKIPPED_FINAL_FAILURE) hinzugefügt.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
+15
-5
@@ -6,6 +6,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
|
|||||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
|
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||||
|
|
||||||
@@ -26,10 +27,14 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
|||||||
* <li><strong>Naming proposal ready:</strong> Status becomes
|
* <li><strong>Naming proposal ready:</strong> Status becomes
|
||||||
* {@link ProcessingStatus#PROPOSAL_READY}, counters unchanged,
|
* {@link ProcessingStatus#PROPOSAL_READY}, counters unchanged,
|
||||||
* {@code retryable=false}.</li>
|
* {@code retryable=false}.</li>
|
||||||
* <li><strong>Pre-check content error (first occurrence):</strong>
|
* <li><strong>Pre-check content error {@link PreCheckFailureReason#NO_USABLE_TEXT}:</strong>
|
||||||
|
* Status becomes {@link ProcessingStatus#FAILED_FINAL} immediately,
|
||||||
|
* content error counter incremented by 1, {@code retryable=false}.
|
||||||
|
* Image-only PDFs without OCR text will not yield usable text on retry.</li>
|
||||||
|
* <li><strong>Pre-check content error (other reason, first occurrence):</strong>
|
||||||
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
|
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||||
* content error counter incremented by 1, {@code retryable=true}.</li>
|
* content error counter incremented by 1, {@code retryable=true}.</li>
|
||||||
* <li><strong>Pre-check content error (second or later occurrence):</strong>
|
* <li><strong>Pre-check content error (other reason, second or later occurrence):</strong>
|
||||||
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
|
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
|
||||||
* content error counter incremented by 1, {@code retryable=false}.</li>
|
* content error counter incremented by 1, {@code retryable=false}.</li>
|
||||||
* <li><strong>AI functional failure (first occurrence):</strong>
|
* <li><strong>AI functional failure (first occurrence):</strong>
|
||||||
@@ -112,11 +117,16 @@ final class ProcessingOutcomeTransition {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
case PreCheckFailed ignored2 -> {
|
case PreCheckFailed preCheckFailed -> {
|
||||||
// Deterministic content error from pre-check: apply the 1-retry rule
|
|
||||||
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
|
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
|
||||||
boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
|
|
||||||
|
|
||||||
|
if (preCheckFailed.failureReason() == PreCheckFailureReason.NO_USABLE_TEXT) {
|
||||||
|
// Image-only PDFs without OCR text will not change on retry.
|
||||||
|
yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Other deterministic content errors: apply the 1-retry rule
|
||||||
|
boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
|
||||||
if (isFirstOccurrence) {
|
if (isFirstOccurrence) {
|
||||||
yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true);
|
yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true);
|
||||||
} else {
|
} else {
|
||||||
|
|||||||
+62
-7
@@ -154,13 +154,36 @@ class DocumentProcessingCoordinatorTest {
|
|||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
|
void process_newDocument_noUsableText_persistsFailedFinal_contentCounterOne() {
|
||||||
|
// NO_USABLE_TEXT (image-only PDF) finalises immediately — no retry.
|
||||||
recordRepo.setLookupResult(new DocumentUnknown());
|
recordRepo.setLookupResult(new DocumentUnknown());
|
||||||
DocumentProcessingOutcome outcome = new PreCheckFailed(
|
DocumentProcessingOutcome outcome = new PreCheckFailed(
|
||||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||||
|
|
||||||
processor.process(candidate, fingerprint, outcome, context, attemptStart);
|
processor.process(candidate, fingerprint, outcome, context, attemptStart);
|
||||||
|
|
||||||
|
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||||
|
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||||
|
assertEquals(ProcessingStatus.FAILED_FINAL, attempt.status());
|
||||||
|
assertFalse(attempt.retryable());
|
||||||
|
|
||||||
|
assertEquals(1, recordRepo.createdRecords.size());
|
||||||
|
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||||
|
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
|
||||||
|
assertEquals(1, record.failureCounters().contentErrorCount());
|
||||||
|
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||||
|
assertNotNull(record.lastFailureInstant());
|
||||||
|
assertNull(record.lastSuccessInstant());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void process_newDocument_firstPageLimitExceeded_persistsFailedRetryable_contentCounterOne() {
|
||||||
|
recordRepo.setLookupResult(new DocumentUnknown());
|
||||||
|
DocumentProcessingOutcome outcome = new PreCheckFailed(
|
||||||
|
candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
|
||||||
|
|
||||||
|
processor.process(candidate, fingerprint, outcome, context, attemptStart);
|
||||||
|
|
||||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
|
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
|
||||||
@@ -1191,17 +1214,18 @@ class DocumentProcessingCoordinatorTest {
|
|||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void process_contentErrorLifecycle_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
|
void process_contentErrorLifecycle_pageLimitExceeded_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
|
||||||
// Run 1: new document, first deterministic content error → FAILED_RETRYABLE
|
// PAGE_LIMIT_EXCEEDED follows the 1-retry rule: first run → FAILED_RETRYABLE, second → FAILED_FINAL.
|
||||||
recordRepo.setLookupResult(new DocumentUnknown());
|
recordRepo.setLookupResult(new DocumentUnknown());
|
||||||
DocumentProcessingOutcome contentError = new PreCheckFailed(
|
DocumentProcessingOutcome contentError = new PreCheckFailed(
|
||||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
|
||||||
|
|
||||||
|
// Run 1: new document, first content error → FAILED_RETRYABLE
|
||||||
processor.process(candidate, fingerprint, contentError, context, attemptStart);
|
processor.process(candidate, fingerprint, contentError, context, attemptStart);
|
||||||
|
|
||||||
DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
|
DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
|
||||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(),
|
assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(),
|
||||||
"First content error must yield FAILED_RETRYABLE");
|
"First PAGE_LIMIT_EXCEEDED must yield FAILED_RETRYABLE");
|
||||||
assertEquals(1, afterRun1.failureCounters().contentErrorCount());
|
assertEquals(1, afterRun1.failureCounters().contentErrorCount());
|
||||||
assertTrue(attemptRepo.savedAttempts.get(0).retryable(),
|
assertTrue(attemptRepo.savedAttempts.get(0).retryable(),
|
||||||
"First content error attempt must be retryable");
|
"First content error attempt must be retryable");
|
||||||
@@ -1236,6 +1260,36 @@ class DocumentProcessingCoordinatorTest {
|
|||||||
"Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event");
|
"Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void process_contentErrorLifecycle_noUsableText_immediatelyFinal_secondRunSkipped() {
|
||||||
|
// NO_USABLE_TEXT (image-only PDF): first run is immediately FAILED_FINAL, second is skipped.
|
||||||
|
recordRepo.setLookupResult(new DocumentUnknown());
|
||||||
|
DocumentProcessingOutcome noTextError = new PreCheckFailed(
|
||||||
|
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||||
|
|
||||||
|
// Run 1: new document → FAILED_FINAL immediately
|
||||||
|
processor.process(candidate, fingerprint, noTextError, context, attemptStart);
|
||||||
|
|
||||||
|
DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
|
||||||
|
assertEquals(ProcessingStatus.FAILED_FINAL, afterRun1.overallStatus(),
|
||||||
|
"NO_USABLE_TEXT must yield FAILED_FINAL immediately");
|
||||||
|
assertEquals(1, afterRun1.failureCounters().contentErrorCount());
|
||||||
|
assertFalse(attemptRepo.savedAttempts.get(0).retryable());
|
||||||
|
|
||||||
|
// Run 2: terminal FAILED_FINAL → SKIPPED_FINAL_FAILURE; counters must not change
|
||||||
|
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(afterRun1));
|
||||||
|
|
||||||
|
processor.process(candidate, fingerprint, noTextError, context, attemptStart);
|
||||||
|
|
||||||
|
assertEquals(2, attemptRepo.savedAttempts.size());
|
||||||
|
ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(1);
|
||||||
|
assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
|
||||||
|
|
||||||
|
DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0);
|
||||||
|
assertEquals(1, afterRun2.failureCounters().contentErrorCount(),
|
||||||
|
"Content error counter must remain 1 after SKIPPED_FINAL_FAILURE");
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() {
|
void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() {
|
||||||
// maxRetriesTransient=2: first transient error → FAILED_RETRYABLE, second → FAILED_FINAL
|
// maxRetriesTransient=2: first transient error → FAILED_RETRYABLE, second → FAILED_FINAL
|
||||||
@@ -1594,8 +1648,9 @@ class DocumentProcessingCoordinatorTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void process_firstContentError_retryDecisionLog_containsFingerprintAndFailedRetryable() {
|
void process_firstContentError_retryDecisionLog_containsFingerprintAndFailedRetryable() {
|
||||||
// Proves that the retry decision log for a first deterministic content error contains
|
// Proves that the retry decision log for a first retryable content error contains
|
||||||
// both the document fingerprint and the FAILED_RETRYABLE classification.
|
// both the document fingerprint and the FAILED_RETRYABLE classification.
|
||||||
|
// Uses PAGE_LIMIT_EXCEEDED which follows the 1-retry rule.
|
||||||
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
||||||
DocumentProcessingCoordinator coordinatorWithCapturing =
|
DocumentProcessingCoordinator coordinatorWithCapturing =
|
||||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||||
@@ -1604,7 +1659,7 @@ class DocumentProcessingCoordinatorTest {
|
|||||||
recordRepo.setLookupResult(new DocumentUnknown());
|
recordRepo.setLookupResult(new DocumentUnknown());
|
||||||
|
|
||||||
coordinatorWithCapturing.process(candidate, fingerprint,
|
coordinatorWithCapturing.process(candidate, fingerprint,
|
||||||
new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT),
|
new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED),
|
||||||
context, attemptStart);
|
context, attemptStart);
|
||||||
|
|
||||||
assertTrue(capturingLogger.anyWarnContains(FINGERPRINT_HEX),
|
assertTrue(capturingLogger.anyWarnContains(FINGERPRINT_HEX),
|
||||||
|
|||||||
+20
-4
@@ -103,13 +103,28 @@ class ProcessingOutcomeTransitionTest {
|
|||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
void forNewDocument_firstPreCheckFailed_returnsFailedRetryable_contentCounterOne() {
|
void forNewDocument_noUsableText_immediatelyFailedFinal_noRetry() {
|
||||||
PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
|
PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
|
||||||
|
|
||||||
ProcessingOutcomeTransition.ProcessingOutcome result =
|
ProcessingOutcomeTransition.ProcessingOutcome result =
|
||||||
ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
|
ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
|
||||||
|
|
||||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus());
|
assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(),
|
||||||
|
"NO_USABLE_TEXT must finalise immediately without retry");
|
||||||
|
assertFalse(result.retryable());
|
||||||
|
assertEquals(1, result.counters().contentErrorCount());
|
||||||
|
assertEquals(0, result.counters().transientErrorCount());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void forNewDocument_firstPageLimitExceeded_returnsFailedRetryable_contentCounterOne() {
|
||||||
|
PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
|
||||||
|
|
||||||
|
ProcessingOutcomeTransition.ProcessingOutcome result =
|
||||||
|
ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
|
||||||
|
|
||||||
|
assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus(),
|
||||||
|
"PAGE_LIMIT_EXCEEDED first occurrence must be retryable");
|
||||||
assertTrue(result.retryable());
|
assertTrue(result.retryable());
|
||||||
assertEquals(1, result.counters().contentErrorCount());
|
assertEquals(1, result.counters().contentErrorCount());
|
||||||
assertEquals(0, result.counters().transientErrorCount());
|
assertEquals(0, result.counters().transientErrorCount());
|
||||||
@@ -149,9 +164,10 @@ class ProcessingOutcomeTransitionTest {
|
|||||||
|
|
||||||
@Test
|
@Test
|
||||||
void forNewDocument_contentError_transientCounterIsIrrelevant() {
|
void forNewDocument_contentError_transientCounterIsIrrelevant() {
|
||||||
PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.NO_USABLE_TEXT);
|
// PAGE_LIMIT_EXCEEDED is used here: it follows the 1-retry rule, and a non-zero
|
||||||
|
// transient counter must not influence the content-error decision.
|
||||||
|
PreCheckFailed outcome = new PreCheckFailed(candidate(), PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
|
||||||
|
|
||||||
// Counter before: 0 content errors (first occurrence), transient ignored
|
|
||||||
ProcessingOutcomeTransition.ProcessingOutcome result =
|
ProcessingOutcomeTransition.ProcessingOutcome result =
|
||||||
ProcessingOutcomeTransition.forKnownDocument(
|
ProcessingOutcomeTransition.forKnownDocument(
|
||||||
outcome, new FailureCounters(0, 5), LIMIT_1);
|
outcome, new FailureCounters(0, 5), LIMIT_1);
|
||||||
|
|||||||
+3
-2
@@ -21,9 +21,10 @@ public enum PreCheckFailureReason {
|
|||||||
* The extracted PDF text, after normalization, contains no letters or digits.
|
* The extracted PDF text, after normalization, contains no letters or digits.
|
||||||
* <p>
|
* <p>
|
||||||
* This is a deterministic content error: reprocessing the same file in a later run
|
* This is a deterministic content error: reprocessing the same file in a later run
|
||||||
* will have the same outcome unless the source file is changed.
|
* will have the same outcome unless the source file is changed (e.g. by adding OCR).
|
||||||
* <p>
|
* <p>
|
||||||
* Retry logic: exactly 1 retry in a later batch run.
|
* Retry logic: no retry — the document is immediately finalised to
|
||||||
|
* {@link ProcessingStatus#FAILED_FINAL}.
|
||||||
*/
|
*/
|
||||||
NO_USABLE_TEXT("No usable text in extracted PDF content"),
|
NO_USABLE_TEXT("No usable text in extracted PDF content"),
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user