M4 AP-006 Idempotenz- und Persistenzlogik integrieren

2026-04-02 23:36:22 +02:00
parent 8ee4041feb
commit 00c4cf1e5c
7 changed files with 1598 additions and 174 deletions
@@ -0,0 +1,425 @@
+package de.gecheckt.pdf.umbenenner.application.service;
+
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
+import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
+import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
+import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
+import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
+import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
+import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
+import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
+import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
+import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
+import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
+import de.gecheckt.pdf.umbenenner.domain.model.RunId;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
+import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Unit tests for {@link M4DocumentProcessor}.
+ * <p>
+ * Covers:
+ * <ul>
+ *   <li>M4 minimal rules: status, counter and retryable flag mapping</li>
+ *   <li>Skip logic for SUCCESS and FAILED_FINAL documents</li>
+ *   <li>New document path (DocumentUnknown)</li>
+ *   <li>Known processable document path (DocumentKnownProcessable)</li>
+ *   <li>Persistence lookup failure: no attempt written</li>
+ *   <li>Persistence write failure: controlled failure, no crash</li>
+ *   <li>Skip events do not change error counters</li>
+ * </ul>
+ */
+class M4DocumentProcessorTest {
+
+    private static final String FINGERPRINT_HEX =
+            "a".repeat(64); // 64 lowercase hex chars
+
+    private CapturingDocumentRecordRepository recordRepo;
+    private CapturingProcessingAttemptRepository attemptRepo;
+    private M4DocumentProcessor processor;
+
+    private SourceDocumentCandidate candidate;
+    private DocumentFingerprint fingerprint;
+    private BatchRunContext context;
+    private Instant attemptStart;
+
+    /**
+     * Builds a fresh fixture before each test: the two capturing repositories, the
+     * processor wired to them, and the candidate / fingerprint / run context /
+     * attempt start values that the tests pass to {@code process(...)}.
+     */
+    @BeforeEach
+    void setUp() {
+        // Test doubles first, then the processor under test that is wired to them.
+        recordRepo = new CapturingDocumentRecordRepository();
+        attemptRepo = new CapturingProcessingAttemptRepository();
+        processor = new M4DocumentProcessor(recordRepo, attemptRepo);
+
+        // Input values handed to process(...) by the individual tests.
+        SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/test.pdf");
+        candidate = new SourceDocumentCandidate("test.pdf", 1024L, locator);
+        fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
+        RunId runId = new RunId("run-001");
+        context = new BatchRunContext(runId, Instant.now());
+        attemptStart = Instant.now();
+    }
+
+    // -------------------------------------------------------------------------
+    // New document (DocumentUnknown) path
+    // -------------------------------------------------------------------------
+
+    /**
+     * An unknown document whose pre-checks passed must produce exactly one SUCCESS
+     * attempt and one newly created master record with both failure counters at zero.
+     */
+    @Test
+    void process_newDocument_preCheckPassed_persistsSuccessStatus() {
+        recordRepo.setLookupResult(new DocumentUnknown());
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        // Attempt history: a single non-retryable SUCCESS entry without failure details
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.SUCCESS, savedAttempt.status());
+        assertFalse(savedAttempt.retryable());
+        assertNull(savedAttempt.failureClass());
+        assertNull(savedAttempt.failureMessage());
+
+        // Master record: created once, SUCCESS, no failures counted, only the success instant set
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord created = recordRepo.createdRecords.get(0);
+        FailureCounters counters = created.failureCounters();
+        assertEquals(ProcessingStatus.SUCCESS, created.overallStatus());
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+        assertNotNull(created.lastSuccessInstant());
+        assertNull(created.lastFailureInstant());
+    }
+
+    /**
+     * An unknown document that fails the pre-check with NO_USABLE_TEXT must be stored
+     * as FAILED_RETRYABLE with a content error count of one and no transient errors.
+     */
+    @Test
+    void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
+        recordRepo.setLookupResult(new DocumentUnknown());
+        DocumentProcessingOutcome contentFailure =
+                new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+
+        processor.process(candidate, fingerprint, contentFailure, context, attemptStart);
+
+        // Attempt history: one retryable failure
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, savedAttempt.status());
+        assertTrue(savedAttempt.retryable());
+
+        // Master record: created once, first content error counted, only the failure instant set
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord created = recordRepo.createdRecords.get(0);
+        FailureCounters counters = created.failureCounters();
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, created.overallStatus());
+        assertEquals(1, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+        assertNotNull(created.lastFailureInstant());
+        assertNull(created.lastSuccessInstant());
+    }
+
+    /**
+     * An unknown document that ends in a technical error must be stored as
+     * FAILED_RETRYABLE with a transient error count of one and no content errors.
+     */
+    @Test
+    void process_newDocument_technicalError_persistsFailedRetryable_transientCounterOne() {
+        recordRepo.setLookupResult(new DocumentUnknown());
+        DocumentProcessingOutcome technicalFailure =
+                new TechnicalDocumentError(candidate, "I/O error", null);
+
+        processor.process(candidate, fingerprint, technicalFailure, context, attemptStart);
+
+        // Attempt history: one retryable failure
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, savedAttempt.status());
+        assertTrue(savedAttempt.retryable());
+
+        // Master record: created once, only the transient counter moved
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord created = recordRepo.createdRecords.get(0);
+        FailureCounters counters = created.failureCounters();
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, created.overallStatus());
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(1, counters.transientErrorCount());
+    }
+
+    // -------------------------------------------------------------------------
+    // Known processable document path (DocumentKnownProcessable)
+    // -------------------------------------------------------------------------
+
+    /**
+     * A known document that already carries one content error and fails the pre-check
+     * again must end as FAILED_FINAL with a content error count of two.
+     */
+    @Test
+    void process_knownDocument_secondContentError_persistsFailedFinal_contentCounterTwo() {
+        // Prior state: one content error on record, still retryable
+        FailureCounters oneContentError = new FailureCounters(1, 0);
+        DocumentRecord priorRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, oneContentError);
+        recordRepo.setLookupResult(new DocumentKnownProcessable(priorRecord));
+        DocumentProcessingOutcome contentFailure =
+                new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+
+        processor.process(candidate, fingerprint, contentFailure, context, attemptStart);
+
+        // Attempt history: one final, non-retryable failure
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, savedAttempt.status());
+        assertFalse(savedAttempt.retryable());
+
+        // Master record: updated (not created), content counter incremented to two
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updated = recordRepo.updatedRecords.get(0);
+        FailureCounters counters = updated.failureCounters();
+        assertEquals(ProcessingStatus.FAILED_FINAL, updated.overallStatus());
+        assertEquals(2, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+    }
+
+    /**
+     * A known document with two prior transient errors that hits another technical
+     * error must stay FAILED_RETRYABLE, with the transient counter raised to three
+     * and the content counter untouched.
+     */
+    @Test
+    void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
+        DocumentRecord existingRecord = buildRecord(
+                ProcessingStatus.FAILED_RETRYABLE,
+                new FailureCounters(0, 2));
+        recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+
+        DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
+                candidate, "Timeout", null);
+
+        processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
+
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord record = recordRepo.updatedRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
+        assertEquals(0, record.failureCounters().contentErrorCount());
+        assertEquals(3, record.failureCounters().transientErrorCount());
+
+        // Check the attempt count before indexing, so a missing attempt is reported as an
+        // assertion failure rather than an IndexOutOfBoundsException.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        assertTrue(attemptRepo.savedAttempts.get(0).retryable());
+    }
+
+    /**
+     * A known, still processable document whose pre-checks now pass must be updated
+     * to SUCCESS while the failure counters recorded earlier are carried over as-is.
+     */
+    @Test
+    void process_knownDocument_preCheckPassed_persistsSuccess() {
+        // Prior state: one transient error on record
+        FailureCounters oneTransientError = new FailureCounters(0, 1);
+        DocumentRecord priorRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, oneTransientError);
+        recordRepo.setLookupResult(new DocumentKnownProcessable(priorRecord));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updated = recordRepo.updatedRecords.get(0);
+        FailureCounters counters = updated.failureCounters();
+        assertEquals(ProcessingStatus.SUCCESS, updated.overallStatus());
+        // A success leaves both counters exactly where they were
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(1, counters.transientErrorCount());
+        assertNotNull(updated.lastSuccessInstant());
+    }
+
+    // -------------------------------------------------------------------------
+    // Skip paths
+    // -------------------------------------------------------------------------
+
+    /**
+     * A document already in terminal SUCCESS must be skipped: one
+     * SKIPPED_ALREADY_PROCESSED attempt is written and the master record is updated
+     * with its status and counters left as they were.
+     */
+    @Test
+    void process_terminalSuccess_persistsSkipAttemptWithSkippedAlreadyProcessed() {
+        DocumentRecord priorRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
+        recordRepo.setLookupResult(new DocumentTerminalSuccess(priorRecord));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        // Attempt history: a single skip entry without failure details
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, skipAttempt.status());
+        assertFalse(skipAttempt.retryable());
+        assertNull(skipAttempt.failureClass());
+        assertNull(skipAttempt.failureMessage());
+
+        // Master record: updated once; status and counters must be unchanged
+        // (presumably only the update timestamp moves - not asserted here)
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updated = recordRepo.updatedRecords.get(0);
+        FailureCounters counters = updated.failureCounters();
+        assertEquals(ProcessingStatus.SUCCESS, updated.overallStatus());
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+    }
+
+    /**
+     * A document already in terminal FAILED_FINAL must be skipped: one
+     * SKIPPED_FINAL_FAILURE attempt is written and the master record is updated
+     * with its status and counters left as they were.
+     */
+    @Test
+    void process_terminalFinalFailure_persistsSkipAttemptWithSkippedFinalFailure() {
+        FailureCounters twoContentErrors = new FailureCounters(2, 0);
+        DocumentRecord priorRecord = buildRecord(ProcessingStatus.FAILED_FINAL, twoContentErrors);
+        recordRepo.setLookupResult(new DocumentTerminalFinalFailure(priorRecord));
+        DocumentProcessingOutcome contentFailure =
+                new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+
+        processor.process(candidate, fingerprint, contentFailure, context, attemptStart);
+
+        // Attempt history: a single non-retryable skip entry
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
+        assertFalse(skipAttempt.retryable());
+
+        // Master record: updated once; status and counters must be unchanged
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updated = recordRepo.updatedRecords.get(0);
+        FailureCounters counters = updated.failureCounters();
+        assertEquals(ProcessingStatus.FAILED_FINAL, updated.overallStatus());
+        assertEquals(2, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+    }
+
+    /**
+     * A skip of a terminal-SUCCESS document must carry non-zero failure counters
+     * over to the updated master record without modifying either of them.
+     */
+    @Test
+    void process_skipEvent_doesNotChangeErrorCounters() {
+        FailureCounters originalCounters = new FailureCounters(1, 3);
+        DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, originalCounters);
+        recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
+
+        processor.process(candidate, fingerprint,
+                new PreCheckPassed(candidate, new PdfExtractionSuccess("t", new PdfPageCount(1))),
+                context, attemptStart);
+
+        // Check the update count before indexing, so a missing update is reported as an
+        // assertion failure rather than an IndexOutOfBoundsException.
+        assertEquals(1, recordRepo.updatedRecords.size(),
+                "Skip must update the master record exactly once");
+        DocumentRecord updated = recordRepo.updatedRecords.get(0);
+        assertEquals(originalCounters.contentErrorCount(), updated.failureCounters().contentErrorCount(),
+                "Skip must not change content error counter");
+        assertEquals(originalCounters.transientErrorCount(), updated.failureCounters().transientErrorCount(),
+                "Skip must not change transient error counter");
+    }
+
+    // -------------------------------------------------------------------------
+    // Persistence lookup failure
+    // -------------------------------------------------------------------------
+
+    /**
+     * When the record lookup reports a technical failure, {@code process(...)} must
+     * return normally and must not write an attempt or create/update a master record.
+     */
+    @Test
+    void process_persistenceLookupFailure_noAttemptWritten_noException() {
+        recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("DB unavailable", null));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // The lookup failure has to be absorbed by the processor
+        assertDoesNotThrow(() ->
+                processor.process(candidate, fingerprint, passedOutcome, context, attemptStart));
+
+        // Nothing may have reached either repository
+        assertTrue(attemptRepo.savedAttempts.isEmpty(),
+                "No attempt must be written when lookup fails");
+        assertTrue(recordRepo.createdRecords.isEmpty());
+        assertTrue(recordRepo.updatedRecords.isEmpty());
+    }
+
+    // -------------------------------------------------------------------------
+    // Persistence write failure: controlled, no crash
+    // -------------------------------------------------------------------------
+
+    /**
+     * When saving the attempt throws a {@link DocumentPersistenceException}, the
+     * exception must not escape {@code process(...)}.
+     */
+    @Test
+    void process_persistenceWriteFailure_doesNotThrow_batchContinues() {
+        // Arrange: unknown document, attempt repository rejects every save
+        attemptRepo.failOnSave = true;
+        recordRepo.setLookupResult(new DocumentUnknown());
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act + assert: the simulated write failure stays inside the processor
+        assertDoesNotThrow(() ->
+                processor.process(candidate, fingerprint, passedOutcome, context, attemptStart));
+    }
+
+    // -------------------------------------------------------------------------
+    // Attempt number monotonicity
+    // -------------------------------------------------------------------------
+
+    /**
+     * The attempt number stored on the saved attempt must be the value returned by
+     * the attempt repository's {@code loadNextAttemptNumber}.
+     */
+    @Test
+    void process_attemptNumberIsAssignedFromRepository() {
+        // The repository hands out 3, as if two attempts had been stored before
+        attemptRepo.nextAttemptNumber = 3;
+        recordRepo.setLookupResult(new DocumentUnknown());
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(3, savedAttempt.attemptNumber(),
+                "Attempt number must be taken from the repository");
+    }
+
+    // -------------------------------------------------------------------------
+    // Helpers
+    // -------------------------------------------------------------------------
+
+    /**
+     * Creates a master record for the shared test fingerprint and the fixed
+     * "/tmp/test.pdf" source with the given status and counters.
+     * <p>
+     * Of the two status-dependent instants, the first is set only when the status
+     * is not SUCCESS and the second only when it is SUCCESS (presumably last-failure
+     * and last-success - confirm against the {@code DocumentRecord} constructor).
+     * The two trailing instants are both set to the current time.
+     *
+     * @param status   overall status of the record
+     * @param counters failure counters of the record
+     * @return the assembled record
+     */
+    private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) {
+        Instant now = Instant.now();
+        boolean succeeded = status == ProcessingStatus.SUCCESS;
+        Instant setUnlessSuccess = succeeded ? null : now;
+        Instant setOnSuccess = succeeded ? now : null;
+        SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/test.pdf");
+
+        return new DocumentRecord(
+                fingerprint, locator, "test.pdf",
+                status, counters,
+                setUnlessSuccess, setOnSuccess,
+                now, now);
+    }
+
+    // -------------------------------------------------------------------------
+    // Capturing test doubles
+    // -------------------------------------------------------------------------
+
+    /**
+     * In-memory {@link DocumentRecordRepository} for tests: answers every lookup with
+     * a preconfigured result and records all created and updated records in call order.
+     */
+    private static class CapturingDocumentRecordRepository implements DocumentRecordRepository {
+        /** Records passed to {@link #create}, in call order. */
+        final List<DocumentRecord> createdRecords = new ArrayList<>();
+        /** Records passed to {@link #update}, in call order. */
+        final List<DocumentRecord> updatedRecords = new ArrayList<>();
+        /** Result returned by every lookup; defaults to "unknown document". */
+        private DocumentRecordLookupResult cannedLookupResult = new DocumentUnknown();
+
+        /** Sets the result that all subsequent lookups return. */
+        void setLookupResult(DocumentRecordLookupResult newResult) {
+            cannedLookupResult = newResult;
+        }
+
+        /** Returns the configured result; the fingerprint argument is ignored. */
+        @Override
+        public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
+            return cannedLookupResult;
+        }
+
+        @Override
+        public void create(DocumentRecord record) {
+            createdRecords.add(record);
+        }
+
+        @Override
+        public void update(DocumentRecord record) {
+            updatedRecords.add(record);
+        }
+    }
+
+    /**
+     * In-memory {@link ProcessingAttemptRepository} for tests: records saved attempts,
+     * hands out a configurable attempt number and can simulate a failing save.
+     */
+    private static class CapturingProcessingAttemptRepository implements ProcessingAttemptRepository {
+        /** Attempts accepted by {@link #save}, in call order. */
+        final List<ProcessingAttempt> savedAttempts = new ArrayList<>();
+        /** Value returned by {@link #loadNextAttemptNumber}. */
+        int nextAttemptNumber = 1;
+        /** When {@code true}, {@link #save} throws instead of recording the attempt. */
+        boolean failOnSave = false;
+
+        /** Returns the configured number; the fingerprint argument is ignored. */
+        @Override
+        public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
+            return nextAttemptNumber;
+        }
+
+        @Override
+        public void save(ProcessingAttempt attempt) {
+            if (!failOnSave) {
+                savedAttempts.add(attempt);
+                return;
+            }
+            throw new DocumentPersistenceException("Simulated save failure");
+        }
+
+        /** Returns an immutable snapshot of all saved attempts, not filtered by fingerprint. */
+        @Override
+        public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
+            return List.copyOf(savedAttempts);
+        }
+    }
+}
@@ -2,12 +2,24 @@ package de.gecheckt.pdf.umbenenner.application.usecase;

 import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
 import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
+import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
+import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
+import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
+import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
 import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
+import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
+import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
 import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
 import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
 import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
 import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
+import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
 import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
+import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
 import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
 import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
 import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
@@ -37,10 +49,11 @@ import static org.junit.jupiter.api.Assertions.*;
 * <ul>
 *   <li>Lock acquisition and release lifecycle</li>
 *   <li>Source folder scanning and per-document processing loop</li>
- *   <li>Happy path: candidate passes pre-checks, ends controlled without KI or target copy</li>
+ *   <li>Happy path: candidate passes pre-checks, M4 persistence is invoked</li>
 *   <li>Deterministic content errors: no usable text, page limit exceeded</li>
 *   <li>Technical extraction errors: controlled per-document end, batch continues</li>
 *   <li>Source folder access failure: batch fails with FAILURE outcome</li>
+ *   <li>M4 idempotency: fingerprint failure → not historised</li>
 * </ul>
 */
 class BatchRunProcessingUseCaseTest {
@@ -57,8 +70,9 @@ class BatchRunProcessingUseCaseTest {
        MockRunLockPort lockPort = new MockRunLockPort();
        StartConfiguration config = buildConfig(tempDir);

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+                new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
        BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);
@@ -73,8 +87,9 @@ class BatchRunProcessingUseCaseTest {
        CountingRunLockPort lockPort = new CountingRunLockPort(true);
        StartConfiguration config = buildConfig(tempDir);

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+                new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
        BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);
@@ -92,8 +107,9 @@ class BatchRunProcessingUseCaseTest {
        CountingRunLockPort lockPort = new CountingRunLockPort(true);
        StartConfiguration config = buildConfig(tempDir);

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+                new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
        BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now());

        useCase.execute(context);
@@ -108,8 +124,9 @@ class BatchRunProcessingUseCaseTest {
        ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort();
        StartConfiguration config = buildConfig(tempDir);

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+                new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
        BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);
@@ -128,8 +145,9 @@ class BatchRunProcessingUseCaseTest {
        MockRunLockPort lockPort = new MockRunLockPort();
        StartConfiguration config = buildConfig(tempDir);

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+                new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
        BatchRunContext context = new BatchRunContext(new RunId("empty"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);
@@ -138,25 +156,26 @@ class BatchRunProcessingUseCaseTest {
    }

    @Test
-    void execute_happyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception {
+    void execute_happyPath_candidatePassesPreChecks_m4PersistenceInvoked() throws Exception {
        MockRunLockPort lockPort = new MockRunLockPort();
        StartConfiguration config = buildConfig(tempDir);

-        // Candidate with usable text within page limit
        SourceDocumentCandidate candidate = makeCandidate("document.pdf");
        PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice text", new PdfPageCount(1));
        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
        FixedExtractionPort extractionPort = new FixedExtractionPort(success);
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                new AlwaysSuccessFingerprintPort(), m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("happy"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

-        // Batch run succeeds; document ended controlled at boundary (no KI, no copy)
        assertTrue(outcome.isSuccess(), "Happy path should yield SUCCESS");
        assertEquals(1, extractionPort.callCount(), "Extraction should be called exactly once");
+        assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called exactly once");
    }

    @Test
@@ -165,40 +184,42 @@ class BatchRunProcessingUseCaseTest {
        StartConfiguration config = buildConfig(tempDir);

        SourceDocumentCandidate candidate = makeCandidate("image-only.pdf");
-        // Extraction returns text with no letters or digits
        PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess("   ", new PdfPageCount(1));
        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
        FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess);
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                new AlwaysSuccessFingerprintPort(), m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("no-text"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

-        // Document ends with pre-check failure; batch itself still succeeds
        assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run");
+        assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for content errors");
    }

    @Test
    void execute_pageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception {
        MockRunLockPort lockPort = new MockRunLockPort();
-        // Config has maxPages=3; document has 10 pages
        StartConfiguration config = buildConfig(tempDir);

        SourceDocumentCandidate candidate = makeCandidate("big.pdf");
        PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10));
        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
        FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages);
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                new AlwaysSuccessFingerprintPort(), m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("page-limit"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

-        // maxPages in buildConfig is 3; 10 pages exceeds limit – pre-check fails, batch continues
        assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run");
+        assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for page limit errors");
    }

    @Test
@@ -210,14 +231,17 @@ class BatchRunProcessingUseCaseTest {
        PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted");
        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
        FixedExtractionPort extractionPort = new FixedExtractionPort(contentError);
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                new AlwaysSuccessFingerprintPort(), m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("content-error"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

        assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run");
+        assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for content errors");
    }

    @Test
@@ -229,14 +253,17 @@ class BatchRunProcessingUseCaseTest {
        PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null);
        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
        FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError);
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                new AlwaysSuccessFingerprintPort(), m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("tech-error"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

        assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run");
+        assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for technical errors");
    }

    @Test
@@ -248,18 +275,71 @@ class BatchRunProcessingUseCaseTest {
            throw new SourceDocumentAccessException("Source folder not readable");
        };

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, failingPort, new NoOpExtractionPort());
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, failingPort, new NoOpExtractionPort(),
+                new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
        BatchRunContext context = new BatchRunContext(new RunId("access-fail"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

        assertTrue(outcome.isFailure(), "Source folder access failure should yield FAILURE outcome");
        assertFalse(outcome.isSuccess(), "Source folder access failure must not be SUCCESS");
-        // Lock must still be released
        assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even when source access fails");
    }

+    // -------------------------------------------------------------------------
+    // M4-specific: fingerprint failure → not historised
+    // -------------------------------------------------------------------------
+
+    /**
+     * A single candidate whose fingerprint cannot be computed: the batch outcome must
+     * still be a success and the M4 processor must not receive the candidate.
+     */
+    @Test
+    void execute_fingerprintFailure_candidateNotHistorised_batchContinues() throws Exception {
+        StartConfiguration configuration = buildConfig(tempDir);
+        MockRunLockPort runLock = new MockRunLockPort();
+        FixedCandidatesPort singleCandidate =
+                new FixedCandidatesPort(List.of(makeCandidate("unreadable.pdf")));
+        TrackingM4DocumentProcessor tracker = new TrackingM4DocumentProcessor();
+
+        // Every fingerprint request yields a technical error, whatever the candidate.
+        FingerprintPort failingFingerprints = ignored ->
+                new FingerprintTechnicalError("Cannot read file", null);
+
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                configuration, runLock, singleCandidate, new NoOpExtractionPort(),
+                failingFingerprints, tracker);
+
+        BatchRunOutcome result = useCase.execute(
+                new BatchRunContext(new RunId("fp-fail"), Instant.now()));
+
+        assertTrue(result.isSuccess(), "Fingerprint failure should not abort the batch run");
+        assertEquals(0, tracker.processCallCount(),
+                "M4 processor must NOT be called when fingerprint computation fails (pre-fingerprint failure)");
+    }
+
+    /**
+     * When fingerprinting fails for a candidate, the PDF extraction port must not be
+     * invoked for it.
+     */
+    @Test
+    void execute_fingerprintFailure_extractionNotCalled() throws Exception {
+        StartConfiguration configuration = buildConfig(tempDir);
+        MockRunLockPort runLock = new MockRunLockPort();
+        FixedCandidatesPort singleCandidate =
+                new FixedCandidatesPort(List.of(makeCandidate("unreadable.pdf")));
+
+        // Extraction is stubbed with a success result; the assertion below checks it is never requested.
+        PdfExtractionSuccess stubbedResult = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        FixedExtractionPort countingExtraction = new FixedExtractionPort(stubbedResult);
+
+        FingerprintPort failingFingerprints = ignored ->
+                new FingerprintTechnicalError("Cannot read file", null);
+
+        buildUseCase(configuration, runLock, singleCandidate, countingExtraction,
+                        failingFingerprints, new NoOpM4DocumentProcessor())
+                .execute(new BatchRunContext(new RunId("fp-fail-no-extract"), Instant.now()));
+
+        assertEquals(0, countingExtraction.callCount(),
+                "PDF extraction must NOT be called when fingerprint computation fails");
+    }
+
    /**
     * Mixed-batch test: one document per outcome type in a single run.
     * Proves that no individual outcome aborts the overall batch.
@@ -267,18 +347,18 @@ class BatchRunProcessingUseCaseTest {
    @Test
    void execute_mixedBatch_allOutcomeTypes_batchOverallSucceeds() throws Exception {
        MockRunLockPort lockPort = new MockRunLockPort();
-        // maxPages=3 in buildConfig; pageLimitCandidate has 10 pages → exceeds limit
        StartConfiguration config = buildConfig(tempDir);

-        SourceDocumentCandidate goodCandidate          = makeCandidate("good.pdf");
-        SourceDocumentCandidate noTextCandidate        = makeCandidate("notext.pdf");
-        SourceDocumentCandidate pageLimitCandidate     = makeCandidate("toobig.pdf");
+        SourceDocumentCandidate goodCandidate           = makeCandidate("good.pdf");
+        SourceDocumentCandidate noTextCandidate         = makeCandidate("notext.pdf");
+        SourceDocumentCandidate pageLimitCandidate      = makeCandidate("toobig.pdf");
        SourceDocumentCandidate technicalErrorCandidate = makeCandidate("broken.pdf");
-        SourceDocumentCandidate contentErrorCandidate  = makeCandidate("encrypted.pdf");
+        SourceDocumentCandidate contentErrorCandidate   = makeCandidate("encrypted.pdf");
+        SourceDocumentCandidate fpFailCandidate         = makeCandidate("unreadable.pdf");

        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(
                goodCandidate, noTextCandidate, pageLimitCandidate,
-                technicalErrorCandidate, contentErrorCandidate));
+                technicalErrorCandidate, contentErrorCandidate, fpFailCandidate));

        MappedExtractionPort extractionPort = new MappedExtractionPort()
                .with(goodCandidate,           new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)))
@@ -287,16 +367,31 @@ class BatchRunProcessingUseCaseTest {
                .with(technicalErrorCandidate, new PdfExtractionTechnicalError("I/O error", null))
                .with(contentErrorCandidate,   new PdfExtractionContentError("PDF is encrypted"));

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        // fpFailCandidate gets a fingerprint failure; others get a valid fingerprint
+        FingerprintPort mappedFingerprintPort = candidate -> {
+            if (candidate.uniqueIdentifier().equals("unreadable.pdf")) {
+                return new FingerprintTechnicalError("Cannot read", null);
+            }
+            return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
+        };
+
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
+
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                mappedFingerprintPort, m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("mixed"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

-        assertTrue(outcome.isSuccess(),
-                "Mixed batch with all outcome types must yield batch SUCCESS");
+        assertTrue(outcome.isSuccess(), "Mixed batch with all outcome types must yield batch SUCCESS");
+        // 5 candidates with successful fingerprint → M4 processor called 5 times
+        // 1 candidate with fingerprint failure → M4 processor NOT called
+        assertEquals(5, m4Processor.processCallCount(),
+                "M4 processor must be called for each candidate with a successful fingerprint");
+        // Extraction called for 5 candidates (not for fpFailCandidate)
        assertEquals(5, extractionPort.callCount(),
-                "Extraction must be attempted for each of the 5 candidates");
+                "Extraction must be attempted for each of the 5 candidates with a valid fingerprint");
    }

    @Test
@@ -312,21 +407,35 @@ class BatchRunProcessingUseCaseTest {
        PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice content", new PdfPageCount(2));
        FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
        FixedExtractionPort extractionPort = new FixedExtractionPort(success);
+        TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();

-        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
-                config, lockPort, candidatesPort, extractionPort);
+        DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+                config, lockPort, candidatesPort, extractionPort,
+                new AlwaysSuccessFingerprintPort(), m4Processor);
        BatchRunContext context = new BatchRunContext(new RunId("multi"), Instant.now());

        BatchRunOutcome outcome = useCase.execute(context);

        assertTrue(outcome.isSuccess(), "All three candidates processed should yield SUCCESS");
        assertEquals(3, extractionPort.callCount(), "Extraction should be called once per candidate");
+        assertEquals(3, m4Processor.processCallCount(), "M4 processor should be called once per candidate");
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

+    /**
+     * Creates the use case under test from its six collaborators. Every argument is
+     * forwarded unchanged, in declaration order, to the
+     * {@link DefaultBatchRunProcessingUseCase} constructor.
+     */
+    private static DefaultBatchRunProcessingUseCase buildUseCase(
+            StartConfiguration config,
+            RunLockPort lockPort,
+            SourceDocumentCandidatesPort candidatesPort,
+            PdfTextExtractionPort extractionPort,
+            FingerprintPort fingerprintPort,
+            M4DocumentProcessor m4Processor) {
+        DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
+                config,
+                lockPort,
+                candidatesPort,
+                extractionPort,
+                fingerprintPort,
+                m4Processor);
+        return useCase;
+    }
+
    private static StartConfiguration buildConfig(Path tempDir) throws Exception {
        Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
        Path targetDir = Files.createDirectories(tempDir.resolve("target"));
@@ -357,6 +466,13 @@ class BatchRunProcessingUseCaseTest {
        return new SourceDocumentCandidate(filename, 1024L, new SourceDocumentLocator("/tmp/" + filename));
    }

+    /**
+     * Creates a deterministic fake fingerprint from a string.
+     *
+     * <p>The seed's 32-bit hash code is rendered as lowercase hexadecimal and left-padded
+     * with zeros to a width of 64. {@code %x} formats a negative {@code int} as its
+     * unsigned two's-complement value (at most 8 digits), so the result is always exactly
+     * 64 characters and needs neither {@code Math.abs} nor truncation. Taking the absolute
+     * value would additionally map hash codes of opposite sign to the same fingerprint.
+     *
+     * @param seed text whose {@code hashCode()} determines the fingerprint; must not be null
+     * @return a fingerprint built from exactly 64 lowercase hex characters
+     */
+    private static DocumentFingerprint makeFingerprint(String seed) {
+        return new DocumentFingerprint(String.format("%064x", seed.hashCode()));
+    }
+
    // -------------------------------------------------------------------------
    // Mock / Stub implementations
    // -------------------------------------------------------------------------
@@ -480,4 +596,88 @@ class BatchRunProcessingUseCaseTest {

        int callCount() { return calls; }
    }
-}
+
+    /**
+     * Fingerprint port that always returns a deterministic success based on the candidate's
+     * unique identifier.
+     *
+     * <p>The fingerprint is produced by {@code makeFingerprint}, so this stub and the
+     * hand-written fingerprint ports in individual tests derive their values from one
+     * shared formula instead of two copies of it.
+     */
+    private static class AlwaysSuccessFingerprintPort implements FingerprintPort {
+        @Override
+        public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
+            return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
+        }
+    }
+
+    /**
+     * {@link M4DocumentProcessor} wired to repositories that store nothing. No method is
+     * overridden, so the inherited processing is used as-is; intended for tests that only
+     * care about lock/batch lifecycle, not M4 persistence.
+     */
+    private static class NoOpM4DocumentProcessor extends M4DocumentProcessor {
+        NoOpM4DocumentProcessor() {
+            super(
+                    new NoOpDocumentRecordRepository(),
+                    new NoOpProcessingAttemptRepository());
+        }
+    }
+
+    /**
+     * {@link M4DocumentProcessor} backed by no-op repositories that records how many times
+     * {@code process()} is invoked and then hands each call on to the superclass.
+     */
+    private static class TrackingM4DocumentProcessor extends M4DocumentProcessor {
+        private int invocations;
+
+        TrackingM4DocumentProcessor() {
+            super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
+        }
+
+        /** Returns the number of {@code process()} calls received so far. */
+        int processCallCount() {
+            return invocations;
+        }
+
+        // NOTE(review): DocumentProcessingOutcome stays fully qualified because no import for
+        // it is visible in this excerpt; confirm before shortening it like the other types.
+        @Override
+        public void process(
+                SourceDocumentCandidate candidate,
+                DocumentFingerprint fingerprint,
+                de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome m3Outcome,
+                BatchRunContext context,
+                Instant attemptStart) {
+            // Count before delegating: the call is recorded even if the superclass throws.
+            invocations += 1;
+            super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
+        }
+    }
+
+    /**
+     * {@link DocumentRecordRepository} stub for test M4DocumentProcessor instances:
+     * lookups always report the document as unknown and writes are discarded.
+     */
+    private static class NoOpDocumentRecordRepository implements DocumentRecordRepository {
+        @Override
+        public void create(DocumentRecord record) {
+            // Intentionally empty: nothing is stored.
+        }
+
+        @Override
+        public void update(DocumentRecord record) {
+            // Intentionally empty: nothing is stored.
+        }
+
+        @Override
+        public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
+            // Always "unknown", meant to keep the M4 processor on its "new document" path.
+            DocumentRecordLookupResult unknown = new DocumentUnknown();
+            return unknown;
+        }
+    }
+
+    /**
+     * {@link ProcessingAttemptRepository} stub for test M4DocumentProcessor instances:
+     * the next attempt number is always 1, saved attempts are discarded and the
+     * history returned for any fingerprint is empty.
+     */
+    private static class NoOpProcessingAttemptRepository implements ProcessingAttemptRepository {
+        private static final int FIRST_ATTEMPT = 1;
+
+        @Override
+        public void save(ProcessingAttempt attempt) {
+            // Intentionally empty: nothing is stored.
+        }
+
+        @Override
+        public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
+            List<ProcessingAttempt> noHistory = List.of();
+            return noHistory;
+        }
+
+        @Override
+        public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
+            return FIRST_ATTEMPT;
+        }
+    }
+}