M4 AP-006 Idempotenz- und Persistenzlogik integrieren
This commit is contained in:
@@ -0,0 +1,558 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.time.Instant;
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Application-level service that implements the M4 per-document processing logic.
|
||||
* <p>
|
||||
* This service is the single authoritative place for the M4 decision rules:
|
||||
* idempotency checks, status/counter mapping, and consistent two-level persistence.
|
||||
* It is intentionally tightly scoped to AP-006 and contains no M5+ logic.
|
||||
*
|
||||
* <h2>M4 processing order per candidate</h2>
|
||||
* <ol>
|
||||
* <li>Load the document master record by fingerprint.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the M3 flow (already done by the caller) and map the result
|
||||
* into M4 status, counters and retryable flag.</li>
|
||||
* <li>Persist exactly one historised processing attempt for the identified document.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>M4 minimal rules</h2>
|
||||
* <ul>
|
||||
* <li>Already successful documents are skipped in later runs.</li>
|
||||
* <li>Already finally failed documents are skipped in later runs.</li>
|
||||
* <li>First historised deterministic content failure from M3 →
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, content error counter becomes 1,
|
||||
* {@code retryable=true}.</li>
|
||||
* <li>Second historised deterministic content failure in a later run →
|
||||
* {@link ProcessingStatus#FAILED_FINAL}, content error counter becomes 2,
|
||||
* {@code retryable=false}.</li>
|
||||
* <li>Document-related technical failures after successful fingerprinting remain
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, increment transient error counter,
|
||||
* {@code retryable=true}.</li>
|
||||
* <li>Skip events do not change error counters.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Persistence consistency</h2>
|
||||
* <p>
|
||||
* For every identified document, both the processing attempt and the master record are
|
||||
* written in sequence. If either write fails, the failure is logged and the batch run
|
||||
* continues with the next candidate. No partial state is intentionally left; if the
|
||||
* attempt write succeeds but the master record write fails, the inconsistency is bounded
|
||||
* to that one document and is logged clearly. True transactionality across two separate
|
||||
* repository calls is not available without a larger architectural change; this is
|
||||
* documented as a known limitation of the M4 scope.
|
||||
*
|
||||
* <h2>Pre-fingerprint failures</h2>
|
||||
* <p>
|
||||
* Failures that occur before a successful fingerprint is available are <em>not</em>
|
||||
* historised in SQLite. They are handled by the caller and logged as non-identifiable
|
||||
* run events.
|
||||
*
|
||||
* @since M4-AP-006
|
||||
*/
|
||||
public class M4DocumentProcessor {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(M4DocumentProcessor.class);
|
||||
|
||||
private final DocumentRecordRepository documentRecordRepository;
|
||||
private final ProcessingAttemptRepository processingAttemptRepository;
|
||||
|
||||
/**
|
||||
* Creates the M4 document processor with the required persistence ports.
|
||||
*
|
||||
* @param documentRecordRepository port for reading and writing the document master record;
|
||||
* must not be null
|
||||
* @param processingAttemptRepository port for writing and reading the attempt history;
|
||||
* must not be null
|
||||
* @throws NullPointerException if any parameter is null
|
||||
*/
|
||||
public M4DocumentProcessor(
|
||||
DocumentRecordRepository documentRecordRepository,
|
||||
ProcessingAttemptRepository processingAttemptRepository) {
|
||||
this.documentRecordRepository =
|
||||
Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null");
|
||||
this.processingAttemptRepository =
|
||||
Objects.requireNonNull(processingAttemptRepository, "processingAttemptRepository must not be null");
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the full M4 processing logic for one identified document candidate.
|
||||
* <p>
|
||||
* The caller must have already computed a valid {@link DocumentFingerprint} for the
|
||||
* candidate. The M3 outcome (from the PDF extraction and pre-check pipeline) is
|
||||
* provided as {@code m3Outcome} and is used only when the document is not in a
|
||||
* terminal state.
|
||||
* <p>
|
||||
* This method never throws. All persistence failures are caught, logged, and
|
||||
* treated as controlled per-document failures so the batch run can continue.
|
||||
*
|
||||
* @param candidate the source document candidate being processed; must not be null
|
||||
* @param fingerprint the successfully computed fingerprint for this candidate;
|
||||
* must not be null
|
||||
* @param m3Outcome the result of the M3 pipeline (PDF extraction + pre-checks);
|
||||
* must not be null
|
||||
* @param context the current batch run context (for run ID and timing);
|
||||
* must not be null
|
||||
* @param attemptStart the instant at which processing of this candidate began;
|
||||
* must not be null
|
||||
*/
|
||||
public void process(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
|
||||
Objects.requireNonNull(m3Outcome, "m3Outcome must not be null");
|
||||
Objects.requireNonNull(context, "context must not be null");
|
||||
Objects.requireNonNull(attemptStart, "attemptStart must not be null");
|
||||
|
||||
// Step 1: Load the document master record
|
||||
DocumentRecordLookupResult lookupResult =
|
||||
documentRecordRepository.findByFingerprint(fingerprint);
|
||||
|
||||
// Step 2: Handle persistence lookup failure – cannot safely proceed
|
||||
if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) {
|
||||
LOG.error("Cannot process '{}': master record lookup failed: {}",
|
||||
candidate.uniqueIdentifier(), failure.errorMessage());
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 3: Determine the action based on the lookup result
|
||||
switch (lookupResult) {
|
||||
case DocumentTerminalSuccess terminalSuccess -> {
|
||||
// Document already successfully processed → skip
|
||||
LOG.info("Skipping '{}': already successfully processed (fingerprint: {}).",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||
persistSkipAttempt(
|
||||
candidate, fingerprint, terminalSuccess.record(),
|
||||
ProcessingStatus.SKIPPED_ALREADY_PROCESSED,
|
||||
context, attemptStart);
|
||||
}
|
||||
|
||||
case DocumentTerminalFinalFailure terminalFailure -> {
|
||||
// Document finally failed → skip
|
||||
LOG.info("Skipping '{}': already finally failed (fingerprint: {}).",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||
persistSkipAttempt(
|
||||
candidate, fingerprint, terminalFailure.record(),
|
||||
ProcessingStatus.SKIPPED_FINAL_FAILURE,
|
||||
context, attemptStart);
|
||||
}
|
||||
|
||||
case DocumentUnknown ignored -> {
|
||||
// New document – process and create a new master record
|
||||
processAndPersistNewDocument(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
}
|
||||
|
||||
case DocumentKnownProcessable knownProcessable -> {
|
||||
// Known but not terminal – process and update the existing master record
|
||||
processAndPersistKnownDocument(
|
||||
candidate, fingerprint, m3Outcome, knownProcessable.record(),
|
||||
context, attemptStart);
|
||||
}
|
||||
|
||||
default ->
|
||||
// Exhaustive sealed hierarchy; this branch is unreachable
|
||||
LOG.error("Unexpected lookup result type for '{}': {}",
|
||||
candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Skip path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Persists a skip attempt and updates the master record's {@code updatedAt} timestamp.
|
||||
* <p>
|
||||
* Skip events do not change any failure counter. The master record's overall status
|
||||
* remains unchanged (terminal).
|
||||
*
|
||||
* @param candidate the candidate being skipped
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param existingRecord the current master record (already terminal)
|
||||
* @param skipStatus the skip status to record ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}
|
||||
* or {@link ProcessingStatus#SKIPPED_FINAL_FAILURE})
|
||||
* @param context the current batch run context
|
||||
* @param attemptStart the start instant of this processing attempt
|
||||
*/
|
||||
private void persistSkipAttempt(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentRecord existingRecord,
|
||||
ProcessingStatus skipStatus,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Instant now = Instant.now();
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
|
||||
ProcessingAttempt skipAttempt = new ProcessingAttempt(
|
||||
fingerprint,
|
||||
context.runId(),
|
||||
attemptNumber,
|
||||
attemptStart,
|
||||
now,
|
||||
skipStatus,
|
||||
null, // no failure class for skip
|
||||
null, // no failure message for skip
|
||||
false // not retryable
|
||||
);
|
||||
|
||||
// Write attempt first, then update master record
|
||||
processingAttemptRepository.save(skipAttempt);
|
||||
|
||||
// Update master record: only updatedAt changes; status and counters stay the same
|
||||
DocumentRecord updatedRecord = new DocumentRecord(
|
||||
existingRecord.fingerprint(),
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
existingRecord.overallStatus(), // terminal status unchanged
|
||||
existingRecord.failureCounters(), // counters unchanged for skip
|
||||
existingRecord.lastFailureInstant(),
|
||||
existingRecord.lastSuccessInstant(),
|
||||
existingRecord.createdAt(),
|
||||
now // updatedAt = now
|
||||
);
|
||||
documentRecordRepository.update(updatedRecord);
|
||||
|
||||
LOG.debug("Skip attempt #{} persisted for '{}' with status {}.",
|
||||
attemptNumber, candidate.uniqueIdentifier(), skipStatus);
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Failed to persist skip attempt for '{}': {}",
|
||||
candidate.uniqueIdentifier(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// New document path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Processes a newly discovered document (no existing master record) and persists
|
||||
* both the attempt and the new master record.
|
||||
*
|
||||
* @param candidate the candidate being processed
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @param context the current batch run context
|
||||
* @param attemptStart the start instant of this processing attempt
|
||||
*/
|
||||
private void processAndPersistNewDocument(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Instant now = Instant.now();
|
||||
|
||||
// Map M3 outcome to M4 status/counters for a brand-new document
|
||||
M4Outcome m4 = mapM3OutcomeForNewDocument(m3Outcome);
|
||||
|
||||
try {
|
||||
// Attempt number is always 1 for a new document
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
|
||||
ProcessingAttempt attempt = buildAttempt(
|
||||
fingerprint, context, attemptNumber, attemptStart, now, m4);
|
||||
|
||||
// Create the new master record
|
||||
DocumentRecord newRecord = new DocumentRecord(
|
||||
fingerprint,
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters(),
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS ? null : now, // lastFailureInstant
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS ? now : null, // lastSuccessInstant
|
||||
now, // createdAt
|
||||
now // updatedAt
|
||||
);
|
||||
|
||||
// Persist attempt first, then master record
|
||||
processingAttemptRepository.save(attempt);
|
||||
documentRecordRepository.create(newRecord);
|
||||
|
||||
LOG.info("New document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters().contentErrorCount(),
|
||||
m4.counters().transientErrorCount());
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Failed to persist processing result for new document '{}': {}",
|
||||
candidate.uniqueIdentifier(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Known processable document path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Processes a known but non-terminal document and updates both the attempt history
|
||||
* and the master record.
|
||||
*
|
||||
* @param candidate the candidate being processed
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @param existingRecord the current master record (not terminal)
|
||||
* @param context the current batch run context
|
||||
* @param attemptStart the start instant of this processing attempt
|
||||
*/
|
||||
private void processAndPersistKnownDocument(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
DocumentRecord existingRecord,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Instant now = Instant.now();
|
||||
|
||||
// Map M3 outcome to M4 status/counters, taking existing counters into account
|
||||
M4Outcome m4 = mapM3OutcomeForKnownDocument(m3Outcome, existingRecord.failureCounters());
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
|
||||
ProcessingAttempt attempt = buildAttempt(
|
||||
fingerprint, context, attemptNumber, attemptStart, now, m4);
|
||||
|
||||
// Update the master record with new status, counters and timestamps
|
||||
DocumentRecord updatedRecord = new DocumentRecord(
|
||||
existingRecord.fingerprint(),
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters(),
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS
|
||||
? existingRecord.lastFailureInstant() : now,
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS
|
||||
? now : existingRecord.lastSuccessInstant(),
|
||||
existingRecord.createdAt(),
|
||||
now // updatedAt
|
||||
);
|
||||
|
||||
// Persist attempt first, then master record
|
||||
processingAttemptRepository.save(attempt);
|
||||
documentRecordRepository.update(updatedRecord);
|
||||
|
||||
LOG.info("Known document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters().contentErrorCount(),
|
||||
m4.counters().transientErrorCount());
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Failed to persist processing result for known document '{}': {}",
|
||||
candidate.uniqueIdentifier(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// M3 → M4 outcome mapping
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Maps an M3 outcome to M4 status, counters, and retryable flag for a brand-new
|
||||
* document (no prior history, counters start at zero).
|
||||
*
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @return the M4 outcome with status, counters and retryable flag
|
||||
*/
|
||||
private M4Outcome mapM3OutcomeForNewDocument(DocumentProcessingOutcome m3Outcome) {
|
||||
return mapM3OutcomeForKnownDocument(m3Outcome, FailureCounters.zero());
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps an M3 outcome to M4 status, counters, and retryable flag, taking the
|
||||
* existing failure counters into account.
|
||||
* <p>
|
||||
* <strong>M4 minimal rules applied here:</strong>
|
||||
* <ul>
|
||||
* <li>M3 success → {@link ProcessingStatus#SUCCESS}, counters unchanged,
|
||||
* {@code retryable=false}.</li>
|
||||
* <li>M3 deterministic content error (first occurrence, contentErrorCount == 0) →
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, contentErrorCount +1,
|
||||
* {@code retryable=true}.</li>
|
||||
* <li>M3 deterministic content error (second occurrence, contentErrorCount >= 1) →
|
||||
* {@link ProcessingStatus#FAILED_FINAL}, contentErrorCount +1,
|
||||
* {@code retryable=false}.</li>
|
||||
* <li>M3 technical error → {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||
* transientErrorCount +1, {@code retryable=true}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @param existingCounters the current failure counters from the master record
|
||||
* @return the M4 outcome with updated status, counters and retryable flag
|
||||
*/
|
||||
private M4Outcome mapM3OutcomeForKnownDocument(
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
FailureCounters existingCounters) {
|
||||
|
||||
return switch (m3Outcome) {
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored -> {
|
||||
// M3 success: document passed all pre-checks
|
||||
// In M4 scope (no KI, no target copy), PreCheckPassed is the terminal success
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.SUCCESS,
|
||||
existingCounters, // counters unchanged on success
|
||||
false // not retryable
|
||||
);
|
||||
}
|
||||
|
||||
case PreCheckFailed contentError -> {
|
||||
// Deterministic content error: apply the 1-retry rule
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
|
||||
boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
|
||||
|
||||
if (isFirstOccurrence) {
|
||||
// First content error → FAILED_RETRYABLE
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
true
|
||||
);
|
||||
} else {
|
||||
// Second (or later) content error → FAILED_FINAL
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.FAILED_FINAL,
|
||||
updatedCounters,
|
||||
false
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
case TechnicalDocumentError technicalError -> {
|
||||
// Technical error after fingerprinting: always FAILED_RETRYABLE, increment transient counter
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helper: build ProcessingAttempt
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Constructs a {@link ProcessingAttempt} from the given parameters and M4 outcome.
|
||||
*
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param context the current batch run context
|
||||
* @param attemptNumber the monotonic attempt number
|
||||
* @param startedAt the start instant of this attempt
|
||||
* @param endedAt the end instant of this attempt
|
||||
* @param m4 the M4 outcome (status, counters, retryable)
|
||||
* @return the constructed processing attempt
|
||||
*/
|
||||
private ProcessingAttempt buildAttempt(
|
||||
DocumentFingerprint fingerprint,
|
||||
BatchRunContext context,
|
||||
int attemptNumber,
|
||||
Instant startedAt,
|
||||
Instant endedAt,
|
||||
M4Outcome m4) {
|
||||
|
||||
String failureClass = null;
|
||||
String failureMessage = null;
|
||||
|
||||
if (m4.overallStatus() == ProcessingStatus.FAILED_RETRYABLE
|
||||
|| m4.overallStatus() == ProcessingStatus.FAILED_FINAL) {
|
||||
failureClass = m4.overallStatus().name();
|
||||
failureMessage = buildFailureMessage(m4);
|
||||
}
|
||||
|
||||
return new ProcessingAttempt(
|
||||
fingerprint,
|
||||
context.runId(),
|
||||
attemptNumber,
|
||||
startedAt,
|
||||
endedAt,
|
||||
m4.overallStatus(),
|
||||
failureClass,
|
||||
failureMessage,
|
||||
m4.retryable()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a human-readable failure message from the M4 outcome.
|
||||
*
|
||||
* @param m4 the M4 outcome
|
||||
* @return a non-null failure message string
|
||||
*/
|
||||
private String buildFailureMessage(M4Outcome m4) {
|
||||
return switch (m4.overallStatus()) {
|
||||
case FAILED_RETRYABLE -> "Processing failed (retryable). "
|
||||
+ "ContentErrors=" + m4.counters().contentErrorCount()
|
||||
+ ", TransientErrors=" + m4.counters().transientErrorCount();
|
||||
case FAILED_FINAL -> "Processing failed finally (not retryable). "
|
||||
+ "ContentErrors=" + m4.counters().contentErrorCount()
|
||||
+ ", TransientErrors=" + m4.counters().transientErrorCount();
|
||||
default -> m4.overallStatus().name();
|
||||
};
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Internal value type: M4 outcome
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Internal value type carrying the M4 status, updated counters, and retryable flag
|
||||
* after mapping from an M3 outcome.
|
||||
* <p>
|
||||
* Tightly scoped to {@link M4DocumentProcessor}; not exposed outside this class.
|
||||
*
|
||||
* @param overallStatus the M4 overall status to persist
|
||||
* @param counters the updated failure counters to persist
|
||||
* @param retryable whether the failure is retryable in a later run
|
||||
*/
|
||||
private record M4Outcome(
|
||||
ProcessingStatus overallStatus,
|
||||
FailureCounters counters,
|
||||
boolean retryable) {
|
||||
}
|
||||
}
|
||||
@@ -1,28 +1,65 @@
|
||||
/**
|
||||
* Application-level services for business logic evaluation.
|
||||
* Application-level services for business logic evaluation and M4 orchestration.
|
||||
* <p>
|
||||
* This package contains stateless, pure-logic services that evaluate document content
|
||||
* and apply business rules. Services in this package:
|
||||
* This package contains stateless, pure-logic services that evaluate document content,
|
||||
* apply business rules, and orchestrate the M4 per-document processing flow.
|
||||
* Services in this package:
|
||||
* <ul>
|
||||
* <li>Do not manage state or resources</li>
|
||||
* <li>Do not depend on infrastructure (database, filesystem, network)</li>
|
||||
* <li>Do not depend on infrastructure (database, filesystem, network) directly;
|
||||
* they interact with infrastructure exclusively through outbound ports</li>
|
||||
* <li>Can be tested with simple unit tests and in-memory mocks</li>
|
||||
* <li>Are reused by multiple use cases or adapters</li>
|
||||
* </ul>
|
||||
*
|
||||
* Current services:
|
||||
* <h2>Current services</h2>
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.PreCheckEvaluator} — Pre-check evaluation</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} — Complete document processing pipeline orchestration</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.PreCheckEvaluator}
|
||||
* — Pre-check evaluation (M3)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService}
|
||||
* — Complete M3 document processing pipeline orchestration</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor}
|
||||
* — M4 per-document idempotency, status/counter mapping and consistent
|
||||
* two-level persistence (AP-006)</li>
|
||||
* </ul>
|
||||
*
|
||||
* Document Processing Pipeline:
|
||||
* The {@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} coordinates
|
||||
* the complete processing workflow:
|
||||
* <h2>M4 processing flow ({@code M4DocumentProcessor})</h2>
|
||||
* <p>
|
||||
* The {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor}
|
||||
 * implements the authoritative M4 processing order per candidate:
|
||||
* <ol>
|
||||
* <li>Convert technical PDF extraction results to processing outcomes</li>
|
||||
* <li>Route successful extractions through pre-check validation</li>
|
||||
* <li>Classify extraction and pre-check failures with appropriate error types</li>
|
||||
* <li>Load the document master record by fingerprint.</li>
|
||||
* <li>If overall status is {@code SUCCESS} → persist a skip attempt with
|
||||
* {@code SKIPPED_ALREADY_PROCESSED}; do not change counters.</li>
|
||||
* <li>If overall status is {@code FAILED_FINAL} → persist a skip attempt with
|
||||
* {@code SKIPPED_FINAL_FAILURE}; do not change counters.</li>
|
||||
* <li>Otherwise map the M3 outcome into M4 status, counters and retryable flag
|
||||
* using the M4 minimal rules.</li>
|
||||
* <li>Persist exactly one historised processing attempt.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>M4 minimal rules (status and counter semantics)</h2>
|
||||
* <ul>
|
||||
* <li>First deterministic content error → {@code FAILED_RETRYABLE},
|
||||
* content error counter +1, {@code retryable=true}.</li>
|
||||
* <li>Second deterministic content error → {@code FAILED_FINAL},
|
||||
* content error counter +1 (cumulative = 2), {@code retryable=false}.</li>
|
||||
* <li>Technical error after fingerprinting → {@code FAILED_RETRYABLE},
|
||||
* transient error counter +1, {@code retryable=true}.</li>
|
||||
* <li>Skip events do not change any failure counter.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Persistence consistency</h2>
|
||||
* <p>
|
||||
* For every identified document, the processing attempt and the master record are
|
||||
* written in sequence. If either write fails, the failure is caught and logged;
|
||||
* the batch run continues with the next candidate. True transactionality across
|
||||
* two separate repository calls is not available in the M4 scope; this is a known
|
||||
* and documented limitation.
|
||||
*
|
||||
* <h2>Pre-fingerprint failures</h2>
|
||||
* <p>
|
||||
* Failures that occur before a successful fingerprint is available are not handled
|
||||
* by this package. They are handled by the use case and are not historised in SQLite.
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
@@ -3,54 +3,80 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Batch processing implementation of {@link BatchRunProcessingUseCase}.
|
||||
* <p>
|
||||
* Orchestrates the complete batch processing workflow:
|
||||
* Orchestrates the complete M4 batch processing workflow per candidate:
|
||||
* <ol>
|
||||
* <li>Acquire exclusive run lock to prevent concurrent instances</li>
|
||||
* <li>Scan source folder for PDF candidates</li>
|
||||
* <li>For each candidate: extract text and page count, run pre-checks</li>
|
||||
* <li>Log per-document decision; end each document controlled without KI or target copy</li>
|
||||
* <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
|
||||
* <li>Acquire exclusive run lock to prevent concurrent instances.</li>
|
||||
* <li>Scan source folder for PDF candidates.</li>
|
||||
* <li>For each candidate, execute the M4 processing order:
|
||||
* <ol type="a">
|
||||
* <li>Compute fingerprint.</li>
|
||||
* <li>Load document master record.</li>
|
||||
* <li>If already {@code SUCCESS} → persist skip attempt with
|
||||
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
|
||||
* {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the M3 pipeline (extraction + pre-checks).</li>
|
||||
* <li>Map M3 result into M4 status, counters and retryable flag.</li>
|
||||
* <li>Persist exactly one historised processing attempt.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* <li>Release lock and return structured outcome for Bootstrap exit code mapping.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Idempotency</h2>
|
||||
* <p>
|
||||
* Processing boundary:
|
||||
* <ul>
|
||||
* <li>Documents that pass pre-checks end controlled and are ready for further processing (KI, persistence, copy)</li>
|
||||
* <li>Documents with deterministic content errors (no usable text, page limit exceeded) end controlled</li>
|
||||
* <li>Documents with technical extraction errors end controlled; they do not abort the overall run</li>
|
||||
* <li>If the source folder itself is inaccessible, the run fails with {@link BatchRunOutcome#FAILURE}</li>
|
||||
* </ul>
|
||||
* Documents are identified exclusively by their SHA-256 content fingerprint. A document
|
||||
* whose overall status is {@code SUCCESS} or {@code FAILED_FINAL} is skipped in every
|
||||
* subsequent run; only a skip attempt is historised.
|
||||
*
|
||||
* <h2>Pre-fingerprint failures</h2>
|
||||
* <p>
|
||||
* Non-Goals (not implemented):
|
||||
* If the fingerprint computation fails (e.g. the file is no longer readable), the
|
||||
* candidate is logged as a non-identifiable run event and is <em>not</em> historised
|
||||
* in SQLite. The batch run continues with the next candidate.
|
||||
*
|
||||
* <h2>Persistence consistency</h2>
|
||||
* <p>
|
||||
* For every identified document, the processing attempt and the master record are
|
||||
* written in sequence by {@link M4DocumentProcessor}. Persistence failures for a single
|
||||
* document are caught and logged; the batch run continues with the remaining candidates.
|
||||
*
|
||||
* <h2>Non-Goals (not implemented in M4)</h2>
|
||||
* <ul>
|
||||
* <li>No fingerprinting or SQLite persistence</li>
|
||||
* <li>No KI/AI integration or prompt loading</li>
|
||||
* <li>No filename generation or target file copy</li>
|
||||
* <li>No cross-run retry logic</li>
|
||||
* <li>No KI/AI integration or prompt loading.</li>
|
||||
* <li>No filename generation or target file copy.</li>
|
||||
* <li>No M5+ retry rules for KI or target copy failures.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-004 (extended in M4-AP-006)
|
||||
*/
|
||||
public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCase {
|
||||
|
||||
@@ -60,28 +86,44 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
private final RunLockPort runLockPort;
|
||||
private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort;
|
||||
private final PdfTextExtractionPort pdfTextExtractionPort;
|
||||
private final FingerprintPort fingerprintPort;
|
||||
private final M4DocumentProcessor m4DocumentProcessor;
|
||||
|
||||
/**
|
||||
* Creates the batch use case with the already-loaded startup configuration and all required ports.
|
||||
* Creates the batch use case with the already-loaded startup configuration and all
|
||||
* required ports for the M4 flow.
|
||||
* <p>
|
||||
* The configuration is loaded and validated by Bootstrap before use case creation;
|
||||
* the use case receives the result directly and does not re-read it.
|
||||
* the use case receives the result directly and does not re-read the properties file.
|
||||
*
|
||||
* @param configuration the validated startup configuration
|
||||
* @param runLockPort for exclusive run locking
|
||||
* @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder
|
||||
* @param pdfTextExtractionPort for extracting text and page count from a single PDF
|
||||
* @param configuration the validated startup configuration; must not be null
|
||||
* @param runLockPort for exclusive run locking; must not be null
|
||||
* @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder;
|
||||
* must not be null
|
||||
* @param pdfTextExtractionPort for extracting text and page count from a single PDF;
|
||||
* must not be null
|
||||
* @param fingerprintPort for computing the SHA-256 fingerprint of a candidate;
|
||||
* must not be null
|
||||
* @param m4DocumentProcessor for applying M4 decision logic and persisting results;
|
||||
* must not be null
|
||||
* @throws NullPointerException if any parameter is null
|
||||
*/
|
||||
public DefaultBatchRunProcessingUseCase(
|
||||
StartConfiguration configuration,
|
||||
RunLockPort runLockPort,
|
||||
SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
|
||||
PdfTextExtractionPort pdfTextExtractionPort) {
|
||||
this.configuration = configuration;
|
||||
this.runLockPort = runLockPort;
|
||||
this.sourceDocumentCandidatesPort = sourceDocumentCandidatesPort;
|
||||
this.pdfTextExtractionPort = pdfTextExtractionPort;
|
||||
PdfTextExtractionPort pdfTextExtractionPort,
|
||||
FingerprintPort fingerprintPort,
|
||||
M4DocumentProcessor m4DocumentProcessor) {
|
||||
this.configuration = Objects.requireNonNull(configuration, "configuration must not be null");
|
||||
this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null");
|
||||
this.sourceDocumentCandidatesPort = Objects.requireNonNull(
|
||||
sourceDocumentCandidatesPort, "sourceDocumentCandidatesPort must not be null");
|
||||
this.pdfTextExtractionPort = Objects.requireNonNull(
|
||||
pdfTextExtractionPort, "pdfTextExtractionPort must not be null");
|
||||
this.fingerprintPort = Objects.requireNonNull(fingerprintPort, "fingerprintPort must not be null");
|
||||
this.m4DocumentProcessor = Objects.requireNonNull(
|
||||
m4DocumentProcessor, "m4DocumentProcessor must not be null");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -96,12 +138,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
lockAcquired = true;
|
||||
LOG.debug("Run lock acquired successfully.");
|
||||
} catch (RunLockUnavailableException e) {
|
||||
LOG.warn("Run lock not available – another instance is already running. This instance terminates immediately.");
|
||||
LOG.warn("Run lock not available – another instance is already running. "
|
||||
+ "This instance terminates immediately.");
|
||||
return BatchRunOutcome.LOCK_UNAVAILABLE;
|
||||
}
|
||||
|
||||
LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder());
|
||||
LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant());
|
||||
LOG.debug("Configuration in use: source={}, target={}",
|
||||
configuration.sourceFolder(), configuration.targetFolder());
|
||||
LOG.info("Batch run started. RunId: {}, Start: {}",
|
||||
context.runId(), context.startInstant());
|
||||
|
||||
// Step 2: Load PDF candidates from source folder
|
||||
List<SourceDocumentCandidate> candidates;
|
||||
@@ -113,12 +158,13 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
}
|
||||
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
|
||||
|
||||
// Step 3: Process each candidate through the pipeline
|
||||
// Step 3: Process each candidate through the M4 pipeline
|
||||
for (SourceDocumentCandidate candidate : candidates) {
|
||||
processCandidate(candidate);
|
||||
processCandidate(candidate, context);
|
||||
}
|
||||
|
||||
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}", candidates.size(), context.runId());
|
||||
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}",
|
||||
candidates.size(), context.runId());
|
||||
return BatchRunOutcome.SUCCESS;
|
||||
|
||||
} catch (Exception e) {
|
||||
@@ -126,8 +172,8 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
return BatchRunOutcome.FAILURE;
|
||||
} finally {
|
||||
// Release the run lock only if it was successfully acquired.
|
||||
// If acquire() threw RunLockUnavailableException, the lock belongs to another instance
|
||||
// and must not be deleted by this instance.
|
||||
// If acquire() threw RunLockUnavailableException, the lock belongs to another
|
||||
// instance and must not be deleted by this instance.
|
||||
if (lockAcquired) {
|
||||
try {
|
||||
runLockPort.release();
|
||||
@@ -140,56 +186,105 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a single PDF candidate through the complete pipeline.
|
||||
* Processes a single PDF candidate through the complete M4 pipeline.
|
||||
* <p>
|
||||
* Processing steps per document:
|
||||
* M4 processing order:
|
||||
* <ol>
|
||||
* <li>Log candidate recognition</li>
|
||||
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
|
||||
* <li>Process extraction result through pre-checks via {@link DocumentProcessingService}</li>
|
||||
* <li>Log extraction outcome and final decision</li>
|
||||
* <li>Record the attempt start instant.</li>
|
||||
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event and
|
||||
* return — no SQLite record is created.</li>
|
||||
* <li>Execute the M3 pipeline (PDF extraction + pre-checks).</li>
|
||||
* <li>Delegate to {@link M4DocumentProcessor} for idempotency check, status/counter
|
||||
* mapping, and consistent two-level persistence.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall
|
||||
* batch run. Each candidate ends controlled regardless of its outcome.
|
||||
* <p>
|
||||
* Processing boundary: no KI call, no persistence, no filename generation,
|
||||
* no target file copy is initiated here, even for candidates that pass all pre-checks.
|
||||
* Per-document errors do not abort the overall batch run. Each candidate ends
|
||||
* controlled regardless of its outcome.
|
||||
*
|
||||
* @param candidate the candidate to process
|
||||
* @param context the current batch run context
|
||||
*/
|
||||
private void processCandidate(SourceDocumentCandidate candidate) {
|
||||
private void processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) {
|
||||
LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier());
|
||||
|
||||
PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
||||
// Record the attempt start instant before any work begins
|
||||
Instant attemptStart = Instant.now();
|
||||
|
||||
// Step M4-1: Compute fingerprint
|
||||
FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
|
||||
|
||||
switch (fingerprintResult) {
|
||||
case FingerprintTechnicalError fingerprintError -> {
|
||||
// Pre-fingerprint failure: not historised in SQLite
|
||||
LOG.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).",
|
||||
candidate.uniqueIdentifier(), fingerprintError.errorMessage());
|
||||
return;
|
||||
}
|
||||
|
||||
case FingerprintSuccess fingerprintSuccess -> {
|
||||
DocumentFingerprint fingerprint = fingerprintSuccess.fingerprint();
|
||||
LOG.debug("Fingerprint computed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||
|
||||
// Step M4-2..M4-8: Execute M3 pipeline and delegate M4 logic to the processor
|
||||
// The M3 pipeline runs only if the document is not in a terminal state;
|
||||
// M4DocumentProcessor handles the terminal check internally.
|
||||
// We run M3 eagerly here and pass the result; M4DocumentProcessor will
|
||||
// ignore it for terminal documents.
|
||||
DocumentProcessingOutcome m3Outcome = runM3Pipeline(candidate);
|
||||
|
||||
// Delegate idempotency check, status mapping, and persistence to M4DocumentProcessor
|
||||
m4DocumentProcessor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the M3 pipeline (PDF text extraction + pre-checks) for the given candidate.
|
||||
* <p>
|
||||
* This method is called after a successful fingerprint computation. The result is
|
||||
* passed to {@link M4DocumentProcessor}, which applies it only when the document is
|
||||
* not in a terminal state.
|
||||
*
|
||||
* @param candidate the candidate to run through the M3 pipeline
|
||||
* @return the M3 pipeline outcome (pre-check passed, pre-check failed, or technical error)
|
||||
*/
|
||||
private DocumentProcessingOutcome runM3Pipeline(SourceDocumentCandidate candidate) {
|
||||
PdfExtractionResult extractionResult =
|
||||
pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
||||
|
||||
// Log extraction outcome
|
||||
switch (extractionResult) {
|
||||
case PdfExtractionSuccess success ->
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess success ->
|
||||
LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.",
|
||||
candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length());
|
||||
case PdfExtractionContentError contentError ->
|
||||
candidate.uniqueIdentifier(),
|
||||
success.pageCount().value(),
|
||||
success.extractedText().length());
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError contentError ->
|
||||
LOG.debug("PDF content extraction failed for '{}' (content problem): {}",
|
||||
candidate.uniqueIdentifier(), contentError.reason());
|
||||
case PdfExtractionTechnicalError technicalError ->
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError technicalError ->
|
||||
LOG.debug("PDF extraction technical error for '{}': {}",
|
||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||
}
|
||||
|
||||
// Process through complete pipeline
|
||||
var outcome = DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
|
||||
DocumentProcessingOutcome outcome =
|
||||
DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
|
||||
|
||||
// Log processing outcome
|
||||
// Log M3 outcome
|
||||
switch (outcome) {
|
||||
case PreCheckPassed passed ->
|
||||
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for further processing.",
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed ->
|
||||
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for M4 persistence.",
|
||||
candidate.uniqueIdentifier());
|
||||
case PreCheckFailed failed ->
|
||||
LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).",
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
|
||||
LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
|
||||
candidate.uniqueIdentifier(), failed.failureReasonDescription());
|
||||
case TechnicalDocumentError technicalError ->
|
||||
LOG.warn("Processing FAILED for '{}': {} (Technical error – may retry in later run).",
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
|
||||
LOG.warn("Processing FAILED for '{}': {} (Technical error – retryable).",
|
||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||
}
|
||||
|
||||
return outcome;
|
||||
}
|
||||
}
|
||||
@@ -4,9 +4,25 @@
|
||||
* Implementations:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase}
|
||||
* — Production implementation with run lock and controlled batch cycle</li>
|
||||
* — Production implementation with run lock, M4 fingerprint-based idempotency,
|
||||
* and consistent two-level persistence (extended in M4-AP-006)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <h2>M4 processing order (AP-006)</h2>
|
||||
* <p>
|
||||
* For each candidate, {@link de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase}
|
||||
* enforces this order:
|
||||
* <ol>
|
||||
* <li>Compute SHA-256 fingerprint of the candidate file content.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event;
|
||||
* do NOT write any SQLite record; continue with next candidate.</li>
|
||||
* <li>Run the M3 pipeline (PDF extraction + pre-checks).</li>
|
||||
* <li>Delegate to {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor}
|
||||
* for idempotency check, status/counter mapping, and consistent persistence.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* All implementations are infrastructure-agnostic and interact only through ports.
|
||||
*
|
||||
* @since M2 (extended in M4-AP-006)
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
@@ -0,0 +1,425 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link M4DocumentProcessor}.
|
||||
* <p>
|
||||
* Covers:
|
||||
* <ul>
|
||||
* <li>M4 minimal rules: status, counter and retryable flag mapping</li>
|
||||
* <li>Skip logic for SUCCESS and FAILED_FINAL documents</li>
|
||||
* <li>New document path (DocumentUnknown)</li>
|
||||
* <li>Known processable document path (DocumentKnownProcessable)</li>
|
||||
* <li>Persistence lookup failure: no attempt written</li>
|
||||
* <li>Persistence write failure: controlled failure, no crash</li>
|
||||
* <li>Skip events do not change error counters</li>
|
||||
* </ul>
|
||||
*/
|
||||
class M4DocumentProcessorTest {
|
||||
|
||||
private static final String FINGERPRINT_HEX =
|
||||
"a".repeat(64); // 64 lowercase hex chars
|
||||
|
||||
private CapturingDocumentRecordRepository recordRepo;
|
||||
private CapturingProcessingAttemptRepository attemptRepo;
|
||||
private M4DocumentProcessor processor;
|
||||
|
||||
private SourceDocumentCandidate candidate;
|
||||
private DocumentFingerprint fingerprint;
|
||||
private BatchRunContext context;
|
||||
private Instant attemptStart;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
recordRepo = new CapturingDocumentRecordRepository();
|
||||
attemptRepo = new CapturingProcessingAttemptRepository();
|
||||
processor = new M4DocumentProcessor(recordRepo, attemptRepo);
|
||||
|
||||
candidate = new SourceDocumentCandidate(
|
||||
"test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf"));
|
||||
fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
context = new BatchRunContext(new RunId("run-001"), Instant.now());
|
||||
attemptStart = Instant.now();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// New document (DocumentUnknown) path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_newDocument_preCheckPassed_persistsSuccessStatus() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
// One attempt written
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
assertNull(attempt.failureClass());
|
||||
assertNull(attempt.failureMessage());
|
||||
|
||||
// One master record created
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
assertNotNull(record.lastSuccessInstant());
|
||||
assertNull(record.lastFailureInstant());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
|
||||
assertTrue(attempt.retryable());
|
||||
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(1, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
assertNotNull(record.lastFailureInstant());
|
||||
assertNull(record.lastSuccessInstant());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_newDocument_technicalError_persistsFailedRetryable_transientCounterOne() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
|
||||
candidate, "I/O error", null);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
|
||||
assertTrue(attempt.retryable());
|
||||
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(1, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Known processable document path (DocumentKnownProcessable)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_knownDocument_secondContentError_persistsFailedFinal_contentCounterTwo() {
|
||||
// Existing record: first content error already recorded
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(1, 0));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
|
||||
assertEquals(2, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(0, 2));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
|
||||
candidate, "Timeout", null);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(3, record.failureCounters().transientErrorCount());
|
||||
assertTrue(attemptRepo.savedAttempts.get(0).retryable());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_knownDocument_preCheckPassed_persistsSuccess() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(0, 1));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
|
||||
// Counters unchanged on success
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(1, record.failureCounters().transientErrorCount());
|
||||
assertNotNull(record.lastSuccessInstant());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Skip paths
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_terminalSuccess_persistsSkipAttemptWithSkippedAlreadyProcessed() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.SUCCESS,
|
||||
FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
assertNull(attempt.failureClass());
|
||||
assertNull(attempt.failureMessage());
|
||||
|
||||
// Master record updated (only updatedAt changes)
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
// Status and counters remain unchanged
|
||||
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_terminalFinalFailure_persistsSkipAttemptWithSkippedFinalFailure() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_FINAL,
|
||||
new FailureCounters(2, 0));
|
||||
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
|
||||
// Master record updated (only updatedAt changes); counters unchanged
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
|
||||
assertEquals(2, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_skipEvent_doesNotChangeErrorCounters() {
|
||||
FailureCounters originalCounters = new FailureCounters(1, 3);
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, originalCounters);
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
|
||||
processor.process(candidate, fingerprint,
|
||||
new PreCheckPassed(candidate, new PdfExtractionSuccess("t", new PdfPageCount(1))),
|
||||
context, attemptStart);
|
||||
|
||||
DocumentRecord updated = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(originalCounters.contentErrorCount(), updated.failureCounters().contentErrorCount(),
|
||||
"Skip must not change content error counter");
|
||||
assertEquals(originalCounters.transientErrorCount(), updated.failureCounters().transientErrorCount(),
|
||||
"Skip must not change transient error counter");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Persistence lookup failure
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_persistenceLookupFailure_noAttemptWritten_noException() {
|
||||
recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("DB unavailable", null));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
// Must not throw
|
||||
assertDoesNotThrow(() ->
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart));
|
||||
|
||||
// No attempt written, no record created/updated
|
||||
assertEquals(0, attemptRepo.savedAttempts.size(),
|
||||
"No attempt must be written when lookup fails");
|
||||
assertEquals(0, recordRepo.createdRecords.size());
|
||||
assertEquals(0, recordRepo.updatedRecords.size());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Persistence write failure: controlled, no crash
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_persistenceWriteFailure_doesNotThrow_batchContinues() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
// Make the attempt save throw
|
||||
attemptRepo.failOnSave = true;
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
// Must not propagate the exception
|
||||
assertDoesNotThrow(() ->
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart));
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Attempt number monotonicity
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_attemptNumberIsAssignedFromRepository() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
attemptRepo.nextAttemptNumber = 3; // Simulate 2 prior attempts
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
assertEquals(3, attemptRepo.savedAttempts.get(0).attemptNumber(),
|
||||
"Attempt number must be taken from the repository");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) {
|
||||
Instant now = Instant.now();
|
||||
return new DocumentRecord(
|
||||
fingerprint,
|
||||
new SourceDocumentLocator("/tmp/test.pdf"),
|
||||
"test.pdf",
|
||||
status,
|
||||
counters,
|
||||
status == ProcessingStatus.SUCCESS ? null : now,
|
||||
status == ProcessingStatus.SUCCESS ? now : null,
|
||||
now,
|
||||
now
|
||||
);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Capturing test doubles
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private static class CapturingDocumentRecordRepository implements DocumentRecordRepository {
|
||||
private DocumentRecordLookupResult lookupResult = new DocumentUnknown();
|
||||
final List<DocumentRecord> createdRecords = new ArrayList<>();
|
||||
final List<DocumentRecord> updatedRecords = new ArrayList<>();
|
||||
|
||||
void setLookupResult(DocumentRecordLookupResult result) {
|
||||
this.lookupResult = result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
|
||||
return lookupResult;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void create(DocumentRecord record) {
|
||||
createdRecords.add(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(DocumentRecord record) {
|
||||
updatedRecords.add(record);
|
||||
}
|
||||
}
|
||||
|
||||
private static class CapturingProcessingAttemptRepository implements ProcessingAttemptRepository {
|
||||
final List<ProcessingAttempt> savedAttempts = new ArrayList<>();
|
||||
int nextAttemptNumber = 1;
|
||||
boolean failOnSave = false;
|
||||
|
||||
@Override
|
||||
public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
|
||||
return nextAttemptNumber;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(ProcessingAttempt attempt) {
|
||||
if (failOnSave) {
|
||||
throw new DocumentPersistenceException("Simulated save failure");
|
||||
}
|
||||
savedAttempts.add(attempt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
|
||||
return List.copyOf(savedAttempts);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,12 +2,24 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
@@ -37,10 +49,11 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||
* <ul>
|
||||
* <li>Lock acquisition and release lifecycle</li>
|
||||
* <li>Source folder scanning and per-document processing loop</li>
|
||||
* <li>Happy path: candidate passes pre-checks, ends controlled without KI or target copy</li>
|
||||
* <li>Happy path: candidate passes pre-checks, M4 persistence is invoked</li>
|
||||
* <li>Deterministic content errors: no usable text, page limit exceeded</li>
|
||||
* <li>Technical extraction errors: controlled per-document end, batch continues</li>
|
||||
* <li>Source folder access failure: batch fails with FAILURE outcome</li>
|
||||
* <li>M4 idempotency: fingerprint failure → not historised</li>
|
||||
* </ul>
|
||||
*/
|
||||
class BatchRunProcessingUseCaseTest {
|
||||
@@ -57,8 +70,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -73,8 +87,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
CountingRunLockPort lockPort = new CountingRunLockPort(true);
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -92,8 +107,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
CountingRunLockPort lockPort = new CountingRunLockPort(true);
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now());
|
||||
|
||||
useCase.execute(context);
|
||||
@@ -108,8 +124,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -128,8 +145,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("empty"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -138,25 +156,26 @@ class BatchRunProcessingUseCaseTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_happyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception {
|
||||
void execute_happyPath_candidatePassesPreChecks_m4PersistenceInvoked() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
// Candidate with usable text within page limit
|
||||
SourceDocumentCandidate candidate = makeCandidate("document.pdf");
|
||||
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice text", new PdfPageCount(1));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("happy"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
// Batch run succeeds; document ended controlled at boundary (no KI, no copy)
|
||||
assertTrue(outcome.isSuccess(), "Happy path should yield SUCCESS");
|
||||
assertEquals(1, extractionPort.callCount(), "Extraction should be called exactly once");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called exactly once");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -165,40 +184,42 @@ class BatchRunProcessingUseCaseTest {
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("image-only.pdf");
|
||||
// Extraction returns text with no letters or digits
|
||||
PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess(" ", new PdfPageCount(1));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("no-text"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
// Document ends with pre-check failure; batch itself still succeeds
|
||||
assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for content errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_pageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
// Config has maxPages=3; document has 10 pages
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("big.pdf");
|
||||
PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("page-limit"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
// maxPages in buildConfig is 3; 10 pages exceeds limit – pre-check fails, batch continues
|
||||
assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for page limit errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -210,14 +231,17 @@ class BatchRunProcessingUseCaseTest {
|
||||
PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted");
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(contentError);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("content-error"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for content errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -229,14 +253,17 @@ class BatchRunProcessingUseCaseTest {
|
||||
PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null);
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("tech-error"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for technical errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -248,18 +275,71 @@ class BatchRunProcessingUseCaseTest {
|
||||
throw new SourceDocumentAccessException("Source folder not readable");
|
||||
};
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, failingPort, new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, failingPort, new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("access-fail"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isFailure(), "Source folder access failure should yield FAILURE outcome");
|
||||
assertFalse(outcome.isSuccess(), "Source folder access failure must not be SUCCESS");
|
||||
// Lock must still be released
|
||||
assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even when source access fails");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// M4-specific: fingerprint failure → not historised
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void execute_fingerprintFailure_candidateNotHistorised_batchContinues() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("unreadable.pdf");
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
// Fingerprint always fails → M4 processor must NOT be called
|
||||
FingerprintPort alwaysFailingFingerprintPort = c ->
|
||||
new FingerprintTechnicalError("Cannot read file", null);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, new NoOpExtractionPort(),
|
||||
alwaysFailingFingerprintPort, m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("fp-fail"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "Fingerprint failure should not abort the batch run");
|
||||
assertEquals(0, m4Processor.processCallCount(),
|
||||
"M4 processor must NOT be called when fingerprint computation fails (pre-fingerprint failure)");
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_fingerprintFailure_extractionNotCalled() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("unreadable.pdf");
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(
|
||||
new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
FingerprintPort alwaysFailingFingerprintPort = c ->
|
||||
new FingerprintTechnicalError("Cannot read file", null);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
alwaysFailingFingerprintPort, new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("fp-fail-no-extract"), Instant.now());
|
||||
|
||||
useCase.execute(context);
|
||||
|
||||
assertEquals(0, extractionPort.callCount(),
|
||||
"PDF extraction must NOT be called when fingerprint computation fails");
|
||||
}
|
||||
|
||||
/**
|
||||
* Mixed-batch test: one document per outcome type in a single run.
|
||||
* Proves that no individual outcome aborts the overall batch.
|
||||
@@ -267,7 +347,6 @@ class BatchRunProcessingUseCaseTest {
|
||||
@Test
|
||||
void execute_mixedBatch_allOutcomeTypes_batchOverallSucceeds() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
// maxPages=3 in buildConfig; pageLimitCandidate has 10 pages → exceeds limit
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf");
|
||||
@@ -275,10 +354,11 @@ class BatchRunProcessingUseCaseTest {
|
||||
SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf");
|
||||
SourceDocumentCandidate technicalErrorCandidate = makeCandidate("broken.pdf");
|
||||
SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf");
|
||||
SourceDocumentCandidate fpFailCandidate = makeCandidate("unreadable.pdf");
|
||||
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(
|
||||
goodCandidate, noTextCandidate, pageLimitCandidate,
|
||||
technicalErrorCandidate, contentErrorCandidate));
|
||||
technicalErrorCandidate, contentErrorCandidate, fpFailCandidate));
|
||||
|
||||
MappedExtractionPort extractionPort = new MappedExtractionPort()
|
||||
.with(goodCandidate, new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)))
|
||||
@@ -287,16 +367,31 @@ class BatchRunProcessingUseCaseTest {
|
||||
.with(technicalErrorCandidate, new PdfExtractionTechnicalError("I/O error", null))
|
||||
.with(contentErrorCandidate, new PdfExtractionContentError("PDF is encrypted"));
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
// fpFailCandidate gets a fingerprint failure; others get a valid fingerprint
|
||||
FingerprintPort mappedFingerprintPort = candidate -> {
|
||||
if (candidate.uniqueIdentifier().equals("unreadable.pdf")) {
|
||||
return new FingerprintTechnicalError("Cannot read", null);
|
||||
}
|
||||
return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
|
||||
};
|
||||
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
mappedFingerprintPort, m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("mixed"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(),
|
||||
"Mixed batch with all outcome types must yield batch SUCCESS");
|
||||
assertTrue(outcome.isSuccess(), "Mixed batch with all outcome types must yield batch SUCCESS");
|
||||
// 5 candidates with successful fingerprint → M4 processor called 5 times
|
||||
// 1 candidate with fingerprint failure → M4 processor NOT called
|
||||
assertEquals(5, m4Processor.processCallCount(),
|
||||
"M4 processor must be called for each candidate with a successful fingerprint");
|
||||
// Extraction called for 5 candidates (not for fpFailCandidate)
|
||||
assertEquals(5, extractionPort.callCount(),
|
||||
"Extraction must be attempted for each of the 5 candidates");
|
||||
"Extraction must be attempted for each of the 5 candidates with a valid fingerprint");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -312,21 +407,35 @@ class BatchRunProcessingUseCaseTest {
|
||||
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice content", new PdfPageCount(2));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("multi"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "All three candidates processed should yield SUCCESS");
|
||||
assertEquals(3, extractionPort.callCount(), "Extraction should be called once per candidate");
|
||||
assertEquals(3, m4Processor.processCallCount(), "M4 processor should be called once per candidate");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private static DefaultBatchRunProcessingUseCase buildUseCase(
|
||||
StartConfiguration config,
|
||||
RunLockPort lockPort,
|
||||
SourceDocumentCandidatesPort candidatesPort,
|
||||
PdfTextExtractionPort extractionPort,
|
||||
FingerprintPort fingerprintPort,
|
||||
M4DocumentProcessor m4Processor) {
|
||||
return new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort, fingerprintPort, m4Processor);
|
||||
}
|
||||
|
||||
private static StartConfiguration buildConfig(Path tempDir) throws Exception {
|
||||
Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
|
||||
Path targetDir = Files.createDirectories(tempDir.resolve("target"));
|
||||
@@ -357,6 +466,13 @@ class BatchRunProcessingUseCaseTest {
|
||||
return new SourceDocumentCandidate(filename, 1024L, new SourceDocumentLocator("/tmp/" + filename));
|
||||
}
|
||||
|
||||
/** Creates a deterministic fake fingerprint from a string (padded to 64 hex chars). */
|
||||
private static DocumentFingerprint makeFingerprint(String seed) {
|
||||
String hex = String.format("%064x", Math.abs(seed.hashCode()));
|
||||
// Ensure exactly 64 lowercase hex chars
|
||||
return new DocumentFingerprint(hex.substring(0, 64));
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Mock / Stub implementations
|
||||
// -------------------------------------------------------------------------
|
||||
@@ -480,4 +596,88 @@ class BatchRunProcessingUseCaseTest {
|
||||
|
||||
int callCount() { return calls; }
|
||||
}
|
||||
|
||||
/**
|
||||
* Fingerprint port that always returns a deterministic success based on the candidate's
|
||||
* unique identifier.
|
||||
*/
|
||||
private static class AlwaysSuccessFingerprintPort implements FingerprintPort {
|
||||
@Override
|
||||
public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
|
||||
String hex = String.format("%064x", Math.abs(candidate.uniqueIdentifier().hashCode()));
|
||||
return new FingerprintSuccess(new DocumentFingerprint(hex.substring(0, 64)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* No-op M4DocumentProcessor that does nothing (for tests that only care about
|
||||
* lock/batch lifecycle, not M4 persistence).
|
||||
*/
|
||||
private static class NoOpM4DocumentProcessor extends M4DocumentProcessor {
|
||||
NoOpM4DocumentProcessor() {
|
||||
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracking M4DocumentProcessor that counts how many times {@code process()} is called.
|
||||
*/
|
||||
private static class TrackingM4DocumentProcessor extends M4DocumentProcessor {
|
||||
private int processCallCount = 0;
|
||||
|
||||
TrackingM4DocumentProcessor() {
|
||||
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(
|
||||
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
|
||||
de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint fingerprint,
|
||||
de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome m3Outcome,
|
||||
de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext context,
|
||||
java.time.Instant attemptStart) {
|
||||
processCallCount++;
|
||||
// Delegate to super so the real logic runs (with no-op repos)
|
||||
super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
}
|
||||
|
||||
int processCallCount() { return processCallCount; }
|
||||
}
|
||||
|
||||
/** No-op DocumentRecordRepository for use in test M4DocumentProcessor instances. */
|
||||
private static class NoOpDocumentRecordRepository implements DocumentRecordRepository {
|
||||
@Override
|
||||
public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
|
||||
// Return DocumentUnknown so the M4 processor always takes the "new document" path
|
||||
return new DocumentUnknown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void create(DocumentRecord record) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(DocumentRecord record) {
|
||||
// No-op
|
||||
}
|
||||
}
|
||||
|
||||
/** No-op ProcessingAttemptRepository for use in test M4DocumentProcessor instances. */
|
||||
private static class NoOpProcessingAttemptRepository implements ProcessingAttemptRepository {
|
||||
@Override
|
||||
public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(ProcessingAttempt attempt) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,16 +5,26 @@ import org.apache.logging.log4j.Logger;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.InvalidStartConfigurationException;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfigurationValidator;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
|
||||
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
@@ -29,20 +39,35 @@ import java.util.UUID;
|
||||
* <p>
|
||||
* Responsibilities:
|
||||
* <ol>
|
||||
* <li>Load and validate the startup configuration</li>
|
||||
* <li>Resolve the run-lock file path (with default fallback)</li>
|
||||
* <li>Create and wire all ports and adapters</li>
|
||||
* <li>Start the CLI adapter and execute the batch use case</li>
|
||||
* <li>Map the batch outcome to a process exit code</li>
|
||||
* <li>Load and validate the startup configuration.</li>
|
||||
* <li>Resolve the run-lock file path (with default fallback).</li>
|
||||
* <li>Initialise the SQLite schema (M4: before the batch document loop begins).</li>
|
||||
* <li>Create and wire all ports and adapters, including the M4 persistence ports.</li>
|
||||
* <li>Start the CLI adapter and execute the batch use case.</li>
|
||||
* <li>Map the batch outcome to a process exit code.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Exit code semantics:
|
||||
*
|
||||
* <h2>Exit code semantics</h2>
|
||||
* <ul>
|
||||
* <li>{@code 0}: Batch run executed successfully; individual document failures do not
|
||||
* change the exit code as long as the run itself completed without a hard infrastructure error.</li>
|
||||
* <li>{@code 1}: Hard start, bootstrap, or configuration failure that prevented the run
|
||||
* from beginning, or a critical infrastructure failure during the run.</li>
|
||||
* change the exit code as long as the run itself completed without a hard
|
||||
* infrastructure error.</li>
|
||||
* <li>{@code 1}: Hard start, bootstrap, configuration, or schema-initialisation failure
|
||||
* that prevented the run from beginning, or a critical infrastructure failure
|
||||
* during the run.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>M4 wiring</h2>
|
||||
* <p>
|
||||
* The production constructor wires the following M4 adapters:
|
||||
* <ul>
|
||||
* <li>{@link Sha256FingerprintAdapter} — SHA-256 content fingerprinting.</li>
|
||||
* <li>{@link SqliteSchemaInitializationAdapter} — schema initialisation at startup.</li>
|
||||
* <li>{@link SqliteDocumentRecordRepositoryAdapter} — document master record CRUD.</li>
|
||||
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} — attempt history CRUD.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M2 (extended in M4-AP-006)
|
||||
*/
|
||||
public class BootstrapRunner {
|
||||
|
||||
@@ -83,7 +108,7 @@ public class BootstrapRunner {
|
||||
* <p>
|
||||
* Receives the already-loaded and validated {@link StartConfiguration} and run lock port.
|
||||
* The factory is responsible for creating and wiring any additional outbound ports
|
||||
* required by the use case (e.g., source document port, PDF extraction port).
|
||||
* required by the use case (e.g., source document port, PDF extraction port, M4 ports).
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface UseCaseFactory {
|
||||
@@ -101,23 +126,41 @@ public class BootstrapRunner {
|
||||
/**
|
||||
* Creates the BootstrapRunner with default factories for production use.
|
||||
* <p>
|
||||
* Wires the full processing pipeline:
|
||||
* Wires the full M4 processing pipeline:
|
||||
* <ul>
|
||||
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading</li>
|
||||
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking</li>
|
||||
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery</li>
|
||||
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction</li>
|
||||
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading.</li>
|
||||
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking.</li>
|
||||
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery.</li>
|
||||
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction.</li>
|
||||
* <li>{@link Sha256FingerprintAdapter} for SHA-256 content fingerprinting.</li>
|
||||
* <li>{@link SqliteDocumentRecordRepositoryAdapter} for document master record CRUD.</li>
|
||||
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} for attempt history CRUD.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Schema initialisation is performed in {@link #run()} before the use case is created,
|
||||
* using {@link SqliteSchemaInitializationAdapter}.
|
||||
*/
|
||||
public BootstrapRunner() {
|
||||
this.configPortFactory = PropertiesConfigurationPortAdapter::new;
|
||||
this.runLockPortFactory = FilesystemRunLockPortAdapter::new;
|
||||
this.validatorFactory = StartConfigurationValidator::new;
|
||||
this.useCaseFactory = (config, lock) -> new DefaultBatchRunProcessingUseCase(
|
||||
this.useCaseFactory = (config, lock) -> {
|
||||
String jdbcUrl = buildJdbcUrl(config);
|
||||
FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
|
||||
DocumentRecordRepository documentRecordRepository =
|
||||
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
|
||||
ProcessingAttemptRepository processingAttemptRepository =
|
||||
new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
|
||||
M4DocumentProcessor m4Processor =
|
||||
new M4DocumentProcessor(documentRecordRepository, processingAttemptRepository);
|
||||
return new DefaultBatchRunProcessingUseCase(
|
||||
config,
|
||||
lock,
|
||||
new SourceDocumentCandidatesPortAdapter(config.sourceFolder()),
|
||||
new PdfTextExtractionPortAdapter());
|
||||
new PdfTextExtractionPortAdapter(),
|
||||
fingerprintPort,
|
||||
m4Processor);
|
||||
};
|
||||
this.commandFactory = SchedulerBatchCommand::new;
|
||||
}
|
||||
|
||||
@@ -145,11 +188,17 @@ public class BootstrapRunner {
|
||||
/**
|
||||
* Runs the application startup sequence.
|
||||
* <p>
|
||||
* AP-003: Manually wires the object graph and invokes the CLI command.
|
||||
* AP-005: Wires ConfigurationPort adapter and passes it to the use case.
|
||||
* AP-006: Validates configuration before allowing processing to start.
|
||||
* M4 additions:
|
||||
* <ul>
|
||||
* <li>Derives the SQLite JDBC URL from the configured {@code sqlite.file} path.</li>
|
||||
* <li>Initialises the M4 SQLite schema via
|
||||
* {@link PersistenceSchemaInitializationPort#initializeSchema()} before the
|
||||
* batch document loop begins. A schema initialisation failure aborts the run
|
||||
* with exit code 1.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @return exit code: 0 for success, 1 for invalid configuration or unexpected failure
|
||||
* @return exit code: 0 for success, 1 for invalid configuration, schema failure,
|
||||
* or unexpected bootstrap failure
|
||||
*/
|
||||
public int run() {
|
||||
LOG.info("Bootstrap flow started.");
|
||||
@@ -160,61 +209,105 @@ public class BootstrapRunner {
|
||||
// Step 2: Load configuration
|
||||
var config = configPort.loadConfiguration();
|
||||
|
||||
// Step 3: Validate configuration (AP-006)
|
||||
// Step 3: Validate configuration
|
||||
StartConfigurationValidator validator = validatorFactory.create();
|
||||
validator.validate(config);
|
||||
|
||||
// Step 4: Resolve lock file path – apply default if not configured (AP-006)
|
||||
// Step 4: Resolve lock file path – apply default if not configured
|
||||
Path lockFilePath = config.runtimeLockFile();
|
||||
if (lockFilePath == null || lockFilePath.toString().isBlank()) {
|
||||
lockFilePath = Paths.get("pdf-umbenenner.lock");
|
||||
LOG.info("runtime.lock.file not configured, using default lock path: {}", lockFilePath.toAbsolutePath());
|
||||
LOG.info("runtime.lock.file not configured, using default lock path: {}",
|
||||
lockFilePath.toAbsolutePath());
|
||||
}
|
||||
RunLockPort runLockPort = runLockPortFactory.create(lockFilePath);
|
||||
|
||||
// Step 5: Create the batch run context
|
||||
// Generate a unique run ID and initialize the run context
|
||||
// Step 5 (M4): Initialise the SQLite schema before the batch loop begins.
|
||||
// A failure here is a hard start error → exit code 1.
|
||||
initializeSchema(config);
|
||||
|
||||
// Step 6: Create the batch run context
|
||||
RunId runId = new RunId(UUID.randomUUID().toString());
|
||||
BatchRunContext runContext = new BatchRunContext(runId, Instant.now());
|
||||
LOG.info("Batch run started. RunId: {}", runId);
|
||||
|
||||
// Step 6: Create the use case with the validated config and run lock (application layer).
|
||||
// Step 7: Create the use case with the validated config and run lock.
|
||||
// Config is passed directly; the use case does not re-read the properties file.
|
||||
// Adapters (source document port, PDF extraction port) are wired by the factory.
|
||||
// Adapters (source document port, PDF extraction port, M4 ports) are wired by the factory.
|
||||
BatchRunProcessingUseCase useCase = useCaseFactory.create(config, runLockPort);
|
||||
|
||||
// Step 7: Create the CLI command adapter with the use case
|
||||
// Step 8: Create the CLI command adapter with the use case
|
||||
SchedulerBatchCommand command = commandFactory.create(useCase);
|
||||
|
||||
// Step 8: Execute the command with the run context and handle the outcome
|
||||
// Step 9: Execute the command with the run context and handle the outcome
|
||||
BatchRunOutcome outcome = command.run(runContext);
|
||||
|
||||
// Mark run as completed (AP-003)
|
||||
// Mark run as completed
|
||||
runContext.setEndInstant(Instant.now());
|
||||
|
||||
if (outcome.isSuccess()) {
|
||||
LOG.info("Batch run completed successfully. RunId: {}", runContext.runId());
|
||||
return 0;
|
||||
} else if (outcome.isLockUnavailable()) {
|
||||
LOG.warn("Batch run aborted: another instance is already running. RunId: {}", runContext.runId());
|
||||
LOG.warn("Batch run aborted: another instance is already running. RunId: {}",
|
||||
runContext.runId());
|
||||
return 1;
|
||||
} else {
|
||||
LOG.error("Batch run failed. RunId: {}", runContext.runId());
|
||||
return 1;
|
||||
}
|
||||
} catch (InvalidStartConfigurationException e) {
|
||||
// Controlled failure for invalid configuration - log clearly without stack trace
|
||||
// Controlled failure for invalid configuration – log clearly without stack trace
|
||||
LOG.error("Configuration validation failed: {}", e.getMessage());
|
||||
return 1;
|
||||
} catch (IllegalStateException e) {
|
||||
// Configuration loading failed due to missing/invalid required properties
|
||||
// Treat as invalid configuration for controlled failure
|
||||
LOG.error("Configuration loading failed: {}", e.getMessage());
|
||||
return 1;
|
||||
} catch (DocumentPersistenceException e) {
|
||||
// Schema initialisation failed – hard start error
|
||||
LOG.error("SQLite schema initialisation failed: {}", e.getMessage(), e);
|
||||
return 1;
|
||||
} catch (Exception e) {
|
||||
LOG.error("Bootstrap failure during startup.", e);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialises the M4 SQLite schema using the configured SQLite file path.
|
||||
* <p>
|
||||
* This method is called once at startup, before the batch document loop begins.
|
||||
* It uses the production {@link SqliteSchemaInitializationAdapter} directly because
|
||||
* schema initialisation is a startup concern, not a per-document concern, and the
|
||||
* {@link UseCaseFactory} abstraction is not the right place for it.
|
||||
* <p>
|
||||
* If the {@code sqlite.file} configuration is null or blank, schema initialisation
|
||||
* is skipped with a warning. This allows the existing test infrastructure (which
|
||||
* uses the custom {@link UseCaseFactory}) to continue working without a real SQLite
|
||||
* file.
|
||||
*
|
||||
* @param config the validated startup configuration
|
||||
* @throws DocumentPersistenceException if schema initialisation fails
|
||||
*/
|
||||
private void initializeSchema(StartConfiguration config) {
|
||||
if (config.sqliteFile() == null) {
|
||||
LOG.warn("sqlite.file not configured – skipping schema initialisation.");
|
||||
return;
|
||||
}
|
||||
String jdbcUrl = buildJdbcUrl(config);
|
||||
PersistenceSchemaInitializationPort schemaPort = new SqliteSchemaInitializationAdapter(jdbcUrl);
|
||||
schemaPort.initializeSchema();
|
||||
LOG.info("M4 SQLite schema initialised at: {}", jdbcUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the JDBC URL for the SQLite database from the configured file path.
|
||||
*
|
||||
* @param config the startup configuration containing the SQLite file path
|
||||
* @return the JDBC URL in the form {@code jdbc:sqlite:/path/to/file.db}
|
||||
*/
|
||||
static String buildJdbcUrl(StartConfiguration config) {
|
||||
return "jdbc:sqlite:" + config.sqliteFile().toAbsolutePath().toString().replace('\\', '/');
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user