diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessor.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessor.java new file mode 100644 index 0000000..e0546a4 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessor.java @@ -0,0 +1,558 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown; +import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; +import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; +import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; +import 
de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.time.Instant; +import java.util.Objects; + +/** + * Application-level service that implements the M4 per-document processing logic. + *

+ * This service is the single authoritative place for the M4 decision rules: + * idempotency checks, status/counter mapping, and consistent two-level persistence. + * It is intentionally tightly scoped to AP-006 and contains no M5+ logic. + * + *

M4 processing order per candidate

+ *
+ * <ol>
+ *   <li>Load the document master record by fingerprint.</li>
+ *   <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
+ *       a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
+ *   <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
+ *       a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
+ *   <li>Otherwise execute the M3 flow (already done by the caller) and map the result
+ *       into M4 status, counters and retryable flag.</li>
+ *   <li>Persist exactly one historised processing attempt for the identified document.</li>
+ *   <li>Persist the updated document master record.</li>
+ * </ol>
+ * + *

M4 minimal rules

+ * + * + *

Persistence consistency

+ *

+ * For every identified document, both the processing attempt and the master record are + * written in sequence. If either write fails, the failure is logged and the batch run + * continues with the next candidate. No partial state is intentionally left; if the + * attempt write succeeds but the master record write fails, the inconsistency is bounded + * to that one document and is logged clearly. True transactionality across two separate + * repository calls is not available without a larger architectural change; this is + * documented as a known limitation of the M4 scope. + * + *

Pre-fingerprint failures

+ *

+ * Failures that occur before a successful fingerprint is available are not + * historised in SQLite. They are handled by the caller and logged as non-identifiable + * run events. + * + * @since M4-AP-006 + */ +public class M4DocumentProcessor { + + private static final Logger LOG = LogManager.getLogger(M4DocumentProcessor.class); + + private final DocumentRecordRepository documentRecordRepository; + private final ProcessingAttemptRepository processingAttemptRepository; + + /** + * Creates the M4 document processor with the required persistence ports. + * + * @param documentRecordRepository port for reading and writing the document master record; + * must not be null + * @param processingAttemptRepository port for writing and reading the attempt history; + * must not be null + * @throws NullPointerException if any parameter is null + */ + public M4DocumentProcessor( + DocumentRecordRepository documentRecordRepository, + ProcessingAttemptRepository processingAttemptRepository) { + this.documentRecordRepository = + Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null"); + this.processingAttemptRepository = + Objects.requireNonNull(processingAttemptRepository, "processingAttemptRepository must not be null"); + } + + /** + * Applies the full M4 processing logic for one identified document candidate. + *

+ * The caller must have already computed a valid {@link DocumentFingerprint} for the + * candidate. The M3 outcome (from the PDF extraction and pre-check pipeline) is + * provided as {@code m3Outcome} and is used only when the document is not in a + * terminal state. + *

+ * This method never throws. All persistence failures are caught, logged, and + * treated as controlled per-document failures so the batch run can continue. + * + * @param candidate the source document candidate being processed; must not be null + * @param fingerprint the successfully computed fingerprint for this candidate; + * must not be null + * @param m3Outcome the result of the M3 pipeline (PDF extraction + pre-checks); + * must not be null + * @param context the current batch run context (for run ID and timing); + * must not be null + * @param attemptStart the instant at which processing of this candidate began; + * must not be null + */ + public void process( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentProcessingOutcome m3Outcome, + BatchRunContext context, + Instant attemptStart) { + + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); + Objects.requireNonNull(m3Outcome, "m3Outcome must not be null"); + Objects.requireNonNull(context, "context must not be null"); + Objects.requireNonNull(attemptStart, "attemptStart must not be null"); + + // Step 1: Load the document master record + DocumentRecordLookupResult lookupResult = + documentRecordRepository.findByFingerprint(fingerprint); + + // Step 2: Handle persistence lookup failure – cannot safely proceed + if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) { + LOG.error("Cannot process '{}': master record lookup failed: {}", + candidate.uniqueIdentifier(), failure.errorMessage()); + return; + } + + // Step 3: Determine the action based on the lookup result + switch (lookupResult) { + case DocumentTerminalSuccess terminalSuccess -> { + // Document already successfully processed → skip + LOG.info("Skipping '{}': already successfully processed (fingerprint: {}).", + candidate.uniqueIdentifier(), fingerprint.sha256Hex()); + persistSkipAttempt( + candidate, fingerprint, 
terminalSuccess.record(), + ProcessingStatus.SKIPPED_ALREADY_PROCESSED, + context, attemptStart); + } + + case DocumentTerminalFinalFailure terminalFailure -> { + // Document finally failed → skip + LOG.info("Skipping '{}': already finally failed (fingerprint: {}).", + candidate.uniqueIdentifier(), fingerprint.sha256Hex()); + persistSkipAttempt( + candidate, fingerprint, terminalFailure.record(), + ProcessingStatus.SKIPPED_FINAL_FAILURE, + context, attemptStart); + } + + case DocumentUnknown ignored -> { + // New document – process and create a new master record + processAndPersistNewDocument(candidate, fingerprint, m3Outcome, context, attemptStart); + } + + case DocumentKnownProcessable knownProcessable -> { + // Known but not terminal – process and update the existing master record + processAndPersistKnownDocument( + candidate, fingerprint, m3Outcome, knownProcessable.record(), + context, attemptStart); + } + + default -> + // Exhaustive sealed hierarchy; this branch is unreachable + LOG.error("Unexpected lookup result type for '{}': {}", + candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName()); + } + } + + // ------------------------------------------------------------------------- + // Skip path + // ------------------------------------------------------------------------- + + /** + * Persists a skip attempt and updates the master record's {@code updatedAt} timestamp. + *

+ * Skip events do not change any failure counter. The master record's overall status + * remains unchanged (terminal). + * + * @param candidate the candidate being skipped + * @param fingerprint the document fingerprint + * @param existingRecord the current master record (already terminal) + * @param skipStatus the skip status to record ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} + * or {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}) + * @param context the current batch run context + * @param attemptStart the start instant of this processing attempt + */ + private void persistSkipAttempt( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentRecord existingRecord, + ProcessingStatus skipStatus, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt skipAttempt = new ProcessingAttempt( + fingerprint, + context.runId(), + attemptNumber, + attemptStart, + now, + skipStatus, + null, // no failure class for skip + null, // no failure message for skip + false // not retryable + ); + + // Write attempt first, then update master record + processingAttemptRepository.save(skipAttempt); + + // Update master record: only updatedAt changes; status and counters stay the same + DocumentRecord updatedRecord = new DocumentRecord( + existingRecord.fingerprint(), + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + existingRecord.overallStatus(), // terminal status unchanged + existingRecord.failureCounters(), // counters unchanged for skip + existingRecord.lastFailureInstant(), + existingRecord.lastSuccessInstant(), + existingRecord.createdAt(), + now // updatedAt = now + ); + documentRecordRepository.update(updatedRecord); + + LOG.debug("Skip attempt #{} persisted for '{}' with status {}.", + attemptNumber, candidate.uniqueIdentifier(), skipStatus); + + } catch 
(DocumentPersistenceException e) { + LOG.error("Failed to persist skip attempt for '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // New document path + // ------------------------------------------------------------------------- + + /** + * Processes a newly discovered document (no existing master record) and persists + * both the attempt and the new master record. + * + * @param candidate the candidate being processed + * @param fingerprint the document fingerprint + * @param m3Outcome the M3 pipeline result + * @param context the current batch run context + * @param attemptStart the start instant of this processing attempt + */ + private void processAndPersistNewDocument( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentProcessingOutcome m3Outcome, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + // Map M3 outcome to M4 status/counters for a brand-new document + M4Outcome m4 = mapM3OutcomeForNewDocument(m3Outcome); + + try { + // Attempt number is always 1 for a new document + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt attempt = buildAttempt( + fingerprint, context, attemptNumber, attemptStart, now, m4); + + // Create the new master record + DocumentRecord newRecord = new DocumentRecord( + fingerprint, + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters(), + m4.overallStatus() == ProcessingStatus.SUCCESS ? null : now, // lastFailureInstant + m4.overallStatus() == ProcessingStatus.SUCCESS ? 
now : null, // lastSuccessInstant + now, // createdAt + now // updatedAt + ); + + // Persist attempt first, then master record + processingAttemptRepository.save(attempt); + documentRecordRepository.create(newRecord); + + LOG.info("New document '{}' processed: status={}, contentErrors={}, transientErrors={}.", + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters().contentErrorCount(), + m4.counters().transientErrorCount()); + + } catch (DocumentPersistenceException e) { + LOG.error("Failed to persist processing result for new document '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // Known processable document path + // ------------------------------------------------------------------------- + + /** + * Processes a known but non-terminal document and updates both the attempt history + * and the master record. + * + * @param candidate the candidate being processed + * @param fingerprint the document fingerprint + * @param m3Outcome the M3 pipeline result + * @param existingRecord the current master record (not terminal) + * @param context the current batch run context + * @param attemptStart the start instant of this processing attempt + */ + private void processAndPersistKnownDocument( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentProcessingOutcome m3Outcome, + DocumentRecord existingRecord, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + // Map M3 outcome to M4 status/counters, taking existing counters into account + M4Outcome m4 = mapM3OutcomeForKnownDocument(m3Outcome, existingRecord.failureCounters()); + + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt attempt = buildAttempt( + fingerprint, context, attemptNumber, attemptStart, now, m4); + + // Update the master record with new status, counters and 
timestamps + DocumentRecord updatedRecord = new DocumentRecord( + existingRecord.fingerprint(), + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters(), + m4.overallStatus() == ProcessingStatus.SUCCESS + ? existingRecord.lastFailureInstant() : now, + m4.overallStatus() == ProcessingStatus.SUCCESS + ? now : existingRecord.lastSuccessInstant(), + existingRecord.createdAt(), + now // updatedAt + ); + + // Persist attempt first, then master record + processingAttemptRepository.save(attempt); + documentRecordRepository.update(updatedRecord); + + LOG.info("Known document '{}' processed: status={}, contentErrors={}, transientErrors={}.", + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters().contentErrorCount(), + m4.counters().transientErrorCount()); + + } catch (DocumentPersistenceException e) { + LOG.error("Failed to persist processing result for known document '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // M3 → M4 outcome mapping + // ------------------------------------------------------------------------- + + /** + * Maps an M3 outcome to M4 status, counters, and retryable flag for a brand-new + * document (no prior history, counters start at zero). + * + * @param m3Outcome the M3 pipeline result + * @return the M4 outcome with status, counters and retryable flag + */ + private M4Outcome mapM3OutcomeForNewDocument(DocumentProcessingOutcome m3Outcome) { + return mapM3OutcomeForKnownDocument(m3Outcome, FailureCounters.zero()); + } + + /** + * Maps an M3 outcome to M4 status, counters, and retryable flag, taking the + * existing failure counters into account. + *

+ * M4 minimal rules applied here: + *

+ * + * @param m3Outcome the M3 pipeline result + * @param existingCounters the current failure counters from the master record + * @return the M4 outcome with updated status, counters and retryable flag + */ + private M4Outcome mapM3OutcomeForKnownDocument( + DocumentProcessingOutcome m3Outcome, + FailureCounters existingCounters) { + + return switch (m3Outcome) { + case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored -> { + // M3 success: document passed all pre-checks + // In M4 scope (no KI, no target copy), PreCheckPassed is the terminal success + yield new M4Outcome( + ProcessingStatus.SUCCESS, + existingCounters, // counters unchanged on success + false // not retryable + ); + } + + case PreCheckFailed contentError -> { + // Deterministic content error: apply the 1-retry rule + FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount(); + boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0; + + if (isFirstOccurrence) { + // First content error → FAILED_RETRYABLE + yield new M4Outcome( + ProcessingStatus.FAILED_RETRYABLE, + updatedCounters, + true + ); + } else { + // Second (or later) content error → FAILED_FINAL + yield new M4Outcome( + ProcessingStatus.FAILED_FINAL, + updatedCounters, + false + ); + } + } + + case TechnicalDocumentError technicalError -> { + // Technical error after fingerprinting: always FAILED_RETRYABLE, increment transient counter + yield new M4Outcome( + ProcessingStatus.FAILED_RETRYABLE, + existingCounters.withIncrementedTransientErrorCount(), + true + ); + } + }; + } + + // ------------------------------------------------------------------------- + // Helper: build ProcessingAttempt + // ------------------------------------------------------------------------- + + /** + * Constructs a {@link ProcessingAttempt} from the given parameters and M4 outcome. 
+ * + * @param fingerprint the document fingerprint + * @param context the current batch run context + * @param attemptNumber the monotonic attempt number + * @param startedAt the start instant of this attempt + * @param endedAt the end instant of this attempt + * @param m4 the M4 outcome (status, counters, retryable) + * @return the constructed processing attempt + */ + private ProcessingAttempt buildAttempt( + DocumentFingerprint fingerprint, + BatchRunContext context, + int attemptNumber, + Instant startedAt, + Instant endedAt, + M4Outcome m4) { + + String failureClass = null; + String failureMessage = null; + + if (m4.overallStatus() == ProcessingStatus.FAILED_RETRYABLE + || m4.overallStatus() == ProcessingStatus.FAILED_FINAL) { + failureClass = m4.overallStatus().name(); + failureMessage = buildFailureMessage(m4); + } + + return new ProcessingAttempt( + fingerprint, + context.runId(), + attemptNumber, + startedAt, + endedAt, + m4.overallStatus(), + failureClass, + failureMessage, + m4.retryable() + ); + } + + /** + * Builds a human-readable failure message from the M4 outcome. + * + * @param m4 the M4 outcome + * @return a non-null failure message string + */ + private String buildFailureMessage(M4Outcome m4) { + return switch (m4.overallStatus()) { + case FAILED_RETRYABLE -> "Processing failed (retryable). " + + "ContentErrors=" + m4.counters().contentErrorCount() + + ", TransientErrors=" + m4.counters().transientErrorCount(); + case FAILED_FINAL -> "Processing failed finally (not retryable). 
" + + "ContentErrors=" + m4.counters().contentErrorCount() + + ", TransientErrors=" + m4.counters().transientErrorCount(); + default -> m4.overallStatus().name(); + }; + } + + // ------------------------------------------------------------------------- + // Internal value type: M4 outcome + // ------------------------------------------------------------------------- + + /** + * Internal value type carrying the M4 status, updated counters, and retryable flag + * after mapping from an M3 outcome. + *

+ * Tightly scoped to {@link M4DocumentProcessor}; not exposed outside this class. + * + * @param overallStatus the M4 overall status to persist + * @param counters the updated failure counters to persist + * @param retryable whether the failure is retryable in a later run + */ + private record M4Outcome( + ProcessingStatus overallStatus, + FailureCounters counters, + boolean retryable) { + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java index 4fa7068..64107ad 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java @@ -1,28 +1,65 @@ /** - * Application-level services for business logic evaluation. + * Application-level services for business logic evaluation and M4 orchestration. *

- * This package contains stateless, pure-logic services that evaluate document content - * and apply business rules. Services in this package: + * This package contains stateless, pure-logic services that evaluate document content, + * apply business rules, and orchestrate the M4 per-document processing flow. + * Services in this package: *

* - * Current services: + *

Current services

* * - * Document Processing Pipeline: - * The {@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} coordinates - * the complete processing workflow: + *

M4 processing flow ({@code M4DocumentProcessor})

+ *

+ * The {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor} + * implements the binding M4 processing order per candidate: *

- * <ol>
- *   <li>Convert technical PDF extraction results to processing outcomes</li>
- *   <li>Route successful extractions through pre-check validation</li>
- *   <li>Classify extraction and pre-check failures with appropriate error types</li>
- * </ol>
+ * <ol>
+ *   <li>Load the document master record by fingerprint.</li>
+ *   <li>If overall status is {@code SUCCESS} → persist a skip attempt with
+ *       {@code SKIPPED_ALREADY_PROCESSED}; do not change counters.</li>
+ *   <li>If overall status is {@code FAILED_FINAL} → persist a skip attempt with
+ *       {@code SKIPPED_FINAL_FAILURE}; do not change counters.</li>
+ *   <li>Otherwise map the M3 outcome into M4 status, counters and retryable flag
+ *       using the M4 minimal rules.</li>
+ *   <li>Persist exactly one historised processing attempt.</li>
+ *   <li>Persist the updated document master record.</li>
+ * </ol>
+ * + *

M4 minimal rules (status and counter semantics)

+ * + * + *

Persistence consistency

+ *

+ * For every identified document, the processing attempt and the master record are + * written in sequence. If either write fails, the failure is caught and logged; + * the batch run continues with the next candidate. True transactionality across + * two separate repository calls is not available in the M4 scope; this is a known + * and documented limitation. + * + *

Pre-fingerprint failures

+ *

+ * Failures that occur before a successful fingerprint is available are not handled + * by this package. They are handled by the use case and are not historised in SQLite. */ package de.gecheckt.pdf.umbenenner.application.service; diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java index c61f77a..669c749 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java @@ -3,54 +3,80 @@ package de.gecheckt.pdf.umbenenner.application.usecase; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError; import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService; +import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor; import 
de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; -import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; -import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; -import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; -import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; -import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; -import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.time.Instant; import java.util.List; +import java.util.Objects; /** * Batch processing implementation of {@link BatchRunProcessingUseCase}. *

- * Orchestrates the complete batch processing workflow: + * Orchestrates the complete M4 batch processing workflow per candidate: *

- * <ol>
- *   <li>Acquire exclusive run lock to prevent concurrent instances</li>
- *   <li>Scan source folder for PDF candidates</li>
- *   <li>For each candidate: extract text and page count, run pre-checks</li>
- *   <li>Log per-document decision; end each document controlled without KI or target copy</li>
- *   <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
- * </ol>
+ * <ol>
+ *   <li>Acquire exclusive run lock to prevent concurrent instances.</li>
+ *   <li>Scan source folder for PDF candidates.</li>
+ *   <li>For each candidate, execute the M4 processing order:
+ *     <ol>
+ *       <li>Compute fingerprint.</li>
+ *       <li>Load document master record.</li>
+ *       <li>If already {@code SUCCESS} → persist skip attempt with
+ *           {@code SKIPPED_ALREADY_PROCESSED}.</li>
+ *       <li>If already {@code FAILED_FINAL} → persist skip attempt with
+ *           {@code SKIPPED_FINAL_FAILURE}.</li>
+ *       <li>Otherwise execute the M3 pipeline (extraction + pre-checks).</li>
+ *       <li>Map M3 result into M4 status, counters and retryable flag.</li>
+ *       <li>Persist exactly one historised processing attempt.</li>
+ *       <li>Persist the updated document master record.</li>
+ *     </ol>
+ *   </li>
+ *   <li>Release lock and return structured outcome for Bootstrap exit code mapping.</li>
+ * </ol>
+ * + *

Idempotency

*

- * Processing boundary: - *

+ * Documents are identified exclusively by their SHA-256 content fingerprint. A document + * whose overall status is {@code SUCCESS} or {@code FAILED_FINAL} is skipped in every + * subsequent run; only a skip attempt is historised. + * + *

Pre-fingerprint failures

*

- * Non-Goals (not implemented): + * If the fingerprint computation fails (e.g. the file is no longer readable), the + * candidate is logged as a non-identifiable run event and is not historised + * in SQLite. The batch run continues with the next candidate. + * + *

Persistence consistency

+ *

+ * For every identified document, the processing attempt and the master record are + * written in sequence by {@link M4DocumentProcessor}. Persistence failures for a single + * document are caught and logged; the batch run continues with the remaining candidates. + * + *

Non-Goals (not implemented in M4)

* + * + * @since M3-AP-004 (extended in M4-AP-006) */ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCase { @@ -60,28 +86,44 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa private final RunLockPort runLockPort; private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort; private final PdfTextExtractionPort pdfTextExtractionPort; + private final FingerprintPort fingerprintPort; + private final M4DocumentProcessor m4DocumentProcessor; /** - * Creates the batch use case with the already-loaded startup configuration and all required ports. + * Creates the batch use case with the already-loaded startup configuration and all + * required ports for the M4 flow. *

* The configuration is loaded and validated by Bootstrap before use case creation; - * the use case receives the result directly and does not re-read it. + * the use case receives the result directly and does not re-read the properties file. * - * @param configuration the validated startup configuration - * @param runLockPort for exclusive run locking - * @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder - * @param pdfTextExtractionPort for extracting text and page count from a single PDF + * @param configuration the validated startup configuration; must not be null + * @param runLockPort for exclusive run locking; must not be null + * @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder; + * must not be null + * @param pdfTextExtractionPort for extracting text and page count from a single PDF; + * must not be null + * @param fingerprintPort for computing the SHA-256 fingerprint of a candidate; + * must not be null + * @param m4DocumentProcessor for applying M4 decision logic and persisting results; + * must not be null * @throws NullPointerException if any parameter is null */ public DefaultBatchRunProcessingUseCase( StartConfiguration configuration, RunLockPort runLockPort, SourceDocumentCandidatesPort sourceDocumentCandidatesPort, - PdfTextExtractionPort pdfTextExtractionPort) { - this.configuration = configuration; - this.runLockPort = runLockPort; - this.sourceDocumentCandidatesPort = sourceDocumentCandidatesPort; - this.pdfTextExtractionPort = pdfTextExtractionPort; + PdfTextExtractionPort pdfTextExtractionPort, + FingerprintPort fingerprintPort, + M4DocumentProcessor m4DocumentProcessor) { + this.configuration = Objects.requireNonNull(configuration, "configuration must not be null"); + this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null"); + this.sourceDocumentCandidatesPort = Objects.requireNonNull( + sourceDocumentCandidatesPort, 
"sourceDocumentCandidatesPort must not be null"); + this.pdfTextExtractionPort = Objects.requireNonNull( + pdfTextExtractionPort, "pdfTextExtractionPort must not be null"); + this.fingerprintPort = Objects.requireNonNull(fingerprintPort, "fingerprintPort must not be null"); + this.m4DocumentProcessor = Objects.requireNonNull( + m4DocumentProcessor, "m4DocumentProcessor must not be null"); } @Override @@ -96,12 +138,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa lockAcquired = true; LOG.debug("Run lock acquired successfully."); } catch (RunLockUnavailableException e) { - LOG.warn("Run lock not available – another instance is already running. This instance terminates immediately."); + LOG.warn("Run lock not available – another instance is already running. " + + "This instance terminates immediately."); return BatchRunOutcome.LOCK_UNAVAILABLE; } - LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder()); - LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant()); + LOG.debug("Configuration in use: source={}, target={}", + configuration.sourceFolder(), configuration.targetFolder()); + LOG.info("Batch run started. RunId: {}, Start: {}", + context.runId(), context.startInstant()); // Step 2: Load PDF candidates from source folder List candidates; @@ -113,12 +158,13 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa } LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size()); - // Step 3: Process each candidate through the pipeline + // Step 3: Process each candidate through the M4 pipeline for (SourceDocumentCandidate candidate : candidates) { - processCandidate(candidate); + processCandidate(candidate, context); } - LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}", candidates.size(), context.runId()); + LOG.info("Batch run completed. Processed {} candidate(s). 
RunId: {}", + candidates.size(), context.runId()); return BatchRunOutcome.SUCCESS; } catch (Exception e) { @@ -126,8 +172,8 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa return BatchRunOutcome.FAILURE; } finally { // Release the run lock only if it was successfully acquired. - // If acquire() threw RunLockUnavailableException, the lock belongs to another instance - // and must not be deleted by this instance. + // If acquire() threw RunLockUnavailableException, the lock belongs to another + // instance and must not be deleted by this instance. if (lockAcquired) { try { runLockPort.release(); @@ -140,56 +186,105 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa } /** - * Processes a single PDF candidate through the complete pipeline. + * Processes a single PDF candidate through the complete M4 pipeline. *

- * Processing steps per document: + * M4 processing order: *

    - *
  1. Log candidate recognition
  2. - *
  3. Extract text and page count from the PDF via {@link PdfTextExtractionPort}
  4. - *
  5. Process extraction result through pre-checks via {@link DocumentProcessingService}
  6. - *
  7. Log extraction outcome and final decision
  8. + *
  9. Record the attempt start instant.
  10. + *
  11. Compute the SHA-256 fingerprint of the candidate file content.
  12. + *
  13. If fingerprint computation fails: log as non-identifiable run event and + * return — no SQLite record is created.
  14. + *
  15. Execute the M3 pipeline (PDF extraction + pre-checks).
  16. + *
  17. Delegate to {@link M4DocumentProcessor} for idempotency check, status/counter + * mapping, and consistent two-level persistence.
  18. *
*

- * Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall - * batch run. Each candidate ends controlled regardless of its outcome. - *

- * Processing boundary: no KI call, no persistence, no filename generation, - * no target file copy is initiated here, even for candidates that pass all pre-checks. + * Per-document errors do not abort the overall batch run. Each candidate ends + * controlled regardless of its outcome. * * @param candidate the candidate to process + * @param context the current batch run context */ - private void processCandidate(SourceDocumentCandidate candidate) { + private void processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) { LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier()); - PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate); + // Record the attempt start instant before any work begins + Instant attemptStart = Instant.now(); + + // Step M4-1: Compute fingerprint + FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate); + + switch (fingerprintResult) { + case FingerprintTechnicalError fingerprintError -> { + // Pre-fingerprint failure: not historised in SQLite + LOG.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).", + candidate.uniqueIdentifier(), fingerprintError.errorMessage()); + return; + } + + case FingerprintSuccess fingerprintSuccess -> { + DocumentFingerprint fingerprint = fingerprintSuccess.fingerprint(); + LOG.debug("Fingerprint computed for '{}': {}", + candidate.uniqueIdentifier(), fingerprint.sha256Hex()); + + // Step M4-2..M4-8: Execute M3 pipeline and delegate M4 logic to the processor + // The M3 pipeline runs only if the document is not in a terminal state; + // M4DocumentProcessor handles the terminal check internally. + // We run M3 eagerly here and pass the result; M4DocumentProcessor will + // ignore it for terminal documents. 
+ DocumentProcessingOutcome m3Outcome = runM3Pipeline(candidate); + + // Delegate idempotency check, status mapping, and persistence to M4DocumentProcessor + m4DocumentProcessor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + } + } + } + + /** + * Runs the M3 pipeline (PDF text extraction + pre-checks) for the given candidate. + *

+ * This method is called after a successful fingerprint computation. The result is + * passed to {@link M4DocumentProcessor}, which applies it only when the document is + * not in a terminal state. + * + * @param candidate the candidate to run through the M3 pipeline + * @return the M3 pipeline outcome (pre-check passed, pre-check failed, or technical error) + */ + private DocumentProcessingOutcome runM3Pipeline(SourceDocumentCandidate candidate) { + PdfExtractionResult extractionResult = + pdfTextExtractionPort.extractTextAndPageCount(candidate); // Log extraction outcome switch (extractionResult) { - case PdfExtractionSuccess success -> + case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess success -> LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.", - candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length()); - case PdfExtractionContentError contentError -> + candidate.uniqueIdentifier(), + success.pageCount().value(), + success.extractedText().length()); + case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError contentError -> LOG.debug("PDF content extraction failed for '{}' (content problem): {}", candidate.uniqueIdentifier(), contentError.reason()); - case PdfExtractionTechnicalError technicalError -> + case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError technicalError -> LOG.debug("PDF extraction technical error for '{}': {}", candidate.uniqueIdentifier(), technicalError.errorMessage()); } - // Process through complete pipeline - var outcome = DocumentProcessingService.processDocument(candidate, extractionResult, configuration); + DocumentProcessingOutcome outcome = + DocumentProcessingService.processDocument(candidate, extractionResult, configuration); - // Log processing outcome + // Log M3 outcome switch (outcome) { - case PreCheckPassed passed -> - LOG.info("Pre-checks PASSED for '{}'. 
Candidate ready for further processing.", + case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed -> + LOG.info("Pre-checks PASSED for '{}'. Candidate ready for M4 persistence.", candidate.uniqueIdentifier()); - case PreCheckFailed failed -> - LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).", + case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed -> + LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error).", candidate.uniqueIdentifier(), failed.failureReasonDescription()); - case TechnicalDocumentError technicalError -> - LOG.warn("Processing FAILED for '{}': {} (Technical error – may retry in later run).", + case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError -> + LOG.warn("Processing FAILED for '{}': {} (Technical error – retryable).", candidate.uniqueIdentifier(), technicalError.errorMessage()); } + + return outcome; } -} \ No newline at end of file +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java index 4a9e1c4..da46568 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java @@ -4,9 +4,25 @@ * Implementations: *

*

+ *

M4 processing order (AP-006)

+ *

+ * For each candidate, {@link de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase} + * enforces this order: + *

    + *
  1. Compute SHA-256 fingerprint of the candidate file content.
  2. + *
  3. If fingerprint computation fails: log as non-identifiable run event; + * do NOT write any SQLite record; continue with next candidate.
  4. + *
  5. Run the M3 pipeline (PDF extraction + pre-checks).
  6. + *
  7. Delegate to {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor} + * for idempotency check, status/counter mapping, and consistent persistence.
  8. + *
+ *

* All implementations are infrastructure-agnostic and interact only through ports. + * + * @since M2 (extended in M4-AP-006) */ -package de.gecheckt.pdf.umbenenner.application.usecase; \ No newline at end of file +package de.gecheckt.pdf.umbenenner.application.usecase; diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessorTest.java new file mode 100644 index 0000000..39761cf --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessorTest.java @@ -0,0 +1,425 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown; +import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; +import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; +import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import 
de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; +import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.util.ArrayList; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Unit tests for {@link M4DocumentProcessor}. + *

+ * Covers: + *

+ */ +class M4DocumentProcessorTest { + + private static final String FINGERPRINT_HEX = + "a".repeat(64); // 64 lowercase hex chars + + private CapturingDocumentRecordRepository recordRepo; + private CapturingProcessingAttemptRepository attemptRepo; + private M4DocumentProcessor processor; + + private SourceDocumentCandidate candidate; + private DocumentFingerprint fingerprint; + private BatchRunContext context; + private Instant attemptStart; + + @BeforeEach + void setUp() { + recordRepo = new CapturingDocumentRecordRepository(); + attemptRepo = new CapturingProcessingAttemptRepository(); + processor = new M4DocumentProcessor(recordRepo, attemptRepo); + + candidate = new SourceDocumentCandidate( + "test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf")); + fingerprint = new DocumentFingerprint(FINGERPRINT_HEX); + context = new BatchRunContext(new RunId("run-001"), Instant.now()); + attemptStart = Instant.now(); + } + + // ------------------------------------------------------------------------- + // New document (DocumentUnknown) path + // ------------------------------------------------------------------------- + + @Test + void process_newDocument_preCheckPassed_persistsSuccessStatus() { + recordRepo.setLookupResult(new DocumentUnknown()); + DocumentProcessingOutcome m3Outcome = new PreCheckPassed( + candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + // One attempt written + assertEquals(1, attemptRepo.savedAttempts.size()); + ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); + assertEquals(ProcessingStatus.SUCCESS, attempt.status()); + assertFalse(attempt.retryable()); + assertNull(attempt.failureClass()); + assertNull(attempt.failureMessage()); + + // One master record created + assertEquals(1, recordRepo.createdRecords.size()); + DocumentRecord record = recordRepo.createdRecords.get(0); + assertEquals(ProcessingStatus.SUCCESS, 
record.overallStatus()); + assertEquals(0, record.failureCounters().contentErrorCount()); + assertEquals(0, record.failureCounters().transientErrorCount()); + assertNotNull(record.lastSuccessInstant()); + assertNull(record.lastFailureInstant()); + } + + @Test + void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() { + recordRepo.setLookupResult(new DocumentUnknown()); + DocumentProcessingOutcome m3Outcome = new PreCheckFailed( + candidate, PreCheckFailureReason.NO_USABLE_TEXT); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, attemptRepo.savedAttempts.size()); + ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status()); + assertTrue(attempt.retryable()); + + assertEquals(1, recordRepo.createdRecords.size()); + DocumentRecord record = recordRepo.createdRecords.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus()); + assertEquals(1, record.failureCounters().contentErrorCount()); + assertEquals(0, record.failureCounters().transientErrorCount()); + assertNotNull(record.lastFailureInstant()); + assertNull(record.lastSuccessInstant()); + } + + @Test + void process_newDocument_technicalError_persistsFailedRetryable_transientCounterOne() { + recordRepo.setLookupResult(new DocumentUnknown()); + DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError( + candidate, "I/O error", null); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, attemptRepo.savedAttempts.size()); + ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status()); + assertTrue(attempt.retryable()); + + assertEquals(1, recordRepo.createdRecords.size()); + DocumentRecord record = recordRepo.createdRecords.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus()); + 
assertEquals(0, record.failureCounters().contentErrorCount()); + assertEquals(1, record.failureCounters().transientErrorCount()); + } + + // ------------------------------------------------------------------------- + // Known processable document path (DocumentKnownProcessable) + // ------------------------------------------------------------------------- + + @Test + void process_knownDocument_secondContentError_persistsFailedFinal_contentCounterTwo() { + // Existing record: first content error already recorded + DocumentRecord existingRecord = buildRecord( + ProcessingStatus.FAILED_RETRYABLE, + new FailureCounters(1, 0)); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + DocumentProcessingOutcome m3Outcome = new PreCheckFailed( + candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, attemptRepo.savedAttempts.size()); + ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, attempt.status()); + assertFalse(attempt.retryable()); + + assertEquals(1, recordRepo.updatedRecords.size()); + DocumentRecord record = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus()); + assertEquals(2, record.failureCounters().contentErrorCount()); + assertEquals(0, record.failureCounters().transientErrorCount()); + } + + @Test + void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() { + DocumentRecord existingRecord = buildRecord( + ProcessingStatus.FAILED_RETRYABLE, + new FailureCounters(0, 2)); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError( + candidate, "Timeout", null); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, recordRepo.updatedRecords.size()); + DocumentRecord 
record = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus()); + assertEquals(0, record.failureCounters().contentErrorCount()); + assertEquals(3, record.failureCounters().transientErrorCount()); + assertTrue(attemptRepo.savedAttempts.get(0).retryable()); + } + + @Test + void process_knownDocument_preCheckPassed_persistsSuccess() { + DocumentRecord existingRecord = buildRecord( + ProcessingStatus.FAILED_RETRYABLE, + new FailureCounters(0, 1)); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + DocumentProcessingOutcome m3Outcome = new PreCheckPassed( + candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, recordRepo.updatedRecords.size()); + DocumentRecord record = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.SUCCESS, record.overallStatus()); + // Counters unchanged on success + assertEquals(0, record.failureCounters().contentErrorCount()); + assertEquals(1, record.failureCounters().transientErrorCount()); + assertNotNull(record.lastSuccessInstant()); + } + + // ------------------------------------------------------------------------- + // Skip paths + // ------------------------------------------------------------------------- + + @Test + void process_terminalSuccess_persistsSkipAttemptWithSkippedAlreadyProcessed() { + DocumentRecord existingRecord = buildRecord( + ProcessingStatus.SUCCESS, + FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); + + DocumentProcessingOutcome m3Outcome = new PreCheckPassed( + candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, attemptRepo.savedAttempts.size()); + ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); + 
assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, attempt.status()); + assertFalse(attempt.retryable()); + assertNull(attempt.failureClass()); + assertNull(attempt.failureMessage()); + + // Master record updated (only updatedAt changes) + assertEquals(1, recordRepo.updatedRecords.size()); + DocumentRecord record = recordRepo.updatedRecords.get(0); + // Status and counters remain unchanged + assertEquals(ProcessingStatus.SUCCESS, record.overallStatus()); + assertEquals(0, record.failureCounters().contentErrorCount()); + assertEquals(0, record.failureCounters().transientErrorCount()); + } + + @Test + void process_terminalFinalFailure_persistsSkipAttemptWithSkippedFinalFailure() { + DocumentRecord existingRecord = buildRecord( + ProcessingStatus.FAILED_FINAL, + new FailureCounters(2, 0)); + recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord)); + + DocumentProcessingOutcome m3Outcome = new PreCheckFailed( + candidate, PreCheckFailureReason.NO_USABLE_TEXT); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, attemptRepo.savedAttempts.size()); + ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); + assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, attempt.status()); + assertFalse(attempt.retryable()); + + // Master record updated (only updatedAt changes); counters unchanged + assertEquals(1, recordRepo.updatedRecords.size()); + DocumentRecord record = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus()); + assertEquals(2, record.failureCounters().contentErrorCount()); + assertEquals(0, record.failureCounters().transientErrorCount()); + } + + @Test + void process_skipEvent_doesNotChangeErrorCounters() { + FailureCounters originalCounters = new FailureCounters(1, 3); + DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, originalCounters); + recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); + 
+ processor.process(candidate, fingerprint, + new PreCheckPassed(candidate, new PdfExtractionSuccess("t", new PdfPageCount(1))), + context, attemptStart); + + DocumentRecord updated = recordRepo.updatedRecords.get(0); + assertEquals(originalCounters.contentErrorCount(), updated.failureCounters().contentErrorCount(), + "Skip must not change content error counter"); + assertEquals(originalCounters.transientErrorCount(), updated.failureCounters().transientErrorCount(), + "Skip must not change transient error counter"); + } + + // ------------------------------------------------------------------------- + // Persistence lookup failure + // ------------------------------------------------------------------------- + + @Test + void process_persistenceLookupFailure_noAttemptWritten_noException() { + recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("DB unavailable", null)); + + DocumentProcessingOutcome m3Outcome = new PreCheckPassed( + candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + + // Must not throw + assertDoesNotThrow(() -> + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart)); + + // No attempt written, no record created/updated + assertEquals(0, attemptRepo.savedAttempts.size(), + "No attempt must be written when lookup fails"); + assertEquals(0, recordRepo.createdRecords.size()); + assertEquals(0, recordRepo.updatedRecords.size()); + } + + // ------------------------------------------------------------------------- + // Persistence write failure: controlled, no crash + // ------------------------------------------------------------------------- + + @Test + void process_persistenceWriteFailure_doesNotThrow_batchContinues() { + recordRepo.setLookupResult(new DocumentUnknown()); + // Make the attempt save throw + attemptRepo.failOnSave = true; + + DocumentProcessingOutcome m3Outcome = new PreCheckPassed( + candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + + // Must not propagate the 
exception + assertDoesNotThrow(() -> + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart)); + } + + // ------------------------------------------------------------------------- + // Attempt number monotonicity + // ------------------------------------------------------------------------- + + @Test + void process_attemptNumberIsAssignedFromRepository() { + recordRepo.setLookupResult(new DocumentUnknown()); + attemptRepo.nextAttemptNumber = 3; // Simulate 2 prior attempts + + DocumentProcessingOutcome m3Outcome = new PreCheckPassed( + candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + + processor.process(candidate, fingerprint, m3Outcome, context, attemptStart); + + assertEquals(1, attemptRepo.savedAttempts.size()); + assertEquals(3, attemptRepo.savedAttempts.get(0).attemptNumber(), + "Attempt number must be taken from the repository"); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) { + Instant now = Instant.now(); + return new DocumentRecord( + fingerprint, + new SourceDocumentLocator("/tmp/test.pdf"), + "test.pdf", + status, + counters, + status == ProcessingStatus.SUCCESS ? null : now, + status == ProcessingStatus.SUCCESS ? 
now : null, + now, + now + ); + } + + // ------------------------------------------------------------------------- + // Capturing test doubles + // ------------------------------------------------------------------------- + + private static class CapturingDocumentRecordRepository implements DocumentRecordRepository { + private DocumentRecordLookupResult lookupResult = new DocumentUnknown(); + final List createdRecords = new ArrayList<>(); + final List updatedRecords = new ArrayList<>(); + + void setLookupResult(DocumentRecordLookupResult result) { + this.lookupResult = result; + } + + @Override + public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) { + return lookupResult; + } + + @Override + public void create(DocumentRecord record) { + createdRecords.add(record); + } + + @Override + public void update(DocumentRecord record) { + updatedRecords.add(record); + } + } + + private static class CapturingProcessingAttemptRepository implements ProcessingAttemptRepository { + final List savedAttempts = new ArrayList<>(); + int nextAttemptNumber = 1; + boolean failOnSave = false; + + @Override + public int loadNextAttemptNumber(DocumentFingerprint fingerprint) { + return nextAttemptNumber; + } + + @Override + public void save(ProcessingAttempt attempt) { + if (failOnSave) { + throw new DocumentPersistenceException("Simulated save failure"); + } + savedAttempts.add(attempt); + } + + @Override + public List findAllByFingerprint(DocumentFingerprint fingerprint) { + return List.copyOf(savedAttempts); + } + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java index d0ca083..f13411f 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java +++ 
b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java @@ -2,12 +2,24 @@ package de.gecheckt.pdf.umbenenner.application.usecase; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError; import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; +import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; @@ -37,10 +49,11 @@ import static 
org.junit.jupiter.api.Assertions.*; *
    *
  • Lock acquisition and release lifecycle
  • *
  • Source folder scanning and per-document processing loop
  • - *
  • Happy path: candidate passes pre-checks, ends controlled without KI or target copy
  • + *
  • Happy path: candidate passes pre-checks, M4 persistence is invoked
  • *
  • Deterministic content errors: no usable text, page limit exceeded
  • *
  • Technical extraction errors: controlled per-document end, batch continues
  • *
  • Source folder access failure: batch fails with FAILURE outcome
  • + *
  • M4 idempotency: fingerprint failure → not historised
  • *
*/ class BatchRunProcessingUseCaseTest { @@ -57,8 +70,9 @@ class BatchRunProcessingUseCaseTest { MockRunLockPort lockPort = new MockRunLockPort(); StartConfiguration config = buildConfig(tempDir); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort()); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(), + new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor()); BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); @@ -73,8 +87,9 @@ class BatchRunProcessingUseCaseTest { CountingRunLockPort lockPort = new CountingRunLockPort(true); StartConfiguration config = buildConfig(tempDir); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort()); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(), + new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor()); BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); @@ -92,8 +107,9 @@ class BatchRunProcessingUseCaseTest { CountingRunLockPort lockPort = new CountingRunLockPort(true); StartConfiguration config = buildConfig(tempDir); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort()); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(), + new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor()); BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now()); 
useCase.execute(context); @@ -108,8 +124,9 @@ class BatchRunProcessingUseCaseTest { ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort(); StartConfiguration config = buildConfig(tempDir); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort()); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(), + new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor()); BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); @@ -128,8 +145,9 @@ class BatchRunProcessingUseCaseTest { MockRunLockPort lockPort = new MockRunLockPort(); StartConfiguration config = buildConfig(tempDir); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort()); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(), + new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor()); BatchRunContext context = new BatchRunContext(new RunId("empty"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); @@ -138,25 +156,26 @@ class BatchRunProcessingUseCaseTest { } @Test - void execute_happyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception { + void execute_happyPath_candidatePassesPreChecks_m4PersistenceInvoked() throws Exception { MockRunLockPort lockPort = new MockRunLockPort(); StartConfiguration config = buildConfig(tempDir); - // Candidate with usable text within page limit SourceDocumentCandidate candidate = makeCandidate("document.pdf"); PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)); FixedCandidatesPort candidatesPort = new 
FixedCandidatesPort(List.of(candidate)); FixedExtractionPort extractionPort = new FixedExtractionPort(success); + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + new AlwaysSuccessFingerprintPort(), m4Processor); BatchRunContext context = new BatchRunContext(new RunId("happy"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); - // Batch run succeeds; document ended controlled at boundary (no KI, no copy) assertTrue(outcome.isSuccess(), "Happy path should yield SUCCESS"); assertEquals(1, extractionPort.callCount(), "Extraction should be called exactly once"); + assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called exactly once"); } @Test @@ -165,40 +184,42 @@ class BatchRunProcessingUseCaseTest { StartConfiguration config = buildConfig(tempDir); SourceDocumentCandidate candidate = makeCandidate("image-only.pdf"); - // Extraction returns text with no letters or digits PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess(" ", new PdfPageCount(1)); FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess); + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + new AlwaysSuccessFingerprintPort(), m4Processor); BatchRunContext context = new BatchRunContext(new RunId("no-text"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); - // Document ends with pre-check 
failure; batch itself still succeeds assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run"); + assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for content errors"); } @Test void execute_pageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception { MockRunLockPort lockPort = new MockRunLockPort(); - // Config has maxPages=3; document has 10 pages StartConfiguration config = buildConfig(tempDir); SourceDocumentCandidate candidate = makeCandidate("big.pdf"); PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10)); FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages); + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + new AlwaysSuccessFingerprintPort(), m4Processor); BatchRunContext context = new BatchRunContext(new RunId("page-limit"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); - // maxPages in buildConfig is 3; 10 pages exceeds limit – pre-check fails, batch continues assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run"); + assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for page limit errors"); } @Test @@ -210,14 +231,17 @@ class BatchRunProcessingUseCaseTest { PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted"); FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); FixedExtractionPort extractionPort = new FixedExtractionPort(contentError); + TrackingM4DocumentProcessor m4Processor = new 
TrackingM4DocumentProcessor(); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + new AlwaysSuccessFingerprintPort(), m4Processor); BatchRunContext context = new BatchRunContext(new RunId("content-error"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run"); + assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for content errors"); } @Test @@ -229,14 +253,17 @@ class BatchRunProcessingUseCaseTest { PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null); FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError); + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + new AlwaysSuccessFingerprintPort(), m4Processor); BatchRunContext context = new BatchRunContext(new RunId("tech-error"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run"); + assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for technical errors"); } @Test @@ -248,18 +275,71 @@ class BatchRunProcessingUseCaseTest { throw new SourceDocumentAccessException("Source folder not readable"); }; - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, failingPort, new 
NoOpExtractionPort()); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, failingPort, new NoOpExtractionPort(), + new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor()); BatchRunContext context = new BatchRunContext(new RunId("access-fail"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); assertTrue(outcome.isFailure(), "Source folder access failure should yield FAILURE outcome"); assertFalse(outcome.isSuccess(), "Source folder access failure must not be SUCCESS"); - // Lock must still be released assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even when source access fails"); } + // ------------------------------------------------------------------------- + // M4-specific: fingerprint failure → not historised + // ------------------------------------------------------------------------- + + @Test + void execute_fingerprintFailure_candidateNotHistorised_batchContinues() throws Exception { + MockRunLockPort lockPort = new MockRunLockPort(); + StartConfiguration config = buildConfig(tempDir); + + SourceDocumentCandidate candidate = makeCandidate("unreadable.pdf"); + FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); + + // Fingerprint always fails → M4 processor must NOT be called + FingerprintPort alwaysFailingFingerprintPort = c -> + new FingerprintTechnicalError("Cannot read file", null); + + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, new NoOpExtractionPort(), + alwaysFailingFingerprintPort, m4Processor); + BatchRunContext context = new BatchRunContext(new RunId("fp-fail"), Instant.now()); + + BatchRunOutcome outcome = useCase.execute(context); + + assertTrue(outcome.isSuccess(), "Fingerprint failure should not abort the batch run"); + assertEquals(0, m4Processor.processCallCount(), + "M4 processor must NOT be called when 
fingerprint computation fails (pre-fingerprint failure)"); + } + + @Test + void execute_fingerprintFailure_extractionNotCalled() throws Exception { + MockRunLockPort lockPort = new MockRunLockPort(); + StartConfiguration config = buildConfig(tempDir); + + SourceDocumentCandidate candidate = makeCandidate("unreadable.pdf"); + FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); + FixedExtractionPort extractionPort = new FixedExtractionPort( + new PdfExtractionSuccess("text", new PdfPageCount(1))); + + FingerprintPort alwaysFailingFingerprintPort = c -> + new FingerprintTechnicalError("Cannot read file", null); + + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + alwaysFailingFingerprintPort, new NoOpM4DocumentProcessor()); + BatchRunContext context = new BatchRunContext(new RunId("fp-fail-no-extract"), Instant.now()); + + useCase.execute(context); + + assertEquals(0, extractionPort.callCount(), + "PDF extraction must NOT be called when fingerprint computation fails"); + } + /** * Mixed-batch test: one document per outcome type in a single run. * Proves that no individual outcome aborts the overall batch. 
@@ -267,18 +347,18 @@ class BatchRunProcessingUseCaseTest { @Test void execute_mixedBatch_allOutcomeTypes_batchOverallSucceeds() throws Exception { MockRunLockPort lockPort = new MockRunLockPort(); - // maxPages=3 in buildConfig; pageLimitCandidate has 10 pages → exceeds limit StartConfiguration config = buildConfig(tempDir); - SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf"); - SourceDocumentCandidate noTextCandidate = makeCandidate("notext.pdf"); - SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf"); + SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf"); + SourceDocumentCandidate noTextCandidate = makeCandidate("notext.pdf"); + SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf"); SourceDocumentCandidate technicalErrorCandidate = makeCandidate("broken.pdf"); - SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf"); + SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf"); + SourceDocumentCandidate fpFailCandidate = makeCandidate("unreadable.pdf"); FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of( goodCandidate, noTextCandidate, pageLimitCandidate, - technicalErrorCandidate, contentErrorCandidate)); + technicalErrorCandidate, contentErrorCandidate, fpFailCandidate)); MappedExtractionPort extractionPort = new MappedExtractionPort() .with(goodCandidate, new PdfExtractionSuccess("Invoice text", new PdfPageCount(1))) @@ -287,16 +367,31 @@ class BatchRunProcessingUseCaseTest { .with(technicalErrorCandidate, new PdfExtractionTechnicalError("I/O error", null)) .with(contentErrorCandidate, new PdfExtractionContentError("PDF is encrypted")); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + // fpFailCandidate gets a fingerprint failure; others get a valid fingerprint + FingerprintPort mappedFingerprintPort = candidate -> { + if 
(candidate.uniqueIdentifier().equals("unreadable.pdf")) { + return new FingerprintTechnicalError("Cannot read", null); + } + return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier())); + }; + + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); + + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + mappedFingerprintPort, m4Processor); BatchRunContext context = new BatchRunContext(new RunId("mixed"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); - assertTrue(outcome.isSuccess(), - "Mixed batch with all outcome types must yield batch SUCCESS"); + assertTrue(outcome.isSuccess(), "Mixed batch with all outcome types must yield batch SUCCESS"); + // 5 candidates with successful fingerprint → M4 processor called 5 times + // 1 candidate with fingerprint failure → M4 processor NOT called + assertEquals(5, m4Processor.processCallCount(), + "M4 processor must be called for each candidate with a successful fingerprint"); + // Extraction called for 5 candidates (not for fpFailCandidate) assertEquals(5, extractionPort.callCount(), - "Extraction must be attempted for each of the 5 candidates"); + "Extraction must be attempted for each of the 5 candidates with a valid fingerprint"); } @Test @@ -312,21 +407,35 @@ class BatchRunProcessingUseCaseTest { PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice content", new PdfPageCount(2)); FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates); FixedExtractionPort extractionPort = new FixedExtractionPort(success); + TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor(); - DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( - config, lockPort, candidatesPort, extractionPort); + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + new AlwaysSuccessFingerprintPort(), 
m4Processor); BatchRunContext context = new BatchRunContext(new RunId("multi"), Instant.now()); BatchRunOutcome outcome = useCase.execute(context); assertTrue(outcome.isSuccess(), "All three candidates processed should yield SUCCESS"); assertEquals(3, extractionPort.callCount(), "Extraction should be called once per candidate"); + assertEquals(3, m4Processor.processCallCount(), "M4 processor should be called once per candidate"); } // ------------------------------------------------------------------------- // Helpers // ------------------------------------------------------------------------- + private static DefaultBatchRunProcessingUseCase buildUseCase( + StartConfiguration config, + RunLockPort lockPort, + SourceDocumentCandidatesPort candidatesPort, + PdfTextExtractionPort extractionPort, + FingerprintPort fingerprintPort, + M4DocumentProcessor m4Processor) { + return new DefaultBatchRunProcessingUseCase( + config, lockPort, candidatesPort, extractionPort, fingerprintPort, m4Processor); + } + private static StartConfiguration buildConfig(Path tempDir) throws Exception { Path sourceDir = Files.createDirectories(tempDir.resolve("source")); Path targetDir = Files.createDirectories(tempDir.resolve("target")); @@ -357,6 +466,13 @@ class BatchRunProcessingUseCaseTest { return new SourceDocumentCandidate(filename, 1024L, new SourceDocumentLocator("/tmp/" + filename)); } + /** Creates a deterministic fake fingerprint from a string (padded to 64 hex chars). 
*/ + private static DocumentFingerprint makeFingerprint(String seed) { + String hex = String.format("%064x", Math.abs(seed.hashCode())); + // Ensure exactly 64 lowercase hex chars + return new DocumentFingerprint(hex.substring(0, 64)); + } + // ------------------------------------------------------------------------- // Mock / Stub implementations // ------------------------------------------------------------------------- @@ -480,4 +596,88 @@ class BatchRunProcessingUseCaseTest { int callCount() { return calls; } } -} \ No newline at end of file + + /** + * Fingerprint port that always returns a deterministic success based on the candidate's + * unique identifier. + */ + private static class AlwaysSuccessFingerprintPort implements FingerprintPort { + @Override + public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) { + String hex = String.format("%064x", Math.abs(candidate.uniqueIdentifier().hashCode())); + return new FingerprintSuccess(new DocumentFingerprint(hex.substring(0, 64))); + } + } + + /** + * No-op M4DocumentProcessor that does nothing (for tests that only care about + * lock/batch lifecycle, not M4 persistence). + */ + private static class NoOpM4DocumentProcessor extends M4DocumentProcessor { + NoOpM4DocumentProcessor() { + super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository()); + } + } + + /** + * Tracking M4DocumentProcessor that counts how many times {@code process()} is called. 
+ */ + private static class TrackingM4DocumentProcessor extends M4DocumentProcessor { + private int processCallCount = 0; + + TrackingM4DocumentProcessor() { + super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository()); + } + + @Override + public void process( + de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate, + de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint fingerprint, + de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome m3Outcome, + de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext context, + java.time.Instant attemptStart) { + processCallCount++; + // Delegate to super so the real logic runs (with no-op repos) + super.process(candidate, fingerprint, m3Outcome, context, attemptStart); + } + + int processCallCount() { return processCallCount; } + } + + /** No-op DocumentRecordRepository for use in test M4DocumentProcessor instances. */ + private static class NoOpDocumentRecordRepository implements DocumentRecordRepository { + @Override + public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) { + // Return DocumentUnknown so the M4 processor always takes the "new document" path + return new DocumentUnknown(); + } + + @Override + public void create(DocumentRecord record) { + // No-op + } + + @Override + public void update(DocumentRecord record) { + // No-op + } + } + + /** No-op ProcessingAttemptRepository for use in test M4DocumentProcessor instances. 
*/ + private static class NoOpProcessingAttemptRepository implements ProcessingAttemptRepository { + @Override + public int loadNextAttemptNumber(DocumentFingerprint fingerprint) { + return 1; + } + + @Override + public void save(ProcessingAttempt attempt) { + // No-op + } + + @Override + public List findAllByFingerprint(DocumentFingerprint fingerprint) { + return List.of(); + } + } +} diff --git a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java index a8ce304..1fe653b 100644 --- a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java +++ b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java @@ -5,16 +5,26 @@ import org.apache.logging.log4j.Logger; import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand; import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter; import de.gecheckt.pdf.umbenenner.application.config.InvalidStartConfigurationException; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.config.StartConfigurationValidator; import 
de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase; import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; +import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; +import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor; import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.RunId; @@ -29,20 +39,35 @@ import java.util.UUID; *

* Responsibilities: *

    - *
  1. Load and validate the startup configuration
  2. - *
  3. Resolve the run-lock file path (with default fallback)
  4. - *
  5. Create and wire all ports and adapters
  6. - *
  7. Start the CLI adapter and execute the batch use case
  8. - *
  9. Map the batch outcome to a process exit code
  10. + *
  11. Load and validate the startup configuration.
  12. + *
  13. Resolve the run-lock file path (with default fallback).
  14. + *
  15. Initialise the SQLite schema (M4: before the batch document loop begins).
  16. + *
  17. Create and wire all ports and adapters, including the M4 persistence ports.
  18. + *
  19. Start the CLI adapter and execute the batch use case.
  20. + *
  21. Map the batch outcome to a process exit code.
  22. *
- *

- * Exit code semantics: + * + *

Exit code semantics

*
    *
  • {@code 0}: Batch run executed successfully; individual document failures do not - * change the exit code as long as the run itself completed without a hard infrastructure error.
  • - *
  • {@code 1}: Hard start, bootstrap, or configuration failure that prevented the run - * from beginning, or a critical infrastructure failure during the run.
  • + * change the exit code as long as the run itself completed without a hard + * infrastructure error. + *
  • {@code 1}: Hard start, bootstrap, configuration, or schema-initialisation failure + * that prevented the run from beginning, or a critical infrastructure failure + * during the run.
  • *
+ * + *

M4 wiring

+ *

+ * The production constructor wires the following M4 adapters: + *

    + *
  • {@link Sha256FingerprintAdapter} — SHA-256 content fingerprinting.
  • + *
  • {@link SqliteSchemaInitializationAdapter} — schema initialisation at startup.
  • + *
  • {@link SqliteDocumentRecordRepositoryAdapter} — document master record CRUD.
  • + *
  • {@link SqliteProcessingAttemptRepositoryAdapter} — attempt history CRUD.
  • + *
+ * + * @since M2 (extended in M4-AP-006) */ public class BootstrapRunner { @@ -83,7 +108,7 @@ public class BootstrapRunner { *

* Receives the already-loaded and validated {@link StartConfiguration} and run lock port. * The factory is responsible for creating and wiring any additional outbound ports - * required by the use case (e.g., source document port, PDF extraction port). + * required by the use case (e.g., source document port, PDF extraction port, M4 ports). */ @FunctionalInterface public interface UseCaseFactory { @@ -101,34 +126,52 @@ public class BootstrapRunner { /** * Creates the BootstrapRunner with default factories for production use. *

- * Wires the full processing pipeline: + * Wires the full M4 processing pipeline: *

    - *
  • {@link PropertiesConfigurationPortAdapter} for configuration loading
  • - *
  • {@link FilesystemRunLockPortAdapter} for exclusive run locking
  • - *
  • {@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery
  • - *
  • {@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction
  • + *
  • {@link PropertiesConfigurationPortAdapter} for configuration loading.
  • + *
  • {@link FilesystemRunLockPortAdapter} for exclusive run locking.
  • + *
  • {@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery.
  • + *
  • {@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction.
  • + *
  • {@link Sha256FingerprintAdapter} for SHA-256 content fingerprinting.
  • + *
  • {@link SqliteDocumentRecordRepositoryAdapter} for document master record CRUD.
  • + *
  • {@link SqliteProcessingAttemptRepositoryAdapter} for attempt history CRUD.
  • *
+ *

+ * Schema initialisation is performed in {@link #run()} before the use case is created, + * using {@link SqliteSchemaInitializationAdapter}. */ public BootstrapRunner() { this.configPortFactory = PropertiesConfigurationPortAdapter::new; this.runLockPortFactory = FilesystemRunLockPortAdapter::new; this.validatorFactory = StartConfigurationValidator::new; - this.useCaseFactory = (config, lock) -> new DefaultBatchRunProcessingUseCase( - config, - lock, - new SourceDocumentCandidatesPortAdapter(config.sourceFolder()), - new PdfTextExtractionPortAdapter()); + this.useCaseFactory = (config, lock) -> { + String jdbcUrl = buildJdbcUrl(config); + FingerprintPort fingerprintPort = new Sha256FingerprintAdapter(); + DocumentRecordRepository documentRecordRepository = + new SqliteDocumentRecordRepositoryAdapter(jdbcUrl); + ProcessingAttemptRepository processingAttemptRepository = + new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl); + M4DocumentProcessor m4Processor = + new M4DocumentProcessor(documentRecordRepository, processingAttemptRepository); + return new DefaultBatchRunProcessingUseCase( + config, + lock, + new SourceDocumentCandidatesPortAdapter(config.sourceFolder()), + new PdfTextExtractionPortAdapter(), + fingerprintPort, + m4Processor); + }; this.commandFactory = SchedulerBatchCommand::new; } /** * Creates the BootstrapRunner with custom factories for testing. 
* - * @param configPortFactory factory for creating ConfigurationPort instances + * @param configPortFactory factory for creating ConfigurationPort instances * @param runLockPortFactory factory for creating RunLockPort instances - * @param validatorFactory factory for creating StartConfigurationValidator instances - * @param useCaseFactory factory for creating BatchRunProcessingUseCase instances - * @param commandFactory factory for creating SchedulerBatchCommand instances + * @param validatorFactory factory for creating StartConfigurationValidator instances + * @param useCaseFactory factory for creating BatchRunProcessingUseCase instances + * @param commandFactory factory for creating SchedulerBatchCommand instances */ public BootstrapRunner(ConfigurationPortFactory configPortFactory, RunLockPortFactory runLockPortFactory, @@ -145,11 +188,17 @@ public class BootstrapRunner { /** * Runs the application startup sequence. *

- * AP-003: Manually wires the object graph and invokes the CLI command. - * AP-005: Wires ConfigurationPort adapter and passes it to the use case. - * AP-006: Validates configuration before allowing processing to start. + * M4 additions: + *

    + *
  • Derives the SQLite JDBC URL from the configured {@code sqlite.file} path.
  • + *
  • Initialises the M4 SQLite schema via + * {@link PersistenceSchemaInitializationPort#initializeSchema()} before the + * batch document loop begins. A schema initialisation failure aborts the run + * with exit code 1.
  • + *
* - * @return exit code: 0 for success, 1 for invalid configuration or unexpected failure + * @return exit code: 0 for success, 1 for invalid configuration, schema failure, + * or unexpected bootstrap failure */ public int run() { LOG.info("Bootstrap flow started."); @@ -160,61 +209,105 @@ public class BootstrapRunner { // Step 2: Load configuration var config = configPort.loadConfiguration(); - // Step 3: Validate configuration (AP-006) + // Step 3: Validate configuration StartConfigurationValidator validator = validatorFactory.create(); validator.validate(config); - // Step 4: Resolve lock file path – apply default if not configured (AP-006) + // Step 4: Resolve lock file path – apply default if not configured Path lockFilePath = config.runtimeLockFile(); if (lockFilePath == null || lockFilePath.toString().isBlank()) { lockFilePath = Paths.get("pdf-umbenenner.lock"); - LOG.info("runtime.lock.file not configured, using default lock path: {}", lockFilePath.toAbsolutePath()); + LOG.info("runtime.lock.file not configured, using default lock path: {}", + lockFilePath.toAbsolutePath()); } RunLockPort runLockPort = runLockPortFactory.create(lockFilePath); - // Step 5: Create the batch run context - // Generate a unique run ID and initialize the run context + // Step 5 (M4): Initialise the SQLite schema before the batch loop begins. + // A failure here is a hard start error → exit code 1. + initializeSchema(config); + + // Step 6: Create the batch run context RunId runId = new RunId(UUID.randomUUID().toString()); BatchRunContext runContext = new BatchRunContext(runId, Instant.now()); LOG.info("Batch run started. RunId: {}", runId); - // Step 6: Create the use case with the validated config and run lock (application layer). + // Step 7: Create the use case with the validated config and run lock. // Config is passed directly; the use case does not re-read the properties file. - // Adapters (source document port, PDF extraction port) are wired by the factory. 
+ // Adapters (source document port, PDF extraction port, M4 ports) are wired by the factory. BatchRunProcessingUseCase useCase = useCaseFactory.create(config, runLockPort); - // Step 7: Create the CLI command adapter with the use case + // Step 8: Create the CLI command adapter with the use case SchedulerBatchCommand command = commandFactory.create(useCase); - // Step 8: Execute the command with the run context and handle the outcome + // Step 9: Execute the command with the run context and handle the outcome BatchRunOutcome outcome = command.run(runContext); - // Mark run as completed (AP-003) + // Mark run as completed runContext.setEndInstant(Instant.now()); if (outcome.isSuccess()) { LOG.info("Batch run completed successfully. RunId: {}", runContext.runId()); return 0; } else if (outcome.isLockUnavailable()) { - LOG.warn("Batch run aborted: another instance is already running. RunId: {}", runContext.runId()); + LOG.warn("Batch run aborted: another instance is already running. RunId: {}", + runContext.runId()); return 1; } else { LOG.error("Batch run failed. 
RunId: {}", runContext.runId()); return 1; } } catch (InvalidStartConfigurationException e) { - // Controlled failure for invalid configuration - log clearly without stack trace + // Controlled failure for invalid configuration – log clearly without stack trace LOG.error("Configuration validation failed: {}", e.getMessage()); return 1; } catch (IllegalStateException e) { // Configuration loading failed due to missing/invalid required properties - // Treat as invalid configuration for controlled failure LOG.error("Configuration loading failed: {}", e.getMessage()); return 1; + } catch (DocumentPersistenceException e) { + // Schema initialisation failed – hard start error + LOG.error("SQLite schema initialisation failed: {}", e.getMessage(), e); + return 1; } catch (Exception e) { LOG.error("Bootstrap failure during startup.", e); return 1; } } -} \ No newline at end of file + /** + * Initialises the M4 SQLite schema using the configured SQLite file path. + *

+ * This method is called once at startup, before the batch document loop begins. + * It uses the production {@link SqliteSchemaInitializationAdapter} directly because + * schema initialisation is a startup concern, not a per-document concern, and the + * {@link UseCaseFactory} abstraction is not the right place for it. + *

+ * If the {@code sqlite.file} configuration is null or blank, schema initialisation + * is skipped with a warning. This allows the existing test infrastructure (which + * uses the custom {@link UseCaseFactory}) to continue working without a real SQLite + * file. + * + * @param config the validated startup configuration + * @throws DocumentPersistenceException if schema initialisation fails + */ + private void initializeSchema(StartConfiguration config) { + if (config.sqliteFile() == null) { + LOG.warn("sqlite.file not configured – skipping schema initialisation."); + return; + } + String jdbcUrl = buildJdbcUrl(config); + PersistenceSchemaInitializationPort schemaPort = new SqliteSchemaInitializationAdapter(jdbcUrl); + schemaPort.initializeSchema(); + LOG.info("M4 SQLite schema initialised at: {}", jdbcUrl); + } + + /** + * Builds the JDBC URL for the SQLite database from the configured file path. + * + * @param config the startup configuration containing the SQLite file path + * @return the JDBC URL in the form {@code jdbc:sqlite:/path/to/file.db} + */ + static String buildJdbcUrl(StartConfiguration config) { + return "jdbc:sqlite:" + config.sqliteFile().toAbsolutePath().toString().replace('\\', '/'); + } +}