diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessor.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessor.java new file mode 100644 index 0000000..e0546a4 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessor.java @@ -0,0 +1,558 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown; +import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; +import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; +import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; +import 
de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.time.Instant; +import java.util.Objects; + +/** + * Application-level service that implements the M4 per-document processing logic. + *
+ * This service is the single authoritative place for the M4 decision rules: + * idempotency checks, status/counter mapping, and consistent two-level persistence. + * It is intentionally tightly scoped to AP-006 and contains no M5+ logic. + * + *
+ * For every identified document, both the processing attempt and the master record are + * written in sequence. If either write fails, the failure is logged and the batch run + * continues with the next candidate. No partial state is intentionally left; if the + * attempt write succeeds but the master record write fails, the inconsistency is bounded + * to that one document and is logged clearly. True transactionality across two separate + * repository calls is not available without a larger architectural change; this is + * documented as a known limitation of the M4 scope. + * + *
+ * Failures that occur before a successful fingerprint is available are not + * historised in SQLite. They are handled by the caller and logged as non-identifiable + * run events. + * + * @since M4-AP-006 + */ +public class M4DocumentProcessor { + + private static final Logger LOG = LogManager.getLogger(M4DocumentProcessor.class); + + private final DocumentRecordRepository documentRecordRepository; + private final ProcessingAttemptRepository processingAttemptRepository; + + /** + * Creates the M4 document processor with the required persistence ports. + * + * @param documentRecordRepository port for reading and writing the document master record; + * must not be null + * @param processingAttemptRepository port for writing and reading the attempt history; + * must not be null + * @throws NullPointerException if any parameter is null + */ + public M4DocumentProcessor( + DocumentRecordRepository documentRecordRepository, + ProcessingAttemptRepository processingAttemptRepository) { + this.documentRecordRepository = + Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null"); + this.processingAttemptRepository = + Objects.requireNonNull(processingAttemptRepository, "processingAttemptRepository must not be null"); + } + + /** + * Applies the full M4 processing logic for one identified document candidate. + *
+ * The caller must have already computed a valid {@link DocumentFingerprint} for the + * candidate. The M3 outcome (from the PDF extraction and pre-check pipeline) is + * provided as {@code m3Outcome} and is used only when the document is not in a + * terminal state. + *
+ * This method never throws. All persistence failures are caught, logged, and + * treated as controlled per-document failures so the batch run can continue. + * + * @param candidate the source document candidate being processed; must not be null + * @param fingerprint the successfully computed fingerprint for this candidate; + * must not be null + * @param m3Outcome the result of the M3 pipeline (PDF extraction + pre-checks); + * must not be null + * @param context the current batch run context (for run ID and timing); + * must not be null + * @param attemptStart the instant at which processing of this candidate began; + * must not be null + */ + public void process( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentProcessingOutcome m3Outcome, + BatchRunContext context, + Instant attemptStart) { + + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); + Objects.requireNonNull(m3Outcome, "m3Outcome must not be null"); + Objects.requireNonNull(context, "context must not be null"); + Objects.requireNonNull(attemptStart, "attemptStart must not be null"); + + // Step 1: Load the document master record + DocumentRecordLookupResult lookupResult = + documentRecordRepository.findByFingerprint(fingerprint); + + // Step 2: Handle persistence lookup failure – cannot safely proceed + if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) { + LOG.error("Cannot process '{}': master record lookup failed: {}", + candidate.uniqueIdentifier(), failure.errorMessage()); + return; + } + + // Step 3: Determine the action based on the lookup result + switch (lookupResult) { + case DocumentTerminalSuccess terminalSuccess -> { + // Document already successfully processed → skip + LOG.info("Skipping '{}': already successfully processed (fingerprint: {}).", + candidate.uniqueIdentifier(), fingerprint.sha256Hex()); + persistSkipAttempt( + candidate, fingerprint, 
terminalSuccess.record(), + ProcessingStatus.SKIPPED_ALREADY_PROCESSED, + context, attemptStart); + } + + case DocumentTerminalFinalFailure terminalFailure -> { + // Document finally failed → skip + LOG.info("Skipping '{}': already finally failed (fingerprint: {}).", + candidate.uniqueIdentifier(), fingerprint.sha256Hex()); + persistSkipAttempt( + candidate, fingerprint, terminalFailure.record(), + ProcessingStatus.SKIPPED_FINAL_FAILURE, + context, attemptStart); + } + + case DocumentUnknown ignored -> { + // New document – process and create a new master record + processAndPersistNewDocument(candidate, fingerprint, m3Outcome, context, attemptStart); + } + + case DocumentKnownProcessable knownProcessable -> { + // Known but not terminal – process and update the existing master record + processAndPersistKnownDocument( + candidate, fingerprint, m3Outcome, knownProcessable.record(), + context, attemptStart); + } + + default -> + // Exhaustive sealed hierarchy; this branch is unreachable + LOG.error("Unexpected lookup result type for '{}': {}", + candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName()); + } + } + + // ------------------------------------------------------------------------- + // Skip path + // ------------------------------------------------------------------------- + + /** + * Persists a skip attempt and updates the master record's {@code updatedAt} timestamp. + *
+ * Skip events do not change any failure counter. The master record's overall status + * remains unchanged (terminal). + * + * @param candidate the candidate being skipped + * @param fingerprint the document fingerprint + * @param existingRecord the current master record (already terminal) + * @param skipStatus the skip status to record ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} + * or {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}) + * @param context the current batch run context + * @param attemptStart the start instant of this processing attempt + */ + private void persistSkipAttempt( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentRecord existingRecord, + ProcessingStatus skipStatus, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt skipAttempt = new ProcessingAttempt( + fingerprint, + context.runId(), + attemptNumber, + attemptStart, + now, + skipStatus, + null, // no failure class for skip + null, // no failure message for skip + false // not retryable + ); + + // Write attempt first, then update master record + processingAttemptRepository.save(skipAttempt); + + // Update master record: only updatedAt changes; status and counters stay the same + DocumentRecord updatedRecord = new DocumentRecord( + existingRecord.fingerprint(), + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + existingRecord.overallStatus(), // terminal status unchanged + existingRecord.failureCounters(), // counters unchanged for skip + existingRecord.lastFailureInstant(), + existingRecord.lastSuccessInstant(), + existingRecord.createdAt(), + now // updatedAt = now + ); + documentRecordRepository.update(updatedRecord); + + LOG.debug("Skip attempt #{} persisted for '{}' with status {}.", + attemptNumber, candidate.uniqueIdentifier(), skipStatus); + + } catch 
(DocumentPersistenceException e) { + LOG.error("Failed to persist skip attempt for '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // New document path + // ------------------------------------------------------------------------- + + /** + * Processes a newly discovered document (no existing master record) and persists + * both the attempt and the new master record. + * + * @param candidate the candidate being processed + * @param fingerprint the document fingerprint + * @param m3Outcome the M3 pipeline result + * @param context the current batch run context + * @param attemptStart the start instant of this processing attempt + */ + private void processAndPersistNewDocument( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentProcessingOutcome m3Outcome, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + // Map M3 outcome to M4 status/counters for a brand-new document + M4Outcome m4 = mapM3OutcomeForNewDocument(m3Outcome); + + try { + // Attempt number is always 1 for a new document + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt attempt = buildAttempt( + fingerprint, context, attemptNumber, attemptStart, now, m4); + + // Create the new master record + DocumentRecord newRecord = new DocumentRecord( + fingerprint, + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters(), + m4.overallStatus() == ProcessingStatus.SUCCESS ? null : now, // lastFailureInstant + m4.overallStatus() == ProcessingStatus.SUCCESS ? 
now : null, // lastSuccessInstant + now, // createdAt + now // updatedAt + ); + + // Persist attempt first, then master record + processingAttemptRepository.save(attempt); + documentRecordRepository.create(newRecord); + + LOG.info("New document '{}' processed: status={}, contentErrors={}, transientErrors={}.", + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters().contentErrorCount(), + m4.counters().transientErrorCount()); + + } catch (DocumentPersistenceException e) { + LOG.error("Failed to persist processing result for new document '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // Known processable document path + // ------------------------------------------------------------------------- + + /** + * Processes a known but non-terminal document and updates both the attempt history + * and the master record. + * + * @param candidate the candidate being processed + * @param fingerprint the document fingerprint + * @param m3Outcome the M3 pipeline result + * @param existingRecord the current master record (not terminal) + * @param context the current batch run context + * @param attemptStart the start instant of this processing attempt + */ + private void processAndPersistKnownDocument( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentProcessingOutcome m3Outcome, + DocumentRecord existingRecord, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + // Map M3 outcome to M4 status/counters, taking existing counters into account + M4Outcome m4 = mapM3OutcomeForKnownDocument(m3Outcome, existingRecord.failureCounters()); + + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt attempt = buildAttempt( + fingerprint, context, attemptNumber, attemptStart, now, m4); + + // Update the master record with new status, counters and 
timestamps + DocumentRecord updatedRecord = new DocumentRecord( + existingRecord.fingerprint(), + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters(), + m4.overallStatus() == ProcessingStatus.SUCCESS + ? existingRecord.lastFailureInstant() : now, + m4.overallStatus() == ProcessingStatus.SUCCESS + ? now : existingRecord.lastSuccessInstant(), + existingRecord.createdAt(), + now // updatedAt + ); + + // Persist attempt first, then master record + processingAttemptRepository.save(attempt); + documentRecordRepository.update(updatedRecord); + + LOG.info("Known document '{}' processed: status={}, contentErrors={}, transientErrors={}.", + candidate.uniqueIdentifier(), + m4.overallStatus(), + m4.counters().contentErrorCount(), + m4.counters().transientErrorCount()); + + } catch (DocumentPersistenceException e) { + LOG.error("Failed to persist processing result for known document '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + } + } + + // ------------------------------------------------------------------------- + // M3 → M4 outcome mapping + // ------------------------------------------------------------------------- + + /** + * Maps an M3 outcome to M4 status, counters, and retryable flag for a brand-new + * document (no prior history, counters start at zero). + * + * @param m3Outcome the M3 pipeline result + * @return the M4 outcome with status, counters and retryable flag + */ + private M4Outcome mapM3OutcomeForNewDocument(DocumentProcessingOutcome m3Outcome) { + return mapM3OutcomeForKnownDocument(m3Outcome, FailureCounters.zero()); + } + + /** + * Maps an M3 outcome to M4 status, counters, and retryable flag, taking the + * existing failure counters into account. + *
+ * M4 minimal rules applied here: + *
+ * Tightly scoped to {@link M4DocumentProcessor}; not exposed outside this class. + * + * @param overallStatus the M4 overall status to persist + * @param counters the updated failure counters to persist + * @param retryable whether the failure is retryable in a later run + */ + private record M4Outcome( + ProcessingStatus overallStatus, + FailureCounters counters, + boolean retryable) { + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java index 4fa7068..64107ad 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java @@ -1,28 +1,65 @@ /** - * Application-level services for business logic evaluation. + * Application-level services for business logic evaluation and M4 orchestration. *
- * This package contains stateless, pure-logic services that evaluate document content - * and apply business rules. Services in this package: + * This package contains stateless, pure-logic services that evaluate document content, + * apply business rules, and orchestrate the M4 per-document processing flow. + * Services in this package: *
+ * The {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor} + * implements the mandatory M4 processing order per candidate: + *
+ * For every identified document, the processing attempt and the master record are + * written in sequence. If either write fails, the failure is caught and logged; + * the batch run continues with the next candidate. True transactionality across + * two separate repository calls is not available in the M4 scope; this is a known + * and documented limitation. + * + *
+ * Failures that occur before a successful fingerprint is available are not handled + * by this package. They are handled by the use case and are not historised in SQLite. */ package de.gecheckt.pdf.umbenenner.application.service; diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java index c61f77a..669c749 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java @@ -3,54 +3,80 @@ package de.gecheckt.pdf.umbenenner.application.usecase; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError; import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService; +import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor; import 
de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; -import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; -import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; -import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; -import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; -import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; -import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.time.Instant; import java.util.List; +import java.util.Objects; /** * Batch processing implementation of {@link BatchRunProcessingUseCase}. *
- * Orchestrates the complete batch processing workflow: + * Orchestrates the complete M4 batch processing workflow per candidate: *
- * Processing boundary: - *
- * Non-Goals (not implemented): + * If the fingerprint computation fails (e.g. the file is no longer readable), the + * candidate is logged as a non-identifiable run event and is not historised + * in SQLite. The batch run continues with the next candidate. + * + *
+ * For every identified document, the processing attempt and the master record are + * written in sequence by {@link M4DocumentProcessor}. Persistence failures for a single + * document are caught and logged; the batch run continues with the remaining candidates. + * + *
* The configuration is loaded and validated by Bootstrap before use case creation;
- * the use case receives the result directly and does not re-read it.
+ * the use case receives the result directly and does not re-read the properties file.
*
- * @param configuration the validated startup configuration
- * @param runLockPort for exclusive run locking
- * @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder
- * @param pdfTextExtractionPort for extracting text and page count from a single PDF
+ * @param configuration the validated startup configuration; must not be null
+ * @param runLockPort for exclusive run locking; must not be null
+ * @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder;
+ * must not be null
+ * @param pdfTextExtractionPort for extracting text and page count from a single PDF;
+ * must not be null
+ * @param fingerprintPort for computing the SHA-256 fingerprint of a candidate;
+ * must not be null
+ * @param m4DocumentProcessor for applying M4 decision logic and persisting results;
+ * must not be null
* @throws NullPointerException if any parameter is null
*/
public DefaultBatchRunProcessingUseCase(
StartConfiguration configuration,
RunLockPort runLockPort,
SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
- PdfTextExtractionPort pdfTextExtractionPort) {
- this.configuration = configuration;
- this.runLockPort = runLockPort;
- this.sourceDocumentCandidatesPort = sourceDocumentCandidatesPort;
- this.pdfTextExtractionPort = pdfTextExtractionPort;
+ PdfTextExtractionPort pdfTextExtractionPort,
+ FingerprintPort fingerprintPort,
+ M4DocumentProcessor m4DocumentProcessor) { // all collaborators are mandatory; nulls are rejected immediately below
+ this.configuration = Objects.requireNonNull(configuration, "configuration must not be null");
+ this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null");
+ this.sourceDocumentCandidatesPort = Objects.requireNonNull(
+ sourceDocumentCandidatesPort, "sourceDocumentCandidatesPort must not be null");
+ this.pdfTextExtractionPort = Objects.requireNonNull(
+ pdfTextExtractionPort, "pdfTextExtractionPort must not be null");
+ this.fingerprintPort = Objects.requireNonNull(fingerprintPort, "fingerprintPort must not be null");
+ this.m4DocumentProcessor = Objects.requireNonNull(
+ m4DocumentProcessor, "m4DocumentProcessor must not be null");
}
@Override
@@ -96,12 +138,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
lockAcquired = true;
LOG.debug("Run lock acquired successfully.");
} catch (RunLockUnavailableException e) {
- LOG.warn("Run lock not available – another instance is already running. This instance terminates immediately.");
+ LOG.warn("Run lock not available – another instance is already running. "
+ + "This instance terminates immediately.");
return BatchRunOutcome.LOCK_UNAVAILABLE;
}
- LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder());
- LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant());
+ LOG.debug("Configuration in use: source={}, target={}",
+ configuration.sourceFolder(), configuration.targetFolder());
+ LOG.info("Batch run started. RunId: {}, Start: {}",
+ context.runId(), context.startInstant());
// Step 2: Load PDF candidates from source folder
List
- * Processing steps per document:
+ * M4 processing order:
*
- * Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall
- * batch run. Each candidate ends controlled regardless of its outcome.
- *
- * Processing boundary: no KI call, no persistence, no filename generation,
- * no target file copy is initiated here, even for candidates that pass all pre-checks.
+ * Per-document errors do not abort the overall batch run. Each candidate ends
+ * controlled regardless of its outcome.
*
* @param candidate the candidate to process
+ * @param context the current batch run context
*/
- private void processCandidate(SourceDocumentCandidate candidate) {
+ private void processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) {
LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier());
- PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
+ // Record the attempt start instant before any work begins
+ Instant attemptStart = Instant.now();
+
+ // Step M4-1: Compute fingerprint
+ FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
+
+ switch (fingerprintResult) {
+ case FingerprintTechnicalError fingerprintError -> {
+ // Pre-fingerprint failure: not historised in SQLite
+ LOG.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).",
+ candidate.uniqueIdentifier(), fingerprintError.errorMessage());
+ return;
+ }
+
+ case FingerprintSuccess fingerprintSuccess -> {
+ DocumentFingerprint fingerprint = fingerprintSuccess.fingerprint();
+ LOG.debug("Fingerprint computed for '{}': {}",
+ candidate.uniqueIdentifier(), fingerprint.sha256Hex());
+
+ // Step M4-2..M4-8: Execute M3 pipeline and delegate M4 logic to the processor.
+ // NOTE: the M3 pipeline is run eagerly for every fingerprinted candidate,
+ // including documents that are already in a terminal state; the terminal
+ // check happens inside M4DocumentProcessor, which is documented to use the
+ // M3 outcome only for non-terminal documents.
+ DocumentProcessingOutcome m3Outcome = runM3Pipeline(candidate);
+
+ // Delegate idempotency check, status mapping, and persistence to M4DocumentProcessor
+ m4DocumentProcessor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
+ }
+ }
+ }
+
+ /**
+ * Runs the M3 pipeline (PDF text extraction + pre-checks) for the given candidate.
+ *
+ * The method performs two stages and logs the result of each: first the text and
+ * page count are extracted via the PDF text extraction port (outcome logged at DEBUG),
+ * then the extraction result is evaluated by
+ * {@link DocumentProcessingService#processDocument} together with the start
+ * configuration (pre-check results logged at INFO, technical errors at WARN).
+ *
+ * This method is called after a successful fingerprint computation. It does not consult
+ * any persistence state itself, so it runs for every fingerprinted candidate; the result
+ * is passed to {@link M4DocumentProcessor}, which is documented to apply it only when
+ * the document is not in a terminal state.
+ *
+ * NOTE(review): the switch labels below use fully qualified domain type names instead
+ * of imports — consider importing the types for readability.
+ *
+ * @param candidate the candidate to run through the M3 pipeline
+ * @return the M3 pipeline outcome (pre-check passed, pre-check failed, or technical error)
+ */
+ private DocumentProcessingOutcome runM3Pipeline(SourceDocumentCandidate candidate) {
+ PdfExtractionResult extractionResult =
+ pdfTextExtractionPort.extractTextAndPageCount(candidate);
// Log extraction outcome
switch (extractionResult) {
- case PdfExtractionSuccess success ->
+ case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess success ->
LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.",
- candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length());
- case PdfExtractionContentError contentError ->
+ candidate.uniqueIdentifier(),
+ success.pageCount().value(),
+ success.extractedText().length());
+ case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError contentError ->
LOG.debug("PDF content extraction failed for '{}' (content problem): {}",
candidate.uniqueIdentifier(), contentError.reason());
- case PdfExtractionTechnicalError technicalError ->
+ case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError technicalError ->
LOG.debug("PDF extraction technical error for '{}': {}",
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
- // Process through complete pipeline
- var outcome = DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
+ DocumentProcessingOutcome outcome =
+ DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
- // Log processing outcome
+ // Log M3 outcome: pre-check results at INFO, technical errors at WARN
switch (outcome) {
- case PreCheckPassed passed ->
- LOG.info("Pre-checks PASSED for '{}'. Candidate ready for further processing.",
+ case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed ->
+ LOG.info("Pre-checks PASSED for '{}'. Candidate ready for M4 persistence.",
candidate.uniqueIdentifier());
- case PreCheckFailed failed ->
- LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).",
+ case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
+ LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
candidate.uniqueIdentifier(), failed.failureReasonDescription());
- case TechnicalDocumentError technicalError ->
- LOG.warn("Processing FAILED for '{}': {} (Technical error – may retry in later run).",
+ case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
+ LOG.warn("Processing FAILED for '{}': {} (Technical error – retryable).",
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
+
+ return outcome;
}
-}
\ No newline at end of file
+}
diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java
index 4a9e1c4..da46568 100644
--- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java
+++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/package-info.java
@@ -4,9 +4,25 @@
* Implementations:
*
+ *
+ * For each candidate, {@link de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase}
+ * enforces this order:
+ *
* All implementations are infrastructure-agnostic and interact only through ports.
+ *
+ * @since M2 (extended in M4-AP-006)
*/
-package de.gecheckt.pdf.umbenenner.application.usecase;
\ No newline at end of file
+package de.gecheckt.pdf.umbenenner.application.usecase;
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessorTest.java
new file mode 100644
index 0000000..39761cf
--- /dev/null
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M4DocumentProcessorTest.java
@@ -0,0 +1,425 @@
+package de.gecheckt.pdf.umbenenner.application.service;
+
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
+import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
+import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
+import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
+import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
+import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
+import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
+import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
+import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
+import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
+import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
+import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
+import de.gecheckt.pdf.umbenenner.domain.model.RunId;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
+import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
+
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Unit tests for {@link M4DocumentProcessor}.
+ *
+ * Covers:
+ *
* Responsibilities:
*
- * Exit code semantics:
+ *
+ *
+ * The production constructor wires the following M4 adapters:
+ *
* Receives the already-loaded and validated {@link StartConfiguration} and run lock port.
* The factory is responsible for creating and wiring any additional outbound ports
- * required by the use case (e.g., source document port, PDF extraction port).
+ * required by the use case (e.g., source document port, PDF extraction port, M4 ports).
*/
@FunctionalInterface
public interface UseCaseFactory {
@@ -101,34 +126,52 @@ public class BootstrapRunner {
/**
* Creates the BootstrapRunner with default factories for production use.
*
- * Wires the full processing pipeline:
+ * Wires the full M4 processing pipeline:
*
+ * Schema initialisation is performed in {@link #run()} before the use case is created,
+ * using {@link SqliteSchemaInitializationAdapter}.
*/
public BootstrapRunner() {
this.configPortFactory = PropertiesConfigurationPortAdapter::new;
this.runLockPortFactory = FilesystemRunLockPortAdapter::new;
this.validatorFactory = StartConfigurationValidator::new;
- this.useCaseFactory = (config, lock) -> new DefaultBatchRunProcessingUseCase(
- config,
- lock,
- new SourceDocumentCandidatesPortAdapter(config.sourceFolder()),
- new PdfTextExtractionPortAdapter());
+ this.useCaseFactory = (config, lock) -> { // block lambda: the M4 collaborators are built on every factory call
+ String jdbcUrl = buildJdbcUrl(config); // single JDBC URL, passed to both SQLite repository adapters below
+ FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
+ DocumentRecordRepository documentRecordRepository =
+ new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
+ ProcessingAttemptRepository processingAttemptRepository =
+ new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
+ M4DocumentProcessor m4Processor =
+ new M4DocumentProcessor(documentRecordRepository, processingAttemptRepository); // processor receives both persistence ports
+ return new DefaultBatchRunProcessingUseCase(
+ config,
+ lock,
+ new SourceDocumentCandidatesPortAdapter(config.sourceFolder()),
+ new PdfTextExtractionPortAdapter(),
+ fingerprintPort,
+ m4Processor); // same source/extraction adapters as before, plus the fingerprint port and M4 processor
+ };
this.commandFactory = SchedulerBatchCommand::new;
}
/**
* Creates the BootstrapRunner with custom factories for testing.
*
- * @param configPortFactory factory for creating ConfigurationPort instances
+ * @param configPortFactory factory for creating ConfigurationPort instances
* @param runLockPortFactory factory for creating RunLockPort instances
- * @param validatorFactory factory for creating StartConfigurationValidator instances
- * @param useCaseFactory factory for creating BatchRunProcessingUseCase instances
- * @param commandFactory factory for creating SchedulerBatchCommand instances
+ * @param validatorFactory factory for creating StartConfigurationValidator instances
+ * @param useCaseFactory factory for creating BatchRunProcessingUseCase instances
+ * @param commandFactory factory for creating SchedulerBatchCommand instances
*/
public BootstrapRunner(ConfigurationPortFactory configPortFactory,
RunLockPortFactory runLockPortFactory,
@@ -145,11 +188,17 @@ public class BootstrapRunner {
/**
* Runs the application startup sequence.
*
- * AP-003: Manually wires the object graph and invokes the CLI command.
- * AP-005: Wires ConfigurationPort adapter and passes it to the use case.
- * AP-006: Validates configuration before allowing processing to start.
+ * M4 additions:
+ *
+ * This method is called once at startup, before the batch document loop begins.
+ * It uses the production {@link SqliteSchemaInitializationAdapter} directly because
+ * schema initialisation is a startup concern, not a per-document concern, and the
+ * {@link UseCaseFactory} abstraction is not the right place for it.
+ *
+ * If the {@code sqlite.file} configuration is null or blank, schema initialisation
+ * is skipped with a warning. This allows the existing test infrastructure (which
+ * uses the custom {@link UseCaseFactory}) to continue working without a real SQLite
+ * file.
+ *
+ * @param config the validated startup configuration
+ * @throws DocumentPersistenceException if schema initialisation fails
+ */
+    private void initializeSchema(StartConfiguration config) {
+        // Skip when no usable SQLite path is configured. The documented contract is
+        // "null or blank"; checking only for null let a blank path fall through to
+        // buildJdbcUrl, where it would be turned into an absolute path and used as
+        // the database location.
+        if (config.sqliteFile() == null || config.sqliteFile().toString().isBlank()) {
+            LOG.warn("sqlite.file not configured – skipping schema initialisation.");
+            return;
+        }
+        String jdbcUrl = buildJdbcUrl(config);
+        PersistenceSchemaInitializationPort schemaPort = new SqliteSchemaInitializationAdapter(jdbcUrl);
+        // Failures are not caught here; they propagate to the caller of this method.
+        schemaPort.initializeSchema();
+        LOG.info("M4 SQLite schema initialised at: {}", jdbcUrl);
+    }
+
+    /**
+     * Derives the SQLite JDBC URL from the configured database file.
+     * <p>
+     * The configured path is made absolute, backslashes are replaced by forward
+     * slashes, and the {@code jdbc:sqlite:} prefix is prepended.
+     *
+     * @param config the startup configuration containing the SQLite file path
+     * @return the JDBC URL in the form {@code jdbc:sqlite:/path/to/file.db}
+     */
+    static String buildJdbcUrl(StartConfiguration config) {
+        String absolutePath = config.sqliteFile().toAbsolutePath().toString();
+        String forwardSlashPath = absolutePath.replace('\\', '/');
+        return "jdbc:sqlite:" + forwardSlashPath;
+    }
+}
- *
*
*
* M4 processing order (AP-006)
+ *
+ *
+ *
+ *
+ */
+class M4DocumentProcessorTest {
+
+ private static final String FINGERPRINT_HEX =
+ "a".repeat(64); // 64 lowercase hex chars
+
+ private CapturingDocumentRecordRepository recordRepo;
+ private CapturingProcessingAttemptRepository attemptRepo;
+ private M4DocumentProcessor processor;
+
+ private SourceDocumentCandidate candidate;
+ private DocumentFingerprint fingerprint;
+ private BatchRunContext context;
+ private Instant attemptStart;
+
+    @BeforeEach
+    void setUp() {
+        // Fresh capturing doubles for every test, wired into the processor under test.
+        recordRepo = new CapturingDocumentRecordRepository();
+        attemptRepo = new CapturingProcessingAttemptRepository();
+        processor = new M4DocumentProcessor(recordRepo, attemptRepo);
+
+        // Shared fixture values passed to processor.process(...) by the tests.
+        SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/test.pdf");
+        candidate = new SourceDocumentCandidate("test.pdf", 1024L, locator);
+        fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
+        RunId runId = new RunId("run-001");
+        context = new BatchRunContext(runId, Instant.now());
+        attemptStart = Instant.now();
+    }
+
+ // -------------------------------------------------------------------------
+ // New document (DocumentUnknown) path
+ // -------------------------------------------------------------------------
+
+    @Test
+    void process_newDocument_preCheckPassed_persistsSuccessStatus() {
+        // Arrange: the lookup reports an unknown document and the pre-checks passed.
+        recordRepo.setLookupResult(new DocumentUnknown());
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        // Assert: exactly one attempt, a non-retryable SUCCESS without failure details.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.SUCCESS, savedAttempt.status());
+        assertFalse(savedAttempt.retryable());
+        assertNull(savedAttempt.failureClass());
+        assertNull(savedAttempt.failureMessage());
+
+        // Assert: exactly one master record created, SUCCESS with both counters at zero.
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord createdRecord = recordRepo.createdRecords.get(0);
+        assertEquals(ProcessingStatus.SUCCESS, createdRecord.overallStatus());
+        FailureCounters counters = createdRecord.failureCounters();
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+        assertNotNull(createdRecord.lastSuccessInstant());
+        assertNull(createdRecord.lastFailureInstant());
+    }
+
+    @Test
+    void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
+        // Arrange: unknown document whose pre-check failed for lack of usable text.
+        recordRepo.setLookupResult(new DocumentUnknown());
+        DocumentProcessingOutcome failedOutcome =
+                new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+
+        // Act
+        processor.process(candidate, fingerprint, failedOutcome, context, attemptStart);
+
+        // Assert: the single saved attempt is retryable.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, savedAttempt.status());
+        assertTrue(savedAttempt.retryable());
+
+        // Assert: the new master record counts one content error and no transient error.
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord createdRecord = recordRepo.createdRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, createdRecord.overallStatus());
+        FailureCounters counters = createdRecord.failureCounters();
+        assertEquals(1, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+        assertNotNull(createdRecord.lastFailureInstant());
+        assertNull(createdRecord.lastSuccessInstant());
+    }
+
+    @Test
+    void process_newDocument_technicalError_persistsFailedRetryable_transientCounterOne() {
+        // Arrange: unknown document that ended with a technical error (no cause attached).
+        recordRepo.setLookupResult(new DocumentUnknown());
+        DocumentProcessingOutcome technicalOutcome =
+                new TechnicalDocumentError(candidate, "I/O error", null);
+
+        // Act
+        processor.process(candidate, fingerprint, technicalOutcome, context, attemptStart);
+
+        // Assert: the single saved attempt is retryable.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, savedAttempt.status());
+        assertTrue(savedAttempt.retryable());
+
+        // Assert: the new master record counts one transient error and no content error.
+        assertEquals(1, recordRepo.createdRecords.size());
+        DocumentRecord createdRecord = recordRepo.createdRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, createdRecord.overallStatus());
+        FailureCounters counters = createdRecord.failureCounters();
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(1, counters.transientErrorCount());
+    }
+
+ // -------------------------------------------------------------------------
+ // Known processable document path (DocumentKnownProcessable)
+ // -------------------------------------------------------------------------
+
+    @Test
+    void process_knownDocument_secondContentError_persistsFailedFinal_contentCounterTwo() {
+        // Arrange: the stored record already carries one content error.
+        FailureCounters oneContentError = new FailureCounters(1, 0);
+        DocumentRecord storedRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, oneContentError);
+        recordRepo.setLookupResult(new DocumentKnownProcessable(storedRecord));
+        DocumentProcessingOutcome failedOutcome =
+                new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
+
+        // Act
+        processor.process(candidate, fingerprint, failedOutcome, context, attemptStart);
+
+        // Assert: the single saved attempt is final and no longer retryable.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, savedAttempt.status());
+        assertFalse(savedAttempt.retryable());
+
+        // Assert: the master record is updated to FAILED_FINAL with two content errors.
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updatedRecord = recordRepo.updatedRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, updatedRecord.overallStatus());
+        FailureCounters counters = updatedRecord.failureCounters();
+        assertEquals(2, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+    }
+
+    @Test
+    void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
+        // Arrange: the stored record already carries two transient errors.
+        DocumentRecord existingRecord = buildRecord(
+                ProcessingStatus.FAILED_RETRYABLE,
+                new FailureCounters(0, 2));
+        recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+
+        DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
+                candidate, "Timeout", null);
+
+        // Act
+        processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
+
+        // Assert: the master record stays retryable and only the transient counter grows.
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord record = recordRepo.updatedRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
+        assertEquals(0, record.failureCounters().contentErrorCount());
+        assertEquals(3, record.failureCounters().transientErrorCount());
+
+        // Check the attempt count before indexing, as the sibling tests do, so a
+        // missing attempt fails as an assertion and not as IndexOutOfBoundsException.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        assertTrue(attemptRepo.savedAttempts.get(0).retryable());
+    }
+
+    @Test
+    void process_knownDocument_preCheckPassed_persistsSuccess() {
+        // Arrange: a retryable record with one transient error, now passing the pre-checks.
+        FailureCounters priorCounters = new FailureCounters(0, 1);
+        DocumentRecord storedRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, priorCounters);
+        recordRepo.setLookupResult(new DocumentKnownProcessable(storedRecord));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        // Assert: one update to SUCCESS; the counters keep their previous values.
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updatedRecord = recordRepo.updatedRecords.get(0);
+        assertEquals(ProcessingStatus.SUCCESS, updatedRecord.overallStatus());
+        FailureCounters counters = updatedRecord.failureCounters();
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(1, counters.transientErrorCount());
+        assertNotNull(updatedRecord.lastSuccessInstant());
+    }
+
+ // -------------------------------------------------------------------------
+ // Skip paths
+ // -------------------------------------------------------------------------
+
+    @Test
+    void process_terminalSuccess_persistsSkipAttemptWithSkippedAlreadyProcessed() {
+        // Arrange: the lookup reports a record that is already in terminal SUCCESS.
+        DocumentRecord storedRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
+        recordRepo.setLookupResult(new DocumentTerminalSuccess(storedRecord));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        // Assert: one skip attempt, not retryable and without failure details.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, skipAttempt.status());
+        assertFalse(skipAttempt.retryable());
+        assertNull(skipAttempt.failureClass());
+        assertNull(skipAttempt.failureMessage());
+
+        // Assert: one master-record update; status and counters stay as they were.
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updatedRecord = recordRepo.updatedRecords.get(0);
+        assertEquals(ProcessingStatus.SUCCESS, updatedRecord.overallStatus());
+        FailureCounters counters = updatedRecord.failureCounters();
+        assertEquals(0, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+    }
+
+    @Test
+    void process_terminalFinalFailure_persistsSkipAttemptWithSkippedFinalFailure() {
+        // Arrange: the lookup reports a record that already failed finally (two content errors).
+        FailureCounters finalCounters = new FailureCounters(2, 0);
+        DocumentRecord storedRecord = buildRecord(ProcessingStatus.FAILED_FINAL, finalCounters);
+        recordRepo.setLookupResult(new DocumentTerminalFinalFailure(storedRecord));
+        DocumentProcessingOutcome failedOutcome =
+                new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
+
+        // Act
+        processor.process(candidate, fingerprint, failedOutcome, context, attemptStart);
+
+        // Assert: one skip attempt that is not retryable.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
+        assertFalse(skipAttempt.retryable());
+
+        // Assert: one master-record update; status and counters stay as they were.
+        assertEquals(1, recordRepo.updatedRecords.size());
+        DocumentRecord updatedRecord = recordRepo.updatedRecords.get(0);
+        assertEquals(ProcessingStatus.FAILED_FINAL, updatedRecord.overallStatus());
+        FailureCounters counters = updatedRecord.failureCounters();
+        assertEquals(2, counters.contentErrorCount());
+        assertEquals(0, counters.transientErrorCount());
+    }
+
+    @Test
+    void process_skipEvent_doesNotChangeErrorCounters() {
+        // Arrange: a terminal-success record that still carries non-zero counters.
+        FailureCounters originalCounters = new FailureCounters(1, 3);
+        DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, originalCounters);
+        recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
+
+        // Act
+        processor.process(candidate, fingerprint,
+                new PreCheckPassed(candidate, new PdfExtractionSuccess("t", new PdfPageCount(1))),
+                context, attemptStart);
+
+        // Check the update count before indexing, as the sibling skip tests do, so a
+        // missing update fails as an assertion and not as IndexOutOfBoundsException.
+        assertEquals(1, recordRepo.updatedRecords.size(),
+                "Skip must update exactly one master record");
+
+        // Assert: both counters are carried over unchanged.
+        DocumentRecord updated = recordRepo.updatedRecords.get(0);
+        assertEquals(originalCounters.contentErrorCount(), updated.failureCounters().contentErrorCount(),
+                "Skip must not change content error counter");
+        assertEquals(originalCounters.transientErrorCount(), updated.failureCounters().transientErrorCount(),
+                "Skip must not change transient error counter");
+    }
+
+ // -------------------------------------------------------------------------
+ // Persistence lookup failure
+ // -------------------------------------------------------------------------
+
+    @Test
+    void process_persistenceLookupFailure_noAttemptWritten_noException() {
+        // Arrange: the record lookup itself reports a technical failure.
+        recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("DB unavailable", null));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act + assert: processing must return normally.
+        assertDoesNotThrow(() ->
+                processor.process(candidate, fingerprint, passedOutcome, context, attemptStart));
+
+        // Assert: nothing was persisted - no attempt, no created record, no updated record.
+        assertEquals(0, attemptRepo.savedAttempts.size(),
+                "No attempt must be written when lookup fails");
+        assertEquals(0, recordRepo.createdRecords.size());
+        assertEquals(0, recordRepo.updatedRecords.size());
+    }
+
+ // -------------------------------------------------------------------------
+ // Persistence write failure: controlled, no crash
+ // -------------------------------------------------------------------------
+
+    @Test
+    void process_persistenceWriteFailure_doesNotThrow_batchContinues() {
+        // Arrange: unknown document, and the attempt repository double is told to fail on save.
+        recordRepo.setLookupResult(new DocumentUnknown());
+        attemptRepo.failOnSave = true;
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act + assert: the write failure must not escape process(...).
+        assertDoesNotThrow(() ->
+                processor.process(candidate, fingerprint, passedOutcome, context, attemptStart));
+    }
+
+ // -------------------------------------------------------------------------
+ // Attempt number monotonicity
+ // -------------------------------------------------------------------------
+
+    @Test
+    void process_attemptNumberIsAssignedFromRepository() {
+        // Arrange: the repository double hands out 3 as the next attempt number,
+        // as if two attempts had been stored before.
+        recordRepo.setLookupResult(new DocumentUnknown());
+        attemptRepo.nextAttemptNumber = 3;
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);
+
+        // Act
+        processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);
+
+        // Assert: the saved attempt carries exactly the number supplied by the repository.
+        assertEquals(1, attemptRepo.savedAttempts.size());
+        ProcessingAttempt savedAttempt = attemptRepo.savedAttempts.get(0);
+        assertEquals(3, savedAttempt.attemptNumber(),
+                "Attempt number must be taken from the repository");
+    }
+
+ // -------------------------------------------------------------------------
+ // Helpers
+ // -------------------------------------------------------------------------
+
+    /**
+     * Builds a {@link DocumentRecord} fixture for the shared test fingerprint.
+     *
+     * @param status   overall status the record should carry
+     * @param counters failure counters the record should carry
+     * @return a record whose timestamp arguments all derive from one {@code Instant.now()} call
+     */
+    private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) {
+        Instant timestamp = Instant.now();
+        boolean succeeded = status == ProcessingStatus.SUCCESS;
+        // Exactly one of the two nullable instants is set, depending on the status.
+        // NOTE(review): presumably these are lastFailureInstant then lastSuccessInstant -
+        // confirm against the DocumentRecord constructor.
+        Instant setUnlessSuccess = succeeded ? null : timestamp;
+        Instant setOnSuccess = succeeded ? timestamp : null;
+        SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/test.pdf");
+        return new DocumentRecord(
+                fingerprint,
+                locator,
+                "test.pdf",
+                status,
+                counters,
+                setUnlessSuccess,
+                setOnSuccess,
+                timestamp,
+                timestamp);
+    }
+
+ // -------------------------------------------------------------------------
+ // Capturing test doubles
+ // -------------------------------------------------------------------------
+
+ private static class CapturingDocumentRecordRepository implements DocumentRecordRepository {
+ private DocumentRecordLookupResult lookupResult = new DocumentUnknown();
+ final List
*
*/
class BatchRunProcessingUseCaseTest {
@@ -57,8 +70,9 @@ class BatchRunProcessingUseCaseTest {
MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir);
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+ new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -73,8 +87,9 @@ class BatchRunProcessingUseCaseTest {
CountingRunLockPort lockPort = new CountingRunLockPort(true);
StartConfiguration config = buildConfig(tempDir);
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+ new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -92,8 +107,9 @@ class BatchRunProcessingUseCaseTest {
CountingRunLockPort lockPort = new CountingRunLockPort(true);
StartConfiguration config = buildConfig(tempDir);
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+ new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now());
useCase.execute(context);
@@ -108,8 +124,9 @@ class BatchRunProcessingUseCaseTest {
ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort();
StartConfiguration config = buildConfig(tempDir);
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+ new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -128,8 +145,9 @@ class BatchRunProcessingUseCaseTest {
MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir);
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
+ new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("empty"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -138,25 +156,26 @@ class BatchRunProcessingUseCaseTest {
}
@Test
- void execute_happyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception {
+ void execute_happyPath_candidatePassesPreChecks_m4PersistenceInvoked() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir);
- // Candidate with usable text within page limit
SourceDocumentCandidate candidate = makeCandidate("document.pdf");
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice text", new PdfPageCount(1));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("happy"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
- // Batch run succeeds; document ended controlled at boundary (no KI, no copy)
assertTrue(outcome.isSuccess(), "Happy path should yield SUCCESS");
assertEquals(1, extractionPort.callCount(), "Extraction should be called exactly once");
+ assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called exactly once");
}
@Test
@@ -165,40 +184,42 @@ class BatchRunProcessingUseCaseTest {
StartConfiguration config = buildConfig(tempDir);
SourceDocumentCandidate candidate = makeCandidate("image-only.pdf");
- // Extraction returns text with no letters or digits
PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess(" ", new PdfPageCount(1));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess);
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("no-text"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
- // Document ends with pre-check failure; batch itself still succeeds
assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run");
+ assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for content errors");
}
@Test
void execute_pageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort();
- // Config has maxPages=3; document has 10 pages
StartConfiguration config = buildConfig(tempDir);
SourceDocumentCandidate candidate = makeCandidate("big.pdf");
PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages);
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("page-limit"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
- // maxPages in buildConfig is 3; 10 pages exceeds limit – pre-check fails, batch continues
assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run");
+ assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for page limit errors");
}
@Test
@@ -210,14 +231,17 @@ class BatchRunProcessingUseCaseTest {
PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted");
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(contentError);
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("content-error"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run");
+ assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for content errors");
}
@Test
@@ -229,14 +253,17 @@ class BatchRunProcessingUseCaseTest {
PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null);
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError);
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("tech-error"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run");
+ assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for technical errors");
}
@Test
@@ -248,18 +275,71 @@ class BatchRunProcessingUseCaseTest {
throw new SourceDocumentAccessException("Source folder not readable");
};
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, failingPort, new NoOpExtractionPort());
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, failingPort, new NoOpExtractionPort(),
+ new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("access-fail"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isFailure(), "Source folder access failure should yield FAILURE outcome");
assertFalse(outcome.isSuccess(), "Source folder access failure must not be SUCCESS");
- // Lock must still be released
assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even when source access fails");
}
+ // -------------------------------------------------------------------------
+ // M4-specific: fingerprint failure → not historised
+ // -------------------------------------------------------------------------
+
+    /**
+     * A candidate whose fingerprint cannot be computed must be skipped without failing
+     * the run: the batch outcome stays SUCCESS and the M4 processor is never invoked.
+     */
+    @Test
+    void execute_fingerprintFailure_candidateNotHistorised_batchContinues() throws Exception {
+        // Arrange: one candidate, and a fingerprint port that reports a technical error for it.
+        MockRunLockPort runLock = new MockRunLockPort();
+        StartConfiguration startConfig = buildConfig(tempDir);
+        FixedCandidatesPort singleCandidatePort =
+                new FixedCandidatesPort(List.of(makeCandidate("unreadable.pdf")));
+        TrackingM4DocumentProcessor trackingProcessor = new TrackingM4DocumentProcessor();
+        FingerprintPort failingFingerprintPort =
+                ignoredCandidate -> new FingerprintTechnicalError("Cannot read file", null);
+
+        // Act
+        BatchRunOutcome result = buildUseCase(
+                startConfig, runLock, singleCandidatePort, new NoOpExtractionPort(),
+                failingFingerprintPort, trackingProcessor)
+                .execute(new BatchRunContext(new RunId("fp-fail"), Instant.now()));
+
+        // Assert: the run survives and nothing was handed to the M4 processor.
+        assertTrue(result.isSuccess(), "Fingerprint failure should not abort the batch run");
+        assertEquals(0, trackingProcessor.processCallCount(),
+                "M4 processor must NOT be called when fingerprint computation fails (pre-fingerprint failure)");
+    }
+
+    /**
+     * When the fingerprint port reports a technical error, the use case must not
+     * attempt PDF text extraction for that candidate.
+     */
+    @Test
+    void execute_fingerprintFailure_extractionNotCalled() throws Exception {
+        // Arrange: extraction would succeed if it were ever reached.
+        MockRunLockPort runLock = new MockRunLockPort();
+        StartConfiguration startConfig = buildConfig(tempDir);
+        FixedCandidatesPort singleCandidatePort =
+                new FixedCandidatesPort(List.of(makeCandidate("unreadable.pdf")));
+        PdfExtractionSuccess wouldBeExtracted = new PdfExtractionSuccess("text", new PdfPageCount(1));
+        FixedExtractionPort countingExtractionPort = new FixedExtractionPort(wouldBeExtracted);
+        FingerprintPort failingFingerprintPort =
+                ignoredCandidate -> new FingerprintTechnicalError("Cannot read file", null);
+
+        // Act: the outcome itself is irrelevant here, only the extraction call count matters.
+        buildUseCase(
+                startConfig, runLock, singleCandidatePort, countingExtractionPort,
+                failingFingerprintPort, new NoOpM4DocumentProcessor())
+                .execute(new BatchRunContext(new RunId("fp-fail-no-extract"), Instant.now()));
+
+        // Assert
+        assertEquals(0, countingExtractionPort.callCount(),
+                "PDF extraction must NOT be called when fingerprint computation fails");
+    }
+
/**
* Mixed-batch test: one document per outcome type in a single run.
* Proves that no individual outcome aborts the overall batch.
@@ -267,18 +347,18 @@ class BatchRunProcessingUseCaseTest {
@Test
void execute_mixedBatch_allOutcomeTypes_batchOverallSucceeds() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort();
- // maxPages=3 in buildConfig; pageLimitCandidate has 10 pages → exceeds limit
StartConfiguration config = buildConfig(tempDir);
- SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf");
- SourceDocumentCandidate noTextCandidate = makeCandidate("notext.pdf");
- SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf");
+ SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf");
+ SourceDocumentCandidate noTextCandidate = makeCandidate("notext.pdf");
+ SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf");
SourceDocumentCandidate technicalErrorCandidate = makeCandidate("broken.pdf");
- SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf");
+ SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf");
+ SourceDocumentCandidate fpFailCandidate = makeCandidate("unreadable.pdf");
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(
goodCandidate, noTextCandidate, pageLimitCandidate,
- technicalErrorCandidate, contentErrorCandidate));
+ technicalErrorCandidate, contentErrorCandidate, fpFailCandidate));
MappedExtractionPort extractionPort = new MappedExtractionPort()
.with(goodCandidate, new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)))
@@ -287,16 +367,31 @@ class BatchRunProcessingUseCaseTest {
.with(technicalErrorCandidate, new PdfExtractionTechnicalError("I/O error", null))
.with(contentErrorCandidate, new PdfExtractionContentError("PDF is encrypted"));
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ // fpFailCandidate gets a fingerprint failure; others get a valid fingerprint
+ FingerprintPort mappedFingerprintPort = candidate -> {
+ if (candidate.uniqueIdentifier().equals("unreadable.pdf")) {
+ return new FingerprintTechnicalError("Cannot read", null);
+ }
+ return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
+ };
+
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
+
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ mappedFingerprintPort, m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("mixed"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
- assertTrue(outcome.isSuccess(),
- "Mixed batch with all outcome types must yield batch SUCCESS");
+ assertTrue(outcome.isSuccess(), "Mixed batch with all outcome types must yield batch SUCCESS");
+ // 5 candidates with successful fingerprint → M4 processor called 5 times
+ // 1 candidate with fingerprint failure → M4 processor NOT called
+ assertEquals(5, m4Processor.processCallCount(),
+ "M4 processor must be called for each candidate with a successful fingerprint");
+ // Extraction called for 5 candidates (not for fpFailCandidate)
assertEquals(5, extractionPort.callCount(),
- "Extraction must be attempted for each of the 5 candidates");
+ "Extraction must be attempted for each of the 5 candidates with a valid fingerprint");
}
@Test
@@ -312,21 +407,35 @@ class BatchRunProcessingUseCaseTest {
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice content", new PdfPageCount(2));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
+ TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
- DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
- config, lockPort, candidatesPort, extractionPort);
+ DefaultBatchRunProcessingUseCase useCase = buildUseCase(
+ config, lockPort, candidatesPort, extractionPort,
+ new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("multi"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(), "All three candidates processed should yield SUCCESS");
assertEquals(3, extractionPort.callCount(), "Extraction should be called once per candidate");
+ assertEquals(3, m4Processor.processCallCount(), "M4 processor should be called once per candidate");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
+    /**
+     * Single construction point for the use case under test, so that each test passes
+     * its collaborators in one fixed order.
+     *
+     * @param config          start configuration handed to the use case
+     * @param lockPort        run-lock port
+     * @param candidatesPort  source of document candidates
+     * @param extractionPort  PDF text extraction port
+     * @param fingerprintPort fingerprint computation port
+     * @param m4Processor     M4 per-document processor
+     * @return a new use case instance built from exactly these collaborators
+     */
+    private static DefaultBatchRunProcessingUseCase buildUseCase(
+            StartConfiguration config, RunLockPort lockPort,
+            SourceDocumentCandidatesPort candidatesPort, PdfTextExtractionPort extractionPort,
+            FingerprintPort fingerprintPort, M4DocumentProcessor m4Processor) {
+        DefaultBatchRunProcessingUseCase wiredUseCase = new DefaultBatchRunProcessingUseCase(
+                config,
+                lockPort,
+                candidatesPort,
+                extractionPort,
+                fingerprintPort,
+                m4Processor);
+        return wiredUseCase;
+    }
+
private static StartConfiguration buildConfig(Path tempDir) throws Exception {
Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
Path targetDir = Files.createDirectories(tempDir.resolve("target"));
@@ -357,6 +466,13 @@ class BatchRunProcessingUseCaseTest {
return new SourceDocumentCandidate(filename, 1024L, new SourceDocumentLocator("/tmp/" + filename));
}
+    /**
+     * Creates a deterministic fake fingerprint for tests from the hash code of {@code seed}.
+     * The 32-bit hash is rendered as unsigned lowercase hex (at most 8 digits) and
+     * zero-padded on the left to a width of 64, so the result is always exactly
+     * 64 characters long and no truncation is needed.
+     *
+     * @param seed non-null text; equal seeds always produce equal fingerprints
+     * @return a fingerprint wrapping the 64-character hex string
+     */
+    private static DocumentFingerprint makeFingerprint(String seed) {
+        // Deliberately no Math.abs: it would map hash h and -h to the same fingerprint
+        // (needless collisions between distinct seeds) and still returns a negative
+        // value for Integer.MIN_VALUE. Treating the hash as unsigned keeps all 32 bits.
+        long unsignedHash = Integer.toUnsignedLong(seed.hashCode());
+        return new DocumentFingerprint(String.format("%064x", unsignedHash));
+    }
+
// -------------------------------------------------------------------------
// Mock / Stub implementations
// -------------------------------------------------------------------------
@@ -480,4 +596,88 @@ class BatchRunProcessingUseCaseTest {
int callCount() { return calls; }
}
-}
\ No newline at end of file
+
+    /**
+     * Fingerprint port stub that never fails: it returns a {@code FingerprintSuccess}
+     * whose fingerprint is derived deterministically from the candidate's unique identifier.
+     * The derivation is delegated to {@code makeFingerprint} so that every fake
+     * fingerprint in this test class comes from one formula.
+     */
+    private static class AlwaysSuccessFingerprintPort implements FingerprintPort {
+        /**
+         * @param candidate candidate whose unique identifier seeds the fake fingerprint
+         * @return always a success result; equal identifiers yield equal fingerprints
+         */
+        @Override
+        public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
+            return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
+        }
+    }
+
+ /**
+ * M4DocumentProcessor wired with no-op repositories and no overrides (for tests that
+ * only care about lock/batch lifecycle, not about what M4 would persist).
+ */
+ private static class NoOpM4DocumentProcessor extends M4DocumentProcessor {
+ NoOpM4DocumentProcessor() {
+ super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
+ }
+ }
+
+    /**
+     * M4DocumentProcessor test double that records how often {@code process()} is
+     * invoked and then forwards the call to the inherited implementation, which is
+     * wired with no-op repositories.
+     */
+    private static class TrackingM4DocumentProcessor extends M4DocumentProcessor {
+        /** Number of {@code process()} invocations seen so far. */
+        private int calls;
+
+        TrackingM4DocumentProcessor() {
+            super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
+        }
+
+        /** @return how many times {@code process()} has been called on this instance */
+        int processCallCount() {
+            return calls;
+        }
+
+        /**
+         * Counts the invocation first, then delegates unchanged to the superclass,
+         * so the call is counted even if the delegate throws.
+         */
+        @Override
+        public void process(
+                SourceDocumentCandidate candidate,
+                DocumentFingerprint fingerprint,
+                de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome m3Outcome,
+                BatchRunContext context,
+                Instant attemptStart) {
+            calls += 1;
+            super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
+        }
+    }
+
+ /** DocumentRecordRepository stub for test M4DocumentProcessor instances: lookups always report DocumentUnknown; create/update discard their input. */
+ private static class NoOpDocumentRecordRepository implements DocumentRecordRepository {
+ @Override
+ public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
+ // Always report the fingerprint as unknown; intended to steer the M4 processor onto its "new document" path
+ return new DocumentUnknown();
+ }
+
+ @Override
+ public void create(DocumentRecord record) {
+ // Intentionally empty: the record is discarded
+ }
+
+ @Override
+ public void update(DocumentRecord record) {
+ // Intentionally empty: the record is discarded
+ }
+ }
+
+ /** No-op ProcessingAttemptRepository for use in test M4DocumentProcessor instances. */
+ private static class NoOpProcessingAttemptRepository implements ProcessingAttemptRepository {
+ @Override
+ public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
+ return 1;
+ }
+
+ @Override
+ public void save(ProcessingAttempt attempt) {
+ // No-op
+ }
+
+ @Override
+ public List
- *
- * Exit code semantics
*
*
+ *
+ * M4 wiring
+ *
+ *
+ *
+ * @since M2 (extended in M4-AP-006)
*/
public class BootstrapRunner {
@@ -83,7 +108,7 @@ public class BootstrapRunner {
*
- *
+ *
+ *
*
- * @return exit code: 0 for success, 1 for invalid configuration or unexpected failure
+ * @return exit code: 0 for success, 1 for invalid configuration, schema failure,
+ * or unexpected bootstrap failure
*/
public int run() {
LOG.info("Bootstrap flow started.");
@@ -160,61 +209,105 @@ public class BootstrapRunner {
// Step 2: Load configuration
var config = configPort.loadConfiguration();
- // Step 3: Validate configuration (AP-006)
+ // Step 3: Validate configuration
StartConfigurationValidator validator = validatorFactory.create();
validator.validate(config);
- // Step 4: Resolve lock file path – apply default if not configured (AP-006)
+ // Step 4: Resolve lock file path – apply default if not configured
Path lockFilePath = config.runtimeLockFile();
if (lockFilePath == null || lockFilePath.toString().isBlank()) {
lockFilePath = Paths.get("pdf-umbenenner.lock");
- LOG.info("runtime.lock.file not configured, using default lock path: {}", lockFilePath.toAbsolutePath());
+ LOG.info("runtime.lock.file not configured, using default lock path: {}",
+ lockFilePath.toAbsolutePath());
}
RunLockPort runLockPort = runLockPortFactory.create(lockFilePath);
- // Step 5: Create the batch run context
- // Generate a unique run ID and initialize the run context
+ // Step 5 (M4): Initialise the SQLite schema before the batch loop begins.
+ // A failure here is a hard start error → exit code 1.
+ initializeSchema(config);
+
+ // Step 6: Create the batch run context
RunId runId = new RunId(UUID.randomUUID().toString());
BatchRunContext runContext = new BatchRunContext(runId, Instant.now());
LOG.info("Batch run started. RunId: {}", runId);
- // Step 6: Create the use case with the validated config and run lock (application layer).
+ // Step 7: Create the use case with the validated config and run lock.
// Config is passed directly; the use case does not re-read the properties file.
- // Adapters (source document port, PDF extraction port) are wired by the factory.
+ // Adapters (source document port, PDF extraction port, M4 ports) are wired by the factory.
BatchRunProcessingUseCase useCase = useCaseFactory.create(config, runLockPort);
- // Step 7: Create the CLI command adapter with the use case
+ // Step 8: Create the CLI command adapter with the use case
SchedulerBatchCommand command = commandFactory.create(useCase);
- // Step 8: Execute the command with the run context and handle the outcome
+ // Step 9: Execute the command with the run context and handle the outcome
BatchRunOutcome outcome = command.run(runContext);
- // Mark run as completed (AP-003)
+ // Mark run as completed
runContext.setEndInstant(Instant.now());
if (outcome.isSuccess()) {
LOG.info("Batch run completed successfully. RunId: {}", runContext.runId());
return 0;
} else if (outcome.isLockUnavailable()) {
- LOG.warn("Batch run aborted: another instance is already running. RunId: {}", runContext.runId());
+ LOG.warn("Batch run aborted: another instance is already running. RunId: {}",
+ runContext.runId());
return 1;
} else {
LOG.error("Batch run failed. RunId: {}", runContext.runId());
return 1;
}
} catch (InvalidStartConfigurationException e) {
- // Controlled failure for invalid configuration - log clearly without stack trace
+ // Controlled failure for invalid configuration – log clearly without stack trace
LOG.error("Configuration validation failed: {}", e.getMessage());
return 1;
} catch (IllegalStateException e) {
// Configuration loading failed due to missing/invalid required properties
- // Treat as invalid configuration for controlled failure
LOG.error("Configuration loading failed: {}", e.getMessage());
return 1;
+ } catch (DocumentPersistenceException e) {
+ // Schema initialisation failed – hard start error
+ LOG.error("SQLite schema initialisation failed: {}", e.getMessage(), e);
+ return 1;
} catch (Exception e) {
LOG.error("Bootstrap failure during startup.", e);
return 1;
}
}
-}
\ No newline at end of file
+ /**
+ * Initialises the M4 SQLite schema using the configured SQLite file path.
+ *