M4 AP-006 Idempotenz- und Persistenzlogik integrieren
This commit is contained in:
@@ -0,0 +1,558 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

import java.time.Instant;
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Application-level service that implements the M4 per-document processing logic.
|
||||
* <p>
|
||||
* This service is the single authoritative place for the M4 decision rules:
|
||||
* idempotency checks, status/counter mapping, and consistent two-level persistence.
|
||||
* It is intentionally tightly scoped to AP-006 and contains no M5+ logic.
|
||||
*
|
||||
* <h2>M4 processing order per candidate</h2>
|
||||
* <ol>
|
||||
* <li>Load the document master record by fingerprint.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the M3 flow (already done by the caller) and map the result
|
||||
* into M4 status, counters and retryable flag.</li>
|
||||
* <li>Persist exactly one historised processing attempt for the identified document.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>M4 minimal rules</h2>
|
||||
* <ul>
|
||||
* <li>Already successful documents are skipped in later runs.</li>
|
||||
* <li>Already finally failed documents are skipped in later runs.</li>
|
||||
* <li>First historised deterministic content failure from M3 →
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, content error counter becomes 1,
|
||||
* {@code retryable=true}.</li>
|
||||
* <li>Second historised deterministic content failure in a later run →
|
||||
* {@link ProcessingStatus#FAILED_FINAL}, content error counter becomes 2,
|
||||
* {@code retryable=false}.</li>
|
||||
* <li>Document-related technical failures after successful fingerprinting remain
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, increment transient error counter,
|
||||
* {@code retryable=true}.</li>
|
||||
* <li>Skip events do not change error counters.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Persistence consistency</h2>
|
||||
* <p>
|
||||
* For every identified document, both the processing attempt and the master record are
|
||||
* written in sequence. If either write fails, the failure is logged and the batch run
|
||||
* continues with the next candidate. No partial state is intentionally left; if the
|
||||
* attempt write succeeds but the master record write fails, the inconsistency is bounded
|
||||
* to that one document and is logged clearly. True transactionality across two separate
|
||||
* repository calls is not available without a larger architectural change; this is
|
||||
* documented as a known limitation of the M4 scope.
|
||||
*
|
||||
* <h2>Pre-fingerprint failures</h2>
|
||||
* <p>
|
||||
* Failures that occur before a successful fingerprint is available are <em>not</em>
|
||||
* historised in SQLite. They are handled by the caller and logged as non-identifiable
|
||||
* run events.
|
||||
*
|
||||
* @since M4-AP-006
|
||||
*/
|
||||
public class M4DocumentProcessor {
|
||||
|
||||
private static final Logger LOG = LogManager.getLogger(M4DocumentProcessor.class);
|
||||
|
||||
private final DocumentRecordRepository documentRecordRepository;
|
||||
private final ProcessingAttemptRepository processingAttemptRepository;
|
||||
|
||||
/**
|
||||
* Creates the M4 document processor with the required persistence ports.
|
||||
*
|
||||
* @param documentRecordRepository port for reading and writing the document master record;
|
||||
* must not be null
|
||||
* @param processingAttemptRepository port for writing and reading the attempt history;
|
||||
* must not be null
|
||||
* @throws NullPointerException if any parameter is null
|
||||
*/
|
||||
public M4DocumentProcessor(
|
||||
DocumentRecordRepository documentRecordRepository,
|
||||
ProcessingAttemptRepository processingAttemptRepository) {
|
||||
this.documentRecordRepository =
|
||||
Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null");
|
||||
this.processingAttemptRepository =
|
||||
Objects.requireNonNull(processingAttemptRepository, "processingAttemptRepository must not be null");
|
||||
}
|
||||
|
||||
/**
|
||||
* Applies the full M4 processing logic for one identified document candidate.
|
||||
* <p>
|
||||
* The caller must have already computed a valid {@link DocumentFingerprint} for the
|
||||
* candidate. The M3 outcome (from the PDF extraction and pre-check pipeline) is
|
||||
* provided as {@code m3Outcome} and is used only when the document is not in a
|
||||
* terminal state.
|
||||
* <p>
|
||||
* This method never throws. All persistence failures are caught, logged, and
|
||||
* treated as controlled per-document failures so the batch run can continue.
|
||||
*
|
||||
* @param candidate the source document candidate being processed; must not be null
|
||||
* @param fingerprint the successfully computed fingerprint for this candidate;
|
||||
* must not be null
|
||||
* @param m3Outcome the result of the M3 pipeline (PDF extraction + pre-checks);
|
||||
* must not be null
|
||||
* @param context the current batch run context (for run ID and timing);
|
||||
* must not be null
|
||||
* @param attemptStart the instant at which processing of this candidate began;
|
||||
* must not be null
|
||||
*/
|
||||
public void process(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
|
||||
Objects.requireNonNull(m3Outcome, "m3Outcome must not be null");
|
||||
Objects.requireNonNull(context, "context must not be null");
|
||||
Objects.requireNonNull(attemptStart, "attemptStart must not be null");
|
||||
|
||||
// Step 1: Load the document master record
|
||||
DocumentRecordLookupResult lookupResult =
|
||||
documentRecordRepository.findByFingerprint(fingerprint);
|
||||
|
||||
// Step 2: Handle persistence lookup failure – cannot safely proceed
|
||||
if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) {
|
||||
LOG.error("Cannot process '{}': master record lookup failed: {}",
|
||||
candidate.uniqueIdentifier(), failure.errorMessage());
|
||||
return;
|
||||
}
|
||||
|
||||
// Step 3: Determine the action based on the lookup result
|
||||
switch (lookupResult) {
|
||||
case DocumentTerminalSuccess terminalSuccess -> {
|
||||
// Document already successfully processed → skip
|
||||
LOG.info("Skipping '{}': already successfully processed (fingerprint: {}).",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||
persistSkipAttempt(
|
||||
candidate, fingerprint, terminalSuccess.record(),
|
||||
ProcessingStatus.SKIPPED_ALREADY_PROCESSED,
|
||||
context, attemptStart);
|
||||
}
|
||||
|
||||
case DocumentTerminalFinalFailure terminalFailure -> {
|
||||
// Document finally failed → skip
|
||||
LOG.info("Skipping '{}': already finally failed (fingerprint: {}).",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||
persistSkipAttempt(
|
||||
candidate, fingerprint, terminalFailure.record(),
|
||||
ProcessingStatus.SKIPPED_FINAL_FAILURE,
|
||||
context, attemptStart);
|
||||
}
|
||||
|
||||
case DocumentUnknown ignored -> {
|
||||
// New document – process and create a new master record
|
||||
processAndPersistNewDocument(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
}
|
||||
|
||||
case DocumentKnownProcessable knownProcessable -> {
|
||||
// Known but not terminal – process and update the existing master record
|
||||
processAndPersistKnownDocument(
|
||||
candidate, fingerprint, m3Outcome, knownProcessable.record(),
|
||||
context, attemptStart);
|
||||
}
|
||||
|
||||
default ->
|
||||
// Exhaustive sealed hierarchy; this branch is unreachable
|
||||
LOG.error("Unexpected lookup result type for '{}': {}",
|
||||
candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName());
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Skip path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Persists a skip attempt and updates the master record's {@code updatedAt} timestamp.
|
||||
* <p>
|
||||
* Skip events do not change any failure counter. The master record's overall status
|
||||
* remains unchanged (terminal).
|
||||
*
|
||||
* @param candidate the candidate being skipped
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param existingRecord the current master record (already terminal)
|
||||
* @param skipStatus the skip status to record ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}
|
||||
* or {@link ProcessingStatus#SKIPPED_FINAL_FAILURE})
|
||||
* @param context the current batch run context
|
||||
* @param attemptStart the start instant of this processing attempt
|
||||
*/
|
||||
private void persistSkipAttempt(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentRecord existingRecord,
|
||||
ProcessingStatus skipStatus,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Instant now = Instant.now();
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
|
||||
ProcessingAttempt skipAttempt = new ProcessingAttempt(
|
||||
fingerprint,
|
||||
context.runId(),
|
||||
attemptNumber,
|
||||
attemptStart,
|
||||
now,
|
||||
skipStatus,
|
||||
null, // no failure class for skip
|
||||
null, // no failure message for skip
|
||||
false // not retryable
|
||||
);
|
||||
|
||||
// Write attempt first, then update master record
|
||||
processingAttemptRepository.save(skipAttempt);
|
||||
|
||||
// Update master record: only updatedAt changes; status and counters stay the same
|
||||
DocumentRecord updatedRecord = new DocumentRecord(
|
||||
existingRecord.fingerprint(),
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
existingRecord.overallStatus(), // terminal status unchanged
|
||||
existingRecord.failureCounters(), // counters unchanged for skip
|
||||
existingRecord.lastFailureInstant(),
|
||||
existingRecord.lastSuccessInstant(),
|
||||
existingRecord.createdAt(),
|
||||
now // updatedAt = now
|
||||
);
|
||||
documentRecordRepository.update(updatedRecord);
|
||||
|
||||
LOG.debug("Skip attempt #{} persisted for '{}' with status {}.",
|
||||
attemptNumber, candidate.uniqueIdentifier(), skipStatus);
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Failed to persist skip attempt for '{}': {}",
|
||||
candidate.uniqueIdentifier(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// New document path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Processes a newly discovered document (no existing master record) and persists
|
||||
* both the attempt and the new master record.
|
||||
*
|
||||
* @param candidate the candidate being processed
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @param context the current batch run context
|
||||
* @param attemptStart the start instant of this processing attempt
|
||||
*/
|
||||
private void processAndPersistNewDocument(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Instant now = Instant.now();
|
||||
|
||||
// Map M3 outcome to M4 status/counters for a brand-new document
|
||||
M4Outcome m4 = mapM3OutcomeForNewDocument(m3Outcome);
|
||||
|
||||
try {
|
||||
// Attempt number is always 1 for a new document
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
|
||||
ProcessingAttempt attempt = buildAttempt(
|
||||
fingerprint, context, attemptNumber, attemptStart, now, m4);
|
||||
|
||||
// Create the new master record
|
||||
DocumentRecord newRecord = new DocumentRecord(
|
||||
fingerprint,
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters(),
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS ? null : now, // lastFailureInstant
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS ? now : null, // lastSuccessInstant
|
||||
now, // createdAt
|
||||
now // updatedAt
|
||||
);
|
||||
|
||||
// Persist attempt first, then master record
|
||||
processingAttemptRepository.save(attempt);
|
||||
documentRecordRepository.create(newRecord);
|
||||
|
||||
LOG.info("New document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters().contentErrorCount(),
|
||||
m4.counters().transientErrorCount());
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Failed to persist processing result for new document '{}': {}",
|
||||
candidate.uniqueIdentifier(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Known processable document path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Processes a known but non-terminal document and updates both the attempt history
|
||||
* and the master record.
|
||||
*
|
||||
* @param candidate the candidate being processed
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @param existingRecord the current master record (not terminal)
|
||||
* @param context the current batch run context
|
||||
* @param attemptStart the start instant of this processing attempt
|
||||
*/
|
||||
private void processAndPersistKnownDocument(
|
||||
SourceDocumentCandidate candidate,
|
||||
DocumentFingerprint fingerprint,
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
DocumentRecord existingRecord,
|
||||
BatchRunContext context,
|
||||
Instant attemptStart) {
|
||||
|
||||
Instant now = Instant.now();
|
||||
|
||||
// Map M3 outcome to M4 status/counters, taking existing counters into account
|
||||
M4Outcome m4 = mapM3OutcomeForKnownDocument(m3Outcome, existingRecord.failureCounters());
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
|
||||
ProcessingAttempt attempt = buildAttempt(
|
||||
fingerprint, context, attemptNumber, attemptStart, now, m4);
|
||||
|
||||
// Update the master record with new status, counters and timestamps
|
||||
DocumentRecord updatedRecord = new DocumentRecord(
|
||||
existingRecord.fingerprint(),
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters(),
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS
|
||||
? existingRecord.lastFailureInstant() : now,
|
||||
m4.overallStatus() == ProcessingStatus.SUCCESS
|
||||
? now : existingRecord.lastSuccessInstant(),
|
||||
existingRecord.createdAt(),
|
||||
now // updatedAt
|
||||
);
|
||||
|
||||
// Persist attempt first, then master record
|
||||
processingAttemptRepository.save(attempt);
|
||||
documentRecordRepository.update(updatedRecord);
|
||||
|
||||
LOG.info("Known document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
m4.overallStatus(),
|
||||
m4.counters().contentErrorCount(),
|
||||
m4.counters().transientErrorCount());
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Failed to persist processing result for known document '{}': {}",
|
||||
candidate.uniqueIdentifier(), e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// M3 → M4 outcome mapping
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Maps an M3 outcome to M4 status, counters, and retryable flag for a brand-new
|
||||
* document (no prior history, counters start at zero).
|
||||
*
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @return the M4 outcome with status, counters and retryable flag
|
||||
*/
|
||||
private M4Outcome mapM3OutcomeForNewDocument(DocumentProcessingOutcome m3Outcome) {
|
||||
return mapM3OutcomeForKnownDocument(m3Outcome, FailureCounters.zero());
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps an M3 outcome to M4 status, counters, and retryable flag, taking the
|
||||
* existing failure counters into account.
|
||||
* <p>
|
||||
* <strong>M4 minimal rules applied here:</strong>
|
||||
* <ul>
|
||||
* <li>M3 success → {@link ProcessingStatus#SUCCESS}, counters unchanged,
|
||||
* {@code retryable=false}.</li>
|
||||
* <li>M3 deterministic content error (first occurrence, contentErrorCount == 0) →
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, contentErrorCount +1,
|
||||
* {@code retryable=true}.</li>
|
||||
* <li>M3 deterministic content error (second occurrence, contentErrorCount >= 1) →
|
||||
* {@link ProcessingStatus#FAILED_FINAL}, contentErrorCount +1,
|
||||
* {@code retryable=false}.</li>
|
||||
* <li>M3 technical error → {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||
* transientErrorCount +1, {@code retryable=true}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param m3Outcome the M3 pipeline result
|
||||
* @param existingCounters the current failure counters from the master record
|
||||
* @return the M4 outcome with updated status, counters and retryable flag
|
||||
*/
|
||||
private M4Outcome mapM3OutcomeForKnownDocument(
|
||||
DocumentProcessingOutcome m3Outcome,
|
||||
FailureCounters existingCounters) {
|
||||
|
||||
return switch (m3Outcome) {
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored -> {
|
||||
// M3 success: document passed all pre-checks
|
||||
// In M4 scope (no KI, no target copy), PreCheckPassed is the terminal success
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.SUCCESS,
|
||||
existingCounters, // counters unchanged on success
|
||||
false // not retryable
|
||||
);
|
||||
}
|
||||
|
||||
case PreCheckFailed contentError -> {
|
||||
// Deterministic content error: apply the 1-retry rule
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
|
||||
boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
|
||||
|
||||
if (isFirstOccurrence) {
|
||||
// First content error → FAILED_RETRYABLE
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
true
|
||||
);
|
||||
} else {
|
||||
// Second (or later) content error → FAILED_FINAL
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.FAILED_FINAL,
|
||||
updatedCounters,
|
||||
false
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
case TechnicalDocumentError technicalError -> {
|
||||
// Technical error after fingerprinting: always FAILED_RETRYABLE, increment transient counter
|
||||
yield new M4Outcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
);
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helper: build ProcessingAttempt
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Constructs a {@link ProcessingAttempt} from the given parameters and M4 outcome.
|
||||
*
|
||||
* @param fingerprint the document fingerprint
|
||||
* @param context the current batch run context
|
||||
* @param attemptNumber the monotonic attempt number
|
||||
* @param startedAt the start instant of this attempt
|
||||
* @param endedAt the end instant of this attempt
|
||||
* @param m4 the M4 outcome (status, counters, retryable)
|
||||
* @return the constructed processing attempt
|
||||
*/
|
||||
private ProcessingAttempt buildAttempt(
|
||||
DocumentFingerprint fingerprint,
|
||||
BatchRunContext context,
|
||||
int attemptNumber,
|
||||
Instant startedAt,
|
||||
Instant endedAt,
|
||||
M4Outcome m4) {
|
||||
|
||||
String failureClass = null;
|
||||
String failureMessage = null;
|
||||
|
||||
if (m4.overallStatus() == ProcessingStatus.FAILED_RETRYABLE
|
||||
|| m4.overallStatus() == ProcessingStatus.FAILED_FINAL) {
|
||||
failureClass = m4.overallStatus().name();
|
||||
failureMessage = buildFailureMessage(m4);
|
||||
}
|
||||
|
||||
return new ProcessingAttempt(
|
||||
fingerprint,
|
||||
context.runId(),
|
||||
attemptNumber,
|
||||
startedAt,
|
||||
endedAt,
|
||||
m4.overallStatus(),
|
||||
failureClass,
|
||||
failureMessage,
|
||||
m4.retryable()
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds a human-readable failure message from the M4 outcome.
|
||||
*
|
||||
* @param m4 the M4 outcome
|
||||
* @return a non-null failure message string
|
||||
*/
|
||||
private String buildFailureMessage(M4Outcome m4) {
|
||||
return switch (m4.overallStatus()) {
|
||||
case FAILED_RETRYABLE -> "Processing failed (retryable). "
|
||||
+ "ContentErrors=" + m4.counters().contentErrorCount()
|
||||
+ ", TransientErrors=" + m4.counters().transientErrorCount();
|
||||
case FAILED_FINAL -> "Processing failed finally (not retryable). "
|
||||
+ "ContentErrors=" + m4.counters().contentErrorCount()
|
||||
+ ", TransientErrors=" + m4.counters().transientErrorCount();
|
||||
default -> m4.overallStatus().name();
|
||||
};
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Internal value type: M4 outcome
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Internal value type carrying the M4 status, updated counters, and retryable flag
|
||||
* after mapping from an M3 outcome.
|
||||
* <p>
|
||||
* Tightly scoped to {@link M4DocumentProcessor}; not exposed outside this class.
|
||||
*
|
||||
* @param overallStatus the M4 overall status to persist
|
||||
* @param counters the updated failure counters to persist
|
||||
* @param retryable whether the failure is retryable in a later run
|
||||
*/
|
||||
private record M4Outcome(
|
||||
ProcessingStatus overallStatus,
|
||||
FailureCounters counters,
|
||||
boolean retryable) {
|
||||
}
|
||||
}
|
||||
@@ -1,28 +1,65 @@
|
||||
/**
|
||||
* Application-level services for business logic evaluation.
|
||||
* Application-level services for business logic evaluation and M4 orchestration.
|
||||
* <p>
|
||||
* This package contains stateless, pure-logic services that evaluate document content
|
||||
* and apply business rules. Services in this package:
|
||||
* This package contains stateless, pure-logic services that evaluate document content,
|
||||
* apply business rules, and orchestrate the M4 per-document processing flow.
|
||||
* Services in this package:
|
||||
* <ul>
|
||||
* <li>Do not manage state or resources</li>
|
||||
* <li>Do not depend on infrastructure (database, filesystem, network)</li>
|
||||
* <li>Do not depend on infrastructure (database, filesystem, network) directly;
|
||||
* they interact with infrastructure exclusively through outbound ports</li>
|
||||
* <li>Can be tested with simple unit tests and in-memory mocks</li>
|
||||
* <li>Are reused by multiple use cases or adapters</li>
|
||||
* </ul>
|
||||
*
|
||||
* Current services:
|
||||
* <h2>Current services</h2>
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.PreCheckEvaluator} — Pre-check evaluation</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} — Complete document processing pipeline orchestration</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.PreCheckEvaluator}
|
||||
* — Pre-check evaluation (M3)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService}
|
||||
* — Complete M3 document processing pipeline orchestration</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor}
|
||||
* — M4 per-document idempotency, status/counter mapping and consistent
|
||||
* two-level persistence (AP-006)</li>
|
||||
* </ul>
|
||||
*
|
||||
* Document Processing Pipeline:
|
||||
* The {@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} coordinates
|
||||
* the complete processing workflow:
|
||||
* <h2>M4 processing flow ({@code M4DocumentProcessor})</h2>
|
||||
* <p>
|
||||
* The {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor}
|
||||
 * implements the authoritative M4 processing order per candidate:
|
||||
* <ol>
|
||||
* <li>Convert technical PDF extraction results to processing outcomes</li>
|
||||
* <li>Route successful extractions through pre-check validation</li>
|
||||
* <li>Classify extraction and pre-check failures with appropriate error types</li>
|
||||
* <li>Load the document master record by fingerprint.</li>
|
||||
* <li>If overall status is {@code SUCCESS} → persist a skip attempt with
|
||||
* {@code SKIPPED_ALREADY_PROCESSED}; do not change counters.</li>
|
||||
* <li>If overall status is {@code FAILED_FINAL} → persist a skip attempt with
|
||||
* {@code SKIPPED_FINAL_FAILURE}; do not change counters.</li>
|
||||
* <li>Otherwise map the M3 outcome into M4 status, counters and retryable flag
|
||||
* using the M4 minimal rules.</li>
|
||||
* <li>Persist exactly one historised processing attempt.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>M4 minimal rules (status and counter semantics)</h2>
|
||||
* <ul>
|
||||
* <li>First deterministic content error → {@code FAILED_RETRYABLE},
|
||||
* content error counter +1, {@code retryable=true}.</li>
|
||||
* <li>Second deterministic content error → {@code FAILED_FINAL},
|
||||
* content error counter +1 (cumulative = 2), {@code retryable=false}.</li>
|
||||
* <li>Technical error after fingerprinting → {@code FAILED_RETRYABLE},
|
||||
* transient error counter +1, {@code retryable=true}.</li>
|
||||
* <li>Skip events do not change any failure counter.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Persistence consistency</h2>
|
||||
* <p>
|
||||
* For every identified document, the processing attempt and the master record are
|
||||
* written in sequence. If either write fails, the failure is caught and logged;
|
||||
* the batch run continues with the next candidate. True transactionality across
|
||||
* two separate repository calls is not available in the M4 scope; this is a known
|
||||
* and documented limitation.
|
||||
*
|
||||
* <h2>Pre-fingerprint failures</h2>
|
||||
* <p>
|
||||
* Failures that occur before a successful fingerprint is available are not handled
|
||||
* by this package. They are handled by the use case and are not historised in SQLite.
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
@@ -3,54 +3,80 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Batch processing implementation of {@link BatchRunProcessingUseCase}.
|
||||
* <p>
|
||||
* Orchestrates the complete batch processing workflow:
|
||||
* Orchestrates the complete M4 batch processing workflow per candidate:
|
||||
* <ol>
|
||||
* <li>Acquire exclusive run lock to prevent concurrent instances</li>
|
||||
* <li>Scan source folder for PDF candidates</li>
|
||||
* <li>For each candidate: extract text and page count, run pre-checks</li>
|
||||
* <li>Log per-document decision; end each document controlled without KI or target copy</li>
|
||||
* <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
|
||||
* <li>Acquire exclusive run lock to prevent concurrent instances.</li>
|
||||
* <li>Scan source folder for PDF candidates.</li>
|
||||
* <li>For each candidate, execute the M4 processing order:
|
||||
* <ol type="a">
|
||||
* <li>Compute fingerprint.</li>
|
||||
* <li>Load document master record.</li>
|
||||
* <li>If already {@code SUCCESS} → persist skip attempt with
|
||||
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
|
||||
* {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the M3 pipeline (extraction + pre-checks).</li>
|
||||
* <li>Map M3 result into M4 status, counters and retryable flag.</li>
|
||||
* <li>Persist exactly one historised processing attempt.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
* </li>
|
||||
* <li>Release lock and return structured outcome for Bootstrap exit code mapping.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Idempotency</h2>
|
||||
* <p>
|
||||
* Processing boundary:
|
||||
* <ul>
|
||||
* <li>Documents that pass pre-checks end controlled and are ready for further processing (KI, persistence, copy)</li>
|
||||
* <li>Documents with deterministic content errors (no usable text, page limit exceeded) end controlled</li>
|
||||
* <li>Documents with technical extraction errors end controlled; they do not abort the overall run</li>
|
||||
* <li>If the source folder itself is inaccessible, the run fails with {@link BatchRunOutcome#FAILURE}</li>
|
||||
* </ul>
|
||||
* Documents are identified exclusively by their SHA-256 content fingerprint. A document
|
||||
* whose overall status is {@code SUCCESS} or {@code FAILED_FINAL} is skipped in every
|
||||
* subsequent run; only a skip attempt is historised.
|
||||
*
|
||||
* <h2>Pre-fingerprint failures</h2>
|
||||
* <p>
|
||||
* Non-Goals (not implemented):
|
||||
* If the fingerprint computation fails (e.g. the file is no longer readable), the
|
||||
* candidate is logged as a non-identifiable run event and is <em>not</em> historised
|
||||
* in SQLite. The batch run continues with the next candidate.
|
||||
*
|
||||
* <h2>Persistence consistency</h2>
|
||||
* <p>
|
||||
* For every identified document, the processing attempt and the master record are
|
||||
* written in sequence by {@link M4DocumentProcessor}. Persistence failures for a single
|
||||
* document are caught and logged; the batch run continues with the remaining candidates.
|
||||
*
|
||||
* <h2>Non-Goals (not implemented in M4)</h2>
|
||||
* <ul>
|
||||
* <li>No fingerprinting or SQLite persistence</li>
|
||||
* <li>No KI/AI integration or prompt loading</li>
|
||||
* <li>No filename generation or target file copy</li>
|
||||
* <li>No cross-run retry logic</li>
|
||||
* <li>No KI/AI integration or prompt loading.</li>
|
||||
* <li>No filename generation or target file copy.</li>
|
||||
* <li>No M5+ retry rules for KI or target copy failures.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-004 (extended in M4-AP-006)
|
||||
*/
|
||||
public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCase {
|
||||
|
||||
@@ -60,28 +86,44 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
private final RunLockPort runLockPort;
|
||||
private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort;
|
||||
private final PdfTextExtractionPort pdfTextExtractionPort;
|
||||
private final FingerprintPort fingerprintPort;
|
||||
private final M4DocumentProcessor m4DocumentProcessor;
|
||||
|
||||
/**
|
||||
* Creates the batch use case with the already-loaded startup configuration and all required ports.
|
||||
* Creates the batch use case with the already-loaded startup configuration and all
|
||||
* required ports for the M4 flow.
|
||||
* <p>
|
||||
* The configuration is loaded and validated by Bootstrap before use case creation;
|
||||
* the use case receives the result directly and does not re-read it.
|
||||
* the use case receives the result directly and does not re-read the properties file.
|
||||
*
|
||||
* @param configuration the validated startup configuration
|
||||
* @param runLockPort for exclusive run locking
|
||||
* @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder
|
||||
* @param pdfTextExtractionPort for extracting text and page count from a single PDF
|
||||
* @param configuration the validated startup configuration; must not be null
|
||||
* @param runLockPort for exclusive run locking; must not be null
|
||||
* @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder;
|
||||
* must not be null
|
||||
* @param pdfTextExtractionPort for extracting text and page count from a single PDF;
|
||||
* must not be null
|
||||
* @param fingerprintPort for computing the SHA-256 fingerprint of a candidate;
|
||||
* must not be null
|
||||
* @param m4DocumentProcessor for applying M4 decision logic and persisting results;
|
||||
* must not be null
|
||||
* @throws NullPointerException if any parameter is null
|
||||
*/
|
||||
public DefaultBatchRunProcessingUseCase(
|
||||
StartConfiguration configuration,
|
||||
RunLockPort runLockPort,
|
||||
SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
|
||||
PdfTextExtractionPort pdfTextExtractionPort) {
|
||||
this.configuration = configuration;
|
||||
this.runLockPort = runLockPort;
|
||||
this.sourceDocumentCandidatesPort = sourceDocumentCandidatesPort;
|
||||
this.pdfTextExtractionPort = pdfTextExtractionPort;
|
||||
PdfTextExtractionPort pdfTextExtractionPort,
|
||||
FingerprintPort fingerprintPort,
|
||||
M4DocumentProcessor m4DocumentProcessor) {
|
||||
this.configuration = Objects.requireNonNull(configuration, "configuration must not be null");
|
||||
this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null");
|
||||
this.sourceDocumentCandidatesPort = Objects.requireNonNull(
|
||||
sourceDocumentCandidatesPort, "sourceDocumentCandidatesPort must not be null");
|
||||
this.pdfTextExtractionPort = Objects.requireNonNull(
|
||||
pdfTextExtractionPort, "pdfTextExtractionPort must not be null");
|
||||
this.fingerprintPort = Objects.requireNonNull(fingerprintPort, "fingerprintPort must not be null");
|
||||
this.m4DocumentProcessor = Objects.requireNonNull(
|
||||
m4DocumentProcessor, "m4DocumentProcessor must not be null");
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -96,12 +138,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
lockAcquired = true;
|
||||
LOG.debug("Run lock acquired successfully.");
|
||||
} catch (RunLockUnavailableException e) {
|
||||
LOG.warn("Run lock not available – another instance is already running. This instance terminates immediately.");
|
||||
LOG.warn("Run lock not available – another instance is already running. "
|
||||
+ "This instance terminates immediately.");
|
||||
return BatchRunOutcome.LOCK_UNAVAILABLE;
|
||||
}
|
||||
|
||||
LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder());
|
||||
LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant());
|
||||
LOG.debug("Configuration in use: source={}, target={}",
|
||||
configuration.sourceFolder(), configuration.targetFolder());
|
||||
LOG.info("Batch run started. RunId: {}, Start: {}",
|
||||
context.runId(), context.startInstant());
|
||||
|
||||
// Step 2: Load PDF candidates from source folder
|
||||
List<SourceDocumentCandidate> candidates;
|
||||
@@ -113,12 +158,13 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
}
|
||||
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
|
||||
|
||||
// Step 3: Process each candidate through the pipeline
|
||||
// Step 3: Process each candidate through the M4 pipeline
|
||||
for (SourceDocumentCandidate candidate : candidates) {
|
||||
processCandidate(candidate);
|
||||
processCandidate(candidate, context);
|
||||
}
|
||||
|
||||
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}", candidates.size(), context.runId());
|
||||
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}",
|
||||
candidates.size(), context.runId());
|
||||
return BatchRunOutcome.SUCCESS;
|
||||
|
||||
} catch (Exception e) {
|
||||
@@ -126,8 +172,8 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
return BatchRunOutcome.FAILURE;
|
||||
} finally {
|
||||
// Release the run lock only if it was successfully acquired.
|
||||
// If acquire() threw RunLockUnavailableException, the lock belongs to another instance
|
||||
// and must not be deleted by this instance.
|
||||
// If acquire() threw RunLockUnavailableException, the lock belongs to another
|
||||
// instance and must not be deleted by this instance.
|
||||
if (lockAcquired) {
|
||||
try {
|
||||
runLockPort.release();
|
||||
@@ -140,56 +186,105 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a single PDF candidate through the complete pipeline.
|
||||
* Processes a single PDF candidate through the complete M4 pipeline.
|
||||
* <p>
|
||||
* Processing steps per document:
|
||||
* M4 processing order:
|
||||
* <ol>
|
||||
* <li>Log candidate recognition</li>
|
||||
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
|
||||
* <li>Process extraction result through pre-checks via {@link DocumentProcessingService}</li>
|
||||
* <li>Log extraction outcome and final decision</li>
|
||||
* <li>Record the attempt start instant.</li>
|
||||
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event and
|
||||
* return — no SQLite record is created.</li>
|
||||
* <li>Execute the M3 pipeline (PDF extraction + pre-checks).</li>
|
||||
* <li>Delegate to {@link M4DocumentProcessor} for idempotency check, status/counter
|
||||
* mapping, and consistent two-level persistence.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall
|
||||
* batch run. Each candidate ends controlled regardless of its outcome.
|
||||
* <p>
|
||||
* Processing boundary: no KI call, no persistence, no filename generation,
|
||||
* no target file copy is initiated here, even for candidates that pass all pre-checks.
|
||||
* Per-document errors do not abort the overall batch run. Each candidate ends
|
||||
* controlled regardless of its outcome.
|
||||
*
|
||||
* @param candidate the candidate to process
|
||||
* @param context the current batch run context
|
||||
*/
|
||||
private void processCandidate(SourceDocumentCandidate candidate) {
|
||||
private void processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) {
|
||||
LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier());
|
||||
|
||||
PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
||||
// Record the attempt start instant before any work begins
|
||||
Instant attemptStart = Instant.now();
|
||||
|
||||
// Step M4-1: Compute fingerprint
|
||||
FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
|
||||
|
||||
switch (fingerprintResult) {
|
||||
case FingerprintTechnicalError fingerprintError -> {
|
||||
// Pre-fingerprint failure: not historised in SQLite
|
||||
LOG.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).",
|
||||
candidate.uniqueIdentifier(), fingerprintError.errorMessage());
|
||||
return;
|
||||
}
|
||||
|
||||
case FingerprintSuccess fingerprintSuccess -> {
|
||||
DocumentFingerprint fingerprint = fingerprintSuccess.fingerprint();
|
||||
LOG.debug("Fingerprint computed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||
|
||||
// Step M4-2..M4-8: Execute M3 pipeline and delegate M4 logic to the processor
|
||||
// The M3 pipeline runs only if the document is not in a terminal state;
|
||||
// M4DocumentProcessor handles the terminal check internally.
|
||||
// We run M3 eagerly here and pass the result; M4DocumentProcessor will
|
||||
// ignore it for terminal documents.
|
||||
DocumentProcessingOutcome m3Outcome = runM3Pipeline(candidate);
|
||||
|
||||
// Delegate idempotency check, status mapping, and persistence to M4DocumentProcessor
|
||||
m4DocumentProcessor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs the M3 pipeline (PDF text extraction + pre-checks) for the given candidate.
|
||||
* <p>
|
||||
* This method is called after a successful fingerprint computation. The result is
|
||||
* passed to {@link M4DocumentProcessor}, which applies it only when the document is
|
||||
* not in a terminal state.
|
||||
*
|
||||
* @param candidate the candidate to run through the M3 pipeline
|
||||
* @return the M3 pipeline outcome (pre-check passed, pre-check failed, or technical error)
|
||||
*/
|
||||
private DocumentProcessingOutcome runM3Pipeline(SourceDocumentCandidate candidate) {
|
||||
PdfExtractionResult extractionResult =
|
||||
pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
||||
|
||||
// Log extraction outcome
|
||||
switch (extractionResult) {
|
||||
case PdfExtractionSuccess success ->
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess success ->
|
||||
LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.",
|
||||
candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length());
|
||||
case PdfExtractionContentError contentError ->
|
||||
candidate.uniqueIdentifier(),
|
||||
success.pageCount().value(),
|
||||
success.extractedText().length());
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError contentError ->
|
||||
LOG.debug("PDF content extraction failed for '{}' (content problem): {}",
|
||||
candidate.uniqueIdentifier(), contentError.reason());
|
||||
case PdfExtractionTechnicalError technicalError ->
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError technicalError ->
|
||||
LOG.debug("PDF extraction technical error for '{}': {}",
|
||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||
}
|
||||
|
||||
// Process through complete pipeline
|
||||
var outcome = DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
|
||||
DocumentProcessingOutcome outcome =
|
||||
DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
|
||||
|
||||
// Log processing outcome
|
||||
// Log M3 outcome
|
||||
switch (outcome) {
|
||||
case PreCheckPassed passed ->
|
||||
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for further processing.",
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed ->
|
||||
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for M4 persistence.",
|
||||
candidate.uniqueIdentifier());
|
||||
case PreCheckFailed failed ->
|
||||
LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).",
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
|
||||
LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
|
||||
candidate.uniqueIdentifier(), failed.failureReasonDescription());
|
||||
case TechnicalDocumentError technicalError ->
|
||||
LOG.warn("Processing FAILED for '{}': {} (Technical error – may retry in later run).",
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
|
||||
LOG.warn("Processing FAILED for '{}': {} (Technical error – retryable).",
|
||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||
}
|
||||
|
||||
return outcome;
|
||||
}
|
||||
}
|
||||
@@ -4,9 +4,25 @@
|
||||
* Implementations:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase}
|
||||
* — Production implementation with run lock and controlled batch cycle</li>
|
||||
* — Production implementation with run lock, M4 fingerprint-based idempotency,
|
||||
* and consistent two-level persistence (extended in M4-AP-006)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <h2>M4 processing order (AP-006)</h2>
|
||||
* <p>
|
||||
* For each candidate, {@link de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase}
|
||||
* enforces this order:
|
||||
* <ol>
|
||||
* <li>Compute SHA-256 fingerprint of the candidate file content.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event;
|
||||
* do NOT write any SQLite record; continue with next candidate.</li>
|
||||
* <li>Run the M3 pipeline (PDF extraction + pre-checks).</li>
|
||||
* <li>Delegate to {@link de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor}
|
||||
* for idempotency check, status/counter mapping, and consistent persistence.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* All implementations are infrastructure-agnostic and interact only through ports.
|
||||
*
|
||||
* @since M2 (extended in M4-AP-006)
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
@@ -0,0 +1,425 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link M4DocumentProcessor}.
|
||||
* <p>
|
||||
* Covers:
|
||||
* <ul>
|
||||
* <li>M4 minimal rules: status, counter and retryable flag mapping</li>
|
||||
* <li>Skip logic for SUCCESS and FAILED_FINAL documents</li>
|
||||
* <li>New document path (DocumentUnknown)</li>
|
||||
* <li>Known processable document path (DocumentKnownProcessable)</li>
|
||||
* <li>Persistence lookup failure: no attempt written</li>
|
||||
* <li>Persistence write failure: controlled failure, no crash</li>
|
||||
* <li>Skip events do not change error counters</li>
|
||||
* </ul>
|
||||
*/
|
||||
class M4DocumentProcessorTest {
|
||||
|
||||
private static final String FINGERPRINT_HEX =
|
||||
"a".repeat(64); // 64 lowercase hex chars
|
||||
|
||||
private CapturingDocumentRecordRepository recordRepo;
|
||||
private CapturingProcessingAttemptRepository attemptRepo;
|
||||
private M4DocumentProcessor processor;
|
||||
|
||||
private SourceDocumentCandidate candidate;
|
||||
private DocumentFingerprint fingerprint;
|
||||
private BatchRunContext context;
|
||||
private Instant attemptStart;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
recordRepo = new CapturingDocumentRecordRepository();
|
||||
attemptRepo = new CapturingProcessingAttemptRepository();
|
||||
processor = new M4DocumentProcessor(recordRepo, attemptRepo);
|
||||
|
||||
candidate = new SourceDocumentCandidate(
|
||||
"test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf"));
|
||||
fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
context = new BatchRunContext(new RunId("run-001"), Instant.now());
|
||||
attemptStart = Instant.now();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// New document (DocumentUnknown) path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_newDocument_preCheckPassed_persistsSuccessStatus() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
// One attempt written
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
assertNull(attempt.failureClass());
|
||||
assertNull(attempt.failureMessage());
|
||||
|
||||
// One master record created
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
assertNotNull(record.lastSuccessInstant());
|
||||
assertNull(record.lastFailureInstant());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
|
||||
assertTrue(attempt.retryable());
|
||||
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(1, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
assertNotNull(record.lastFailureInstant());
|
||||
assertNull(record.lastSuccessInstant());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_newDocument_technicalError_persistsFailedRetryable_transientCounterOne() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
|
||||
candidate, "I/O error", null);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, attempt.status());
|
||||
assertTrue(attempt.retryable());
|
||||
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(1, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Known processable document path (DocumentKnownProcessable)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_knownDocument_secondContentError_persistsFailedFinal_contentCounterTwo() {
|
||||
// Existing record: first content error already recorded
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(1, 0));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
|
||||
assertEquals(2, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(0, 2));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
|
||||
candidate, "Timeout", null);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(3, record.failureCounters().transientErrorCount());
|
||||
assertTrue(attemptRepo.savedAttempts.get(0).retryable());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_knownDocument_preCheckPassed_persistsSuccess() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(0, 1));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
|
||||
// Counters unchanged on success
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(1, record.failureCounters().transientErrorCount());
|
||||
assertNotNull(record.lastSuccessInstant());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Skip paths
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_terminalSuccess_persistsSkipAttemptWithSkippedAlreadyProcessed() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.SUCCESS,
|
||||
FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
assertNull(attempt.failureClass());
|
||||
assertNull(attempt.failureMessage());
|
||||
|
||||
// Master record updated (only updatedAt changes)
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
// Status and counters remain unchanged
|
||||
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_terminalFinalFailure_persistsSkipAttemptWithSkippedFinalFailure() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_FINAL,
|
||||
new FailureCounters(2, 0));
|
||||
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
|
||||
assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, attempt.status());
|
||||
assertFalse(attempt.retryable());
|
||||
|
||||
// Master record updated (only updatedAt changes); counters unchanged
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus());
|
||||
assertEquals(2, record.failureCounters().contentErrorCount());
|
||||
assertEquals(0, record.failureCounters().transientErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_skipEvent_doesNotChangeErrorCounters() {
|
||||
FailureCounters originalCounters = new FailureCounters(1, 3);
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, originalCounters);
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
|
||||
processor.process(candidate, fingerprint,
|
||||
new PreCheckPassed(candidate, new PdfExtractionSuccess("t", new PdfPageCount(1))),
|
||||
context, attemptStart);
|
||||
|
||||
DocumentRecord updated = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(originalCounters.contentErrorCount(), updated.failureCounters().contentErrorCount(),
|
||||
"Skip must not change content error counter");
|
||||
assertEquals(originalCounters.transientErrorCount(), updated.failureCounters().transientErrorCount(),
|
||||
"Skip must not change transient error counter");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Persistence lookup failure
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_persistenceLookupFailure_noAttemptWritten_noException() {
|
||||
recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("DB unavailable", null));
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
// Must not throw
|
||||
assertDoesNotThrow(() ->
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart));
|
||||
|
||||
// No attempt written, no record created/updated
|
||||
assertEquals(0, attemptRepo.savedAttempts.size(),
|
||||
"No attempt must be written when lookup fails");
|
||||
assertEquals(0, recordRepo.createdRecords.size());
|
||||
assertEquals(0, recordRepo.updatedRecords.size());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Persistence write failure: controlled, no crash
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_persistenceWriteFailure_doesNotThrow_batchContinues() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
// Make the attempt save throw
|
||||
attemptRepo.failOnSave = true;
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
// Must not propagate the exception
|
||||
assertDoesNotThrow(() ->
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart));
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Attempt number monotonicity
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_attemptNumberIsAssignedFromRepository() {
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
attemptRepo.nextAttemptNumber = 3; // Simulate 2 prior attempts
|
||||
|
||||
DocumentProcessingOutcome m3Outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, attemptRepo.savedAttempts.size());
|
||||
assertEquals(3, attemptRepo.savedAttempts.get(0).attemptNumber(),
|
||||
"Attempt number must be taken from the repository");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) {
|
||||
Instant now = Instant.now();
|
||||
return new DocumentRecord(
|
||||
fingerprint,
|
||||
new SourceDocumentLocator("/tmp/test.pdf"),
|
||||
"test.pdf",
|
||||
status,
|
||||
counters,
|
||||
status == ProcessingStatus.SUCCESS ? null : now,
|
||||
status == ProcessingStatus.SUCCESS ? now : null,
|
||||
now,
|
||||
now
|
||||
);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Capturing test doubles
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private static class CapturingDocumentRecordRepository implements DocumentRecordRepository {
|
||||
private DocumentRecordLookupResult lookupResult = new DocumentUnknown();
|
||||
final List<DocumentRecord> createdRecords = new ArrayList<>();
|
||||
final List<DocumentRecord> updatedRecords = new ArrayList<>();
|
||||
|
||||
void setLookupResult(DocumentRecordLookupResult result) {
|
||||
this.lookupResult = result;
|
||||
}
|
||||
|
||||
@Override
|
||||
public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
|
||||
return lookupResult;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void create(DocumentRecord record) {
|
||||
createdRecords.add(record);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(DocumentRecord record) {
|
||||
updatedRecords.add(record);
|
||||
}
|
||||
}
|
||||
|
||||
private static class CapturingProcessingAttemptRepository implements ProcessingAttemptRepository {
|
||||
final List<ProcessingAttempt> savedAttempts = new ArrayList<>();
|
||||
int nextAttemptNumber = 1;
|
||||
boolean failOnSave = false;
|
||||
|
||||
@Override
|
||||
public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
|
||||
return nextAttemptNumber;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(ProcessingAttempt attempt) {
|
||||
if (failOnSave) {
|
||||
throw new DocumentPersistenceException("Simulated save failure");
|
||||
}
|
||||
savedAttempts.add(attempt);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
|
||||
return List.copyOf(savedAttempts);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -2,12 +2,24 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
@@ -37,10 +49,11 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||
* <ul>
|
||||
* <li>Lock acquisition and release lifecycle</li>
|
||||
* <li>Source folder scanning and per-document processing loop</li>
|
||||
* <li>Happy path: candidate passes pre-checks, ends controlled without KI or target copy</li>
|
||||
* <li>Happy path: candidate passes pre-checks, M4 persistence is invoked</li>
|
||||
* <li>Deterministic content errors: no usable text, page limit exceeded</li>
|
||||
* <li>Technical extraction errors: controlled per-document end, batch continues</li>
|
||||
* <li>Source folder access failure: batch fails with FAILURE outcome</li>
|
||||
* <li>M4 idempotency: fingerprint failure → not historised</li>
|
||||
* </ul>
|
||||
*/
|
||||
class BatchRunProcessingUseCaseTest {
|
||||
@@ -57,8 +70,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -73,8 +87,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
CountingRunLockPort lockPort = new CountingRunLockPort(true);
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -92,8 +107,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
CountingRunLockPort lockPort = new CountingRunLockPort(true);
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now());
|
||||
|
||||
useCase.execute(context);
|
||||
@@ -108,8 +124,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -128,8 +145,9 @@ class BatchRunProcessingUseCaseTest {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("empty"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
@@ -138,25 +156,26 @@ class BatchRunProcessingUseCaseTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_happyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception {
|
||||
void execute_happyPath_candidatePassesPreChecks_m4PersistenceInvoked() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
// Candidate with usable text within page limit
|
||||
SourceDocumentCandidate candidate = makeCandidate("document.pdf");
|
||||
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice text", new PdfPageCount(1));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("happy"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
// Batch run succeeds; document ended controlled at boundary (no KI, no copy)
|
||||
assertTrue(outcome.isSuccess(), "Happy path should yield SUCCESS");
|
||||
assertEquals(1, extractionPort.callCount(), "Extraction should be called exactly once");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called exactly once");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -165,40 +184,42 @@ class BatchRunProcessingUseCaseTest {
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("image-only.pdf");
|
||||
// Extraction returns text with no letters or digits
|
||||
PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess(" ", new PdfPageCount(1));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("no-text"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
// Document ends with pre-check failure; batch itself still succeeds
|
||||
assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for content errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_pageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
// Config has maxPages=3; document has 10 pages
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("big.pdf");
|
||||
PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("page-limit"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
// maxPages in buildConfig is 3; 10 pages exceeds limit – pre-check fails, batch continues
|
||||
assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for page limit errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -210,14 +231,17 @@ class BatchRunProcessingUseCaseTest {
|
||||
PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted");
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(contentError);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("content-error"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for content errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -229,14 +253,17 @@ class BatchRunProcessingUseCaseTest {
|
||||
PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null);
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("tech-error"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run");
|
||||
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for technical errors");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -248,18 +275,71 @@ class BatchRunProcessingUseCaseTest {
|
||||
throw new SourceDocumentAccessException("Source folder not readable");
|
||||
};
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, failingPort, new NoOpExtractionPort());
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, failingPort, new NoOpExtractionPort(),
|
||||
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("access-fail"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isFailure(), "Source folder access failure should yield FAILURE outcome");
|
||||
assertFalse(outcome.isSuccess(), "Source folder access failure must not be SUCCESS");
|
||||
// Lock must still be released
|
||||
assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even when source access fails");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// M4-specific: fingerprint failure → not historised
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void execute_fingerprintFailure_candidateNotHistorised_batchContinues() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("unreadable.pdf");
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
// Fingerprint always fails → M4 processor must NOT be called
|
||||
FingerprintPort alwaysFailingFingerprintPort = c ->
|
||||
new FingerprintTechnicalError("Cannot read file", null);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, new NoOpExtractionPort(),
|
||||
alwaysFailingFingerprintPort, m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("fp-fail"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "Fingerprint failure should not abort the batch run");
|
||||
assertEquals(0, m4Processor.processCallCount(),
|
||||
"M4 processor must NOT be called when fingerprint computation fails (pre-fingerprint failure)");
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_fingerprintFailure_extractionNotCalled() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate("unreadable.pdf");
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(
|
||||
new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
FingerprintPort alwaysFailingFingerprintPort = c ->
|
||||
new FingerprintTechnicalError("Cannot read file", null);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
alwaysFailingFingerprintPort, new NoOpM4DocumentProcessor());
|
||||
BatchRunContext context = new BatchRunContext(new RunId("fp-fail-no-extract"), Instant.now());
|
||||
|
||||
useCase.execute(context);
|
||||
|
||||
assertEquals(0, extractionPort.callCount(),
|
||||
"PDF extraction must NOT be called when fingerprint computation fails");
|
||||
}
|
||||
|
||||
/**
|
||||
* Mixed-batch test: one document per outcome type in a single run.
|
||||
* Proves that no individual outcome aborts the overall batch.
|
||||
@@ -267,7 +347,6 @@ class BatchRunProcessingUseCaseTest {
|
||||
@Test
|
||||
void execute_mixedBatch_allOutcomeTypes_batchOverallSucceeds() throws Exception {
|
||||
MockRunLockPort lockPort = new MockRunLockPort();
|
||||
// maxPages=3 in buildConfig; pageLimitCandidate has 10 pages → exceeds limit
|
||||
StartConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf");
|
||||
@@ -275,10 +354,11 @@ class BatchRunProcessingUseCaseTest {
|
||||
SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf");
|
||||
SourceDocumentCandidate technicalErrorCandidate = makeCandidate("broken.pdf");
|
||||
SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf");
|
||||
SourceDocumentCandidate fpFailCandidate = makeCandidate("unreadable.pdf");
|
||||
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(
|
||||
goodCandidate, noTextCandidate, pageLimitCandidate,
|
||||
technicalErrorCandidate, contentErrorCandidate));
|
||||
technicalErrorCandidate, contentErrorCandidate, fpFailCandidate));
|
||||
|
||||
MappedExtractionPort extractionPort = new MappedExtractionPort()
|
||||
.with(goodCandidate, new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)))
|
||||
@@ -287,16 +367,31 @@ class BatchRunProcessingUseCaseTest {
|
||||
.with(technicalErrorCandidate, new PdfExtractionTechnicalError("I/O error", null))
|
||||
.with(contentErrorCandidate, new PdfExtractionContentError("PDF is encrypted"));
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
// fpFailCandidate gets a fingerprint failure; others get a valid fingerprint
|
||||
FingerprintPort mappedFingerprintPort = candidate -> {
|
||||
if (candidate.uniqueIdentifier().equals("unreadable.pdf")) {
|
||||
return new FingerprintTechnicalError("Cannot read", null);
|
||||
}
|
||||
return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
|
||||
};
|
||||
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
mappedFingerprintPort, m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("mixed"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(),
|
||||
"Mixed batch with all outcome types must yield batch SUCCESS");
|
||||
assertTrue(outcome.isSuccess(), "Mixed batch with all outcome types must yield batch SUCCESS");
|
||||
// 5 candidates with successful fingerprint → M4 processor called 5 times
|
||||
// 1 candidate with fingerprint failure → M4 processor NOT called
|
||||
assertEquals(5, m4Processor.processCallCount(),
|
||||
"M4 processor must be called for each candidate with a successful fingerprint");
|
||||
// Extraction called for 5 candidates (not for fpFailCandidate)
|
||||
assertEquals(5, extractionPort.callCount(),
|
||||
"Extraction must be attempted for each of the 5 candidates");
|
||||
"Extraction must be attempted for each of the 5 candidates with a valid fingerprint");
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -312,21 +407,35 @@ class BatchRunProcessingUseCaseTest {
|
||||
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice content", new PdfPageCount(2));
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
|
||||
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort);
|
||||
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort,
|
||||
new AlwaysSuccessFingerprintPort(), m4Processor);
|
||||
BatchRunContext context = new BatchRunContext(new RunId("multi"), Instant.now());
|
||||
|
||||
BatchRunOutcome outcome = useCase.execute(context);
|
||||
|
||||
assertTrue(outcome.isSuccess(), "All three candidates processed should yield SUCCESS");
|
||||
assertEquals(3, extractionPort.callCount(), "Extraction should be called once per candidate");
|
||||
assertEquals(3, m4Processor.processCallCount(), "M4 processor should be called once per candidate");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
private static DefaultBatchRunProcessingUseCase buildUseCase(
|
||||
StartConfiguration config,
|
||||
RunLockPort lockPort,
|
||||
SourceDocumentCandidatesPort candidatesPort,
|
||||
PdfTextExtractionPort extractionPort,
|
||||
FingerprintPort fingerprintPort,
|
||||
M4DocumentProcessor m4Processor) {
|
||||
return new DefaultBatchRunProcessingUseCase(
|
||||
config, lockPort, candidatesPort, extractionPort, fingerprintPort, m4Processor);
|
||||
}
|
||||
|
||||
private static StartConfiguration buildConfig(Path tempDir) throws Exception {
|
||||
Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
|
||||
Path targetDir = Files.createDirectories(tempDir.resolve("target"));
|
||||
@@ -357,6 +466,13 @@ class BatchRunProcessingUseCaseTest {
|
||||
return new SourceDocumentCandidate(filename, 1024L, new SourceDocumentLocator("/tmp/" + filename));
|
||||
}
|
||||
|
||||
/** Creates a deterministic fake fingerprint from a string (padded to 64 hex chars). */
|
||||
private static DocumentFingerprint makeFingerprint(String seed) {
|
||||
String hex = String.format("%064x", Math.abs(seed.hashCode()));
|
||||
// Ensure exactly 64 lowercase hex chars
|
||||
return new DocumentFingerprint(hex.substring(0, 64));
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Mock / Stub implementations
|
||||
// -------------------------------------------------------------------------
|
||||
@@ -480,4 +596,88 @@ class BatchRunProcessingUseCaseTest {
|
||||
|
||||
int callCount() { return calls; }
|
||||
}
|
||||
|
||||
/**
|
||||
* Fingerprint port that always returns a deterministic success based on the candidate's
|
||||
* unique identifier.
|
||||
*/
|
||||
private static class AlwaysSuccessFingerprintPort implements FingerprintPort {
|
||||
@Override
|
||||
public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
|
||||
String hex = String.format("%064x", Math.abs(candidate.uniqueIdentifier().hashCode()));
|
||||
return new FingerprintSuccess(new DocumentFingerprint(hex.substring(0, 64)));
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* No-op M4DocumentProcessor that does nothing (for tests that only care about
|
||||
* lock/batch lifecycle, not M4 persistence).
|
||||
*/
|
||||
private static class NoOpM4DocumentProcessor extends M4DocumentProcessor {
|
||||
NoOpM4DocumentProcessor() {
|
||||
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Tracking M4DocumentProcessor that counts how many times {@code process()} is called.
|
||||
*/
|
||||
private static class TrackingM4DocumentProcessor extends M4DocumentProcessor {
|
||||
private int processCallCount = 0;
|
||||
|
||||
TrackingM4DocumentProcessor() {
|
||||
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void process(
|
||||
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
|
||||
de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint fingerprint,
|
||||
de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome m3Outcome,
|
||||
de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext context,
|
||||
java.time.Instant attemptStart) {
|
||||
processCallCount++;
|
||||
// Delegate to super so the real logic runs (with no-op repos)
|
||||
super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||
}
|
||||
|
||||
int processCallCount() { return processCallCount; }
|
||||
}
|
||||
|
||||
/** No-op DocumentRecordRepository for use in test M4DocumentProcessor instances. */
|
||||
private static class NoOpDocumentRecordRepository implements DocumentRecordRepository {
|
||||
@Override
|
||||
public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
|
||||
// Return DocumentUnknown so the M4 processor always takes the "new document" path
|
||||
return new DocumentUnknown();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void create(DocumentRecord record) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public void update(DocumentRecord record) {
|
||||
// No-op
|
||||
}
|
||||
}
|
||||
|
||||
/** No-op ProcessingAttemptRepository for use in test M4DocumentProcessor instances. */
|
||||
private static class NoOpProcessingAttemptRepository implements ProcessingAttemptRepository {
|
||||
@Override
|
||||
public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void save(ProcessingAttempt attempt) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
|
||||
return List.of();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -5,16 +5,26 @@ import org.apache.logging.log4j.Logger;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.InvalidStartConfigurationException;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfigurationValidator;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
|
||||
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
@@ -29,20 +39,35 @@ import java.util.UUID;
|
||||
* <p>
|
||||
* Responsibilities:
|
||||
* <ol>
|
||||
* <li>Load and validate the startup configuration</li>
|
||||
* <li>Resolve the run-lock file path (with default fallback)</li>
|
||||
* <li>Create and wire all ports and adapters</li>
|
||||
* <li>Start the CLI adapter and execute the batch use case</li>
|
||||
* <li>Map the batch outcome to a process exit code</li>
|
||||
* <li>Load and validate the startup configuration.</li>
|
||||
* <li>Resolve the run-lock file path (with default fallback).</li>
|
||||
* <li>Initialise the SQLite schema (M4: before the batch document loop begins).</li>
|
||||
* <li>Create and wire all ports and adapters, including the M4 persistence ports.</li>
|
||||
* <li>Start the CLI adapter and execute the batch use case.</li>
|
||||
* <li>Map the batch outcome to a process exit code.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Exit code semantics:
|
||||
*
|
||||
* <h2>Exit code semantics</h2>
|
||||
* <ul>
|
||||
* <li>{@code 0}: Batch run executed successfully; individual document failures do not
|
||||
* change the exit code as long as the run itself completed without a hard infrastructure error.</li>
|
||||
* <li>{@code 1}: Hard start, bootstrap, or configuration failure that prevented the run
|
||||
* from beginning, or a critical infrastructure failure during the run.</li>
|
||||
* change the exit code as long as the run itself completed without a hard
|
||||
* infrastructure error.</li>
|
||||
* <li>{@code 1}: Hard start, bootstrap, configuration, or schema-initialisation failure
|
||||
* that prevented the run from beginning, or a critical infrastructure failure
|
||||
* during the run.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>M4 wiring</h2>
|
||||
* <p>
|
||||
* The production constructor wires the following M4 adapters:
|
||||
* <ul>
|
||||
* <li>{@link Sha256FingerprintAdapter} — SHA-256 content fingerprinting.</li>
|
||||
* <li>{@link SqliteSchemaInitializationAdapter} — schema initialisation at startup.</li>
|
||||
* <li>{@link SqliteDocumentRecordRepositoryAdapter} — document master record CRUD.</li>
|
||||
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} — attempt history CRUD.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M2 (extended in M4-AP-006)
|
||||
*/
|
||||
public class BootstrapRunner {
|
||||
|
||||
@@ -83,7 +108,7 @@ public class BootstrapRunner {
|
||||
* <p>
|
||||
* Receives the already-loaded and validated {@link StartConfiguration} and run lock port.
|
||||
* The factory is responsible for creating and wiring any additional outbound ports
|
||||
* required by the use case (e.g., source document port, PDF extraction port).
|
||||
* required by the use case (e.g., source document port, PDF extraction port, M4 ports).
|
||||
*/
|
||||
@FunctionalInterface
|
||||
public interface UseCaseFactory {
|
||||
@@ -101,23 +126,41 @@ public class BootstrapRunner {
|
||||
/**
|
||||
* Creates the BootstrapRunner with default factories for production use.
|
||||
* <p>
|
||||
* Wires the full processing pipeline:
|
||||
* Wires the full M4 processing pipeline:
|
||||
* <ul>
|
||||
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading</li>
|
||||
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking</li>
|
||||
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery</li>
|
||||
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction</li>
|
||||
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading.</li>
|
||||
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking.</li>
|
||||
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery.</li>
|
||||
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction.</li>
|
||||
* <li>{@link Sha256FingerprintAdapter} for SHA-256 content fingerprinting.</li>
|
||||
* <li>{@link SqliteDocumentRecordRepositoryAdapter} for document master record CRUD.</li>
|
||||
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} for attempt history CRUD.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Schema initialisation is performed in {@link #run()} before the use case is created,
|
||||
* using {@link SqliteSchemaInitializationAdapter}.
|
||||
*/
|
||||
public BootstrapRunner() {
|
||||
this.configPortFactory = PropertiesConfigurationPortAdapter::new;
|
||||
this.runLockPortFactory = FilesystemRunLockPortAdapter::new;
|
||||
this.validatorFactory = StartConfigurationValidator::new;
|
||||
this.useCaseFactory = (config, lock) -> new DefaultBatchRunProcessingUseCase(
|
||||
this.useCaseFactory = (config, lock) -> {
|
||||
String jdbcUrl = buildJdbcUrl(config);
|
||||
FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
|
||||
DocumentRecordRepository documentRecordRepository =
|
||||
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
|
||||
ProcessingAttemptRepository processingAttemptRepository =
|
||||
new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
|
||||
M4DocumentProcessor m4Processor =
|
||||
new M4DocumentProcessor(documentRecordRepository, processingAttemptRepository);
|
||||
return new DefaultBatchRunProcessingUseCase(
|
||||
config,
|
||||
lock,
|
||||
new SourceDocumentCandidatesPortAdapter(config.sourceFolder()),
|
||||
new PdfTextExtractionPortAdapter());
|
||||
new PdfTextExtractionPortAdapter(),
|
||||
fingerprintPort,
|
||||
m4Processor);
|
||||
};
|
||||
this.commandFactory = SchedulerBatchCommand::new;
|
||||
}
|
||||
|
||||
@@ -145,11 +188,17 @@ public class BootstrapRunner {
|
||||
/**
|
||||
* Runs the application startup sequence.
|
||||
* <p>
|
||||
* AP-003: Manually wires the object graph and invokes the CLI command.
|
||||
* AP-005: Wires ConfigurationPort adapter and passes it to the use case.
|
||||
* AP-006: Validates configuration before allowing processing to start.
|
||||
* M4 additions:
|
||||
* <ul>
|
||||
* <li>Derives the SQLite JDBC URL from the configured {@code sqlite.file} path.</li>
|
||||
* <li>Initialises the M4 SQLite schema via
|
||||
* {@link PersistenceSchemaInitializationPort#initializeSchema()} before the
|
||||
* batch document loop begins. A schema initialisation failure aborts the run
|
||||
* with exit code 1.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @return exit code: 0 for success, 1 for invalid configuration or unexpected failure
|
||||
* @return exit code: 0 for success, 1 for invalid configuration, schema failure,
|
||||
* or unexpected bootstrap failure
|
||||
*/
|
||||
public int run() {
|
||||
LOG.info("Bootstrap flow started.");
|
||||
@@ -160,61 +209,105 @@ public class BootstrapRunner {
|
||||
// Step 2: Load configuration
|
||||
var config = configPort.loadConfiguration();
|
||||
|
||||
// Step 3: Validate configuration (AP-006)
|
||||
// Step 3: Validate configuration
|
||||
StartConfigurationValidator validator = validatorFactory.create();
|
||||
validator.validate(config);
|
||||
|
||||
// Step 4: Resolve lock file path – apply default if not configured (AP-006)
|
||||
// Step 4: Resolve lock file path – apply default if not configured
|
||||
Path lockFilePath = config.runtimeLockFile();
|
||||
if (lockFilePath == null || lockFilePath.toString().isBlank()) {
|
||||
lockFilePath = Paths.get("pdf-umbenenner.lock");
|
||||
LOG.info("runtime.lock.file not configured, using default lock path: {}", lockFilePath.toAbsolutePath());
|
||||
LOG.info("runtime.lock.file not configured, using default lock path: {}",
|
||||
lockFilePath.toAbsolutePath());
|
||||
}
|
||||
RunLockPort runLockPort = runLockPortFactory.create(lockFilePath);
|
||||
|
||||
// Step 5: Create the batch run context
|
||||
// Generate a unique run ID and initialize the run context
|
||||
// Step 5 (M4): Initialise the SQLite schema before the batch loop begins.
|
||||
// A failure here is a hard start error → exit code 1.
|
||||
initializeSchema(config);
|
||||
|
||||
// Step 6: Create the batch run context
|
||||
RunId runId = new RunId(UUID.randomUUID().toString());
|
||||
BatchRunContext runContext = new BatchRunContext(runId, Instant.now());
|
||||
LOG.info("Batch run started. RunId: {}", runId);
|
||||
|
||||
// Step 6: Create the use case with the validated config and run lock (application layer).
|
||||
// Step 7: Create the use case with the validated config and run lock.
|
||||
// Config is passed directly; the use case does not re-read the properties file.
|
||||
// Adapters (source document port, PDF extraction port) are wired by the factory.
|
||||
// Adapters (source document port, PDF extraction port, M4 ports) are wired by the factory.
|
||||
BatchRunProcessingUseCase useCase = useCaseFactory.create(config, runLockPort);
|
||||
|
||||
// Step 7: Create the CLI command adapter with the use case
|
||||
// Step 8: Create the CLI command adapter with the use case
|
||||
SchedulerBatchCommand command = commandFactory.create(useCase);
|
||||
|
||||
// Step 8: Execute the command with the run context and handle the outcome
|
||||
// Step 9: Execute the command with the run context and handle the outcome
|
||||
BatchRunOutcome outcome = command.run(runContext);
|
||||
|
||||
// Mark run as completed (AP-003)
|
||||
// Mark run as completed
|
||||
runContext.setEndInstant(Instant.now());
|
||||
|
||||
if (outcome.isSuccess()) {
|
||||
LOG.info("Batch run completed successfully. RunId: {}", runContext.runId());
|
||||
return 0;
|
||||
} else if (outcome.isLockUnavailable()) {
|
||||
LOG.warn("Batch run aborted: another instance is already running. RunId: {}", runContext.runId());
|
||||
LOG.warn("Batch run aborted: another instance is already running. RunId: {}",
|
||||
runContext.runId());
|
||||
return 1;
|
||||
} else {
|
||||
LOG.error("Batch run failed. RunId: {}", runContext.runId());
|
||||
return 1;
|
||||
}
|
||||
} catch (InvalidStartConfigurationException e) {
|
||||
// Controlled failure for invalid configuration - log clearly without stack trace
|
||||
// Controlled failure for invalid configuration – log clearly without stack trace
|
||||
LOG.error("Configuration validation failed: {}", e.getMessage());
|
||||
return 1;
|
||||
} catch (IllegalStateException e) {
|
||||
// Configuration loading failed due to missing/invalid required properties
|
||||
// Treat as invalid configuration for controlled failure
|
||||
LOG.error("Configuration loading failed: {}", e.getMessage());
|
||||
return 1;
|
||||
} catch (DocumentPersistenceException e) {
|
||||
// Schema initialisation failed – hard start error
|
||||
LOG.error("SQLite schema initialisation failed: {}", e.getMessage(), e);
|
||||
return 1;
|
||||
} catch (Exception e) {
|
||||
LOG.error("Bootstrap failure during startup.", e);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialises the M4 SQLite schema using the configured SQLite file path.
|
||||
* <p>
|
||||
* This method is called once at startup, before the batch document loop begins.
|
||||
* It uses the production {@link SqliteSchemaInitializationAdapter} directly because
|
||||
* schema initialisation is a startup concern, not a per-document concern, and the
|
||||
* {@link UseCaseFactory} abstraction is not the right place for it.
|
||||
* <p>
|
||||
* If the {@code sqlite.file} configuration is null or blank, schema initialisation
|
||||
* is skipped with a warning. This allows the existing test infrastructure (which
|
||||
* uses the custom {@link UseCaseFactory}) to continue working without a real SQLite
|
||||
* file.
|
||||
*
|
||||
* @param config the validated startup configuration
|
||||
* @throws DocumentPersistenceException if schema initialisation fails
|
||||
*/
|
||||
private void initializeSchema(StartConfiguration config) {
|
||||
if (config.sqliteFile() == null) {
|
||||
LOG.warn("sqlite.file not configured – skipping schema initialisation.");
|
||||
return;
|
||||
}
|
||||
String jdbcUrl = buildJdbcUrl(config);
|
||||
PersistenceSchemaInitializationPort schemaPort = new SqliteSchemaInitializationAdapter(jdbcUrl);
|
||||
schemaPort.initializeSchema();
|
||||
LOG.info("M4 SQLite schema initialised at: {}", jdbcUrl);
|
||||
}
|
||||
|
||||
/**
|
||||
* Builds the JDBC URL for the SQLite database from the configured file path.
|
||||
*
|
||||
* @param config the startup configuration containing the SQLite file path
|
||||
* @return the JDBC URL in the form {@code jdbc:sqlite:/path/to/file.db}
|
||||
*/
|
||||
static String buildJdbcUrl(StartConfiguration config) {
|
||||
return "jdbc:sqlite:" + config.sqliteFile().toAbsolutePath().toString().replace('\\', '/');
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user