1
0

Nachbearbeitung: M4DocumentProcessor fachlich neutral umbenannt

This commit is contained in:
2026-04-04 10:43:31 +02:00
parent 326e739e45
commit c3d207b742
5 changed files with 179 additions and 179 deletions

View File

@@ -29,30 +29,30 @@ import java.time.Instant;
import java.util.Objects;
/**
* Application-level service that implements the M4 per-document processing logic.
* Application-level service that implements the per-document processing logic.
* <p>
* This service is the single authoritative place for the M4 decision rules:
* This service is the single authoritative place for the decision rules:
* idempotency checks, status/counter mapping, and consistent two-level persistence.
* It is intentionally tightly scoped to AP-006 and contains no M5+ logic.
* It is intentionally tightly scoped to AP-006 and contains no logic beyond that scope.
*
* <h2>M4 processing order per candidate</h2>
* <h2>Processing order per candidate</h2>
* <ol>
* <li>Load the document master record by fingerprint.</li>
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
* <li>Otherwise execute the M3 flow (already done by the caller) and map the result
* into M4 status, counters and retryable flag.</li>
* <li>Otherwise take the pipeline outcome (PDF extraction + pre-checks, already executed by
* the caller) and map it into status, counters and retryable flag.</li>
* <li>Persist exactly one historised processing attempt for the identified document.</li>
* <li>Persist the updated document master record.</li>
* </ol>
*
* <h2>M4 minimal rules</h2>
* <h2>Minimal rules</h2>
* <ul>
* <li>Already successful documents are skipped in later runs.</li>
* <li>Already finally failed documents are skipped in later runs.</li>
* <li>First historised deterministic content failure from M3
* <li>First historised deterministic content failure from processing →
* {@link ProcessingStatus#FAILED_RETRYABLE}, content error counter becomes 1,
* {@code retryable=true}.</li>
* <li>Second historised deterministic content failure in a later run
@@ -77,18 +77,18 @@ import java.util.Objects;
* historised in SQLite. They are handled by the caller and logged as non-identifiable
* run events.
*
* @since M4-AP-006
* @since AP-006
*/
public class M4DocumentProcessor {
public class DocumentProcessingCoordinator {
private static final Logger LOG = LogManager.getLogger(M4DocumentProcessor.class);
private static final Logger LOG = LogManager.getLogger(DocumentProcessingCoordinator.class);
private final DocumentRecordRepository documentRecordRepository;
private final ProcessingAttemptRepository processingAttemptRepository;
private final UnitOfWorkPort unitOfWorkPort;
/**
* Creates the M4 document processor with the required persistence ports.
* Creates the document processing coordinator with the required persistence ports.
*
* @param documentRecordRepository port for reading and writing the document master record;
* must not be null
@@ -98,7 +98,7 @@ public class M4DocumentProcessor {
* must not be null
* @throws NullPointerException if any parameter is null
*/
public M4DocumentProcessor(
public DocumentProcessingCoordinator(
DocumentRecordRepository documentRecordRepository,
ProcessingAttemptRepository processingAttemptRepository,
UnitOfWorkPort unitOfWorkPort) {
@@ -111,10 +111,10 @@ public class M4DocumentProcessor {
}
/**
* Applies the full M4 processing logic for one identified document candidate.
* Applies the full processing logic for one identified document candidate.
* <p>
* The caller must have already computed a valid {@link DocumentFingerprint} for the
* candidate. The M3 outcome (from the PDF extraction and pre-check pipeline) is
* candidate. The outcome (from the PDF extraction and pre-check pipeline) is
* provided as {@code m3Outcome} and is used only when the document is not in a
* terminal state.
* <p>
@@ -124,7 +124,7 @@ public class M4DocumentProcessor {
* @param candidate the source document candidate being processed; must not be null
* @param fingerprint the successfully computed fingerprint for this candidate;
* must not be null
* @param m3Outcome the result of the M3 pipeline (PDF extraction + pre-checks);
* @param m3Outcome the result of the pipeline (PDF extraction + pre-checks);
* must not be null
* @param context the current batch run context (for run ID and timing);
* must not be null
@@ -197,15 +197,15 @@ public class M4DocumentProcessor {
}
/**
* Applies the full M4 processing logic for one identified document candidate.
* Applies the full processing logic for one identified document candidate.
* <p>
* The caller must have already computed a valid {@link DocumentFingerprint} for the
* candidate. This method handles the complete M4 processing flow:
* candidate. This method handles the complete processing flow:
* <ol>
* <li>Load document master record.</li>
* <li>Handle terminal SUCCESS / FAILED_FINAL skip cases first.</li>
* <li>Only if not terminal: execute the M3 flow (PDF extraction + pre-checks).</li>
* <li>Map M3 outcome to M4 status, counters and retryable flag.</li>
* <li>Only if not terminal: execute the flow (PDF extraction + pre-checks).</li>
* <li>Map outcome to status, counters and retryable flag.</li>
* <li>Persist exactly one historised processing attempt.</li>
* <li>Persist the updated document master record.</li>
* </ol>
@@ -220,7 +220,7 @@ public class M4DocumentProcessor {
* must not be null
* @param attemptStart the instant at which processing of this candidate began;
* must not be null
* @param m3Executor functional interface to execute the M3 pipeline when needed;
* @param m3Executor functional interface to execute the pipeline when needed;
* must not be null
*/
public void processWithM3Execution(
@@ -270,13 +270,13 @@ public class M4DocumentProcessor {
}
case DocumentUnknown ignored -> {
// New document execute M3 pipeline and process
// New document → execute pipeline and process
DocumentProcessingOutcome m3Outcome = m3Executor.apply(candidate);
processAndPersistNewDocument(candidate, fingerprint, m3Outcome, context, attemptStart);
}
case DocumentKnownProcessable knownProcessable -> {
// Known but not terminal execute M3 pipeline and process
// Known but not terminal → execute pipeline and process
DocumentProcessingOutcome m3Outcome = m3Executor.apply(candidate);
processAndPersistKnownDocument(
candidate, fingerprint, m3Outcome, knownProcessable.record(),
@@ -371,7 +371,7 @@ public class M4DocumentProcessor {
*
* @param candidate the candidate being processed
* @param fingerprint the document fingerprint
* @param m3Outcome the M3 pipeline result
* @param m3Outcome the pipeline result
* @param context the current batch run context
* @param attemptStart the start instant of this processing attempt
*/
@@ -384,25 +384,25 @@ public class M4DocumentProcessor {
Instant now = Instant.now();
// Map M3 outcome to M4 status/counters for a brand-new document
M4Outcome m4 = mapM3OutcomeForNewDocument(m3Outcome);
// Map outcome to status/counters for a brand-new document
ProcessingOutcome outcome = mapM3OutcomeForNewDocument(m3Outcome);
try {
// Attempt number is always 1 for a new document
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt attempt = buildAttempt(
fingerprint, context, attemptNumber, attemptStart, now, m4);
fingerprint, context, attemptNumber, attemptStart, now, outcome);
// Create the new master record
DocumentRecord newRecord = new DocumentRecord(
fingerprint,
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
m4.overallStatus(),
m4.counters(),
m4.overallStatus() == ProcessingStatus.SUCCESS ? null : now, // lastFailureInstant
m4.overallStatus() == ProcessingStatus.SUCCESS ? now : null, // lastSuccessInstant
outcome.overallStatus(),
outcome.counters(),
outcome.overallStatus() == ProcessingStatus.SUCCESS ? null : now, // lastFailureInstant
outcome.overallStatus() == ProcessingStatus.SUCCESS ? now : null, // lastSuccessInstant
now, // createdAt
now // updatedAt
);
@@ -415,9 +415,9 @@ public class M4DocumentProcessor {
LOG.info("New document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
candidate.uniqueIdentifier(),
m4.overallStatus(),
m4.counters().contentErrorCount(),
m4.counters().transientErrorCount());
outcome.overallStatus(),
outcome.counters().contentErrorCount(),
outcome.counters().transientErrorCount());
} catch (DocumentPersistenceException e) {
LOG.error("Failed to persist processing result for new document '{}': {}",
@@ -435,7 +435,7 @@ public class M4DocumentProcessor {
*
* @param candidate the candidate being processed
* @param fingerprint the document fingerprint
* @param m3Outcome the M3 pipeline result
* @param m3Outcome the pipeline result
* @param existingRecord the current master record (not terminal)
* @param context the current batch run context
* @param attemptStart the start instant of this processing attempt
@@ -450,25 +450,25 @@ public class M4DocumentProcessor {
Instant now = Instant.now();
// Map M3 outcome to M4 status/counters, taking existing counters into account
M4Outcome m4 = mapM3OutcomeForKnownDocument(m3Outcome, existingRecord.failureCounters());
// Map outcome to status/counters, taking existing counters into account
ProcessingOutcome outcome = mapM3OutcomeForKnownDocument(m3Outcome, existingRecord.failureCounters());
try {
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt attempt = buildAttempt(
fingerprint, context, attemptNumber, attemptStart, now, m4);
fingerprint, context, attemptNumber, attemptStart, now, outcome);
// Update the master record with new status, counters and timestamps
DocumentRecord updatedRecord = new DocumentRecord(
existingRecord.fingerprint(),
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
m4.overallStatus(),
m4.counters(),
m4.overallStatus() == ProcessingStatus.SUCCESS
outcome.overallStatus(),
outcome.counters(),
outcome.overallStatus() == ProcessingStatus.SUCCESS
? existingRecord.lastFailureInstant() : now,
m4.overallStatus() == ProcessingStatus.SUCCESS
outcome.overallStatus() == ProcessingStatus.SUCCESS
? now : existingRecord.lastSuccessInstant(),
existingRecord.createdAt(),
now // updatedAt
@@ -482,9 +482,9 @@ public class M4DocumentProcessor {
LOG.info("Known document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
candidate.uniqueIdentifier(),
m4.overallStatus(),
m4.counters().contentErrorCount(),
m4.counters().transientErrorCount());
outcome.overallStatus(),
outcome.counters().contentErrorCount(),
outcome.counters().transientErrorCount());
} catch (DocumentPersistenceException e) {
LOG.error("Failed to persist processing result for known document '{}': {}",
@@ -493,51 +493,51 @@ public class M4DocumentProcessor {
}
// -------------------------------------------------------------------------
// M3 M4 outcome mapping
// Pipeline outcome → status/counter mapping
// -------------------------------------------------------------------------
/**
* Maps an M3 outcome to M4 status, counters, and retryable flag for a brand-new
* Maps an outcome to status, counters, and retryable flag for a brand-new
* document (no prior history, counters start at zero).
*
* @param m3Outcome the M3 pipeline result
* @return the M4 outcome with status, counters and retryable flag
* @param m3Outcome the pipeline result
* @return the outcome with status, counters and retryable flag
*/
private M4Outcome mapM3OutcomeForNewDocument(DocumentProcessingOutcome m3Outcome) {
private ProcessingOutcome mapM3OutcomeForNewDocument(DocumentProcessingOutcome m3Outcome) {
return mapM3OutcomeForKnownDocument(m3Outcome, FailureCounters.zero());
}
/**
* Maps an M3 outcome to M4 status, counters, and retryable flag, taking the
* Maps an outcome to status, counters, and retryable flag, taking the
* existing failure counters into account.
* <p>
* <strong>M4 minimal rules applied here:</strong>
* <strong>Minimal rules applied here:</strong>
* <ul>
* <li>M3 success → {@link ProcessingStatus#SUCCESS}, counters unchanged,
* <li>success → {@link ProcessingStatus#SUCCESS}, counters unchanged,
* {@code retryable=false}.</li>
* <li>M3 deterministic content error (first occurrence, contentErrorCount == 0) →
* <li>deterministic content error (first occurrence, contentErrorCount == 0) →
* {@link ProcessingStatus#FAILED_RETRYABLE}, contentErrorCount +1,
* {@code retryable=true}.</li>
* <li>M3 deterministic content error (second occurrence, contentErrorCount >= 1) →
* <li>deterministic content error (second occurrence, contentErrorCount >= 1) →
* {@link ProcessingStatus#FAILED_FINAL}, contentErrorCount +1,
* {@code retryable=false}.</li>
* <li>M3 technical error → {@link ProcessingStatus#FAILED_RETRYABLE},
* <li>technical error → {@link ProcessingStatus#FAILED_RETRYABLE},
* transientErrorCount +1, {@code retryable=true}.</li>
* </ul>
*
* @param m3Outcome the M3 pipeline result
* @param m3Outcome the pipeline result
* @param existingCounters the current failure counters from the master record
* @return the M4 outcome with updated status, counters and retryable flag
* @return the outcome with updated status, counters and retryable flag
*/
private M4Outcome mapM3OutcomeForKnownDocument(
private ProcessingOutcome mapM3OutcomeForKnownDocument(
DocumentProcessingOutcome m3Outcome,
FailureCounters existingCounters) {
return switch (m3Outcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored -> {
// M3 success: document passed all pre-checks
// In M4 scope (no KI, no target copy), PreCheckPassed is the terminal success
yield new M4Outcome(
// success: document passed all pre-checks
// In the current scope (no KI/AI integration, no target copy), PreCheckPassed is the terminal success
yield new ProcessingOutcome(
ProcessingStatus.SUCCESS,
existingCounters, // counters unchanged on success
false // not retryable
@@ -551,14 +551,14 @@ public class M4DocumentProcessor {
if (isFirstOccurrence) {
// First content error → FAILED_RETRYABLE
yield new M4Outcome(
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
updatedCounters,
true
);
} else {
// Second (or later) content error → FAILED_FINAL
yield new M4Outcome(
yield new ProcessingOutcome(
ProcessingStatus.FAILED_FINAL,
updatedCounters,
false
@@ -568,7 +568,7 @@ public class M4DocumentProcessor {
case TechnicalDocumentError technicalError -> {
// Technical error after fingerprinting: always FAILED_RETRYABLE, increment transient counter
yield new M4Outcome(
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),
true
@@ -582,14 +582,14 @@ public class M4DocumentProcessor {
// -------------------------------------------------------------------------
/**
* Constructs a {@link ProcessingAttempt} from the given parameters and M4 outcome.
* Constructs a {@link ProcessingAttempt} from the given parameters and outcome.
*
* @param fingerprint the document fingerprint
* @param context the current batch run context
* @param attemptNumber the monotonic attempt number
* @param startedAt the start instant of this attempt
* @param endedAt the end instant of this attempt
* @param m4 the M4 outcome (status, counters, retryable)
* @param outcome the outcome (status, counters, retryable)
* @return the constructed processing attempt
*/
private ProcessingAttempt buildAttempt(
@@ -598,15 +598,15 @@ public class M4DocumentProcessor {
int attemptNumber,
Instant startedAt,
Instant endedAt,
M4Outcome m4) {
ProcessingOutcome outcome) {
String failureClass = null;
String failureMessage = null;
if (m4.overallStatus() == ProcessingStatus.FAILED_RETRYABLE
|| m4.overallStatus() == ProcessingStatus.FAILED_FINAL) {
failureClass = m4.overallStatus().name();
failureMessage = buildFailureMessage(m4);
if (outcome.overallStatus() == ProcessingStatus.FAILED_RETRYABLE
|| outcome.overallStatus() == ProcessingStatus.FAILED_FINAL) {
failureClass = outcome.overallStatus().name();
failureMessage = buildFailureMessage(outcome);
}
return new ProcessingAttempt(
@@ -615,46 +615,46 @@ public class M4DocumentProcessor {
attemptNumber,
startedAt,
endedAt,
m4.overallStatus(),
outcome.overallStatus(),
failureClass,
failureMessage,
m4.retryable()
outcome.retryable()
);
}
/**
* Builds a human-readable failure message from the M4 outcome.
* Builds a human-readable failure message from the outcome.
*
* @param m4 the M4 outcome
* @param outcome the outcome
* @return a non-null failure message string
*/
private String buildFailureMessage(M4Outcome m4) {
return switch (m4.overallStatus()) {
private String buildFailureMessage(ProcessingOutcome outcome) {
return switch (outcome.overallStatus()) {
case FAILED_RETRYABLE -> "Processing failed (retryable). "
+ "ContentErrors=" + m4.counters().contentErrorCount()
+ ", TransientErrors=" + m4.counters().transientErrorCount();
+ "ContentErrors=" + outcome.counters().contentErrorCount()
+ ", TransientErrors=" + outcome.counters().transientErrorCount();
case FAILED_FINAL -> "Processing failed finally (not retryable). "
+ "ContentErrors=" + m4.counters().contentErrorCount()
+ ", TransientErrors=" + m4.counters().transientErrorCount();
default -> m4.overallStatus().name();
+ "ContentErrors=" + outcome.counters().contentErrorCount()
+ ", TransientErrors=" + outcome.counters().transientErrorCount();
default -> outcome.overallStatus().name();
};
}
// -------------------------------------------------------------------------
// Internal value type: M4 outcome
// Internal value type: outcome
// -------------------------------------------------------------------------
/**
* Internal value type carrying the M4 status, updated counters, and retryable flag
* after mapping from an M3 outcome.
* Internal value type carrying the status, updated counters, and retryable flag
* after mapping from a pipeline outcome.
* <p>
* Tightly scoped to {@link M4DocumentProcessor}; not exposed outside this class.
* Tightly scoped to {@link DocumentProcessingCoordinator}; not exposed outside this class.
*
* @param overallStatus the M4 overall status to persist
* @param overallStatus the overall status to persist
* @param counters the updated failure counters to persist
* @param retryable whether the failure is retryable in a later run
*/
private record M4Outcome(
private record ProcessingOutcome(
ProcessingStatus overallStatus,
FailureCounters counters,
boolean retryable) {

View File

@@ -12,8 +12,8 @@ import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService;
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
@@ -30,11 +30,11 @@ import java.util.Objects;
/**
* Batch processing implementation of {@link BatchRunProcessingUseCase}.
* <p>
* Orchestrates the complete M4 batch processing workflow per candidate:
* Orchestrates the complete batch processing workflow per candidate:
* <ol>
* <li>Acquire exclusive run lock to prevent concurrent instances.</li>
* <li>Scan source folder for PDF candidates.</li>
* <li>For each candidate, execute the M4 processing order:
* <li>For each candidate, execute the processing order:
* <ol type="a">
* <li>Compute fingerprint.</li>
* <li>Load document master record.</li>
@@ -42,8 +42,8 @@ import java.util.Objects;
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
* {@code SKIPPED_FINAL_FAILURE}.</li>
* <li>Otherwise execute the M3 pipeline (extraction + pre-checks).</li>
* <li>Map M3 result into M4 status, counters and retryable flag.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks).</li>
* <li>Map result into status, counters and retryable flag.</li>
* <li>Persist exactly one historised processing attempt.</li>
* <li>Persist the updated document master record.</li>
* </ol>
@@ -66,17 +66,17 @@ import java.util.Objects;
* <h2>Persistence consistency</h2>
* <p>
* For every identified document, the processing attempt and the master record are
* written in sequence by {@link M4DocumentProcessor}. Persistence failures for a single
* written in sequence by {@link DocumentProcessingCoordinator}. Persistence failures for a single
* document are caught and logged; the batch run continues with the remaining candidates.
*
* <h2>Non-Goals (not implemented in M4)</h2>
* <h2>Non-Goals (not implemented)</h2>
* <ul>
* <li>No KI/AI integration or prompt loading.</li>
* <li>No filename generation or target file copy.</li>
* <li>No M5+ retry rules for KI or target copy failures.</li>
* <li>No retry rules for KI or target copy failures.</li>
* </ul>
*
* @since M3-AP-004 (extended in M4-AP-006)
* @since AP-004 (extended in AP-006)
*/
public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCase {
@@ -87,11 +87,11 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort;
private final PdfTextExtractionPort pdfTextExtractionPort;
private final FingerprintPort fingerprintPort;
private final M4DocumentProcessor m4DocumentProcessor;
private final DocumentProcessingCoordinator m4DocumentProcessor;
/**
* Creates the batch use case with the already-loaded startup configuration and all
* required ports for the M4 flow.
* required ports for the flow.
* <p>
* The configuration is loaded and validated by Bootstrap before use case creation;
* the use case receives the result directly and does not re-read the properties file.
@@ -104,7 +104,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
* must not be null
* @param fingerprintPort for computing the SHA-256 fingerprint of a candidate;
* must not be null
* @param m4DocumentProcessor for applying M4 decision logic and persisting results;
* @param m4DocumentProcessor for applying decision logic and persisting results;
* must not be null
* @throws NullPointerException if any parameter is null
*/
@@ -114,7 +114,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
PdfTextExtractionPort pdfTextExtractionPort,
FingerprintPort fingerprintPort,
M4DocumentProcessor m4DocumentProcessor) {
DocumentProcessingCoordinator m4DocumentProcessor) {
this.configuration = Objects.requireNonNull(configuration, "configuration must not be null");
this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null");
this.sourceDocumentCandidatesPort = Objects.requireNonNull(
@@ -158,7 +158,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
}
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
// Step 3: Process each candidate through the M4 pipeline
// Step 3: Process each candidate through the pipeline
for (SourceDocumentCandidate candidate : candidates) {
processCandidate(candidate, context);
}
@@ -186,9 +186,9 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
}
/**
* Processes a single PDF candidate through the complete M4 pipeline.
* Processes a single PDF candidate through the complete pipeline.
* <p>
* M4 processing order:
* Processing order:
* <ol>
* <li>Record the attempt start instant.</li>
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
@@ -199,8 +199,8 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
* {@code SKIPPED_FINAL_FAILURE}.</li>
* <li>Otherwise execute the M3 pipeline (extraction + pre-checks).</li>
* <li>Map M3 result into M4 status, counters and retryable flag.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks).</li>
* <li>Map result into status, counters and retryable flag.</li>
* <li>Persist exactly one historised processing attempt.</li>
* <li>Persist the updated document master record.</li>
* </ol>
@@ -217,7 +217,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
// Record the attempt start instant before any work begins
Instant attemptStart = Instant.now();
// Step M4-1: Compute fingerprint
// Step 1: Compute fingerprint
FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
switch (fingerprintResult) {
@@ -233,28 +233,28 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
LOG.debug("Fingerprint computed for '{}': {}",
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
// Delegate the complete M4 processing logic to the processor
// Delegate the complete processing logic to the processor
// The processor handles loading document master record, checking terminal status,
// executing M3 pipeline only when needed, and persisting results consistently
// executing the pipeline only when needed, and persisting results consistently
m4DocumentProcessor.processWithM3Execution(
candidate,
fingerprint,
context,
attemptStart,
this::runM3Pipeline); // Pass the M3 executor as a function
this::runM3Pipeline); // Pass the executor as a function
}
}
}
/**
* Runs the M3 pipeline (PDF text extraction + pre-checks) for the given candidate.
* Runs the pipeline (PDF text extraction + pre-checks) for the given candidate.
* <p>
* This method is called after a successful fingerprint computation. The result is
* passed to {@link M4DocumentProcessor}, which applies it only when the document is
* passed to {@link DocumentProcessingCoordinator}, which applies it only when the document is
* not in a terminal state.
*
* @param candidate the candidate to run through the M3 pipeline
* @return the M3 pipeline outcome (pre-check passed, pre-check failed, or technical error)
* @param candidate the candidate to run through the pipeline
* @return the pipeline outcome (pre-check passed, pre-check failed, or technical error)
*/
private DocumentProcessingOutcome runM3Pipeline(SourceDocumentCandidate candidate) {
PdfExtractionResult extractionResult =
@@ -284,10 +284,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
DocumentProcessingOutcome outcome =
DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
// Log M3 outcome
// Log outcome
switch (outcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed -> {
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for M4 persistence.",
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for persistence.",
candidate.uniqueIdentifier());
}
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed -> {