M4 AP-006 Reihenfolge, Konsistenz und Scope bereinigen
This commit is contained in:
@@ -192,6 +192,100 @@ public class M4DocumentProcessor {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Applies the full M4 processing logic for one identified document candidate.
|
||||||
|
* <p>
|
||||||
|
* The caller must have already computed a valid {@link DocumentFingerprint} for the
|
||||||
|
* candidate. This method handles the complete M4 processing flow:
|
||||||
|
* <ol>
|
||||||
|
* <li>Load document master record.</li>
|
||||||
|
* <li>Handle terminal SUCCESS / FAILED_FINAL skip cases first.</li>
|
||||||
|
* <li>Only if not terminal: execute the M3 flow (PDF extraction + pre-checks).</li>
|
||||||
|
* <li>Map M3 outcome to M4 status, counters and retryable flag.</li>
|
||||||
|
* <li>Persist exactly one historised processing attempt.</li>
|
||||||
|
* <li>Persist the updated document master record.</li>
|
||||||
|
* </ol>
|
||||||
|
* <p>
|
||||||
|
* This method never throws. All persistence failures are caught, logged, and
|
||||||
|
* treated as controlled per-document failures so the batch run can continue.
|
||||||
|
*
|
||||||
|
* @param candidate the source document candidate being processed; must not be null
|
||||||
|
* @param fingerprint the successfully computed fingerprint for this candidate;
|
||||||
|
* must not be null
|
||||||
|
* @param context the current batch run context (for run ID and timing);
|
||||||
|
* must not be null
|
||||||
|
* @param attemptStart the instant at which processing of this candidate began;
|
||||||
|
* must not be null
|
||||||
|
* @param m3Executor functional interface to execute the M3 pipeline when needed;
|
||||||
|
* must not be null
|
||||||
|
*/
|
||||||
|
public void processWithM3Execution(
|
||||||
|
SourceDocumentCandidate candidate,
|
||||||
|
DocumentFingerprint fingerprint,
|
||||||
|
BatchRunContext context,
|
||||||
|
Instant attemptStart,
|
||||||
|
java.util.function.Function<SourceDocumentCandidate, DocumentProcessingOutcome> m3Executor) {
|
||||||
|
|
||||||
|
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||||
|
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
|
||||||
|
Objects.requireNonNull(context, "context must not be null");
|
||||||
|
Objects.requireNonNull(attemptStart, "attemptStart must not be null");
|
||||||
|
Objects.requireNonNull(m3Executor, "m3Executor must not be null");
|
||||||
|
|
||||||
|
// Step 1: Load the document master record
|
||||||
|
DocumentRecordLookupResult lookupResult =
|
||||||
|
documentRecordRepository.findByFingerprint(fingerprint);
|
||||||
|
|
||||||
|
// Step 2: Handle persistence lookup failure – cannot safely proceed
|
||||||
|
if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) {
|
||||||
|
LOG.error("Cannot process '{}': master record lookup failed: {}",
|
||||||
|
candidate.uniqueIdentifier(), failure.errorMessage());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Step 3: Determine the action based on the lookup result
|
||||||
|
switch (lookupResult) {
|
||||||
|
case DocumentTerminalSuccess terminalSuccess -> {
|
||||||
|
// Document already successfully processed → skip
|
||||||
|
LOG.info("Skipping '{}': already successfully processed (fingerprint: {}).",
|
||||||
|
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||||
|
persistSkipAttempt(
|
||||||
|
candidate, fingerprint, terminalSuccess.record(),
|
||||||
|
ProcessingStatus.SKIPPED_ALREADY_PROCESSED,
|
||||||
|
context, attemptStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
case DocumentTerminalFinalFailure terminalFailure -> {
|
||||||
|
// Document finally failed → skip
|
||||||
|
LOG.info("Skipping '{}': already finally failed (fingerprint: {}).",
|
||||||
|
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||||
|
persistSkipAttempt(
|
||||||
|
candidate, fingerprint, terminalFailure.record(),
|
||||||
|
ProcessingStatus.SKIPPED_FINAL_FAILURE,
|
||||||
|
context, attemptStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
case DocumentUnknown ignored -> {
|
||||||
|
// New document – execute M3 pipeline and process
|
||||||
|
DocumentProcessingOutcome m3Outcome = m3Executor.apply(candidate);
|
||||||
|
processAndPersistNewDocument(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
case DocumentKnownProcessable knownProcessable -> {
|
||||||
|
// Known but not terminal – execute M3 pipeline and process
|
||||||
|
DocumentProcessingOutcome m3Outcome = m3Executor.apply(candidate);
|
||||||
|
processAndPersistKnownDocument(
|
||||||
|
candidate, fingerprint, m3Outcome, knownProcessable.record(),
|
||||||
|
context, attemptStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
default ->
|
||||||
|
// Exhaustive sealed hierarchy; this branch is unreachable
|
||||||
|
LOG.error("Unexpected lookup result type for '{}': {}",
|
||||||
|
candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
// Skip path
|
// Skip path
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
@@ -555,4 +649,4 @@ public class M4DocumentProcessor {
|
|||||||
FailureCounters counters,
|
FailureCounters counters,
|
||||||
boolean retryable) {
|
boolean retryable) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -194,9 +194,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
|||||||
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
|
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
|
||||||
* <li>If fingerprint computation fails: log as non-identifiable run event and
|
* <li>If fingerprint computation fails: log as non-identifiable run event and
|
||||||
* return — no SQLite record is created.</li>
|
* return — no SQLite record is created.</li>
|
||||||
* <li>Execute the M3 pipeline (PDF extraction + pre-checks).</li>
|
* <li>Load document master record.</li>
|
||||||
* <li>Delegate to {@link M4DocumentProcessor} for idempotency check, status/counter
|
* <li>If already {@code SUCCESS} → persist skip attempt with
|
||||||
* mapping, and consistent two-level persistence.</li>
|
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||||
|
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
|
||||||
|
* {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||||
|
* <li>Otherwise execute the M3 pipeline (extraction + pre-checks).</li>
|
||||||
|
* <li>Map M3 result into M4 status, counters and retryable flag.</li>
|
||||||
|
* <li>Persist exactly one historised processing attempt.</li>
|
||||||
|
* <li>Persist the updated document master record.</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
* <p>
|
* <p>
|
||||||
* Per-document errors do not abort the overall batch run. Each candidate ends
|
* Per-document errors do not abort the overall batch run. Each candidate ends
|
||||||
@@ -227,15 +233,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
|||||||
LOG.debug("Fingerprint computed for '{}': {}",
|
LOG.debug("Fingerprint computed for '{}': {}",
|
||||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
|
||||||
|
|
||||||
// Step M4-2..M4-8: Execute M3 pipeline and delegate M4 logic to the processor
|
// Delegate the complete M4 processing logic to the processor
|
||||||
// The M3 pipeline runs only if the document is not in a terminal state;
|
// The processor handles loading document master record, checking terminal status,
|
||||||
// M4DocumentProcessor handles the terminal check internally.
|
// executing M3 pipeline only when needed, and persisting results consistently
|
||||||
// We run M3 eagerly here and pass the result; M4DocumentProcessor will
|
m4DocumentProcessor.processWithM3Execution(
|
||||||
// ignore it for terminal documents.
|
candidate,
|
||||||
DocumentProcessingOutcome m3Outcome = runM3Pipeline(candidate);
|
fingerprint,
|
||||||
|
context,
|
||||||
// Delegate idempotency check, status mapping, and persistence to M4DocumentProcessor
|
attemptStart,
|
||||||
m4DocumentProcessor.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
this::runM3Pipeline); // Pass the M3 executor as a function
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -287,4 +293,4 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
|||||||
|
|
||||||
return outcome;
|
return outcome;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -641,6 +641,18 @@ class BatchRunProcessingUseCaseTest {
|
|||||||
super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void processWithM3Execution(
|
||||||
|
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
|
||||||
|
de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint fingerprint,
|
||||||
|
de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext context,
|
||||||
|
java.time.Instant attemptStart,
|
||||||
|
java.util.function.Function<de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate, de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome> m3Executor) {
|
||||||
|
processCallCount++;
|
||||||
|
// Delegate to super so the real logic runs (with no-op repos)
|
||||||
|
super.processWithM3Execution(candidate, fingerprint, context, attemptStart, m3Executor);
|
||||||
|
}
|
||||||
|
|
||||||
int processCallCount() { return processCallCount; }
|
/** Returns the current value of the {@code processCallCount} invocation counter of this test double. */
int processCallCount() { return processCallCount; }
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -680,4 +692,4 @@ class BatchRunProcessingUseCaseTest {
|
|||||||
return List.of();
|
return List.of();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Reference in New Issue
Block a user