Umsetzung von Meilenstein M7
This commit is contained in:
@@ -1,5 +1,7 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.config;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
|
||||
/**
|
||||
* Minimal runtime configuration for the application layer.
|
||||
* <p>
|
||||
@@ -9,12 +11,59 @@ package de.gecheckt.pdf.umbenenner.application.config;
|
||||
* <p>
|
||||
* This intentionally small contract ensures the application layer depends only on
|
||||
* the configuration values it actually uses, following hexagonal architecture principles.
|
||||
*
|
||||
* <h2>Validation invariants</h2>
|
||||
* <ul>
|
||||
* <li>{@link #maxPages()} must be ≥ 1.</li>
|
||||
* <li>{@link #maxRetriesTransient()} must be ≥ 1. The value {@code 0} is invalid
|
||||
* start configuration and must prevent the batch run from starting with exit
|
||||
* code 1.</li>
|
||||
* <li>{@link #aiContentSensitivity()} must not be {@code null}. The safe default is
|
||||
* {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>AI content sensitivity</h2>
|
||||
* <p>
|
||||
* The {@link #aiContentSensitivity()} field is derived from the {@code log.ai.sensitive}
|
||||
* configuration property (default: {@code false}). It governs whether the complete AI raw
|
||||
* response and complete AI {@code reasoning} may be written to log files. Sensitive AI
|
||||
* content is always persisted in SQLite regardless of this setting; only log output is
|
||||
* affected.
|
||||
* <p>
|
||||
* The safe default ({@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}) must be used
|
||||
* whenever {@code log.ai.sensitive} is absent, {@code false}, or set to any value other
|
||||
* than the explicit opt-in.
|
||||
*/
|
||||
public record RuntimeConfiguration(
|
||||
/**
|
||||
* Maximum number of pages a document can have to be processed.
|
||||
* Documents exceeding this limit are rejected during pre-checks.
|
||||
*/
|
||||
int maxPages
|
||||
int maxPages,
|
||||
|
||||
/**
|
||||
* Maximum number of historised transient technical errors allowed per fingerprint
|
||||
* across all scheduler runs.
|
||||
* <p>
|
||||
* The attempt that causes the counter to reach this value finalises the document
|
||||
* to {@code FAILED_FINAL}. Must be an Integer ≥ 1; the value {@code 0} is
|
||||
* invalid start configuration.
|
||||
* <p>
|
||||
* Example: {@code maxRetriesTransient = 1} means the first transient error
|
||||
* immediately finalises the document.
|
||||
*/
|
||||
int maxRetriesTransient,
|
||||
|
||||
/**
|
||||
* Sensitivity decision governing whether AI-generated content may be written to log files.
|
||||
* <p>
|
||||
* Derived from the {@code log.ai.sensitive} configuration property. The default is
|
||||
* {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT} (do not log sensitive content).
|
||||
* Only {@link AiContentSensitivity#LOG_SENSITIVE_CONTENT} is produced when
|
||||
* {@code log.ai.sensitive = true} is explicitly set.
|
||||
* <p>
|
||||
* Must not be {@code null}.
|
||||
*/
|
||||
AiContentSensitivity aiContentSensitivity
|
||||
)
|
||||
{ }
|
||||
|
||||
@@ -10,6 +10,16 @@ import java.nio.file.Path;
|
||||
* loaded and validated at bootstrap time. This is a complete configuration model
|
||||
* for the entire application startup, including paths, API settings, persistence,
|
||||
* and operational parameters.
|
||||
*
|
||||
* <h2>AI content sensitivity ({@code log.ai.sensitive})</h2>
|
||||
* <p>
|
||||
* The boolean property {@code log.ai.sensitive} controls whether sensitive AI-generated
|
||||
* content (complete raw AI response, complete AI {@code reasoning}) may be written to
|
||||
* log files. The default is {@code false} (safe/protect). Set to {@code true} only when
|
||||
* explicit diagnostic logging of AI content is required.
|
||||
* <p>
|
||||
* Sensitive AI content is always persisted in SQLite regardless of this setting.
|
||||
* Only log output is affected.
|
||||
*/
|
||||
public record StartConfiguration(
|
||||
Path sourceFolder,
|
||||
@@ -25,6 +35,13 @@ public record StartConfiguration(
|
||||
Path runtimeLockFile,
|
||||
Path logDirectory,
|
||||
String logLevel,
|
||||
String apiKey
|
||||
String apiKey,
|
||||
|
||||
/**
|
||||
* Whether sensitive AI content (raw response, reasoning) may be written to log files.
|
||||
* Corresponds to the {@code log.ai.sensitive} configuration property.
|
||||
* Default: {@code false} (do not log sensitive content).
|
||||
*/
|
||||
boolean logAiSensitive
|
||||
)
|
||||
{ }
|
||||
|
||||
@@ -0,0 +1,46 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Sensitivity decision governing whether AI-generated content may be written to log files.
|
||||
* <p>
|
||||
* The following AI-generated content items are classified as sensitive and are subject to
|
||||
* this decision:
|
||||
* <ul>
|
||||
* <li>The <strong>complete AI raw response</strong> (full JSON body returned by the
|
||||
* AI service)</li>
|
||||
* <li>The <strong>complete AI {@code reasoning}</strong> field extracted from the
|
||||
* AI response</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Sensitive AI content is always written to SQLite (for traceability) regardless of
|
||||
* this decision. The decision controls only whether the content is also emitted into
|
||||
* log files.
|
||||
* <p>
|
||||
* <strong>Default behaviour:</strong> The default is {@link #PROTECT_SENSITIVE_CONTENT}.
|
||||
* Logging of sensitive AI content must be explicitly enabled by setting the boolean
|
||||
* configuration property {@code log.ai.sensitive = true}. Any other value, or the
|
||||
* absence of the property, results in {@link #PROTECT_SENSITIVE_CONTENT}.
|
||||
* <p>
|
||||
* <strong>Non-sensitive AI content</strong> (e.g. the resolved title, the resolved date,
|
||||
* the date source) is not covered by this decision and may always be logged.
|
||||
*/
|
||||
public enum AiContentSensitivity {
|
||||
|
||||
/**
|
||||
* Sensitive AI content (raw response, reasoning) must <strong>not</strong> be written
|
||||
* to log files.
|
||||
* <p>
|
||||
* This is the safe default. It is active whenever {@code log.ai.sensitive} is absent,
|
||||
* {@code false}, or set to any value other than the explicit opt-in.
|
||||
*/
|
||||
PROTECT_SENSITIVE_CONTENT,
|
||||
|
||||
/**
|
||||
* Sensitive AI content (raw response, reasoning) <strong>may</strong> be written
|
||||
* to log files.
|
||||
* <p>
|
||||
* This value is only produced when {@code log.ai.sensitive = true} is explicitly set
|
||||
* in the application configuration. It must never be the implicit default.
|
||||
*/
|
||||
LOG_SENSITIVE_CONTENT
|
||||
}
|
||||
@@ -0,0 +1,90 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Unified classification of all document-level errors in the end state.
|
||||
* <p>
|
||||
* This enumeration provides a single, exhaustive taxonomy for every error category
|
||||
* that the retry policy and logging infrastructure must distinguish. It replaces
|
||||
* any ad-hoc string-based classification where an authoritative type is needed.
|
||||
* <p>
|
||||
* <strong>Mapping to failure counters:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #DETERMINISTIC_CONTENT_ERROR} → increments the content-error counter
|
||||
* ({@link FailureCounters#contentErrorCount()}). The first occurrence leads to
|
||||
* {@code FAILED_RETRYABLE}; the second leads to {@code FAILED_FINAL}.
|
||||
* There is no further retry after the second deterministic content error.</li>
|
||||
* <li>{@link #TRANSIENT_TECHNICAL_ERROR} → increments the transient-error counter
|
||||
* ({@link FailureCounters#transientErrorCount()}). Remains retryable until the
|
||||
* counter reaches the configured {@code max.retries.transient} limit (Integer ≥ 1).
|
||||
* The attempt that reaches the limit finalises the document to {@code FAILED_FINAL}.</li>
|
||||
* <li>{@link #TARGET_COPY_TECHNICAL_ERROR} → signals a failure on the physical target
|
||||
* file copy path. Within the same run, exactly one immediate technical retry is
|
||||
* allowed. If the immediate retry also fails, the error is treated as a
|
||||
* {@link #TRANSIENT_TECHNICAL_ERROR} for the purposes of counter updates and
|
||||
* cross-run retry evaluation.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Scope of deterministic content errors:</strong>
|
||||
* <ul>
|
||||
* <li>No usable PDF text extracted</li>
|
||||
* <li>Page limit exceeded</li>
|
||||
* <li>AI response functionally invalid (generic/unusable title, unparseable date)</li>
|
||||
* <li>Document content ambiguous or not uniquely interpretable</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Scope of transient technical errors:</strong>
|
||||
* <ul>
|
||||
* <li>AI service unreachable, HTTP timeout, network error</li>
|
||||
* <li>Unparseable or structurally invalid AI JSON</li>
|
||||
* <li>Temporary I/O error during PDF text extraction</li>
|
||||
* <li>Temporary SQLite lock or persistence failure</li>
|
||||
* <li>Any other non-deterministic infrastructure failure</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Architecture note:</strong> This type carries no infrastructure dependencies.
|
||||
* It is safe to reference from Domain, Application and Adapter layers.
|
||||
*/
|
||||
public enum DocumentErrorClassification {
|
||||
|
||||
/**
|
||||
* A deterministic content error that cannot be resolved by retrying with the same
|
||||
* document content.
|
||||
* <p>
|
||||
* Examples: no extractable text, page limit exceeded, AI-returned title is generic
|
||||
* or unusable, document content is ambiguous.
|
||||
* <p>
|
||||
* Retry rule: the first historised occurrence of this error for a fingerprint leads
|
||||
* to {@code FAILED_RETRYABLE} (one later run may retry). The second historised
|
||||
* occurrence leads to {@code FAILED_FINAL} (no further retries).
|
||||
*/
|
||||
DETERMINISTIC_CONTENT_ERROR,
|
||||
|
||||
/**
|
||||
* A transient technical infrastructure failure unrelated to the document content.
|
||||
* <p>
|
||||
* Examples: AI endpoint not reachable, HTTP timeout, malformed or non-parseable
|
||||
* JSON, temporary I/O failure, temporary SQLite lock.
|
||||
* <p>
|
||||
* Retry rule: remains {@code FAILED_RETRYABLE} until the transient-error counter
|
||||
* reaches the configured {@code max.retries.transient} limit. The attempt that
|
||||
* reaches the limit finalises the document to {@code FAILED_FINAL}.
|
||||
* The configured limit must be an Integer ≥ 1; the value {@code 0} is invalid
|
||||
* start configuration and prevents the batch run from starting.
|
||||
*/
|
||||
TRANSIENT_TECHNICAL_ERROR,
|
||||
|
||||
/**
|
||||
* A technical failure specifically on the physical target-file copy path.
|
||||
* <p>
|
||||
* This error class is distinct from {@link #TRANSIENT_TECHNICAL_ERROR} because it
|
||||
* triggers a special within-run handling: exactly one immediate technical retry of
|
||||
* the copy operation is allowed within the same document run. No new AI call and no
|
||||
* new naming proposal derivation occur during the immediate retry.
|
||||
* <p>
|
||||
* If the immediate retry succeeds, the document proceeds to {@code SUCCESS}.
|
||||
* If the immediate retry also fails, the combined failure is recorded as a
|
||||
* {@link #TRANSIENT_TECHNICAL_ERROR} for counter and cross-run retry evaluation.
|
||||
     * The immediate retry is not counted in the cross-run transient-error counter.
|
||||
*/
|
||||
TARGET_COPY_TECHNICAL_ERROR
|
||||
}
|
||||
@@ -0,0 +1,81 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
|
||||
/**
|
||||
* Sealed type carrying the correlation context for all document-related log entries.
|
||||
* <p>
|
||||
* The logging correlation rule distinguishes two phases of document processing:
|
||||
* <ol>
|
||||
* <li><strong>Pre-fingerprint phase:</strong> Before a {@link DocumentFingerprint} has
|
||||
* been successfully computed (e.g. the source file cannot be read for hashing),
|
||||
* log entries are correlated via the batch run identifier and a stable candidate
|
||||
* description derived from the candidate's own identifier (typically its source
|
||||
* file path or name). Use {@link CandidateCorrelation}.</li>
|
||||
* <li><strong>Post-fingerprint phase:</strong> Once the fingerprint has been
|
||||
* successfully computed, all subsequent document-related log entries are correlated
|
||||
* via the batch run identifier and the fingerprint. Use
|
||||
* {@link FingerprintCorrelation}.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* <strong>Architecture constraints:</strong>
|
||||
* <ul>
|
||||
* <li>This type contains no filesystem ({@code Path}, {@code File}) or NIO types.</li>
|
||||
* <li>This type introduces no additional persistence truth source.</li>
|
||||
* <li>The correlation is a logging concern only and does not influence the processing
|
||||
* outcome, retry decision, or persistence model.</li>
|
||||
* </ul>
|
||||
*/
|
||||
public sealed interface DocumentLogCorrelation {
|
||||
|
||||
/**
|
||||
* Returns the batch run identifier shared by all log entries within one run.
|
||||
*
|
||||
* @return run identifier; never {@code null}
|
||||
*/
|
||||
RunId runId();
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Pre-fingerprint correlation
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Correlation context available before a {@link DocumentFingerprint} has been
|
||||
* successfully computed.
|
||||
* <p>
|
||||
* Used when the fingerprint computation itself fails or when a log entry must be
|
||||
* emitted at the very start of candidate processing (before any hashing result is
|
||||
* available).
|
||||
* <p>
|
||||
* The {@code candidateDescription} is a stable, human-readable identifier for the
|
||||
* candidate derived from the candidate's own unique identifier — typically the
|
||||
* source file name or path representation. It must not change between log entries
|
||||
* for the same candidate within a single run.
|
||||
*
|
||||
* @param runId batch run identifier; never {@code null}
|
||||
* @param candidateDescription stable human-readable candidate identifier;
|
||||
* never {@code null} or blank
|
||||
*/
|
||||
record CandidateCorrelation(RunId runId, String candidateDescription)
|
||||
implements DocumentLogCorrelation {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Post-fingerprint correlation
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Correlation context available after a {@link DocumentFingerprint} has been
|
||||
* successfully computed.
|
||||
* <p>
|
||||
* Used for all document-related log entries from the point at which the fingerprint
|
||||
* is known. The fingerprint is the authoritative, content-stable document identity
|
||||
* and must appear in or be unambiguously derivable from every subsequent log entry
|
||||
* for this document.
|
||||
*
|
||||
* @param runId batch run identifier; never {@code null}
|
||||
* @param fingerprint content-based document identity; never {@code null}
|
||||
*/
|
||||
record FingerprintCorrelation(RunId runId, DocumentFingerprint fingerprint)
|
||||
implements DocumentLogCorrelation {}
|
||||
}
|
||||
@@ -7,24 +7,34 @@ package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
* <ul>
|
||||
* <li><strong>Content error counter</strong> ({@link #contentErrorCount()}):
|
||||
* counts how many times a deterministic content error occurred for this document
|
||||
* (no usable text, page limit exceeded). At count 1 the document is
|
||||
* {@code FAILED_RETRYABLE}; at count 2 it becomes {@code FAILED_FINAL}.
|
||||
* (no usable text, page limit exceeded, AI functional failure, ambiguous content).
|
||||
* At count 1 the document transitions to {@code FAILED_RETRYABLE};
|
||||
* at count 2 it transitions to {@code FAILED_FINAL}.
|
||||
* Skip events do <em>not</em> increase this counter.</li>
|
||||
* <li><strong>Transient error counter</strong> ({@link #transientErrorCount()}):
|
||||
* counts how many times a technical infrastructure error occurred after a
|
||||
* successful fingerprint was computed. The document remains
|
||||
* {@code FAILED_RETRYABLE} until the configured maximum is reached in later
|
||||
* milestones. Skip events do <em>not</em> increase this counter.</li>
|
||||
* counts how many times a transient technical error occurred after a successful
|
||||
* fingerprint was computed. The document remains {@code FAILED_RETRYABLE} while
|
||||
* this counter is strictly less than the configured {@code max.retries.transient}
|
||||
* value. The attempt that causes the counter to reach {@code max.retries.transient}
|
||||
* transitions the document to {@code FAILED_FINAL}.
|
||||
* The configured limit must be an Integer ≥ 1.
|
||||
* Skip events do <em>not</em> increase this counter.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A freshly discovered document starts with both counters at zero.
|
||||
* Counters are only written by the repository layer on the instructions of the
|
||||
* application use case; they never change as a side-effect of a read operation.
|
||||
* <strong>Immediate within-run target copy retry:</strong>
|
||||
* The physical target-copy retry within the same run is not tracked in either counter.
|
||||
* It is a purely technical within-run mechanism and does not affect the
|
||||
 * cross-run counter state.
|
||||
* <p>
|
||||
* <strong>Counter invariant:</strong>
|
||||
* Both counters start at zero for a newly discovered document and only increase
|
||||
* monotonically. The counters are written by the repository layer on the instructions
|
||||
* of the application use case; they never change as a side-effect of a read operation.
|
||||
*
|
||||
* @param contentErrorCount number of deterministic content errors recorded so far;
|
||||
* must be >= 0
|
||||
* @param transientErrorCount number of transient technical errors recorded so far;
|
||||
* must be >= 0
|
||||
* @param contentErrorCount number of historised deterministic content errors;
|
||||
* must be ≥ 0
|
||||
* @param transientErrorCount number of historised transient technical errors;
|
||||
* must be ≥ 0
|
||||
*/
|
||||
public record FailureCounters(int contentErrorCount, int transientErrorCount) {
|
||||
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Decision governing whether a within-run immediate technical retry of the target copy
|
||||
* operation is permitted.
|
||||
* <p>
|
||||
* The immediate retry mechanism is strictly scoped:
|
||||
* <ul>
|
||||
* <li>It applies <strong>only</strong> to the physical target-file copy path.</li>
|
||||
* <li>It is permitted <strong>at most once</strong> per document per run (first copy
|
||||
* attempt failed; one additional attempt is allowed).</li>
|
||||
* <li>It does <strong>not</strong> involve a new AI call, a new naming-proposal
|
||||
* derivation, or any other pipeline stage.</li>
|
||||
 *   <li>It does <strong>not</strong> increment the cross-run
|
||||
* transient-error counter regardless of outcome.</li>
|
||||
* <li>It is a purely technical within-run recovery mechanism and is
|
||||
* <strong>not</strong> counted as a cross-run retry in the sense of
|
||||
* {@code max.retries.transient}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The concrete retry decision for the subsequent persistence step is derived from the
|
||||
* combined outcome after the immediate retry completes (see {@link RetryDecision}).
|
||||
*/
|
||||
public enum ImmediateRetryDecision {
|
||||
|
||||
/**
|
||||
* An immediate within-run retry of the target copy operation is permitted.
|
||||
* <p>
|
||||
* This value is produced when the first physical copy attempt within the current
|
||||
* document run has failed. The copy must be retried exactly once more.
|
||||
* No other pipeline stage is repeated.
|
||||
*/
|
||||
ALLOWED,
|
||||
|
||||
/**
|
||||
* No immediate within-run retry is permitted.
|
||||
* <p>
|
||||
* This value is produced when the immediate retry quota for this document run has
|
||||
* already been consumed (i.e. the immediate retry attempt itself has failed), or
|
||||
* when the failure did not occur on the target copy path.
|
||||
* The error must be escalated to the cross-run retry evaluation.
|
||||
*/
|
||||
DENIED
|
||||
}
|
||||
@@ -0,0 +1,172 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Sealed type representing the complete, authoritative retry decision for a document
|
||||
* after an error has been classified.
|
||||
* <p>
|
||||
* A {@code RetryDecision} is the output of the retry policy evaluation. It unambiguously
|
||||
* encodes what must happen next for the document: which status to persist, which counter
|
||||
* to increment, and whether a within-run immediate retry is still possible.
|
||||
* <p>
|
||||
* <strong>Decision cases and their semantics:</strong>
|
||||
* <ol>
|
||||
* <li>{@link ContentErrorRetryable} — first deterministic content error. Document moves
|
||||
* to {@code FAILED_RETRYABLE}; content-error counter is incremented by 1. One later
|
||||
* scheduler run may retry.</li>
|
||||
* <li>{@link ContentErrorFinal} — second (or later) deterministic content error. Document
|
||||
* moves to {@code FAILED_FINAL}; content-error counter is incremented by 1. No further
|
||||
* processing in any future run.</li>
|
||||
* <li>{@link TransientErrorRetryable} — transient technical error with remaining retry budget.
|
||||
* Document moves to {@code FAILED_RETRYABLE}; transient-error counter is incremented by 1.
|
||||
* A later scheduler run may retry, as long as the counter stays below
|
||||
* {@code max.retries.transient}.</li>
|
||||
* <li>{@link TransientErrorFinal} — transient technical error that exhausts the configured
|
||||
* {@code max.retries.transient} budget. Document moves to {@code FAILED_FINAL};
|
||||
* transient-error counter is incremented by 1. No further processing in any future run.</li>
|
||||
* <li>{@link TargetCopyWithImmediateRetry} — first physical copy failure within the current
|
||||
* run. The document has not yet changed status; exactly one immediate within-run retry
|
||||
* of the copy step is permitted. No new AI call and no new naming-proposal derivation
|
||||
* occur. This decision does not yet modify any counter or status; the outcome of the
|
||||
* immediate retry determines which subsequent decision applies.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* <strong>What this type does NOT cover:</strong>
|
||||
* <ul>
|
||||
* <li>Skip decisions ({@code SKIPPED_ALREADY_PROCESSED}, {@code SKIPPED_FINAL_FAILURE})
|
||||
* — skips are not retry decisions; they are pure historisation events.</li>
|
||||
* <li>Success — a successful outcome is not a retry decision.</li>
|
||||
* <li>Pre-fingerprint failures — errors before the fingerprint is computed are not
|
||||
* historised as attempts and therefore do not produce a {@code RetryDecision}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Counter invariant:</strong> Skip decisions ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) never produce a {@code RetryDecision} and never change
|
||||
* any failure counter.
|
||||
* <p>
|
||||
* <strong>Single-truth rule:</strong> The retry decision is derived exclusively from the
|
||||
* document master record and the attempt history. No additional, parallel truth source
|
||||
* for retry state is introduced.
|
||||
*/
|
||||
public sealed interface RetryDecision {
|
||||
|
||||
/**
|
||||
* Returns the failure class identifier for persistence and logging.
|
||||
* <p>
|
||||
* The failure class is a short, stable string identifying the type of failure,
|
||||
* typically the enum constant name of the original error or exception class name.
|
||||
*
|
||||
* @return failure class string; never {@code null} or blank
|
||||
*/
|
||||
String failureClass();
|
||||
|
||||
/**
|
||||
* Returns a human-readable failure message for persistence and logging.
|
||||
*
|
||||
* @return failure message; never {@code null} or blank
|
||||
*/
|
||||
String failureMessage();
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Deterministic content error cases
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* First historised deterministic content error for this fingerprint.
|
||||
* <p>
|
||||
* The document must be persisted with status {@code FAILED_RETRYABLE} and the
|
||||
* content-error counter incremented by 1. Exactly one later scheduler run is
|
||||
* permitted to retry.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record ContentErrorRetryable(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
/**
|
||||
* Second (or subsequent) historised deterministic content error for this fingerprint.
|
||||
* <p>
|
||||
* The document must be persisted with status {@code FAILED_FINAL} and the
|
||||
* content-error counter incremented by 1. No further processing is allowed in
|
||||
* any future run.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record ContentErrorFinal(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Transient technical error cases
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Transient technical error with remaining retry budget.
|
||||
* <p>
|
||||
* The transient-error counter after incrementing is strictly less than
|
||||
* {@code max.retries.transient}. The document must be persisted with status
|
||||
* {@code FAILED_RETRYABLE} and the transient-error counter incremented by 1.
|
||||
* A later scheduler run may retry.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record TransientErrorRetryable(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
/**
|
||||
* Transient technical error that exhausts the configured {@code max.retries.transient}
|
||||
* budget.
|
||||
* <p>
|
||||
* The transient-error counter after incrementing equals {@code max.retries.transient}.
|
||||
* The document must be persisted with status {@code FAILED_FINAL} and the
|
||||
* transient-error counter incremented by 1. No further processing is allowed in
|
||||
* any future run.
|
||||
* <p>
|
||||
* Example: with {@code max.retries.transient = 1}, the very first transient error
|
||||
* produces this decision immediately.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record TransientErrorFinal(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Target copy immediate retry case
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* First physical target-file copy failure within the current run.
|
||||
* <p>
|
||||
* Exactly one immediate technical retry of the copy operation is permitted within
|
||||
* the same document run. This decision does not change any counter or document
|
||||
* status — it defers the final outcome until the immediate retry completes:
|
||||
* <ul>
|
||||
* <li>If the immediate retry succeeds → document proceeds to {@code SUCCESS}.</li>
|
||||
* <li>If the immediate retry also fails → the combined failure is classified as
|
||||
* a transient technical error and a {@link TransientErrorRetryable} or
|
||||
* {@link TransientErrorFinal} decision is produced for the final persistence
|
||||
* step.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The immediate retry is strictly limited to the physical copy path. No new AI call
|
||||
* and no new naming-proposal derivation occur. This mechanism does not increment the
|
||||
     * cross-run transient-error counter.
|
||||
*
|
||||
* @param failureMessage human-readable description of the initial copy failure;
|
||||
* never {@code null} or blank
|
||||
*/
|
||||
record TargetCopyWithImmediateRetry(String failureMessage) implements RetryDecision {
|
||||
|
||||
/**
|
||||
* Returns the constant failure class identifier for target copy failures.
|
||||
*
|
||||
* @return {@code "TARGET_COPY_TECHNICAL_ERROR"}
|
||||
*/
|
||||
@Override
|
||||
public String failureClass() {
|
||||
return DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name();
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -62,6 +62,20 @@
|
||||
* — Sealed result of parsing raw response into JSON structure (success or parsing failure)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Retry policy and logging types:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification}
|
||||
* — Unified classification of all document-level errors (content, transient, target copy)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision}
|
||||
* — Sealed type representing the authoritative retry decision for a document error</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision}
|
||||
* — Decision governing whether a within-run target copy retry is permitted</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity}
|
||||
* — Sensitivity decision governing whether AI-generated content may be logged</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentLogCorrelation}
|
||||
* — Sealed type carrying the correlation context for document-related log entries</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Exception types:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException}
|
||||
|
||||
@@ -0,0 +1,200 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Default implementation of the {@link RetryDecisionEvaluator} interface.
|
||||
* <p>
|
||||
* Applies the binding retry policy rules exactly as specified:
|
||||
* <ul>
|
||||
* <li><strong>Deterministic content errors</strong>: the first historised occurrence
|
||||
* for a fingerprint leads to {@link RetryDecision.ContentErrorRetryable} (one later
|
||||
* scheduler run may retry); the second occurrence leads to
|
||||
* {@link RetryDecision.ContentErrorFinal} (no further retries).</li>
|
||||
* <li><strong>Transient technical errors</strong>: the error remains
|
||||
* {@link RetryDecision.TransientErrorRetryable} while the counter after incrementing
|
||||
* is strictly less than {@code maxRetriesTransient}. When the counter after
|
||||
* incrementing reaches {@code maxRetriesTransient}, the result is
|
||||
* {@link RetryDecision.TransientErrorFinal}.</li>
|
||||
* <li><strong>Target copy failures</strong>: the first copy failure within a run
|
||||
* produces {@link RetryDecision.TargetCopyWithImmediateRetry}, allowing exactly
|
||||
* one immediate within-run retry of the physical copy step. This decision does
|
||||
* not modify any counter.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Counter semantics:</strong> The {@code currentCounters} passed to
|
||||
* {@link #evaluate} reflect the state <em>before</em> the current attempt's counter
|
||||
* increment. This evaluator computes what the counter will be after incrementing and
|
||||
* applies the threshold check accordingly.
|
||||
* <p>
|
||||
* <strong>Skip events</strong> ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) are not routed through this evaluator and never
|
||||
* produce a {@link RetryDecision}. No failure counter is changed by skip events.
|
||||
* <p>
|
||||
* <strong>Immediate within-run retry</strong> for the target copy path is a purely
|
||||
* technical within-run mechanism. It does not increment the laufübergreifenden
|
||||
* transient-error counter regardless of outcome, and it is not part of the
|
||||
* cross-run retry budget governed by {@code max.retries.transient}.
|
||||
* <p>
|
||||
* <strong>Single-truth rule:</strong> Evaluations are derived solely from the document
|
||||
* master record's failure counters and the configured limit. No additional, parallel
|
||||
* persistence source for retry decisions is introduced.
|
||||
* <p>
|
||||
* This class is stateless and thread-safe.
|
||||
*/
|
||||
public final class DefaultRetryDecisionEvaluator implements RetryDecisionEvaluator {
|
||||
|
||||
/**
|
||||
* Derives the authoritative retry decision for a document-level error.
|
||||
* <p>
|
||||
* Decision rules by error class:
|
||||
* <ul>
|
||||
* <li>{@link DocumentErrorClassification#DETERMINISTIC_CONTENT_ERROR}:
|
||||
* {@code contentErrorCount} before increment = 0 →
|
||||
* {@link RetryDecision.ContentErrorRetryable}; else →
|
||||
* {@link RetryDecision.ContentErrorFinal}.</li>
|
||||
* <li>{@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}:
|
||||
* {@code transientErrorCount + 1 < maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorRetryable};
|
||||
* {@code transientErrorCount + 1 >= maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorFinal}.</li>
|
||||
* <li>{@link DocumentErrorClassification#TARGET_COPY_TECHNICAL_ERROR}:
|
||||
* always → {@link RetryDecision.TargetCopyWithImmediateRetry}.
|
||||
* No counter is modified by this decision.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param errorClass classification of the error that occurred; never {@code null}
|
||||
* @param currentCounters failure counters <em>before</em> incrementing for this
|
||||
* attempt; never {@code null}
|
||||
* @param maxRetriesTransient configured maximum number of historised transient errors
|
||||
* allowed per fingerprint; must be ≥ 1
|
||||
* @param failureClass short, stable failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable description of the error; never {@code null} or blank
|
||||
* @return the authoritative {@link RetryDecision}; never {@code null}
|
||||
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
|
||||
* @throws NullPointerException if any reference parameter is {@code null}
|
||||
*/
|
||||
@Override
|
||||
public RetryDecision evaluate(
|
||||
DocumentErrorClassification errorClass,
|
||||
FailureCounters currentCounters,
|
||||
int maxRetriesTransient,
|
||||
String failureClass,
|
||||
String failureMessage) {
|
||||
|
||||
Objects.requireNonNull(errorClass, "errorClass must not be null");
|
||||
Objects.requireNonNull(currentCounters, "currentCounters must not be null");
|
||||
Objects.requireNonNull(failureClass, "failureClass must not be null");
|
||||
Objects.requireNonNull(failureMessage, "failureMessage must not be null");
|
||||
if (failureClass.isBlank()) {
|
||||
throw new IllegalArgumentException("failureClass must not be blank");
|
||||
}
|
||||
if (failureMessage.isBlank()) {
|
||||
throw new IllegalArgumentException("failureMessage must not be blank");
|
||||
}
|
||||
if (maxRetriesTransient < 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"maxRetriesTransient must be >= 1, but was: " + maxRetriesTransient);
|
||||
}
|
||||
|
||||
return switch (errorClass) {
|
||||
case DETERMINISTIC_CONTENT_ERROR -> evaluateContentError(
|
||||
currentCounters, failureClass, failureMessage);
|
||||
case TRANSIENT_TECHNICAL_ERROR -> evaluateTransientError(
|
||||
currentCounters, maxRetriesTransient, failureClass, failureMessage);
|
||||
case TARGET_COPY_TECHNICAL_ERROR ->
|
||||
new RetryDecision.TargetCopyWithImmediateRetry(failureMessage);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether an immediate within-run retry of the target copy operation
|
||||
* is permitted.
|
||||
* <p>
|
||||
* {@link ImmediateRetryDecision#ALLOWED} is returned only when the copy has failed
|
||||
* on its first attempt within the current run. If this is the second copy attempt
|
||||
* (the immediate retry itself has failed), {@link ImmediateRetryDecision#DENIED} is
|
||||
* returned and the failure must be escalated to the cross-run retry evaluation.
|
||||
*
|
||||
* @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was
|
||||
* the first copy attempt for this document in
|
||||
* the current run
|
||||
* @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED};
|
||||
* never {@code null}
|
||||
*/
|
||||
@Override
|
||||
public ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun) {
|
||||
return isFirstCopyAttemptInThisRun
|
||||
? ImmediateRetryDecision.ALLOWED
|
||||
: ImmediateRetryDecision.DENIED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluates the retry decision for a deterministic content error.
|
||||
* <p>
|
||||
* The content-error counter before this attempt determines the decision:
|
||||
* <ul>
|
||||
* <li>Count = 0 (first error) → {@link RetryDecision.ContentErrorRetryable};
|
||||
* one later scheduler run may retry.</li>
|
||||
* <li>Count ≥ 1 (second or subsequent error) → {@link RetryDecision.ContentErrorFinal};
|
||||
* no further retries.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param currentCounters failure counters before incrementing
|
||||
* @param failureClass failure class identifier
|
||||
* @param failureMessage failure description
|
||||
* @return the appropriate content-error retry decision
|
||||
*/
|
||||
private static RetryDecision evaluateContentError(
|
||||
FailureCounters currentCounters,
|
||||
String failureClass,
|
||||
String failureMessage) {
|
||||
|
||||
if (currentCounters.contentErrorCount() == 0) {
|
||||
return new RetryDecision.ContentErrorRetryable(failureClass, failureMessage);
|
||||
}
|
||||
return new RetryDecision.ContentErrorFinal(failureClass, failureMessage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluates the retry decision for a transient technical error.
|
||||
* <p>
|
||||
* The transient-error counter after incrementing determines the decision:
|
||||
* <ul>
|
||||
* <li>Counter after increment strictly less than {@code maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorRetryable}; a later scheduler run may retry.</li>
|
||||
* <li>Counter after increment equals or exceeds {@code maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorFinal}; no further retries.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Example with {@code maxRetriesTransient = 1}: counter before = 0,
|
||||
* counter after = 1 = limit → {@link RetryDecision.TransientErrorFinal} immediately.
|
||||
* <p>
|
||||
* Example with {@code maxRetriesTransient = 2}: counter before = 0,
|
||||
* counter after = 1 < 2 → {@link RetryDecision.TransientErrorRetryable};
|
||||
* counter before = 1, counter after = 2 = limit → {@link RetryDecision.TransientErrorFinal}.
|
||||
*
|
||||
* @param currentCounters failure counters before incrementing
|
||||
* @param maxRetriesTransient configured maximum historised transient errors (≥ 1)
|
||||
* @param failureClass failure class identifier
|
||||
* @param failureMessage failure description
|
||||
* @return the appropriate transient-error retry decision
|
||||
*/
|
||||
private static RetryDecision evaluateTransientError(
|
||||
FailureCounters currentCounters,
|
||||
int maxRetriesTransient,
|
||||
String failureClass,
|
||||
String failureMessage) {
|
||||
|
||||
int counterAfterIncrement = currentCounters.transientErrorCount() + 1;
|
||||
if (counterAfterIncrement < maxRetriesTransient) {
|
||||
return new RetryDecision.TransientErrorRetryable(failureClass, failureMessage);
|
||||
}
|
||||
return new RetryDecision.TransientErrorFinal(failureClass, failureMessage);
|
||||
}
|
||||
}
|
||||
@@ -43,40 +43,76 @@ import java.util.function.Function;
|
||||
* Application-level service that implements the per-document processing logic.
|
||||
* <p>
|
||||
* This service is the single authoritative place for the decision rules:
|
||||
* idempotency checks, status/counter mapping, target-copy finalization, and consistent
|
||||
* two-level persistence.
|
||||
* idempotency checks, status/counter mapping, target-copy finalization, retry
|
||||
* finalization, skip semantics, and consistent two-level persistence.
|
||||
*
|
||||
* <h2>Processing order per candidate</h2>
|
||||
* <ol>
|
||||
* <li>Load the document master record by fingerprint.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} →
|
||||
* <strong>log skip at INFO with fingerprint</strong>;
|
||||
* persist a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.
|
||||
* Failure counters are not changed.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} →
|
||||
* <strong>log skip at INFO with fingerprint</strong>;
|
||||
* persist a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.
|
||||
* Failure counters are not changed.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#PROPOSAL_READY} → load the
|
||||
* leading proposal attempt and execute the target-copy finalization flow:
|
||||
* build the base filename, resolve duplicates, write the copy, persist SUCCESS or
|
||||
* FAILED_RETRYABLE.</li>
|
||||
* build the base filename, resolve duplicates,
|
||||
* <strong>log generated target filename at INFO with fingerprint</strong>,
|
||||
* write the copy, persist SUCCESS or FAILED_RETRYABLE.</li>
|
||||
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming) and map
|
||||
* the result into status, counters, and retryable flag.</li>
|
||||
* <li><strong>Log retry decision at INFO with fingerprint and error classification</strong>:
|
||||
* FAILED_RETRYABLE (will retry in a later scheduler run) or
|
||||
* FAILED_FINAL (retry budget exhausted, no further processing).</li>
|
||||
* <li>Persist exactly one historised processing attempt for the identified document.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Retry finalization rules</h2>
|
||||
* <ul>
|
||||
* <li><strong>Deterministic content errors:</strong> The first historised occurrence
|
||||
* leads to {@link ProcessingStatus#FAILED_RETRYABLE} (content-error counter incremented
|
||||
* by 1). The second historised occurrence leads to {@link ProcessingStatus#FAILED_FINAL}
|
||||
* (content-error counter incremented by 1). No further retry is possible.</li>
|
||||
* <li><strong>Transient technical errors:</strong> The transient-error counter is
|
||||
* incremented by 1 per occurrence. The document remains
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE} as long as the counter is strictly less
|
||||
* than {@code maxRetriesTransient}. The attempt that causes the counter to reach
|
||||
* {@code maxRetriesTransient} finalises the document to
|
||||
* {@link ProcessingStatus#FAILED_FINAL}. Valid values of {@code maxRetriesTransient}
|
||||
* are integers ≥ 1; the value 0 is invalid startup configuration.</li>
|
||||
* <li><strong>Skip events</strong> ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) never change any failure counter.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Status transitions</h2>
|
||||
* <ul>
|
||||
* <li>Pre-check passed + AI naming proposal ready → {@link ProcessingStatus#PROPOSAL_READY}</li>
|
||||
* <li>First deterministic content failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
|
||||
* <li>Second deterministic content failure → {@link ProcessingStatus#FAILED_FINAL}</li>
|
||||
* <li>Technical infrastructure failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
|
||||
* <li>Technical failure at transient retry limit → {@link ProcessingStatus#FAILED_FINAL}</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + successful target copy + consistent
|
||||
* persistence → {@link ProcessingStatus#SUCCESS}</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + first copy failure + successful immediate retry
|
||||
* → treated as successful copy, proceeds to {@link ProcessingStatus#SUCCESS}</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + both copy attempts fail → cross-run
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}, transient error counter +1</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + technical failure → {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||
* transient error counter +1</li>
|
||||
* <li>{@link ProcessingStatus#SUCCESS} → {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} skip</li>
|
||||
* <li>{@link ProcessingStatus#FAILED_FINAL} → {@link ProcessingStatus#SKIPPED_FINAL_FAILURE} skip</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Log correlation</h2>
|
||||
* <p>
|
||||
* All log entries emitted by this coordinator are post-fingerprint: the fingerprint is
|
||||
* available for every document that reaches this coordinator. Relevant log entries carry
|
||||
* the document fingerprint for unambiguous correlation across runs.
|
||||
*
|
||||
* <h2>Leading source for the naming proposal (verbindlich)</h2>
|
||||
* <p>
|
||||
* When a document is in {@code PROPOSAL_READY} state, the authoritative source for the
|
||||
@@ -116,9 +152,16 @@ public class DocumentProcessingCoordinator {
|
||||
private final TargetFolderPort targetFolderPort;
|
||||
private final TargetFileCopyPort targetFileCopyPort;
|
||||
private final ProcessingLogger logger;
|
||||
private final int maxRetriesTransient;
|
||||
|
||||
/**
|
||||
* Creates the document processing coordinator with all required ports and the logger.
|
||||
* Creates the document processing coordinator with all required ports, logger, and
|
||||
* the transient retry limit.
|
||||
* <p>
|
||||
* {@code maxRetriesTransient} is the maximum number of historised transient error attempts
|
||||
* per fingerprint before the document is finalised to
|
||||
* {@link ProcessingStatus#FAILED_FINAL}. The attempt that causes the counter to
|
||||
* reach this value finalises the document. Must be >= 1.
|
||||
*
|
||||
* @param documentRecordRepository port for reading and writing the document master record;
|
||||
* must not be null
|
||||
@@ -130,7 +173,10 @@ public class DocumentProcessingCoordinator {
|
||||
* @param targetFileCopyPort port for copying source files to the target folder;
|
||||
* must not be null
|
||||
* @param logger for processing-related logging; must not be null
|
||||
* @throws NullPointerException if any parameter is null
|
||||
* @param maxRetriesTransient maximum number of historised transient error attempts
|
||||
* before finalisation; must be >= 1
|
||||
* @throws NullPointerException if any object parameter is null
|
||||
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
|
||||
*/
|
||||
public DocumentProcessingCoordinator(
|
||||
DocumentRecordRepository documentRecordRepository,
|
||||
@@ -138,7 +184,12 @@ public class DocumentProcessingCoordinator {
|
||||
UnitOfWorkPort unitOfWorkPort,
|
||||
TargetFolderPort targetFolderPort,
|
||||
TargetFileCopyPort targetFileCopyPort,
|
||||
ProcessingLogger logger) {
|
||||
ProcessingLogger logger,
|
||||
int maxRetriesTransient) {
|
||||
if (maxRetriesTransient < 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"maxRetriesTransient must be >= 1, got: " + maxRetriesTransient);
|
||||
}
|
||||
this.documentRecordRepository =
|
||||
Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null");
|
||||
this.processingAttemptRepository =
|
||||
@@ -150,6 +201,7 @@ public class DocumentProcessingCoordinator {
|
||||
this.targetFileCopyPort =
|
||||
Objects.requireNonNull(targetFileCopyPort, "targetFileCopyPort must not be null");
|
||||
this.logger = Objects.requireNonNull(logger, "logger must not be null");
|
||||
this.maxRetriesTransient = maxRetriesTransient;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -285,7 +337,7 @@ public class DocumentProcessingCoordinator {
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// M6 target-copy finalization path
|
||||
// Target-copy finalization path
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
@@ -297,6 +349,10 @@ public class DocumentProcessingCoordinator {
|
||||
* <li>Build the base filename from the proposal's date and title.</li>
|
||||
* <li>Resolve the first available unique filename in the target folder.</li>
|
||||
* <li>Copy the source file to the target folder.</li>
|
||||
* <li>If the copy fails: attempt exactly one immediate within-run retry of the same
|
||||
* physical copy step. No new AI call and no new naming-proposal derivation occur.
|
||||
* If the retry also fails, treat the combined failure as a transient error and
|
||||
* skip the SUCCESS path.</li>
|
||||
* <li>Persist a new {@code SUCCESS} attempt and update the master record.</li>
|
||||
* <li>If persistence fails after a successful copy: attempt best-effort rollback
|
||||
* of the copy and persist {@code FAILED_RETRYABLE} instead.</li>
|
||||
@@ -365,19 +421,41 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
String resolvedFilename =
|
||||
((ResolvedTargetFilename) resolutionResult).resolvedFilename();
|
||||
logger.info("Resolved target filename for '{}': '{}'.",
|
||||
candidate.uniqueIdentifier(), resolvedFilename);
|
||||
logger.info("Generated target filename for '{}' (fingerprint: {}): '{}'.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(), resolvedFilename);
|
||||
|
||||
// --- Step 4: Copy file to target ---
|
||||
// --- Step 4: Copy file to target (with one immediate within-run retry) ---
|
||||
TargetFileCopyResult copyResult =
|
||||
targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
|
||||
|
||||
if (copyResult instanceof TargetFileCopyTechnicalFailure copyFailure) {
|
||||
logger.error("Target copy failed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), copyFailure.errorMessage());
|
||||
return persistTransientError(
|
||||
candidate, fingerprint, existingRecord, context, attemptStart, now,
|
||||
"Target file copy failed: " + copyFailure.errorMessage());
|
||||
if (copyResult instanceof TargetFileCopyTechnicalFailure firstCopyFailure) {
|
||||
// First copy attempt failed — perform exactly one immediate within-run retry.
|
||||
// The retry reuses the same resolved filename and document context; no new AI
|
||||
// call, no new naming-proposal derivation. This mechanism does not increment
|
||||
// the cross-run transient-error counter by itself.
|
||||
logger.warn("First target copy attempt failed for '{}': {}. Performing immediate within-run retry.",
|
||||
candidate.uniqueIdentifier(), firstCopyFailure.errorMessage());
|
||||
|
||||
TargetFileCopyResult retryCopyResult =
|
||||
targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
|
||||
|
||||
if (retryCopyResult instanceof TargetFileCopyTechnicalFailure retryCopyFailure) {
|
||||
// Immediate retry also failed — the combined failure is escalated as a
|
||||
// cross-run transient technical error. No further within-run retry is
|
||||
// attempted. This is the only document-level result for persistence.
|
||||
logger.error("Immediate within-run retry also failed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), retryCopyFailure.errorMessage());
|
||||
String combinedMessage = "Target file copy failed after immediate within-run retry."
|
||||
+ " First attempt: " + firstCopyFailure.errorMessage()
|
||||
+ "; Retry attempt: " + retryCopyFailure.errorMessage();
|
||||
return persistTransientError(
|
||||
candidate, fingerprint, existingRecord, context, attemptStart, now,
|
||||
combinedMessage);
|
||||
}
|
||||
|
||||
// Immediate retry succeeded — proceed to SUCCESS path as if the copy
|
||||
// had succeeded on the first attempt.
|
||||
logger.info("Immediate within-run retry succeeded for '{}'.", candidate.uniqueIdentifier());
|
||||
}
|
||||
|
||||
// Copy succeeded — attempt to persist SUCCESS
|
||||
@@ -447,8 +525,15 @@ public class DocumentProcessingCoordinator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Persists a {@code FAILED_RETRYABLE} attempt with an incremented transient error counter
|
||||
* for a document-level technical error during the target-copy finalization stage.
|
||||
* Persists a transient error for a document-level technical failure during the
|
||||
* target-copy finalization stage.
|
||||
* <p>
|
||||
* The resulting status is {@link ProcessingStatus#FAILED_FINAL} if the incremented
|
||||
* transient error counter reaches {@code maxRetriesTransient}; otherwise
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}. The transient error counter is always
|
||||
* incremented by exactly one. This method does not increment the laufübergreifenden
|
||||
* transient counter for the within-run immediate retry — only the combined outcome
|
||||
* of the retry is reported here.
|
||||
*
|
||||
* @return true if the error was persisted; false if the error persistence itself failed
|
||||
*/
|
||||
@@ -463,26 +548,38 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
FailureCounters updatedCounters =
|
||||
existingRecord.failureCounters().withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
ProcessingStatus errorStatus = limitReached
|
||||
? ProcessingStatus.FAILED_FINAL
|
||||
: ProcessingStatus.FAILED_RETRYABLE;
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
|
||||
fingerprint, context.runId(), attemptNumber, attemptStart, now,
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
ProcessingStatus.FAILED_RETRYABLE.name(),
|
||||
errorMessage, true);
|
||||
errorStatus,
|
||||
errorStatus.name(),
|
||||
errorMessage, !limitReached);
|
||||
|
||||
DocumentRecord errorRecord = buildTransientErrorRecord(
|
||||
existingRecord, candidate, updatedCounters, now);
|
||||
existingRecord, candidate, updatedCounters, errorStatus, now);
|
||||
|
||||
unitOfWorkPort.executeInTransaction(txOps -> {
|
||||
txOps.saveProcessingAttempt(errorAttempt);
|
||||
txOps.updateDocumentRecord(errorRecord);
|
||||
});
|
||||
|
||||
logger.debug("Transient error persisted for '{}': status=FAILED_RETRYABLE, "
|
||||
+ "transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
updatedCounters.transientErrorCount());
|
||||
if (limitReached) {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_FINAL — "
|
||||
+ "transient error limit reached ({}/{} attempts). No further retry.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
updatedCounters.transientErrorCount(), maxRetriesTransient);
|
||||
} else {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_RETRYABLE — "
|
||||
+ "transient error, will retry in later run ({}/{} attempts).",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
updatedCounters.transientErrorCount(), maxRetriesTransient);
|
||||
}
|
||||
return true;
|
||||
|
||||
} catch (DocumentPersistenceException persistEx) {
|
||||
@@ -493,9 +590,13 @@ public class DocumentProcessingCoordinator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to persist a {@code FAILED_RETRYABLE} attempt after a persistence failure
|
||||
* that occurred following a successful target copy. This is a secondary persistence
|
||||
* effort; its failure is logged but does not change the return value.
|
||||
* Attempts to persist a transient error after a persistence failure that occurred
|
||||
* following a successful target copy. This is a secondary persistence effort;
|
||||
* its failure is logged but does not change the return value.
|
||||
* <p>
|
||||
* Applies the same transient limit check as {@link #persistTransientError}: if the
|
||||
* incremented counter reaches {@code maxRetriesTransient}, the secondary attempt
|
||||
* is persisted as {@link ProcessingStatus#FAILED_FINAL}.
|
||||
*/
|
||||
private void persistTransientErrorAfterPersistenceFailure(
|
||||
SourceDocumentCandidate candidate,
|
||||
@@ -508,16 +609,21 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
FailureCounters updatedCounters =
|
||||
existingRecord.failureCounters().withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
ProcessingStatus errorStatus = limitReached
|
||||
? ProcessingStatus.FAILED_FINAL
|
||||
: ProcessingStatus.FAILED_RETRYABLE;
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
|
||||
fingerprint, context.runId(), attemptNumber, attemptStart, now,
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
ProcessingStatus.FAILED_RETRYABLE.name(),
|
||||
errorMessage, true);
|
||||
errorStatus,
|
||||
errorStatus.name(),
|
||||
errorMessage, !limitReached);
|
||||
|
||||
DocumentRecord errorRecord = buildTransientErrorRecord(
|
||||
existingRecord, candidate, updatedCounters, now);
|
||||
existingRecord, candidate, updatedCounters, errorStatus, now);
|
||||
|
||||
unitOfWorkPort.executeInTransaction(txOps -> {
|
||||
txOps.saveProcessingAttempt(errorAttempt);
|
||||
@@ -618,13 +724,13 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForNewDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome) {
|
||||
return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome);
|
||||
return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome, maxRetriesTransient);
|
||||
}
|
||||
|
||||
private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForKnownDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome,
|
||||
FailureCounters existingCounters) {
|
||||
return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters);
|
||||
return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters, maxRetriesTransient);
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
@@ -717,12 +823,13 @@ public class DocumentProcessingCoordinator {
|
||||
DocumentRecord existingRecord,
|
||||
SourceDocumentCandidate candidate,
|
||||
FailureCounters updatedCounters,
|
||||
ProcessingStatus targetStatus,
|
||||
Instant now) {
|
||||
return new DocumentRecord(
|
||||
existingRecord.fingerprint(),
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
targetStatus,
|
||||
updatedCounters,
|
||||
now, // lastFailureInstant
|
||||
existingRecord.lastSuccessInstant(),
|
||||
@@ -764,11 +871,27 @@ public class DocumentProcessingCoordinator {
|
||||
recordWriter.accept(txOps);
|
||||
});
|
||||
|
||||
logger.info("Document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
outcome.overallStatus(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
if (outcome.overallStatus() == ProcessingStatus.FAILED_RETRYABLE) {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_RETRYABLE — "
|
||||
+ "will retry in later scheduler run. "
|
||||
+ "ContentErrors={}, TransientErrors={}.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
} else if (outcome.overallStatus() == ProcessingStatus.FAILED_FINAL) {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_FINAL — "
|
||||
+ "permanently failed, no further retry. "
|
||||
+ "ContentErrors={}, TransientErrors={}.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
} else {
|
||||
logger.info("Document '{}' processed: status={} (fingerprint: {}). "
|
||||
+ "ContentErrors={}, TransientErrors={}.",
|
||||
candidate.uniqueIdentifier(), outcome.overallStatus(), fingerprint.sha256Hex(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
}
|
||||
return true;
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
|
||||
@@ -36,9 +36,29 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||
* <li><strong>AI functional failure (second or later occurrence):</strong>
|
||||
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
|
||||
* content error counter incremented by 1, {@code retryable=false}.</li>
|
||||
* <li><strong>Technical error (pre-fingerprint / extraction / AI infrastructure):</strong>
|
||||
* <li><strong>Technical error below the transient retry limit:</strong>
|
||||
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||
* transient error counter incremented by 1, {@code retryable=true}.</li>
|
||||
* <li><strong>Technical error at or above the transient retry limit:</strong>
|
||||
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
|
||||
* transient error counter incremented by 1, {@code retryable=false}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Transient retry limit semantics</h2>
|
||||
* <p>
|
||||
* {@code maxRetriesTransient} is interpreted as the maximum number of historised
|
||||
* transient error attempts per fingerprint. The attempt that causes the counter
|
||||
* to reach {@code maxRetriesTransient} finalises the document status to
|
||||
* {@link ProcessingStatus#FAILED_FINAL}. Valid values are integers >= 1;
|
||||
* the value 0 is invalid startup configuration and must be rejected before
|
||||
* the batch run begins.
|
||||
* <p>
|
||||
* Examples:
|
||||
* <ul>
|
||||
* <li>{@code maxRetriesTransient = 1}: the first historised transient error
|
||||
* immediately finalises to {@code FAILED_FINAL}.</li>
|
||||
* <li>{@code maxRetriesTransient = 2}: the first transient error yields
|
||||
* {@code FAILED_RETRYABLE}; the second finalises to {@code FAILED_FINAL}.</li>
|
||||
* </ul>
|
||||
*/
|
||||
final class ProcessingOutcomeTransition {
|
||||
@@ -52,24 +72,33 @@ final class ProcessingOutcomeTransition {
|
||||
* <p>
|
||||
* For new documents, all failure counters start at zero.
|
||||
*
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param maxRetriesTransient maximum number of historised transient error attempts
|
||||
* before the document is finalised to {@code FAILED_FINAL};
|
||||
* must be >= 1
|
||||
* @return the mapped outcome with status, counters, and retryability
|
||||
*/
|
||||
static ProcessingOutcome forNewDocument(DocumentProcessingOutcome pipelineOutcome) {
|
||||
return forKnownDocument(pipelineOutcome, FailureCounters.zero());
|
||||
static ProcessingOutcome forNewDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome,
|
||||
int maxRetriesTransient) {
|
||||
return forKnownDocument(pipelineOutcome, FailureCounters.zero(), maxRetriesTransient);
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps a pipeline outcome to a processing outcome, considering the existing
|
||||
* failure counter state from a known document's history.
|
||||
*
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param existingCounters the current failure counter values from the document's master record
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param existingCounters the current failure counter values from the document's master record
|
||||
* @param maxRetriesTransient maximum number of historised transient error attempts
|
||||
* before the document is finalised to {@code FAILED_FINAL};
|
||||
* must be >= 1
|
||||
* @return the mapped outcome with updated status, counters, and retryability
|
||||
*/
|
||||
static ProcessingOutcome forKnownDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome,
|
||||
FailureCounters existingCounters) {
|
||||
FailureCounters existingCounters,
|
||||
int maxRetriesTransient) {
|
||||
|
||||
return switch (pipelineOutcome) {
|
||||
case NamingProposalReady ignored -> {
|
||||
@@ -106,31 +135,37 @@ final class ProcessingOutcomeTransition {
|
||||
}
|
||||
|
||||
case TechnicalDocumentError ignored4 -> {
|
||||
// Technical error (extraction / infrastructure): retryable, transient counter +1
|
||||
// Technical error (extraction / infrastructure): apply transient retry limit
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
yield new ProcessingOutcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
!limitReached
|
||||
);
|
||||
}
|
||||
|
||||
case AiTechnicalFailure ignored5 -> {
|
||||
// Technical AI error (timeout, unreachable, bad JSON): retryable, transient counter +1
|
||||
// Technical AI error (timeout, unreachable, bad JSON): apply transient retry limit
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
yield new ProcessingOutcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
!limitReached
|
||||
);
|
||||
}
|
||||
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored6 -> {
|
||||
// Pre-check passed without AI step: in normal flow this should not appear at
|
||||
// the outcome transition level once the AI pipeline is fully wired. Treat it
|
||||
// as a technical error to avoid silent inconsistency.
|
||||
// as a technical error and apply the transient retry limit.
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
yield new ProcessingOutcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
!limitReached
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -0,0 +1,107 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
|
||||
|
||||
/**
|
||||
* Application service contract for deriving authoritative retry decisions from
|
||||
* document error state and configuration.
|
||||
* <p>
|
||||
* This interface defines the single, testable entry point for all retry policy
|
||||
* evaluations. Implementations must apply the verbindlichen retry rules exactly
|
||||
* as specified:
|
||||
* <ul>
|
||||
* <li><strong>Deterministic content errors</strong> ({@link DocumentErrorClassification#DETERMINISTIC_CONTENT_ERROR}):
|
||||
* the <em>first</em> historised content error for a fingerprint results in
|
||||
* {@link RetryDecision.ContentErrorRetryable}; the <em>second</em> results in
|
||||
* {@link RetryDecision.ContentErrorFinal}.</li>
|
||||
* <li><strong>Transient technical errors</strong> ({@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}):
|
||||
* the error remains retryable while the transient-error counter after incrementing
|
||||
* stays strictly below {@code maxRetriesTransient}. When the counter after
|
||||
* incrementing reaches {@code maxRetriesTransient}, the result is
|
||||
* {@link RetryDecision.TransientErrorFinal}.</li>
|
||||
* <li><strong>Target copy failures</strong> ({@link DocumentErrorClassification#TARGET_COPY_TECHNICAL_ERROR})
|
||||
* on the <em>first</em> copy attempt within a run: result is
|
||||
* {@link RetryDecision.TargetCopyWithImmediateRetry}. After the immediate retry
|
||||
* has itself failed, the failure is re-evaluated as a
|
||||
* {@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Counter semantics:</strong>
|
||||
* <ul>
|
||||
* <li>The {@code currentCounters} passed to {@link #evaluate} reflect the state
|
||||
* <em>before</em> the current attempt's counter increment. The evaluator is
|
||||
* responsible for determining what the counter will be after incrementing and
|
||||
* applying the threshold check accordingly.</li>
|
||||
* <li>Skip events ({@code SKIPPED_ALREADY_PROCESSED}, {@code SKIPPED_FINAL_FAILURE})
|
||||
* are not routed through this evaluator and never produce a
|
||||
* {@link RetryDecision}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>{@code maxRetriesTransient} invariant:</strong>
|
||||
* The value must be an Integer ≥ 1. A value of {@code 0} is invalid configuration
|
||||
* and must be rejected at startup before any batch run begins. Implementations of
|
||||
* this interface may assume the value is always ≥ 1 when called.
|
||||
* <p>
|
||||
* Example for {@code maxRetriesTransient = 1}:
|
||||
* <ul>
|
||||
* <li>transient-error counter before = 0 → after increment = 1 = limit → {@link RetryDecision.TransientErrorFinal}</li>
|
||||
* </ul>
|
||||
* Example for {@code maxRetriesTransient = 2}:
|
||||
* <ul>
|
||||
* <li>transient-error counter before = 0 → after increment = 1 < 2 → {@link RetryDecision.TransientErrorRetryable}</li>
|
||||
* <li>transient-error counter before = 1 → after increment = 2 = limit → {@link RetryDecision.TransientErrorFinal}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Single-truth rule:</strong> No parallel persistence source for retry
|
||||
* decisions is introduced. Evaluations are derived solely from the document master
|
||||
* record's failure counters and the configured limit.
|
||||
*/
|
||||
public interface RetryDecisionEvaluator {
|
||||
|
||||
/**
|
||||
* Derives the authoritative retry decision for a document-level error.
|
||||
* <p>
|
||||
* The decision is determined by the error classification, the existing failure
|
||||
* counters (before any increment for the current attempt), and the configured
|
||||
* transient-retry limit.
|
||||
*
|
||||
* @param errorClass classification of the error that occurred; never {@code null}
|
||||
* @param currentCounters failure counters <em>before</em> incrementing for this
|
||||
* attempt; never {@code null}
|
||||
* @param maxRetriesTransient configured maximum number of historised transient errors
|
||||
* allowed per fingerprint; must be ≥ 1
|
||||
* @param failureClass short, stable failure class identifier for persistence
|
||||
* and logging; never {@code null} or blank
|
||||
* @param failureMessage human-readable description of the error; never {@code null}
|
||||
* or blank
|
||||
* @return the authoritative {@link RetryDecision}; never {@code null}
|
||||
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
|
||||
*/
|
||||
RetryDecision evaluate(
|
||||
DocumentErrorClassification errorClass,
|
||||
FailureCounters currentCounters,
|
||||
int maxRetriesTransient,
|
||||
String failureClass,
|
||||
String failureMessage);
|
||||
|
||||
/**
|
||||
* Determines whether an immediate within-run retry of the target copy operation
|
||||
* is permitted.
|
||||
* <p>
|
||||
* An immediate retry is {@link ImmediateRetryDecision#ALLOWED} only when the copy
|
||||
* has failed on its first attempt within the current run. If this is the second
|
||||
* copy attempt within the same run (i.e. the immediate retry itself has failed),
|
||||
* the result is {@link ImmediateRetryDecision#DENIED}.
|
||||
*
|
||||
* @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was
|
||||
* the first copy attempt for this document in
|
||||
* the current run; {@code false} if it was the
|
||||
* immediate retry attempt
|
||||
* @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED};
|
||||
* never {@code null}
|
||||
*/
|
||||
ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun);
|
||||
}
|
||||
@@ -222,23 +222,30 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
* <p>
|
||||
* Processing order:
|
||||
* <ol>
|
||||
* <li><strong>Log:</strong> detected source file at INFO level with run-ID (pre-fingerprint
|
||||
* correlation via run-ID and candidate description).</li>
|
||||
* <li>Record the attempt start instant.</li>
|
||||
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event and
|
||||
* return true — no SQLite record is created, but no persistence failure occurred.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event with run-ID
|
||||
* and return true — no SQLite record is created, no persistence failure.</li>
|
||||
* <li>Load document master record.</li>
|
||||
* <li>If already {@code SUCCESS} → persist skip attempt with
|
||||
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
|
||||
* {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the pipeline (extraction + pre-checks).</li>
|
||||
* <li>Map result into status, counters and retryable flag.</li>
|
||||
* <li>If already {@code SUCCESS} → log skip at INFO level with fingerprint;
|
||||
* persist skip attempt with {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If already {@code FAILED_FINAL} → log skip at INFO level with fingerprint;
|
||||
* persist skip attempt with {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming).</li>
|
||||
* <li>Map result into status, counters, and retryable flag.</li>
|
||||
* <li><strong>Log:</strong> retry decision at INFO level with fingerprint and error
|
||||
* classification (FAILED_RETRYABLE or FAILED_FINAL).</li>
|
||||
* <li>Persist exactly one historised processing attempt.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Per-document errors do not abort the overall batch run. Each candidate ends
|
||||
* controlled regardless of its outcome.
|
||||
* <p>
|
||||
* Post-fingerprint log entries carry the document fingerprint for correlation.
|
||||
* Pre-fingerprint log entries (steps 1–4) use run-ID and candidate description.
|
||||
*
|
||||
* @param candidate the candidate to process
|
||||
* @param context the current batch run context
|
||||
@@ -246,14 +253,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
* errors return true; persistence failures return false)
|
||||
*/
|
||||
private boolean processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) {
|
||||
logger.debug("Processing candidate: {}", candidate.uniqueIdentifier());
|
||||
logger.info("Detected source file '{}' for processing (RunId: {}).",
|
||||
candidate.uniqueIdentifier(), context.runId());
|
||||
|
||||
Instant attemptStart = Instant.now();
|
||||
FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
|
||||
|
||||
return switch (fingerprintResult) {
|
||||
case FingerprintTechnicalError fingerprintError -> {
|
||||
handleFingerprintError(candidate, fingerprintError);
|
||||
handleFingerprintError(candidate, fingerprintError, context);
|
||||
yield true; // fingerprint errors are not persistence failures
|
||||
}
|
||||
case FingerprintSuccess fingerprintSuccess ->
|
||||
@@ -262,15 +270,23 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles a fingerprint computation error by logging it as a non-identifiable event.
|
||||
* Handles a fingerprint computation error by logging it as a non-identifiable run event.
|
||||
* No SQLite record is created for this candidate.
|
||||
* <p>
|
||||
* Log entries before a successful fingerprint are correlated via the batch run identifier
|
||||
* and the candidate description, as no fingerprint is available for document-level
|
||||
* correlation.
|
||||
*
|
||||
* @param candidate the candidate that could not be fingerprinted
|
||||
* @param error the fingerprint error
|
||||
* @param error the fingerprint error
|
||||
* @param context the current batch run context; used for run-level log correlation
|
||||
*/
|
||||
private void handleFingerprintError(SourceDocumentCandidate candidate, FingerprintTechnicalError error) {
|
||||
logger.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).",
|
||||
candidate.uniqueIdentifier(), error.errorMessage());
|
||||
private void handleFingerprintError(
|
||||
SourceDocumentCandidate candidate,
|
||||
FingerprintTechnicalError error,
|
||||
BatchRunContext context) {
|
||||
logger.warn("Fingerprint computation failed for '{}' (RunId: {}): {} — candidate not historised.",
|
||||
candidate.uniqueIdentifier(), context.runId(), error.errorMessage());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -0,0 +1,189 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* Tests for the {@link DocumentLogCorrelation} sealed type and its two permitted implementations.
|
||||
* <p>
|
||||
* Verifies:
|
||||
* <ul>
|
||||
* <li>{@link DocumentLogCorrelation.CandidateCorrelation} stores the run identifier and
|
||||
* candidate description correctly (pre-fingerprint phase).</li>
|
||||
* <li>{@link DocumentLogCorrelation.FingerprintCorrelation} stores the run identifier and
|
||||
* fingerprint correctly (post-fingerprint phase).</li>
|
||||
* <li>The sealed type contract: only the two permitted subtypes exist.</li>
|
||||
* </ul>
|
||||
*/
|
||||
class DocumentLogCorrelationTest {
|
||||
|
||||
private static final String RUN_ID_VALUE = "run-correlation-test-001";
|
||||
private static final String CANDIDATE_DESCRIPTION = "invoice-2026-01-15.pdf";
|
||||
private static final String FINGERPRINT_HEX = "a".repeat(64);
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// CandidateCorrelation – pre-fingerprint phase
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void candidateCorrelation_storesRunId() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentLogCorrelation.CandidateCorrelation correlation =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
|
||||
assertEquals(runId, correlation.runId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void candidateCorrelation_storesCandidateDescription() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentLogCorrelation.CandidateCorrelation correlation =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
|
||||
assertEquals(CANDIDATE_DESCRIPTION, correlation.candidateDescription());
|
||||
}
|
||||
|
||||
@Test
|
||||
void candidateCorrelation_runIdAccessibleViaInterface() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentLogCorrelation correlation =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
|
||||
// runId() is declared on the sealed interface and must be accessible polymorphically
|
||||
assertEquals(runId, correlation.runId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void candidateCorrelation_twoInstancesWithSameDataAreEqual() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentLogCorrelation.CandidateCorrelation first =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
DocumentLogCorrelation.CandidateCorrelation second =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
|
||||
assertEquals(first, second);
|
||||
}
|
||||
|
||||
@Test
|
||||
void candidateCorrelation_implementsDocumentLogCorrelation() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentLogCorrelation.CandidateCorrelation correlation =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
|
||||
assertInstanceOf(DocumentLogCorrelation.class, correlation);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// FingerprintCorrelation – post-fingerprint phase
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void fingerprintCorrelation_storesRunId() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
DocumentLogCorrelation.FingerprintCorrelation correlation =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint);
|
||||
|
||||
assertEquals(runId, correlation.runId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void fingerprintCorrelation_storesFingerprint() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
DocumentLogCorrelation.FingerprintCorrelation correlation =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint);
|
||||
|
||||
assertEquals(fingerprint, correlation.fingerprint());
|
||||
}
|
||||
|
||||
@Test
|
||||
void fingerprintCorrelation_runIdAccessibleViaInterface() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
DocumentLogCorrelation correlation =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint);
|
||||
|
||||
// runId() is declared on the sealed interface and must be accessible polymorphically
|
||||
assertEquals(runId, correlation.runId());
|
||||
}
|
||||
|
||||
@Test
|
||||
void fingerprintCorrelation_twoInstancesWithSameDataAreEqual() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
DocumentLogCorrelation.FingerprintCorrelation first =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint);
|
||||
DocumentLogCorrelation.FingerprintCorrelation second =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint);
|
||||
|
||||
assertEquals(first, second);
|
||||
}
|
||||
|
||||
@Test
|
||||
void fingerprintCorrelation_implementsDocumentLogCorrelation() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);
|
||||
DocumentLogCorrelation.FingerprintCorrelation correlation =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint);
|
||||
|
||||
assertInstanceOf(DocumentLogCorrelation.class, correlation);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Sealed type structural contract
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void sealedType_patternMatchExhaustsAllPermittedSubtypes() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
|
||||
DocumentLogCorrelation candidatePhase =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION);
|
||||
DocumentLogCorrelation fingerprintPhase =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, new DocumentFingerprint(FINGERPRINT_HEX));
|
||||
|
||||
// Pattern match on the sealed type must compile exhaustively for exactly these two cases
|
||||
String candidatePhaseResult = describe(candidatePhase);
|
||||
String fingerprintPhaseResult = describe(fingerprintPhase);
|
||||
|
||||
assertEquals("candidate", candidatePhaseResult);
|
||||
assertEquals("fingerprint", fingerprintPhaseResult);
|
||||
}
|
||||
|
||||
/** Helper method using an exhaustive switch over the sealed type. */
|
||||
private static String describe(DocumentLogCorrelation correlation) {
|
||||
return switch (correlation) {
|
||||
case DocumentLogCorrelation.CandidateCorrelation ignored -> "candidate";
|
||||
case DocumentLogCorrelation.FingerprintCorrelation ignored -> "fingerprint";
|
||||
};
|
||||
}
|
||||
|
||||
@Test
|
||||
void candidateCorrelation_differentDescriptions_areNotEqual() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentLogCorrelation.CandidateCorrelation withFirst =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, "first.pdf");
|
||||
DocumentLogCorrelation.CandidateCorrelation withSecond =
|
||||
new DocumentLogCorrelation.CandidateCorrelation(runId, "second.pdf");
|
||||
|
||||
assertNotEquals(withFirst, withSecond);
|
||||
}
|
||||
|
||||
@Test
|
||||
void fingerprintCorrelation_differentFingerprints_areNotEqual() {
|
||||
RunId runId = new RunId(RUN_ID_VALUE);
|
||||
DocumentFingerprint first = new DocumentFingerprint("a".repeat(64));
|
||||
DocumentFingerprint second = new DocumentFingerprint("b".repeat(64));
|
||||
DocumentLogCorrelation.FingerprintCorrelation withFirst =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, first);
|
||||
DocumentLogCorrelation.FingerprintCorrelation withSecond =
|
||||
new DocumentLogCorrelation.FingerprintCorrelation(runId, second);
|
||||
|
||||
assertNotEquals(withFirst, withSecond);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,320 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* Tests for {@link DefaultRetryDecisionEvaluator}.
|
||||
* <p>
|
||||
* Verifies the binding retry policy rules for deterministic content errors,
|
||||
* transient technical errors, target copy failures, and the within-run
|
||||
* immediate retry mechanism.
|
||||
*/
|
||||
class DefaultRetryDecisionEvaluatorTest {
|
||||
|
||||
private static final String FAILURE_CLASS = "SOME_FAILURE";
|
||||
private static final String FAILURE_MESSAGE = "Something went wrong";
|
||||
|
||||
private DefaultRetryDecisionEvaluator evaluator;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
evaluator = new DefaultRetryDecisionEvaluator();
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Deterministic content error rules
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void evaluate_firstContentError_returnsContentErrorRetryable() {
|
||||
FailureCounters counters = new FailureCounters(0, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.ContentErrorRetryable.class, decision);
|
||||
RetryDecision.ContentErrorRetryable retryable = (RetryDecision.ContentErrorRetryable) decision;
|
||||
assertEquals(FAILURE_CLASS, retryable.failureClass());
|
||||
assertEquals(FAILURE_MESSAGE, retryable.failureMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_secondContentError_returnsContentErrorFinal() {
|
||||
FailureCounters counters = new FailureCounters(1, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.ContentErrorFinal.class, decision);
|
||||
RetryDecision.ContentErrorFinal finalDecision = (RetryDecision.ContentErrorFinal) decision;
|
||||
assertEquals(FAILURE_CLASS, finalDecision.failureClass());
|
||||
assertEquals(FAILURE_MESSAGE, finalDecision.failureMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_subsequentContentErrors_alwaysReturnContentErrorFinal() {
|
||||
// Any count >= 1 results in final (covers legacy M4-M6 data with higher counts)
|
||||
for (int count = 1; count <= 5; count++) {
|
||||
FailureCounters counters = new FailureCounters(count, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.ContentErrorFinal.class, decision,
|
||||
"Expected ContentErrorFinal for contentErrorCount=" + count);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_contentError_transientCounterIsIrrelevant() {
|
||||
// Non-zero transient counter must not affect content error decision
|
||||
FailureCounters counters = new FailureCounters(0, 5);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.ContentErrorRetryable.class, decision);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Transient technical error rules
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_maxRetriesTransientOne_firstError_returnsTransientErrorFinal() {
|
||||
// maxRetriesTransient=1: counter before=0, after=1=limit → final immediately
|
||||
FailureCounters counters = new FailureCounters(0, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision,
|
||||
"With maxRetriesTransient=1, first transient error must be final");
|
||||
RetryDecision.TransientErrorFinal finalDecision = (RetryDecision.TransientErrorFinal) decision;
|
||||
assertEquals(FAILURE_CLASS, finalDecision.failureClass());
|
||||
assertEquals(FAILURE_MESSAGE, finalDecision.failureMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_maxRetriesTransientTwo_firstError_returnsTransientErrorRetryable() {
|
||||
// maxRetriesTransient=2: counter before=0, after=1 < 2 → retryable
|
||||
FailureCounters counters = new FailureCounters(0, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 2, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision);
|
||||
RetryDecision.TransientErrorRetryable retryable = (RetryDecision.TransientErrorRetryable) decision;
|
||||
assertEquals(FAILURE_CLASS, retryable.failureClass());
|
||||
assertEquals(FAILURE_MESSAGE, retryable.failureMessage());
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_maxRetriesTransientTwo_secondError_returnsTransientErrorFinal() {
|
||||
// maxRetriesTransient=2: counter before=1, after=2=limit → final
|
||||
FailureCounters counters = new FailureCounters(0, 1);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 2, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision,
|
||||
"With maxRetriesTransient=2, second transient error must be final");
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_maxRetriesTransientThree_firstError_returnsRetryable() {
|
||||
FailureCounters counters = new FailureCounters(0, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 3, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision);
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_maxRetriesTransientThree_secondError_returnsRetryable() {
|
||||
FailureCounters counters = new FailureCounters(0, 1);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 3, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision);
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_maxRetriesTransientThree_thirdError_returnsFinal() {
|
||||
// counter before=2, after=3=limit → final
|
||||
FailureCounters counters = new FailureCounters(0, 2);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 3, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision,
|
||||
"Third transient error with maxRetriesTransient=3 must be final");
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_contentCounterIsIrrelevant() {
|
||||
// Non-zero content error counter must not affect transient error decision
|
||||
FailureCounters counters = new FailureCounters(1, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 2, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision);
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_transientError_legacyDataWithHigherCounts_finalizesCorrectly() {
|
||||
// Existing M4-M6 data may have counter values beyond normal expectations;
|
||||
// the evaluator must still apply the threshold check consistently
|
||||
FailureCounters counters = new FailureCounters(3, 5);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 3, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
// counter before=5, after=6 >= 3 → final
|
||||
assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Target copy technical error rule
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void evaluate_targetCopyError_returnsTargetCopyWithImmediateRetry() {
|
||||
FailureCounters counters = new FailureCounters(0, 0);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR,
|
||||
counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TargetCopyWithImmediateRetry.class, decision);
|
||||
RetryDecision.TargetCopyWithImmediateRetry immediate =
|
||||
(RetryDecision.TargetCopyWithImmediateRetry) decision;
|
||||
assertEquals(FAILURE_MESSAGE, immediate.failureMessage());
|
||||
assertEquals(DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name(),
|
||||
immediate.failureClass());
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_targetCopyError_countersAndMaxRetriesAreIgnored() {
|
||||
// Target copy decision is independent of counters and maxRetriesTransient
|
||||
FailureCounters counters = new FailureCounters(2, 3);
|
||||
|
||||
RetryDecision decision = evaluator.evaluate(
|
||||
DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR,
|
||||
counters, 5, FAILURE_CLASS, FAILURE_MESSAGE);
|
||||
|
||||
assertInstanceOf(RetryDecision.TargetCopyWithImmediateRetry.class, decision);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Immediate within-run retry decision
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void evaluateImmediateRetry_firstAttempt_returnsAllowed() {
|
||||
ImmediateRetryDecision decision = evaluator.evaluateImmediateRetry(true);
|
||||
|
||||
assertEquals(ImmediateRetryDecision.ALLOWED, decision);
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluateImmediateRetry_secondAttempt_returnsDenied() {
|
||||
ImmediateRetryDecision decision = evaluator.evaluateImmediateRetry(false);
|
||||
|
||||
assertEquals(ImmediateRetryDecision.DENIED, decision);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Guard conditions
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenMaxRetriesTransientIsZero() {
|
||||
FailureCounters counters = FailureCounters.zero();
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, 0, FAILURE_CLASS, FAILURE_MESSAGE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenMaxRetriesTransientIsNegative() {
|
||||
FailureCounters counters = FailureCounters.zero();
|
||||
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
|
||||
counters, -1, FAILURE_CLASS, FAILURE_MESSAGE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenErrorClassIsNull() {
|
||||
assertThrows(NullPointerException.class, () ->
|
||||
evaluator.evaluate(null, FailureCounters.zero(), 1,
|
||||
FAILURE_CLASS, FAILURE_MESSAGE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenCountersAreNull() {
|
||||
assertThrows(NullPointerException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
null, 1, FAILURE_CLASS, FAILURE_MESSAGE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenFailureClassIsNull() {
|
||||
assertThrows(NullPointerException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
FailureCounters.zero(), 1, null, FAILURE_MESSAGE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenFailureClassIsBlank() {
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
FailureCounters.zero(), 1, " ", FAILURE_MESSAGE));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenFailureMessageIsNull() {
|
||||
assertThrows(NullPointerException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
FailureCounters.zero(), 1, FAILURE_CLASS, null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void evaluate_throwsWhenFailureMessageIsBlank() {
|
||||
assertThrows(IllegalArgumentException.class, () ->
|
||||
evaluator.evaluate(
|
||||
DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
|
||||
FailureCounters.zero(), 1, FAILURE_CLASS, " "));
|
||||
}
|
||||
}
|
||||
@@ -70,6 +70,9 @@ class DocumentProcessingCoordinatorTest {
|
||||
private static final String FINGERPRINT_HEX =
|
||||
"a".repeat(64); // 64 lowercase hex chars
|
||||
|
||||
/** Default transient retry limit used in the shared {@link #processor} instance. */
|
||||
private static final int DEFAULT_MAX_RETRIES_TRANSIENT = 3;
|
||||
|
||||
private CapturingDocumentRecordRepository recordRepo;
|
||||
private CapturingProcessingAttemptRepository attemptRepo;
|
||||
private CapturingUnitOfWorkPort unitOfWorkPort;
|
||||
@@ -86,7 +89,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
attemptRepo = new CapturingProcessingAttemptRepository();
|
||||
unitOfWorkPort = new CapturingUnitOfWorkPort(recordRepo, attemptRepo);
|
||||
processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
candidate = new SourceDocumentCandidate(
|
||||
"test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf"));
|
||||
@@ -198,9 +202,11 @@ class DocumentProcessingCoordinatorTest {
|
||||
|
||||
@Test
|
||||
void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
|
||||
// Starting with 1 transient error; with DEFAULT_MAX_RETRIES_TRANSIENT=3, counter
|
||||
// becomes 2 after this run which is still below the limit → FAILED_RETRYABLE
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(0, 2));
|
||||
new FailureCounters(0, 1));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome outcome = new TechnicalDocumentError(
|
||||
@@ -212,10 +218,54 @@ class DocumentProcessingCoordinatorTest {
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
|
||||
assertEquals(0, record.failureCounters().contentErrorCount());
|
||||
assertEquals(3, record.failureCounters().transientErrorCount());
|
||||
assertEquals(2, record.failureCounters().transientErrorCount());
|
||||
assertTrue(attemptRepo.savedAttempts.get(0).retryable());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_knownDocument_technicalError_atTransientLimit_persistsFailedFinal() {
|
||||
// Counter already at limit - 1: the next error finalises the document
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
new FailureCounters(0, DEFAULT_MAX_RETRIES_TRANSIENT - 1));
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
|
||||
DocumentProcessingOutcome outcome = new TechnicalDocumentError(
|
||||
candidate, "Timeout at limit", null);
|
||||
|
||||
processor.process(candidate, fingerprint, outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, recordRepo.updatedRecords.size());
|
||||
DocumentRecord record = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus(),
|
||||
"Document must be finalised when transient limit is reached");
|
||||
assertEquals(DEFAULT_MAX_RETRIES_TRANSIENT, record.failureCounters().transientErrorCount(),
|
||||
"Transient counter must be incremented to the limit value");
|
||||
assertFalse(attemptRepo.savedAttempts.get(0).retryable(),
|
||||
"Attempt must not be retryable when transient limit is reached");
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_newDocument_technicalError_maxRetriesTransient1_immediatelyFinalises() {
|
||||
// With maxRetriesTransient=1, the very first transient error finalises the document
|
||||
DocumentProcessingCoordinator coordinatorWith1Retry = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 1);
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
|
||||
DocumentProcessingOutcome outcome = new TechnicalDocumentError(
|
||||
candidate, "I/O error", null);
|
||||
|
||||
coordinatorWith1Retry.process(candidate, fingerprint, outcome, context, attemptStart);
|
||||
|
||||
assertEquals(1, recordRepo.createdRecords.size());
|
||||
DocumentRecord record = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus(),
|
||||
"With maxRetriesTransient=1, the first transient error must immediately finalise");
|
||||
assertEquals(1, record.failureCounters().transientErrorCount());
|
||||
assertFalse(attemptRepo.savedAttempts.get(0).retryable());
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_knownDocument_namingProposalReady_persistsProposalReadyStatus() {
|
||||
DocumentRecord existingRecord = buildRecord(
|
||||
@@ -617,7 +667,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("Datenbank nicht erreichbar", null));
|
||||
DocumentProcessingOutcome outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
@@ -634,7 +685,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
DocumentProcessingOutcome outcome = new PreCheckPassed(
|
||||
@@ -652,7 +704,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0));
|
||||
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord));
|
||||
DocumentProcessingOutcome outcome = new PreCheckFailed(
|
||||
@@ -670,7 +723,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome outcome = new PreCheckPassed(
|
||||
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
@@ -687,7 +741,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
unitOfWorkPort.failOnExecute = true;
|
||||
DocumentProcessingOutcome outcome = new PreCheckPassed(
|
||||
@@ -705,7 +760,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
DocumentProcessingOutcome outcome = new PreCheckPassed(
|
||||
@@ -723,7 +779,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
|
||||
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
|
||||
unitOfWorkPort.failOnExecute = true;
|
||||
@@ -811,7 +868,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
|
||||
DocumentProcessingCoordinator coordinatorWithFailingFolder = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
|
||||
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
@@ -830,7 +888,8 @@ class DocumentProcessingCoordinatorTest {
|
||||
|
||||
DocumentProcessingCoordinator coordinatorWithFailingCopy = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger());
|
||||
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
@@ -902,6 +961,175 @@ class DocumentProcessingCoordinatorTest {
|
||||
assertFalse(result, "Should return false when persistence fails after successful copy");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_firstCopyFails_immediateRetrySucceeds_persistsSuccess() {
|
||||
// First copy attempt fails, immediate within-run retry succeeds → SUCCESS
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CountingTargetFileCopyPort countingCopyPort = new CountingTargetFileCopyPort(1); // fail first call only
|
||||
DocumentProcessingCoordinator coordinatorWithCountingCopy = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
boolean result = coordinatorWithCountingCopy.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> {
|
||||
throw new AssertionError("Pipeline must not run for PROPOSAL_READY");
|
||||
});
|
||||
|
||||
assertTrue(result, "Should succeed when immediate retry of target copy succeeds");
|
||||
|
||||
ProcessingAttempt successAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.SUCCESS)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
assertNotNull(successAttempt, "A SUCCESS attempt must be persisted after a successful immediate retry");
|
||||
|
||||
DocumentRecord updated = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.SUCCESS, updated.overallStatus(),
|
||||
"Master record must show SUCCESS after successful immediate retry");
|
||||
|
||||
assertEquals(2, countingCopyPort.callCount,
|
||||
"copyToTarget must have been called exactly twice: first attempt + one retry");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_persistsTransientError() {
|
||||
// Both the first copy attempt and the immediate retry fail → FAILED_RETRYABLE
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CountingTargetFileCopyPort countingCopyPort = new CountingTargetFileCopyPort(2); // fail both calls
|
||||
DocumentProcessingCoordinator coordinatorWithCountingCopy = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCountingCopy.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
.orElse(null);
|
||||
assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when both copy attempts fail");
|
||||
assertTrue(errorAttempt.retryable(), "Error must be retryable after exhausting immediate retry");
|
||||
|
||||
assertEquals(2, countingCopyPort.callCount,
|
||||
"copyToTarget must have been called exactly twice: first attempt + one immediate retry");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() {
|
||||
// Ensures that during the immediate retry path no pipeline (AI) execution happens
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CountingTargetFileCopyPort countingCopyPort = new CountingTargetFileCopyPort(1); // fail first, succeed second
|
||||
DocumentProcessingCoordinator coordinatorWithCountingCopy = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCountingCopy.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart,
|
||||
c -> { throw new AssertionError("AI pipeline must NOT run during immediate retry"); });
|
||||
|
||||
// No FAILED_RETRYABLE must have been persisted — the retry succeeded
|
||||
long failedRetryableCount = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.count();
|
||||
assertEquals(0, failedRetryableCount,
|
||||
"No FAILED_RETRYABLE must be persisted when immediate retry succeeds");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Sequential multi-run lifecycle tests
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void process_contentErrorLifecycle_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
|
||||
// Run 1: new document, first deterministic content error → FAILED_RETRYABLE
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
DocumentProcessingOutcome contentError = new PreCheckFailed(
|
||||
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||
|
||||
processor.process(candidate, fingerprint, contentError, context, attemptStart);
|
||||
|
||||
DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(),
|
||||
"First content error must yield FAILED_RETRYABLE");
|
||||
assertEquals(1, afterRun1.failureCounters().contentErrorCount());
|
||||
assertTrue(attemptRepo.savedAttempts.get(0).retryable(),
|
||||
"First content error attempt must be retryable");
|
||||
|
||||
// Run 2: known document (FAILED_RETRYABLE, contentErrorCount=1), second content error → FAILED_FINAL
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(afterRun1));
|
||||
|
||||
processor.process(candidate, fingerprint, contentError, context, attemptStart);
|
||||
|
||||
DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, afterRun2.overallStatus(),
|
||||
"Second content error must yield FAILED_FINAL");
|
||||
assertEquals(2, afterRun2.failureCounters().contentErrorCount());
|
||||
assertFalse(attemptRepo.savedAttempts.get(1).retryable(),
|
||||
"Second content error attempt must not be retryable");
|
||||
|
||||
// Run 3: terminal FAILED_FINAL → SKIPPED_FINAL_FAILURE; counters must not change
|
||||
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(afterRun2));
|
||||
|
||||
processor.process(candidate, fingerprint, contentError, context, attemptStart);
|
||||
|
||||
assertEquals(3, attemptRepo.savedAttempts.size(),
|
||||
"Three attempts must be recorded across the three runs");
|
||||
ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(2);
|
||||
assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
|
||||
assertFalse(skipAttempt.retryable());
|
||||
|
||||
DocumentRecord afterRun3 = recordRepo.updatedRecords.get(1);
|
||||
assertEquals(2, afterRun3.failureCounters().contentErrorCount(),
|
||||
"Content error counter must remain 2 after a SKIPPED_FINAL_FAILURE event");
|
||||
assertEquals(0, afterRun3.failureCounters().transientErrorCount(),
|
||||
"Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event");
|
||||
}
|
||||
|
||||
@Test
|
||||
void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() {
|
||||
// maxRetriesTransient=2: first transient error → FAILED_RETRYABLE, second → FAILED_FINAL
|
||||
DocumentProcessingCoordinator coordinatorWith2Retries = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 2);
|
||||
DocumentProcessingOutcome transientError = new TechnicalDocumentError(candidate, "Timeout", null);
|
||||
|
||||
// Run 1: new document, first transient error → FAILED_RETRYABLE, transientErrorCount=1
|
||||
recordRepo.setLookupResult(new DocumentUnknown());
|
||||
|
||||
coordinatorWith2Retries.process(candidate, fingerprint, transientError, context, attemptStart);
|
||||
|
||||
DocumentRecord afterRun1 = recordRepo.createdRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(),
|
||||
"First transient error must yield FAILED_RETRYABLE when limit not yet reached");
|
||||
assertEquals(1, afterRun1.failureCounters().transientErrorCount());
|
||||
assertTrue(attemptRepo.savedAttempts.get(0).retryable());
|
||||
|
||||
// Run 2: transientErrorCount=1, second transient error reaches limit=2 → FAILED_FINAL
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(afterRun1));
|
||||
|
||||
coordinatorWith2Retries.process(candidate, fingerprint, transientError, context, attemptStart);
|
||||
|
||||
DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0);
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, afterRun2.overallStatus(),
|
||||
"Second transient error must yield FAILED_FINAL when maxRetriesTransient=2 is reached");
|
||||
assertEquals(2, afterRun2.failureCounters().transientErrorCount(),
|
||||
"Transient error counter must equal maxRetriesTransient after finalisation");
|
||||
assertFalse(attemptRepo.savedAttempts.get(1).retryable(),
|
||||
"Final transient error attempt must not be retryable");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
@@ -1089,6 +1317,26 @@ class DocumentProcessingCoordinatorTest {
|
||||
}
|
||||
}
|
||||
|
||||
private static class CountingTargetFileCopyPort implements TargetFileCopyPort {
|
||||
private int callCount = 0;
|
||||
private final int failFirstNCalls;
|
||||
|
||||
CountingTargetFileCopyPort(int failFirstNCalls) {
|
||||
this.failFirstNCalls = failFirstNCalls;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TargetFileCopyResult copyToTarget(
|
||||
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator,
|
||||
String resolvedFilename) {
|
||||
callCount++;
|
||||
if (callCount <= failFirstNCalls) {
|
||||
return new TargetFileCopyTechnicalFailure("Simulated copy failure on call " + callCount, false);
|
||||
}
|
||||
return new TargetFileCopySuccess();
|
||||
}
|
||||
}
|
||||
|
||||
private static class NoOpTargetFolderPort implements TargetFolderPort {
|
||||
@Override
|
||||
public String getTargetFolderLocator() {
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
|
||||
@@ -44,8 +45,8 @@ class DocumentProcessingServiceTest {
|
||||
SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString());
|
||||
candidate = new SourceDocumentCandidate("document.pdf", 2048L, locator);
|
||||
|
||||
// Create runtime configuration with maxPages limit
|
||||
runtimeConfig = new RuntimeConfiguration(10);
|
||||
// Create runtime configuration with maxPages limit and default transient retry limit
|
||||
runtimeConfig = new RuntimeConfiguration(10, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
|
||||
@@ -236,7 +237,7 @@ class PreCheckEvaluatorTest {
|
||||
// =========================================================================
|
||||
|
||||
private RuntimeConfiguration buildConfig(int maxPages) throws Exception {
|
||||
return new RuntimeConfiguration(maxPages);
|
||||
return new RuntimeConfiguration(maxPages, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
|
||||
}
|
||||
|
||||
private int maxPages(int limit) {
|
||||
|
||||
@@ -2,6 +2,7 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
|
||||
@@ -52,6 +53,7 @@ import java.net.URI;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.Instant;
|
||||
import java.util.ArrayList;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@@ -460,7 +462,7 @@ class BatchRunProcessingUseCaseTest {
|
||||
DocumentProcessingCoordinator failingProcessor = new DocumentProcessingCoordinator(
|
||||
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
|
||||
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
|
||||
new NoOpProcessingLogger()) {
|
||||
new NoOpProcessingLogger(), 3) {
|
||||
@Override
|
||||
public boolean processDeferredOutcome(
|
||||
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
|
||||
@@ -504,7 +506,7 @@ class BatchRunProcessingUseCaseTest {
|
||||
DocumentProcessingCoordinator selectiveFailingProcessor = new DocumentProcessingCoordinator(
|
||||
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
|
||||
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
|
||||
new NoOpProcessingLogger()) {
|
||||
new NoOpProcessingLogger(), 3) {
|
||||
private int callCount = 0;
|
||||
|
||||
@Override
|
||||
@@ -595,7 +597,7 @@ class BatchRunProcessingUseCaseTest {
|
||||
DocumentProcessingCoordinator failingCoordinator = new DocumentProcessingCoordinator(
|
||||
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
|
||||
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
|
||||
new NoOpProcessingLogger()) {
|
||||
new NoOpProcessingLogger(), 3) {
|
||||
@Override
|
||||
public boolean processDeferredOutcome(
|
||||
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate c,
|
||||
@@ -660,11 +662,12 @@ class BatchRunProcessingUseCaseTest {
|
||||
// Prüft, dass bei erfolgreich verarbeiteter Datei debug() durch logExtractionResult
|
||||
// und info() durch logProcessingOutcome aufgerufen wird.
|
||||
// Erwartete debug()-Aufrufe für einen Kandidaten (success-Pfad):
|
||||
// L138 (lock acquired) + L249 (processCandidate) + L293 (fingerprint) + L337 (logExtractionResult) + L213 (lock released) = 5
|
||||
// Ohne logExtractionResult-Aufruf: 4
|
||||
// lock acquired + fingerprint computed + logExtractionResult + lock released = 4
|
||||
// Ohne logExtractionResult-Aufruf wären es nur 3 debug()-Aufrufe.
|
||||
// Erwartete info()-Aufrufe für einen Kandidaten (success-Pfad):
|
||||
// L130 (initiiert) + L145 (gestartet) + L178 (Kandidaten gefunden) + L365 (PreCheckPassed) + L190 (abgeschlossen) = 5
|
||||
// Ohne logProcessingOutcome-Aufruf: 4
|
||||
// Batch initiiert + Batch gestartet + Kandidaten gefunden + erkannte Quelldatei
|
||||
// + logProcessingOutcome (PreCheckPassed) + Batch abgeschlossen = 6
|
||||
// Ohne logProcessingOutcome-Aufruf wären es 5 info()-Aufrufe.
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
RuntimeConfiguration config = buildConfig(tempDir);
|
||||
|
||||
@@ -680,21 +683,21 @@ class BatchRunProcessingUseCaseTest {
|
||||
|
||||
useCase.execute(new BatchRunContext(new RunId("log-precheck"), Instant.now()));
|
||||
|
||||
// Ohne logExtractionResult wären es mindestens 4 debug()-Aufrufe; mit logExtractionResult 5
|
||||
assertTrue(capturingLogger.debugCallCount >= 5,
|
||||
"logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 5, war: "
|
||||
// Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4
|
||||
assertTrue(capturingLogger.debugCallCount >= 4,
|
||||
"logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 4, war: "
|
||||
+ capturingLogger.debugCallCount + ")");
|
||||
// Ohne logProcessingOutcome wären es 4 info()-Aufrufe; mit logProcessingOutcome 5
|
||||
assertTrue(capturingLogger.infoCallCount >= 5,
|
||||
"logProcessingOutcome muss bei PreCheckPassed info() aufrufen (erwartet >= 5, war: "
|
||||
// Ohne logProcessingOutcome wären es 5 info()-Aufrufe; mit logProcessingOutcome >= 6
|
||||
assertTrue(capturingLogger.infoCallCount >= 6,
|
||||
"logProcessingOutcome muss bei PreCheckPassed info() aufrufen (erwartet >= 6, war: "
|
||||
+ capturingLogger.infoCallCount + ")");
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_extractionContentError_logsDebugAndPreCheckFailedInfo() throws Exception {
|
||||
// Prüft, dass bei PdfExtractionContentError debug (logExtractionResult) und info (logProcessingOutcome) geloggt wird.
|
||||
// Erwartete debug()-Aufrufe: 5 (lock + processCandidate + fingerprint + logExtractionResult (content) + lock released)
|
||||
// Erwartete info()-Aufrufe: 5 (L130 + L145 + L178 + L369 PreCheckFailed + L190)
|
||||
// Erwartete debug()-Aufrufe: 4 (lock acquired + fingerprint + logExtractionResult (content) + lock released)
|
||||
// Erwartete info()-Aufrufe: 6 (Batch initiiert + gestartet + Kandidaten gefunden + erkannte Quelldatei + PreCheckFailed + abgeschlossen)
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
RuntimeConfiguration config = buildConfig(tempDir);
|
||||
|
||||
@@ -710,20 +713,20 @@ class BatchRunProcessingUseCaseTest {
|
||||
|
||||
useCase.execute(new BatchRunContext(new RunId("log-content-error"), Instant.now()));
|
||||
|
||||
// Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5
|
||||
assertTrue(capturingLogger.debugCallCount >= 5,
|
||||
"logExtractionResult muss bei PdfExtractionContentError debug() aufrufen (erwartet >= 5, war: "
|
||||
// Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4
|
||||
assertTrue(capturingLogger.debugCallCount >= 4,
|
||||
"logExtractionResult muss bei PdfExtractionContentError debug() aufrufen (erwartet >= 4, war: "
|
||||
+ capturingLogger.debugCallCount + ")");
|
||||
// Ohne logProcessingOutcome (PreCheckFailed) wären es 4 info()-Aufrufe; mit 5
|
||||
assertTrue(capturingLogger.infoCallCount >= 5,
|
||||
"logProcessingOutcome muss bei PreCheckFailed info() aufrufen (erwartet >= 5, war: "
|
||||
// Ohne logProcessingOutcome (PreCheckFailed) wären es 5 info()-Aufrufe; mit >= 6
|
||||
assertTrue(capturingLogger.infoCallCount >= 6,
|
||||
"logProcessingOutcome muss bei PreCheckFailed info() aufrufen (erwartet >= 6, war: "
|
||||
+ capturingLogger.infoCallCount + ")");
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_extractionTechnicalError_logsDebugAndWarn() throws Exception {
|
||||
// Prüft, dass bei PdfExtractionTechnicalError debug (logExtractionResult) und warn (logProcessingOutcome) geloggt wird.
|
||||
// Erwartete debug()-Aufrufe: 5 (lock + processCandidate + fingerprint + logExtractionResult + lock released)
|
||||
// Erwartete debug()-Aufrufe: 4 (lock acquired + fingerprint + logExtractionResult + lock released)
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
RuntimeConfiguration config = buildConfig(tempDir);
|
||||
|
||||
@@ -739,15 +742,86 @@ class BatchRunProcessingUseCaseTest {
|
||||
|
||||
useCase.execute(new BatchRunContext(new RunId("log-tech-error"), Instant.now()));
|
||||
|
||||
// Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5
|
||||
assertTrue(capturingLogger.debugCallCount >= 5,
|
||||
"logExtractionResult muss bei PdfExtractionTechnicalError debug() aufrufen (erwartet >= 5, war: "
|
||||
// Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4
|
||||
assertTrue(capturingLogger.debugCallCount >= 4,
|
||||
"logExtractionResult muss bei PdfExtractionTechnicalError debug() aufrufen (erwartet >= 4, war: "
|
||||
+ capturingLogger.debugCallCount + ")");
|
||||
// logProcessingOutcome ruft warn() auf für TechnicalDocumentError
|
||||
assertTrue(capturingLogger.warnCallCount > 0,
|
||||
"logProcessingOutcome muss bei TechnicalDocumentError warn() aufrufen");
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Log correlation tests
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void execute_preFingerprintError_logContainsRunIdAndCandidateDescription() throws Exception {
|
||||
// When fingerprint computation fails, the warning log must reference both the run-ID
|
||||
// and the candidate's unique identifier (pre-fingerprint correlation rule).
|
||||
String runIdValue = "run-correlation-pre-fp";
|
||||
String candidateFilename = "unreadable-candidate.pdf";
|
||||
|
||||
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
||||
RuntimeConfiguration config = buildConfig(tempDir);
|
||||
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(
|
||||
List.of(makeCandidate(candidateFilename)));
|
||||
|
||||
// Fingerprint port that always fails
|
||||
FingerprintPort failingFingerprintPort = c ->
|
||||
new FingerprintTechnicalError("File not readable", null);
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, new MockRunLockPort(), candidatesPort, new NoOpExtractionPort(),
|
||||
failingFingerprintPort, new NoOpDocumentProcessingCoordinator(),
|
||||
buildStubAiNamingService(), capturingLogger);
|
||||
|
||||
useCase.execute(new BatchRunContext(new RunId(runIdValue), Instant.now()));
|
||||
|
||||
// At least one warning message must contain both run-ID and candidate filename
|
||||
boolean correlationPresent = capturingLogger.warnMessages.stream()
|
||||
.anyMatch(m -> m.contains(runIdValue) && m.contains(candidateFilename));
|
||||
assertTrue(correlationPresent,
|
||||
"Pre-fingerprint warning must reference both run-ID '" + runIdValue
|
||||
+ "' and candidate '" + candidateFilename + "'. "
|
||||
+ "Captured warn messages: " + capturingLogger.warnMessages);
|
||||
}
|
||||
|
||||
@Test
|
||||
void execute_postFingerprintProcessing_logContainsFingerprintHex() throws Exception {
|
||||
// After a successful fingerprint computation, at least one log message must contain
|
||||
// the fingerprint's SHA-256 hex value (post-fingerprint correlation rule).
|
||||
String candidateFilename = "identifiable.pdf";
|
||||
|
||||
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
||||
RuntimeConfiguration config = buildConfig(tempDir);
|
||||
|
||||
SourceDocumentCandidate candidate = makeCandidate(candidateFilename);
|
||||
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
|
||||
FixedExtractionPort extractionPort = new FixedExtractionPort(
|
||||
new PdfExtractionSuccess("Some invoice text", new PdfPageCount(1)));
|
||||
|
||||
// Deterministic fingerprint port so we can verify the exact hex in the log
|
||||
AlwaysSuccessFingerprintPort fingerprintPort = new AlwaysSuccessFingerprintPort();
|
||||
DocumentFingerprint expectedFingerprint = ((FingerprintSuccess) fingerprintPort.computeFingerprint(candidate)).fingerprint();
|
||||
|
||||
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
|
||||
config, new MockRunLockPort(), candidatesPort, extractionPort,
|
||||
fingerprintPort, new TrackingDocumentProcessingCoordinator(),
|
||||
buildStubAiNamingService(), capturingLogger);
|
||||
|
||||
useCase.execute(new BatchRunContext(new RunId("run-correlation-post-fp"), Instant.now()));
|
||||
|
||||
String fingerprintHex = expectedFingerprint.sha256Hex();
|
||||
boolean fingerprintInLog = capturingLogger.allMessages().stream()
|
||||
.anyMatch(m -> m.contains(fingerprintHex));
|
||||
assertTrue(fingerprintInLog,
|
||||
"At least one log message must contain the fingerprint hex '" + fingerprintHex
|
||||
+ "' after successful fingerprint computation. "
|
||||
+ "Captured messages: " + capturingLogger.allMessages());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Helpers
|
||||
// -------------------------------------------------------------------------
|
||||
@@ -779,8 +853,8 @@ class BatchRunProcessingUseCaseTest {
|
||||
}
|
||||
|
||||
private static RuntimeConfiguration buildConfig(Path tempDir) throws Exception {
|
||||
// maxPages set to 3 – useful for page-limit tests
|
||||
return new RuntimeConfiguration(3);
|
||||
// maxPages set to 3 – useful for page-limit tests; maxRetriesTransient set to 3
|
||||
return new RuntimeConfiguration(3, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
|
||||
}
|
||||
|
||||
private static SourceDocumentCandidate makeCandidate(String filename) {
|
||||
@@ -937,7 +1011,7 @@ class BatchRunProcessingUseCaseTest {
|
||||
private static class NoOpDocumentProcessingCoordinator extends DocumentProcessingCoordinator {
|
||||
NoOpDocumentProcessingCoordinator() {
|
||||
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(),
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -949,7 +1023,7 @@ class BatchRunProcessingUseCaseTest {
|
||||
|
||||
TrackingDocumentProcessingCoordinator() {
|
||||
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(),
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3);
|
||||
}
|
||||
|
||||
@Override
|
||||
@@ -1094,6 +1168,62 @@ class BatchRunProcessingUseCaseTest {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Captures formatted log messages for each log level.
|
||||
* Used by log-correlation tests that must inspect message content.
|
||||
*/
|
||||
private static class MessageCapturingProcessingLogger implements ProcessingLogger {
|
||||
final List<String> infoMessages = new ArrayList<>();
|
||||
final List<String> debugMessages = new ArrayList<>();
|
||||
final List<String> warnMessages = new ArrayList<>();
|
||||
final List<String> errorMessages = new ArrayList<>();
|
||||
|
||||
/** Formats a message template with its arguments the same way SLF4J/Log4j2 does. */
|
||||
private static String format(String message, Object... args) {
|
||||
if (args == null || args.length == 0) return message;
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int argIndex = 0;
|
||||
int start = 0;
|
||||
int pos;
|
||||
while ((pos = message.indexOf("{}", start)) != -1 && argIndex < args.length) {
|
||||
sb.append(message, start, pos);
|
||||
sb.append(args[argIndex++]);
|
||||
start = pos + 2;
|
||||
}
|
||||
sb.append(message, start, message.length());
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void info(String message, Object... args) {
|
||||
infoMessages.add(format(message, args));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void debug(String message, Object... args) {
|
||||
debugMessages.add(format(message, args));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void warn(String message, Object... args) {
|
||||
warnMessages.add(format(message, args));
|
||||
}
|
||||
|
||||
@Override
|
||||
public void error(String message, Object... args) {
|
||||
errorMessages.add(format(message, args));
|
||||
}
|
||||
|
||||
List<String> allMessages() {
|
||||
List<String> all = new ArrayList<>();
|
||||
all.addAll(infoMessages);
|
||||
all.addAll(debugMessages);
|
||||
all.addAll(warnMessages);
|
||||
all.addAll(errorMessages);
|
||||
return all;
|
||||
}
|
||||
}
|
||||
|
||||
/** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */
|
||||
private static class CapturingProcessingLogger implements ProcessingLogger {
|
||||
int infoCallCount = 0;
|
||||
|
||||
Reference in New Issue
Block a user