Umsetzung von Meilenstein M7
This commit is contained in:
+50
-1
@@ -1,5 +1,7 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.config;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
|
||||
/**
|
||||
* Minimal runtime configuration for the application layer.
|
||||
* <p>
|
||||
@@ -9,12 +11,59 @@ package de.gecheckt.pdf.umbenenner.application.config;
|
||||
* <p>
|
||||
* This intentionally small contract ensures the application layer depends only on
|
||||
* the configuration values it actually uses, following hexagonal architecture principles.
|
||||
*
|
||||
* <h2>Validation invariants</h2>
|
||||
* <ul>
|
||||
* <li>{@link #maxPages()} must be ≥ 1.</li>
|
||||
* <li>{@link #maxRetriesTransient()} must be ≥ 1. The value {@code 0} is an invalid
|
||||
* start configuration and must prevent the batch run from starting with exit
|
||||
* code 1.</li>
|
||||
* <li>{@link #aiContentSensitivity()} must not be {@code null}. The safe default is
|
||||
* {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>AI content sensitivity</h2>
|
||||
* <p>
|
||||
* The {@link #aiContentSensitivity()} field is derived from the {@code log.ai.sensitive}
|
||||
* configuration property (default: {@code false}). It governs whether the complete AI raw
|
||||
* response and complete AI {@code reasoning} may be written to log files. Sensitive AI
|
||||
* content is always persisted in SQLite regardless of this setting; only log output is
|
||||
* affected.
|
||||
* <p>
|
||||
* The safe default ({@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}) must be used
|
||||
* whenever {@code log.ai.sensitive} is absent, {@code false}, or set to any value other
|
||||
* than the explicit opt-in.
|
||||
*/
|
||||
public record RuntimeConfiguration(
|
||||
/**
|
||||
* Maximum number of pages a document can have to be processed.
|
||||
* Documents exceeding this limit are rejected during pre-checks.
|
||||
*/
|
||||
int maxPages
|
||||
int maxPages,
|
||||
|
||||
/**
|
||||
* Maximum number of historised transient technical errors allowed per fingerprint
|
||||
* across all scheduler runs.
|
||||
* <p>
|
||||
* The attempt that causes the counter to reach this value finalises the document
|
||||
* to {@code FAILED_FINAL}. Must be an integer ≥ 1; the value {@code 0} is an
|
||||
* invalid start configuration.
|
||||
* <p>
|
||||
* Example: {@code maxRetriesTransient = 1} means the first transient error
|
||||
* immediately finalises the document.
|
||||
*/
|
||||
int maxRetriesTransient,
|
||||
|
||||
/**
|
||||
* Sensitivity decision governing whether AI-generated content may be written to log files.
|
||||
* <p>
|
||||
* Derived from the {@code log.ai.sensitive} configuration property. The default is
|
||||
* {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT} (do not log sensitive content).
|
||||
* Only {@link AiContentSensitivity#LOG_SENSITIVE_CONTENT} is produced when
|
||||
* {@code log.ai.sensitive = true} is explicitly set.
|
||||
* <p>
|
||||
* Must not be {@code null}.
|
||||
*/
|
||||
AiContentSensitivity aiContentSensitivity
|
||||
)
|
||||
{ }
|
||||
|
||||
+18
-1
@@ -10,6 +10,16 @@ import java.nio.file.Path;
|
||||
* loaded and validated at bootstrap time. This is a complete configuration model
|
||||
* for the entire application startup, including paths, API settings, persistence,
|
||||
* and operational parameters.
|
||||
*
|
||||
* <h2>AI content sensitivity ({@code log.ai.sensitive})</h2>
|
||||
* <p>
|
||||
* The boolean property {@code log.ai.sensitive} controls whether sensitive AI-generated
|
||||
* content (complete raw AI response, complete AI {@code reasoning}) may be written to
|
||||
* log files. The default is {@code false} (safe/protect). Set to {@code true} only when
|
||||
* explicit diagnostic logging of AI content is required.
|
||||
* <p>
|
||||
* Sensitive AI content is always persisted in SQLite regardless of this setting.
|
||||
* Only log output is affected.
|
||||
*/
|
||||
public record StartConfiguration(
|
||||
Path sourceFolder,
|
||||
@@ -25,6 +35,13 @@ public record StartConfiguration(
|
||||
Path runtimeLockFile,
|
||||
Path logDirectory,
|
||||
String logLevel,
|
||||
String apiKey
|
||||
String apiKey,
|
||||
|
||||
/**
|
||||
* Whether sensitive AI content (raw response, reasoning) may be written to log files.
|
||||
* Corresponds to the {@code log.ai.sensitive} configuration property.
|
||||
* Default: {@code false} (do not log sensitive content).
|
||||
*/
|
||||
boolean logAiSensitive
|
||||
)
|
||||
{ }
|
||||
|
||||
+46
@@ -0,0 +1,46 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Sensitivity decision governing whether AI-generated content may be written to log files.
|
||||
* <p>
|
||||
* The following AI-generated content items are classified as sensitive and are subject to
|
||||
* this decision:
|
||||
* <ul>
|
||||
* <li>The <strong>complete AI raw response</strong> (full JSON body returned by the
|
||||
* AI service)</li>
|
||||
* <li>The <strong>complete AI {@code reasoning}</strong> field extracted from the
|
||||
* AI response</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Sensitive AI content is always written to SQLite (for traceability) regardless of
|
||||
* this decision. The decision controls only whether the content is also emitted into
|
||||
* log files.
|
||||
* <p>
|
||||
* <strong>Default behaviour:</strong> The default is {@link #PROTECT_SENSITIVE_CONTENT}.
|
||||
* Logging of sensitive AI content must be explicitly enabled by setting the boolean
|
||||
* configuration property {@code log.ai.sensitive = true}. Any other value, or the
|
||||
* absence of the property, results in {@link #PROTECT_SENSITIVE_CONTENT}.
|
||||
* <p>
|
||||
* <strong>Non-sensitive AI content</strong> (e.g. the resolved title, the resolved date,
|
||||
* the date source) is not covered by this decision and may always be logged.
|
||||
*/
|
||||
public enum AiContentSensitivity {
|
||||
|
||||
/**
|
||||
* Sensitive AI content (raw response, reasoning) must <strong>not</strong> be written
|
||||
* to log files.
|
||||
* <p>
|
||||
* This is the safe default. It is active whenever {@code log.ai.sensitive} is absent,
|
||||
* {@code false}, or set to any value other than the explicit opt-in.
|
||||
*/
|
||||
PROTECT_SENSITIVE_CONTENT,
|
||||
|
||||
/**
|
||||
* Sensitive AI content (raw response, reasoning) <strong>may</strong> be written
|
||||
* to log files.
|
||||
* <p>
|
||||
* This value is only produced when {@code log.ai.sensitive = true} is explicitly set
|
||||
* in the application configuration. It must never be the implicit default.
|
||||
*/
|
||||
LOG_SENSITIVE_CONTENT
|
||||
}
|
||||
+90
@@ -0,0 +1,90 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Unified classification of all document-level errors in the end state.
|
||||
* <p>
|
||||
* This enumeration provides a single, exhaustive taxonomy for every error category
|
||||
* that the retry policy and logging infrastructure must distinguish. It replaces
|
||||
* any ad-hoc string-based classification where an authoritative type is needed.
|
||||
* <p>
|
||||
* <strong>Mapping to failure counters:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #DETERMINISTIC_CONTENT_ERROR} → increments the content-error counter
|
||||
* ({@link FailureCounters#contentErrorCount()}). The first occurrence leads to
|
||||
* {@code FAILED_RETRYABLE}; the second leads to {@code FAILED_FINAL}.
|
||||
* There is no further retry after the second deterministic content error.</li>
|
||||
* <li>{@link #TRANSIENT_TECHNICAL_ERROR} → increments the transient-error counter
|
||||
* ({@link FailureCounters#transientErrorCount()}). Remains retryable until the
|
||||
* counter reaches the configured {@code max.retries.transient} limit (Integer ≥ 1).
|
||||
* The attempt that reaches the limit finalises the document to {@code FAILED_FINAL}.</li>
|
||||
* <li>{@link #TARGET_COPY_TECHNICAL_ERROR} → signals a failure on the physical target
|
||||
* file copy path. Within the same run, exactly one immediate technical retry is
|
||||
* allowed. If the immediate retry also fails, the error is treated as a
|
||||
* {@link #TRANSIENT_TECHNICAL_ERROR} for the purposes of counter updates and
|
||||
* cross-run retry evaluation.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Scope of deterministic content errors:</strong>
|
||||
* <ul>
|
||||
* <li>No usable PDF text extracted</li>
|
||||
* <li>Page limit exceeded</li>
|
||||
* <li>AI response functionally invalid (generic/unusable title, unparseable date)</li>
|
||||
* <li>Document content ambiguous or not uniquely interpretable</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Scope of transient technical errors:</strong>
|
||||
* <ul>
|
||||
* <li>AI service unreachable, HTTP timeout, network error</li>
|
||||
* <li>Unparseable or structurally invalid AI JSON</li>
|
||||
* <li>Temporary I/O error during PDF text extraction</li>
|
||||
* <li>Temporary SQLite lock or persistence failure</li>
|
||||
* <li>Any other non-deterministic infrastructure failure</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Architecture note:</strong> This type carries no infrastructure dependencies.
|
||||
* It is safe to reference from Domain, Application and Adapter layers.
|
||||
*/
|
||||
public enum DocumentErrorClassification {
|
||||
|
||||
/**
|
||||
* A deterministic content error that cannot be resolved by retrying with the same
|
||||
* document content.
|
||||
* <p>
|
||||
* Examples: no extractable text, page limit exceeded, AI-returned title is generic
|
||||
* or unusable, document content is ambiguous.
|
||||
* <p>
|
||||
* Retry rule: the first historised occurrence of this error for a fingerprint leads
|
||||
* to {@code FAILED_RETRYABLE} (one later run may retry). The second historised
|
||||
* occurrence leads to {@code FAILED_FINAL} (no further retries).
|
||||
*/
|
||||
DETERMINISTIC_CONTENT_ERROR,
|
||||
|
||||
/**
|
||||
* A transient technical infrastructure failure unrelated to the document content.
|
||||
* <p>
|
||||
* Examples: AI endpoint not reachable, HTTP timeout, malformed or non-parseable
|
||||
* JSON, temporary I/O failure, temporary SQLite lock.
|
||||
* <p>
|
||||
* Retry rule: remains {@code FAILED_RETRYABLE} until the transient-error counter
|
||||
* reaches the configured {@code max.retries.transient} limit. The attempt that
|
||||
* reaches the limit finalises the document to {@code FAILED_FINAL}.
|
||||
* The configured limit must be an integer ≥ 1; the value {@code 0} is an invalid
|
||||
* start configuration and prevents the batch run from starting.
|
||||
*/
|
||||
TRANSIENT_TECHNICAL_ERROR,
|
||||
|
||||
/**
|
||||
* A technical failure specifically on the physical target-file copy path.
|
||||
* <p>
|
||||
* This error class is distinct from {@link #TRANSIENT_TECHNICAL_ERROR} because it
|
||||
* triggers a special within-run handling: exactly one immediate technical retry of
|
||||
* the copy operation is allowed within the same document run. No new AI call and no
|
||||
* new naming proposal derivation occur during the immediate retry.
|
||||
* <p>
|
||||
* If the immediate retry succeeds, the document proceeds to {@code SUCCESS}.
|
||||
* If the immediate retry also fails, the combined failure is recorded as a
|
||||
* {@link #TRANSIENT_TECHNICAL_ERROR} for counter and cross-run retry evaluation.
|
||||
* The immediate retry is not counted in the cross-run transient-error counter.
|
||||
*/
|
||||
TARGET_COPY_TECHNICAL_ERROR
|
||||
}
|
||||
+81
@@ -0,0 +1,81 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
|
||||
/**
|
||||
* Sealed type carrying the correlation context for all document-related log entries.
|
||||
* <p>
|
||||
* The logging correlation rule distinguishes two phases of document processing:
|
||||
* <ol>
|
||||
* <li><strong>Pre-fingerprint phase:</strong> Before a {@link DocumentFingerprint} has
|
||||
* been successfully computed (e.g. the source file cannot be read for hashing),
|
||||
* log entries are correlated via the batch run identifier and a stable candidate
|
||||
* description derived from the candidate's own identifier (typically its source
|
||||
* file path or name). Use {@link CandidateCorrelation}.</li>
|
||||
* <li><strong>Post-fingerprint phase:</strong> Once the fingerprint has been
|
||||
* successfully computed, all subsequent document-related log entries are correlated
|
||||
* via the batch run identifier and the fingerprint. Use
|
||||
* {@link FingerprintCorrelation}.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* <strong>Architecture constraints:</strong>
|
||||
* <ul>
|
||||
* <li>This type contains no filesystem ({@code Path}, {@code File}) or NIO types.</li>
|
||||
* <li>This type introduces no additional persistence truth source.</li>
|
||||
* <li>The correlation is a logging concern only and does not influence the processing
|
||||
* outcome, retry decision, or persistence model.</li>
|
||||
* </ul>
|
||||
*/
|
||||
public sealed interface DocumentLogCorrelation {
|
||||
|
||||
/**
|
||||
* Returns the batch run identifier shared by all log entries within one run.
|
||||
*
|
||||
* @return run identifier; never {@code null}
|
||||
*/
|
||||
RunId runId();
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Pre-fingerprint correlation
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Correlation context available before a {@link DocumentFingerprint} has been
|
||||
* successfully computed.
|
||||
* <p>
|
||||
* Used when the fingerprint computation itself fails or when a log entry must be
|
||||
* emitted at the very start of candidate processing (before any hashing result is
|
||||
* available).
|
||||
* <p>
|
||||
* The {@code candidateDescription} is a stable, human-readable identifier for the
|
||||
* candidate derived from the candidate's own unique identifier — typically the
|
||||
* source file name or path representation. It must not change between log entries
|
||||
* for the same candidate within a single run.
|
||||
*
|
||||
* @param runId batch run identifier; never {@code null}
|
||||
* @param candidateDescription stable human-readable candidate identifier;
|
||||
* never {@code null} or blank
|
||||
*/
|
||||
record CandidateCorrelation(RunId runId, String candidateDescription)
|
||||
implements DocumentLogCorrelation {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Post-fingerprint correlation
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Correlation context available after a {@link DocumentFingerprint} has been
|
||||
* successfully computed.
|
||||
* <p>
|
||||
* Used for all document-related log entries from the point at which the fingerprint
|
||||
* is known. The fingerprint is the authoritative, content-stable document identity
|
||||
* and must appear in or be unambiguously derivable from every subsequent log entry
|
||||
* for this document.
|
||||
*
|
||||
* @param runId batch run identifier; never {@code null}
|
||||
* @param fingerprint content-based document identity; never {@code null}
|
||||
*/
|
||||
record FingerprintCorrelation(RunId runId, DocumentFingerprint fingerprint)
|
||||
implements DocumentLogCorrelation {}
|
||||
}
|
||||
+23
-13
@@ -7,24 +7,34 @@ package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
* <ul>
|
||||
* <li><strong>Content error counter</strong> ({@link #contentErrorCount()}):
|
||||
* counts how many times a deterministic content error occurred for this document
|
||||
* (no usable text, page limit exceeded). At count 1 the document is
|
||||
* {@code FAILED_RETRYABLE}; at count 2 it becomes {@code FAILED_FINAL}.
|
||||
* (no usable text, page limit exceeded, AI functional failure, ambiguous content).
|
||||
* At count 1 the document transitions to {@code FAILED_RETRYABLE};
|
||||
* at count 2 it transitions to {@code FAILED_FINAL}.
|
||||
* Skip events do <em>not</em> increase this counter.</li>
|
||||
* <li><strong>Transient error counter</strong> ({@link #transientErrorCount()}):
|
||||
* counts how many times a technical infrastructure error occurred after a
|
||||
* successful fingerprint was computed. The document remains
|
||||
* {@code FAILED_RETRYABLE} until the configured maximum is reached in later
|
||||
* milestones. Skip events do <em>not</em> increase this counter.</li>
|
||||
* counts how many times a transient technical error occurred after a successful
|
||||
* fingerprint was computed. The document remains {@code FAILED_RETRYABLE} while
|
||||
* this counter is strictly less than the configured {@code max.retries.transient}
|
||||
* value. The attempt that causes the counter to reach {@code max.retries.transient}
|
||||
* transitions the document to {@code FAILED_FINAL}.
|
||||
* The configured limit must be an Integer ≥ 1.
|
||||
* Skip events do <em>not</em> increase this counter.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A freshly discovered document starts with both counters at zero.
|
||||
* Counters are only written by the repository layer on the instructions of the
|
||||
* application use case; they never change as a side-effect of a read operation.
|
||||
* <strong>Immediate within-run target copy retry:</strong>
|
||||
* The physical target-copy retry within the same run is not tracked in either counter.
|
||||
* It is a purely technical within-run mechanism and does not affect the
|
||||
* cross-run counter state.
|
||||
* <p>
|
||||
* <strong>Counter invariant:</strong>
|
||||
* Both counters start at zero for a newly discovered document and only increase
|
||||
* monotonically. The counters are written by the repository layer on the instructions
|
||||
* of the application use case; they never change as a side-effect of a read operation.
|
||||
*
|
||||
* @param contentErrorCount number of deterministic content errors recorded so far;
|
||||
* must be >= 0
|
||||
* @param transientErrorCount number of transient technical errors recorded so far;
|
||||
* must be >= 0
|
||||
* @param contentErrorCount number of historised deterministic content errors;
|
||||
* must be ≥ 0
|
||||
* @param transientErrorCount number of historised transient technical errors;
|
||||
* must be ≥ 0
|
||||
*/
|
||||
public record FailureCounters(int contentErrorCount, int transientErrorCount) {
|
||||
|
||||
|
||||
+44
@@ -0,0 +1,44 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Decision governing whether a within-run immediate technical retry of the target copy
|
||||
* operation is permitted.
|
||||
* <p>
|
||||
* The immediate retry mechanism is strictly scoped:
|
||||
* <ul>
|
||||
* <li>It applies <strong>only</strong> to the physical target-file copy path.</li>
|
||||
* <li>It is permitted <strong>at most once</strong> per document per run (first copy
|
||||
* attempt failed; one additional attempt is allowed).</li>
|
||||
* <li>It does <strong>not</strong> involve a new AI call, a new naming-proposal
|
||||
* derivation, or any other pipeline stage.</li>
|
||||
* <li>It does <strong>not</strong> increment the cross-run
|
||||
* transient-error counter regardless of outcome.</li>
|
||||
* <li>It is a purely technical within-run recovery mechanism and is
|
||||
* <strong>not</strong> counted as a cross-run retry in the sense of
|
||||
* {@code max.retries.transient}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The concrete retry decision for the subsequent persistence step is derived from the
|
||||
* combined outcome after the immediate retry completes (see {@link RetryDecision}).
|
||||
*/
|
||||
public enum ImmediateRetryDecision {
|
||||
|
||||
/**
|
||||
* An immediate within-run retry of the target copy operation is permitted.
|
||||
* <p>
|
||||
* This value is produced when the first physical copy attempt within the current
|
||||
* document run has failed. The copy must be retried exactly once more.
|
||||
* No other pipeline stage is repeated.
|
||||
*/
|
||||
ALLOWED,
|
||||
|
||||
/**
|
||||
* No immediate within-run retry is permitted.
|
||||
* <p>
|
||||
* This value is produced when the immediate retry quota for this document run has
|
||||
* already been consumed (i.e. the immediate retry attempt itself has failed), or
|
||||
* when the failure did not occur on the target copy path.
|
||||
* The error must be escalated to the cross-run retry evaluation.
|
||||
*/
|
||||
DENIED
|
||||
}
|
||||
+172
@@ -0,0 +1,172 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Sealed type representing the complete, authoritative retry decision for a document
|
||||
* after an error has been classified.
|
||||
* <p>
|
||||
* A {@code RetryDecision} is the output of the retry policy evaluation. It unambiguously
|
||||
* encodes what must happen next for the document: which status to persist, which counter
|
||||
* to increment, and whether a within-run immediate retry is still possible.
|
||||
* <p>
|
||||
* <strong>Decision cases and their semantics:</strong>
|
||||
* <ol>
|
||||
* <li>{@link ContentErrorRetryable} — first deterministic content error. Document moves
|
||||
* to {@code FAILED_RETRYABLE}; content-error counter is incremented by 1. One later
|
||||
* scheduler run may retry.</li>
|
||||
* <li>{@link ContentErrorFinal} — second (or later) deterministic content error. Document
|
||||
* moves to {@code FAILED_FINAL}; content-error counter is incremented by 1. No further
|
||||
* processing in any future run.</li>
|
||||
* <li>{@link TransientErrorRetryable} — transient technical error with remaining retry budget.
|
||||
* Document moves to {@code FAILED_RETRYABLE}; transient-error counter is incremented by 1.
|
||||
* A later scheduler run may retry, as long as the counter stays below
|
||||
* {@code max.retries.transient}.</li>
|
||||
* <li>{@link TransientErrorFinal} — transient technical error that exhausts the configured
|
||||
* {@code max.retries.transient} budget. Document moves to {@code FAILED_FINAL};
|
||||
* transient-error counter is incremented by 1. No further processing in any future run.</li>
|
||||
* <li>{@link TargetCopyWithImmediateRetry} — first physical copy failure within the current
|
||||
* run. The document has not yet changed status; exactly one immediate within-run retry
|
||||
* of the copy step is permitted. No new AI call and no new naming-proposal derivation
|
||||
* occur. This decision does not yet modify any counter or status; the outcome of the
|
||||
* immediate retry determines which subsequent decision applies.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* <strong>What this type does NOT cover:</strong>
|
||||
* <ul>
|
||||
* <li>Skip decisions ({@code SKIPPED_ALREADY_PROCESSED}, {@code SKIPPED_FINAL_FAILURE})
|
||||
* — skips are not retry decisions; they are pure historisation events.</li>
|
||||
* <li>Success — a successful outcome is not a retry decision.</li>
|
||||
* <li>Pre-fingerprint failures — errors before the fingerprint is computed are not
|
||||
* historised as attempts and therefore do not produce a {@code RetryDecision}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Counter invariant:</strong> Skip decisions ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) never produce a {@code RetryDecision} and never change
|
||||
* any failure counter.
|
||||
* <p>
|
||||
* <strong>Single-truth rule:</strong> The retry decision is derived exclusively from the
|
||||
* document master record and the attempt history. No additional, parallel truth source
|
||||
* for retry state is introduced.
|
||||
*/
|
||||
public sealed interface RetryDecision {
|
||||
|
||||
/**
|
||||
* Returns the failure class identifier for persistence and logging.
|
||||
* <p>
|
||||
* The failure class is a short, stable string identifying the type of failure,
|
||||
* typically the enum constant name of the original error or exception class name.
|
||||
*
|
||||
* @return failure class string; never {@code null} or blank
|
||||
*/
|
||||
String failureClass();
|
||||
|
||||
/**
|
||||
* Returns a human-readable failure message for persistence and logging.
|
||||
*
|
||||
* @return failure message; never {@code null} or blank
|
||||
*/
|
||||
String failureMessage();
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Deterministic content error cases
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* First historised deterministic content error for this fingerprint.
|
||||
* <p>
|
||||
* The document must be persisted with status {@code FAILED_RETRYABLE} and the
|
||||
* content-error counter incremented by 1. Exactly one later scheduler run is
|
||||
* permitted to retry.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record ContentErrorRetryable(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
/**
|
||||
* Second (or subsequent) historised deterministic content error for this fingerprint.
|
||||
* <p>
|
||||
* The document must be persisted with status {@code FAILED_FINAL} and the
|
||||
* content-error counter incremented by 1. No further processing is allowed in
|
||||
* any future run.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record ContentErrorFinal(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Transient technical error cases
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* Transient technical error with remaining retry budget.
|
||||
* <p>
|
||||
* The transient-error counter after incrementing is strictly less than
|
||||
* {@code max.retries.transient}. The document must be persisted with status
|
||||
* {@code FAILED_RETRYABLE} and the transient-error counter incremented by 1.
|
||||
* A later scheduler run may retry.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record TransientErrorRetryable(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
/**
|
||||
* Transient technical error that exhausts the configured {@code max.retries.transient}
|
||||
* budget.
|
||||
* <p>
|
||||
* The transient-error counter after incrementing equals {@code max.retries.transient}.
|
||||
* The document must be persisted with status {@code FAILED_FINAL} and the
|
||||
* transient-error counter incremented by 1. No further processing is allowed in
|
||||
* any future run.
|
||||
* <p>
|
||||
* Example: with {@code max.retries.transient = 1}, the very first transient error
|
||||
* produces this decision immediately.
|
||||
*
|
||||
* @param failureClass failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable failure description; never {@code null} or blank
|
||||
*/
|
||||
record TransientErrorFinal(String failureClass, String failureMessage)
|
||||
implements RetryDecision {}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Target copy immediate retry case
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
/**
|
||||
* First physical target-file copy failure within the current run.
|
||||
* <p>
|
||||
* Exactly one immediate technical retry of the copy operation is permitted within
|
||||
* the same document run. This decision does not change any counter or document
|
||||
* status — it defers the final outcome until the immediate retry completes:
|
||||
* <ul>
|
||||
* <li>If the immediate retry succeeds → document proceeds to {@code SUCCESS}.</li>
|
||||
* <li>If the immediate retry also fails → the combined failure is classified as
|
||||
* a transient technical error and a {@link TransientErrorRetryable} or
|
||||
* {@link TransientErrorFinal} decision is produced for the final persistence
|
||||
* step.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The immediate retry is strictly limited to the physical copy path. No new AI call
|
||||
* and no new naming-proposal derivation occur. This mechanism does not increment the
|
||||
* cross-run transient-error counter.
|
||||
*
|
||||
* @param failureMessage human-readable description of the initial copy failure;
|
||||
* never {@code null} or blank
|
||||
*/
|
||||
record TargetCopyWithImmediateRetry(String failureMessage) implements RetryDecision {
|
||||
|
||||
/**
|
||||
* Returns the constant failure class identifier for target copy failures.
|
||||
*
|
||||
* @return {@code "TARGET_COPY_TECHNICAL_ERROR"}
|
||||
*/
|
||||
@Override
|
||||
public String failureClass() {
|
||||
return DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name();
|
||||
}
|
||||
}
|
||||
}
|
||||
+14
@@ -62,6 +62,20 @@
|
||||
* — Sealed result of parsing raw response into JSON structure (success or parsing failure)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Retry policy and logging types:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification}
|
||||
* — Unified classification of all document-level errors (content, transient, target copy)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision}
|
||||
* — Sealed type representing the authoritative retry decision for a document error</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision}
|
||||
* — Decision governing whether a within-run target copy retry is permitted</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity}
|
||||
* — Sensitivity decision governing whether AI-generated content may be logged</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentLogCorrelation}
|
||||
* — Sealed type carrying the correlation context for document-related log entries</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Exception types:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException}
|
||||
|
||||
+200
@@ -0,0 +1,200 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Default implementation of the {@link RetryDecisionEvaluator} interface.
|
||||
* <p>
|
||||
* Applies the binding retry policy rules exactly as specified:
|
||||
* <ul>
|
||||
* <li><strong>Deterministic content errors</strong>: the first historised occurrence
|
||||
* for a fingerprint leads to {@link RetryDecision.ContentErrorRetryable} (one later
|
||||
* scheduler run may retry); the second occurrence leads to
|
||||
* {@link RetryDecision.ContentErrorFinal} (no further retries).</li>
|
||||
* <li><strong>Transient technical errors</strong>: the error remains
|
||||
* {@link RetryDecision.TransientErrorRetryable} while the counter after incrementing
|
||||
* is strictly less than {@code maxRetriesTransient}. When the counter after
|
||||
* incrementing reaches {@code maxRetriesTransient}, the result is
|
||||
* {@link RetryDecision.TransientErrorFinal}.</li>
|
||||
* <li><strong>Target copy failures</strong>: the first copy failure within a run
|
||||
* produces {@link RetryDecision.TargetCopyWithImmediateRetry}, allowing exactly
|
||||
* one immediate within-run retry of the physical copy step. This decision does
|
||||
* not modify any counter.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Counter semantics:</strong> The {@code currentCounters} passed to
|
||||
* {@link #evaluate} reflect the state <em>before</em> the current attempt's counter
|
||||
* increment. This evaluator computes what the counter will be after incrementing and
|
||||
* applies the threshold check accordingly.
|
||||
* <p>
|
||||
* <strong>Skip events</strong> ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) are not routed through this evaluator and never
|
||||
* produce a {@link RetryDecision}. No failure counter is changed by skip events.
|
||||
* <p>
|
||||
* <strong>Immediate within-run retry</strong> for the target copy path is a purely
|
||||
* technical within-run mechanism. It does not increment the cross-run
|
||||
* transient-error counter regardless of outcome, and it is not part of the
|
||||
* cross-run retry budget governed by {@code max.retries.transient}.
|
||||
* <p>
|
||||
* <strong>Single-truth rule:</strong> Evaluations are derived solely from the document
|
||||
* master record's failure counters and the configured limit. No additional, parallel
|
||||
* persistence source for retry decisions is introduced.
|
||||
* <p>
|
||||
* This class is stateless and thread-safe.
|
||||
*/
|
||||
public final class DefaultRetryDecisionEvaluator implements RetryDecisionEvaluator {
|
||||
|
||||
/**
|
||||
* Derives the authoritative retry decision for a document-level error.
|
||||
* <p>
|
||||
* Decision rules by error class:
|
||||
* <ul>
|
||||
* <li>{@link DocumentErrorClassification#DETERMINISTIC_CONTENT_ERROR}:
|
||||
* {@code contentErrorCount} before increment = 0 →
|
||||
* {@link RetryDecision.ContentErrorRetryable}; else →
|
||||
* {@link RetryDecision.ContentErrorFinal}.</li>
|
||||
* <li>{@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}:
|
||||
* {@code transientErrorCount + 1 < maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorRetryable};
|
||||
* {@code transientErrorCount + 1 >= maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorFinal}.</li>
|
||||
* <li>{@link DocumentErrorClassification#TARGET_COPY_TECHNICAL_ERROR}:
|
||||
* always → {@link RetryDecision.TargetCopyWithImmediateRetry}.
|
||||
* No counter is modified by this decision.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param errorClass classification of the error that occurred; never {@code null}
|
||||
* @param currentCounters failure counters <em>before</em> incrementing for this
|
||||
* attempt; never {@code null}
|
||||
* @param maxRetriesTransient configured maximum number of historised transient errors
|
||||
* allowed per fingerprint; must be ≥ 1
|
||||
* @param failureClass short, stable failure class identifier; never {@code null} or blank
|
||||
* @param failureMessage human-readable description of the error; never {@code null} or blank
|
||||
* @return the authoritative {@link RetryDecision}; never {@code null}
|
||||
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
|
||||
* @throws NullPointerException if any reference parameter is {@code null}
|
||||
*/
|
||||
@Override
|
||||
public RetryDecision evaluate(
|
||||
DocumentErrorClassification errorClass,
|
||||
FailureCounters currentCounters,
|
||||
int maxRetriesTransient,
|
||||
String failureClass,
|
||||
String failureMessage) {
|
||||
|
||||
Objects.requireNonNull(errorClass, "errorClass must not be null");
|
||||
Objects.requireNonNull(currentCounters, "currentCounters must not be null");
|
||||
Objects.requireNonNull(failureClass, "failureClass must not be null");
|
||||
Objects.requireNonNull(failureMessage, "failureMessage must not be null");
|
||||
if (failureClass.isBlank()) {
|
||||
throw new IllegalArgumentException("failureClass must not be blank");
|
||||
}
|
||||
if (failureMessage.isBlank()) {
|
||||
throw new IllegalArgumentException("failureMessage must not be blank");
|
||||
}
|
||||
if (maxRetriesTransient < 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"maxRetriesTransient must be >= 1, but was: " + maxRetriesTransient);
|
||||
}
|
||||
|
||||
return switch (errorClass) {
|
||||
case DETERMINISTIC_CONTENT_ERROR -> evaluateContentError(
|
||||
currentCounters, failureClass, failureMessage);
|
||||
case TRANSIENT_TECHNICAL_ERROR -> evaluateTransientError(
|
||||
currentCounters, maxRetriesTransient, failureClass, failureMessage);
|
||||
case TARGET_COPY_TECHNICAL_ERROR ->
|
||||
new RetryDecision.TargetCopyWithImmediateRetry(failureMessage);
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Determines whether an immediate within-run retry of the target copy operation
|
||||
* is permitted.
|
||||
* <p>
|
||||
* {@link ImmediateRetryDecision#ALLOWED} is returned only when the copy has failed
|
||||
* on its first attempt within the current run. If this is the second copy attempt
|
||||
* (the immediate retry itself has failed), {@link ImmediateRetryDecision#DENIED} is
|
||||
* returned and the failure must be escalated to the cross-run retry evaluation.
|
||||
*
|
||||
* @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was
|
||||
* the first copy attempt for this document in
|
||||
* the current run
|
||||
* @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED};
|
||||
* never {@code null}
|
||||
*/
|
||||
@Override
|
||||
public ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun) {
|
||||
return isFirstCopyAttemptInThisRun
|
||||
? ImmediateRetryDecision.ALLOWED
|
||||
: ImmediateRetryDecision.DENIED;
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluates the retry decision for a deterministic content error.
|
||||
* <p>
|
||||
* The content-error counter before this attempt determines the decision:
|
||||
* <ul>
|
||||
* <li>Count = 0 (first error) → {@link RetryDecision.ContentErrorRetryable};
|
||||
* one later scheduler run may retry.</li>
|
||||
* <li>Count ≥ 1 (second or subsequent error) → {@link RetryDecision.ContentErrorFinal};
|
||||
* no further retries.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param currentCounters failure counters before incrementing
|
||||
* @param failureClass failure class identifier
|
||||
* @param failureMessage failure description
|
||||
* @return the appropriate content-error retry decision
|
||||
*/
|
||||
private static RetryDecision evaluateContentError(
|
||||
FailureCounters currentCounters,
|
||||
String failureClass,
|
||||
String failureMessage) {
|
||||
|
||||
if (currentCounters.contentErrorCount() == 0) {
|
||||
return new RetryDecision.ContentErrorRetryable(failureClass, failureMessage);
|
||||
}
|
||||
return new RetryDecision.ContentErrorFinal(failureClass, failureMessage);
|
||||
}
|
||||
|
||||
/**
|
||||
* Evaluates the retry decision for a transient technical error.
|
||||
* <p>
|
||||
* The transient-error counter after incrementing determines the decision:
|
||||
* <ul>
|
||||
* <li>Counter after increment strictly less than {@code maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorRetryable}; a later scheduler run may retry.</li>
|
||||
* <li>Counter after increment equals or exceeds {@code maxRetriesTransient} →
|
||||
* {@link RetryDecision.TransientErrorFinal}; no further retries.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Example with {@code maxRetriesTransient = 1}: counter before = 0,
|
||||
* counter after = 1 = limit → {@link RetryDecision.TransientErrorFinal} immediately.
|
||||
* <p>
|
||||
* Example with {@code maxRetriesTransient = 2}: counter before = 0,
|
||||
* counter after = 1 < 2 → {@link RetryDecision.TransientErrorRetryable};
|
||||
* counter before = 1, counter after = 2 = limit → {@link RetryDecision.TransientErrorFinal}.
|
||||
*
|
||||
* @param currentCounters failure counters before incrementing
|
||||
* @param maxRetriesTransient configured maximum historised transient errors (≥ 1)
|
||||
* @param failureClass failure class identifier
|
||||
* @param failureMessage failure description
|
||||
* @return the appropriate transient-error retry decision
|
||||
*/
|
||||
private static RetryDecision evaluateTransientError(
|
||||
FailureCounters currentCounters,
|
||||
int maxRetriesTransient,
|
||||
String failureClass,
|
||||
String failureMessage) {
|
||||
|
||||
int counterAfterIncrement = currentCounters.transientErrorCount() + 1;
|
||||
if (counterAfterIncrement < maxRetriesTransient) {
|
||||
return new RetryDecision.TransientErrorRetryable(failureClass, failureMessage);
|
||||
}
|
||||
return new RetryDecision.TransientErrorFinal(failureClass, failureMessage);
|
||||
}
|
||||
}
|
||||
+169
-46
@@ -43,40 +43,76 @@ import java.util.function.Function;
|
||||
* Application-level service that implements the per-document processing logic.
|
||||
* <p>
|
||||
* This service is the single authoritative place for the decision rules:
|
||||
* idempotency checks, status/counter mapping, target-copy finalization, and consistent
|
||||
* two-level persistence.
|
||||
* idempotency checks, status/counter mapping, target-copy finalization, retry
|
||||
* finalization, skip semantics, and consistent two-level persistence.
|
||||
*
|
||||
* <h2>Processing order per candidate</h2>
|
||||
* <ol>
|
||||
* <li>Load the document master record by fingerprint.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
|
||||
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} →
|
||||
* <strong>log skip at INFO with fingerprint</strong>;
|
||||
* persist a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.
|
||||
* Failure counters are not changed.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} →
|
||||
* <strong>log skip at INFO with fingerprint</strong>;
|
||||
* persist a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.
|
||||
* Failure counters are not changed.</li>
|
||||
* <li>If the overall status is {@link ProcessingStatus#PROPOSAL_READY} → load the
|
||||
* leading proposal attempt and execute the target-copy finalization flow:
|
||||
* build the base filename, resolve duplicates, write the copy, persist SUCCESS or
|
||||
* FAILED_RETRYABLE.</li>
|
||||
* build the base filename, resolve duplicates,
|
||||
* <strong>log generated target filename at INFO with fingerprint</strong>,
|
||||
* write the copy, persist SUCCESS, FAILED_RETRYABLE, or FAILED_FINAL (transient retry limit reached).</li>
|
||||
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming) and map
|
||||
* the result into status, counters, and retryable flag.</li>
|
||||
* <li><strong>Log retry decision at INFO with fingerprint and error classification</strong>:
|
||||
* FAILED_RETRYABLE (will retry in a later scheduler run) or
|
||||
* FAILED_FINAL (retry budget exhausted, no further processing).</li>
|
||||
* <li>Persist exactly one historised processing attempt for the identified document.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Retry finalization rules</h2>
|
||||
* <ul>
|
||||
* <li><strong>Deterministic content errors:</strong> The first historised occurrence
|
||||
* leads to {@link ProcessingStatus#FAILED_RETRYABLE} (content-error counter incremented
|
||||
* by 1). The second historised occurrence leads to {@link ProcessingStatus#FAILED_FINAL}
|
||||
* (content-error counter incremented by 1). No further retry is possible.</li>
|
||||
* <li><strong>Transient technical errors:</strong> The transient-error counter is
|
||||
* incremented by 1 per occurrence. The document remains
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE} as long as the counter is strictly less
|
||||
* than {@code maxRetriesTransient}. The attempt that causes the counter to reach
|
||||
* {@code maxRetriesTransient} finalises the document to
|
||||
* {@link ProcessingStatus#FAILED_FINAL}. Valid values of {@code maxRetriesTransient}
|
||||
* are integers ≥ 1; the value 0 is invalid startup configuration.</li>
|
||||
* <li><strong>Skip events</strong> ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) never change any failure counter.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Status transitions</h2>
|
||||
* <ul>
|
||||
* <li>Pre-check passed + AI naming proposal ready → {@link ProcessingStatus#PROPOSAL_READY}</li>
|
||||
* <li>First deterministic content failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
|
||||
* <li>Second deterministic content failure → {@link ProcessingStatus#FAILED_FINAL}</li>
|
||||
* <li>Technical infrastructure failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
|
||||
* <li>Technical failure at transient retry limit → {@link ProcessingStatus#FAILED_FINAL}</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + successful target copy + consistent
|
||||
* persistence → {@link ProcessingStatus#SUCCESS}</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + first copy failure + successful immediate retry
|
||||
* → treated as successful copy, proceeds to {@link ProcessingStatus#SUCCESS}</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + both copy attempts fail → cross-run
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE} (or {@link ProcessingStatus#FAILED_FINAL} once the transient retry limit is reached), transient error counter +1</li>
|
||||
* <li>{@link ProcessingStatus#PROPOSAL_READY} + technical failure → {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||
* transient error counter +1</li>
|
||||
* <li>{@link ProcessingStatus#SUCCESS} → {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} skip</li>
|
||||
* <li>{@link ProcessingStatus#FAILED_FINAL} → {@link ProcessingStatus#SKIPPED_FINAL_FAILURE} skip</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Log correlation</h2>
|
||||
* <p>
|
||||
* All log entries emitted by this coordinator are post-fingerprint: the fingerprint is
|
||||
* available for every document that reaches this coordinator. Relevant log entries carry
|
||||
* the document fingerprint for unambiguous correlation across runs.
|
||||
*
|
||||
* <h2>Leading source for the naming proposal (binding)</h2>
|
||||
* <p>
|
||||
* When a document is in {@code PROPOSAL_READY} state, the authoritative source for the
|
||||
@@ -116,9 +152,16 @@ public class DocumentProcessingCoordinator {
|
||||
private final TargetFolderPort targetFolderPort;
|
||||
private final TargetFileCopyPort targetFileCopyPort;
|
||||
private final ProcessingLogger logger;
|
||||
private final int maxRetriesTransient;
|
||||
|
||||
/**
|
||||
* Creates the document processing coordinator with all required ports and the logger.
|
||||
* Creates the document processing coordinator with all required ports, logger, and
|
||||
* the transient retry limit.
|
||||
* <p>
|
||||
* {@code maxRetriesTransient} is the maximum number of historised transient error attempts
|
||||
* per fingerprint before the document is finalised to
|
||||
* {@link ProcessingStatus#FAILED_FINAL}. The attempt that causes the counter to
|
||||
* reach this value finalises the document. Must be ≥ 1.
|
||||
*
|
||||
* @param documentRecordRepository port for reading and writing the document master record;
|
||||
* must not be null
|
||||
@@ -130,7 +173,10 @@ public class DocumentProcessingCoordinator {
|
||||
* @param targetFileCopyPort port for copying source files to the target folder;
|
||||
* must not be null
|
||||
* @param logger for processing-related logging; must not be null
|
||||
* @throws NullPointerException if any parameter is null
|
||||
* @param maxRetriesTransient maximum number of historised transient error attempts
|
||||
* before finalisation; must be ≥ 1
|
||||
* @throws NullPointerException if any object parameter is null
|
||||
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
|
||||
*/
|
||||
public DocumentProcessingCoordinator(
|
||||
DocumentRecordRepository documentRecordRepository,
|
||||
@@ -138,7 +184,12 @@ public class DocumentProcessingCoordinator {
|
||||
UnitOfWorkPort unitOfWorkPort,
|
||||
TargetFolderPort targetFolderPort,
|
||||
TargetFileCopyPort targetFileCopyPort,
|
||||
ProcessingLogger logger) {
|
||||
ProcessingLogger logger,
|
||||
int maxRetriesTransient) {
|
||||
if (maxRetriesTransient < 1) {
|
||||
throw new IllegalArgumentException(
|
||||
"maxRetriesTransient must be >= 1, got: " + maxRetriesTransient);
|
||||
}
|
||||
this.documentRecordRepository =
|
||||
Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null");
|
||||
this.processingAttemptRepository =
|
||||
@@ -150,6 +201,7 @@ public class DocumentProcessingCoordinator {
|
||||
this.targetFileCopyPort =
|
||||
Objects.requireNonNull(targetFileCopyPort, "targetFileCopyPort must not be null");
|
||||
this.logger = Objects.requireNonNull(logger, "logger must not be null");
|
||||
this.maxRetriesTransient = maxRetriesTransient;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -285,7 +337,7 @@ public class DocumentProcessingCoordinator {
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// M6 target-copy finalization path
|
||||
// Target-copy finalization path
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
@@ -297,6 +349,10 @@ public class DocumentProcessingCoordinator {
|
||||
* <li>Build the base filename from the proposal's date and title.</li>
|
||||
* <li>Resolve the first available unique filename in the target folder.</li>
|
||||
* <li>Copy the source file to the target folder.</li>
|
||||
* <li>If the copy fails: attempt exactly one immediate within-run retry of the same
|
||||
* physical copy step. No new AI call and no new naming-proposal derivation occur.
|
||||
* If the retry also fails, treat the combined failure as a transient error and
|
||||
* skip the SUCCESS path.</li>
|
||||
* <li>Persist a new {@code SUCCESS} attempt and update the master record.</li>
|
||||
* <li>If persistence fails after a successful copy: attempt best-effort rollback
|
||||
* of the copy and persist {@code FAILED_RETRYABLE} instead.</li>
|
||||
@@ -365,19 +421,41 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
String resolvedFilename =
|
||||
((ResolvedTargetFilename) resolutionResult).resolvedFilename();
|
||||
logger.info("Resolved target filename for '{}': '{}'.",
|
||||
candidate.uniqueIdentifier(), resolvedFilename);
|
||||
logger.info("Generated target filename for '{}' (fingerprint: {}): '{}'.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(), resolvedFilename);
|
||||
|
||||
// --- Step 4: Copy file to target ---
|
||||
// --- Step 4: Copy file to target (with one immediate within-run retry) ---
|
||||
TargetFileCopyResult copyResult =
|
||||
targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
|
||||
|
||||
if (copyResult instanceof TargetFileCopyTechnicalFailure copyFailure) {
|
||||
logger.error("Target copy failed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), copyFailure.errorMessage());
|
||||
return persistTransientError(
|
||||
candidate, fingerprint, existingRecord, context, attemptStart, now,
|
||||
"Target file copy failed: " + copyFailure.errorMessage());
|
||||
if (copyResult instanceof TargetFileCopyTechnicalFailure firstCopyFailure) {
|
||||
// First copy attempt failed — perform exactly one immediate within-run retry.
|
||||
// The retry reuses the same resolved filename and document context; no new AI
|
||||
// call, no new naming-proposal derivation. This mechanism does not increment
|
||||
// the cross-run transient-error counter by itself.
|
||||
logger.warn("First target copy attempt failed for '{}': {}. Performing immediate within-run retry.",
|
||||
candidate.uniqueIdentifier(), firstCopyFailure.errorMessage());
|
||||
|
||||
TargetFileCopyResult retryCopyResult =
|
||||
targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
|
||||
|
||||
if (retryCopyResult instanceof TargetFileCopyTechnicalFailure retryCopyFailure) {
|
||||
// Immediate retry also failed — the combined failure is escalated as a
|
||||
// cross-run transient technical error. No further within-run retry is
|
||||
// attempted. This is the only document-level result for persistence.
|
||||
logger.error("Immediate within-run retry also failed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), retryCopyFailure.errorMessage());
|
||||
String combinedMessage = "Target file copy failed after immediate within-run retry."
|
||||
+ " First attempt: " + firstCopyFailure.errorMessage()
|
||||
+ "; Retry attempt: " + retryCopyFailure.errorMessage();
|
||||
return persistTransientError(
|
||||
candidate, fingerprint, existingRecord, context, attemptStart, now,
|
||||
combinedMessage);
|
||||
}
|
||||
|
||||
// Immediate retry succeeded — proceed to SUCCESS path as if the copy
|
||||
// had succeeded on the first attempt.
|
||||
logger.info("Immediate within-run retry succeeded for '{}'.", candidate.uniqueIdentifier());
|
||||
}
|
||||
|
||||
// Copy succeeded — attempt to persist SUCCESS
|
||||
@@ -447,8 +525,15 @@ public class DocumentProcessingCoordinator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Persists a {@code FAILED_RETRYABLE} attempt with an incremented transient error counter
|
||||
* for a document-level technical error during the target-copy finalization stage.
|
||||
* Persists a transient error for a document-level technical failure during the
|
||||
* target-copy finalization stage.
|
||||
* <p>
|
||||
* The resulting status is {@link ProcessingStatus#FAILED_FINAL} if the incremented
|
||||
* transient error counter reaches {@code maxRetriesTransient}; otherwise
|
||||
* {@link ProcessingStatus#FAILED_RETRYABLE}. The transient error counter is always
|
||||
* incremented by exactly one. This method does not increment the cross-run
|
||||
* transient counter for the within-run immediate retry — only the combined outcome
|
||||
* of the retry is reported here.
|
||||
*
|
||||
* @return true if the error was persisted; false if the error persistence itself failed
|
||||
*/
|
||||
@@ -463,26 +548,38 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
FailureCounters updatedCounters =
|
||||
existingRecord.failureCounters().withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
ProcessingStatus errorStatus = limitReached
|
||||
? ProcessingStatus.FAILED_FINAL
|
||||
: ProcessingStatus.FAILED_RETRYABLE;
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
|
||||
fingerprint, context.runId(), attemptNumber, attemptStart, now,
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
ProcessingStatus.FAILED_RETRYABLE.name(),
|
||||
errorMessage, true);
|
||||
errorStatus,
|
||||
errorStatus.name(),
|
||||
errorMessage, !limitReached);
|
||||
|
||||
DocumentRecord errorRecord = buildTransientErrorRecord(
|
||||
existingRecord, candidate, updatedCounters, now);
|
||||
existingRecord, candidate, updatedCounters, errorStatus, now);
|
||||
|
||||
unitOfWorkPort.executeInTransaction(txOps -> {
|
||||
txOps.saveProcessingAttempt(errorAttempt);
|
||||
txOps.updateDocumentRecord(errorRecord);
|
||||
});
|
||||
|
||||
logger.debug("Transient error persisted for '{}': status=FAILED_RETRYABLE, "
|
||||
+ "transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
updatedCounters.transientErrorCount());
|
||||
if (limitReached) {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_FINAL — "
|
||||
+ "transient error limit reached ({}/{} attempts). No further retry.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
updatedCounters.transientErrorCount(), maxRetriesTransient);
|
||||
} else {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_RETRYABLE — "
|
||||
+ "transient error, will retry in later run ({}/{} attempts).",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
updatedCounters.transientErrorCount(), maxRetriesTransient);
|
||||
}
|
||||
return true;
|
||||
|
||||
} catch (DocumentPersistenceException persistEx) {
|
||||
@@ -493,9 +590,13 @@ public class DocumentProcessingCoordinator {
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempts to persist a {@code FAILED_RETRYABLE} attempt after a persistence failure
|
||||
* that occurred following a successful target copy. This is a secondary persistence
|
||||
* effort; its failure is logged but does not change the return value.
|
||||
* Attempts to persist a transient error after a persistence failure that occurred
|
||||
* following a successful target copy. This is a secondary persistence effort;
|
||||
* its failure is logged but does not change the return value.
|
||||
* <p>
|
||||
* Applies the same transient limit check as {@link #persistTransientError}: if the
|
||||
* incremented counter reaches {@code maxRetriesTransient}, the secondary attempt
|
||||
* is persisted as {@link ProcessingStatus#FAILED_FINAL}.
|
||||
*/
|
||||
private void persistTransientErrorAfterPersistenceFailure(
|
||||
SourceDocumentCandidate candidate,
|
||||
@@ -508,16 +609,21 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
FailureCounters updatedCounters =
|
||||
existingRecord.failureCounters().withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
ProcessingStatus errorStatus = limitReached
|
||||
? ProcessingStatus.FAILED_FINAL
|
||||
: ProcessingStatus.FAILED_RETRYABLE;
|
||||
|
||||
try {
|
||||
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
|
||||
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
|
||||
fingerprint, context.runId(), attemptNumber, attemptStart, now,
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
ProcessingStatus.FAILED_RETRYABLE.name(),
|
||||
errorMessage, true);
|
||||
errorStatus,
|
||||
errorStatus.name(),
|
||||
errorMessage, !limitReached);
|
||||
|
||||
DocumentRecord errorRecord = buildTransientErrorRecord(
|
||||
existingRecord, candidate, updatedCounters, now);
|
||||
existingRecord, candidate, updatedCounters, errorStatus, now);
|
||||
|
||||
unitOfWorkPort.executeInTransaction(txOps -> {
|
||||
txOps.saveProcessingAttempt(errorAttempt);
|
||||
@@ -618,13 +724,13 @@ public class DocumentProcessingCoordinator {
|
||||
|
||||
private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForNewDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome) {
|
||||
return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome);
|
||||
return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome, maxRetriesTransient);
|
||||
}
|
||||
|
||||
private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForKnownDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome,
|
||||
FailureCounters existingCounters) {
|
||||
return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters);
|
||||
return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters, maxRetriesTransient);
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
@@ -717,12 +823,13 @@ public class DocumentProcessingCoordinator {
|
||||
DocumentRecord existingRecord,
|
||||
SourceDocumentCandidate candidate,
|
||||
FailureCounters updatedCounters,
|
||||
ProcessingStatus targetStatus,
|
||||
Instant now) {
|
||||
return new DocumentRecord(
|
||||
existingRecord.fingerprint(),
|
||||
new SourceDocumentLocator(candidate.locator().value()),
|
||||
candidate.uniqueIdentifier(),
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
targetStatus,
|
||||
updatedCounters,
|
||||
now, // lastFailureInstant
|
||||
existingRecord.lastSuccessInstant(),
|
||||
@@ -764,11 +871,27 @@ public class DocumentProcessingCoordinator {
|
||||
recordWriter.accept(txOps);
|
||||
});
|
||||
|
||||
logger.info("Document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
|
||||
candidate.uniqueIdentifier(),
|
||||
outcome.overallStatus(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
if (outcome.overallStatus() == ProcessingStatus.FAILED_RETRYABLE) {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_RETRYABLE — "
|
||||
+ "will retry in later scheduler run. "
|
||||
+ "ContentErrors={}, TransientErrors={}.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
} else if (outcome.overallStatus() == ProcessingStatus.FAILED_FINAL) {
|
||||
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_FINAL — "
|
||||
+ "permanently failed, no further retry. "
|
||||
+ "ContentErrors={}, TransientErrors={}.",
|
||||
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
} else {
|
||||
logger.info("Document '{}' processed: status={} (fingerprint: {}). "
|
||||
+ "ContentErrors={}, TransientErrors={}.",
|
||||
candidate.uniqueIdentifier(), outcome.overallStatus(), fingerprint.sha256Hex(),
|
||||
outcome.counters().contentErrorCount(),
|
||||
outcome.counters().transientErrorCount());
|
||||
}
|
||||
return true;
|
||||
|
||||
} catch (DocumentPersistenceException e) {
|
||||
|
||||
+54
-19
@@ -36,9 +36,29 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
|
||||
* <li><strong>AI functional failure (second or later occurrence):</strong>
|
||||
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
|
||||
* content error counter incremented by 1, {@code retryable=false}.</li>
|
||||
* <li><strong>Technical error (pre-fingerprint / extraction / AI infrastructure):</strong>
|
||||
* <li><strong>Technical error below the transient retry limit:</strong>
|
||||
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
|
||||
* transient error counter incremented by 1, {@code retryable=true}.</li>
|
||||
* <li><strong>Technical error at or above the transient retry limit:</strong>
|
||||
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
|
||||
* transient error counter incremented by 1, {@code retryable=false}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Transient retry limit semantics</h2>
|
||||
* <p>
|
||||
* {@code maxRetriesTransient} is interpreted as the maximum number of historised
|
||||
* transient error attempts per fingerprint. The attempt that causes the counter
|
||||
* to reach {@code maxRetriesTransient} finalises the document status to
|
||||
* {@link ProcessingStatus#FAILED_FINAL}. Valid values are integers {@code >= 1};
|
||||
* the value 0 is invalid startup configuration and must be rejected before
|
||||
* the batch run begins.
|
||||
* <p>
|
||||
* Examples:
|
||||
* <ul>
|
||||
* <li>{@code maxRetriesTransient = 1}: the first historised transient error
|
||||
* immediately finalises to {@code FAILED_FINAL}.</li>
|
||||
* <li>{@code maxRetriesTransient = 2}: the first transient error yields
|
||||
* {@code FAILED_RETRYABLE}; the second finalises to {@code FAILED_FINAL}.</li>
|
||||
* </ul>
|
||||
*/
|
||||
final class ProcessingOutcomeTransition {
|
||||
@@ -52,24 +72,33 @@ final class ProcessingOutcomeTransition {
|
||||
* <p>
|
||||
* For new documents, all failure counters start at zero.
|
||||
*
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param maxRetriesTransient maximum number of historised transient error attempts
|
||||
* before the document is finalised to {@code FAILED_FINAL};
|
||||
* must be {@code >= 1}
|
||||
* @return the mapped outcome with status, counters, and retryability
|
||||
*/
|
||||
static ProcessingOutcome forNewDocument(DocumentProcessingOutcome pipelineOutcome) {
|
||||
return forKnownDocument(pipelineOutcome, FailureCounters.zero());
|
||||
static ProcessingOutcome forNewDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome,
|
||||
int maxRetriesTransient) {
|
||||
return forKnownDocument(pipelineOutcome, FailureCounters.zero(), maxRetriesTransient);
|
||||
}
|
||||
|
||||
/**
|
||||
* Maps a pipeline outcome to a processing outcome, considering the existing
|
||||
* failure counter state from a known document's history.
|
||||
*
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param existingCounters the current failure counter values from the document's master record
|
||||
* @param pipelineOutcome the outcome from the processing pipeline
|
||||
* @param existingCounters the current failure counter values from the document's master record
|
||||
* @param maxRetriesTransient maximum number of historised transient error attempts
|
||||
* before the document is finalised to {@code FAILED_FINAL};
|
||||
* must be {@code >= 1}
|
||||
* @return the mapped outcome with updated status, counters, and retryability
|
||||
*/
|
||||
static ProcessingOutcome forKnownDocument(
|
||||
DocumentProcessingOutcome pipelineOutcome,
|
||||
FailureCounters existingCounters) {
|
||||
FailureCounters existingCounters,
|
||||
int maxRetriesTransient) {
|
||||
|
||||
return switch (pipelineOutcome) {
|
||||
case NamingProposalReady ignored -> {
|
||||
@@ -106,31 +135,37 @@ final class ProcessingOutcomeTransition {
|
||||
}
|
||||
|
||||
case TechnicalDocumentError ignored4 -> {
|
||||
// Technical error (extraction / infrastructure): retryable, transient counter +1
|
||||
// Technical error (extraction / infrastructure): apply transient retry limit
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
yield new ProcessingOutcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
!limitReached
|
||||
);
|
||||
}
|
||||
|
||||
case AiTechnicalFailure ignored5 -> {
|
||||
// Technical AI error (timeout, unreachable, bad JSON): retryable, transient counter +1
|
||||
// Technical AI error (timeout, unreachable, bad JSON): apply transient retry limit
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
yield new ProcessingOutcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
!limitReached
|
||||
);
|
||||
}
|
||||
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored6 -> {
|
||||
// Pre-check passed without AI step: in normal flow this should not appear at
|
||||
// the outcome transition level once the AI pipeline is fully wired. Treat it
|
||||
// as a technical error to avoid silent inconsistency.
|
||||
// as a technical error and apply the transient retry limit.
|
||||
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
|
||||
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
|
||||
yield new ProcessingOutcome(
|
||||
ProcessingStatus.FAILED_RETRYABLE,
|
||||
existingCounters.withIncrementedTransientErrorCount(),
|
||||
true
|
||||
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
|
||||
updatedCounters,
|
||||
!limitReached
|
||||
);
|
||||
}
|
||||
};
|
||||
|
||||
+107
@@ -0,0 +1,107 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.service;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
|
||||
|
||||
/**
|
||||
* Application service contract for deriving authoritative retry decisions from
|
||||
* document error state and configuration.
|
||||
* <p>
|
||||
* This interface defines the single, testable entry point for all retry policy
|
||||
* evaluations. Implementations must apply the binding retry rules exactly
|
||||
* as specified:
|
||||
* <ul>
|
||||
* <li><strong>Deterministic content errors</strong> ({@link DocumentErrorClassification#DETERMINISTIC_CONTENT_ERROR}):
|
||||
* the <em>first</em> historised content error for a fingerprint results in
|
||||
* {@link RetryDecision.ContentErrorRetryable}; the <em>second</em> results in
|
||||
* {@link RetryDecision.ContentErrorFinal}.</li>
|
||||
* <li><strong>Transient technical errors</strong> ({@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}):
|
||||
* the error remains retryable while the transient-error counter after incrementing
|
||||
* stays strictly below {@code maxRetriesTransient}. When the counter after
|
||||
* incrementing reaches {@code maxRetriesTransient}, the result is
|
||||
* {@link RetryDecision.TransientErrorFinal}.</li>
|
||||
* <li><strong>Target copy failures</strong> ({@link DocumentErrorClassification#TARGET_COPY_TECHNICAL_ERROR})
|
||||
* on the <em>first</em> copy attempt within a run: result is
|
||||
* {@link RetryDecision.TargetCopyWithImmediateRetry}. After the immediate retry
|
||||
* has itself failed, the failure is re-evaluated as a
|
||||
* {@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Counter semantics:</strong>
|
||||
* <ul>
|
||||
* <li>The {@code currentCounters} passed to {@link #evaluate} reflect the state
|
||||
* <em>before</em> the current attempt's counter increment. The evaluator is
|
||||
* responsible for determining what the counter will be after incrementing and
|
||||
* applying the threshold check accordingly.</li>
|
||||
* <li>Skip events ({@code SKIPPED_ALREADY_PROCESSED}, {@code SKIPPED_FINAL_FAILURE})
|
||||
* are not routed through this evaluator and never produce a
|
||||
* {@link RetryDecision}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>{@code maxRetriesTransient} invariant:</strong>
|
||||
* The value must be an integer ≥ 1. A value of {@code 0} is invalid configuration
|
||||
* and must be rejected at startup before any batch run begins. Implementations of
|
||||
* this interface may assume the value is always ≥ 1 when called.
|
||||
* <p>
|
||||
* Example for {@code maxRetriesTransient = 1}:
|
||||
* <ul>
|
||||
* <li>transient-error counter before = 0 → after increment = 1 = limit → {@link RetryDecision.TransientErrorFinal}</li>
|
||||
* </ul>
|
||||
* Example for {@code maxRetriesTransient = 2}:
|
||||
* <ul>
|
||||
* <li>transient-error counter before = 0 → after increment = 1 &lt; 2 → {@link RetryDecision.TransientErrorRetryable}</li>
|
||||
* <li>transient-error counter before = 1 → after increment = 2 = limit → {@link RetryDecision.TransientErrorFinal}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Single-truth rule:</strong> No parallel persistence source for retry
|
||||
* decisions is introduced. Evaluations are derived solely from the document master
|
||||
* record's failure counters and the configured limit.
|
||||
*/
|
||||
public interface RetryDecisionEvaluator {
|
||||
|
||||
/**
|
||||
* Derives the authoritative retry decision for a document-level error.
|
||||
* <p>
|
||||
* The decision is determined by the error classification, the existing failure
|
||||
* counters (before any increment for the current attempt), and the configured
|
||||
* transient-retry limit.
|
||||
*
|
||||
* @param errorClass classification of the error that occurred; never {@code null}
|
||||
* @param currentCounters failure counters <em>before</em> incrementing for this
|
||||
* attempt; never {@code null}
|
||||
* @param maxRetriesTransient configured maximum number of historised transient errors
|
||||
* allowed per fingerprint; must be ≥ 1
|
||||
* @param failureClass short, stable failure class identifier for persistence
|
||||
* and logging; never {@code null} or blank
|
||||
* @param failureMessage human-readable description of the error; never {@code null}
|
||||
* or blank
|
||||
* @return the authoritative {@link RetryDecision}; never {@code null}
|
||||
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
|
||||
*/
|
||||
RetryDecision evaluate(
|
||||
DocumentErrorClassification errorClass,
|
||||
FailureCounters currentCounters,
|
||||
int maxRetriesTransient,
|
||||
String failureClass,
|
||||
String failureMessage);
|
||||
|
||||
/**
|
||||
* Determines whether an immediate within-run retry of the target copy operation
|
||||
* is permitted.
|
||||
* <p>
|
||||
* An immediate retry is {@link ImmediateRetryDecision#ALLOWED} only when the copy
|
||||
* has failed on its first attempt within the current run. If this is the second
|
||||
* copy attempt within the same run (i.e. the immediate retry itself has failed),
|
||||
* the result is {@link ImmediateRetryDecision#DENIED}.
|
||||
*
|
||||
* @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was
|
||||
* the first copy attempt for this document in
|
||||
* the current run; {@code false} if it was the
|
||||
* immediate retry attempt
|
||||
* @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED};
|
||||
* never {@code null}
|
||||
*/
|
||||
ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun);
|
||||
}
|
||||
+31
-15
@@ -222,23 +222,30 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
* <p>
|
||||
* Processing order:
|
||||
* <ol>
|
||||
* <li><strong>Log:</strong> detected source file at INFO level with run-ID (pre-fingerprint
|
||||
* correlation via run-ID and candidate description).</li>
|
||||
* <li>Record the attempt start instant.</li>
|
||||
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event and
|
||||
* return true — no SQLite record is created, but no persistence failure occurred.</li>
|
||||
* <li>If fingerprint computation fails: log as non-identifiable run event with run-ID
|
||||
* and return true — no SQLite record is created, no persistence failure.</li>
|
||||
* <li>Load document master record.</li>
|
||||
* <li>If already {@code SUCCESS} → persist skip attempt with
|
||||
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
|
||||
* {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the pipeline (extraction + pre-checks).</li>
|
||||
* <li>Map result into status, counters and retryable flag.</li>
|
||||
* <li>If already {@code SUCCESS} → log skip at INFO level with fingerprint;
|
||||
* persist skip attempt with {@code SKIPPED_ALREADY_PROCESSED}.</li>
|
||||
* <li>If already {@code FAILED_FINAL} → log skip at INFO level with fingerprint;
|
||||
* persist skip attempt with {@code SKIPPED_FINAL_FAILURE}.</li>
|
||||
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming).</li>
|
||||
* <li>Map result into status, counters, and retryable flag.</li>
|
||||
* <li><strong>Log:</strong> retry decision at INFO level with fingerprint and error
|
||||
* classification (FAILED_RETRYABLE or FAILED_FINAL).</li>
|
||||
* <li>Persist exactly one historised processing attempt.</li>
|
||||
* <li>Persist the updated document master record.</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Per-document errors do not abort the overall batch run. Processing of each
|
||||
* candidate ends in a controlled manner regardless of its outcome.
|
||||
* <p>
|
||||
* Post-fingerprint log entries carry the document fingerprint for correlation.
|
||||
* Pre-fingerprint log entries (steps 1–4) use run-ID and candidate description.
|
||||
*
|
||||
* @param candidate the candidate to process
|
||||
* @param context the current batch run context
|
||||
@@ -246,14 +253,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
* errors return true; persistence failures return false)
|
||||
*/
|
||||
private boolean processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) {
|
||||
logger.debug("Processing candidate: {}", candidate.uniqueIdentifier());
|
||||
logger.info("Detected source file '{}' for processing (RunId: {}).",
|
||||
candidate.uniqueIdentifier(), context.runId());
|
||||
|
||||
Instant attemptStart = Instant.now();
|
||||
FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
|
||||
|
||||
return switch (fingerprintResult) {
|
||||
case FingerprintTechnicalError fingerprintError -> {
|
||||
handleFingerprintError(candidate, fingerprintError);
|
||||
handleFingerprintError(candidate, fingerprintError, context);
|
||||
yield true; // fingerprint errors are not persistence failures
|
||||
}
|
||||
case FingerprintSuccess fingerprintSuccess ->
|
||||
@@ -262,15 +270,23 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
}
|
||||
|
||||
/**
|
||||
* Handles a fingerprint computation error by logging it as a non-identifiable event.
|
||||
* Handles a fingerprint computation error by logging it as a non-identifiable run event.
|
||||
* No SQLite record is created for this candidate.
|
||||
* <p>
|
||||
* Log entries before a successful fingerprint are correlated via the batch run identifier
|
||||
* and the candidate description, as no fingerprint is available for document-level
|
||||
* correlation.
|
||||
*
|
||||
* @param candidate the candidate that could not be fingerprinted
|
||||
* @param error the fingerprint error
|
||||
* @param error the fingerprint error
|
||||
* @param context the current batch run context; used for run-level log correlation
|
||||
*/
|
||||
private void handleFingerprintError(SourceDocumentCandidate candidate, FingerprintTechnicalError error) {
|
||||
logger.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).",
|
||||
candidate.uniqueIdentifier(), error.errorMessage());
|
||||
private void handleFingerprintError(
|
||||
SourceDocumentCandidate candidate,
|
||||
FingerprintTechnicalError error,
|
||||
BatchRunContext context) {
|
||||
logger.warn("Fingerprint computation failed for '{}' (RunId: {}): {} — candidate not historised.",
|
||||
candidate.uniqueIdentifier(), context.runId(), error.errorMessage());
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
Reference in New Issue
Block a user