1
0

Umsetzung von Meilenstein M7

This commit is contained in:
2026-04-07 17:26:02 +02:00
parent ffd91c766d
commit e9e9b2d17a
30 changed files with 2328 additions and 206 deletions

View File

@@ -227,8 +227,8 @@ public class StartConfigurationValidator {
}
private void validateMaxRetriesTransient(int maxRetriesTransient, List<String> errors) {
if (maxRetriesTransient < 0) {
errors.add("- max.retries.transient: must be >= 0, got: " + maxRetriesTransient);
if (maxRetriesTransient < 1) {
errors.add("- max.retries.transient: must be >= 1, got: " + maxRetriesTransient);
}
}

View File

@@ -108,6 +108,7 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
}
private StartConfiguration buildStartConfiguration(Properties props, String apiKey) {
boolean logAiSensitive = Boolean.parseBoolean(getOptionalProperty(props, "log.ai.sensitive", "false"));
return new StartConfiguration(
Paths.get(getRequiredProperty(props, "source.folder")),
Paths.get(getRequiredProperty(props, "target.folder")),
@@ -122,7 +123,8 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
Paths.get(getOptionalProperty(props, "runtime.lock.file", "")),
Paths.get(getOptionalProperty(props, "log.directory", "")),
getOptionalProperty(props, "log.level", "INFO"),
apiKey
apiKey,
logAiSensitive
);
}

View File

@@ -89,7 +89,8 @@ class OpenAiHttpAdapterTest {
Paths.get("/lock"),
Paths.get("/logs"),
"INFO",
API_KEY
API_KEY,
false
);
// Use the package-private constructor with injected mock HttpClient
adapter = new OpenAiHttpAdapter(testConfiguration, httpClient);
@@ -450,7 +451,8 @@ class OpenAiHttpAdapterTest {
Paths.get("/lock"),
Paths.get("/logs"),
"INFO",
API_KEY
API_KEY,
false
);
assertThatThrownBy(() -> new OpenAiHttpAdapter(invalidConfig, httpClient))
@@ -475,7 +477,8 @@ class OpenAiHttpAdapterTest {
Paths.get("/lock"),
Paths.get("/logs"),
"INFO",
API_KEY
API_KEY,
false
);
assertThatThrownBy(() -> new OpenAiHttpAdapter(invalidConfig, httpClient))
@@ -500,7 +503,8 @@ class OpenAiHttpAdapterTest {
Paths.get("/lock"),
Paths.get("/logs"),
"INFO",
API_KEY
API_KEY,
false
);
assertThatThrownBy(() -> new OpenAiHttpAdapter(invalidConfig, httpClient))
@@ -526,7 +530,8 @@ class OpenAiHttpAdapterTest {
Paths.get("/lock"),
Paths.get("/logs"),
"INFO",
"" // Empty key
"", // Empty key
false
);
OpenAiHttpAdapter adapterWithEmptyKey = new OpenAiHttpAdapter(configWithEmptyKey, httpClient);

View File

@@ -44,7 +44,8 @@ class StartConfigurationValidatorTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-api-key"
"test-api-key",
false
);
assertDoesNotThrow(() -> validator.validate(config));
@@ -66,7 +67,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -92,7 +94,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -118,7 +121,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -149,7 +153,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -180,7 +185,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -210,7 +216,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -241,7 +248,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -272,14 +280,48 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
InvalidStartConfigurationException.class,
() -> validator.validate(config)
);
assertTrue(exception.getMessage().contains("max.retries.transient: must be >= 0"));
assertTrue(exception.getMessage().contains("max.retries.transient: must be >= 1"));
}
@Test
void validate_failsWhenMaxRetriesTransientIsZero() throws Exception {
Path sourceFolder = Files.createDirectory(tempDir.resolve("source2"));
Path targetFolder = Files.createDirectory(tempDir.resolve("target2"));
Path sqliteFile = Files.createFile(tempDir.resolve("db2.sqlite"));
Path promptTemplateFile = Files.createFile(tempDir.resolve("prompt2.txt"));
StartConfiguration config = new StartConfiguration(
sourceFolder,
targetFolder,
sqliteFile,
URI.create("https://api.example.com"),
"gpt-4",
30,
0,
100,
50000,
promptTemplateFile,
null,
null,
"INFO",
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
InvalidStartConfigurationException.class,
() -> validator.validate(config)
);
assertTrue(exception.getMessage().contains("max.retries.transient: must be >= 1"),
"max.retries.transient = 0 is invalid startup configuration");
}
@Test
@@ -303,7 +345,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -334,7 +377,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -345,7 +389,7 @@ class StartConfigurationValidatorTest {
}
@Test
void validate_maxRetriesTransientZeroIsValid() throws Exception {
void validate_maxRetriesTransientOneIsValid() throws Exception {
Path sourceFolder = Files.createDirectory(tempDir.resolve("source"));
Path targetFolder = Files.createDirectory(tempDir.resolve("target"));
Path sqliteFile = Files.createFile(tempDir.resolve("db.sqlite"));
@@ -358,14 +402,15 @@ class StartConfigurationValidatorTest {
URI.create("https://api.example.com"),
"gpt-4",
30,
0, // maxRetriesTransient = 0 ist gültig
1, // maxRetriesTransient = 1 is the minimum valid value
100,
50000,
promptTemplateFile,
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
assertDoesNotThrow(() -> validator.validate(config));
@@ -392,7 +437,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -422,7 +468,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -453,7 +500,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -485,7 +533,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
assertDoesNotThrow(() -> validator.validate(config),
@@ -516,7 +565,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -548,7 +598,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -578,7 +629,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -609,7 +661,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -640,7 +693,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -670,7 +724,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -701,7 +756,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -731,7 +787,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -757,7 +814,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -772,7 +830,7 @@ class StartConfigurationValidatorTest {
assertTrue(message.contains("api.model: must not be null or blank"));
assertTrue(message.contains("prompt.template.file: must not be null"));
assertTrue(message.contains("api.timeoutSeconds: must be > 0"));
assertTrue(message.contains("max.retries.transient: must be >= 0"));
assertTrue(message.contains("max.retries.transient: must be >= 1"));
assertTrue(message.contains("max.pages: must be > 0"));
assertTrue(message.contains("max.text.characters: must be > 0"));
}
@@ -804,7 +862,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
// Mock: always return "does not exist" error for any path
@@ -840,7 +899,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
// Mock: simulate path exists but is not a directory
@@ -876,7 +936,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
// Mock: simulate path exists, is directory, but is not readable
@@ -914,7 +975,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
// Mock: all checks pass (return null)
@@ -951,7 +1013,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -985,7 +1048,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -1016,7 +1080,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -1047,7 +1112,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -1078,7 +1144,8 @@ class StartConfigurationValidatorTest {
tempDir.resolve("nonexistent/lock.lock"), // Lock file mit nicht existierendem Parent
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -1113,7 +1180,8 @@ class StartConfigurationValidatorTest {
lockFileWithFileAsParent, // Lock file mit Datei als Parent
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -1147,7 +1215,8 @@ class StartConfigurationValidatorTest {
null,
logFileInsteadOfDirectory, // Datei statt Verzeichnis
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(
@@ -1178,7 +1247,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
assertDoesNotThrow(() -> validator.validate(config),
@@ -1206,7 +1276,8 @@ class StartConfigurationValidatorTest {
null,
null,
"INFO",
"test-api-key"
"test-api-key",
false
);
InvalidStartConfigurationException exception = assertThrows(

View File

@@ -1,6 +1,7 @@
package de.gecheckt.pdf.umbenenner.adapter.out.configuration;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertFalse;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -328,6 +329,56 @@ class PropertiesConfigurationPortAdapterTest {
"Invalid URI value should throw ConfigurationLoadingException");
}
@Test
void loadConfiguration_logAiSensitiveDefaultsFalseWhenAbsent() throws Exception {
Path configFile = createInlineConfig(
"source.folder=/tmp/source\n" +
"target.folder=/tmp/target\n" +
"sqlite.file=/tmp/db.sqlite\n" +
"api.baseUrl=https://api.example.com\n" +
"api.model=gpt-4\n" +
"api.timeoutSeconds=30\n" +
"max.retries.transient=3\n" +
"max.pages=100\n" +
"max.text.characters=50000\n" +
"prompt.template.file=/tmp/prompt.txt\n" +
"api.key=test-key\n"
// log.ai.sensitive intentionally omitted
);
PropertiesConfigurationPortAdapter adapter = new PropertiesConfigurationPortAdapter(emptyEnvLookup, configFile);
var config = adapter.loadConfiguration();
assertFalse(config.logAiSensitive(),
"log.ai.sensitive must default to false when the property is absent");
}
@Test
void loadConfiguration_logAiSensitiveParsedTrueWhenExplicitlySet() throws Exception {
Path configFile = createInlineConfig(
"source.folder=/tmp/source\n" +
"target.folder=/tmp/target\n" +
"sqlite.file=/tmp/db.sqlite\n" +
"api.baseUrl=https://api.example.com\n" +
"api.model=gpt-4\n" +
"api.timeoutSeconds=30\n" +
"max.retries.transient=3\n" +
"max.pages=100\n" +
"max.text.characters=50000\n" +
"prompt.template.file=/tmp/prompt.txt\n" +
"api.key=test-key\n" +
"log.ai.sensitive=true\n"
);
PropertiesConfigurationPortAdapter adapter = new PropertiesConfigurationPortAdapter(emptyEnvLookup, configFile);
var config = adapter.loadConfiguration();
assertTrue(config.logAiSensitive(),
"log.ai.sensitive must be parsed as true when explicitly set to 'true'");
}
private Path createConfigFile(String resourceName) throws Exception {
Path sourceResource = Path.of("src/test/resources", resourceName);
Path targetConfigFile = tempDir.resolve("application.properties");

View File

@@ -1,5 +1,7 @@
package de.gecheckt.pdf.umbenenner.application.config;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
/**
* Minimal runtime configuration for the application layer.
* <p>
@@ -9,12 +11,59 @@ package de.gecheckt.pdf.umbenenner.application.config;
* <p>
* This intentionally small contract ensures the application layer depends only on
* the configuration values it actually uses, following hexagonal architecture principles.
*
* <h2>Validation invariants</h2>
* <ul>
* <li>{@link #maxPages()} must be &ge; 1.</li>
* <li>{@link #maxRetriesTransient()} must be &ge; 1. The value {@code 0} is invalid
* start configuration and must prevent the batch run from starting with exit
* code&nbsp;1.</li>
* <li>{@link #aiContentSensitivity()} must not be {@code null}. The safe default is
* {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}.</li>
* </ul>
*
* <h2>AI content sensitivity</h2>
* <p>
* The {@link #aiContentSensitivity()} field is derived from the {@code log.ai.sensitive}
* configuration property (default: {@code false}). It governs whether the complete AI raw
* response and complete AI {@code reasoning} may be written to log files. Sensitive AI
* content is always persisted in SQLite regardless of this setting; only log output is
* affected.
* <p>
* The safe default ({@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}) must be used
* whenever {@code log.ai.sensitive} is absent, {@code false}, or set to any value other
* than the explicit opt-in.
*/
public record RuntimeConfiguration(
/**
* Maximum number of pages a document can have to be processed.
* Documents exceeding this limit are rejected during pre-checks.
*/
int maxPages
int maxPages,
/**
* Maximum number of historised transient technical errors allowed per fingerprint
* across all scheduler runs.
* <p>
* The attempt that causes the counter to reach this value finalises the document
* to {@code FAILED_FINAL}. Must be an Integer &ge; 1; the value {@code 0} is
* invalid start configuration.
* <p>
* Example: {@code maxRetriesTransient = 1} means the first transient error
* immediately finalises the document.
*/
int maxRetriesTransient,
/**
* Sensitivity decision governing whether AI-generated content may be written to log files.
* <p>
* Derived from the {@code log.ai.sensitive} configuration property. The default is
* {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT} (do not log sensitive content).
* Only {@link AiContentSensitivity#LOG_SENSITIVE_CONTENT} is produced when
* {@code log.ai.sensitive = true} is explicitly set.
* <p>
* Must not be {@code null}.
*/
AiContentSensitivity aiContentSensitivity
)
{ }

View File

@@ -10,6 +10,16 @@ import java.nio.file.Path;
* loaded and validated at bootstrap time. This is a complete configuration model
* for the entire application startup, including paths, API settings, persistence,
* and operational parameters.
*
* <h2>AI content sensitivity ({@code log.ai.sensitive})</h2>
* <p>
* The boolean property {@code log.ai.sensitive} controls whether sensitive AI-generated
* content (complete raw AI response, complete AI {@code reasoning}) may be written to
* log files. The default is {@code false} (safe/protect). Set to {@code true} only when
* explicit diagnostic logging of AI content is required.
* <p>
* Sensitive AI content is always persisted in SQLite regardless of this setting.
* Only log output is affected.
*/
public record StartConfiguration(
Path sourceFolder,
@@ -25,6 +35,13 @@ public record StartConfiguration(
Path runtimeLockFile,
Path logDirectory,
String logLevel,
String apiKey
String apiKey,
/**
* Whether sensitive AI content (raw response, reasoning) may be written to log files.
* Corresponds to the {@code log.ai.sensitive} configuration property.
* Default: {@code false} (do not log sensitive content).
*/
boolean logAiSensitive
)
{ }

View File

@@ -0,0 +1,46 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
 * Decision that controls whether AI-generated content classified as sensitive may be
 * emitted into log files.
 * <p>
 * Two items count as sensitive: the <strong>complete raw AI response</strong> (the full
 * JSON body returned by the AI service) and the <strong>complete AI {@code reasoning}</strong>
 * field extracted from that response. Non-sensitive AI output — the resolved title, the
 * resolved date, the date source — is not governed by this decision and may always be
 * logged.
 * <p>
 * Regardless of this decision, sensitive AI content is always written to SQLite for
 * traceability; the decision affects log output only.
 * <p>
 * <strong>Default behaviour:</strong> the safe default is
 * {@link #PROTECT_SENSITIVE_CONTENT}. Logging of sensitive AI content must be opted
 * into explicitly via the boolean configuration property {@code log.ai.sensitive = true};
 * any other value, or an absent property, keeps the default.
 */
public enum AiContentSensitivity {

    /**
     * Sensitive AI content (raw response, reasoning) must <strong>not</strong> be
     * written to log files.
     * <p>
     * The safe default: active whenever {@code log.ai.sensitive} is absent,
     * {@code false}, or set to anything other than the explicit opt-in.
     */
    PROTECT_SENSITIVE_CONTENT,

    /**
     * Sensitive AI content (raw response, reasoning) <strong>may</strong> be written
     * to log files.
     * <p>
     * Produced only when {@code log.ai.sensitive = true} is explicitly set in the
     * application configuration; never the implicit default.
     */
    LOG_SENSITIVE_CONTENT
}

View File

@@ -0,0 +1,90 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
 * Single, exhaustive taxonomy of all document-level errors in their end state.
 * <p>
 * Both the retry policy and the logging infrastructure distinguish their error
 * categories through this enumeration; wherever an authoritative type is required it
 * replaces any ad-hoc string-based classification.
 * <p>
 * <strong>Mapping to failure counters:</strong>
 * <ul>
 *   <li>{@link #DETERMINISTIC_CONTENT_ERROR} increments the content-error counter.
 *       The first occurrence leads to {@code FAILED_RETRYABLE}; the second leads to
 *       {@code FAILED_FINAL}. There is no further retry after the second
 *       deterministic content error.</li>
 *   <li>{@link #TRANSIENT_TECHNICAL_ERROR} increments the transient-error counter.
 *       The document remains retryable until the counter reaches the configured
 *       {@code max.retries.transient} limit (an Integer &ge; 1); the attempt that
 *       reaches the limit finalises the document to {@code FAILED_FINAL}.</li>
 *   <li>{@link #TARGET_COPY_TECHNICAL_ERROR} signals a failure on the physical
 *       target-file copy path. Within the same run, exactly one immediate technical
 *       retry is allowed; if that retry also fails, the error is treated as a
 *       {@link #TRANSIENT_TECHNICAL_ERROR} for counter updates and cross-run retry
 *       evaluation.</li>
 * </ul>
 * <p>
 * <strong>Scope of deterministic content errors:</strong> no usable PDF text
 * extracted; page limit exceeded; AI response functionally invalid (generic/unusable
 * title, unparseable date); document content ambiguous or not uniquely interpretable.
 * <p>
 * <strong>Scope of transient technical errors:</strong> AI service unreachable, HTTP
 * timeout, network error; unparseable or structurally invalid AI JSON; temporary I/O
 * error during PDF text extraction; temporary SQLite lock or persistence failure; any
 * other non-deterministic infrastructure failure.
 * <p>
 * <strong>Architecture note:</strong> this type carries no infrastructure
 * dependencies and is safe to reference from Domain, Application and Adapter layers.
 */
public enum DocumentErrorClassification {

    /**
     * A deterministic content error that retrying with identical document content
     * cannot resolve.
     * <p>
     * Examples: no extractable text, page limit exceeded, AI-returned title is
     * generic or unusable, document content is ambiguous.
     * <p>
     * Retry rule: the first historised occurrence for a fingerprint leads to
     * {@code FAILED_RETRYABLE} (one later run may retry); the second historised
     * occurrence leads to {@code FAILED_FINAL} (no further retries).
     */
    DETERMINISTIC_CONTENT_ERROR,

    /**
     * A transient technical infrastructure failure unrelated to the document content.
     * <p>
     * Examples: AI endpoint not reachable, HTTP timeout, malformed or non-parseable
     * JSON, temporary I/O failure, temporary SQLite lock.
     * <p>
     * Retry rule: the document remains {@code FAILED_RETRYABLE} until the
     * transient-error counter reaches the configured {@code max.retries.transient}
     * limit; the attempt that reaches the limit finalises the document to
     * {@code FAILED_FINAL}. The configured limit must be an Integer &ge; 1 — the
     * value {@code 0} is invalid start configuration and prevents the batch run
     * from starting.
     */
    TRANSIENT_TECHNICAL_ERROR,

    /**
     * A technical failure specifically on the physical target-file copy path.
     * <p>
     * Distinct from {@link #TRANSIENT_TECHNICAL_ERROR} because it triggers a special
     * within-run handling: exactly one immediate technical retry of the copy
     * operation within the same document run, with no new AI call and no new
     * naming-proposal derivation.
     * <p>
     * If the immediate retry succeeds, the document proceeds to {@code SUCCESS}.
     * If it also fails, the combined failure is recorded as a
     * {@link #TRANSIENT_TECHNICAL_ERROR} for counter and cross-run retry evaluation.
     * The immediate retry itself is not counted in the cross-run transient-error
     * counter.
     */
    TARGET_COPY_TECHNICAL_ERROR
}
}

View File

@@ -0,0 +1,81 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
/**
 * Sealed correlation context attached to every document-related log entry.
 * <p>
 * Logging correlation distinguishes two phases of document processing:
 * <ol>
 *   <li><strong>Before</strong> a {@link DocumentFingerprint} has been successfully
 *       computed (e.g. the source file cannot be read for hashing), entries are
 *       correlated via the batch run identifier plus a stable candidate description
 *       derived from the candidate's own identifier (typically its source file path
 *       or name) — see {@link CandidateCorrelation}.</li>
 *   <li><strong>After</strong> the fingerprint is known, all subsequent
 *       document-related entries are correlated via the batch run identifier plus
 *       the fingerprint — see {@link FingerprintCorrelation}.</li>
 * </ol>
 * <p>
 * <strong>Architecture constraints:</strong> this type contains no filesystem
 * ({@code Path}, {@code File}) or NIO types, introduces no additional persistence
 * truth source, and — being a logging concern only — never influences the
 * processing outcome, retry decision, or persistence model.
 */
public sealed interface DocumentLogCorrelation {

    /**
     * Batch run identifier shared by all log entries within one run.
     *
     * @return run identifier; never {@code null}
     */
    RunId runId();

    /**
     * Correlation context available before a {@link DocumentFingerprint} has been
     * successfully computed — when the fingerprint computation itself fails, or for
     * entries emitted at the very start of candidate processing (before any hashing
     * result exists).
     * <p>
     * The description is a stable, human-readable identifier for the candidate
     * derived from its own unique identifier (typically the source file name or
     * path representation) and must not change between log entries for the same
     * candidate within a single run.
     *
     * @param runId                batch run identifier; never {@code null}
     * @param candidateDescription stable human-readable candidate identifier;
     *                             never {@code null} or blank
     */
    record CandidateCorrelation(RunId runId, String candidateDescription)
            implements DocumentLogCorrelation {}

    /**
     * Correlation context used from the moment a {@link DocumentFingerprint} has
     * been successfully computed. The fingerprint is the authoritative,
     * content-stable document identity and must appear in, or be unambiguously
     * derivable from, every subsequent log entry for this document.
     *
     * @param runId       batch run identifier; never {@code null}
     * @param fingerprint content-based document identity; never {@code null}
     */
    record FingerprintCorrelation(RunId runId, DocumentFingerprint fingerprint)
            implements DocumentLogCorrelation {}
}

View File

@@ -7,24 +7,34 @@ package de.gecheckt.pdf.umbenenner.application.port.out;
* <ul>
* <li><strong>Content error counter</strong> ({@link #contentErrorCount()}):
* counts how many times a deterministic content error occurred for this document
* (no usable text, page limit exceeded). At count&nbsp;1 the document is
* {@code FAILED_RETRYABLE}; at count&nbsp;2 it becomes {@code FAILED_FINAL}.
* (no usable text, page limit exceeded, AI functional failure, ambiguous content).
* At count&nbsp;1 the document transitions to {@code FAILED_RETRYABLE};
* at count&nbsp;2 it transitions to {@code FAILED_FINAL}.
* Skip events do <em>not</em> increase this counter.</li>
* <li><strong>Transient error counter</strong> ({@link #transientErrorCount()}):
* counts how many times a technical infrastructure error occurred after a
* successful fingerprint was computed. The document remains
* {@code FAILED_RETRYABLE} until the configured maximum is reached in later
* milestones. Skip events do <em>not</em> increase this counter.</li>
* counts how many times a transient technical error occurred after a successful
* fingerprint was computed. The document remains {@code FAILED_RETRYABLE} while
* this counter is strictly less than the configured {@code max.retries.transient}
* value. The attempt that causes the counter to reach {@code max.retries.transient}
* transitions the document to {@code FAILED_FINAL}.
* The configured limit must be an Integer &ge; 1.
* Skip events do <em>not</em> increase this counter.</li>
* </ul>
* <p>
* A freshly discovered document starts with both counters at zero.
* Counters are only written by the repository layer on the instructions of the
* application use case; they never change as a side-effect of a read operation.
* <strong>Immediate within-run target copy retry:</strong>
* The physical target-copy retry within the same run is not tracked in either counter.
* It is a purely technical within-run mechanism and does not affect the
cross-run counter state.
* <p>
* <strong>Counter invariant:</strong>
* Both counters start at zero for a newly discovered document and only increase
* monotonically. The counters are written by the repository layer on the instructions
* of the application use case; they never change as a side-effect of a read operation.
*
* @param contentErrorCount number of deterministic content errors recorded so far;
* must be &gt;= 0
* @param transientErrorCount number of transient technical errors recorded so far;
* must be &gt;= 0
* @param contentErrorCount number of historised deterministic content errors;
* must be &ge; 0
* @param transientErrorCount number of historised transient technical errors;
* must be &ge; 0
*/
public record FailureCounters(int contentErrorCount, int transientErrorCount) {

View File

@@ -0,0 +1,44 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
 * Decision governing whether a within-run immediate technical retry of the target copy
 * operation is permitted.
 * <p>
 * The immediate retry mechanism is strictly scoped:
 * <ul>
 *   <li>It applies <strong>only</strong> to the physical target-file copy path.</li>
 *   <li>It is permitted <strong>at most once</strong> per document per run (first copy
 *       attempt failed; one additional attempt is allowed).</li>
 *   <li>It does <strong>not</strong> involve a new AI call, a new naming-proposal
 *       derivation, or any other pipeline stage.</li>
 *   <li>It does <strong>not</strong> increment the cross-run
 *       transient-error counter regardless of outcome.</li>
 *   <li>It is a purely technical within-run recovery mechanism and is
 *       <strong>not</strong> counted as a cross-run retry in the sense of
 *       {@code max.retries.transient}.</li>
 * </ul>
 * <p>
 * The concrete retry decision for the subsequent persistence step is derived from the
 * combined outcome after the immediate retry completes (see {@link RetryDecision}).
 */
public enum ImmediateRetryDecision {
    /**
     * An immediate within-run retry of the target copy operation is permitted.
     * <p>
     * This value is produced when the first physical copy attempt within the current
     * document run has failed. The copy must be retried exactly once more.
     * No other pipeline stage is repeated.
     */
    ALLOWED,
    /**
     * No immediate within-run retry is permitted.
     * <p>
     * This value is produced when the immediate retry quota for this document run has
     * already been consumed (i.e. the immediate retry attempt itself has failed), or
     * when the failure did not occur on the target copy path.
     * The error must be escalated to the cross-run retry evaluation.
     */
    DENIED
}

View File

@@ -0,0 +1,172 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
 * Sealed type representing the complete, authoritative retry decision for a document
 * after an error has been classified.
 * <p>
 * A {@code RetryDecision} is the output of the retry policy evaluation. It unambiguously
 * encodes what must happen next for the document: which status to persist, which counter
 * to increment, and whether a within-run immediate retry is still possible.
 * <p>
 * <strong>Decision cases and their semantics:</strong>
 * <ol>
 * <li>{@link ContentErrorRetryable} — first deterministic content error. Document moves
 * to {@code FAILED_RETRYABLE}; content-error counter is incremented by 1. One later
 * scheduler run may retry.</li>
 * <li>{@link ContentErrorFinal} — second (or later) deterministic content error. Document
 * moves to {@code FAILED_FINAL}; content-error counter is incremented by 1. No further
 * processing in any future run.</li>
 * <li>{@link TransientErrorRetryable} — transient technical error with remaining retry budget.
 * Document moves to {@code FAILED_RETRYABLE}; transient-error counter is incremented by 1.
 * A later scheduler run may retry, as long as the counter stays below
 * {@code max.retries.transient}.</li>
 * <li>{@link TransientErrorFinal} — transient technical error that exhausts the configured
 * {@code max.retries.transient} budget. Document moves to {@code FAILED_FINAL};
 * transient-error counter is incremented by 1. No further processing in any future run.</li>
 * <li>{@link TargetCopyWithImmediateRetry} — first physical copy failure within the current
 * run. The document has not yet changed status; exactly one immediate within-run retry
 * of the copy step is permitted. No new AI call and no new naming-proposal derivation
 * occur. This decision does not yet modify any counter or status; the outcome of the
 * immediate retry determines which subsequent decision applies.</li>
 * </ol>
 * <p>
 * <strong>What this type does NOT cover:</strong>
 * <ul>
 * <li>Skip decisions ({@code SKIPPED_ALREADY_PROCESSED}, {@code SKIPPED_FINAL_FAILURE})
 * — skips are not retry decisions; they are pure historisation events.</li>
 * <li>Success — a successful outcome is not a retry decision.</li>
 * <li>Pre-fingerprint failures — errors before the fingerprint is computed are not
 * historised as attempts and therefore do not produce a {@code RetryDecision}.</li>
 * </ul>
 * <p>
 * <strong>Counter invariant:</strong> Skip decisions ({@code SKIPPED_ALREADY_PROCESSED},
 * {@code SKIPPED_FINAL_FAILURE}) never produce a {@code RetryDecision} and never change
 * any failure counter.
 * <p>
 * <strong>Single-truth rule:</strong> The retry decision is derived exclusively from the
 * document master record and the attempt history. No additional, parallel truth source
 * for retry state is introduced.
 */
public sealed interface RetryDecision {

    /**
     * Returns the failure class identifier for persistence and logging.
     * <p>
     * The failure class is a short, stable string identifying the type of failure,
     * typically the enum constant name of the original error or exception class name.
     *
     * @return failure class string; never {@code null} or blank
     */
    String failureClass();

    /**
     * Returns a human-readable failure message for persistence and logging.
     *
     * @return failure message; never {@code null} or blank
     */
    String failureMessage();

    // -------------------------------------------------------------------------
    // Deterministic content error cases
    // -------------------------------------------------------------------------

    /**
     * First historised deterministic content error for this fingerprint.
     * <p>
     * The document must be persisted with status {@code FAILED_RETRYABLE} and the
     * content-error counter incremented by 1. Exactly one later scheduler run is
     * permitted to retry.
     *
     * @param failureClass failure class identifier; never {@code null} or blank
     * @param failureMessage human-readable failure description; never {@code null} or blank
     */
    record ContentErrorRetryable(String failureClass, String failureMessage)
            implements RetryDecision {}

    /**
     * Second (or subsequent) historised deterministic content error for this fingerprint.
     * <p>
     * The document must be persisted with status {@code FAILED_FINAL} and the
     * content-error counter incremented by 1. No further processing is allowed in
     * any future run.
     *
     * @param failureClass failure class identifier; never {@code null} or blank
     * @param failureMessage human-readable failure description; never {@code null} or blank
     */
    record ContentErrorFinal(String failureClass, String failureMessage)
            implements RetryDecision {}

    // -------------------------------------------------------------------------
    // Transient technical error cases
    // -------------------------------------------------------------------------

    /**
     * Transient technical error with remaining retry budget.
     * <p>
     * The transient-error counter after incrementing is strictly less than
     * {@code max.retries.transient}. The document must be persisted with status
     * {@code FAILED_RETRYABLE} and the transient-error counter incremented by 1.
     * A later scheduler run may retry.
     *
     * @param failureClass failure class identifier; never {@code null} or blank
     * @param failureMessage human-readable failure description; never {@code null} or blank
     */
    record TransientErrorRetryable(String failureClass, String failureMessage)
            implements RetryDecision {}

    /**
     * Transient technical error that exhausts the configured {@code max.retries.transient}
     * budget.
     * <p>
     * The transient-error counter after incrementing equals {@code max.retries.transient}.
     * The document must be persisted with status {@code FAILED_FINAL} and the
     * transient-error counter incremented by 1. No further processing is allowed in
     * any future run.
     * <p>
     * Example: with {@code max.retries.transient = 1}, the very first transient error
     * produces this decision immediately.
     *
     * @param failureClass failure class identifier; never {@code null} or blank
     * @param failureMessage human-readable failure description; never {@code null} or blank
     */
    record TransientErrorFinal(String failureClass, String failureMessage)
            implements RetryDecision {}

    // -------------------------------------------------------------------------
    // Target copy immediate retry case
    // -------------------------------------------------------------------------

    /**
     * First physical target-file copy failure within the current run.
     * <p>
     * Exactly one immediate technical retry of the copy operation is permitted within
     * the same document run. This decision does not change any counter or document
     * status — it defers the final outcome until the immediate retry completes:
     * <ul>
     * <li>If the immediate retry succeeds → document proceeds to {@code SUCCESS}.</li>
     * <li>If the immediate retry also fails → the combined failure is classified as
     * a transient technical error and a {@link TransientErrorRetryable} or
     * {@link TransientErrorFinal} decision is produced for the final persistence
     * step.</li>
     * </ul>
     * <p>
     * The immediate retry is strictly limited to the physical copy path. No new AI call
     * and no new naming-proposal derivation occur. This mechanism does not increment the
     * cross-run transient-error counter.
     *
     * @param failureMessage human-readable description of the initial copy failure;
     *                       never {@code null} or blank
     */
    record TargetCopyWithImmediateRetry(String failureMessage) implements RetryDecision {

        /**
         * Returns the constant failure class identifier for target copy failures.
         *
         * @return {@code "TARGET_COPY_TECHNICAL_ERROR"}
         */
        @Override
        public String failureClass() {
            return DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name();
        }
    }
}

View File

@@ -62,6 +62,20 @@
* — Sealed result of parsing raw response into JSON structure (success or parsing failure)</li>
* </ul>
* <p>
* Retry policy and logging types:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification}
* — Unified classification of all document-level errors (content, transient, target copy)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision}
* — Sealed type representing the authoritative retry decision for a document error</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision}
* — Decision governing whether a within-run target copy retry is permitted</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity}
* — Sensitivity decision governing whether AI-generated content may be logged</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentLogCorrelation}
* — Sealed type carrying the correlation context for document-related log entries</li>
* </ul>
* <p>
* Exception types:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException}

View File

@@ -0,0 +1,200 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
import java.util.Objects;
/**
 * Default implementation of the {@link RetryDecisionEvaluator} interface.
 * <p>
 * Applies the binding retry policy rules exactly as specified:
 * <ul>
 * <li><strong>Deterministic content errors</strong>: the first historised occurrence
 * for a fingerprint yields {@link RetryDecision.ContentErrorRetryable} (one later
 * scheduler run may retry); every subsequent occurrence yields
 * {@link RetryDecision.ContentErrorFinal} (no further retries).</li>
 * <li><strong>Transient technical errors</strong>: the error stays
 * {@link RetryDecision.TransientErrorRetryable} while the counter after incrementing
 * remains strictly below {@code maxRetriesTransient}; once the incremented counter
 * reaches {@code maxRetriesTransient}, the result is
 * {@link RetryDecision.TransientErrorFinal}.</li>
 * <li><strong>Target copy failures</strong>: the first copy failure within a run yields
 * {@link RetryDecision.TargetCopyWithImmediateRetry}, permitting exactly one immediate
 * within-run retry of the physical copy step. No counter is modified by this
 * decision.</li>
 * </ul>
 * <p>
 * <strong>Counter semantics:</strong> The {@code currentCounters} passed to
 * {@link #evaluate} reflect the state <em>before</em> the current attempt's counter
 * increment. This evaluator computes the post-increment value and applies the
 * threshold check to that value.
 * <p>
 * <strong>Skip events</strong> ({@code SKIPPED_ALREADY_PROCESSED},
 * {@code SKIPPED_FINAL_FAILURE}) are never routed through this evaluator and never
 * produce a {@link RetryDecision}. Skip events change no failure counter.
 * <p>
 * <strong>Immediate within-run retry</strong> for the target copy path is a purely
 * technical within-run mechanism. It does not increment the cross-run transient-error
 * counter regardless of outcome, and it is not part of the cross-run retry budget
 * governed by {@code max.retries.transient}.
 * <p>
 * <strong>Single-truth rule:</strong> Evaluations are derived solely from the document
 * master record's failure counters and the configured limit. No additional, parallel
 * persistence source for retry decisions is introduced.
 * <p>
 * This class is stateless and thread-safe.
 */
public final class DefaultRetryDecisionEvaluator implements RetryDecisionEvaluator {

    /**
     * Derives the authoritative retry decision for a document-level error.
     * <p>
     * Decision rules by error class:
     * <ul>
     * <li>{@link DocumentErrorClassification#DETERMINISTIC_CONTENT_ERROR}:
     * {@code contentErrorCount} before increment = 0 →
     * {@link RetryDecision.ContentErrorRetryable}; else →
     * {@link RetryDecision.ContentErrorFinal}.</li>
     * <li>{@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}:
     * {@code transientErrorCount + 1 < maxRetriesTransient} →
     * {@link RetryDecision.TransientErrorRetryable};
     * {@code transientErrorCount + 1 >= maxRetriesTransient} →
     * {@link RetryDecision.TransientErrorFinal}.</li>
     * <li>{@link DocumentErrorClassification#TARGET_COPY_TECHNICAL_ERROR}:
     * always → {@link RetryDecision.TargetCopyWithImmediateRetry}.
     * No counter is modified by this decision.</li>
     * </ul>
     *
     * @param errorClass classification of the error that occurred; never {@code null}
     * @param currentCounters failure counters <em>before</em> incrementing for this
     *                        attempt; never {@code null}
     * @param maxRetriesTransient configured maximum number of historised transient errors
     *                            allowed per fingerprint; must be &ge; 1
     * @param failureClass short, stable failure class identifier; never {@code null} or blank
     * @param failureMessage human-readable description of the error; never {@code null} or blank
     * @return the authoritative {@link RetryDecision}; never {@code null}
     * @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
     * @throws NullPointerException if any reference parameter is {@code null}
     */
    @Override
    public RetryDecision evaluate(
            DocumentErrorClassification errorClass,
            FailureCounters currentCounters,
            int maxRetriesTransient,
            String failureClass,
            String failureMessage) {
        // Validation order is part of the observable contract (exception type per input).
        Objects.requireNonNull(errorClass, "errorClass must not be null");
        Objects.requireNonNull(currentCounters, "currentCounters must not be null");
        Objects.requireNonNull(failureClass, "failureClass must not be null");
        Objects.requireNonNull(failureMessage, "failureMessage must not be null");
        if (failureClass.isBlank()) {
            throw new IllegalArgumentException("failureClass must not be blank");
        }
        if (failureMessage.isBlank()) {
            throw new IllegalArgumentException("failureMessage must not be blank");
        }
        if (maxRetriesTransient < 1) {
            throw new IllegalArgumentException(
                    "maxRetriesTransient must be >= 1, but was: " + maxRetriesTransient);
        }

        // Target copy failures always defer to the within-run immediate retry mechanism.
        if (errorClass == DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR) {
            return new RetryDecision.TargetCopyWithImmediateRetry(failureMessage);
        }
        if (errorClass == DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR) {
            return contentErrorDecision(currentCounters, failureClass, failureMessage);
        }
        // The only remaining classification is TRANSIENT_TECHNICAL_ERROR.
        return transientErrorDecision(
                currentCounters, maxRetriesTransient, failureClass, failureMessage);
    }

    /**
     * Determines whether an immediate within-run retry of the target copy operation
     * is permitted.
     * <p>
     * {@link ImmediateRetryDecision#ALLOWED} is returned only when the copy has failed
     * on its first attempt within the current run. If this is the second copy attempt
     * (the immediate retry itself has failed), {@link ImmediateRetryDecision#DENIED} is
     * returned and the failure must be escalated to the cross-run retry evaluation.
     *
     * @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was
     *                                    the first copy attempt for this document in
     *                                    the current run
     * @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED};
     *         never {@code null}
     */
    @Override
    public ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun) {
        if (isFirstCopyAttemptInThisRun) {
            return ImmediateRetryDecision.ALLOWED;
        }
        return ImmediateRetryDecision.DENIED;
    }

    /**
     * Derives the retry decision for a deterministic content error.
     * <p>
     * The pre-increment content-error counter decides: a count of 0 (first error)
     * permits one later scheduler-run retry; any count &ge; 1 (second or subsequent
     * error) finalises the document.
     *
     * @param counters failure counters before incrementing
     * @param failureClass failure class identifier
     * @param failureMessage failure description
     * @return the appropriate content-error retry decision
     */
    private static RetryDecision contentErrorDecision(
            FailureCounters counters,
            String failureClass,
            String failureMessage) {
        boolean firstOccurrence = counters.contentErrorCount() == 0;
        return firstOccurrence
                ? new RetryDecision.ContentErrorRetryable(failureClass, failureMessage)
                : new RetryDecision.ContentErrorFinal(failureClass, failureMessage);
    }

    /**
     * Derives the retry decision for a transient technical error.
     * <p>
     * The post-increment transient-error counter decides: while it stays strictly
     * below {@code maxRetriesTransient} the document remains retryable; once it
     * reaches the limit the document is finalised.
     * <p>
     * Example with {@code maxRetriesTransient = 1}: counter before = 0,
     * counter after = 1 = limit → {@link RetryDecision.TransientErrorFinal} immediately.
     * <p>
     * Example with {@code maxRetriesTransient = 2}: counter before = 0,
     * counter after = 1 &lt; 2 → {@link RetryDecision.TransientErrorRetryable};
     * counter before = 1, counter after = 2 = limit → {@link RetryDecision.TransientErrorFinal}.
     *
     * @param counters failure counters before incrementing
     * @param maxRetriesTransient configured maximum historised transient errors (&ge; 1)
     * @param failureClass failure class identifier
     * @param failureMessage failure description
     * @return the appropriate transient-error retry decision
     */
    private static RetryDecision transientErrorDecision(
            FailureCounters counters,
            int maxRetriesTransient,
            String failureClass,
            String failureMessage) {
        int incrementedCount = counters.transientErrorCount() + 1;
        boolean budgetRemaining = incrementedCount < maxRetriesTransient;
        return budgetRemaining
                ? new RetryDecision.TransientErrorRetryable(failureClass, failureMessage)
                : new RetryDecision.TransientErrorFinal(failureClass, failureMessage);
    }
}

View File

@@ -43,40 +43,76 @@ import java.util.function.Function;
* Application-level service that implements the per-document processing logic.
* <p>
* This service is the single authoritative place for the decision rules:
* idempotency checks, status/counter mapping, target-copy finalization, and consistent
* two-level persistence.
* idempotency checks, status/counter mapping, target-copy finalization, retry
* finalization, skip semantics, and consistent two-level persistence.
*
* <h2>Processing order per candidate</h2>
* <ol>
* <li>Load the document master record by fingerprint.</li>
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} → create and persist
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
* <li>If the overall status is {@link ProcessingStatus#SUCCESS} →
* <strong>log skip at INFO with fingerprint</strong>;
* persist a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.
* Failure counters are not changed.</li>
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} →
* <strong>log skip at INFO with fingerprint</strong>;
* persist a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.
* Failure counters are not changed.</li>
* <li>If the overall status is {@link ProcessingStatus#PROPOSAL_READY} → load the
* leading proposal attempt and execute the target-copy finalization flow:
* build the base filename, resolve duplicates, write the copy, persist SUCCESS or
* FAILED_RETRYABLE.</li>
* build the base filename, resolve duplicates,
* <strong>log generated target filename at INFO with fingerprint</strong>,
* write the copy, persist SUCCESS or FAILED_RETRYABLE.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming) and map
* the result into status, counters, and retryable flag.</li>
* <li><strong>Log retry decision at INFO with fingerprint and error classification</strong>:
* FAILED_RETRYABLE (will retry in a later scheduler run) or
* FAILED_FINAL (retry budget exhausted, no further processing).</li>
* <li>Persist exactly one historised processing attempt for the identified document.</li>
* <li>Persist the updated document master record.</li>
* </ol>
*
* <h2>Retry finalization rules</h2>
* <ul>
* <li><strong>Deterministic content errors:</strong> The first historised occurrence
* leads to {@link ProcessingStatus#FAILED_RETRYABLE} (content-error counter incremented
* by 1). The second historised occurrence leads to {@link ProcessingStatus#FAILED_FINAL}
* (content-error counter incremented by 1). No further retry is possible.</li>
* <li><strong>Transient technical errors:</strong> The transient-error counter is
* incremented by 1 per occurrence. The document remains
* {@link ProcessingStatus#FAILED_RETRYABLE} as long as the counter is strictly less
* than {@code maxRetriesTransient}. The attempt that causes the counter to reach
* {@code maxRetriesTransient} finalises the document to
* {@link ProcessingStatus#FAILED_FINAL}. Valid values of {@code maxRetriesTransient}
* are integers &ge; 1; the value 0 is invalid startup configuration.</li>
* <li><strong>Skip events</strong> ({@code SKIPPED_ALREADY_PROCESSED},
* {@code SKIPPED_FINAL_FAILURE}) never change any failure counter.</li>
* </ul>
*
* <h2>Status transitions</h2>
* <ul>
* <li>Pre-check passed + AI naming proposal ready → {@link ProcessingStatus#PROPOSAL_READY}</li>
* <li>First deterministic content failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
* <li>Second deterministic content failure → {@link ProcessingStatus#FAILED_FINAL}</li>
* <li>Technical infrastructure failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
* <li>Technical failure at transient retry limit → {@link ProcessingStatus#FAILED_FINAL}</li>
* <li>{@link ProcessingStatus#PROPOSAL_READY} + successful target copy + consistent
* persistence → {@link ProcessingStatus#SUCCESS}</li>
* <li>{@link ProcessingStatus#PROPOSAL_READY} + first copy failure + successful immediate retry
* → treated as successful copy, proceeds to {@link ProcessingStatus#SUCCESS}</li>
* <li>{@link ProcessingStatus#PROPOSAL_READY} + both copy attempts fail → cross-run
* {@link ProcessingStatus#FAILED_RETRYABLE}, transient error counter +1</li>
* <li>{@link ProcessingStatus#PROPOSAL_READY} + technical failure → {@link ProcessingStatus#FAILED_RETRYABLE},
* transient error counter +1</li>
* <li>{@link ProcessingStatus#SUCCESS} → {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} skip</li>
* <li>{@link ProcessingStatus#FAILED_FINAL} → {@link ProcessingStatus#SKIPPED_FINAL_FAILURE} skip</li>
* </ul>
*
* <h2>Log correlation</h2>
* <p>
* All log entries emitted by this coordinator are post-fingerprint: the fingerprint is
* available for every document that reaches this coordinator. Relevant log entries carry
* the document fingerprint for unambiguous correlation across runs.
*
 * <h2>Leading source for the naming proposal (binding)</h2>
* <p>
* When a document is in {@code PROPOSAL_READY} state, the authoritative source for the
@@ -116,9 +152,16 @@ public class DocumentProcessingCoordinator {
private final TargetFolderPort targetFolderPort;
private final TargetFileCopyPort targetFileCopyPort;
private final ProcessingLogger logger;
private final int maxRetriesTransient;
/**
* Creates the document processing coordinator with all required ports and the logger.
* Creates the document processing coordinator with all required ports, logger, and
* the transient retry limit.
* <p>
* {@code maxRetriesTransient} is the maximum number of historised transient error attempts
* per fingerprint before the document is finalised to
* {@link ProcessingStatus#FAILED_FINAL}. The attempt that causes the counter to
* reach this value finalises the document. Must be &gt;= 1.
*
* @param documentRecordRepository port for reading and writing the document master record;
* must not be null
@@ -130,7 +173,10 @@ public class DocumentProcessingCoordinator {
* @param targetFileCopyPort port for copying source files to the target folder;
* must not be null
* @param logger for processing-related logging; must not be null
* @throws NullPointerException if any parameter is null
* @param maxRetriesTransient maximum number of historised transient error attempts
* before finalisation; must be &gt;= 1
* @throws NullPointerException if any object parameter is null
* @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
*/
public DocumentProcessingCoordinator(
DocumentRecordRepository documentRecordRepository,
@@ -138,7 +184,12 @@ public class DocumentProcessingCoordinator {
UnitOfWorkPort unitOfWorkPort,
TargetFolderPort targetFolderPort,
TargetFileCopyPort targetFileCopyPort,
ProcessingLogger logger) {
ProcessingLogger logger,
int maxRetriesTransient) {
if (maxRetriesTransient < 1) {
throw new IllegalArgumentException(
"maxRetriesTransient must be >= 1, got: " + maxRetriesTransient);
}
this.documentRecordRepository =
Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null");
this.processingAttemptRepository =
@@ -150,6 +201,7 @@ public class DocumentProcessingCoordinator {
this.targetFileCopyPort =
Objects.requireNonNull(targetFileCopyPort, "targetFileCopyPort must not be null");
this.logger = Objects.requireNonNull(logger, "logger must not be null");
this.maxRetriesTransient = maxRetriesTransient;
}
/**
@@ -285,7 +337,7 @@ public class DocumentProcessingCoordinator {
}
// =========================================================================
// M6 target-copy finalization path
// Target-copy finalization path
// =========================================================================
/**
@@ -297,6 +349,10 @@ public class DocumentProcessingCoordinator {
* <li>Build the base filename from the proposal's date and title.</li>
* <li>Resolve the first available unique filename in the target folder.</li>
* <li>Copy the source file to the target folder.</li>
* <li>If the copy fails: attempt exactly one immediate within-run retry of the same
* physical copy step. No new AI call and no new naming-proposal derivation occur.
* If the retry also fails, treat the combined failure as a transient error and
* skip the SUCCESS path.</li>
* <li>Persist a new {@code SUCCESS} attempt and update the master record.</li>
* <li>If persistence fails after a successful copy: attempt best-effort rollback
* of the copy and persist {@code FAILED_RETRYABLE} instead.</li>
@@ -365,19 +421,41 @@ public class DocumentProcessingCoordinator {
String resolvedFilename =
((ResolvedTargetFilename) resolutionResult).resolvedFilename();
logger.info("Resolved target filename for '{}': '{}'.",
candidate.uniqueIdentifier(), resolvedFilename);
logger.info("Generated target filename for '{}' (fingerprint: {}): '{}'.",
candidate.uniqueIdentifier(), fingerprint.sha256Hex(), resolvedFilename);
// --- Step 4: Copy file to target ---
// --- Step 4: Copy file to target (with one immediate within-run retry) ---
TargetFileCopyResult copyResult =
targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
if (copyResult instanceof TargetFileCopyTechnicalFailure copyFailure) {
logger.error("Target copy failed for '{}': {}",
candidate.uniqueIdentifier(), copyFailure.errorMessage());
return persistTransientError(
candidate, fingerprint, existingRecord, context, attemptStart, now,
"Target file copy failed: " + copyFailure.errorMessage());
if (copyResult instanceof TargetFileCopyTechnicalFailure firstCopyFailure) {
// First copy attempt failed — perform exactly one immediate within-run retry.
// The retry reuses the same resolved filename and document context; no new AI
// call, no new naming-proposal derivation. This mechanism does not increment
// the cross-run transient-error counter by itself.
logger.warn("First target copy attempt failed for '{}': {}. Performing immediate within-run retry.",
candidate.uniqueIdentifier(), firstCopyFailure.errorMessage());
TargetFileCopyResult retryCopyResult =
targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
if (retryCopyResult instanceof TargetFileCopyTechnicalFailure retryCopyFailure) {
// Immediate retry also failed — the combined failure is escalated as a
// cross-run transient technical error. No further within-run retry is
// attempted. This is the only document-level result for persistence.
logger.error("Immediate within-run retry also failed for '{}': {}",
candidate.uniqueIdentifier(), retryCopyFailure.errorMessage());
String combinedMessage = "Target file copy failed after immediate within-run retry."
+ " First attempt: " + firstCopyFailure.errorMessage()
+ "; Retry attempt: " + retryCopyFailure.errorMessage();
return persistTransientError(
candidate, fingerprint, existingRecord, context, attemptStart, now,
combinedMessage);
}
// Immediate retry succeeded — proceed to SUCCESS path as if the copy
// had succeeded on the first attempt.
logger.info("Immediate within-run retry succeeded for '{}'.", candidate.uniqueIdentifier());
}
// Copy succeeded — attempt to persist SUCCESS
@@ -447,8 +525,15 @@ public class DocumentProcessingCoordinator {
}
/**
* Persists a {@code FAILED_RETRYABLE} attempt with an incremented transient error counter
* for a document-level technical error during the target-copy finalization stage.
* Persists a transient error for a document-level technical failure during the
* target-copy finalization stage.
* <p>
* The resulting status is {@link ProcessingStatus#FAILED_FINAL} if the incremented
* transient error counter reaches {@code maxRetriesTransient}; otherwise
* {@link ProcessingStatus#FAILED_RETRYABLE}. The transient error counter is always
 * incremented by exactly one. This method does not increment the cross-run
* transient counter for the within-run immediate retry — only the combined outcome
* of the retry is reported here.
*
* @return true if the error was persisted; false if the error persistence itself failed
*/
@@ -463,26 +548,38 @@ public class DocumentProcessingCoordinator {
FailureCounters updatedCounters =
existingRecord.failureCounters().withIncrementedTransientErrorCount();
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
ProcessingStatus errorStatus = limitReached
? ProcessingStatus.FAILED_FINAL
: ProcessingStatus.FAILED_RETRYABLE;
try {
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
fingerprint, context.runId(), attemptNumber, attemptStart, now,
ProcessingStatus.FAILED_RETRYABLE,
ProcessingStatus.FAILED_RETRYABLE.name(),
errorMessage, true);
errorStatus,
errorStatus.name(),
errorMessage, !limitReached);
DocumentRecord errorRecord = buildTransientErrorRecord(
existingRecord, candidate, updatedCounters, now);
existingRecord, candidate, updatedCounters, errorStatus, now);
unitOfWorkPort.executeInTransaction(txOps -> {
txOps.saveProcessingAttempt(errorAttempt);
txOps.updateDocumentRecord(errorRecord);
});
logger.debug("Transient error persisted for '{}': status=FAILED_RETRYABLE, "
+ "transientErrors={}.",
candidate.uniqueIdentifier(),
updatedCounters.transientErrorCount());
if (limitReached) {
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_FINAL — "
+ "transient error limit reached ({}/{} attempts). No further retry.",
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
updatedCounters.transientErrorCount(), maxRetriesTransient);
} else {
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_RETRYABLE — "
+ "transient error, will retry in later run ({}/{} attempts).",
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
updatedCounters.transientErrorCount(), maxRetriesTransient);
}
return true;
} catch (DocumentPersistenceException persistEx) {
@@ -493,9 +590,13 @@ public class DocumentProcessingCoordinator {
}
/**
* Attempts to persist a {@code FAILED_RETRYABLE} attempt after a persistence failure
* that occurred following a successful target copy. This is a secondary persistence
* effort; its failure is logged but does not change the return value.
* Attempts to persist a transient error after a persistence failure that occurred
* following a successful target copy. This is a secondary persistence effort;
* its failure is logged but does not change the return value.
* <p>
* Applies the same transient limit check as {@link #persistTransientError}: if the
* incremented counter reaches {@code maxRetriesTransient}, the secondary attempt
* is persisted as {@link ProcessingStatus#FAILED_FINAL}.
*/
private void persistTransientErrorAfterPersistenceFailure(
SourceDocumentCandidate candidate,
@@ -508,16 +609,21 @@ public class DocumentProcessingCoordinator {
FailureCounters updatedCounters =
existingRecord.failureCounters().withIncrementedTransientErrorCount();
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
ProcessingStatus errorStatus = limitReached
? ProcessingStatus.FAILED_FINAL
: ProcessingStatus.FAILED_RETRYABLE;
try {
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
fingerprint, context.runId(), attemptNumber, attemptStart, now,
ProcessingStatus.FAILED_RETRYABLE,
ProcessingStatus.FAILED_RETRYABLE.name(),
errorMessage, true);
errorStatus,
errorStatus.name(),
errorMessage, !limitReached);
DocumentRecord errorRecord = buildTransientErrorRecord(
existingRecord, candidate, updatedCounters, now);
existingRecord, candidate, updatedCounters, errorStatus, now);
unitOfWorkPort.executeInTransaction(txOps -> {
txOps.saveProcessingAttempt(errorAttempt);
@@ -618,13 +724,13 @@ public class DocumentProcessingCoordinator {
private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForNewDocument(
DocumentProcessingOutcome pipelineOutcome) {
return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome);
return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome, maxRetriesTransient);
}
private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForKnownDocument(
DocumentProcessingOutcome pipelineOutcome,
FailureCounters existingCounters) {
return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters);
return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters, maxRetriesTransient);
}
// =========================================================================
@@ -717,12 +823,13 @@ public class DocumentProcessingCoordinator {
DocumentRecord existingRecord,
SourceDocumentCandidate candidate,
FailureCounters updatedCounters,
ProcessingStatus targetStatus,
Instant now) {
return new DocumentRecord(
existingRecord.fingerprint(),
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
ProcessingStatus.FAILED_RETRYABLE,
targetStatus,
updatedCounters,
now, // lastFailureInstant
existingRecord.lastSuccessInstant(),
@@ -764,11 +871,27 @@ public class DocumentProcessingCoordinator {
recordWriter.accept(txOps);
});
logger.info("Document '{}' processed: status={}, contentErrors={}, transientErrors={}.",
candidate.uniqueIdentifier(),
outcome.overallStatus(),
outcome.counters().contentErrorCount(),
outcome.counters().transientErrorCount());
if (outcome.overallStatus() == ProcessingStatus.FAILED_RETRYABLE) {
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_RETRYABLE — "
+ "will retry in later scheduler run. "
+ "ContentErrors={}, TransientErrors={}.",
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
outcome.counters().contentErrorCount(),
outcome.counters().transientErrorCount());
} else if (outcome.overallStatus() == ProcessingStatus.FAILED_FINAL) {
logger.info("Retry decision for '{}' (fingerprint: {}): FAILED_FINAL — "
+ "permanently failed, no further retry. "
+ "ContentErrors={}, TransientErrors={}.",
candidate.uniqueIdentifier(), fingerprint.sha256Hex(),
outcome.counters().contentErrorCount(),
outcome.counters().transientErrorCount());
} else {
logger.info("Document '{}' processed: status={} (fingerprint: {}). "
+ "ContentErrors={}, TransientErrors={}.",
candidate.uniqueIdentifier(), outcome.overallStatus(), fingerprint.sha256Hex(),
outcome.counters().contentErrorCount(),
outcome.counters().transientErrorCount());
}
return true;
} catch (DocumentPersistenceException e) {

View File

@@ -36,9 +36,29 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
* <li><strong>AI functional failure (second or later occurrence):</strong>
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
* content error counter incremented by 1, {@code retryable=false}.</li>
* <li><strong>Technical error (pre-fingerprint / extraction / AI infrastructure):</strong>
* <li><strong>Technical error below the transient retry limit:</strong>
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
* transient error counter incremented by 1, {@code retryable=true}.</li>
* <li><strong>Technical error at or above the transient retry limit:</strong>
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
* transient error counter incremented by 1, {@code retryable=false}.</li>
* </ul>
*
* <h2>Transient retry limit semantics</h2>
* <p>
* {@code maxRetriesTransient} is interpreted as the maximum number of historised
* transient error attempts per fingerprint. The attempt that causes the counter
* to reach {@code maxRetriesTransient} finalises the document status to
* {@link ProcessingStatus#FAILED_FINAL}. Valid values are integers &gt;= 1;
* the value 0 is invalid startup configuration and must be rejected before
* the batch run begins.
* <p>
* Examples:
* <ul>
* <li>{@code maxRetriesTransient = 1}: the first historised transient error
* immediately finalises to {@code FAILED_FINAL}.</li>
* <li>{@code maxRetriesTransient = 2}: the first transient error yields
* {@code FAILED_RETRYABLE}; the second finalises to {@code FAILED_FINAL}.</li>
* </ul>
*/
final class ProcessingOutcomeTransition {
@@ -52,24 +72,33 @@ final class ProcessingOutcomeTransition {
* <p>
* For new documents, all failure counters start at zero.
*
* @param pipelineOutcome the outcome from the processing pipeline
* @param pipelineOutcome the outcome from the processing pipeline
* @param maxRetriesTransient maximum number of historised transient error attempts
* before the document is finalised to {@code FAILED_FINAL};
* must be &gt;= 1
* @return the mapped outcome with status, counters, and retryability
*/
static ProcessingOutcome forNewDocument(DocumentProcessingOutcome pipelineOutcome) {
return forKnownDocument(pipelineOutcome, FailureCounters.zero());
static ProcessingOutcome forNewDocument(
DocumentProcessingOutcome pipelineOutcome,
int maxRetriesTransient) {
return forKnownDocument(pipelineOutcome, FailureCounters.zero(), maxRetriesTransient);
}
/**
* Maps a pipeline outcome to a processing outcome, considering the existing
* failure counter state from a known document's history.
*
* @param pipelineOutcome the outcome from the processing pipeline
* @param existingCounters the current failure counter values from the document's master record
* @param pipelineOutcome the outcome from the processing pipeline
* @param existingCounters the current failure counter values from the document's master record
* @param maxRetriesTransient maximum number of historised transient error attempts
* before the document is finalised to {@code FAILED_FINAL};
* must be &gt;= 1
* @return the mapped outcome with updated status, counters, and retryability
*/
static ProcessingOutcome forKnownDocument(
DocumentProcessingOutcome pipelineOutcome,
FailureCounters existingCounters) {
FailureCounters existingCounters,
int maxRetriesTransient) {
return switch (pipelineOutcome) {
case NamingProposalReady ignored -> {
@@ -106,31 +135,37 @@ final class ProcessingOutcomeTransition {
}
case TechnicalDocumentError ignored4 -> {
// Technical error (extraction / infrastructure): retryable, transient counter +1
// Technical error (extraction / infrastructure): apply transient retry limit
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),
true
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
updatedCounters,
!limitReached
);
}
case AiTechnicalFailure ignored5 -> {
// Technical AI error (timeout, unreachable, bad JSON): retryable, transient counter +1
// Technical AI error (timeout, unreachable, bad JSON): apply transient retry limit
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),
true
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
updatedCounters,
!limitReached
);
}
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored6 -> {
// Pre-check passed without AI step: in normal flow this should not appear at
// the outcome transition level once the AI pipeline is fully wired. Treat it
// as a technical error to avoid silent inconsistency.
// as a technical error and apply the transient retry limit.
FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount();
boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient;
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),
true
limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE,
updatedCounters,
!limitReached
);
}
};

View File

@@ -0,0 +1,107 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
/**
 * Application service contract for deriving authoritative retry decisions from
 * document error state and configuration.
 * <p>
 * This interface defines the single, testable entry point for all retry policy
 * evaluations. Implementations must apply the binding retry rules exactly
 * as specified:
 * <ul>
 * <li><strong>Deterministic content errors</strong> ({@link DocumentErrorClassification#DETERMINISTIC_CONTENT_ERROR}):
 * the <em>first</em> historised content error for a fingerprint results in
 * {@link RetryDecision.ContentErrorRetryable}; the <em>second</em> results in
 * {@link RetryDecision.ContentErrorFinal}.</li>
 * <li><strong>Transient technical errors</strong> ({@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}):
 * the error remains retryable while the transient-error counter after incrementing
 * stays strictly below {@code maxRetriesTransient}. When the counter after
 * incrementing reaches {@code maxRetriesTransient}, the result is
 * {@link RetryDecision.TransientErrorFinal}.</li>
 * <li><strong>Target copy failures</strong> ({@link DocumentErrorClassification#TARGET_COPY_TECHNICAL_ERROR})
 * on the <em>first</em> copy attempt within a run: result is
 * {@link RetryDecision.TargetCopyWithImmediateRetry}. After the immediate retry
 * has itself failed, the failure is re-evaluated as a
 * {@link DocumentErrorClassification#TRANSIENT_TECHNICAL_ERROR}.</li>
 * </ul>
 * <p>
 * <strong>Counter semantics:</strong>
 * <ul>
 * <li>The {@code currentCounters} passed to {@link #evaluate} reflect the state
 * <em>before</em> the current attempt's counter increment. The evaluator is
 * responsible for determining what the counter will be after incrementing and
 * applying the threshold check accordingly.</li>
 * <li>Skip events ({@code SKIPPED_ALREADY_PROCESSED}, {@code SKIPPED_FINAL_FAILURE})
 * are not routed through this evaluator and never produce a
 * {@link RetryDecision}.</li>
 * </ul>
 * <p>
 * <strong>{@code maxRetriesTransient} invariant:</strong>
 * The value must be an integer &ge; 1. A value of {@code 0} is invalid configuration
 * and must be rejected at startup before any batch run begins. Implementations of
 * this interface may assume the value is always &ge; 1 when called.
 * <p>
 * Example for {@code maxRetriesTransient = 1}:
 * <ul>
 * <li>transient-error counter before = 0 → after increment = 1 = limit → {@link RetryDecision.TransientErrorFinal}</li>
 * </ul>
 * Example for {@code maxRetriesTransient = 2}:
 * <ul>
 * <li>transient-error counter before = 0 → after increment = 1 &lt; 2 → {@link RetryDecision.TransientErrorRetryable}</li>
 * <li>transient-error counter before = 1 → after increment = 2 = limit → {@link RetryDecision.TransientErrorFinal}</li>
 * </ul>
 * <p>
 * <strong>Single-truth rule:</strong> No parallel persistence source for retry
 * decisions is introduced. Evaluations are derived solely from the document master
 * record's failure counters and the configured limit.
 */
public interface RetryDecisionEvaluator {
    /**
     * Derives the authoritative retry decision for a document-level error.
     * <p>
     * The decision is determined by the error classification, the existing failure
     * counters (before any increment for the current attempt), and the configured
     * transient-retry limit.
     *
     * @param errorClass classification of the error that occurred; never {@code null}
     * @param currentCounters failure counters <em>before</em> incrementing for this
     *                        attempt; never {@code null}
     * @param maxRetriesTransient configured maximum number of historised transient errors
     *                            allowed per fingerprint; must be &ge; 1
     * @param failureClass short, stable failure class identifier for persistence
     *                     and logging; never {@code null} or blank
     * @param failureMessage human-readable description of the error; never {@code null}
     *                       or blank
     * @return the authoritative {@link RetryDecision}; never {@code null}
     * @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1
     */
    RetryDecision evaluate(
            DocumentErrorClassification errorClass,
            FailureCounters currentCounters,
            int maxRetriesTransient,
            String failureClass,
            String failureMessage);
    /**
     * Determines whether an immediate within-run retry of the target copy operation
     * is permitted.
     * <p>
     * An immediate retry is {@link ImmediateRetryDecision#ALLOWED} only when the copy
     * has failed on its first attempt within the current run. If this is the second
     * copy attempt within the same run (i.e. the immediate retry itself has failed),
     * the result is {@link ImmediateRetryDecision#DENIED}.
     *
     * @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was
     *                                    the first copy attempt for this document in
     *                                    the current run; {@code false} if it was the
     *                                    immediate retry attempt
     * @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED};
     *         never {@code null}
     */
    ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun);
}

View File

@@ -222,23 +222,30 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
* <p>
* Processing order:
* <ol>
* <li><strong>Log:</strong> detected source file at INFO level with run-ID (pre-fingerprint
* correlation via run-ID and candidate description).</li>
* <li>Record the attempt start instant.</li>
* <li>Compute the SHA-256 fingerprint of the candidate file content.</li>
* <li>If fingerprint computation fails: log as non-identifiable run event and
* return true — no SQLite record is created, but no persistence failure occurred.</li>
* <li>If fingerprint computation fails: log as non-identifiable run event with run-ID
* and return true — no SQLite record is created, no persistence failure.</li>
* <li>Load document master record.</li>
* <li>If already {@code SUCCESS} → persist skip attempt with
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If already {@code FAILED_FINAL} → persist skip attempt with
* {@code SKIPPED_FINAL_FAILURE}.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks).</li>
* <li>Map result into status, counters and retryable flag.</li>
* <li>If already {@code SUCCESS} → log skip at INFO level with fingerprint;
* persist skip attempt with {@code SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If already {@code FAILED_FINAL} → log skip at INFO level with fingerprint;
* persist skip attempt with {@code SKIPPED_FINAL_FAILURE}.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming).</li>
* <li>Map result into status, counters, and retryable flag.</li>
* <li><strong>Log:</strong> retry decision at INFO level with fingerprint and error
* classification (FAILED_RETRYABLE or FAILED_FINAL).</li>
* <li>Persist exactly one historised processing attempt.</li>
* <li>Persist the updated document master record.</li>
* </ol>
* <p>
* Per-document errors do not abort the overall batch run. Each candidate ends
* controlled regardless of its outcome.
* <p>
* Post-fingerprint log entries carry the document fingerprint for correlation.
 * Pre-fingerprint log entries (steps 1–4) use run-ID and candidate description.
*
* @param candidate the candidate to process
* @param context the current batch run context
@@ -246,14 +253,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
* errors return true; persistence failures return false)
*/
private boolean processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) {
logger.debug("Processing candidate: {}", candidate.uniqueIdentifier());
logger.info("Detected source file '{}' for processing (RunId: {}).",
candidate.uniqueIdentifier(), context.runId());
Instant attemptStart = Instant.now();
FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate);
return switch (fingerprintResult) {
case FingerprintTechnicalError fingerprintError -> {
handleFingerprintError(candidate, fingerprintError);
handleFingerprintError(candidate, fingerprintError, context);
yield true; // fingerprint errors are not persistence failures
}
case FingerprintSuccess fingerprintSuccess ->
@@ -262,15 +270,23 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
}
/**
* Handles a fingerprint computation error by logging it as a non-identifiable event.
* Handles a fingerprint computation error by logging it as a non-identifiable run event.
* No SQLite record is created for this candidate.
* <p>
* Log entries before a successful fingerprint are correlated via the batch run identifier
* and the candidate description, as no fingerprint is available for document-level
* correlation.
*
* @param candidate the candidate that could not be fingerprinted
* @param error the fingerprint error
* @param error the fingerprint error
* @param context the current batch run context; used for run-level log correlation
*/
private void handleFingerprintError(SourceDocumentCandidate candidate, FingerprintTechnicalError error) {
logger.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).",
candidate.uniqueIdentifier(), error.errorMessage());
private void handleFingerprintError(
SourceDocumentCandidate candidate,
FingerprintTechnicalError error,
BatchRunContext context) {
logger.warn("Fingerprint computation failed for '{}' (RunId: {}): {} — candidate not historised.",
candidate.uniqueIdentifier(), context.runId(), error.errorMessage());
}
/**

View File

@@ -0,0 +1,189 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
 * Unit tests for the sealed {@link DocumentLogCorrelation} type and its two
 * permitted implementations.
 * <p>
 * Covered contracts:
 * <ul>
 * <li>{@link DocumentLogCorrelation.CandidateCorrelation} carries the run identifier
 *     and the candidate description (pre-fingerprint phase).</li>
 * <li>{@link DocumentLogCorrelation.FingerprintCorrelation} carries the run identifier
 *     and the document fingerprint (post-fingerprint phase).</li>
 * <li>The sealed hierarchy is exhaustively matchable over exactly these two subtypes.</li>
 * </ul>
 */
class DocumentLogCorrelationTest {
    private static final String RUN_ID_VALUE = "run-correlation-test-001";
    private static final String CANDIDATE_DESCRIPTION = "invoice-2026-01-15.pdf";
    private static final String FINGERPRINT_HEX = "a".repeat(64);
    // -------------------------------------------------------------------------
    // CandidateCorrelation — pre-fingerprint phase
    // -------------------------------------------------------------------------
    @Test
    void candidateCorrelation_storesRunId() {
        var id = new RunId(RUN_ID_VALUE);
        var subject = new DocumentLogCorrelation.CandidateCorrelation(id, CANDIDATE_DESCRIPTION);
        assertEquals(id, subject.runId());
    }
    @Test
    void candidateCorrelation_storesCandidateDescription() {
        var id = new RunId(RUN_ID_VALUE);
        var subject = new DocumentLogCorrelation.CandidateCorrelation(id, CANDIDATE_DESCRIPTION);
        assertEquals(CANDIDATE_DESCRIPTION, subject.candidateDescription());
    }
    @Test
    void candidateCorrelation_runIdAccessibleViaInterface() {
        var id = new RunId(RUN_ID_VALUE);
        // Deliberately typed as the sealed interface: runId() must be reachable polymorphically.
        DocumentLogCorrelation subject =
                new DocumentLogCorrelation.CandidateCorrelation(id, CANDIDATE_DESCRIPTION);
        assertEquals(id, subject.runId());
    }
    @Test
    void candidateCorrelation_twoInstancesWithSameDataAreEqual() {
        var id = new RunId(RUN_ID_VALUE);
        var left = new DocumentLogCorrelation.CandidateCorrelation(id, CANDIDATE_DESCRIPTION);
        var right = new DocumentLogCorrelation.CandidateCorrelation(id, CANDIDATE_DESCRIPTION);
        assertEquals(left, right);
    }
    @Test
    void candidateCorrelation_implementsDocumentLogCorrelation() {
        var subject = new DocumentLogCorrelation.CandidateCorrelation(
                new RunId(RUN_ID_VALUE), CANDIDATE_DESCRIPTION);
        assertInstanceOf(DocumentLogCorrelation.class, subject);
    }
    // -------------------------------------------------------------------------
    // FingerprintCorrelation — post-fingerprint phase
    // -------------------------------------------------------------------------
    @Test
    void fingerprintCorrelation_storesRunId() {
        var id = new RunId(RUN_ID_VALUE);
        var fp = new DocumentFingerprint(FINGERPRINT_HEX);
        var subject = new DocumentLogCorrelation.FingerprintCorrelation(id, fp);
        assertEquals(id, subject.runId());
    }
    @Test
    void fingerprintCorrelation_storesFingerprint() {
        var id = new RunId(RUN_ID_VALUE);
        var fp = new DocumentFingerprint(FINGERPRINT_HEX);
        var subject = new DocumentLogCorrelation.FingerprintCorrelation(id, fp);
        assertEquals(fp, subject.fingerprint());
    }
    @Test
    void fingerprintCorrelation_runIdAccessibleViaInterface() {
        var id = new RunId(RUN_ID_VALUE);
        var fp = new DocumentFingerprint(FINGERPRINT_HEX);
        // Deliberately typed as the sealed interface: runId() must be reachable polymorphically.
        DocumentLogCorrelation subject =
                new DocumentLogCorrelation.FingerprintCorrelation(id, fp);
        assertEquals(id, subject.runId());
    }
    @Test
    void fingerprintCorrelation_twoInstancesWithSameDataAreEqual() {
        var id = new RunId(RUN_ID_VALUE);
        var fp = new DocumentFingerprint(FINGERPRINT_HEX);
        var left = new DocumentLogCorrelation.FingerprintCorrelation(id, fp);
        var right = new DocumentLogCorrelation.FingerprintCorrelation(id, fp);
        assertEquals(left, right);
    }
    @Test
    void fingerprintCorrelation_implementsDocumentLogCorrelation() {
        var subject = new DocumentLogCorrelation.FingerprintCorrelation(
                new RunId(RUN_ID_VALUE), new DocumentFingerprint(FINGERPRINT_HEX));
        assertInstanceOf(DocumentLogCorrelation.class, subject);
    }
    // -------------------------------------------------------------------------
    // Sealed type — structural contract
    // -------------------------------------------------------------------------
    @Test
    void sealedType_patternMatchExhaustsAllPermittedSubtypes() {
        var id = new RunId(RUN_ID_VALUE);
        DocumentLogCorrelation prePhase =
                new DocumentLogCorrelation.CandidateCorrelation(id, CANDIDATE_DESCRIPTION);
        DocumentLogCorrelation postPhase =
                new DocumentLogCorrelation.FingerprintCorrelation(id, new DocumentFingerprint(FINGERPRINT_HEX));
        // The switch in phaseLabel compiles without a default arm, proving the
        // sealed type permits exactly these two implementations.
        assertEquals("candidate", phaseLabel(prePhase));
        assertEquals("fingerprint", phaseLabel(postPhase));
    }
    /** Maps each permitted subtype to a label via an exhaustive switch over the sealed type. */
    private static String phaseLabel(DocumentLogCorrelation correlation) {
        return switch (correlation) {
            case DocumentLogCorrelation.CandidateCorrelation ignored -> "candidate";
            case DocumentLogCorrelation.FingerprintCorrelation ignored -> "fingerprint";
        };
    }
    @Test
    void candidateCorrelation_differentDescriptions_areNotEqual() {
        var id = new RunId(RUN_ID_VALUE);
        var left = new DocumentLogCorrelation.CandidateCorrelation(id, "first.pdf");
        var right = new DocumentLogCorrelation.CandidateCorrelation(id, "second.pdf");
        assertNotEquals(left, right);
    }
    @Test
    void fingerprintCorrelation_differentFingerprints_areNotEqual() {
        var id = new RunId(RUN_ID_VALUE);
        var left = new DocumentLogCorrelation.FingerprintCorrelation(
                id, new DocumentFingerprint("a".repeat(64)));
        var right = new DocumentLogCorrelation.FingerprintCorrelation(
                id, new DocumentFingerprint("b".repeat(64)));
        assertNotEquals(left, right);
    }
}

View File

@@ -0,0 +1,320 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision;
import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import static org.junit.jupiter.api.Assertions.*;
/**
* Tests for {@link DefaultRetryDecisionEvaluator}.
* <p>
* Verifies the binding retry policy rules for deterministic content errors,
* transient technical errors, target copy failures, and the within-run
* immediate retry mechanism.
*/
class DefaultRetryDecisionEvaluatorTest {
private static final String FAILURE_CLASS = "SOME_FAILURE";
private static final String FAILURE_MESSAGE = "Something went wrong";
private DefaultRetryDecisionEvaluator evaluator;
    @BeforeEach
    void setUp() {
        // A fresh evaluator instance per test keeps the cases independent of each other.
        evaluator = new DefaultRetryDecisionEvaluator();
    }
// -------------------------------------------------------------------------
// Deterministic content error rules
// -------------------------------------------------------------------------
    @Test
    void evaluate_firstContentError_returnsContentErrorRetryable() {
        // Counters before the attempt: no prior content or transient errors historised.
        FailureCounters counters = new FailureCounters(0, 0);
        RetryDecision decision = evaluator.evaluate(
                DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
        // First content error must stay retryable and carry the failure payload through.
        assertInstanceOf(RetryDecision.ContentErrorRetryable.class, decision);
        RetryDecision.ContentErrorRetryable retryable = (RetryDecision.ContentErrorRetryable) decision;
        assertEquals(FAILURE_CLASS, retryable.failureClass());
        assertEquals(FAILURE_MESSAGE, retryable.failureMessage());
    }
    @Test
    void evaluate_secondContentError_returnsContentErrorFinal() {
        // One content error already historised for this fingerprint.
        FailureCounters counters = new FailureCounters(1, 0);
        RetryDecision decision = evaluator.evaluate(
                DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
        // The second deterministic content error finalises; the payload is preserved.
        assertInstanceOf(RetryDecision.ContentErrorFinal.class, decision);
        RetryDecision.ContentErrorFinal finalDecision = (RetryDecision.ContentErrorFinal) decision;
        assertEquals(FAILURE_CLASS, finalDecision.failureClass());
        assertEquals(FAILURE_MESSAGE, finalDecision.failureMessage());
    }
    @Test
    void evaluate_subsequentContentErrors_alwaysReturnContentErrorFinal() {
        // Any count >= 1 results in final (covers legacy M4-M6 data with higher counts).
        // Counts 2..5 guard against an accidental "== 1" comparison in the evaluator.
        for (int count = 1; count <= 5; count++) {
            FailureCounters counters = new FailureCounters(count, 0);
            RetryDecision decision = evaluator.evaluate(
                    DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                    counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
            assertInstanceOf(RetryDecision.ContentErrorFinal.class, decision,
                    "Expected ContentErrorFinal for contentErrorCount=" + count);
        }
    }
    @Test
    void evaluate_contentError_transientCounterIsIrrelevant() {
        // Non-zero transient counter must not affect the content error decision.
        FailureCounters counters = new FailureCounters(0, 5);
        RetryDecision decision = evaluator.evaluate(
                DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
        // Zero prior content errors → retryable, despite five historised transient errors.
        assertInstanceOf(RetryDecision.ContentErrorRetryable.class, decision);
    }
// -------------------------------------------------------------------------
// Transient technical error rules
// -------------------------------------------------------------------------
    @Test
    void evaluate_transientError_maxRetriesTransientOne_firstError_returnsTransientErrorFinal() {
        // maxRetriesTransient=1: counter before=0, after=1=limit → final immediately
        FailureCounters counters = new FailureCounters(0, 0);
        RetryDecision decision = evaluator.evaluate(
                DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
                counters, 1, FAILURE_CLASS, FAILURE_MESSAGE);
        assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision,
                "With maxRetriesTransient=1, first transient error must be final");
        // Failure class and message must be carried into the final decision unchanged.
        RetryDecision.TransientErrorFinal finalDecision = (RetryDecision.TransientErrorFinal) decision;
        assertEquals(FAILURE_CLASS, finalDecision.failureClass());
        assertEquals(FAILURE_MESSAGE, finalDecision.failureMessage());
    }
    @Test
    void evaluate_transientError_maxRetriesTransientTwo_firstError_returnsTransientErrorRetryable() {
        // maxRetriesTransient=2: counter before=0, after=1 < 2 → retryable
        FailureCounters counters = new FailureCounters(0, 0);
        RetryDecision decision = evaluator.evaluate(
                DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
                counters, 2, FAILURE_CLASS, FAILURE_MESSAGE);
        assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision);
        // Retryable decisions also propagate the failure payload for persistence and logging.
        RetryDecision.TransientErrorRetryable retryable = (RetryDecision.TransientErrorRetryable) decision;
        assertEquals(FAILURE_CLASS, retryable.failureClass());
        assertEquals(FAILURE_MESSAGE, retryable.failureMessage());
    }
    @Test
    void evaluate_transientError_maxRetriesTransientTwo_secondError_returnsTransientErrorFinal() {
        // maxRetriesTransient=2: counter before=1, after=2=limit → final
        // Boundary case: the increment that reaches the limit finalises the document.
        FailureCounters counters = new FailureCounters(0, 1);
        RetryDecision decision = evaluator.evaluate(
                DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
                counters, 2, FAILURE_CLASS, FAILURE_MESSAGE);
        assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision,
                "With maxRetriesTransient=2, second transient error must be final");
    }
@Test
void evaluate_transientError_maxRetriesTransientThree_firstError_returnsRetryable() {
    // Limit 3, first error: counter 0 -> 1 is well below the threshold.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
            new FailureCounters(0, 0), 3, FAILURE_CLASS, FAILURE_MESSAGE);
    assertInstanceOf(RetryDecision.TransientErrorRetryable.class, result);
}
@Test
void evaluate_transientError_maxRetriesTransientThree_secondError_returnsRetryable() {
    // Limit 3, second error: counter 1 -> 2 is still below the threshold.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
            new FailureCounters(0, 1), 3, FAILURE_CLASS, FAILURE_MESSAGE);
    assertInstanceOf(RetryDecision.TransientErrorRetryable.class, result);
}
@Test
void evaluate_transientError_maxRetriesTransientThree_thirdError_returnsFinal() {
    // Limit 3, third error: counter 2 -> 3 reaches the threshold and the
    // evaluator must finalise.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
            new FailureCounters(0, 2), 3, FAILURE_CLASS, FAILURE_MESSAGE);
    assertInstanceOf(RetryDecision.TransientErrorFinal.class, result,
            "Third transient error with maxRetriesTransient=3 must be final");
}
@Test
void evaluate_transientError_contentCounterIsIrrelevant() {
    // A non-zero content-error counter must have no influence on the
    // transient-error decision.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
            new FailureCounters(1, 0), 2, FAILURE_CLASS, FAILURE_MESSAGE);
    assertInstanceOf(RetryDecision.TransientErrorRetryable.class, result);
}
@Test
void evaluate_transientError_legacyDataWithHigherCounts_finalizesCorrectly() {
    // Persisted M4-M6 rows may carry counter values beyond normal expectations;
    // the threshold check (5 -> 6 >= 3) must still finalise consistently.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
            new FailureCounters(3, 5), 3, FAILURE_CLASS, FAILURE_MESSAGE);
    assertInstanceOf(RetryDecision.TransientErrorFinal.class, result);
}
// -------------------------------------------------------------------------
// Target copy technical error rule
// -------------------------------------------------------------------------
@Test
void evaluate_targetCopyError_returnsTargetCopyWithImmediateRetry() {
    // A failed target copy is answered with exactly one immediate within-run retry;
    // the failure class is derived from the classification itself.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR,
            new FailureCounters(0, 0), 1, FAILURE_CLASS, FAILURE_MESSAGE);

    RetryDecision.TargetCopyWithImmediateRetry immediateRetry = assertInstanceOf(
            RetryDecision.TargetCopyWithImmediateRetry.class, result);
    assertEquals(FAILURE_MESSAGE, immediateRetry.failureMessage());
    assertEquals(DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name(),
            immediateRetry.failureClass());
}
@Test
void evaluate_targetCopyError_countersAndMaxRetriesAreIgnored() {
    // Neither the persisted counters nor maxRetriesTransient play any role
    // in the target-copy decision.
    RetryDecision result = evaluator.evaluate(
            DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR,
            new FailureCounters(2, 3), 5, FAILURE_CLASS, FAILURE_MESSAGE);
    assertInstanceOf(RetryDecision.TargetCopyWithImmediateRetry.class, result);
}
// -------------------------------------------------------------------------
// Immediate within-run retry decision
// -------------------------------------------------------------------------
@Test
void evaluateImmediateRetry_firstAttempt_returnsAllowed() {
    // The first within-run attempt is always granted one immediate retry.
    assertEquals(ImmediateRetryDecision.ALLOWED, evaluator.evaluateImmediateRetry(true));
}
@Test
void evaluateImmediateRetry_secondAttempt_returnsDenied() {
    // After the first attempt the immediate-retry budget is exhausted.
    assertEquals(ImmediateRetryDecision.DENIED, evaluator.evaluateImmediateRetry(false));
}
// -------------------------------------------------------------------------
// Guard conditions
// -------------------------------------------------------------------------
@Test
void evaluate_throwsWhenMaxRetriesTransientIsZero() {
    // A retry limit of zero is a configuration error and must be rejected.
    assertThrows(IllegalArgumentException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
                    FailureCounters.zero(), 0, FAILURE_CLASS, FAILURE_MESSAGE));
}
@Test
void evaluate_throwsWhenMaxRetriesTransientIsNegative() {
    // Negative limits are equally invalid and must be rejected.
    assertThrows(IllegalArgumentException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR,
                    FailureCounters.zero(), -1, FAILURE_CLASS, FAILURE_MESSAGE));
}
@Test
void evaluate_throwsWhenErrorClassIsNull() {
    // The error classification argument is mandatory.
    assertThrows(NullPointerException.class,
            () -> evaluator.evaluate(null, FailureCounters.zero(), 1,
                    FAILURE_CLASS, FAILURE_MESSAGE));
}
@Test
void evaluate_throwsWhenCountersAreNull() {
    // The persisted failure counters are mandatory.
    assertThrows(NullPointerException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                    null, 1, FAILURE_CLASS, FAILURE_MESSAGE));
}
@Test
void evaluate_throwsWhenFailureClassIsNull() {
    // The failure class string is mandatory.
    assertThrows(NullPointerException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                    FailureCounters.zero(), 1, null, FAILURE_MESSAGE));
}
@Test
void evaluate_throwsWhenFailureClassIsBlank() {
    // A whitespace-only failure class carries no information and is rejected.
    assertThrows(IllegalArgumentException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                    FailureCounters.zero(), 1, " ", FAILURE_MESSAGE));
}
@Test
void evaluate_throwsWhenFailureMessageIsNull() {
    // The failure message is mandatory.
    assertThrows(NullPointerException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                    FailureCounters.zero(), 1, FAILURE_CLASS, null));
}
@Test
void evaluate_throwsWhenFailureMessageIsBlank() {
    // A whitespace-only failure message carries no information and is rejected.
    assertThrows(IllegalArgumentException.class,
            () -> evaluator.evaluate(
                    DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR,
                    FailureCounters.zero(), 1, FAILURE_CLASS, " "));
}
}

View File

@@ -70,6 +70,9 @@ class DocumentProcessingCoordinatorTest {
private static final String FINGERPRINT_HEX =
"a".repeat(64); // 64 lowercase hex chars
/** Default transient retry limit used in the shared {@link #processor} instance. */
private static final int DEFAULT_MAX_RETRIES_TRANSIENT = 3;
private CapturingDocumentRecordRepository recordRepo;
private CapturingProcessingAttemptRepository attemptRepo;
private CapturingUnitOfWorkPort unitOfWorkPort;
@@ -86,7 +89,8 @@ class DocumentProcessingCoordinatorTest {
attemptRepo = new CapturingProcessingAttemptRepository();
unitOfWorkPort = new CapturingUnitOfWorkPort(recordRepo, attemptRepo);
processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
candidate = new SourceDocumentCandidate(
"test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf"));
@@ -198,9 +202,11 @@ class DocumentProcessingCoordinatorTest {
@Test
void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
// Starting with 1 transient error; with DEFAULT_MAX_RETRIES_TRANSIENT=3, counter
// becomes 2 after this run which is still below the limit → FAILED_RETRYABLE
DocumentRecord existingRecord = buildRecord(
ProcessingStatus.FAILED_RETRYABLE,
new FailureCounters(0, 2));
new FailureCounters(0, 1));
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
DocumentProcessingOutcome outcome = new TechnicalDocumentError(
@@ -212,10 +218,54 @@ class DocumentProcessingCoordinatorTest {
DocumentRecord record = recordRepo.updatedRecords.get(0);
assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
assertEquals(0, record.failureCounters().contentErrorCount());
assertEquals(3, record.failureCounters().transientErrorCount());
assertEquals(2, record.failureCounters().transientErrorCount());
assertTrue(attemptRepo.savedAttempts.get(0).retryable());
}
@Test
void process_knownDocument_technicalError_atTransientLimit_persistsFailedFinal() {
    // The stored counter is one below the limit, so the error of this run
    // reaches the limit and must finalise the document.
    DocumentRecord atLimitRecord = buildRecord(
            ProcessingStatus.FAILED_RETRYABLE,
            new FailureCounters(0, DEFAULT_MAX_RETRIES_TRANSIENT - 1));
    recordRepo.setLookupResult(new DocumentKnownProcessable(atLimitRecord));

    processor.process(candidate, fingerprint,
            new TechnicalDocumentError(candidate, "Timeout at limit", null),
            context, attemptStart);

    assertEquals(1, recordRepo.updatedRecords.size());
    DocumentRecord persisted = recordRepo.updatedRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, persisted.overallStatus(),
            "Document must be finalised when transient limit is reached");
    assertEquals(DEFAULT_MAX_RETRIES_TRANSIENT, persisted.failureCounters().transientErrorCount(),
            "Transient counter must be incremented to the limit value");
    assertFalse(attemptRepo.savedAttempts.get(0).retryable(),
            "Attempt must not be retryable when transient limit is reached");
}
@Test
void process_newDocument_technicalError_maxRetriesTransient1_immediatelyFinalises() {
    // A limit of 1 leaves no retry budget: the very first transient error on a
    // brand-new document must finalise it right away.
    DocumentProcessingCoordinator singleAttemptCoordinator = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 1);
    recordRepo.setLookupResult(new DocumentUnknown());

    singleAttemptCoordinator.process(candidate, fingerprint,
            new TechnicalDocumentError(candidate, "I/O error", null),
            context, attemptStart);

    assertEquals(1, recordRepo.createdRecords.size());
    DocumentRecord created = recordRepo.createdRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, created.overallStatus(),
            "With maxRetriesTransient=1, the first transient error must immediately finalise");
    assertEquals(1, created.failureCounters().transientErrorCount());
    assertFalse(attemptRepo.savedAttempts.get(0).retryable());
}
@Test
void process_knownDocument_namingProposalReady_persistsProposalReadyStatus() {
DocumentRecord existingRecord = buildRecord(
@@ -617,7 +667,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("Datenbank nicht erreichbar", null));
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
@@ -634,7 +685,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckPassed(
@@ -652,7 +704,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0));
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckFailed(
@@ -670,7 +723,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
@@ -687,7 +741,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
recordRepo.setLookupResult(new DocumentUnknown());
unitOfWorkPort.failOnExecute = true;
DocumentProcessingOutcome outcome = new PreCheckPassed(
@@ -705,7 +760,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckPassed(
@@ -723,7 +779,8 @@ class DocumentProcessingCoordinatorTest {
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
DEFAULT_MAX_RETRIES_TRANSIENT);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
unitOfWorkPort.failOnExecute = true;
@@ -811,7 +868,8 @@ class DocumentProcessingCoordinatorTest {
DocumentProcessingCoordinator coordinatorWithFailingFolder = new DocumentProcessingCoordinator(
recordRepo, attemptRepo, unitOfWorkPort,
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
@@ -830,7 +888,8 @@ class DocumentProcessingCoordinatorTest {
DocumentProcessingCoordinator coordinatorWithFailingCopy = new DocumentProcessingCoordinator(
recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger());
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
@@ -902,6 +961,175 @@ class DocumentProcessingCoordinatorTest {
assertFalse(result, "Should return false when persistence fails after successful copy");
}
@Test
void processDeferredOutcome_proposalReady_firstCopyFails_immediateRetrySucceeds_persistsSuccess() {
    // Copy attempt one fails, the immediate within-run retry succeeds — the
    // document must end up as SUCCESS without re-running the pipeline.
    DocumentRecord proposalRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
    recordRepo.setLookupResult(new DocumentKnownProcessable(proposalRecord));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    CountingTargetFileCopyPort copyPort = new CountingTargetFileCopyPort(1); // fail first call only
    DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), copyPort, new NoOpProcessingLogger(),
            DEFAULT_MAX_RETRIES_TRANSIENT);

    boolean result = coordinator.processDeferredOutcome(
            candidate, fingerprint, context, attemptStart, c -> {
                throw new AssertionError("Pipeline must not run for PROPOSAL_READY");
            });

    assertTrue(result, "Should succeed when immediate retry of target copy succeeds");
    ProcessingAttempt successAttempt = attemptRepo.savedAttempts.stream()
            .filter(attempt -> attempt.status() == ProcessingStatus.SUCCESS)
            .findFirst()
            .orElse(null);
    assertNotNull(successAttempt, "A SUCCESS attempt must be persisted after a successful immediate retry");
    assertEquals(ProcessingStatus.SUCCESS, recordRepo.updatedRecords.get(0).overallStatus(),
            "Master record must show SUCCESS after successful immediate retry");
    assertEquals(2, copyPort.callCount,
            "copyToTarget must have been called exactly twice: first attempt + one retry");
}
@Test
void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_persistsTransientError() {
    // When the first copy and its immediate retry both fail, a retryable
    // transient error must be recorded.
    DocumentRecord proposalRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
    recordRepo.setLookupResult(new DocumentKnownProcessable(proposalRecord));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    CountingTargetFileCopyPort copyPort = new CountingTargetFileCopyPort(2); // fail both calls
    DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), copyPort, new NoOpProcessingLogger(),
            DEFAULT_MAX_RETRIES_TRANSIENT);

    coordinator.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);

    ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
            .filter(attempt -> attempt.status() == ProcessingStatus.FAILED_RETRYABLE)
            .findFirst()
            .orElse(null);
    assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when both copy attempts fail");
    assertTrue(errorAttempt.retryable(), "Error must be retryable after exhausting immediate retry");
    assertEquals(2, copyPort.callCount,
            "copyToTarget must have been called exactly twice: first attempt + one immediate retry");
}
@Test
void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() {
    // The immediate-retry path must never re-enter the AI pipeline or create
    // a new naming proposal.
    DocumentRecord proposalRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
    recordRepo.setLookupResult(new DocumentKnownProcessable(proposalRecord));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    CountingTargetFileCopyPort copyPort = new CountingTargetFileCopyPort(1); // fail first, succeed second
    DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), copyPort, new NoOpProcessingLogger(),
            DEFAULT_MAX_RETRIES_TRANSIENT);

    coordinator.processDeferredOutcome(
            candidate, fingerprint, context, attemptStart,
            c -> { throw new AssertionError("AI pipeline must NOT run during immediate retry"); });

    // The retry succeeded, so no FAILED_RETRYABLE attempt may have been persisted.
    long failedRetryableAttempts = attemptRepo.savedAttempts.stream()
            .filter(attempt -> attempt.status() == ProcessingStatus.FAILED_RETRYABLE)
            .count();
    assertEquals(0, failedRetryableAttempts,
            "No FAILED_RETRYABLE must be persisted when immediate retry succeeds");
}
// -------------------------------------------------------------------------
// Sequential multi-run lifecycle tests
// -------------------------------------------------------------------------
@Test
void process_contentErrorLifecycle_firstRunRetryable_secondRunFinal_thirdRunSkipped() {
    DocumentProcessingOutcome noUsableTextError = new PreCheckFailed(
            candidate, PreCheckFailureReason.NO_USABLE_TEXT);

    // Run 1: unknown document, first deterministic content error -> FAILED_RETRYABLE.
    recordRepo.setLookupResult(new DocumentUnknown());
    processor.process(candidate, fingerprint, noUsableTextError, context, attemptStart);
    DocumentRecord run1Record = recordRepo.createdRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, run1Record.overallStatus(),
            "First content error must yield FAILED_RETRYABLE");
    assertEquals(1, run1Record.failureCounters().contentErrorCount());
    assertTrue(attemptRepo.savedAttempts.get(0).retryable(),
            "First content error attempt must be retryable");

    // Run 2: known document (FAILED_RETRYABLE, contentErrorCount=1); the second
    // content error reaches the content limit -> FAILED_FINAL.
    recordRepo.setLookupResult(new DocumentKnownProcessable(run1Record));
    processor.process(candidate, fingerprint, noUsableTextError, context, attemptStart);
    DocumentRecord run2Record = recordRepo.updatedRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, run2Record.overallStatus(),
            "Second content error must yield FAILED_FINAL");
    assertEquals(2, run2Record.failureCounters().contentErrorCount());
    assertFalse(attemptRepo.savedAttempts.get(1).retryable(),
            "Second content error attempt must not be retryable");

    // Run 3: terminal FAILED_FINAL -> SKIPPED_FINAL_FAILURE; counters stay frozen.
    recordRepo.setLookupResult(new DocumentTerminalFinalFailure(run2Record));
    processor.process(candidate, fingerprint, noUsableTextError, context, attemptStart);
    assertEquals(3, attemptRepo.savedAttempts.size(),
            "Three attempts must be recorded across the three runs");
    ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(2);
    assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
    assertFalse(skipAttempt.retryable());
    DocumentRecord run3Record = recordRepo.updatedRecords.get(1);
    assertEquals(2, run3Record.failureCounters().contentErrorCount(),
            "Content error counter must remain 2 after a SKIPPED_FINAL_FAILURE event");
    assertEquals(0, run3Record.failureCounters().transientErrorCount(),
            "Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event");
}
@Test
void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() {
    // With maxRetriesTransient=2 the first transient error leaves the document
    // retryable and the second one finalises it.
    DocumentProcessingCoordinator twoRetryCoordinator = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 2);
    DocumentProcessingOutcome timeoutError = new TechnicalDocumentError(candidate, "Timeout", null);

    // Run 1: unknown document, first transient error -> FAILED_RETRYABLE, counter=1.
    recordRepo.setLookupResult(new DocumentUnknown());
    twoRetryCoordinator.process(candidate, fingerprint, timeoutError, context, attemptStart);
    DocumentRecord run1Record = recordRepo.createdRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, run1Record.overallStatus(),
            "First transient error must yield FAILED_RETRYABLE when limit not yet reached");
    assertEquals(1, run1Record.failureCounters().transientErrorCount());
    assertTrue(attemptRepo.savedAttempts.get(0).retryable());

    // Run 2: counter=1; the second transient error reaches the limit of 2 -> FAILED_FINAL.
    recordRepo.setLookupResult(new DocumentKnownProcessable(run1Record));
    twoRetryCoordinator.process(candidate, fingerprint, timeoutError, context, attemptStart);
    DocumentRecord run2Record = recordRepo.updatedRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, run2Record.overallStatus(),
            "Second transient error must yield FAILED_FINAL when maxRetriesTransient=2 is reached");
    assertEquals(2, run2Record.failureCounters().transientErrorCount(),
            "Transient error counter must equal maxRetriesTransient after finalisation");
    assertFalse(attemptRepo.savedAttempts.get(1).retryable(),
            "Final transient error attempt must not be retryable");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
@@ -1089,6 +1317,26 @@ class DocumentProcessingCoordinatorTest {
}
}
/**
 * Test double that fails the first N {@code copyToTarget} invocations and
 * succeeds on every later one, recording the total number of calls so tests
 * can assert the exact retry count.
 */
private static class CountingTargetFileCopyPort implements TargetFileCopyPort {
    private final int failFirstNCalls;
    private int callCount = 0;

    CountingTargetFileCopyPort(int failFirstNCalls) {
        this.failFirstNCalls = failFirstNCalls;
    }

    @Override
    public TargetFileCopyResult copyToTarget(
            de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator,
            String resolvedFilename) {
        callCount++;
        return callCount <= failFirstNCalls
                ? new TargetFileCopyTechnicalFailure("Simulated copy failure on call " + callCount, false)
                : new TargetFileCopySuccess();
    }
}
private static class NoOpTargetFolderPort implements TargetFolderPort {
@Override
public String getTargetFolderLocator() {

View File

@@ -1,6 +1,7 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
@@ -44,8 +45,8 @@ class DocumentProcessingServiceTest {
SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString());
candidate = new SourceDocumentCandidate("document.pdf", 2048L, locator);
// Create runtime configuration with maxPages limit
runtimeConfig = new RuntimeConfiguration(10);
// Create runtime configuration with maxPages limit and default transient retry limit
runtimeConfig = new RuntimeConfiguration(10, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
}
@Test

View File

@@ -1,6 +1,7 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
@@ -236,7 +237,7 @@ class PreCheckEvaluatorTest {
// =========================================================================
private RuntimeConfiguration buildConfig(int maxPages) throws Exception {
return new RuntimeConfiguration(maxPages);
return new RuntimeConfiguration(maxPages, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
}
private int maxPages(int limit) {

View File

@@ -2,6 +2,7 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
@@ -52,6 +53,7 @@ import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -460,7 +462,7 @@ class BatchRunProcessingUseCaseTest {
DocumentProcessingCoordinator failingProcessor = new DocumentProcessingCoordinator(
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
new NoOpProcessingLogger()) {
new NoOpProcessingLogger(), 3) {
@Override
public boolean processDeferredOutcome(
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
@@ -504,7 +506,7 @@ class BatchRunProcessingUseCaseTest {
DocumentProcessingCoordinator selectiveFailingProcessor = new DocumentProcessingCoordinator(
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
new NoOpProcessingLogger()) {
new NoOpProcessingLogger(), 3) {
private int callCount = 0;
@Override
@@ -595,7 +597,7 @@ class BatchRunProcessingUseCaseTest {
DocumentProcessingCoordinator failingCoordinator = new DocumentProcessingCoordinator(
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
new NoOpProcessingLogger()) {
new NoOpProcessingLogger(), 3) {
@Override
public boolean processDeferredOutcome(
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate c,
@@ -660,11 +662,12 @@ class BatchRunProcessingUseCaseTest {
// Prüft, dass bei erfolgreich verarbeiteter Datei debug() durch logExtractionResult
// und info() durch logProcessingOutcome aufgerufen wird.
// Erwartete debug()-Aufrufe für einen Kandidaten (success-Pfad):
// L138 (lock acquired) + L249 (processCandidate) + L293 (fingerprint) + L337 (logExtractionResult) + L213 (lock released) = 5
// Ohne logExtractionResult-Aufruf: 4
// lock acquired + fingerprint computed + logExtractionResult + lock released = 4
// Ohne logExtractionResult-Aufruf wären es nur 3 debug()-Aufrufe.
// Erwartete info()-Aufrufe für einen Kandidaten (success-Pfad):
// L130 (initiiert) + L145 (gestartet) + L178 (Kandidaten gefunden) + L365 (PreCheckPassed) + L190 (abgeschlossen) = 5
// Ohne logProcessingOutcome-Aufruf: 4
// Batch initiiert + Batch gestartet + Kandidaten gefunden + erkannte Quelldatei
// + logProcessingOutcome (PreCheckPassed) + Batch abgeschlossen = 6
// Ohne logProcessingOutcome-Aufruf wären es 5 info()-Aufrufe.
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
RuntimeConfiguration config = buildConfig(tempDir);
@@ -680,21 +683,21 @@ class BatchRunProcessingUseCaseTest {
useCase.execute(new BatchRunContext(new RunId("log-precheck"), Instant.now()));
// Ohne logExtractionResult wären es mindestens 4 debug()-Aufrufe; mit logExtractionResult 5
assertTrue(capturingLogger.debugCallCount >= 5,
"logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 5, war: "
// Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4
assertTrue(capturingLogger.debugCallCount >= 4,
"logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 4, war: "
+ capturingLogger.debugCallCount + ")");
// Ohne logProcessingOutcome wären es 4 info()-Aufrufe; mit logProcessingOutcome 5
assertTrue(capturingLogger.infoCallCount >= 5,
"logProcessingOutcome muss bei PreCheckPassed info() aufrufen (erwartet >= 5, war: "
// Ohne logProcessingOutcome wären es 5 info()-Aufrufe; mit logProcessingOutcome >= 6
assertTrue(capturingLogger.infoCallCount >= 6,
"logProcessingOutcome muss bei PreCheckPassed info() aufrufen (erwartet >= 6, war: "
+ capturingLogger.infoCallCount + ")");
}
@Test
void execute_extractionContentError_logsDebugAndPreCheckFailedInfo() throws Exception {
// Prüft, dass bei PdfExtractionContentError debug (logExtractionResult) und info (logProcessingOutcome) geloggt wird.
// Erwartete debug()-Aufrufe: 5 (lock + processCandidate + fingerprint + logExtractionResult (content) + lock released)
// Erwartete info()-Aufrufe: 5 (L130 + L145 + L178 + L369 PreCheckFailed + L190)
// Erwartete debug()-Aufrufe: 4 (lock acquired + fingerprint + logExtractionResult (content) + lock released)
// Erwartete info()-Aufrufe: 6 (Batch initiiert + gestartet + Kandidaten gefunden + erkannte Quelldatei + PreCheckFailed + abgeschlossen)
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
RuntimeConfiguration config = buildConfig(tempDir);
@@ -710,20 +713,20 @@ class BatchRunProcessingUseCaseTest {
useCase.execute(new BatchRunContext(new RunId("log-content-error"), Instant.now()));
// Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5
assertTrue(capturingLogger.debugCallCount >= 5,
"logExtractionResult muss bei PdfExtractionContentError debug() aufrufen (erwartet >= 5, war: "
// Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4
assertTrue(capturingLogger.debugCallCount >= 4,
"logExtractionResult muss bei PdfExtractionContentError debug() aufrufen (erwartet >= 4, war: "
+ capturingLogger.debugCallCount + ")");
// Ohne logProcessingOutcome (PreCheckFailed) wären es 4 info()-Aufrufe; mit 5
assertTrue(capturingLogger.infoCallCount >= 5,
"logProcessingOutcome muss bei PreCheckFailed info() aufrufen (erwartet >= 5, war: "
// Ohne logProcessingOutcome (PreCheckFailed) wären es 5 info()-Aufrufe; mit >= 6
assertTrue(capturingLogger.infoCallCount >= 6,
"logProcessingOutcome muss bei PreCheckFailed info() aufrufen (erwartet >= 6, war: "
+ capturingLogger.infoCallCount + ")");
}
@Test
void execute_extractionTechnicalError_logsDebugAndWarn() throws Exception {
// Prüft, dass bei PdfExtractionTechnicalError debug (logExtractionResult) und warn (logProcessingOutcome) geloggt wird.
// Erwartete debug()-Aufrufe: 5 (lock + processCandidate + fingerprint + logExtractionResult + lock released)
// Erwartete debug()-Aufrufe: 4 (lock acquired + fingerprint + logExtractionResult + lock released)
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
RuntimeConfiguration config = buildConfig(tempDir);
@@ -739,15 +742,86 @@ class BatchRunProcessingUseCaseTest {
useCase.execute(new BatchRunContext(new RunId("log-tech-error"), Instant.now()));
// Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5
assertTrue(capturingLogger.debugCallCount >= 5,
"logExtractionResult muss bei PdfExtractionTechnicalError debug() aufrufen (erwartet >= 5, war: "
// Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4
assertTrue(capturingLogger.debugCallCount >= 4,
"logExtractionResult muss bei PdfExtractionTechnicalError debug() aufrufen (erwartet >= 4, war: "
+ capturingLogger.debugCallCount + ")");
// logProcessingOutcome ruft warn() auf für TechnicalDocumentError
assertTrue(capturingLogger.warnCallCount > 0,
"logProcessingOutcome muss bei TechnicalDocumentError warn() aufrufen");
}
// -------------------------------------------------------------------------
// Log correlation tests
// -------------------------------------------------------------------------
@Test
void execute_preFingerprintError_logContainsRunIdAndCandidateDescription() throws Exception {
    // Pre-fingerprint correlation rule: if the fingerprint cannot be computed, the
    // warning log entry has to carry the run-ID together with the candidate's
    // unique identifier so the failure can be traced back to both.
    final String runId = "run-correlation-pre-fp";
    final String pdfName = "unreadable-candidate.pdf";
    MessageCapturingProcessingLogger logger = new MessageCapturingProcessingLogger();
    RuntimeConfiguration configuration = buildConfig(tempDir);
    FixedCandidatesPort candidates = new FixedCandidatesPort(
            List.of(makeCandidate(pdfName)));

    // A fingerprint port that never succeeds, forcing the pre-fingerprint error path.
    FingerprintPort brokenFingerprints = candidate ->
            new FingerprintTechnicalError("File not readable", null);

    DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
            configuration, new MockRunLockPort(), candidates, new NoOpExtractionPort(),
            brokenFingerprints, new NoOpDocumentProcessingCoordinator(),
            buildStubAiNamingService(), logger);

    useCase.execute(new BatchRunContext(new RunId(runId), Instant.now()));

    // Both identifiers must show up in one and the same warning message.
    boolean correlated = false;
    for (String message : logger.warnMessages) {
        if (message.contains(runId) && message.contains(pdfName)) {
            correlated = true;
            break;
        }
    }
    assertTrue(correlated,
            "Pre-fingerprint warning must reference both run-ID '" + runId
                    + "' and candidate '" + pdfName + "'. "
                    + "Captured warn messages: " + logger.warnMessages);
}
@Test
void execute_postFingerprintProcessing_logContainsFingerprintHex() throws Exception {
    // Post-fingerprint correlation rule: once a fingerprint has been computed
    // successfully, its SHA-256 hex value must appear in at least one log message.
    final String pdfName = "identifiable.pdf";
    MessageCapturingProcessingLogger logger = new MessageCapturingProcessingLogger();
    RuntimeConfiguration configuration = buildConfig(tempDir);
    SourceDocumentCandidate candidate = makeCandidate(pdfName);
    FixedCandidatesPort candidates = new FixedCandidatesPort(List.of(candidate));
    FixedExtractionPort extraction = new FixedExtractionPort(
            new PdfExtractionSuccess("Some invoice text", new PdfPageCount(1)));

    // Deterministic fingerprint port: compute the expected hex up front so the
    // assertion can look for the exact value in the captured messages.
    AlwaysSuccessFingerprintPort fingerprints = new AlwaysSuccessFingerprintPort();
    FingerprintSuccess fingerprintResult =
            (FingerprintSuccess) fingerprints.computeFingerprint(candidate);
    String expectedHex = fingerprintResult.fingerprint().sha256Hex();

    DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
            configuration, new MockRunLockPort(), candidates, extraction,
            fingerprints, new TrackingDocumentProcessingCoordinator(),
            buildStubAiNamingService(), logger);

    useCase.execute(new BatchRunContext(new RunId("run-correlation-post-fp"), Instant.now()));

    boolean hexLogged = logger.allMessages().stream()
            .anyMatch(message -> message.contains(expectedHex));
    assertTrue(hexLogged,
            "At least one log message must contain the fingerprint hex '" + expectedHex
                    + "' after successful fingerprint computation. "
                    + "Captured messages: " + logger.allMessages());
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
@@ -779,8 +853,8 @@ class BatchRunProcessingUseCaseTest {
}
private static RuntimeConfiguration buildConfig(Path tempDir) throws Exception {
// maxPages set to 3 useful for page-limit tests
return new RuntimeConfiguration(3);
// maxPages set to 3 useful for page-limit tests; maxRetriesTransient set to 3
return new RuntimeConfiguration(3, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
}
private static SourceDocumentCandidate makeCandidate(String filename) {
@@ -937,7 +1011,7 @@ class BatchRunProcessingUseCaseTest {
private static class NoOpDocumentProcessingCoordinator extends DocumentProcessingCoordinator {
NoOpDocumentProcessingCoordinator() {
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(),
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3);
}
}
@@ -949,7 +1023,7 @@ class BatchRunProcessingUseCaseTest {
TrackingDocumentProcessingCoordinator() {
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(),
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3);
}
@Override
@@ -1094,6 +1168,62 @@ class BatchRunProcessingUseCaseTest {
}
}
/**
 * Test double that records every formatted log message, grouped by level.
 * <p>
 * Unlike {@code CapturingProcessingLogger} (which only counts calls), this logger
 * keeps the rendered message text so log-correlation tests can assert on content.
 */
private static class MessageCapturingProcessingLogger implements ProcessingLogger {
    final List<String> infoMessages = new ArrayList<>();
    final List<String> debugMessages = new ArrayList<>();
    final List<String> warnMessages = new ArrayList<>();
    final List<String> errorMessages = new ArrayList<>();

    /**
     * Renders a message template the way SLF4J/Log4j2 would: each {@code {}}
     * placeholder is replaced by the next argument; surplus placeholders are
     * kept literally, surplus arguments are ignored.
     */
    private static String format(String message, Object... args) {
        if (args == null || args.length == 0) {
            return message;
        }
        StringBuilder rendered = new StringBuilder();
        int searchFrom = 0;
        for (Object arg : args) {
            int placeholder = message.indexOf("{}", searchFrom);
            if (placeholder == -1) {
                break;  // no more placeholders — remaining args are dropped
            }
            rendered.append(message, searchFrom, placeholder).append(arg);
            searchFrom = placeholder + 2;
        }
        rendered.append(message, searchFrom, message.length());
        return rendered.toString();
    }

    @Override
    public void info(String message, Object... args) {
        infoMessages.add(format(message, args));
    }

    @Override
    public void debug(String message, Object... args) {
        debugMessages.add(format(message, args));
    }

    @Override
    public void warn(String message, Object... args) {
        warnMessages.add(format(message, args));
    }

    @Override
    public void error(String message, Object... args) {
        errorMessages.add(format(message, args));
    }

    /** Returns all captured messages across every level (info, debug, warn, error). */
    List<String> allMessages() {
        List<String> combined = new ArrayList<>(infoMessages);
        combined.addAll(debugMessages);
        combined.addAll(warnMessages);
        combined.addAll(errorMessages);
        return combined;
    }
}
/** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */
private static class CapturingProcessingLogger implements ProcessingLogger {
int infoCallCount = 0;

View File

@@ -30,6 +30,7 @@ import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
@@ -204,7 +205,7 @@ public class BootstrapRunner {
this.schemaInitPortFactory = SqliteSchemaInitializationAdapter::new;
this.useCaseFactory = (startConfig, lock) -> {
// Extract runtime configuration from startup configuration
RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(startConfig.maxPages());
RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(startConfig.maxPages(), startConfig.maxRetriesTransient(), resolveAiContentSensitivity(startConfig.logAiSensitive()));
String jdbcUrl = buildJdbcUrl(startConfig);
FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
@@ -218,7 +219,9 @@ public class BootstrapRunner {
TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(startConfig.targetFolder());
TargetFileCopyPort targetFileCopyPort = new FilesystemTargetFileCopyAdapter(startConfig.targetFolder());
DocumentProcessingCoordinator documentProcessingCoordinator =
new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository, unitOfWorkPort, targetFolderPort, targetFileCopyPort, coordinatorLogger);
new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository,
unitOfWorkPort, targetFolderPort, targetFileCopyPort, coordinatorLogger,
startConfig.maxRetriesTransient());
// Wire AI naming pipeline
AiInvocationPort aiInvocationPort = new OpenAiHttpAdapter(startConfig);
@@ -408,6 +411,23 @@ public class BootstrapRunner {
}
}
/**
 * Maps the raw {@code log.ai.sensitive} configuration flag to the corresponding
 * {@link AiContentSensitivity} decision.
 * <p>
 * The conservative default is {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT};
 * {@link AiContentSensitivity#LOG_SENSITIVE_CONTENT} is chosen only when the flag
 * was explicitly set to {@code true}.
 *
 * @param logAiSensitive the parsed boolean value of the {@code log.ai.sensitive} property
 * @return the appropriate sensitivity decision; never {@code null}
 */
static AiContentSensitivity resolveAiContentSensitivity(boolean logAiSensitive) {
    if (logAiSensitive) {
        return AiContentSensitivity.LOG_SENSITIVE_CONTENT;
    }
    return AiContentSensitivity.PROTECT_SENSITIVE_CONTENT;
}
/**
* Builds the JDBC URL for the SQLite database from the configured file path.
*

View File

@@ -7,6 +7,7 @@ import de.gecheckt.pdf.umbenenner.adapter.out.configuration.ConfigurationLoading
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
@@ -60,7 +61,8 @@ class BootstrapRunnerEdgeCasesTest {
null, // null runtimeLockFile
tempDir.resolve("logs"),
"INFO",
"test-key"
"test-key",
false
);
AtomicReference<Path> capturedLockPath = new AtomicReference<>();
@@ -106,7 +108,8 @@ class BootstrapRunnerEdgeCasesTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-key"
"test-key",
false
);
String jdbcUrl = BootstrapRunner.buildJdbcUrl(config);
@@ -132,7 +135,8 @@ class BootstrapRunnerEdgeCasesTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-key"
"test-key",
false
);
String jdbcUrl = BootstrapRunner.buildJdbcUrl(config);
@@ -159,7 +163,8 @@ class BootstrapRunnerEdgeCasesTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-key"
"test-key",
false
);
// Verify BatchRunContext can be created (used internally by BootstrapRunner)
@@ -223,7 +228,7 @@ class BootstrapRunnerEdgeCasesTest {
return new StartConfiguration(sourceDir, targetDir, dbFile,
URI.create("https://api.example.com"), "gpt-4", 30, 3, 100, 50000,
promptFile, tempDir.resolve("lock.lock"), tempDir.resolve("logs"),
"INFO", "key");
"INFO", "key", false);
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -319,6 +324,24 @@ class BootstrapRunnerEdgeCasesTest {
assertEquals(1, runner.run(), "LOCK_UNAVAILABLE outcome should map to exit code 1");
}
// =========================================================================
// AI Content Sensitivity Resolution
// =========================================================================
@Test
void resolveAiContentSensitivity_falseYieldsProtectSensitiveContent() {
    // The flag being off must map to the conservative, privacy-preserving default.
    AiContentSensitivity resolved = BootstrapRunner.resolveAiContentSensitivity(false);
    assertEquals(AiContentSensitivity.PROTECT_SENSITIVE_CONTENT, resolved,
            "logAiSensitive=false must resolve to PROTECT_SENSITIVE_CONTENT (safe default)");
}
@Test
void resolveAiContentSensitivity_trueYieldsLogSensitiveContent() {
    // Only an explicit opt-in may enable logging of sensitive AI content.
    AiContentSensitivity resolved = BootstrapRunner.resolveAiContentSensitivity(true);
    assertEquals(AiContentSensitivity.LOG_SENSITIVE_CONTENT, resolved,
            "logAiSensitive=true must resolve to LOG_SENSITIVE_CONTENT");
}
// =========================================================================
// Mocks
// =========================================================================
@@ -364,7 +387,8 @@ class BootstrapRunnerEdgeCasesTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-api-key"
"test-api-key",
false
);
} catch (Exception e) {
throw new RuntimeException("Failed to create mock configuration", e);

View File

@@ -186,7 +186,8 @@ class BootstrapRunnerTest {
Paths.get(""), // empty simulates unconfigured runtime.lock.file
tempDir.resolve("logs"),
"INFO",
"test-key"
"test-key",
false
);
AtomicReference<Path> capturedLockPath = new AtomicReference<>();
@@ -319,7 +320,8 @@ class BootstrapRunnerTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-api-key"
"test-api-key",
false
);
} catch (Exception e) {
throw new RuntimeException("Failed to create mock configuration", e);

View File

@@ -17,10 +17,14 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* processed further in the next stage (target copy, final filename generation).
* This is a non-terminal intermediate state.</li>
* <li>{@link #SUCCESS} — document was fully processed end-to-end and written to the
* target location. Status is final and irreversible; skip in all future runs.</li>
* target location. Status is final and irreversible; skip in all future runs with
* {@link #SKIPPED_ALREADY_PROCESSED}.</li>
* <li>{@link #FAILED_RETRYABLE} — last attempt failed but is retryable; process again
* in the next run according to the applicable retry rule.</li>
* <li>{@link #FAILED_FINAL} — all allowed retries exhausted; skip in all future runs.</li>
* in the next run according to the applicable retry rule. This status is only
 * valid as long as at least one further scheduler run is permissible from a business perspective.</li>
* <li>{@link #FAILED_FINAL} — all allowed retries are exhausted; skip in all future
* runs with {@link #SKIPPED_FINAL_FAILURE}. This status is terminal and
* irreversible.</li>
* <li>{@link #PROCESSING} — document is currently being processed (transient, within a
* run); if found persisted after a crash, treat as {@link #FAILED_RETRYABLE}.</li>
* </ul>
@@ -38,6 +42,32 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* overall status was already {@link #FAILED_FINAL}.</li>
* </ul>
* <p>
* <strong>Terminal status rule:</strong>
* {@link #SUCCESS} and {@link #FAILED_FINAL} are the only truly terminal statuses.
* Documents with either of these statuses as their overall status are never reprocessed;
* they receive a historised skip attempt instead. Documents with
* {@link #FAILED_RETRYABLE}, {@link #READY_FOR_AI}, or {@link #PROPOSAL_READY} remain
 * processable in future runs, subject to their applicable retry rule.
* <p>
* <strong>Transition to {@link #FAILED_FINAL} — retry exhaustion rules:</strong>
* <ul>
* <li><em>Deterministic content errors</em>: The first historised deterministic content
* error leads to {@link #FAILED_RETRYABLE}. The second historised deterministic
* content error for the same fingerprint leads to {@link #FAILED_FINAL}. No further
* retry is possible after the second content error.</li>
* <li><em>Transient technical errors</em>: Each transient error increments the transient-error
* counter. When the counter reaches the configured {@code max.retries.transient} value
* (Integer &ge; 1), the attempt that reaches the limit transitions the document to
* {@link #FAILED_FINAL}. The value {@code 0} for {@code max.retries.transient} is
* invalid start configuration.</li>
* </ul>
* <p>
* <strong>Immediate within-run target copy retry:</strong>
* A single technical retry of the physical target-file copy step is permitted within the
* same run. This mechanism is not a cross-run retry and does not increment the
* transient-error counter. It does not change the document's overall status until the
* outcome of the retry is known.
* <p>
* <strong>Counter rules:</strong>
* <ul>
* <li>Only {@link #FAILED_RETRYABLE} and {@link #FAILED_FINAL} outcomes may increase
@@ -46,11 +76,17 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* never change any failure counter.</li>
* <li>A deterministic content error at first occurrence → {@link #FAILED_RETRYABLE},
* content-error counter +1. At second occurrence → {@link #FAILED_FINAL},
* content-error counter +2 (cumulative).</li>
* <li>A transient technical error after a successful fingerprint → {@link #FAILED_RETRYABLE},
* transient-error counter +1.</li>
* content-error counter +1 (cumulative total = 2).</li>
* <li>A transient technical error after a successful fingerprint → counter +1; status
* is {@link #FAILED_RETRYABLE} while counter &lt; {@code max.retries.transient},
* or {@link #FAILED_FINAL} when counter reaches {@code max.retries.transient}.</li>
* </ul>
*
* <p>
* <strong>Document-level errors and batch exit code:</strong>
* Document-level errors (content errors, transient errors, target copy failures) do not
* escalate to exit-code&nbsp;1. The batch run continues with remaining documents and
* exits with code&nbsp;0 as long as the run itself started cleanly. Exit-code&nbsp;1 is
* reserved for hard start or bootstrap failures only.
*/
public enum ProcessingStatus {

View File

@@ -57,5 +57,23 @@
* <li>Immutable value objects or enumerations</li>
* <li>Reusable across all layers via the Application and Adapter contracts</li>
* </ul>
* <p>
* <strong>Retry and status semantics summary (see {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus}
* for full detail):</strong>
* <ul>
* <li>{@code SUCCESS} and {@code FAILED_FINAL} are terminal; documents with these statuses
* receive a historised skip attempt in future runs and are never reprocessed.</li>
* <li>{@code FAILED_RETRYABLE} is only valid while at least one further scheduler run
 * is permissible for the document from a business perspective.</li>
* <li>Deterministic content errors follow the 1-retry rule: first occurrence →
* {@code FAILED_RETRYABLE}; second occurrence → {@code FAILED_FINAL}.</li>
* <li>Transient technical errors follow the configurable {@code max.retries.transient}
* rule (Integer &ge; 1): the attempt reaching the limit transitions to
* {@code FAILED_FINAL}.</li>
* <li>The within-run target copy immediate retry does not increment the transient-error
* counter and is not a cross-run retry.</li>
* <li>Document-level errors never produce exit-code&nbsp;1; they are contained within
* the batch run.</li>
* </ul>
*/
package de.gecheckt.pdf.umbenenner.domain.model;