diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java index 6ef1ac0..93143c8 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java @@ -227,8 +227,8 @@ public class StartConfigurationValidator { } private void validateMaxRetriesTransient(int maxRetriesTransient, List errors) { - if (maxRetriesTransient < 0) { - errors.add("- max.retries.transient: must be >= 0, got: " + maxRetriesTransient); + if (maxRetriesTransient < 1) { + errors.add("- max.retries.transient: must be >= 1, got: " + maxRetriesTransient); } } diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapter.java index ecc5a97..6418b37 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapter.java @@ -108,6 +108,7 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort { } private StartConfiguration buildStartConfiguration(Properties props, String apiKey) { + boolean logAiSensitive = Boolean.parseBoolean(getOptionalProperty(props, "log.ai.sensitive", "false")); return new StartConfiguration( Paths.get(getRequiredProperty(props, "source.folder")), Paths.get(getRequiredProperty(props, 
"target.folder")), @@ -122,7 +123,8 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort { Paths.get(getOptionalProperty(props, "runtime.lock.file", "")), Paths.get(getOptionalProperty(props, "log.directory", "")), getOptionalProperty(props, "log.level", "INFO"), - apiKey + apiKey, + logAiSensitive ); } diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/ai/OpenAiHttpAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/ai/OpenAiHttpAdapterTest.java index 3e0569a..18e217a 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/ai/OpenAiHttpAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/ai/OpenAiHttpAdapterTest.java @@ -89,7 +89,8 @@ class OpenAiHttpAdapterTest { Paths.get("/lock"), Paths.get("/logs"), "INFO", - API_KEY + API_KEY, + false ); // Use the package-private constructor with injected mock HttpClient adapter = new OpenAiHttpAdapter(testConfiguration, httpClient); @@ -450,7 +451,8 @@ class OpenAiHttpAdapterTest { Paths.get("/lock"), Paths.get("/logs"), "INFO", - API_KEY + API_KEY, + false ); assertThatThrownBy(() -> new OpenAiHttpAdapter(invalidConfig, httpClient)) @@ -475,7 +477,8 @@ class OpenAiHttpAdapterTest { Paths.get("/lock"), Paths.get("/logs"), "INFO", - API_KEY + API_KEY, + false ); assertThatThrownBy(() -> new OpenAiHttpAdapter(invalidConfig, httpClient)) @@ -500,7 +503,8 @@ class OpenAiHttpAdapterTest { Paths.get("/lock"), Paths.get("/logs"), "INFO", - API_KEY + API_KEY, + false ); assertThatThrownBy(() -> new OpenAiHttpAdapter(invalidConfig, httpClient)) @@ -526,7 +530,8 @@ class OpenAiHttpAdapterTest { Paths.get("/lock"), Paths.get("/logs"), "INFO", - "" // Empty key + "", // Empty key + false ); OpenAiHttpAdapter adapterWithEmptyKey = new OpenAiHttpAdapter(configWithEmptyKey, httpClient); diff --git 
a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java index 605de0a..0b71089 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java @@ -44,7 +44,8 @@ class StartConfigurationValidatorTest { tempDir.resolve("lock.lock"), tempDir.resolve("logs"), "INFO", - "test-api-key" + "test-api-key", + false ); assertDoesNotThrow(() -> validator.validate(config)); @@ -66,7 +67,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -92,7 +94,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -118,7 +121,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -149,7 +153,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -180,7 +185,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -210,7 +216,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -241,7 +248,8 @@ class StartConfigurationValidatorTest { 
null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -272,14 +280,48 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( InvalidStartConfigurationException.class, () -> validator.validate(config) ); - assertTrue(exception.getMessage().contains("max.retries.transient: must be >= 0")); + assertTrue(exception.getMessage().contains("max.retries.transient: must be >= 1")); + } + + @Test + void validate_failsWhenMaxRetriesTransientIsZero() throws Exception { + Path sourceFolder = Files.createDirectory(tempDir.resolve("source2")); + Path targetFolder = Files.createDirectory(tempDir.resolve("target2")); + Path sqliteFile = Files.createFile(tempDir.resolve("db2.sqlite")); + Path promptTemplateFile = Files.createFile(tempDir.resolve("prompt2.txt")); + + StartConfiguration config = new StartConfiguration( + sourceFolder, + targetFolder, + sqliteFile, + URI.create("https://api.example.com"), + "gpt-4", + 30, + 0, + 100, + 50000, + promptTemplateFile, + null, + null, + "INFO", + "test-api-key", + false + ); + + InvalidStartConfigurationException exception = assertThrows( + InvalidStartConfigurationException.class, + () -> validator.validate(config) + ); + assertTrue(exception.getMessage().contains("max.retries.transient: must be >= 1"), + "max.retries.transient = 0 is invalid startup configuration"); } @Test @@ -303,7 +345,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -334,7 +377,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -345,7 +389,7 @@ class StartConfigurationValidatorTest { } @Test - void 
validate_maxRetriesTransientZeroIsValid() throws Exception { + void validate_maxRetriesTransientOneIsValid() throws Exception { Path sourceFolder = Files.createDirectory(tempDir.resolve("source")); Path targetFolder = Files.createDirectory(tempDir.resolve("target")); Path sqliteFile = Files.createFile(tempDir.resolve("db.sqlite")); @@ -358,14 +402,15 @@ class StartConfigurationValidatorTest { URI.create("https://api.example.com"), "gpt-4", 30, - 0, // maxRetriesTransient = 0 ist gültig + 1, // maxRetriesTransient = 1 is the minimum valid value 100, 50000, promptTemplateFile, null, null, "INFO", - "test-api-key" + "test-api-key", + false ); assertDoesNotThrow(() -> validator.validate(config)); @@ -392,7 +437,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -422,7 +468,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -453,7 +500,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -485,7 +533,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); assertDoesNotThrow(() -> validator.validate(config), @@ -516,7 +565,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -548,7 +598,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -578,7 +629,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = 
assertThrows( @@ -609,7 +661,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -640,7 +693,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -670,7 +724,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -701,7 +756,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -731,7 +787,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -757,7 +814,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -772,7 +830,7 @@ class StartConfigurationValidatorTest { assertTrue(message.contains("api.model: must not be null or blank")); assertTrue(message.contains("prompt.template.file: must not be null")); assertTrue(message.contains("api.timeoutSeconds: must be > 0")); - assertTrue(message.contains("max.retries.transient: must be >= 0")); + assertTrue(message.contains("max.retries.transient: must be >= 1")); assertTrue(message.contains("max.pages: must be > 0")); assertTrue(message.contains("max.text.characters: must be > 0")); } @@ -804,7 +862,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); // Mock: always return "does not exist" error for any path @@ -840,7 +899,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); // Mock: simulate 
path exists but is not a directory @@ -876,7 +936,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); // Mock: simulate path exists, is directory, but is not readable @@ -914,7 +975,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); // Mock: all checks pass (return null) @@ -951,7 +1013,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -985,7 +1048,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -1016,7 +1080,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -1047,7 +1112,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -1078,7 +1144,8 @@ class StartConfigurationValidatorTest { tempDir.resolve("nonexistent/lock.lock"), // Lock file mit nicht existierendem Parent null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -1113,7 +1180,8 @@ class StartConfigurationValidatorTest { lockFileWithFileAsParent, // Lock file mit Datei als Parent null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -1147,7 +1215,8 @@ class StartConfigurationValidatorTest { null, logFileInsteadOfDirectory, // Datei statt Verzeichnis "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( @@ -1178,7 +1247,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + 
"test-api-key", + false ); assertDoesNotThrow(() -> validator.validate(config), @@ -1206,7 +1276,8 @@ class StartConfigurationValidatorTest { null, null, "INFO", - "test-api-key" + "test-api-key", + false ); InvalidStartConfigurationException exception = assertThrows( diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapterTest.java index 7ad2be6..7fac74d 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/configuration/PropertiesConfigurationPortAdapterTest.java @@ -1,6 +1,7 @@ package de.gecheckt.pdf.umbenenner.adapter.out.configuration; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assertions.assertTrue; @@ -328,6 +329,56 @@ class PropertiesConfigurationPortAdapterTest { "Invalid URI value should throw ConfigurationLoadingException"); } + @Test + void loadConfiguration_logAiSensitiveDefaultsFalseWhenAbsent() throws Exception { + Path configFile = createInlineConfig( + "source.folder=/tmp/source\n" + + "target.folder=/tmp/target\n" + + "sqlite.file=/tmp/db.sqlite\n" + + "api.baseUrl=https://api.example.com\n" + + "api.model=gpt-4\n" + + "api.timeoutSeconds=30\n" + + "max.retries.transient=3\n" + + "max.pages=100\n" + + "max.text.characters=50000\n" + + "prompt.template.file=/tmp/prompt.txt\n" + + "api.key=test-key\n" + // log.ai.sensitive intentionally omitted + ); + + PropertiesConfigurationPortAdapter adapter = new 
PropertiesConfigurationPortAdapter(emptyEnvLookup, configFile); + + var config = adapter.loadConfiguration(); + + assertFalse(config.logAiSensitive(), + "log.ai.sensitive must default to false when the property is absent"); + } + + @Test + void loadConfiguration_logAiSensitiveParsedTrueWhenExplicitlySet() throws Exception { + Path configFile = createInlineConfig( + "source.folder=/tmp/source\n" + + "target.folder=/tmp/target\n" + + "sqlite.file=/tmp/db.sqlite\n" + + "api.baseUrl=https://api.example.com\n" + + "api.model=gpt-4\n" + + "api.timeoutSeconds=30\n" + + "max.retries.transient=3\n" + + "max.pages=100\n" + + "max.text.characters=50000\n" + + "prompt.template.file=/tmp/prompt.txt\n" + + "api.key=test-key\n" + + "log.ai.sensitive=true\n" + ); + + PropertiesConfigurationPortAdapter adapter = new PropertiesConfigurationPortAdapter(emptyEnvLookup, configFile); + + var config = adapter.loadConfiguration(); + + assertTrue(config.logAiSensitive(), + "log.ai.sensitive must be parsed as true when explicitly set to 'true'"); + } + private Path createConfigFile(String resourceName) throws Exception { Path sourceResource = Path.of("src/test/resources", resourceName); Path targetConfigFile = tempDir.resolve("application.properties"); diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/RuntimeConfiguration.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/RuntimeConfiguration.java index e6b907f..41f3596 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/RuntimeConfiguration.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/RuntimeConfiguration.java @@ -1,5 +1,7 @@ package de.gecheckt.pdf.umbenenner.application.config; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; + /** * Minimal runtime configuration for the application layer. *

@@ -9,12 +11,59 @@ package de.gecheckt.pdf.umbenenner.application.config; *

* This intentionally small contract ensures the application layer depends only on * the configuration values it actually uses, following hexagonal architecture principles. + * + *

Validation invariants

+ * + * + *

AI content sensitivity

+ *

+ * The {@link #aiContentSensitivity()} field is derived from the {@code log.ai.sensitive} + * configuration property (default: {@code false}). It governs whether the complete AI raw + * response and complete AI {@code reasoning} may be written to log files. Sensitive AI + * content is always persisted in SQLite regardless of this setting; only log output is + * affected. + *

+ * The safe default ({@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}) must be used + * whenever {@code log.ai.sensitive} is absent, {@code false}, or set to any value other + * than the explicit opt-in. */ public record RuntimeConfiguration( /** * Maximum number of pages a document can have to be processed. * Documents exceeding this limit are rejected during pre-checks. */ - int maxPages + int maxPages, + + /** + * Maximum number of historised transient technical errors allowed per fingerprint + * across all scheduler runs. + *

+ * The attempt that causes the counter to reach this value finalises the document + * to {@code FAILED_FINAL}. Must be an Integer ≥ 1; the value {@code 0} is + * invalid start configuration. + *

+ * Example: {@code maxRetriesTransient = 1} means the first transient error + * immediately finalises the document. + */ + int maxRetriesTransient, + + /** + * Sensitivity decision governing whether AI-generated content may be written to log files. + *

+ * Derived from the {@code log.ai.sensitive} configuration property. The default is + * {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT} (do not log sensitive content). + * Only {@link AiContentSensitivity#LOG_SENSITIVE_CONTENT} is produced when + * {@code log.ai.sensitive = true} is explicitly set. + *

+ * Must not be {@code null}. + */ + AiContentSensitivity aiContentSensitivity ) { } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/startup/StartConfiguration.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/startup/StartConfiguration.java index 9fbaa12..dac24f2 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/startup/StartConfiguration.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/config/startup/StartConfiguration.java @@ -10,6 +10,16 @@ import java.nio.file.Path; * loaded and validated at bootstrap time. This is a complete configuration model * for the entire application startup, including paths, API settings, persistence, * and operational parameters. + * + *

AI content sensitivity ({@code log.ai.sensitive})

+ *

+ * The boolean property {@code log.ai.sensitive} controls whether sensitive AI-generated + * content (complete raw AI response, complete AI {@code reasoning}) may be written to + * log files. The default is {@code false} (safe/protect). Set to {@code true} only when + * explicit diagnostic logging of AI content is required. + *

+ * Sensitive AI content is always persisted in SQLite regardless of this setting. + * Only log output is affected. */ public record StartConfiguration( Path sourceFolder, @@ -25,6 +35,13 @@ public record StartConfiguration( Path runtimeLockFile, Path logDirectory, String logLevel, - String apiKey + String apiKey, + + /** + * Whether sensitive AI content (raw response, reasoning) may be written to log files. + * Corresponds to the {@code log.ai.sensitive} configuration property. + * Default: {@code false} (do not log sensitive content). + */ + boolean logAiSensitive ) { } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/AiContentSensitivity.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/AiContentSensitivity.java new file mode 100644 index 0000000..092c1a5 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/AiContentSensitivity.java @@ -0,0 +1,46 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Sensitivity decision governing whether AI-generated content may be written to log files. + *

+ * The following AI-generated content items are classified as sensitive and are subject to + * this decision: + *

+ *

+ * Sensitive AI content is always written to SQLite (for traceability) regardless of + * this decision. The decision controls only whether the content is also emitted into + * log files. + *

+ * Default behaviour: The default is {@link #PROTECT_SENSITIVE_CONTENT}. + * Logging of sensitive AI content must be explicitly enabled by setting the boolean + * configuration property {@code log.ai.sensitive = true}. Any other value, or the + * absence of the property, results in {@link #PROTECT_SENSITIVE_CONTENT}. + *

+ * Non-sensitive AI content (e.g. the resolved title, the resolved date, + * the date source) is not covered by this decision and may always be logged. + */ +public enum AiContentSensitivity { + + /** + * Sensitive AI content (raw response, reasoning) must not be written + * to log files. + *

+ * This is the safe default. It is active whenever {@code log.ai.sensitive} is absent, + * {@code false}, or set to any value other than the explicit opt-in. + */ + PROTECT_SENSITIVE_CONTENT, + + /** + * Sensitive AI content (raw response, reasoning) may be written + * to log files. + *

+ * This value is only produced when {@code log.ai.sensitive = true} is explicitly set + * in the application configuration. It must never be the implicit default. + */ + LOG_SENSITIVE_CONTENT +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentErrorClassification.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentErrorClassification.java new file mode 100644 index 0000000..808176c --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentErrorClassification.java @@ -0,0 +1,90 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Unified classification of all document-level errors in the end state. + *

+ * This enumeration provides a single, exhaustive taxonomy for every error category + * that the retry policy and logging infrastructure must distinguish. It replaces + * any ad-hoc string-based classification where an authoritative type is needed. + *

+ * Mapping to failure counters: + *

+ *

+ * Scope of deterministic content errors: + *

+ *

+ * Scope of transient technical errors: + *

+ *

+ * Architecture note: This type carries no infrastructure dependencies. + * It is safe to reference from Domain, Application and Adapter layers. + */ +public enum DocumentErrorClassification { + + /** + * A deterministic content error that cannot be resolved by retrying with the same + * document content. + *

+ * Examples: no extractable text, page limit exceeded, AI-returned title is generic + * or unusable, document content is ambiguous. + *

+ * Retry rule: the first historised occurrence of this error for a fingerprint leads + * to {@code FAILED_RETRYABLE} (one later run may retry). The second historised + * occurrence leads to {@code FAILED_FINAL} (no further retries). + */ + DETERMINISTIC_CONTENT_ERROR, + + /** + * A transient technical infrastructure failure unrelated to the document content. + *

+ * Examples: AI endpoint not reachable, HTTP timeout, malformed or non-parseable + * JSON, temporary I/O failure, temporary SQLite lock. + *

+ * Retry rule: remains {@code FAILED_RETRYABLE} until the transient-error counter + * reaches the configured {@code max.retries.transient} limit. The attempt that + * reaches the limit finalises the document to {@code FAILED_FINAL}. + * The configured limit must be an Integer ≥ 1; the value {@code 0} is invalid + * start configuration and prevents the batch run from starting. + */ + TRANSIENT_TECHNICAL_ERROR, + + /** + * A technical failure specifically on the physical target-file copy path. + *

+ * This error class is distinct from {@link #TRANSIENT_TECHNICAL_ERROR} because it + * triggers a special within-run handling: exactly one immediate technical retry of + * the copy operation is allowed within the same document run. No new AI call and no + * new naming proposal derivation occur during the immediate retry. + *

+ * If the immediate retry succeeds, the document proceeds to {@code SUCCESS}. + * If the immediate retry also fails, the combined failure is recorded as a + * {@link #TRANSIENT_TECHNICAL_ERROR} for counter and cross-run retry evaluation. + * The immediate retry is not counted in the cross-run transient-error counter. + */ + TARGET_COPY_TECHNICAL_ERROR +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentLogCorrelation.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentLogCorrelation.java new file mode 100644 index 0000000..ab7d1ce --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentLogCorrelation.java @@ -0,0 +1,81 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; + +/** + * Sealed type carrying the correlation context for all document-related log entries. + *

+ * The logging correlation rule distinguishes two phases of document processing: + *

    + *
  1. Pre-fingerprint phase: Before a {@link DocumentFingerprint} has + * been successfully computed (e.g. the source file cannot be read for hashing), + * log entries are correlated via the batch run identifier and a stable candidate + * description derived from the candidate's own identifier (typically its source + * file path or name). Use {@link CandidateCorrelation}.
  2. + *
  3. Post-fingerprint phase: Once the fingerprint has been + * successfully computed, all subsequent document-related log entries are correlated + * via the batch run identifier and the fingerprint. Use + * {@link FingerprintCorrelation}.
  4. + *
+ *

+ * Architecture constraints: + *

+ */ +public sealed interface DocumentLogCorrelation { + + /** + * Returns the batch run identifier shared by all log entries within one run. + * + * @return run identifier; never {@code null} + */ + RunId runId(); + + // ------------------------------------------------------------------------- + // Pre-fingerprint correlation + // ------------------------------------------------------------------------- + + /** + * Correlation context available before a {@link DocumentFingerprint} has been + * successfully computed. + *

+ * Used when the fingerprint computation itself fails or when a log entry must be + * emitted at the very start of candidate processing (before any hashing result is + * available). + *

+ * The {@code candidateDescription} is a stable, human-readable identifier for the + * candidate derived from the candidate's own unique identifier — typically the + * source file name or path representation. It must not change between log entries + * for the same candidate within a single run. + * + * @param runId batch run identifier; never {@code null} + * @param candidateDescription stable human-readable candidate identifier; + * never {@code null} or blank + */ + record CandidateCorrelation(RunId runId, String candidateDescription) + implements DocumentLogCorrelation {} + + // ------------------------------------------------------------------------- + // Post-fingerprint correlation + // ------------------------------------------------------------------------- + + /** + * Correlation context available after a {@link DocumentFingerprint} has been + * successfully computed. + *

+ * Used for all document-related log entries from the point at which the fingerprint + * is known. The fingerprint is the authoritative, content-stable document identity + * and must appear in or be unambiguously derivable from every subsequent log entry + * for this document. + * + * @param runId batch run identifier; never {@code null} + * @param fingerprint content-based document identity; never {@code null} + */ + record FingerprintCorrelation(RunId runId, DocumentFingerprint fingerprint) + implements DocumentLogCorrelation {} +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java index d9a9d80..f580fce 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java @@ -7,24 +7,34 @@ package de.gecheckt.pdf.umbenenner.application.port.out; *

*

- * A freshly discovered document starts with both counters at zero. - * Counters are only written by the repository layer on the instructions of the - * application use case; they never change as a side-effect of a read operation. + * Immediate within-run target copy retry: + * The physical target-copy retry within the same run is not tracked in either counter. + * It is a purely technical within-run mechanism and does not affect the + * cross-run counter state. + *

+ * Counter invariant: + * Both counters start at zero for a newly discovered document and only increase + * monotonically. The counters are written by the repository layer on the instructions + * of the application use case; they never change as a side-effect of a read operation. * - * @param contentErrorCount number of deterministic content errors recorded so far; - * must be >= 0 - * @param transientErrorCount number of transient technical errors recorded so far; - * must be >= 0 + * @param contentErrorCount number of historised deterministic content errors; + * must be ≥ 0 + * @param transientErrorCount number of historised transient technical errors; + * must be ≥ 0 */ public record FailureCounters(int contentErrorCount, int transientErrorCount) { diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ImmediateRetryDecision.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ImmediateRetryDecision.java new file mode 100644 index 0000000..dc24cfd --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ImmediateRetryDecision.java @@ -0,0 +1,44 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Decision governing whether a within-run immediate technical retry of the target copy + * operation is permitted. + *

+ * The immediate retry mechanism is strictly scoped: + *

+ *

+ * The concrete retry decision for the subsequent persistence step is derived from the + * combined outcome after the immediate retry completes (see {@link RetryDecision}). + */ +public enum ImmediateRetryDecision { + + /** + * An immediate within-run retry of the target copy operation is permitted. + *

+ * This value is produced when the first physical copy attempt within the current + * document run has failed. The copy must be retried exactly once more. + * No other pipeline stage is repeated. + */ + ALLOWED, + + /** + * No immediate within-run retry is permitted. + *

+ * This value is produced when the immediate retry quota for this document run has + * already been consumed (i.e. the immediate retry attempt itself has failed), or + * when the failure did not occur on the target copy path. + * The error must be escalated to the cross-run retry evaluation. + */ + DENIED +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/RetryDecision.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/RetryDecision.java new file mode 100644 index 0000000..f8e5fc1 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/RetryDecision.java @@ -0,0 +1,172 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Sealed type representing the complete, authoritative retry decision for a document + * after an error has been classified. + *

+ * A {@code RetryDecision} is the output of the retry policy evaluation. It unambiguously + * encodes what must happen next for the document: which status to persist, which counter + * to increment, and whether a within-run immediate retry is still possible. + *

+ * Decision cases and their semantics: + *

    + *
  1. {@link ContentErrorRetryable} — first deterministic content error. Document moves + * to {@code FAILED_RETRYABLE}; content-error counter is incremented by 1. One later + * scheduler run may retry.
  2. + *
  3. {@link ContentErrorFinal} — second (or later) deterministic content error. Document + * moves to {@code FAILED_FINAL}; content-error counter is incremented by 1. No further + * processing in any future run.
  4. + *
  5. {@link TransientErrorRetryable} — transient technical error with remaining retry budget. + * Document moves to {@code FAILED_RETRYABLE}; transient-error counter is incremented by 1. + * A later scheduler run may retry, as long as the counter stays below + * {@code max.retries.transient}.
  6. + *
  7. {@link TransientErrorFinal} — transient technical error that exhausts the configured + * {@code max.retries.transient} budget. Document moves to {@code FAILED_FINAL}; + * transient-error counter is incremented by 1. No further processing in any future run.
  8. + *
  9. {@link TargetCopyWithImmediateRetry} — first physical copy failure within the current + * run. The document has not yet changed status; exactly one immediate within-run retry + * of the copy step is permitted. No new AI call and no new naming-proposal derivation + * occur. This decision does not yet modify any counter or status; the outcome of the + * immediate retry determines which subsequent decision applies.
  10. + *
+ *

+ * What this type does NOT cover: + *

+ *

+ * Counter invariant: Skip decisions ({@code SKIPPED_ALREADY_PROCESSED}, + * {@code SKIPPED_FINAL_FAILURE}) never produce a {@code RetryDecision} and never change + * any failure counter. + *

+ * Single-truth rule: The retry decision is derived exclusively from the + * document master record and the attempt history. No additional, parallel truth source + * for retry state is introduced. + */ +public sealed interface RetryDecision { + + /** + * Returns the failure class identifier for persistence and logging. + *

+ * The failure class is a short, stable string identifying the type of failure, + * typically the enum constant name of the original error or exception class name. + * + * @return failure class string; never {@code null} or blank + */ + String failureClass(); + + /** + * Returns a human-readable failure message for persistence and logging. + * + * @return failure message; never {@code null} or blank + */ + String failureMessage(); + + // ------------------------------------------------------------------------- + // Deterministic content error cases + // ------------------------------------------------------------------------- + + /** + * First historised deterministic content error for this fingerprint. + *

+ * The document must be persisted with status {@code FAILED_RETRYABLE} and the + * content-error counter incremented by 1. Exactly one later scheduler run is + * permitted to retry. + * + * @param failureClass failure class identifier; never {@code null} or blank + * @param failureMessage human-readable failure description; never {@code null} or blank + */ + record ContentErrorRetryable(String failureClass, String failureMessage) + implements RetryDecision {} + + /** + * Second (or subsequent) historised deterministic content error for this fingerprint. + *

+ * The document must be persisted with status {@code FAILED_FINAL} and the + * content-error counter incremented by 1. No further processing is allowed in + * any future run. + * + * @param failureClass failure class identifier; never {@code null} or blank + * @param failureMessage human-readable failure description; never {@code null} or blank + */ + record ContentErrorFinal(String failureClass, String failureMessage) + implements RetryDecision {} + + // ------------------------------------------------------------------------- + // Transient technical error cases + // ------------------------------------------------------------------------- + + /** + * Transient technical error with remaining retry budget. + *

+ * The transient-error counter after incrementing is strictly less than + * {@code max.retries.transient}. The document must be persisted with status + * {@code FAILED_RETRYABLE} and the transient-error counter incremented by 1. + * A later scheduler run may retry. + * + * @param failureClass failure class identifier; never {@code null} or blank + * @param failureMessage human-readable failure description; never {@code null} or blank + */ + record TransientErrorRetryable(String failureClass, String failureMessage) + implements RetryDecision {} + + /** + * Transient technical error that exhausts the configured {@code max.retries.transient} + * budget. + *

+ * The transient-error counter after incrementing equals {@code max.retries.transient}. + * The document must be persisted with status {@code FAILED_FINAL} and the + * transient-error counter incremented by 1. No further processing is allowed in + * any future run. + *

+ * Example: with {@code max.retries.transient = 1}, the very first transient error + * produces this decision immediately. + * + * @param failureClass failure class identifier; never {@code null} or blank + * @param failureMessage human-readable failure description; never {@code null} or blank + */ + record TransientErrorFinal(String failureClass, String failureMessage) + implements RetryDecision {} + + // ------------------------------------------------------------------------- + // Target copy immediate retry case + // ------------------------------------------------------------------------- + + /** + * First physical target-file copy failure within the current run. + *

+ * Exactly one immediate technical retry of the copy operation is permitted within + * the same document run. This decision does not change any counter or document + * status — it defers the final outcome until the immediate retry completes: + *

+ *

+ * The immediate retry is strictly limited to the physical copy path. No new AI call + * and no new naming-proposal derivation occur. This mechanism does not increment the + * cross-run transient-error counter. + * + * @param failureMessage human-readable description of the initial copy failure; + * never {@code null} or blank + */ + record TargetCopyWithImmediateRetry(String failureMessage) implements RetryDecision { + + /** + * Returns the constant failure class identifier for target copy failures. + * + * @return {@code "TARGET_COPY_TECHNICAL_ERROR"} + */ + @Override + public String failureClass() { + return DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name(); + } + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/package-info.java index 0aea497..916a700 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/package-info.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/package-info.java @@ -62,6 +62,20 @@ * — Sealed result of parsing raw response into JSON structure (success or parsing failure) * *

+ * Retry policy and logging types: + *

+ *

* Exception types: *

+ * + *

Transient retry limit semantics

+ *

+ * {@code maxRetriesTransient} is interpreted as the maximum number of historised + * transient error attempts per fingerprint. The attempt that causes the counter + * to reach {@code maxRetriesTransient} finalises the document status to + * {@link ProcessingStatus#FAILED_FINAL}. Valid values are integers >= 1; + * the value 0 is invalid startup configuration and must be rejected before + * the batch run begins. + *

+ * Examples: + *

*/ final class ProcessingOutcomeTransition { @@ -52,24 +72,33 @@ final class ProcessingOutcomeTransition { *

* For new documents, all failure counters start at zero. * - * @param pipelineOutcome the outcome from the processing pipeline + * @param pipelineOutcome the outcome from the processing pipeline + * @param maxRetriesTransient maximum number of historised transient error attempts + * before the document is finalised to {@code FAILED_FINAL}; + * must be >= 1 * @return the mapped outcome with status, counters, and retryability */ - static ProcessingOutcome forNewDocument(DocumentProcessingOutcome pipelineOutcome) { - return forKnownDocument(pipelineOutcome, FailureCounters.zero()); + static ProcessingOutcome forNewDocument( + DocumentProcessingOutcome pipelineOutcome, + int maxRetriesTransient) { + return forKnownDocument(pipelineOutcome, FailureCounters.zero(), maxRetriesTransient); } /** * Maps a pipeline outcome to a processing outcome, considering the existing * failure counter state from a known document's history. * - * @param pipelineOutcome the outcome from the processing pipeline - * @param existingCounters the current failure counter values from the document's master record + * @param pipelineOutcome the outcome from the processing pipeline + * @param existingCounters the current failure counter values from the document's master record + * @param maxRetriesTransient maximum number of historised transient error attempts + * before the document is finalised to {@code FAILED_FINAL}; + * must be >= 1 * @return the mapped outcome with updated status, counters, and retryability */ static ProcessingOutcome forKnownDocument( DocumentProcessingOutcome pipelineOutcome, - FailureCounters existingCounters) { + FailureCounters existingCounters, + int maxRetriesTransient) { return switch (pipelineOutcome) { case NamingProposalReady ignored -> { @@ -106,31 +135,37 @@ final class ProcessingOutcomeTransition { } case TechnicalDocumentError ignored4 -> { - // Technical error (extraction / infrastructure): retryable, transient counter +1 + // Technical error (extraction / 
infrastructure): apply transient retry limit + FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount(); + boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient; yield new ProcessingOutcome( - ProcessingStatus.FAILED_RETRYABLE, - existingCounters.withIncrementedTransientErrorCount(), - true + limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE, + updatedCounters, + !limitReached ); } case AiTechnicalFailure ignored5 -> { - // Technical AI error (timeout, unreachable, bad JSON): retryable, transient counter +1 + // Technical AI error (timeout, unreachable, bad JSON): apply transient retry limit + FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount(); + boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient; yield new ProcessingOutcome( - ProcessingStatus.FAILED_RETRYABLE, - existingCounters.withIncrementedTransientErrorCount(), - true + limitReached ? ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE, + updatedCounters, + !limitReached ); } case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored6 -> { // Pre-check passed without AI step: in normal flow this should not appear at // the outcome transition level once the AI pipeline is fully wired. Treat it - // as a technical error to avoid silent inconsistency. + // as a technical error and apply the transient retry limit. + FailureCounters updatedCounters = existingCounters.withIncrementedTransientErrorCount(); + boolean limitReached = updatedCounters.transientErrorCount() >= maxRetriesTransient; yield new ProcessingOutcome( - ProcessingStatus.FAILED_RETRYABLE, - existingCounters.withIncrementedTransientErrorCount(), - true + limitReached ? 
ProcessingStatus.FAILED_FINAL : ProcessingStatus.FAILED_RETRYABLE, + updatedCounters, + !limitReached ); } }; diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/RetryDecisionEvaluator.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/RetryDecisionEvaluator.java new file mode 100644 index 0000000..64b0a55 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/RetryDecisionEvaluator.java @@ -0,0 +1,107 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification; +import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; +import de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision; +import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision; + +/** + * Application service contract for deriving authoritative retry decisions from + * document error state and configuration. + *

+ * This interface defines the single, testable entry point for all retry policy + * evaluations. Implementations must apply the binding retry rules exactly + * as specified: + *

+ *

+ * Counter semantics: + *

+ *

+ * {@code maxRetriesTransient} invariant: + * The value must be an integer ≥ 1. A value of {@code 0} is invalid configuration + * and must be rejected at startup before any batch run begins. Implementations of + * this interface may assume the value is always ≥ 1 when called. + *

+ * Example for {@code maxRetriesTransient = 1}: + *

+ * Example for {@code maxRetriesTransient = 2}: + * + *

+ * Single-truth rule: No parallel persistence source for retry + * decisions is introduced. Evaluations are derived solely from the document master + * record's failure counters and the configured limit. + */ +public interface RetryDecisionEvaluator { + + /** + * Derives the authoritative retry decision for a document-level error. + *

+ * The decision is determined by the error classification, the existing failure + * counters (before any increment for the current attempt), and the configured + * transient-retry limit. + * + * @param errorClass classification of the error that occurred; never {@code null} + * @param currentCounters failure counters before incrementing for this + * attempt; never {@code null} + * @param maxRetriesTransient configured maximum number of historised transient errors + * allowed per fingerprint; must be ≥ 1 + * @param failureClass short, stable failure class identifier for persistence + * and logging; never {@code null} or blank + * @param failureMessage human-readable description of the error; never {@code null} + * or blank + * @return the authoritative {@link RetryDecision}; never {@code null} + * @throws IllegalArgumentException if {@code maxRetriesTransient} is less than 1 + */ + RetryDecision evaluate( + DocumentErrorClassification errorClass, + FailureCounters currentCounters, + int maxRetriesTransient, + String failureClass, + String failureMessage); + + /** + * Determines whether an immediate within-run retry of the target copy operation + * is permitted. + *

+ * An immediate retry is {@link ImmediateRetryDecision#ALLOWED} only when the copy + * has failed on its first attempt within the current run. If this is the second + * copy attempt within the same run (i.e. the immediate retry itself has failed), + * the result is {@link ImmediateRetryDecision#DENIED}. + * + * @param isFirstCopyAttemptInThisRun {@code true} if the failing copy attempt was + * the first copy attempt for this document in + * the current run; {@code false} if it was the + * immediate retry attempt + * @return {@link ImmediateRetryDecision#ALLOWED} or {@link ImmediateRetryDecision#DENIED}; + * never {@code null} + */ + ImmediateRetryDecision evaluateImmediateRetry(boolean isFirstCopyAttemptInThisRun); +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java index c669a7e..e88c24c 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java @@ -222,23 +222,30 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa *

* Processing order: *

    + *
  1. Log: detected source file at INFO level with run-ID (pre-fingerprint + * correlation via run-ID and candidate description).
  2. *
  3. Record the attempt start instant.
  4. *
  5. Compute the SHA-256 fingerprint of the candidate file content.
  6. - *
  7. If fingerprint computation fails: log as non-identifiable run event and - * return true — no SQLite record is created, but no persistence failure occurred.
  8. + *
  9. If fingerprint computation fails: log as non-identifiable run event with run-ID + * and return true — no SQLite record is created, no persistence failure.
  10. *
  11. Load document master record.
  12. - *
  13. If already {@code SUCCESS} → persist skip attempt with - * {@code SKIPPED_ALREADY_PROCESSED}.
  14. - *
  15. If already {@code FAILED_FINAL} → persist skip attempt with - * {@code SKIPPED_FINAL_FAILURE}.
  16. - *
  17. Otherwise execute the pipeline (extraction + pre-checks).
  18. - *
  19. Map result into status, counters and retryable flag.
  20. + *
  21. If already {@code SUCCESS} → log skip at INFO level with fingerprint; + * persist skip attempt with {@code SKIPPED_ALREADY_PROCESSED}.
  22. + *
  23. If already {@code FAILED_FINAL} → log skip at INFO level with fingerprint; + * persist skip attempt with {@code SKIPPED_FINAL_FAILURE}.
  24. + *
  25. Otherwise execute the pipeline (extraction + pre-checks + AI naming).
  26. + *
  27. Map result into status, counters, and retryable flag.
  28. + *
  29. Log: retry decision at INFO level with fingerprint and error + * classification (FAILED_RETRYABLE or FAILED_FINAL).
  30. *
  31. Persist exactly one historised processing attempt.
  32. *
  33. Persist the updated document master record.
  34. *
*

* Per-document errors do not abort the overall batch run. Each candidate ends * controlled regardless of its outcome. + *

+ * Post-fingerprint log entries carry the document fingerprint for correlation. + * Pre-fingerprint log entries (steps 1–4) use run-ID and candidate description. * * @param candidate the candidate to process * @param context the current batch run context @@ -246,14 +253,15 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa * errors return true; persistence failures return false) */ private boolean processCandidate(SourceDocumentCandidate candidate, BatchRunContext context) { - logger.debug("Processing candidate: {}", candidate.uniqueIdentifier()); + logger.info("Detected source file '{}' for processing (RunId: {}).", + candidate.uniqueIdentifier(), context.runId()); Instant attemptStart = Instant.now(); FingerprintResult fingerprintResult = fingerprintPort.computeFingerprint(candidate); return switch (fingerprintResult) { case FingerprintTechnicalError fingerprintError -> { - handleFingerprintError(candidate, fingerprintError); + handleFingerprintError(candidate, fingerprintError, context); yield true; // fingerprint errors are not persistence failures } case FingerprintSuccess fingerprintSuccess -> @@ -262,15 +270,23 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa } /** - * Handles a fingerprint computation error by logging it as a non-identifiable event. + * Handles a fingerprint computation error by logging it as a non-identifiable run event. * No SQLite record is created for this candidate. + *

+ * Log entries before a successful fingerprint are correlated via the batch run identifier + * and the candidate description, as no fingerprint is available for document-level + * correlation. * * @param candidate the candidate that could not be fingerprinted - * @param error the fingerprint error + * @param error the fingerprint error + * @param context the current batch run context; used for run-level log correlation */ - private void handleFingerprintError(SourceDocumentCandidate candidate, FingerprintTechnicalError error) { - logger.warn("Fingerprint computation failed for '{}': {} — candidate skipped (not historised).", - candidate.uniqueIdentifier(), error.errorMessage()); + private void handleFingerprintError( + SourceDocumentCandidate candidate, + FingerprintTechnicalError error, + BatchRunContext context) { + logger.warn("Fingerprint computation failed for '{}' (RunId: {}): {} — candidate not historised.", + candidate.uniqueIdentifier(), context.runId(), error.errorMessage()); } /** diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentLogCorrelationTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentLogCorrelationTest.java new file mode 100644 index 0000000..9a1707d --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentLogCorrelationTest.java @@ -0,0 +1,189 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; + +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for the {@link DocumentLogCorrelation} sealed type and its two permitted implementations. + *

+ * Verifies: + *

+ */ +class DocumentLogCorrelationTest { + + private static final String RUN_ID_VALUE = "run-correlation-test-001"; + private static final String CANDIDATE_DESCRIPTION = "invoice-2026-01-15.pdf"; + private static final String FINGERPRINT_HEX = "a".repeat(64); + + // ------------------------------------------------------------------------- + // CandidateCorrelation – pre-fingerprint phase + // ------------------------------------------------------------------------- + + @Test + void candidateCorrelation_storesRunId() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentLogCorrelation.CandidateCorrelation correlation = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + + assertEquals(runId, correlation.runId()); + } + + @Test + void candidateCorrelation_storesCandidateDescription() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentLogCorrelation.CandidateCorrelation correlation = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + + assertEquals(CANDIDATE_DESCRIPTION, correlation.candidateDescription()); + } + + @Test + void candidateCorrelation_runIdAccessibleViaInterface() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentLogCorrelation correlation = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + + // runId() is declared on the sealed interface and must be accessible polymorphically + assertEquals(runId, correlation.runId()); + } + + @Test + void candidateCorrelation_twoInstancesWithSameDataAreEqual() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentLogCorrelation.CandidateCorrelation first = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + DocumentLogCorrelation.CandidateCorrelation second = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + + assertEquals(first, second); + } + + @Test + void candidateCorrelation_implementsDocumentLogCorrelation() { + RunId runId = new RunId(RUN_ID_VALUE); + 
DocumentLogCorrelation.CandidateCorrelation correlation = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + + assertInstanceOf(DocumentLogCorrelation.class, correlation); + } + + // ------------------------------------------------------------------------- + // FingerprintCorrelation – post-fingerprint phase + // ------------------------------------------------------------------------- + + @Test + void fingerprintCorrelation_storesRunId() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX); + DocumentLogCorrelation.FingerprintCorrelation correlation = + new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint); + + assertEquals(runId, correlation.runId()); + } + + @Test + void fingerprintCorrelation_storesFingerprint() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX); + DocumentLogCorrelation.FingerprintCorrelation correlation = + new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint); + + assertEquals(fingerprint, correlation.fingerprint()); + } + + @Test + void fingerprintCorrelation_runIdAccessibleViaInterface() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX); + DocumentLogCorrelation correlation = + new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint); + + // runId() is declared on the sealed interface and must be accessible polymorphically + assertEquals(runId, correlation.runId()); + } + + @Test + void fingerprintCorrelation_twoInstancesWithSameDataAreEqual() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX); + DocumentLogCorrelation.FingerprintCorrelation first = + new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint); + DocumentLogCorrelation.FingerprintCorrelation second = + new 
DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint); + + assertEquals(first, second); + } + + @Test + void fingerprintCorrelation_implementsDocumentLogCorrelation() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentFingerprint fingerprint = new DocumentFingerprint(FINGERPRINT_HEX); + DocumentLogCorrelation.FingerprintCorrelation correlation = + new DocumentLogCorrelation.FingerprintCorrelation(runId, fingerprint); + + assertInstanceOf(DocumentLogCorrelation.class, correlation); + } + + // ------------------------------------------------------------------------- + // Sealed type structural contract + // ------------------------------------------------------------------------- + + @Test + void sealedType_patternMatchExhaustsAllPermittedSubtypes() { + RunId runId = new RunId(RUN_ID_VALUE); + + DocumentLogCorrelation candidatePhase = + new DocumentLogCorrelation.CandidateCorrelation(runId, CANDIDATE_DESCRIPTION); + DocumentLogCorrelation fingerprintPhase = + new DocumentLogCorrelation.FingerprintCorrelation(runId, new DocumentFingerprint(FINGERPRINT_HEX)); + + // Pattern match on the sealed type must compile exhaustively for exactly these two cases + String candidatePhaseResult = describe(candidatePhase); + String fingerprintPhaseResult = describe(fingerprintPhase); + + assertEquals("candidate", candidatePhaseResult); + assertEquals("fingerprint", fingerprintPhaseResult); + } + + /** Helper method using an exhaustive switch over the sealed type. 
*/ + private static String describe(DocumentLogCorrelation correlation) { + return switch (correlation) { + case DocumentLogCorrelation.CandidateCorrelation ignored -> "candidate"; + case DocumentLogCorrelation.FingerprintCorrelation ignored -> "fingerprint"; + }; + } + + @Test + void candidateCorrelation_differentDescriptions_areNotEqual() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentLogCorrelation.CandidateCorrelation withFirst = + new DocumentLogCorrelation.CandidateCorrelation(runId, "first.pdf"); + DocumentLogCorrelation.CandidateCorrelation withSecond = + new DocumentLogCorrelation.CandidateCorrelation(runId, "second.pdf"); + + assertNotEquals(withFirst, withSecond); + } + + @Test + void fingerprintCorrelation_differentFingerprints_areNotEqual() { + RunId runId = new RunId(RUN_ID_VALUE); + DocumentFingerprint first = new DocumentFingerprint("a".repeat(64)); + DocumentFingerprint second = new DocumentFingerprint("b".repeat(64)); + DocumentLogCorrelation.FingerprintCorrelation withFirst = + new DocumentLogCorrelation.FingerprintCorrelation(runId, first); + DocumentLogCorrelation.FingerprintCorrelation withSecond = + new DocumentLogCorrelation.FingerprintCorrelation(runId, second); + + assertNotEquals(withFirst, withSecond); + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DefaultRetryDecisionEvaluatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DefaultRetryDecisionEvaluatorTest.java new file mode 100644 index 0000000..c732a00 --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DefaultRetryDecisionEvaluatorTest.java @@ -0,0 +1,320 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentErrorClassification; +import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; +import 
de.gecheckt.pdf.umbenenner.application.port.out.ImmediateRetryDecision; +import de.gecheckt.pdf.umbenenner.application.port.out.RetryDecision; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for {@link DefaultRetryDecisionEvaluator}. + *

+ * Verifies the binding retry policy rules for deterministic content errors, + * transient technical errors, target copy failures, and the within-run + * immediate retry mechanism. + */ +class DefaultRetryDecisionEvaluatorTest { + + private static final String FAILURE_CLASS = "SOME_FAILURE"; + private static final String FAILURE_MESSAGE = "Something went wrong"; + + private DefaultRetryDecisionEvaluator evaluator; + + @BeforeEach + void setUp() { + evaluator = new DefaultRetryDecisionEvaluator(); + } + + // ------------------------------------------------------------------------- + // Deterministic content error rules + // ------------------------------------------------------------------------- + + @Test + void evaluate_firstContentError_returnsContentErrorRetryable() { + FailureCounters counters = new FailureCounters(0, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + counters, 1, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.ContentErrorRetryable.class, decision); + RetryDecision.ContentErrorRetryable retryable = (RetryDecision.ContentErrorRetryable) decision; + assertEquals(FAILURE_CLASS, retryable.failureClass()); + assertEquals(FAILURE_MESSAGE, retryable.failureMessage()); + } + + @Test + void evaluate_secondContentError_returnsContentErrorFinal() { + FailureCounters counters = new FailureCounters(1, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + counters, 1, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.ContentErrorFinal.class, decision); + RetryDecision.ContentErrorFinal finalDecision = (RetryDecision.ContentErrorFinal) decision; + assertEquals(FAILURE_CLASS, finalDecision.failureClass()); + assertEquals(FAILURE_MESSAGE, finalDecision.failureMessage()); + } + + @Test + void evaluate_subsequentContentErrors_alwaysReturnContentErrorFinal() { + // Any count >= 1 results in final (covers 
legacy M4-M6 data with higher counts) + for (int count = 1; count <= 5; count++) { + FailureCounters counters = new FailureCounters(count, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + counters, 1, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.ContentErrorFinal.class, decision, + "Expected ContentErrorFinal for contentErrorCount=" + count); + } + } + + @Test + void evaluate_contentError_transientCounterIsIrrelevant() { + // Non-zero transient counter must not affect content error decision + FailureCounters counters = new FailureCounters(0, 5); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + counters, 1, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.ContentErrorRetryable.class, decision); + } + + // ------------------------------------------------------------------------- + // Transient technical error rules + // ------------------------------------------------------------------------- + + @Test + void evaluate_transientError_maxRetriesTransientOne_firstError_returnsTransientErrorFinal() { + // maxRetriesTransient=1: counter before=0, after=1=limit → final immediately + FailureCounters counters = new FailureCounters(0, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 1, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision, + "With maxRetriesTransient=1, first transient error must be final"); + RetryDecision.TransientErrorFinal finalDecision = (RetryDecision.TransientErrorFinal) decision; + assertEquals(FAILURE_CLASS, finalDecision.failureClass()); + assertEquals(FAILURE_MESSAGE, finalDecision.failureMessage()); + } + + @Test + void evaluate_transientError_maxRetriesTransientTwo_firstError_returnsTransientErrorRetryable() { + // maxRetriesTransient=2: counter before=0, 
after=1 < 2 → retryable + FailureCounters counters = new FailureCounters(0, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 2, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision); + RetryDecision.TransientErrorRetryable retryable = (RetryDecision.TransientErrorRetryable) decision; + assertEquals(FAILURE_CLASS, retryable.failureClass()); + assertEquals(FAILURE_MESSAGE, retryable.failureMessage()); + } + + @Test + void evaluate_transientError_maxRetriesTransientTwo_secondError_returnsTransientErrorFinal() { + // maxRetriesTransient=2: counter before=1, after=2=limit → final + FailureCounters counters = new FailureCounters(0, 1); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 2, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision, + "With maxRetriesTransient=2, second transient error must be final"); + } + + @Test + void evaluate_transientError_maxRetriesTransientThree_firstError_returnsRetryable() { + FailureCounters counters = new FailureCounters(0, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 3, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision); + } + + @Test + void evaluate_transientError_maxRetriesTransientThree_secondError_returnsRetryable() { + FailureCounters counters = new FailureCounters(0, 1); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 3, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision); + } + + @Test + void evaluate_transientError_maxRetriesTransientThree_thirdError_returnsFinal() { + // counter before=2, after=3=limit → final + 
FailureCounters counters = new FailureCounters(0, 2); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 3, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision, + "Third transient error with maxRetriesTransient=3 must be final"); + } + + @Test + void evaluate_transientError_contentCounterIsIrrelevant() { + // Non-zero content error counter must not affect transient error decision + FailureCounters counters = new FailureCounters(1, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 2, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TransientErrorRetryable.class, decision); + } + + @Test + void evaluate_transientError_legacyDataWithHigherCounts_finalizesCorrectly() { + // Existing M4-M6 data may have counter values beyond normal expectations; + // the evaluator must still apply the threshold check consistently + FailureCounters counters = new FailureCounters(3, 5); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 3, FAILURE_CLASS, FAILURE_MESSAGE); + + // counter before=5, after=6 >= 3 → final + assertInstanceOf(RetryDecision.TransientErrorFinal.class, decision); + } + + // ------------------------------------------------------------------------- + // Target copy technical error rule + // ------------------------------------------------------------------------- + + @Test + void evaluate_targetCopyError_returnsTargetCopyWithImmediateRetry() { + FailureCounters counters = new FailureCounters(0, 0); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR, + counters, 1, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TargetCopyWithImmediateRetry.class, decision); + RetryDecision.TargetCopyWithImmediateRetry immediate = + 
(RetryDecision.TargetCopyWithImmediateRetry) decision; + assertEquals(FAILURE_MESSAGE, immediate.failureMessage()); + assertEquals(DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR.name(), + immediate.failureClass()); + } + + @Test + void evaluate_targetCopyError_countersAndMaxRetriesAreIgnored() { + // Target copy decision is independent of counters and maxRetriesTransient + FailureCounters counters = new FailureCounters(2, 3); + + RetryDecision decision = evaluator.evaluate( + DocumentErrorClassification.TARGET_COPY_TECHNICAL_ERROR, + counters, 5, FAILURE_CLASS, FAILURE_MESSAGE); + + assertInstanceOf(RetryDecision.TargetCopyWithImmediateRetry.class, decision); + } + + // ------------------------------------------------------------------------- + // Immediate within-run retry decision + // ------------------------------------------------------------------------- + + @Test + void evaluateImmediateRetry_firstAttempt_returnsAllowed() { + ImmediateRetryDecision decision = evaluator.evaluateImmediateRetry(true); + + assertEquals(ImmediateRetryDecision.ALLOWED, decision); + } + + @Test + void evaluateImmediateRetry_secondAttempt_returnsDenied() { + ImmediateRetryDecision decision = evaluator.evaluateImmediateRetry(false); + + assertEquals(ImmediateRetryDecision.DENIED, decision); + } + + // ------------------------------------------------------------------------- + // Guard conditions + // ------------------------------------------------------------------------- + + @Test + void evaluate_throwsWhenMaxRetriesTransientIsZero() { + FailureCounters counters = FailureCounters.zero(); + + assertThrows(IllegalArgumentException.class, () -> + evaluator.evaluate( + DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, 0, FAILURE_CLASS, FAILURE_MESSAGE)); + } + + @Test + void evaluate_throwsWhenMaxRetriesTransientIsNegative() { + FailureCounters counters = FailureCounters.zero(); + + assertThrows(IllegalArgumentException.class, () -> + evaluator.evaluate( + 
DocumentErrorClassification.TRANSIENT_TECHNICAL_ERROR, + counters, -1, FAILURE_CLASS, FAILURE_MESSAGE)); + } + + @Test + void evaluate_throwsWhenErrorClassIsNull() { + assertThrows(NullPointerException.class, () -> + evaluator.evaluate(null, FailureCounters.zero(), 1, + FAILURE_CLASS, FAILURE_MESSAGE)); + } + + @Test + void evaluate_throwsWhenCountersAreNull() { + assertThrows(NullPointerException.class, () -> + evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + null, 1, FAILURE_CLASS, FAILURE_MESSAGE)); + } + + @Test + void evaluate_throwsWhenFailureClassIsNull() { + assertThrows(NullPointerException.class, () -> + evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + FailureCounters.zero(), 1, null, FAILURE_MESSAGE)); + } + + @Test + void evaluate_throwsWhenFailureClassIsBlank() { + assertThrows(IllegalArgumentException.class, () -> + evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + FailureCounters.zero(), 1, " ", FAILURE_MESSAGE)); + } + + @Test + void evaluate_throwsWhenFailureMessageIsNull() { + assertThrows(NullPointerException.class, () -> + evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + FailureCounters.zero(), 1, FAILURE_CLASS, null)); + } + + @Test + void evaluate_throwsWhenFailureMessageIsBlank() { + assertThrows(IllegalArgumentException.class, () -> + evaluator.evaluate( + DocumentErrorClassification.DETERMINISTIC_CONTENT_ERROR, + FailureCounters.zero(), 1, FAILURE_CLASS, " ")); + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java index 3790d17..810b471 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java +++ 
b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java @@ -70,6 +70,9 @@ class DocumentProcessingCoordinatorTest { private static final String FINGERPRINT_HEX = "a".repeat(64); // 64 lowercase hex chars + /** Default transient retry limit used in the shared {@link #processor} instance. */ + private static final int DEFAULT_MAX_RETRIES_TRANSIENT = 3; + private CapturingDocumentRecordRepository recordRepo; private CapturingProcessingAttemptRepository attemptRepo; private CapturingUnitOfWorkPort unitOfWorkPort; @@ -86,7 +89,8 @@ class DocumentProcessingCoordinatorTest { attemptRepo = new CapturingProcessingAttemptRepository(); unitOfWorkPort = new CapturingUnitOfWorkPort(recordRepo, attemptRepo); processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); candidate = new SourceDocumentCandidate( "test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf")); @@ -198,9 +202,11 @@ class DocumentProcessingCoordinatorTest { @Test void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() { + // Starting with 1 transient error; with DEFAULT_MAX_RETRIES_TRANSIENT=3, counter + // becomes 2 after this run which is still below the limit → FAILED_RETRYABLE DocumentRecord existingRecord = buildRecord( ProcessingStatus.FAILED_RETRYABLE, - new FailureCounters(0, 2)); + new FailureCounters(0, 1)); recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); DocumentProcessingOutcome outcome = new TechnicalDocumentError( @@ -212,10 +218,54 @@ class DocumentProcessingCoordinatorTest { DocumentRecord record = recordRepo.updatedRecords.get(0); assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus()); assertEquals(0, 
record.failureCounters().contentErrorCount()); - assertEquals(3, record.failureCounters().transientErrorCount()); + assertEquals(2, record.failureCounters().transientErrorCount()); assertTrue(attemptRepo.savedAttempts.get(0).retryable()); } + @Test + void process_knownDocument_technicalError_atTransientLimit_persistsFailedFinal() { + // Counter already at limit - 1: the next error finalises the document + DocumentRecord existingRecord = buildRecord( + ProcessingStatus.FAILED_RETRYABLE, + new FailureCounters(0, DEFAULT_MAX_RETRIES_TRANSIENT - 1)); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + DocumentProcessingOutcome outcome = new TechnicalDocumentError( + candidate, "Timeout at limit", null); + + processor.process(candidate, fingerprint, outcome, context, attemptStart); + + assertEquals(1, recordRepo.updatedRecords.size()); + DocumentRecord record = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus(), + "Document must be finalised when transient limit is reached"); + assertEquals(DEFAULT_MAX_RETRIES_TRANSIENT, record.failureCounters().transientErrorCount(), + "Transient counter must be incremented to the limit value"); + assertFalse(attemptRepo.savedAttempts.get(0).retryable(), + "Attempt must not be retryable when transient limit is reached"); + } + + @Test + void process_newDocument_technicalError_maxRetriesTransient1_immediatelyFinalises() { + // With maxRetriesTransient=1, the very first transient error finalises the document + DocumentProcessingCoordinator coordinatorWith1Retry = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 1); + recordRepo.setLookupResult(new DocumentUnknown()); + + DocumentProcessingOutcome outcome = new TechnicalDocumentError( + candidate, "I/O error", null); + + coordinatorWith1Retry.process(candidate, fingerprint, outcome, context, 
attemptStart); + + assertEquals(1, recordRepo.createdRecords.size()); + DocumentRecord record = recordRepo.createdRecords.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, record.overallStatus(), + "With maxRetriesTransient=1, the first transient error must immediately finalise"); + assertEquals(1, record.failureCounters().transientErrorCount()); + assertFalse(attemptRepo.savedAttempts.get(0).retryable()); + } + @Test void process_knownDocument_namingProposalReady_persistsProposalReadyStatus() { DocumentRecord existingRecord = buildRecord( @@ -617,7 +667,8 @@ class DocumentProcessingCoordinatorTest { CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("Datenbank nicht erreichbar", null)); DocumentProcessingOutcome outcome = new PreCheckPassed( candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); @@ -634,7 +685,8 @@ class DocumentProcessingCoordinatorTest { CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()); recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckPassed( @@ -652,7 +704,8 @@ class DocumentProcessingCoordinatorTest { 
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0)); recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckFailed( @@ -670,7 +723,8 @@ class DocumentProcessingCoordinatorTest { CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); recordRepo.setLookupResult(new DocumentUnknown()); DocumentProcessingOutcome outcome = new PreCheckPassed( candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); @@ -687,7 +741,8 @@ class DocumentProcessingCoordinatorTest { CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); recordRepo.setLookupResult(new DocumentUnknown()); unitOfWorkPort.failOnExecute = true; DocumentProcessingOutcome outcome = new PreCheckPassed( @@ -705,7 +760,8 @@ class DocumentProcessingCoordinatorTest { CapturingProcessingLogger capturingLogger = 
new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()); recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckPassed( @@ -723,7 +779,8 @@ class DocumentProcessingCoordinatorTest { CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()); recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); unitOfWorkPort.failOnExecute = true; @@ -811,7 +868,8 @@ class DocumentProcessingCoordinatorTest { DocumentProcessingCoordinator coordinatorWithFailingFolder = new DocumentProcessingCoordinator( recordRepo, attemptRepo, unitOfWorkPort, - new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); + new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); @@ -830,7 +888,8 @@ class DocumentProcessingCoordinatorTest { DocumentProcessingCoordinator coordinatorWithFailingCopy = new DocumentProcessingCoordinator( recordRepo, attemptRepo, unitOfWorkPort, - new 
NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger()); + new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); @@ -902,6 +961,175 @@ class DocumentProcessingCoordinatorTest { assertFalse(result, "Should return false when persistence fails after successful copy"); } + @Test + void processDeferredOutcome_proposalReady_firstCopyFails_immediateRetrySucceeds_persistsSuccess() { + // First copy attempt fails, immediate within-run retry succeeds → SUCCESS + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CountingTargetFileCopyPort countingCopyPort = new CountingTargetFileCopyPort(1); // fail first call only + DocumentProcessingCoordinator coordinatorWithCountingCopy = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); + + boolean result = coordinatorWithCountingCopy.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, c -> { + throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); + }); + + assertTrue(result, "Should succeed when immediate retry of target copy succeeds"); + + ProcessingAttempt successAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.SUCCESS) + .findFirst() + .orElse(null); + assertNotNull(successAttempt, "A SUCCESS attempt must be persisted after a successful immediate retry"); + + DocumentRecord updated = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.SUCCESS, updated.overallStatus(), + "Master record must show SUCCESS after 
successful immediate retry"); + + assertEquals(2, countingCopyPort.callCount, + "copyToTarget must have been called exactly twice: first attempt + one retry"); + } + + @Test + void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_persistsTransientError() { + // Both the first copy attempt and the immediate retry fail → FAILED_RETRYABLE + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CountingTargetFileCopyPort countingCopyPort = new CountingTargetFileCopyPort(2); // fail both calls + DocumentProcessingCoordinator coordinatorWithCountingCopy = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCountingCopy.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when both copy attempts fail"); + assertTrue(errorAttempt.retryable(), "Error must be retryable after exhausting immediate retry"); + + assertEquals(2, countingCopyPort.callCount, + "copyToTarget must have been called exactly twice: first attempt + one immediate retry"); + } + + @Test + void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() { + // Ensures that during the immediate retry path no pipeline (AI) execution happens + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + 
attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CountingTargetFileCopyPort countingCopyPort = new CountingTargetFileCopyPort(1); // fail first, succeed second + DocumentProcessingCoordinator coordinatorWithCountingCopy = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCountingCopy.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, + c -> { throw new AssertionError("AI pipeline must NOT run during immediate retry"); }); + + // No FAILED_RETRYABLE must have been persisted — the retry succeeded + long failedRetryableCount = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .count(); + assertEquals(0, failedRetryableCount, + "No FAILED_RETRYABLE must be persisted when immediate retry succeeds"); + } + + // ------------------------------------------------------------------------- + // Sequential multi-run lifecycle tests + // ------------------------------------------------------------------------- + + @Test + void process_contentErrorLifecycle_firstRunRetryable_secondRunFinal_thirdRunSkipped() { + // Run 1: new document, first deterministic content error → FAILED_RETRYABLE + recordRepo.setLookupResult(new DocumentUnknown()); + DocumentProcessingOutcome contentError = new PreCheckFailed( + candidate, PreCheckFailureReason.NO_USABLE_TEXT); + + processor.process(candidate, fingerprint, contentError, context, attemptStart); + + DocumentRecord afterRun1 = recordRepo.createdRecords.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(), + "First content error must yield FAILED_RETRYABLE"); + assertEquals(1, afterRun1.failureCounters().contentErrorCount()); + assertTrue(attemptRepo.savedAttempts.get(0).retryable(), + "First content error attempt must be retryable"); + + // Run 2: known document 
(FAILED_RETRYABLE, contentErrorCount=1), second content error → FAILED_FINAL + recordRepo.setLookupResult(new DocumentKnownProcessable(afterRun1)); + + processor.process(candidate, fingerprint, contentError, context, attemptStart); + + DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, afterRun2.overallStatus(), + "Second content error must yield FAILED_FINAL"); + assertEquals(2, afterRun2.failureCounters().contentErrorCount()); + assertFalse(attemptRepo.savedAttempts.get(1).retryable(), + "Second content error attempt must not be retryable"); + + // Run 3: terminal FAILED_FINAL → SKIPPED_FINAL_FAILURE; counters must not change + recordRepo.setLookupResult(new DocumentTerminalFinalFailure(afterRun2)); + + processor.process(candidate, fingerprint, contentError, context, attemptStart); + + assertEquals(3, attemptRepo.savedAttempts.size(), + "Three attempts must be recorded across the three runs"); + ProcessingAttempt skipAttempt = attemptRepo.savedAttempts.get(2); + assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status()); + assertFalse(skipAttempt.retryable()); + + DocumentRecord afterRun3 = recordRepo.updatedRecords.get(1); + assertEquals(2, afterRun3.failureCounters().contentErrorCount(), + "Content error counter must remain 2 after a SKIPPED_FINAL_FAILURE event"); + assertEquals(0, afterRun3.failureCounters().transientErrorCount(), + "Transient error counter must remain 0 after a SKIPPED_FINAL_FAILURE event"); + } + + @Test + void process_transientErrorLifecycle_maxRetriesTransient2_firstRetryable_secondFinal() { + // maxRetriesTransient=2: first transient error → FAILED_RETRYABLE, second → FAILED_FINAL + DocumentProcessingCoordinator coordinatorWith2Retries = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 2); + DocumentProcessingOutcome transientError = new 
TechnicalDocumentError(candidate, "Timeout", null); + + // Run 1: new document, first transient error → FAILED_RETRYABLE, transientErrorCount=1 + recordRepo.setLookupResult(new DocumentUnknown()); + + coordinatorWith2Retries.process(candidate, fingerprint, transientError, context, attemptStart); + + DocumentRecord afterRun1 = recordRepo.createdRecords.get(0); + assertEquals(ProcessingStatus.FAILED_RETRYABLE, afterRun1.overallStatus(), + "First transient error must yield FAILED_RETRYABLE when limit not yet reached"); + assertEquals(1, afterRun1.failureCounters().transientErrorCount()); + assertTrue(attemptRepo.savedAttempts.get(0).retryable()); + + // Run 2: transientErrorCount=1, second transient error reaches limit=2 → FAILED_FINAL + recordRepo.setLookupResult(new DocumentKnownProcessable(afterRun1)); + + coordinatorWith2Retries.process(candidate, fingerprint, transientError, context, attemptStart); + + DocumentRecord afterRun2 = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.FAILED_FINAL, afterRun2.overallStatus(), + "Second transient error must yield FAILED_FINAL when maxRetriesTransient=2 is reached"); + assertEquals(2, afterRun2.failureCounters().transientErrorCount(), + "Transient error counter must equal maxRetriesTransient after finalisation"); + assertFalse(attemptRepo.savedAttempts.get(1).retryable(), + "Final transient error attempt must not be retryable"); + } + // ------------------------------------------------------------------------- // Helpers // ------------------------------------------------------------------------- @@ -1089,6 +1317,26 @@ class DocumentProcessingCoordinatorTest { } } + private static class CountingTargetFileCopyPort implements TargetFileCopyPort { + private int callCount = 0; + private final int failFirstNCalls; + + CountingTargetFileCopyPort(int failFirstNCalls) { + this.failFirstNCalls = failFirstNCalls; + } + + @Override + public TargetFileCopyResult copyToTarget( + 
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator, + String resolvedFilename) { + callCount++; + if (callCount <= failFirstNCalls) { + return new TargetFileCopyTechnicalFailure("Simulated copy failure on call " + callCount, false); + } + return new TargetFileCopySuccess(); + } + } + private static class NoOpTargetFolderPort implements TargetFolderPort { @Override public String getTargetFolderLocator() { diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java index 3ae55fb..245d87b 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java @@ -1,6 +1,7 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason; @@ -44,8 +45,8 @@ class DocumentProcessingServiceTest { SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString()); candidate = new SourceDocumentCandidate("document.pdf", 2048L, locator); - // Create runtime configuration with maxPages limit - runtimeConfig = new RuntimeConfiguration(10); + // Create runtime configuration with maxPages limit and default transient retry limit + runtimeConfig = new RuntimeConfiguration(10, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT); } @Test diff --git 
a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java index aeaf86e..777f38c 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/PreCheckEvaluatorTest.java @@ -1,6 +1,7 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason; @@ -236,7 +237,7 @@ class PreCheckEvaluatorTest { // ========================================================================= private RuntimeConfiguration buildConfig(int maxPages) throws Exception { - return new RuntimeConfiguration(maxPages); + return new RuntimeConfiguration(maxPages, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT); } private int maxPages(int limit) { diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java index 4219a76..2eeb5f3 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java @@ -2,6 +2,7 @@ package de.gecheckt.pdf.umbenenner.application.usecase; import 
de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure; import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; @@ -52,6 +53,7 @@ import java.net.URI; import java.nio.file.Files; import java.nio.file.Path; import java.time.Instant; +import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -460,7 +462,7 @@ class BatchRunProcessingUseCaseTest { DocumentProcessingCoordinator failingProcessor = new DocumentProcessingCoordinator( new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), - new NoOpProcessingLogger()) { + new NoOpProcessingLogger(), 3) { @Override public boolean processDeferredOutcome( de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate, @@ -504,7 +506,7 @@ class BatchRunProcessingUseCaseTest { DocumentProcessingCoordinator selectiveFailingProcessor = new DocumentProcessingCoordinator( new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), - new NoOpProcessingLogger()) { + new NoOpProcessingLogger(), 3) { private int callCount = 0; @Override @@ -595,7 +597,7 @@ class BatchRunProcessingUseCaseTest { DocumentProcessingCoordinator failingCoordinator = new DocumentProcessingCoordinator( new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), - new NoOpProcessingLogger()) { + new NoOpProcessingLogger(), 3) { @Override public boolean processDeferredOutcome( 
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate c, @@ -660,11 +662,12 @@ class BatchRunProcessingUseCaseTest { // Prüft, dass bei erfolgreich verarbeiteter Datei debug() durch logExtractionResult // und info() durch logProcessingOutcome aufgerufen wird. // Erwartete debug()-Aufrufe für einen Kandidaten (success-Pfad): - // L138 (lock acquired) + L249 (processCandidate) + L293 (fingerprint) + L337 (logExtractionResult) + L213 (lock released) = 5 - // Ohne logExtractionResult-Aufruf: 4 + // lock acquired + fingerprint computed + logExtractionResult + lock released = 4 + // Ohne logExtractionResult-Aufruf wären es nur 3 debug()-Aufrufe. // Erwartete info()-Aufrufe für einen Kandidaten (success-Pfad): - // L130 (initiiert) + L145 (gestartet) + L178 (Kandidaten gefunden) + L365 (PreCheckPassed) + L190 (abgeschlossen) = 5 - // Ohne logProcessingOutcome-Aufruf: 4 + // Batch initiiert + Batch gestartet + Kandidaten gefunden + erkannte Quelldatei + // + logProcessingOutcome (PreCheckPassed) + Batch abgeschlossen = 6 + // Ohne logProcessingOutcome-Aufruf wären es 5 info()-Aufrufe. 
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); RuntimeConfiguration config = buildConfig(tempDir); @@ -680,21 +683,21 @@ class BatchRunProcessingUseCaseTest { useCase.execute(new BatchRunContext(new RunId("log-precheck"), Instant.now())); - // Ohne logExtractionResult wären es mindestens 4 debug()-Aufrufe; mit logExtractionResult 5 - assertTrue(capturingLogger.debugCallCount >= 5, - "logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 5, war: " + // Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4 + assertTrue(capturingLogger.debugCallCount >= 4, + "logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 4, war: " + capturingLogger.debugCallCount + ")"); - // Ohne logProcessingOutcome wären es 4 info()-Aufrufe; mit logProcessingOutcome 5 - assertTrue(capturingLogger.infoCallCount >= 5, - "logProcessingOutcome muss bei PreCheckPassed info() aufrufen (erwartet >= 5, war: " + // Ohne logProcessingOutcome wären es 5 info()-Aufrufe; mit logProcessingOutcome >= 6 + assertTrue(capturingLogger.infoCallCount >= 6, + "logProcessingOutcome muss bei PreCheckPassed info() aufrufen (erwartet >= 6, war: " + capturingLogger.infoCallCount + ")"); } @Test void execute_extractionContentError_logsDebugAndPreCheckFailedInfo() throws Exception { // Prüft, dass bei PdfExtractionContentError debug (logExtractionResult) und info (logProcessingOutcome) geloggt wird. 
- // Erwartete debug()-Aufrufe: 5 (lock + processCandidate + fingerprint + logExtractionResult (content) + lock released) - // Erwartete info()-Aufrufe: 5 (L130 + L145 + L178 + L369 PreCheckFailed + L190) + // Erwartete debug()-Aufrufe: 4 (lock acquired + fingerprint + logExtractionResult (content) + lock released) + // Erwartete info()-Aufrufe: 6 (Batch initiiert + gestartet + Kandidaten gefunden + erkannte Quelldatei + PreCheckFailed + abgeschlossen) CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); RuntimeConfiguration config = buildConfig(tempDir); @@ -710,20 +713,20 @@ class BatchRunProcessingUseCaseTest { useCase.execute(new BatchRunContext(new RunId("log-content-error"), Instant.now())); - // Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5 - assertTrue(capturingLogger.debugCallCount >= 5, - "logExtractionResult muss bei PdfExtractionContentError debug() aufrufen (erwartet >= 5, war: " + // Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4 + assertTrue(capturingLogger.debugCallCount >= 4, + "logExtractionResult muss bei PdfExtractionContentError debug() aufrufen (erwartet >= 4, war: " + capturingLogger.debugCallCount + ")"); - // Ohne logProcessingOutcome (PreCheckFailed) wären es 4 info()-Aufrufe; mit 5 - assertTrue(capturingLogger.infoCallCount >= 5, - "logProcessingOutcome muss bei PreCheckFailed info() aufrufen (erwartet >= 5, war: " + // Ohne logProcessingOutcome (PreCheckFailed) wären es 5 info()-Aufrufe; mit >= 6 + assertTrue(capturingLogger.infoCallCount >= 6, + "logProcessingOutcome muss bei PreCheckFailed info() aufrufen (erwartet >= 6, war: " + capturingLogger.infoCallCount + ")"); } @Test void execute_extractionTechnicalError_logsDebugAndWarn() throws Exception { // Prüft, dass bei PdfExtractionTechnicalError debug (logExtractionResult) und warn (logProcessingOutcome) geloggt wird. 
- // Erwartete debug()-Aufrufe: 5 (lock + processCandidate + fingerprint + logExtractionResult + lock released) + // Erwartete debug()-Aufrufe: 4 (lock acquired + fingerprint + logExtractionResult + lock released) CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); RuntimeConfiguration config = buildConfig(tempDir); @@ -739,15 +742,86 @@ class BatchRunProcessingUseCaseTest { useCase.execute(new BatchRunContext(new RunId("log-tech-error"), Instant.now())); - // Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5 - assertTrue(capturingLogger.debugCallCount >= 5, - "logExtractionResult muss bei PdfExtractionTechnicalError debug() aufrufen (erwartet >= 5, war: " + // Ohne logExtractionResult wären es nur 3 debug()-Aufrufe; mit logExtractionResult >= 4 + assertTrue(capturingLogger.debugCallCount >= 4, + "logExtractionResult muss bei PdfExtractionTechnicalError debug() aufrufen (erwartet >= 4, war: " + capturingLogger.debugCallCount + ")"); // logProcessingOutcome ruft warn() auf für TechnicalDocumentError assertTrue(capturingLogger.warnCallCount > 0, "logProcessingOutcome muss bei TechnicalDocumentError warn() aufrufen"); } + // ------------------------------------------------------------------------- + // Log correlation tests + // ------------------------------------------------------------------------- + + @Test + void execute_preFingerprintError_logContainsRunIdAndCandidateDescription() throws Exception { + // When fingerprint computation fails, the warning log must reference both the run-ID + // and the candidate's unique identifier (pre-fingerprint correlation rule). 
+ String runIdValue = "run-correlation-pre-fp"; + String candidateFilename = "unreadable-candidate.pdf"; + + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + RuntimeConfiguration config = buildConfig(tempDir); + + FixedCandidatesPort candidatesPort = new FixedCandidatesPort( + List.of(makeCandidate(candidateFilename))); + + // Fingerprint port that always fails + FingerprintPort failingFingerprintPort = c -> + new FingerprintTechnicalError("File not readable", null); + + DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( + config, new MockRunLockPort(), candidatesPort, new NoOpExtractionPort(), + failingFingerprintPort, new NoOpDocumentProcessingCoordinator(), + buildStubAiNamingService(), capturingLogger); + + useCase.execute(new BatchRunContext(new RunId(runIdValue), Instant.now())); + + // At least one warning message must contain both run-ID and candidate filename + boolean correlationPresent = capturingLogger.warnMessages.stream() + .anyMatch(m -> m.contains(runIdValue) && m.contains(candidateFilename)); + assertTrue(correlationPresent, + "Pre-fingerprint warning must reference both run-ID '" + runIdValue + + "' and candidate '" + candidateFilename + "'. " + + "Captured warn messages: " + capturingLogger.warnMessages); + } + + @Test + void execute_postFingerprintProcessing_logContainsFingerprintHex() throws Exception { + // After a successful fingerprint computation, at least one log message must contain + // the fingerprint's SHA-256 hex value (post-fingerprint correlation rule). 
+ String candidateFilename = "identifiable.pdf"; + + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + RuntimeConfiguration config = buildConfig(tempDir); + + SourceDocumentCandidate candidate = makeCandidate(candidateFilename); + FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate)); + FixedExtractionPort extractionPort = new FixedExtractionPort( + new PdfExtractionSuccess("Some invoice text", new PdfPageCount(1))); + + // Deterministic fingerprint port so we can verify the exact hex in the log + AlwaysSuccessFingerprintPort fingerprintPort = new AlwaysSuccessFingerprintPort(); + DocumentFingerprint expectedFingerprint = ((FingerprintSuccess) fingerprintPort.computeFingerprint(candidate)).fingerprint(); + + DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( + config, new MockRunLockPort(), candidatesPort, extractionPort, + fingerprintPort, new TrackingDocumentProcessingCoordinator(), + buildStubAiNamingService(), capturingLogger); + + useCase.execute(new BatchRunContext(new RunId("run-correlation-post-fp"), Instant.now())); + + String fingerprintHex = expectedFingerprint.sha256Hex(); + boolean fingerprintInLog = capturingLogger.allMessages().stream() + .anyMatch(m -> m.contains(fingerprintHex)); + assertTrue(fingerprintInLog, + "At least one log message must contain the fingerprint hex '" + fingerprintHex + + "' after successful fingerprint computation. 
" + + "Captured messages: " + capturingLogger.allMessages()); + } + // ------------------------------------------------------------------------- // Helpers // ------------------------------------------------------------------------- @@ -779,8 +853,8 @@ class BatchRunProcessingUseCaseTest { } private static RuntimeConfiguration buildConfig(Path tempDir) throws Exception { - // maxPages set to 3 – useful for page-limit tests - return new RuntimeConfiguration(3); + // maxPages set to 3 – useful for page-limit tests; maxRetriesTransient set to 3 + return new RuntimeConfiguration(3, 3, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT); } private static SourceDocumentCandidate makeCandidate(String filename) { @@ -937,7 +1011,7 @@ class BatchRunProcessingUseCaseTest { private static class NoOpDocumentProcessingCoordinator extends DocumentProcessingCoordinator { NoOpDocumentProcessingCoordinator() { super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3); } } @@ -949,7 +1023,7 @@ class BatchRunProcessingUseCaseTest { TrackingDocumentProcessingCoordinator() { super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), - new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3); } @Override @@ -1094,6 +1168,62 @@ class BatchRunProcessingUseCaseTest { } } + /** + * Captures formatted log messages for each log level. + * Used by log-correlation tests that must inspect message content. 
+ */ + private static class MessageCapturingProcessingLogger implements ProcessingLogger { + final List infoMessages = new ArrayList<>(); + final List debugMessages = new ArrayList<>(); + final List warnMessages = new ArrayList<>(); + final List errorMessages = new ArrayList<>(); + + /** Formats a message template with its arguments the same way SLF4J/Log4j2 does. */ + private static String format(String message, Object... args) { + if (args == null || args.length == 0) return message; + StringBuilder sb = new StringBuilder(); + int argIndex = 0; + int start = 0; + int pos; + while ((pos = message.indexOf("{}", start)) != -1 && argIndex < args.length) { + sb.append(message, start, pos); + sb.append(args[argIndex++]); + start = pos + 2; + } + sb.append(message, start, message.length()); + return sb.toString(); + } + + @Override + public void info(String message, Object... args) { + infoMessages.add(format(message, args)); + } + + @Override + public void debug(String message, Object... args) { + debugMessages.add(format(message, args)); + } + + @Override + public void warn(String message, Object... args) { + warnMessages.add(format(message, args)); + } + + @Override + public void error(String message, Object... args) { + errorMessages.add(format(message, args)); + } + + List allMessages() { + List all = new ArrayList<>(); + all.addAll(infoMessages); + all.addAll(debugMessages); + all.addAll(warnMessages); + all.addAll(errorMessages); + return all; + } + } + /** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. 
*/ private static class CapturingProcessingLogger implements ProcessingLogger { int infoCallCount = 0; diff --git a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java index beedbaf..4d0f3eb 100644 --- a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java +++ b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java @@ -30,6 +30,7 @@ import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort; @@ -204,7 +205,7 @@ public class BootstrapRunner { this.schemaInitPortFactory = SqliteSchemaInitializationAdapter::new; this.useCaseFactory = (startConfig, lock) -> { // Extract runtime configuration from startup configuration - RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(startConfig.maxPages()); + RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(startConfig.maxPages(), startConfig.maxRetriesTransient(), resolveAiContentSensitivity(startConfig.logAiSensitive())); String jdbcUrl = buildJdbcUrl(startConfig); FingerprintPort fingerprintPort = new Sha256FingerprintAdapter(); @@ -218,7 +219,9 @@ public class BootstrapRunner { TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(startConfig.targetFolder()); TargetFileCopyPort targetFileCopyPort = new 
FilesystemTargetFileCopyAdapter(startConfig.targetFolder()); DocumentProcessingCoordinator documentProcessingCoordinator = - new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository, unitOfWorkPort, targetFolderPort, targetFileCopyPort, coordinatorLogger); + new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository, + unitOfWorkPort, targetFolderPort, targetFileCopyPort, coordinatorLogger, + startConfig.maxRetriesTransient()); // Wire AI naming pipeline AiInvocationPort aiInvocationPort = new OpenAiHttpAdapter(startConfig); @@ -408,6 +411,23 @@ public class BootstrapRunner { } } + /** + * Derives the {@link AiContentSensitivity} decision from the raw {@code log.ai.sensitive} + * configuration flag. + *

+ * The safe default is {@link AiContentSensitivity#PROTECT_SENSITIVE_CONTENT}. + * {@link AiContentSensitivity#LOG_SENSITIVE_CONTENT} is only produced when + * {@code logAiSensitive} is explicitly {@code true}. + * + * @param logAiSensitive the parsed boolean value of the {@code log.ai.sensitive} property + * @return the appropriate sensitivity decision; never {@code null} + */ + static AiContentSensitivity resolveAiContentSensitivity(boolean logAiSensitive) { + return logAiSensitive + ? AiContentSensitivity.LOG_SENSITIVE_CONTENT + : AiContentSensitivity.PROTECT_SENSITIVE_CONTENT; + } + /** * Builds the JDBC URL for the SQLite database from the configured file path. * diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerEdgeCasesTest.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerEdgeCasesTest.java index d45fba3..567a29a 100644 --- a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerEdgeCasesTest.java +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerEdgeCasesTest.java @@ -7,6 +7,7 @@ import de.gecheckt.pdf.umbenenner.adapter.out.configuration.ConfigurationLoading import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort; @@ -60,7 +61,8 @@ class BootstrapRunnerEdgeCasesTest { null, // null runtimeLockFile tempDir.resolve("logs"), "INFO", - "test-key" + "test-key", + false ); 
AtomicReference capturedLockPath = new AtomicReference<>(); @@ -106,7 +108,8 @@ class BootstrapRunnerEdgeCasesTest { tempDir.resolve("lock.lock"), tempDir.resolve("logs"), "INFO", - "test-key" + "test-key", + false ); String jdbcUrl = BootstrapRunner.buildJdbcUrl(config); @@ -132,7 +135,8 @@ class BootstrapRunnerEdgeCasesTest { tempDir.resolve("lock.lock"), tempDir.resolve("logs"), "INFO", - "test-key" + "test-key", + false ); String jdbcUrl = BootstrapRunner.buildJdbcUrl(config); @@ -159,7 +163,8 @@ class BootstrapRunnerEdgeCasesTest { tempDir.resolve("lock.lock"), tempDir.resolve("logs"), "INFO", - "test-key" + "test-key", + false ); // Verify BatchRunContext can be created (used internally by BootstrapRunner) @@ -223,7 +228,7 @@ class BootstrapRunnerEdgeCasesTest { return new StartConfiguration(sourceDir, targetDir, dbFile, URI.create("https://api.example.com"), "gpt-4", 30, 3, 100, 50000, promptFile, tempDir.resolve("lock.lock"), tempDir.resolve("logs"), - "INFO", "key"); + "INFO", "key", false); } catch (Exception e) { throw new RuntimeException(e); } @@ -319,6 +324,24 @@ class BootstrapRunnerEdgeCasesTest { assertEquals(1, runner.run(), "LOCK_UNAVAILABLE outcome should map to exit code 1"); } + // ========================================================================= + // AI Content Sensitivity Resolution + // ========================================================================= + + @Test + void resolveAiContentSensitivity_falseYieldsProtectSensitiveContent() { + assertEquals(AiContentSensitivity.PROTECT_SENSITIVE_CONTENT, + BootstrapRunner.resolveAiContentSensitivity(false), + "logAiSensitive=false must resolve to PROTECT_SENSITIVE_CONTENT (safe default)"); + } + + @Test + void resolveAiContentSensitivity_trueYieldsLogSensitiveContent() { + assertEquals(AiContentSensitivity.LOG_SENSITIVE_CONTENT, + BootstrapRunner.resolveAiContentSensitivity(true), + "logAiSensitive=true must resolve to LOG_SENSITIVE_CONTENT"); + } + // 
========================================================================= // Mocks // ========================================================================= @@ -364,7 +387,8 @@ class BootstrapRunnerEdgeCasesTest { tempDir.resolve("lock.lock"), tempDir.resolve("logs"), "INFO", - "test-api-key" + "test-api-key", + false ); } catch (Exception e) { throw new RuntimeException("Failed to create mock configuration", e); diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerTest.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerTest.java index 17694ea..4c0d435 100644 --- a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerTest.java +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunnerTest.java @@ -186,7 +186,8 @@ class BootstrapRunnerTest { Paths.get(""), // empty – simulates unconfigured runtime.lock.file tempDir.resolve("logs"), "INFO", - "test-key" + "test-key", + false ); AtomicReference capturedLockPath = new AtomicReference<>(); @@ -319,7 +320,8 @@ class BootstrapRunnerTest { tempDir.resolve("lock.lock"), tempDir.resolve("logs"), "INFO", - "test-api-key" + "test-api-key", + false ); } catch (Exception e) { throw new RuntimeException("Failed to create mock configuration", e); diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java index be43455..c32b262 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java @@ -17,10 +17,14 @@ package de.gecheckt.pdf.umbenenner.domain.model; * processed further in the next stage (target copy, final filename generation). 
* This is a non-terminal intermediate state. *

  • {@link #SUCCESS} — document was fully processed end-to-end and written to the - * target location. Status is final and irreversible; skip in all future runs.
  • + * target location. Status is final and irreversible; skip in all future runs with + * {@link #SKIPPED_ALREADY_PROCESSED}. *
  • {@link #FAILED_RETRYABLE} — last attempt failed but is retryable; process again - * in the next run according to the applicable retry rule.
  • - *
  • {@link #FAILED_FINAL} — all allowed retries exhausted; skip in all future runs.
  • + * in the next run according to the applicable retry rule. This status is only + * valid as long as at least one further scheduler run is still permissible under the business rules. + *
  • {@link #FAILED_FINAL} — all allowed retries are exhausted; skip in all future + * runs with {@link #SKIPPED_FINAL_FAILURE}. This status is terminal and + * irreversible.
  • *
  • {@link #PROCESSING} — document is currently being processed (transient, within a * run); if found persisted after a crash, treat as {@link #FAILED_RETRYABLE}.
  • * @@ -38,6 +42,32 @@ package de.gecheckt.pdf.umbenenner.domain.model; * overall status was already {@link #FAILED_FINAL}. * *

    + * Terminal status rule: + * {@link #SUCCESS} and {@link #FAILED_FINAL} are the only truly terminal statuses. + * Documents with either of these statuses as their overall status are never reprocessed; + * they receive a historised skip attempt instead. Documents with + * {@link #FAILED_RETRYABLE}, {@link #READY_FOR_AI}, or {@link #PROPOSAL_READY} remain + * processable in future runs, subject to their applicable retry rule. + *

    + * Transition to {@link #FAILED_FINAL} — retry exhaustion rules: + *

    + *

    + * Immediate within-run target copy retry: + * A single technical retry of the physical target-file copy step is permitted within the + * same run. This mechanism is not a cross-run retry and does not increment the + * transient-error counter. It does not change the document's overall status until the + * outcome of the retry is known. + *

    * Counter rules: *

    - * + *

    + * Document-level errors and batch exit code: + * Document-level errors (content errors, transient errors, target copy failures) do not + * escalate to exit-code 1. The batch run continues with remaining documents and + * exits with code 0 as long as the run itself started cleanly. Exit-code 1 is + * reserved for hard start or bootstrap failures only. */ public enum ProcessingStatus { diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java index 06a461c..abf6bcb 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java @@ -57,5 +57,23 @@ *

  • Immutable value objects or enumerations
  • *
  • Reusable across all layers via the Application and Adapter contracts
  • * + *

    + * Retry and status semantics summary (see {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} + * for full detail): + *

    */ package de.gecheckt.pdf.umbenenner.domain.model;