1
0

V1.1 Änderungen

This commit is contained in:
2026-04-09 05:42:02 +02:00
parent 39800b6ea8
commit 5099ff4aca
44 changed files with 4912 additions and 957 deletions

View File

@@ -0,0 +1,62 @@
package de.gecheckt.pdf.umbenenner.bootstrap;
import java.util.Objects;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.AnthropicClaudeHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.OpenAiHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
/**
 * Maps the configured AI provider family to its concrete {@link AiInvocationPort}
 * adapter and instantiates it.
 * <p>
 * Lives in the bootstrap layer and is the single place where a provider family is
 * translated into an adapter implementation. Exactly one provider is chosen per
 * application run; the choice is driven by {@code ai.provider.active}.
 *
 * <h2>Registered providers</h2>
 * <ul>
 *   <li>{@link AiProviderFamily#OPENAI_COMPATIBLE} — {@link OpenAiHttpAdapter}</li>
 *   <li>{@link AiProviderFamily#CLAUDE} — {@link AnthropicClaudeHttpAdapter}</li>
 * </ul>
 *
 * <h2>Hard start failure</h2>
 * <p>
 * Requesting a family without a registered implementation raises an
 * {@link InvalidStartConfigurationException} immediately; the bootstrap runner maps
 * that to exit code 1.
 */
public class AiProviderSelector {

    /**
     * Builds the {@link AiInvocationPort} adapter for the requested provider family
     * from the supplied provider configuration.
     *
     * @param family the active provider family; must not be {@code null}
     * @param config configuration of the active provider; must not be {@code null}
     * @return a freshly constructed adapter instance; never {@code null}
     * @throws InvalidStartConfigurationException if the family has no registered
     *         adapter implementation
     */
    public AiInvocationPort select(AiProviderFamily family, ProviderConfiguration config) {
        Objects.requireNonNull(family, "provider family must not be null");
        Objects.requireNonNull(config, "provider configuration must not be null");
        // Branch order is irrelevant: a family matches at most one registered adapter.
        if (family == AiProviderFamily.CLAUDE) {
            return new AnthropicClaudeHttpAdapter(config);
        }
        if (family == AiProviderFamily.OPENAI_COMPATIBLE) {
            return new OpenAiHttpAdapter(config);
        }
        throw new InvalidStartConfigurationException(
                "No AI adapter implementation registered for provider family: "
                        + family.getIdentifier()
                        + ". Supported in the current build: openai-compatible, claude");
    }
}

View File

@@ -9,11 +9,11 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.OpenAiHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.StartConfigurationValidator;
import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.ConfigurationLoadingException;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.LegacyConfigurationMigrator;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
@@ -27,6 +27,8 @@ import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
@@ -68,6 +70,12 @@ import de.gecheckt.pdf.umbenenner.domain.model.RunId;
* configuration is handed to the use case factory which extracts the minimal runtime
* configuration for the application layer.
*
* <h2>Active AI provider</h2>
* <p>
* The active AI provider family is determined from the configuration and logged at run start.
* The {@link AiProviderSelector} in the bootstrap layer selects the appropriate
* {@link AiInvocationPort} implementation. Exactly one provider is active per run.
*
* <h2>Exit code semantics</h2>
* <ul>
* <li>{@code 0}: Batch run executed successfully; individual document failures do not
@@ -82,10 +90,12 @@ import de.gecheckt.pdf.umbenenner.domain.model.RunId;
* <p>
* The production constructor wires the following key adapters:
* <ul>
* <li>{@link PropertiesConfigurationPortAdapter} — loads configuration from properties and environment.</li>
* <li>{@link PropertiesConfigurationPortAdapter} — loads configuration from the multi-provider
* properties schema and environment.</li>
* <li>{@link AiProviderSelector} — selects the active {@link AiInvocationPort} implementation
* based on {@code ai.provider.active}.</li>
* <li>{@link FilesystemRunLockPortAdapter} — ensures exclusive execution via a lock file.</li>
* <li>{@link SqliteSchemaInitializationAdapter} — initializes SQLite schema (including target-copy
* schema evolution) at startup.</li>
* <li>{@link SqliteSchemaInitializationAdapter} — initializes SQLite schema at startup.</li>
* <li>{@link Sha256FingerprintAdapter} — provides content-based document identification.</li>
* <li>{@link SqliteDocumentRecordRepositoryAdapter} — manages document master records.</li>
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} — maintains attempt history.</li>
@@ -103,6 +113,7 @@ public class BootstrapRunner {
private static final Logger LOG = LogManager.getLogger(BootstrapRunner.class);
private final MigrationStep migrationStep;
private final ConfigurationPortFactory configPortFactory;
private final RunLockPortFactory runLockPortFactory;
private final ValidatorFactory validatorFactory;
@@ -110,6 +121,19 @@ public class BootstrapRunner {
private final UseCaseFactory useCaseFactory;
private final CommandFactory commandFactory;
/**
 * Functional interface encapsulating the legacy configuration migration step.
 * <p>
 * The production implementation calls {@link LegacyConfigurationMigrator#migrateIfLegacy}
 * on the active configuration file before any configuration is loaded. In tests, a
 * no-op lambda is injected so that migration does not interfere with mock configuration ports.
 * <p>
 * NOTE(review): implementations are expected to signal migration failure via an unchecked
 * {@code ConfigurationLoadingException} — confirm against the migrator implementation.
 */
@FunctionalInterface
public interface MigrationStep {
/** Runs the legacy configuration migration if the configuration file is in legacy form. */
void runIfNeeded();
}
/**
* Functional interface for creating a ConfigurationPort.
*/
@@ -175,12 +199,12 @@ public class BootstrapRunner {
* Wires the processing pipeline with the following adapters:
* <ul>
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading.</li>
* <li>{@link AiProviderSelector} for selecting the active AI provider implementation.</li>
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking.</li>
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery.</li>
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction.</li>
* <li>{@link Sha256FingerprintAdapter} for SHA-256 content fingerprinting.</li>
* <li>{@link SqliteSchemaInitializationAdapter} for SQLite schema DDL and target-copy schema
* evolution at startup.</li>
* <li>{@link SqliteSchemaInitializationAdapter} for SQLite schema DDL at startup.</li>
* <li>{@link SqliteDocumentRecordRepositoryAdapter} for document master record CRUD.</li>
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} for attempt history CRUD.</li>
* <li>{@link SqliteUnitOfWorkAdapter} for atomic persistence operations.</li>
@@ -199,6 +223,8 @@ public class BootstrapRunner {
* begins. Failure during initialisation aborts the run with exit code 1.
*/
public BootstrapRunner() {
this.migrationStep = () -> new LegacyConfigurationMigrator()
.migrateIfLegacy(Paths.get("config/application.properties"));
this.configPortFactory = PropertiesConfigurationPortAdapter::new;
this.runLockPortFactory = FilesystemRunLockPortAdapter::new;
this.validatorFactory = StartConfigurationValidator::new;
@@ -206,7 +232,13 @@ public class BootstrapRunner {
this.useCaseFactory = (startConfig, lock) -> {
// Extract runtime configuration from startup configuration
AiContentSensitivity aiContentSensitivity = resolveAiContentSensitivity(startConfig.logAiSensitive());
RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(startConfig.maxPages(), startConfig.maxRetriesTransient(), aiContentSensitivity);
RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(
startConfig.maxPages(), startConfig.maxRetriesTransient(), aiContentSensitivity);
// Select the active AI provider adapter
AiProviderFamily activeFamily = startConfig.multiProviderConfiguration().activeProviderFamily();
ProviderConfiguration providerConfig = startConfig.multiProviderConfiguration().activeProviderConfiguration();
AiInvocationPort aiInvocationPort = new AiProviderSelector().select(activeFamily, providerConfig);
String jdbcUrl = buildJdbcUrl(startConfig);
FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
@@ -216,17 +248,18 @@ public class BootstrapRunner {
new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
UnitOfWorkPort unitOfWorkPort =
new SqliteUnitOfWorkAdapter(jdbcUrl);
// Wire coordinators logger with AI content sensitivity setting
ProcessingLogger coordinatorLogger = new Log4jProcessingLogger(DocumentProcessingCoordinator.class, aiContentSensitivity);
// Wire coordinator logger with AI content sensitivity setting
ProcessingLogger coordinatorLogger = new Log4jProcessingLogger(
DocumentProcessingCoordinator.class, aiContentSensitivity);
TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(startConfig.targetFolder());
TargetFileCopyPort targetFileCopyPort = new FilesystemTargetFileCopyAdapter(startConfig.targetFolder());
DocumentProcessingCoordinator documentProcessingCoordinator =
new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository,
unitOfWorkPort, targetFolderPort, targetFileCopyPort, coordinatorLogger,
startConfig.maxRetriesTransient());
startConfig.maxRetriesTransient(),
activeFamily.getIdentifier());
// Wire AI naming pipeline
AiInvocationPort aiInvocationPort = new OpenAiHttpAdapter(startConfig);
PromptPort promptPort = new FilesystemPromptPortAdapter(startConfig.promptTemplateFile());
ClockPort clockPort = new SystemClockAdapter();
AiResponseValidator aiResponseValidator = new AiResponseValidator(clockPort);
@@ -234,11 +267,12 @@ public class BootstrapRunner {
aiInvocationPort,
promptPort,
aiResponseValidator,
startConfig.apiModel(),
providerConfig.model(),
startConfig.maxTextCharacters());
// Wire use case logger with AI content sensitivity setting
ProcessingLogger useCaseLogger = new Log4jProcessingLogger(DefaultBatchRunProcessingUseCase.class, aiContentSensitivity);
ProcessingLogger useCaseLogger = new Log4jProcessingLogger(
DefaultBatchRunProcessingUseCase.class, aiContentSensitivity);
return new DefaultBatchRunProcessingUseCase(
runtimeConfig,
lock,
@@ -254,6 +288,9 @@ public class BootstrapRunner {
/**
* Creates the BootstrapRunner with custom factories for testing.
* <p>
* The migration step is set to a no-op; tests that need to exercise the migration
* path use the full seven-parameter constructor.
*
* @param configPortFactory factory for creating ConfigurationPort instances
* @param runLockPortFactory factory for creating RunLockPort instances
@@ -268,6 +305,32 @@ public class BootstrapRunner {
SchemaInitializationPortFactory schemaInitPortFactory,
UseCaseFactory useCaseFactory,
CommandFactory commandFactory) {
this(() -> { /* no-op: tests inject mock ConfigurationPort directly */ },
configPortFactory, runLockPortFactory, validatorFactory,
schemaInitPortFactory, useCaseFactory, commandFactory);
}
/**
* Creates the BootstrapRunner with all factories including an explicit migration step.
* <p>
* Use this constructor in tests that need to exercise the full migration-then-load path.
*
* @param migrationStep the legacy configuration migration step to run before loading
* @param configPortFactory factory for creating ConfigurationPort instances
* @param runLockPortFactory factory for creating RunLockPort instances
* @param validatorFactory factory for creating StartConfigurationValidator instances
* @param schemaInitPortFactory factory for creating PersistenceSchemaInitializationPort instances
* @param useCaseFactory factory for creating BatchRunProcessingUseCase instances
* @param commandFactory factory for creating SchedulerBatchCommand instances
*/
public BootstrapRunner(MigrationStep migrationStep,
ConfigurationPortFactory configPortFactory,
RunLockPortFactory runLockPortFactory,
ValidatorFactory validatorFactory,
SchemaInitializationPortFactory schemaInitPortFactory,
UseCaseFactory useCaseFactory,
CommandFactory commandFactory) {
this.migrationStep = migrationStep;
this.configPortFactory = configPortFactory;
this.runLockPortFactory = runLockPortFactory;
this.validatorFactory = validatorFactory;
@@ -299,6 +362,7 @@ public class BootstrapRunner {
LOG.info("Bootstrap flow started.");
try {
// Bootstrap Phase: prepare configuration and persistence
migrateConfigurationIfNeeded();
StartConfiguration config = loadAndValidateConfiguration();
initializeSchema(config);
// Execution Phase: run batch processing
@@ -318,6 +382,20 @@ public class BootstrapRunner {
}
}
/**
 * Runs the legacy configuration migration step once per bootstrap run, before
 * configuration loading.
 * <p>
 * If the configuration file is in the legacy flat-key format, it is migrated in-place to the
 * multi-provider schema before the normal configuration loading path is entered. If the file
 * is already in the current schema, this method returns immediately without any I/O side effect.
 * <p>
 * A migration failure is a hard startup error and propagates as a
 * {@link ConfigurationLoadingException}.
 */
private void migrateConfigurationIfNeeded() {
// Pure delegation: production wires LegacyConfigurationMigrator here; tests inject a no-op.
migrationStep.runIfNeeded();
}
/**
 * Loads the start configuration through the {@link ConfigurationPort} and validates it
 * with a {@link StartConfigurationValidator}.
 * <p>
 * Validation covers (among others):
 * <ul>
 *   <li>{@code target.folder}: must exist or be creatable (the validator attempts
 *       {@code Files.createDirectories} if absent; failure here is a hard startup error).</li>
 *   <li>{@code sqlite.file}: parent directory must exist.</li>
 *   <li>All numeric and path constraints.</li>
 * </ul>
 * <p>
 * On success, the identifier of the active AI provider is logged at INFO level.
 *
 * @return the loaded and validated start configuration
 */
private StartConfiguration loadAndValidateConfiguration() {
    ConfigurationPort port = configPortFactory.create();
    StartConfiguration loaded = port.loadConfiguration();
    validatorFactory.create().validate(loaded);
    String activeProvider =
            loaded.multiProviderConfiguration().activeProviderFamily().getIdentifier();
    LOG.info("Active AI provider: {}", activeProvider);
    return loaded;
}

View File

@@ -0,0 +1,134 @@
package de.gecheckt.pdf.umbenenner.bootstrap;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.AnthropicClaudeHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.OpenAiHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.junit.jupiter.MockitoExtension;
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
/**
 * Unit tests for {@link AiProviderSelector}.
 * <p>
 * Verifies adapter selection for the OpenAI-compatible and Claude families, the hard
 * failure for families without a registered adapter, and null-argument safety.
 */
@ExtendWith(MockitoExtension.class)
class AiProviderSelectorTest {

    private final AiProviderSelector selector = new AiProviderSelector();

    private static ProviderConfiguration openAiConfiguration() {
        return new ProviderConfiguration("gpt-4", 30, "https://api.example.com", "test-key");
    }

    private static ProviderConfiguration claudeConfiguration() {
        return new ProviderConfiguration(
                "claude-3-5-sonnet-20241022", 60, "https://api.anthropic.com", "sk-ant-key");
    }

    /**
     * OPENAI_COMPATIBLE must be wired to {@link OpenAiHttpAdapter}.
     * (Mandatory test case: bootstrapWiresOpenAiCompatibleAdapterWhenActive)
     */
    @Test
    void bootstrapWiresOpenAiCompatibleAdapterWhenActive() {
        AiInvocationPort adapter =
                selector.select(AiProviderFamily.OPENAI_COMPATIBLE, openAiConfiguration());

        assertNotNull(adapter, "Selector must return a non-null AiInvocationPort");
        assertInstanceOf(OpenAiHttpAdapter.class, adapter,
                "OPENAI_COMPATIBLE must be wired to OpenAiHttpAdapter");
    }

    /**
     * CLAUDE must be wired to {@link AnthropicClaudeHttpAdapter}.
     * (Mandatory test case: bootstrapSelectsClaudeWhenActive, AP-005)
     */
    @Test
    void bootstrapSelectsClaudeWhenActive() {
        AiInvocationPort adapter = selector.select(AiProviderFamily.CLAUDE, claudeConfiguration());

        assertNotNull(adapter, "Selector must return a non-null AiInvocationPort for Claude");
        assertInstanceOf(AnthropicClaudeHttpAdapter.class, adapter,
                "CLAUDE must be wired to AnthropicClaudeHttpAdapter");
    }

    /**
     * A {@code null} provider family must fail fast with a {@link NullPointerException};
     * a null active provider should already have been rejected by the validator before
     * the selector is ever reached.
     * (Mandatory test case: bootstrapFailsHardWhenActiveProviderUnknown)
     */
    @Test
    void bootstrapFailsHardWhenActiveProviderUnknown() {
        assertThrows(NullPointerException.class,
                () -> selector.select(null, openAiConfiguration()),
                "Null provider family must throw NullPointerException");
    }

    /**
     * A provider family without a registered adapter must abort startup with an
     * {@link InvalidStartConfigurationException}. Both known families are registered,
     * so a Mockito mock of {@link AiProviderFamily} stands in for a hypothetical
     * future/unknown provider, proving the selector's fallback guard covers any
     * unregistered family.
     * (Mandatory test case: bootstrapFailsHardWhenSelectedProviderHasNoImplementation)
     */
    @Test
    void bootstrapFailsHardWhenSelectedProviderHasNoImplementation() {
        // A mock family never compares equal to any registered constant.
        AiProviderFamily unregistered = mock(AiProviderFamily.class);
        when(unregistered.getIdentifier()).thenReturn("unknown-future-provider");
        ProviderConfiguration configuration = new ProviderConfiguration(
                "some-model", 30, "https://unknown.example.com", "some-key");

        InvalidStartConfigurationException thrown = assertThrows(
                InvalidStartConfigurationException.class,
                () -> selector.select(unregistered, configuration),
                "A provider family with no registered adapter must throw InvalidStartConfigurationException");

        String message = thrown.getMessage();
        assertTrue(message.contains("unknown-future-provider")
                        || message.toLowerCase().contains("no ai adapter"),
                "Error message must reference the unregistered provider or indicate missing registration");
    }

    /** A {@code null} {@link ProviderConfiguration} must also fail fast. */
    @Test
    void selectThrowsWhenProviderConfigurationIsNull() {
        assertThrows(NullPointerException.class,
                () -> selector.select(AiProviderFamily.OPENAI_COMPATIBLE, null),
                "Null ProviderConfiguration must throw NullPointerException");
    }
}

View File

@@ -4,6 +4,9 @@ import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.StartConfigurationValidator;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.ConfigurationLoadingException;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
@@ -17,10 +20,8 @@ import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.concurrent.atomic.AtomicReference;
import static org.junit.jupiter.api.Assertions.*;
@@ -51,9 +52,7 @@ class BootstrapRunnerEdgeCasesTest {
Files.createDirectories(tempDir.resolve("source")),
Files.createDirectories(tempDir.resolve("target")),
Files.createFile(tempDir.resolve("db.sqlite")),
URI.create("https://api.example.com"),
"gpt-4",
30,
validMultiProviderConfig(),
3,
100,
50000,
@@ -61,7 +60,6 @@ class BootstrapRunnerEdgeCasesTest {
null, // null runtimeLockFile
tempDir.resolve("logs"),
"INFO",
"test-key",
false
);
@@ -101,14 +99,12 @@ class BootstrapRunnerEdgeCasesTest {
Files.createDirectories(tempDir.resolve("source")),
Files.createDirectories(tempDir.resolve("target")),
sqliteFile,
URI.create("https://api.example.com"),
"gpt-4",
30, 3, 100, 50000,
validMultiProviderConfig(),
3, 100, 50000,
Files.createFile(tempDir.resolve("prompt.txt")),
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-key",
false
);
@@ -128,14 +124,12 @@ class BootstrapRunnerEdgeCasesTest {
Files.createDirectories(tempDir.resolve("source")),
Files.createDirectories(tempDir.resolve("target")),
sqliteFile,
URI.create("https://api.example.com"),
"gpt-4",
30, 3, 100, 50000,
validMultiProviderConfig(),
3, 100, 50000,
Files.createFile(tempDir.resolve("prompt.txt")),
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-key",
false
);
@@ -157,13 +151,12 @@ class BootstrapRunnerEdgeCasesTest {
Files.createDirectories(tempDir.resolve("source")),
Files.createDirectories(tempDir.resolve("target")),
Files.createFile(tempDir.resolve("db.sqlite")),
URI.create("https://api.example.com"),
"gpt-4", 30, 3, 100, 50000,
validMultiProviderConfig(),
3, 100, 50000,
Files.createFile(tempDir.resolve("prompt.txt")),
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-key",
false
);
@@ -226,9 +219,9 @@ class BootstrapRunnerEdgeCasesTest {
Path dbFile = Files.createFile(tempDir.resolve("db.sqlite"));
Path promptFile = Files.createFile(tempDir.resolve("prompt.txt"));
return new StartConfiguration(sourceDir, targetDir, dbFile,
URI.create("https://api.example.com"), "gpt-4", 30, 3, 100, 50000,
validMultiProviderConfig(), 3, 100, 50000,
promptFile, tempDir.resolve("lock.lock"), tempDir.resolve("logs"),
"INFO", "key", false);
"INFO", false);
} catch (Exception e) {
throw new RuntimeException(e);
}
@@ -342,9 +335,19 @@ class BootstrapRunnerEdgeCasesTest {
"logAiSensitive=true must resolve to LOG_SENSITIVE_CONTENT");
}
// =========================================================================
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
private static MultiProviderConfiguration validMultiProviderConfig() {
ProviderConfiguration openAiConfig = new ProviderConfiguration(
"gpt-4", 30, "https://api.example.com", "test-api-key");
return new MultiProviderConfiguration(AiProviderFamily.OPENAI_COMPATIBLE, openAiConfig, null);
}
// -------------------------------------------------------------------------
// Mocks
// =========================================================================
// -------------------------------------------------------------------------
private static class MockConfigurationPort implements ConfigurationPort {
private final Path tempDir;
@@ -373,13 +376,16 @@ class BootstrapRunnerEdgeCasesTest {
Files.createFile(promptTemplateFile);
}
ProviderConfiguration openAiConfig = new ProviderConfiguration(
"gpt-4", 30, "https://api.example.com", "test-api-key");
MultiProviderConfiguration multiConfig = new MultiProviderConfiguration(
AiProviderFamily.OPENAI_COMPATIBLE, openAiConfig, null);
return new StartConfiguration(
sourceFolder,
targetFolder,
sqliteFile,
URI.create("https://api.example.com"),
"gpt-4",
30,
multiConfig,
3,
100,
50000,
@@ -387,7 +393,6 @@ class BootstrapRunnerEdgeCasesTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-api-key",
false
);
} catch (Exception e) {

View File

@@ -4,6 +4,11 @@ import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.StartConfigurationValidator;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.ConfigurationLoadingException;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.LegacyConfigurationMigrator;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
@@ -13,13 +18,21 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitiali
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.LogEvent;
import org.apache.logging.log4j.core.LoggerContext;
import org.apache.logging.log4j.core.appender.AbstractAppender;
import org.apache.logging.log4j.core.config.Configuration;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.net.URI;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicReference;
import static org.junit.jupiter.api.Assertions.*;
@@ -176,9 +189,7 @@ class BootstrapRunnerTest {
sourceDir,
targetDir,
dbFile,
URI.create("https://api.example.com"),
"gpt-4",
30,
validMultiProviderConfig(),
3,
100,
50000,
@@ -186,7 +197,6 @@ class BootstrapRunnerTest {
Paths.get(""), // empty simulates unconfigured runtime.lock.file
tempDir.resolve("logs"),
"INFO",
"test-key",
false
);
@@ -262,9 +272,7 @@ class BootstrapRunnerTest {
sourceDir,
targetDir,
dbFile,
java.net.URI.create("https://api.example.com"),
"gpt-4",
30,
validMultiProviderConfig(),
0, // max.retries.transient = 0 is invalid (must be >= 1)
100,
50000,
@@ -272,7 +280,6 @@ class BootstrapRunnerTest {
tempDir.resolve("lock-mrt.lock"),
null,
"INFO",
"test-key",
false
);
@@ -346,6 +353,121 @@ class BootstrapRunnerTest {
assertEquals(1, exitCode, "Schema initialization failure should return exit code 1");
}
// =========================================================================
// Mandatory test case: activeProviderIsLoggedAtRunStart
// =========================================================================
/**
 * The active AI provider identifier must be logged at INFO level during the bootstrap phase,
 * after configuration is loaded and validated but before batch processing begins.
 */
@Test
void activeProviderIsLoggedAtRunStart() throws Exception {
// Mock configuration reports OPENAI_COMPATIBLE as the active family
// (assumes MockConfigurationPort's second argument selects that — confirm in its ctor).
ConfigurationPort mockConfigPort = new MockConfigurationPort(tempDir, true);
// Six-argument constructor: the migration step defaults to a no-op in tests.
BootstrapRunner runner = new BootstrapRunner(
() -> mockConfigPort,
lockFile -> new MockRunLockPort(),
StartConfigurationValidator::new,
jdbcUrl -> new MockSchemaInitializationPort(),
(config, lock) -> new MockRunBatchProcessingUseCase(true),
SchedulerBatchCommand::new
);
List<String> capturedMessages = new ArrayList<>();
// Unique appender name so concurrent/repeated test runs never collide in the shared config.
String appenderName = "TestCapture-" + UUID.randomUUID();
LoggerContext ctx = (LoggerContext) LogManager.getContext(false);
Configuration cfg = ctx.getConfiguration();
// In-memory appender capturing formatted messages from all loggers via the root logger.
AbstractAppender captureAppender = new AbstractAppender(appenderName, null, null, false) {
@Override
public void append(LogEvent event) {
capturedMessages.add(event.getMessage().getFormattedMessage());
}
};
captureAppender.start();
cfg.addAppender(captureAppender);
cfg.getRootLogger().addAppender(captureAppender, Level.ALL, null);
// Push the modified configuration to live loggers before the run.
ctx.updateLoggers();
try {
runner.run();
} finally {
// Always detach and stop the appender so later tests see a pristine Log4j config.
cfg.getRootLogger().removeAppender(appenderName);
ctx.updateLoggers();
captureAppender.stop();
}
// The provider identifier "openai-compatible" must appear in at least one log message.
assertTrue(capturedMessages.stream().anyMatch(m -> m.contains("openai-compatible")),
"Active AI provider identifier must be logged at run start");
}
// =========================================================================
// Mandatory test case: legacyFileEndToEndStillRuns
// =========================================================================
/**
 * End-to-end test verifying that a legacy flat-key configuration file is automatically
 * migrated to the multi-provider schema and that the application run completes successfully
 * after migration.
 * <p>
 * Covers the full migration path: detection, backup creation, in-place rewrite,
 * re-validation, and subsequent successful configuration load.
 */
@Test
void legacyFileEndToEndStillRuns(@TempDir Path workDir) throws Exception {
// Realistic on-disk layout: source/target folders, DB directory, and a prompt template.
Path sourceDir = Files.createDirectories(workDir.resolve("source"));
Path targetDir = Files.createDirectories(workDir.resolve("target"));
Path dbParentDir = Files.createDirectories(workDir.resolve("data"));
Path promptDir = Files.createDirectories(workDir.resolve("config/prompts"));
Path promptFile = Files.createFile(promptDir.resolve("template.txt"));
Files.writeString(promptFile, "Test prompt template.");
// Legacy flat-key schema: single api.* block, no ai.provider.* keys.
Path configFile = workDir.resolve("application.properties");
String legacyConfig = String.format(
"source.folder=%s%n"
+ "target.folder=%s%n"
+ "sqlite.file=%s%n"
+ "api.baseUrl=https://api.example.com%n"
+ "api.model=gpt-4%n"
+ "api.timeoutSeconds=30%n"
+ "api.key=test-legacy-key%n"
+ "max.retries.transient=3%n"
+ "max.pages=10%n"
+ "max.text.characters=5000%n"
+ "prompt.template.file=%s%n",
sourceDir.toAbsolutePath(),
targetDir.toAbsolutePath(),
dbParentDir.resolve("db.sqlite").toAbsolutePath(),
promptFile.toAbsolutePath()
);
Files.writeString(configFile, legacyConfig);
// Seven-argument constructor: real migrator and real properties adapter exercise the
// full migrate-then-load path; everything downstream is mocked.
BootstrapRunner runner = new BootstrapRunner(
() -> new LegacyConfigurationMigrator().migrateIfLegacy(configFile),
() -> new PropertiesConfigurationPortAdapter(configFile),
lockFile -> new MockRunLockPort(),
StartConfigurationValidator::new,
jdbcUrl -> new MockSchemaInitializationPort(),
(config, lock) -> new MockRunBatchProcessingUseCase(true),
SchedulerBatchCommand::new
);
int exitCode = runner.run();
assertEquals(0, exitCode,
"Legacy configuration must be migrated and the run must complete successfully");
// The migrator writes a ".bak" backup next to the original file before rewriting it.
assertTrue(Files.exists(workDir.resolve("application.properties.bak")),
"Backup file must exist after migration");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
/**
 * Builds a minimal, valid multi-provider configuration with the
 * OpenAI-compatible family active and no Claude configuration present.
 */
private static MultiProviderConfiguration validMultiProviderConfig() {
    return new MultiProviderConfiguration(
            AiProviderFamily.OPENAI_COMPATIBLE,
            new ProviderConfiguration("gpt-4", 30, "https://api.example.com", "test-api-key"),
            null);
}
// -------------------------------------------------------------------------
// Mocks
// -------------------------------------------------------------------------
@@ -377,13 +499,16 @@ class BootstrapRunnerTest {
Files.createFile(promptTemplateFile);
}
ProviderConfiguration openAiConfig = new ProviderConfiguration(
"gpt-4", 30, "https://api.example.com", "test-api-key");
MultiProviderConfiguration multiConfig = new MultiProviderConfiguration(
AiProviderFamily.OPENAI_COMPATIBLE, openAiConfig, null);
return new StartConfiguration(
sourceFolder,
targetFolder,
sqliteFile,
URI.create("https://api.example.com"),
"gpt-4",
30,
multiConfig,
3,
100,
50000,
@@ -391,7 +516,6 @@ class BootstrapRunnerTest {
tempDir.resolve("lock.lock"),
tempDir.resolve("logs"),
"INFO",
"test-api-key",
false
);
} catch (Exception e) {

View File

@@ -0,0 +1,198 @@
package de.gecheckt.pdf.umbenenner.bootstrap;
import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.AnthropicClaudeHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.ai.OpenAiHttpAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.StartConfigurationValidator;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.atomic.AtomicReference;
import static org.assertj.core.api.Assertions.assertThat;
/**
* Smoke tests for the complete bootstrap wiring of the active AI provider.
* <p>
* Each test drives the full {@link BootstrapRunner} startup sequence — configuration loading,
* validation, schema initialisation, and use-case factory — but replaces the real AI HTTP client
* with a wiring probe inside the use-case factory. No real HTTP calls are made.
*
* <h2>What is verified</h2>
* <ul>
* <li>When {@code ai.provider.active=openai-compatible}, the {@link AiProviderSelector}
* produces an {@link OpenAiHttpAdapter} instance.</li>
* <li>When {@code ai.provider.active=claude}, the {@link AiProviderSelector}
* produces an {@link AnthropicClaudeHttpAdapter} instance.</li>
* </ul>
*
* <h2>Scope</h2>
* <p>
* These are regression smoke tests for the provider selection path. They do not exercise
* real document processing; the use-case factory captures the selected port and immediately
* returns a no-op use case.
*/
class BootstrapSmokeTest {
// =========================================================================
// Pflicht-Testfall: smokeBootstrapWithOpenAiCompatibleActive
// =========================================================================
/**
* Verifies that the bootstrap path correctly wires {@link OpenAiHttpAdapter} when
* {@code ai.provider.active=openai-compatible} is configured.
* <p>
* The {@link AiProviderSelector} is called inside the use-case factory with the
* active provider configuration; the resulting {@link AiInvocationPort} instance
* is captured and asserted to be an {@link OpenAiHttpAdapter}.
*/
@Test
void smokeBootstrapWithOpenAiCompatibleActive(@TempDir Path tempDir) throws Exception {
AtomicReference<AiInvocationPort> capturedPort = new AtomicReference<>();
BootstrapRunner runner = new BootstrapRunner(
() -> buildConfigPort(tempDir, AiProviderFamily.OPENAI_COMPATIBLE,
openAiConfig(), null),
lockFile -> new NoOpRunLockPort(),
StartConfigurationValidator::new,
jdbcUrl -> new NoOpSchemaInitializationPort(),
(config, lock) -> {
AiProviderFamily family =
config.multiProviderConfiguration().activeProviderFamily();
ProviderConfiguration provConfig =
config.multiProviderConfiguration().activeProviderConfiguration();
capturedPort.set(new AiProviderSelector().select(family, provConfig));
return context -> BatchRunOutcome.SUCCESS;
},
SchedulerBatchCommand::new
);
int exitCode = runner.run();
assertThat(exitCode).isEqualTo(0);
assertThat(capturedPort.get())
.as("OPENAI_COMPATIBLE must wire OpenAiHttpAdapter")
.isInstanceOf(OpenAiHttpAdapter.class);
}
// =========================================================================
// Pflicht-Testfall: smokeBootstrapWithClaudeActive
// =========================================================================
/**
* Verifies that the bootstrap path correctly wires {@link AnthropicClaudeHttpAdapter}
* when {@code ai.provider.active=claude} is configured.
* <p>
* The {@link AiProviderSelector} is called inside the use-case factory with the
* active provider configuration; the resulting {@link AiInvocationPort} instance
* is captured and asserted to be an {@link AnthropicClaudeHttpAdapter}.
*/
@Test
void smokeBootstrapWithClaudeActive(@TempDir Path tempDir) throws Exception {
AtomicReference<AiInvocationPort> capturedPort = new AtomicReference<>();
BootstrapRunner runner = new BootstrapRunner(
() -> buildConfigPort(tempDir, AiProviderFamily.CLAUDE,
null, claudeConfig()),
lockFile -> new NoOpRunLockPort(),
StartConfigurationValidator::new,
jdbcUrl -> new NoOpSchemaInitializationPort(),
(config, lock) -> {
AiProviderFamily family =
config.multiProviderConfiguration().activeProviderFamily();
ProviderConfiguration provConfig =
config.multiProviderConfiguration().activeProviderConfiguration();
capturedPort.set(new AiProviderSelector().select(family, provConfig));
return context -> BatchRunOutcome.SUCCESS;
},
SchedulerBatchCommand::new
);
int exitCode = runner.run();
assertThat(exitCode).isEqualTo(0);
assertThat(capturedPort.get())
.as("CLAUDE must wire AnthropicClaudeHttpAdapter")
.isInstanceOf(AnthropicClaudeHttpAdapter.class);
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
private static ConfigurationPort buildConfigPort(
Path tempDir,
AiProviderFamily activeFamily,
ProviderConfiguration openAiConfig,
ProviderConfiguration claudeConfig) {
try {
Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
Path targetDir = Files.createDirectories(tempDir.resolve("target"));
Path dbFile = tempDir.resolve("test.db");
if (!Files.exists(dbFile)) {
Files.createFile(dbFile);
}
Path promptFile = tempDir.resolve("prompt.txt");
if (!Files.exists(promptFile)) {
Files.writeString(promptFile, "Test prompt.");
}
MultiProviderConfiguration multiConfig =
new MultiProviderConfiguration(activeFamily, openAiConfig, claudeConfig);
StartConfiguration config = new StartConfiguration(
sourceDir,
targetDir,
dbFile,
multiConfig,
3,
10,
5000,
promptFile,
tempDir.resolve("run.lock"),
tempDir.resolve("logs"),
"INFO",
false
);
return () -> config;
} catch (Exception e) {
throw new RuntimeException("Failed to set up test configuration", e);
}
}
private static ProviderConfiguration openAiConfig() {
return new ProviderConfiguration(
"gpt-4o-mini", 30, "https://api.openai.com/v1", "test-openai-key");
}
private static ProviderConfiguration claudeConfig() {
return new ProviderConfiguration(
"claude-3-5-sonnet-20241022", 60, "https://api.anthropic.com", "test-claude-key");
}
// -------------------------------------------------------------------------
// Minimal test doubles
// -------------------------------------------------------------------------
private static class NoOpRunLockPort implements RunLockPort {
@Override public void acquire() { }
@Override public void release() { }
}
private static class NoOpSchemaInitializationPort implements PersistenceSchemaInitializationPort {
@Override public void initializeSchema() { }
}
}

View File

@@ -52,15 +52,16 @@ class ExecutableJarSmokeTestIT {
Path promptTemplateFile = Files.createFile(promptDir.resolve("template.txt"));
Files.writeString(promptTemplateFile, "Test prompt template for smoke test.");
// Write valid application.properties
Path configFile = configDir.resolve("application.properties");
String validConfig = """
source.folder=%s
target.folder=%s
sqlite.file=%s
api.baseUrl=http://localhost:8080/api
api.model=gpt-4o-mini
api.timeoutSeconds=30
ai.provider.active=openai-compatible
ai.provider.openai-compatible.baseUrl=http://localhost:8080/api
ai.provider.openai-compatible.model=gpt-4o-mini
ai.provider.openai-compatible.timeoutSeconds=30
ai.provider.openai-compatible.apiKey=test-api-key-for-smoke-test
max.retries.transient=3
max.pages=10
max.text.characters=5000
@@ -68,7 +69,6 @@ class ExecutableJarSmokeTestIT {
runtime.lock.file=%s/lock.pid
log.directory=%s
log.level=INFO
api.key=test-api-key-for-smoke-test
""".formatted(
sourceDir.toAbsolutePath(),
targetDir.toAbsolutePath(),
@@ -185,16 +185,17 @@ class ExecutableJarSmokeTestIT {
source.folder=%s
# target.folder is intentionally missing - should cause validation failure
sqlite.file=%s
api.baseUrl=http://localhost:8080/api
api.model=gpt-4o-mini
api.timeoutSeconds=30
ai.provider.active=openai-compatible
ai.provider.openai-compatible.baseUrl=http://localhost:8080/api
ai.provider.openai-compatible.model=gpt-4o-mini
ai.provider.openai-compatible.timeoutSeconds=30
ai.provider.openai-compatible.apiKey=test-api-key
max.retries.transient=3
max.pages=10
max.text.characters=5000
prompt.template.file=%s
log.directory=%s/logs
log.level=INFO
api.key=test-api-key
""".formatted(
sourceDir.toAbsolutePath(),
sqliteFile.toAbsolutePath(),

View File

@@ -139,6 +139,9 @@ public final class E2ETestContext implements AutoCloseable {
*/
private TargetFileCopyPort targetFileCopyPortOverride;
/** Provider identifier written into the attempt history for each batch run. */
private final String providerIdentifier;
private E2ETestContext(
Path sourceFolder,
Path targetFolder,
@@ -147,7 +150,8 @@ public final class E2ETestContext implements AutoCloseable {
String jdbcUrl,
SqliteDocumentRecordRepositoryAdapter documentRepo,
SqliteProcessingAttemptRepositoryAdapter attemptRepo,
StubAiInvocationPort aiStub) {
StubAiInvocationPort aiStub,
String providerIdentifier) {
this.sourceFolder = sourceFolder;
this.targetFolder = targetFolder;
this.lockFile = lockFile;
@@ -156,19 +160,36 @@ public final class E2ETestContext implements AutoCloseable {
this.documentRepo = documentRepo;
this.attemptRepo = attemptRepo;
this.aiStub = aiStub;
this.providerIdentifier = providerIdentifier;
}
/**
* Initializes a fully wired end-to-end test context rooted in {@code tempDir}.
* <p>
* Creates the {@code source/}, {@code target/} subdirectories and a minimal prompt
* file, initializes the SQLite schema, and wires all adapters.
* Initializes a fully wired end-to-end test context rooted in {@code tempDir},
* using the default provider identifier {@code "openai-compatible"}.
*
* @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
* @return a ready-to-use context; caller is responsible for closing it
* @throws Exception if schema initialization or directory/file creation fails
*/
public static E2ETestContext initialize(Path tempDir) throws Exception {
return initializeWithProvider(tempDir, "openai-compatible");
}
/**
* Initializes a fully wired end-to-end test context rooted in {@code tempDir} with
* a configurable provider identifier written into each attempt's history record.
* <p>
* Creates the {@code source/}, {@code target/} subdirectories and a minimal prompt
* file, initializes the SQLite schema, and wires all adapters.
*
* @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
* @param providerIdentifier the provider identifier stored in {@code ai_provider} for each
* attempt (e.g. {@code "openai-compatible"} or {@code "claude"})
* @return a ready-to-use context; caller is responsible for closing it
* @throws Exception if schema initialization or directory/file creation fails
*/
public static E2ETestContext initializeWithProvider(Path tempDir, String providerIdentifier)
throws Exception {
Path sourceFolder = Files.createDirectories(tempDir.resolve("source"));
Path targetFolder = Files.createDirectories(tempDir.resolve("target"));
Path lockFile = tempDir.resolve("run.lock");
@@ -189,7 +210,8 @@ public final class E2ETestContext implements AutoCloseable {
return new E2ETestContext(
sourceFolder, targetFolder, lockFile, promptFile,
jdbcUrl, documentRepo, attemptRepo, new StubAiInvocationPort());
jdbcUrl, documentRepo, attemptRepo, new StubAiInvocationPort(),
providerIdentifier);
}
// =========================================================================
@@ -377,7 +399,8 @@ public final class E2ETestContext implements AutoCloseable {
targetFolderPort,
targetFileCopyPort,
coordinatorLogger,
MAX_RETRIES_TRANSIENT);
MAX_RETRIES_TRANSIENT,
providerIdentifier);
PromptPort promptPort = new FilesystemPromptPortAdapter(promptFile);
ClockPort clockPort = new SystemClockAdapter();

View File

@@ -0,0 +1,397 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.LegacyConfigurationMigrator;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.nio.file.Files;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.PreparedStatement;
import java.sql.Statement;
import java.util.List;
import java.util.Optional;
import java.util.Properties;
import static org.assertj.core.api.Assertions.assertThat;
/**
* End-to-end regression and provider-identifier tests verifying the complete integration
* of the multi-provider extension with the existing batch processing pipeline.
*
* <h2>Test cases covered</h2>
* <ul>
* <li><strong>regressionExistingOpenAiSuiteGreen</strong> — proves the OpenAI-compatible path
* still works end-to-end through the full batch pipeline after the multi-provider
* extension was introduced.</li>
* <li><strong>e2eOpenAiRunWritesProviderIdentifierToHistory</strong> — verifies that a
* batch run with the {@code openai-compatible} provider writes {@code "openai-compatible"}
* into the {@code ai_provider} column of the attempt history.</li>
* <li><strong>e2eClaudeRunWritesProviderIdentifierToHistory</strong> — verifies that a
* batch run with the {@code claude} provider identifier writes {@code "claude"}
* into the {@code ai_provider} column of the attempt history.</li>
* <li><strong>e2eMigrationFromLegacyDemoConfig</strong> — proves that a Legacy configuration
* file is correctly migrated: the {@code .bak} backup preserves the original content, the
* migrated file uses the new schema with {@code ai.provider.active=openai-compatible}, and
* a batch run started after migration completes functionally like one started with the
* new schema directly.</li>
* <li><strong>legacyDataFromBeforeV11RemainsReadable</strong> — proves that a SQLite database
* created before the {@code ai_provider} column was added remains fully readable after
* schema evolution: historical attempts are returned with a {@code null} provider, and
* a new batch run can write successfully to the same database.</li>
* </ul>
*/
class ProviderIdentifierE2ETest {

    // Minimal invoice-like text embedded in every generated test PDF.
    private static final String SAMPLE_PDF_TEXT =
            "Testrechnung Musterstadt Datum 20.03.2024 Betrag 89,00 EUR";

    // =========================================================================
    // Mandatory test case: regressionExistingOpenAiSuiteGreen
    // =========================================================================
    /**
     * Regression proof: the OpenAI-compatible provider path still produces the correct
     * end-to-end outcome after the multi-provider extension.
     * <p>
     * Runs the two-phase happy path (AI call → {@code PROPOSAL_READY} in run 1,
     * file copy → {@code SUCCESS} in run 2) with the {@code openai-compatible} provider
     * identifier and verifies the final state matches the expected success outcome.
     * This is the canonical regression check for the existing OpenAI flow.
     */
    @Test
    void regressionExistingOpenAiSuiteGreen(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("regression.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("regression.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
            // Run 1: AI produces naming proposal
            BatchRunOutcome run1 = ctx.runBatch();
            assertThat(run1).isEqualTo(BatchRunOutcome.SUCCESS);
            assertThat(resolveRecord(ctx, fp).overallStatus())
                    .isEqualTo(ProcessingStatus.PROPOSAL_READY);
            // No file may be copied before the proposal is finalized in run 2.
            assertThat(ctx.listTargetFiles()).isEmpty();
            // Run 2: Finalization without AI call
            ctx.aiStub.resetInvocationCount();
            BatchRunOutcome run2 = ctx.runBatch();
            assertThat(run2).isEqualTo(BatchRunOutcome.SUCCESS);
            assertThat(ctx.aiStub.invocationCount())
                    .as("Existing OpenAI path must not re-invoke AI when PROPOSAL_READY exists")
                    .isEqualTo(0);
            assertThat(resolveRecord(ctx, fp).overallStatus())
                    .isEqualTo(ProcessingStatus.SUCCESS);
            assertThat(ctx.listTargetFiles()).hasSize(1);
        }
    }

    // =========================================================================
    // Mandatory test case: e2eOpenAiRunWritesProviderIdentifierToHistory
    // =========================================================================
    /**
     * Verifies that a batch run using the {@code openai-compatible} provider identifier
     * persists {@code "openai-compatible"} in the {@code ai_provider} field of the
     * attempt history record.
     */
    @Test
    void e2eOpenAiRunWritesProviderIdentifierToHistory(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            DocumentFingerprint fp = ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));
            ctx.runBatch();
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(1);
            assertThat(attempts.get(0).aiProvider())
                    .as("Attempt produced by openai-compatible run must carry 'openai-compatible' as provider")
                    .isEqualTo("openai-compatible");
        }
    }

    // =========================================================================
    // Mandatory test case: e2eClaudeRunWritesProviderIdentifierToHistory
    // =========================================================================
    /**
     * Verifies that a batch run using the {@code claude} provider identifier persists
     * {@code "claude"} in the {@code ai_provider} field of the attempt history record.
     * <p>
     * The AI invocation itself is still handled by the configurable {@link StubAiInvocationPort};
     * only the provider identifier string (written by the coordinator) is the subject of this test.
     */
    @Test
    void e2eClaudeRunWritesProviderIdentifierToHistory(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initializeWithProvider(tempDir, "claude")) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            DocumentFingerprint fp = ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));
            ctx.runBatch();
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(1);
            assertThat(attempts.get(0).aiProvider())
                    .as("Attempt produced by claude run must carry 'claude' as provider")
                    .isEqualTo("claude");
        }
    }

    // =========================================================================
    // Mandatory test case: e2eMigrationFromLegacyDemoConfig
    // =========================================================================
    /**
     * End-to-end migration proof: a legacy flat-key configuration file is migrated
     * correctly, the backup is preserved, and a subsequent batch run completes successfully.
     *
     * <h2>What is verified</h2>
     * <ol>
     *   <li>The {@code .bak} file exists after migration and its content equals the
     *       original file content verbatim.</li>
     *   <li>The migrated file contains {@code ai.provider.active=openai-compatible}.</li>
     *   <li>The legacy values are mapped to the {@code ai.provider.openai-compatible.*}
     *       namespace.</li>
     *   <li>Non-AI keys ({@code source.folder}, {@code max.pages}, …) are preserved
     *       unchanged.</li>
     *   <li>A batch run started with the stub AI after migration completes with
     *       {@link BatchRunOutcome#SUCCESS}, proving functional equivalence with a run
     *       started from a freshly written new-schema file.</li>
     * </ol>
     */
    @Test
    void e2eMigrationFromLegacyDemoConfig(@TempDir Path tempDir) throws Exception {
        // --- Arrange: write a legacy config file ---
        Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
        Path targetDir = Files.createDirectories(tempDir.resolve("target"));
        Path configFile = tempDir.resolve("application.properties");
        // Forward slashes keep the file valid for Properties.load() on Windows,
        // where '\' would otherwise be consumed as an escape character.
        String legacyContent =
                "source.folder=" + sourceDir.toAbsolutePath().toString().replace('\\', '/') + "\n"
                + "target.folder=" + targetDir.toAbsolutePath().toString().replace('\\', '/') + "\n"
                + "sqlite.file=" + tempDir.resolve("db.sqlite").toAbsolutePath().toString().replace('\\', '/') + "\n"
                + "api.baseUrl=https://api.openai.com/v1\n"
                + "api.model=gpt-4o-mini\n"
                + "api.timeoutSeconds=30\n"
                + "api.key=test-legacy-key-demo\n"
                + "max.retries.transient=3\n"
                + "max.pages=10\n"
                + "max.text.characters=5000\n";
        Files.writeString(configFile, legacyContent);
        // --- Act: run migration ---
        new LegacyConfigurationMigrator().migrateIfLegacy(configFile);
        // --- Assert: backup exists with original content ---
        Path bakFile = tempDir.resolve("application.properties.bak");
        assertThat(Files.exists(bakFile))
                .as(".bak file must be created before migration overwrites the original")
                .isTrue();
        assertThat(Files.readString(bakFile))
                .as(".bak content must equal the original file content verbatim")
                .isEqualTo(legacyContent);
        // --- Assert: migrated file has new schema ---
        Properties migrated = new Properties();
        try (var reader = Files.newBufferedReader(configFile)) {
            migrated.load(reader);
        }
        assertThat(migrated.getProperty("ai.provider.active"))
                .as("Migrated file must contain ai.provider.active=openai-compatible")
                .isEqualTo("openai-compatible");
        assertThat(migrated.getProperty("ai.provider.openai-compatible.baseUrl"))
                .as("Legacy api.baseUrl must be migrated to openai-compatible namespace")
                .isEqualTo("https://api.openai.com/v1");
        assertThat(migrated.getProperty("ai.provider.openai-compatible.model"))
                .isEqualTo("gpt-4o-mini");
        assertThat(migrated.getProperty("ai.provider.openai-compatible.timeoutSeconds"))
                .isEqualTo("30");
        assertThat(migrated.getProperty("ai.provider.openai-compatible.apiKey"))
                .isEqualTo("test-legacy-key-demo");
        assertThat(migrated.getProperty("max.retries.transient"))
                .as("Non-AI keys must survive migration unchanged")
                .isEqualTo("3");
        assertThat(migrated.getProperty("max.pages")).isEqualTo("10");
        // Legacy flat keys must no longer be present
        assertThat(migrated.getProperty("api.baseUrl"))
                .as("Legacy api.baseUrl must not remain in migrated file")
                .isNull();
        // --- Assert: batch run after migration completes successfully ---
        // The E2ETestContext is independent of the properties file; it wires directly.
        // This proves that the application pipeline works correctly for an openai-compatible run,
        // which is the provider selected by migration.
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir.resolve("e2e"))) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            BatchRunOutcome outcome = ctx.runBatch();
            assertThat(outcome)
                    .as("Batch run after migration must succeed (provider: openai-compatible)")
                    .isEqualTo(BatchRunOutcome.SUCCESS);
        }
    }

    // =========================================================================
    // Mandatory test case: legacyDataFromBeforeV11RemainsReadable
    // =========================================================================
    /**
     * Proves backward compatibility with databases created before the {@code ai_provider}
     * column was introduced.
     *
     * <h2>What is verified</h2>
     * <ol>
     *   <li>A database without the {@code ai_provider} column can be opened and its existing
     *       rows read without throwing any exception.</li>
     *   <li>The {@code aiProvider} field for pre-extension rows is {@code null} (no synthesised
     *       default, no error).</li>
     *   <li>Other fields on the pre-extension attempt (status, retryable flag) remain
     *       correctly readable after schema evolution.</li>
     *   <li>A new batch run on the same database succeeds, proving that the evolved schema
     *       is fully write-compatible with the legacy data.</li>
     * </ol>
     */
    @Test
    void legacyDataFromBeforeV11RemainsReadable(@TempDir Path tempDir) throws Exception {
        // Build a database without the ai_provider column (simulates pre-extension installation)
        String jdbcUrl = "jdbc:sqlite:"
                + tempDir.resolve("legacy.db").toAbsolutePath().toString().replace('\\', '/');
        createPreExtensionSchema(jdbcUrl);
        // Insert a legacy attempt row (no ai_provider column present in schema at this point)
        DocumentFingerprint legacyFp = fingerprint("aabbcc");
        insertLegacyData(jdbcUrl, legacyFp);
        // Initialize the full schema — this must add ai_provider idempotently
        de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter schema =
                new de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter(jdbcUrl);
        schema.initializeSchema();
        // Read back the legacy attempt — must not throw, aiProvider must be null
        de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter repo =
                new de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
        List<ProcessingAttempt> attempts = repo.findAllByFingerprint(legacyFp);
        assertThat(attempts).hasSize(1);
        assertThat(attempts.get(0).aiProvider())
                .as("Pre-extension attempt must have null aiProvider after schema evolution")
                .isNull();
        assertThat(attempts.get(0).status())
                .as("Other fields of the pre-extension row must still be readable")
                .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(attempts.get(0).retryable()).isTrue();
        // A new batch run on the same database must succeed (write-compatible evolved schema)
        try (E2ETestContext ctx = E2ETestContext.initializeWithProvider(
                tempDir.resolve("newrun"), "openai-compatible")) {
            ctx.createSearchablePdf("newdoc.pdf", SAMPLE_PDF_TEXT);
            BatchRunOutcome outcome = ctx.runBatch();
            assertThat(outcome)
                    .as("Batch run on evolved database must succeed")
                    .isEqualTo(BatchRunOutcome.SUCCESS);
        }
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /** Resolves the document record for {@code fp} or fails the test if absent. */
    private static DocumentRecord resolveRecord(E2ETestContext ctx, DocumentFingerprint fp) {
        return ctx.findDocumentRecord(fp)
                .orElseThrow(() -> new AssertionError("No document record found for fingerprint"));
    }

    /** Left-pads {@code suffix} with zeros to a 64-character hex fingerprint. */
    private static DocumentFingerprint fingerprint(String suffix) {
        return new DocumentFingerprint("0".repeat(64 - suffix.length()) + suffix);
    }

    /**
     * Creates the base schema tables that existed before the {@code ai_provider} column
     * was added, without running the schema evolution step.
     */
    private static void createPreExtensionSchema(String jdbcUrl) throws Exception {
        try (Connection conn = DriverManager.getConnection(jdbcUrl);
             Statement stmt = conn.createStatement()) {
            stmt.execute("PRAGMA foreign_keys = ON");
            stmt.execute("""
                    CREATE TABLE IF NOT EXISTS document_record (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    fingerprint TEXT NOT NULL,
                    last_known_source_locator TEXT NOT NULL,
                    last_known_source_file_name TEXT NOT NULL,
                    overall_status TEXT NOT NULL,
                    content_error_count INTEGER NOT NULL DEFAULT 0,
                    transient_error_count INTEGER NOT NULL DEFAULT 0,
                    last_failure_instant TEXT,
                    last_success_instant TEXT,
                    created_at TEXT NOT NULL,
                    updated_at TEXT NOT NULL,
                    CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint)
                    )""");
            // Deliberately no ai_provider column here: this is the pre-V1.1 layout.
            stmt.execute("""
                    CREATE TABLE IF NOT EXISTS processing_attempt (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    fingerprint TEXT NOT NULL,
                    run_id TEXT NOT NULL,
                    attempt_number INTEGER NOT NULL,
                    started_at TEXT NOT NULL,
                    ended_at TEXT NOT NULL,
                    status TEXT NOT NULL,
                    failure_class TEXT,
                    failure_message TEXT,
                    retryable INTEGER NOT NULL DEFAULT 0,
                    model_name TEXT,
                    prompt_identifier TEXT,
                    processed_page_count INTEGER,
                    sent_character_count INTEGER,
                    ai_raw_response TEXT,
                    ai_reasoning TEXT,
                    resolved_date TEXT,
                    date_source TEXT,
                    validated_title TEXT,
                    final_target_file_name TEXT,
                    CONSTRAINT fk_processing_attempt_fingerprint
                    FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint),
                    CONSTRAINT uq_processing_attempt_fingerprint_number
                    UNIQUE (fingerprint, attempt_number)
                    )""");
        }
    }

    /**
     * Inserts one document record and one matching attempt row into a pre-extension database
     * (no {@code ai_provider} column present at insert time).
     */
    private static void insertLegacyData(String jdbcUrl, DocumentFingerprint fp) throws Exception {
        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            try (PreparedStatement ps = conn.prepareStatement("""
                    INSERT INTO document_record
                    (fingerprint, last_known_source_locator, last_known_source_file_name,
                    overall_status, transient_error_count, created_at, updated_at)
                    VALUES (?, '/legacy/doc.pdf', 'doc.pdf', 'FAILED_RETRYABLE', 1,
                    strftime('%Y-%m-%dT%H:%M:%SZ','now'),
                    strftime('%Y-%m-%dT%H:%M:%SZ','now'))""")) {
                ps.setString(1, fp.sha256Hex());
                ps.executeUpdate();
            }
            try (PreparedStatement ps = conn.prepareStatement("""
                    INSERT INTO processing_attempt
                    (fingerprint, run_id, attempt_number, started_at, ended_at,
                    status, failure_class, failure_message, retryable)
                    VALUES (?, 'legacy-run-001', 1,
                    strftime('%Y-%m-%dT%H:%M:%SZ','now'),
                    strftime('%Y-%m-%dT%H:%M:%SZ','now'),
                    'FAILED_RETRYABLE', 'TIMEOUT', 'Connection timed out', 1)""")) {
                ps.setString(1, fp.sha256Hex());
                ps.executeUpdate();
            }
        }
    }
}