V1.1 Änderungen

This commit is contained in:
2026-04-09 05:42:02 +02:00
parent 39800b6ea8
commit 5099ff4aca
44 changed files with 4912 additions and 957 deletions
@@ -0,0 +1,394 @@
package de.gecheckt.pdf.umbenenner.adapter.out.ai;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Objects;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
/**
* Adapter implementing the native Anthropic Messages API for AI service invocation.
* <p>
* This adapter:
* <ul>
* <li>Translates an abstract {@link AiRequestRepresentation} into an Anthropic
* Messages API request (POST {@code /v1/messages})</li>
* <li>Configures HTTP connection, timeout, and authentication from the provider
* configuration using the Anthropic-specific authentication scheme
* ({@code x-api-key} header, not {@code Authorization: Bearer})</li>
* <li>Extracts the response text by concatenating all {@code text}-type content
* blocks from the Anthropic response, returning the result as a raw response
* for Application-layer parsing and validation</li>
* <li>Classifies technical failures (HTTP errors, timeouts, missing content blocks,
* unparseable JSON) according to the existing transient error semantics</li>
* </ul>
*
* <h2>Configuration</h2>
* <ul>
* <li>{@code baseUrl} — the HTTP(S) base URL; defaults to {@code https://api.anthropic.com}
* when absent or blank</li>
* <li>{@code model} — the Claude model identifier (e.g., {@code claude-3-5-sonnet-20241022})</li>
* <li>{@code timeoutSeconds} — connection and read timeout in seconds</li>
* <li>{@code apiKey} — the authentication token, resolved from environment variable
* {@code ANTHROPIC_API_KEY} or property {@code ai.provider.claude.apiKey};
* environment variable takes precedence (resolved by the configuration layer
* before this adapter is constructed)</li>
* </ul>
*
* <h2>HTTP request structure</h2>
* <p>
* The adapter sends a POST request to {@code {baseUrl}/v1/messages} with:
* <ul>
* <li>Header {@code x-api-key} containing the resolved API key</li>
* <li>Header {@code anthropic-version: 2023-06-01}</li>
* <li>Header {@code content-type: application/json}</li>
* <li>JSON body containing:
* <ul>
* <li>{@code model} — the configured model name</li>
* <li>{@code max_tokens} — fixed at 1024; sufficient for the expected JSON response
* without requiring a separate configuration property</li>
* <li>{@code system} — the prompt content (if non-blank); Anthropic uses a
* top-level field instead of a {@code role=system} message</li>
* <li>{@code messages} — an array with exactly one {@code user} message containing
* the document text</li>
* </ul>
* </li>
* </ul>
*
* <h2>Response handling</h2>
* <ul>
* <li><strong>HTTP 200:</strong> All {@code content} blocks with {@code type=="text"}
* are concatenated in order; the result is returned as {@link AiInvocationSuccess}
* with an {@link AiRawResponse} containing the concatenated text. The Application
* layer then parses and validates this text as a NamingProposal JSON object.</li>
* <li><strong>No text blocks in HTTP 200 response:</strong> Classified as a technical
* failure; the Application layer cannot derive a naming proposal without text.</li>
* <li><strong>Unparseable response JSON:</strong> Classified as a technical failure.</li>
* <li><strong>HTTP non-200:</strong> Classified as a technical failure.</li>
* </ul>
*
* <h2>Technical error classification</h2>
* <p>
* All errors are mapped to {@link AiInvocationTechnicalFailure} and follow the existing
* transient error semantics. No new error categories are introduced:
* <ul>
* <li>HTTP 4xx (including 401, 403, 429) and 5xx — technical failure</li>
* <li>Connection timeout, read timeout — {@code TIMEOUT}</li>
* <li>Connection failure — {@code CONNECTION_ERROR}</li>
* <li>DNS failure — {@code DNS_ERROR}</li>
* <li>IO errors — {@code IO_ERROR}</li>
* <li>Interrupted operation — {@code INTERRUPTED}</li>
* <li>JSON not parseable — {@code UNPARSEABLE_JSON}</li>
* <li>No {@code text}-type content block in response — {@code NO_TEXT_CONTENT}</li>
* </ul>
*
* <h2>Non-goals</h2>
* <ul>
* <li>NamingProposal JSON parsing or validation — the Application layer owns this</li>
* <li>Retry logic — this adapter executes a single request only</li>
* <li>Shared implementation with the OpenAI-compatible adapter — no common base class</li>
* </ul>
*/
public class AnthropicClaudeHttpAdapter implements AiInvocationPort {

    private static final Logger LOG = LogManager.getLogger(AnthropicClaudeHttpAdapter.class);

    private static final String MESSAGES_ENDPOINT = "/v1/messages";
    private static final String ANTHROPIC_VERSION_HEADER = "anthropic-version";
    private static final String ANTHROPIC_VERSION_VALUE = "2023-06-01";
    private static final String API_KEY_HEADER = "x-api-key";
    private static final String CONTENT_TYPE = "application/json";
    private static final String DEFAULT_BASE_URL = "https://api.anthropic.com";

    /**
     * Fixed max_tokens value for the Anthropic request.
     * <p>
     * This value is sufficient for the expected NamingProposal JSON response
     * ({@code date}, {@code title}, {@code reasoning}) without requiring a separate
     * configuration property. Anthropic's API requires this field to be present.
     */
    private static final int MAX_TOKENS = 1024;

    /** Upper bound for walking an exception cause chain; guards against cyclic causes. */
    private static final int MAX_CAUSE_DEPTH = 10;

    private final HttpClient httpClient;
    private final URI apiBaseUrl;
    private final String apiModel;
    private final String apiKey;
    private final int apiTimeoutSeconds;

    // Test-only field to capture the last built JSON body for assertion
    private volatile String lastBuiltJsonBody;

    /**
     * Creates an adapter from the Claude provider configuration.
     * <p>
     * If {@code config.baseUrl()} is absent or blank, the default Anthropic endpoint
     * {@code https://api.anthropic.com} is used. The HTTP client is initialized with
     * the configured timeout as its connect timeout.
     *
     * @param config the provider configuration for the Claude family; must not be null
     * @throws NullPointerException if config is null
     * @throws IllegalArgumentException if the model is missing or blank, if the base URL
     *         is not a syntactically valid URI, or if the configured timeout is not positive
     */
    public AnthropicClaudeHttpAdapter(ProviderConfiguration config) {
        this(config, HttpClient.newBuilder()
                .connectTimeout(Duration.ofSeconds(config.timeoutSeconds()))
                .build());
    }

    /**
     * Creates an adapter with a custom HTTP client (primarily for testing).
     * <p>
     * This constructor allows tests to inject a mock or configurable HTTP client
     * while keeping configuration validation consistent with the production constructor.
     * <p>
     * <strong>For testing only:</strong> This is package-private to remain internal to the adapter.
     *
     * @param config the provider configuration; must not be null
     * @param httpClient the HTTP client to use; must not be null
     * @throws NullPointerException if config or httpClient is null
     * @throws IllegalArgumentException if the model is missing or blank, or if the base URL
     *         is not a syntactically valid URI
     */
    AnthropicClaudeHttpAdapter(ProviderConfiguration config, HttpClient httpClient) {
        Objects.requireNonNull(config, "config must not be null");
        Objects.requireNonNull(httpClient, "httpClient must not be null");
        if (config.model() == null || config.model().isBlank()) {
            throw new IllegalArgumentException("API model must not be null or empty");
        }
        String baseUrlStr = (config.baseUrl() != null && !config.baseUrl().isBlank())
                ? config.baseUrl()
                : DEFAULT_BASE_URL;
        this.apiBaseUrl = URI.create(baseUrlStr);
        this.apiModel = config.model();
        // A missing key is sent as an empty header value; the service then answers with an HTTP error.
        this.apiKey = config.apiKey() != null ? config.apiKey() : "";
        this.apiTimeoutSeconds = config.timeoutSeconds();
        this.httpClient = httpClient;
        LOG.debug("AnthropicClaudeHttpAdapter initialized with base URL: {}, model: {}, timeout: {}s",
                apiBaseUrl, apiModel, apiTimeoutSeconds);
    }

    /**
     * Invokes the Anthropic Claude AI service with the given request.
     * <p>
     * Constructs an Anthropic Messages API request from the request representation,
     * executes it, extracts the text content from the response, and returns either
     * a successful response or a classified technical failure. No exception other than
     * the {@link NullPointerException} for a null request escapes this method.
     * <p>
     * DNS failures are recognised both as a direct {@link java.net.UnknownHostException}
     * and as a {@link java.net.ConnectException} whose cause chain contains an
     * {@link java.nio.channels.UnresolvedAddressException}; the latter is how
     * {@link HttpClient} reports an unresolvable host.
     *
     * @param request the AI request with prompt and document text; must not be null
     * @return an {@link AiInvocationResult} encoding either success (with extracted text)
     *         or a technical failure with classified reason
     * @throws NullPointerException if request is null
     */
    @Override
    public AiInvocationResult invoke(AiRequestRepresentation request) {
        Objects.requireNonNull(request, "request must not be null");
        try {
            HttpRequest httpRequest = buildRequest(request);
            HttpResponse<String> response = executeRequest(httpRequest);
            if (response.statusCode() == 200) {
                return extractTextFromResponse(request, response.body());
            }
            String reason = "HTTP_" + response.statusCode();
            String message = "Anthropic AI service returned status " + response.statusCode();
            LOG.warn("Claude AI invocation returned non-200 status: {}", response.statusCode());
            return new AiInvocationTechnicalFailure(request, reason, message);
        } catch (java.net.http.HttpTimeoutException e) {
            // Must precede the IOException handler: HttpTimeoutException is an IOException.
            String message = "HTTP timeout: " + e.getClass().getSimpleName();
            LOG.warn("Claude AI invocation timeout: {}", message);
            return new AiInvocationTechnicalFailure(request, "TIMEOUT", message);
        } catch (java.net.ConnectException e) {
            // The JDK HTTP client wraps an unresolvable host into a ConnectException, so the
            // cause chain has to be inspected before classifying this as a plain connection error.
            if (isCausedByUnresolvedHost(e)) {
                return dnsFailure(request, e);
            }
            String message = "Failed to connect to endpoint: " + e.getMessage();
            LOG.warn("Claude AI invocation connection error: {}", message);
            return new AiInvocationTechnicalFailure(request, "CONNECTION_ERROR", message);
        } catch (java.net.UnknownHostException e) {
            return dnsFailure(request, e);
        } catch (java.io.IOException e) {
            String message = "IO error during AI invocation: " + e.getMessage();
            LOG.warn("Claude AI invocation IO error: {}", message);
            return new AiInvocationTechnicalFailure(request, "IO_ERROR", message);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can observe the interruption.
            Thread.currentThread().interrupt();
            String message = "AI invocation interrupted: " + e.getMessage();
            LOG.warn("Claude AI invocation interrupted: {}", message);
            return new AiInvocationTechnicalFailure(request, "INTERRUPTED", message);
        } catch (Exception e) {
            String message = "Unexpected error during AI invocation: " + e.getClass().getSimpleName()
                    + " - " + e.getMessage();
            LOG.error("Unexpected error in Claude AI invocation", e);
            return new AiInvocationTechnicalFailure(request, "UNEXPECTED_ERROR", message);
        }
    }

    /**
     * Tells whether the given failure or any of its causes signals an unresolvable host name.
     *
     * @param failure the exception to inspect; may be null
     * @return {@code true} if the cause chain contains an {@link java.net.UnknownHostException}
     *         or an {@link java.nio.channels.UnresolvedAddressException}
     */
    private static boolean isCausedByUnresolvedHost(Throwable failure) {
        Throwable current = failure;
        for (int depth = 0; current != null && depth < MAX_CAUSE_DEPTH; depth++) {
            if (current instanceof java.net.UnknownHostException
                    || current instanceof java.nio.channels.UnresolvedAddressException) {
                return true;
            }
            current = current.getCause();
        }
        return false;
    }

    /**
     * Builds the {@code DNS_ERROR} technical failure for an unresolvable endpoint host.
     * <p>
     * When the exception carries no message, the configured host name is reported instead
     * so the failure message stays meaningful.
     *
     * @param request the original request (carried through to the result)
     * @param e the exception that signalled the DNS failure
     * @return the technical failure with reason {@code DNS_ERROR}
     */
    private AiInvocationResult dnsFailure(AiRequestRepresentation request, Exception e) {
        String detail = e.getMessage() != null ? e.getMessage() : apiBaseUrl.getHost();
        String message = "Endpoint hostname not resolvable: " + detail;
        LOG.warn("Claude AI invocation DNS error: {}", message);
        return new AiInvocationTechnicalFailure(request, "DNS_ERROR", message);
    }

    /**
     * Builds an Anthropic Messages API request from the request representation.
     * <p>
     * Constructs:
     * <ul>
     * <li>Endpoint URL: {@code {apiBaseUrl}/v1/messages}</li>
     * <li>Headers: {@code x-api-key}, {@code anthropic-version: 2023-06-01},
     * {@code content-type: application/json}</li>
     * <li>Body: JSON with {@code model}, {@code max_tokens}, optional {@code system}
     * (prompt content), and {@code messages} with a single user message
     * (document text)</li>
     * <li>Timeout: configured timeout from provider configuration</li>
     * </ul>
     *
     * @param request the request representation with prompt and document text
     * @return an {@link HttpRequest} ready to send
     */
    private HttpRequest buildRequest(AiRequestRepresentation request) {
        URI endpoint = buildEndpointUri();
        String requestBody = buildJsonRequestBody(request);
        // Capture for test inspection (test-only field)
        this.lastBuiltJsonBody = requestBody;
        return HttpRequest.newBuilder(endpoint)
                .header("content-type", CONTENT_TYPE)
                .header(API_KEY_HEADER, apiKey)
                .header(ANTHROPIC_VERSION_HEADER, ANTHROPIC_VERSION_VALUE)
                .POST(HttpRequest.BodyPublishers.ofString(requestBody))
                .timeout(Duration.ofSeconds(apiTimeoutSeconds))
                .build();
    }

    /**
     * Composes the endpoint URI from the configured base URL.
     * <p>
     * Resolves {@code {apiBaseUrl}/v1/messages}. A single trailing slash on the base
     * path is dropped so the result does not contain a double slash. Only scheme, host,
     * port and path of the base URL are carried over.
     *
     * @return the complete endpoint URI
     */
    private URI buildEndpointUri() {
        String endpointPath = apiBaseUrl.getPath().replaceAll("/$", "") + MESSAGES_ENDPOINT;
        return URI.create(apiBaseUrl.getScheme() + "://" +
                apiBaseUrl.getHost() +
                (apiBaseUrl.getPort() > 0 ? ":" + apiBaseUrl.getPort() : "") +
                endpointPath);
    }

    /**
     * Builds the JSON request body for the Anthropic Messages API.
     * <p>
     * The body contains:
     * <ul>
     * <li>{@code model} — the configured model name</li>
     * <li>{@code max_tokens} — fixed value sufficient for the expected response</li>
     * <li>{@code system} — the prompt content as a top-level field (only when non-blank;
     * Anthropic does not accept {@code role=system} inside the {@code messages} array)</li>
     * <li>{@code messages} — an array with exactly one user message containing the
     * document text</li>
     * </ul>
     * <p>
     * <strong>Package-private for testing:</strong> This method is accessible to tests
     * in the same package to verify the actual JSON body structure and content.
     *
     * @param request the request with prompt and document text
     * @return JSON string ready to send in HTTP body
     */
    String buildJsonRequestBody(AiRequestRepresentation request) {
        JSONObject body = new JSONObject();
        body.put("model", apiModel);
        body.put("max_tokens", MAX_TOKENS);
        // Prompt content goes to the top-level system field (not a role=system message)
        if (request.promptContent() != null && !request.promptContent().isBlank()) {
            body.put("system", request.promptContent());
        }
        JSONObject userMessage = new JSONObject();
        userMessage.put("role", "user");
        userMessage.put("content", request.documentText());
        body.put("messages", new JSONArray().put(userMessage));
        return body.toString();
    }

    /**
     * Extracts the text content from a successful (HTTP 200) Anthropic response.
     * <p>
     * Concatenates all {@code content} blocks with {@code type=="text"} in order.
     * Blocks of other types (e.g., tool use) are ignored.
     * If the concatenated text is empty — no {@code text} blocks at all, or only empty
     * ones — a {@code NO_TEXT_CONTENT} technical failure is returned. Any
     * {@link JSONException} (invalid JSON, missing {@code content} array, text block
     * without a {@code text} string) is reported as {@code UNPARSEABLE_JSON}.
     *
     * @param request the original request (carried through to the result)
     * @param responseBody the raw HTTP response body
     * @return success with the concatenated text, or a technical failure
     */
    private AiInvocationResult extractTextFromResponse(AiRequestRepresentation request, String responseBody) {
        try {
            JSONObject json = new JSONObject(responseBody);
            JSONArray contentArray = json.getJSONArray("content");
            StringBuilder textBuilder = new StringBuilder();
            for (int i = 0; i < contentArray.length(); i++) {
                JSONObject block = contentArray.getJSONObject(i);
                if ("text".equals(block.optString("type"))) {
                    textBuilder.append(block.getString("text"));
                }
            }
            String extractedText = textBuilder.toString();
            if (extractedText.isEmpty()) {
                LOG.warn("Claude AI response contained no text-type content blocks");
                return new AiInvocationTechnicalFailure(request, "NO_TEXT_CONTENT",
                        "Anthropic response contained no text-type content blocks");
            }
            return new AiInvocationSuccess(request, new AiRawResponse(extractedText));
        } catch (JSONException e) {
            LOG.warn("Claude AI response could not be parsed as JSON: {}", e.getMessage());
            return new AiInvocationTechnicalFailure(request, "UNPARSEABLE_JSON",
                    "Anthropic response body is not valid JSON: " + e.getMessage());
        }
    }

    /**
     * Package-private accessor for the last constructed JSON body.
     * <p>
     * <strong>For testing only:</strong> Allows tests to verify the actual
     * JSON body sent in HTTP requests without exposing the BodyPublisher internals.
     *
     * @return the last JSON body string constructed by {@link #buildRequest(AiRequestRepresentation)},
     *         or null if no request has been built yet
     */
    String getLastBuiltJsonBodyForTesting() {
        return lastBuiltJsonBody;
    }

    /**
     * Executes the HTTP request and returns the response.
     *
     * @param httpRequest the HTTP request to execute
     * @return the HTTP response with status code and body
     * @throws java.net.http.HttpTimeoutException if the request times out
     * @throws java.net.ConnectException if connection fails
     * @throws java.io.IOException on other IO errors
     * @throws InterruptedException if the request is interrupted
     */
    private HttpResponse<String> executeRequest(HttpRequest httpRequest)
            throws java.io.IOException, InterruptedException {
        return httpClient.send(httpRequest, HttpResponse.BodyHandlers.ofString());
    }
}
@@ -11,7 +11,7 @@ import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.json.JSONObject;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
@@ -26,7 +26,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
* <ul>
* <li>Translates an abstract {@link AiRequestRepresentation} into an OpenAI Chat
* Completions API request</li>
* <li>Configures HTTP connection, timeout, and authentication from the startup configuration</li>
* <li>Configures HTTP connection, timeout, and authentication from the provider configuration</li>
* <li>Executes the HTTP request against the configured AI endpoint</li>
* <li>Distinguishes between successful HTTP responses (200) and technical failures
* (timeout, unreachable, connection error, etc.)</li>
@@ -36,16 +36,16 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
* <p>
* <strong>Configuration:</strong>
* <ul>
* <li>{@code apiBaseUrl} — the HTTP(S) base URL of the AI service endpoint</li>
* <li>{@code apiModel} — the model identifier requested from the AI service</li>
* <li>{@code apiTimeoutSeconds} — connection and read timeout in seconds</li>
* <li>{@code apiKey} — the authentication token (already resolved from environment
* variable {@code PDF_UMBENENNER_API_KEY} or property {@code api.key},
* <li>{@code baseUrl} — the HTTP(S) base URL of the AI service endpoint</li>
* <li>{@code model} — the model identifier requested from the AI service</li>
* <li>{@code timeoutSeconds} — connection and read timeout in seconds</li>
* <li>{@code apiKey} — the authentication token (resolved from environment variable
* {@code OPENAI_COMPATIBLE_API_KEY} or property {@code ai.provider.openai-compatible.apiKey},
* environment variable takes precedence)</li>
* </ul>
* <p>
* <strong>HTTP request structure:</strong>
* The adapter sends a POST request to the endpoint {@code {apiBaseUrl}/v1/chat/completions}
* The adapter sends a POST request to the endpoint {@code {baseUrl}/v1/chat/completions}
* with:
* <ul>
* <li>Authorization header containing the API key</li>
@@ -106,19 +106,18 @@ public class OpenAiHttpAdapter implements AiInvocationPort {
private volatile String lastBuiltJsonBody;
/**
* Creates an adapter with configuration from startup configuration.
* Creates an adapter from the OpenAI-compatible provider configuration.
* <p>
* The adapter initializes an HTTP client with the configured timeout and creates
* the endpoint URL from the base URL. Configuration values are validated for
* null/empty during initialization.
* The adapter initializes an HTTP client with the configured timeout and parses
* the endpoint URI from the configured base URL string.
*
* @param config the startup configuration containing API settings; must not be null
* @param config the provider configuration for the OpenAI-compatible family; must not be null
* @throws NullPointerException if config is null
* @throws IllegalArgumentException if API base URL or model is missing/empty
* @throws IllegalArgumentException if the base URL or model is missing/blank
*/
public OpenAiHttpAdapter(StartConfiguration config) {
public OpenAiHttpAdapter(ProviderConfiguration config) {
this(config, HttpClient.newBuilder()
.connectTimeout(Duration.ofSeconds(config.apiTimeoutSeconds()))
.connectTimeout(Duration.ofSeconds(config.timeoutSeconds()))
.build());
}
@@ -130,25 +129,25 @@ public class OpenAiHttpAdapter implements AiInvocationPort {
* <p>
* <strong>For testing only:</strong> This is package-private to remain internal to the adapter.
*
* @param config the startup configuration containing API settings; must not be null
* @param config the provider configuration; must not be null
* @param httpClient the HTTP client to use; must not be null
* @throws NullPointerException if config or httpClient is null
* @throws IllegalArgumentException if API base URL or model is missing/empty
* @throws IllegalArgumentException if the base URL or model is missing/blank
*/
OpenAiHttpAdapter(StartConfiguration config, HttpClient httpClient) {
OpenAiHttpAdapter(ProviderConfiguration config, HttpClient httpClient) {
Objects.requireNonNull(config, "config must not be null");
Objects.requireNonNull(httpClient, "httpClient must not be null");
if (config.apiBaseUrl() == null) {
if (config.baseUrl() == null || config.baseUrl().isBlank()) {
throw new IllegalArgumentException("API base URL must not be null");
}
if (config.apiModel() == null || config.apiModel().isBlank()) {
if (config.model() == null || config.model().isBlank()) {
throw new IllegalArgumentException("API model must not be null or empty");
}
this.apiBaseUrl = config.apiBaseUrl();
this.apiModel = config.apiModel();
this.apiBaseUrl = URI.create(config.baseUrl());
this.apiModel = config.model();
this.apiKey = config.apiKey() != null ? config.apiKey() : "";
this.apiTimeoutSeconds = config.apiTimeoutSeconds();
this.apiTimeoutSeconds = config.timeoutSeconds();
this.httpClient = httpClient;
LOG.debug("OpenAiHttpAdapter initialized with base URL: {}, model: {}, timeout: {}s",
@@ -229,7 +228,7 @@ public class OpenAiHttpAdapter implements AiInvocationPort {
* <li>Endpoint URL: {@code {apiBaseUrl}/v1/chat/completions}</li>
* <li>Headers: Authorization with Bearer token, Content-Type application/json</li>
* <li>Body: JSON with model, messages (system = prompt, user = document text)</li>
* <li>Timeout: configured timeout from startup configuration</li>
* <li>Timeout: configured timeout from provider configuration</li>
* </ul>
*
* @param request the request representation with prompt and document text
@@ -10,6 +10,7 @@ import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
/**
* Validates {@link StartConfiguration} before processing can begin.
* <p>
@@ -156,13 +157,13 @@ public class StartConfigurationValidator {
validateSourceFolder(config.sourceFolder(), errors);
validateTargetFolder(config.targetFolder(), errors);
validateSqliteFile(config.sqliteFile(), errors);
validateApiBaseUrl(config.apiBaseUrl(), errors);
validateApiModel(config.apiModel(), errors);
validatePromptTemplateFile(config.promptTemplateFile(), errors);
if (config.multiProviderConfiguration() == null) {
errors.add("- ai provider configuration: must not be null");
}
}
private void validateNumericConstraints(StartConfiguration config, List<String> errors) {
validateApiTimeoutSeconds(config.apiTimeoutSeconds(), errors);
validateMaxRetriesTransient(config.maxRetriesTransient(), errors);
validateMaxPages(config.maxPages(), errors);
validateMaxTextCharacters(config.maxTextCharacters(), errors);
@@ -199,33 +200,6 @@ public class StartConfigurationValidator {
validateRequiredFileParentDirectory(sqliteFile, "sqlite.file", errors);
}
/**
 * Checks that the API base URL is present, absolute and uses the http or https scheme.
 * At most one error is recorded per call.
 *
 * @param apiBaseUrl the configured base URL; may be null
 * @param errors collector that receives a message when the URL is rejected
 */
private void validateApiBaseUrl(java.net.URI apiBaseUrl, List<String> errors) {
    if (apiBaseUrl == null) {
        errors.add("- api.baseUrl: must not be null");
    } else if (!apiBaseUrl.isAbsolute()) {
        errors.add("- api.baseUrl: must be an absolute URI: " + apiBaseUrl);
    } else {
        String scheme = apiBaseUrl.getScheme();
        // equalsIgnoreCase(null) is false, so a missing scheme is rejected as well.
        boolean isHttp = "http".equalsIgnoreCase(scheme);
        boolean isHttps = "https".equalsIgnoreCase(scheme);
        if (!isHttp && !isHttps) {
            errors.add("- api.baseUrl: scheme must be http or https, got: " + scheme);
        }
    }
}
/**
 * Records an error when the model identifier is null or blank.
 *
 * @param apiModel the configured model name; may be null
 * @param errors collector that receives a message when the model is rejected
 */
private void validateApiModel(String apiModel, List<String> errors) {
    boolean missing = apiModel == null || apiModel.isBlank();
    if (missing) {
        errors.add("- api.model: must not be null or blank");
    }
}
/**
 * Records an error when the API timeout is not strictly positive.
 *
 * @param apiTimeoutSeconds the configured timeout in seconds
 * @param errors collector that receives a message when the timeout is rejected
 */
private void validateApiTimeoutSeconds(int apiTimeoutSeconds, List<String> errors) {
    if (apiTimeoutSeconds > 0) {
        return;
    }
    errors.add("- api.timeoutSeconds: must be > 0, got: " + apiTimeoutSeconds);
}
private void validateMaxRetriesTransient(int maxRetriesTransient, List<String> errors) {
if (maxRetriesTransient < 1) {
errors.add("- max.retries.transient: must be >= 1, got: " + maxRetriesTransient);
@@ -0,0 +1,306 @@
package de.gecheckt.pdf.umbenenner.adapter.out.configuration;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.Properties;
/**
* Detects and migrates a legacy flat-key configuration file to the multi-provider schema.
*
* <h2>Legacy form</h2>
* A configuration file is considered legacy if it contains at least one of the flat property
* keys ({@code api.baseUrl}, {@code api.model}, {@code api.timeoutSeconds}, {@code api.key})
* and does <em>not</em> already contain {@code ai.provider.active}.
*
* <h2>Migration procedure</h2>
* <ol>
* <li>Detect legacy form; if absent, return immediately without any I/O side effect.</li>
* <li>Create a {@code .bak} backup of the original file before any changes. If a {@code .bak}
* file already exists, a numbered suffix is appended ({@code .bak.1}, {@code .bak.2}, …).
* Existing backups are never overwritten.</li>
* <li>Rewrite the file:
* <ul>
* <li>{@code api.baseUrl} → {@code ai.provider.openai-compatible.baseUrl}</li>
* <li>{@code api.model} → {@code ai.provider.openai-compatible.model}</li>
* <li>{@code api.timeoutSeconds} → {@code ai.provider.openai-compatible.timeoutSeconds}</li>
* <li>{@code api.key} → {@code ai.provider.openai-compatible.apiKey}</li>
* <li>{@code ai.provider.active=openai-compatible} is appended.</li>
* <li>A commented placeholder section for the Claude provider is appended.</li>
* <li>All other keys are carried over unchanged in stable order.</li>
* </ul>
* </li>
* <li>Write the migrated content via a temporary file ({@code <file>.tmp}) followed by an
* atomic move/rename. The original file is never partially overwritten.</li>
* <li>Reload the migrated file and validate it with {@link MultiProviderConfigurationParser}
* and {@link MultiProviderConfigurationValidator}. If validation fails, a
* {@link ConfigurationLoadingException} is thrown; the {@code .bak} is preserved.</li>
* </ol>
*/
public class LegacyConfigurationMigrator {
private static final Logger LOG = LogManager.getLogger(LegacyConfigurationMigrator.class);
/** Legacy flat key for base URL, replaced during migration. */
static final String LEGACY_BASE_URL = "api.baseUrl";
/** Legacy flat key for model name, replaced during migration. */
static final String LEGACY_MODEL = "api.model";
/** Legacy flat key for timeout, replaced during migration. */
static final String LEGACY_TIMEOUT = "api.timeoutSeconds";
/** Legacy flat key for API key, replaced during migration. */
static final String LEGACY_API_KEY = "api.key";
/**
 * Rewrite table used during migration: each row is a pair
 * {@code {legacyKey, replacementKey}}, where index 0 holds the flat legacy key and
 * index 1 the namespaced {@code ai.provider.openai-compatible.*} key that replaces it.
 */
private static final String[][] LEGACY_KEY_MAPPINGS = {
{LEGACY_BASE_URL, "ai.provider.openai-compatible.baseUrl"},
{LEGACY_MODEL, "ai.provider.openai-compatible.model"},
{LEGACY_TIMEOUT, "ai.provider.openai-compatible.timeoutSeconds"},
{LEGACY_API_KEY, "ai.provider.openai-compatible.apiKey"},
};
/** Parser supplied at construction time (default or injected). */
private final MultiProviderConfigurationParser parser;
/** Validator supplied at construction time (default or injected). */
private final MultiProviderConfigurationValidator validator;
/**
 * Creates a migrator backed by default parser and validator instances.
 * <p>
 * Delegates to the two-argument constructor with a freshly created
 * {@link MultiProviderConfigurationParser} and {@link MultiProviderConfigurationValidator}.
 */
public LegacyConfigurationMigrator() {
this(new MultiProviderConfigurationParser(), new MultiProviderConfigurationValidator());
}
/**
 * Creates a migrator with injected parser and validator.
 * <p>
 * Intended for testing, where a controlled (e.g. always-failing) validator can be supplied
 * to verify that the {@code .bak} backup is preserved when post-migration validation fails.
 * <p>
 * Both collaborators are checked eagerly so that a missing dependency is reported at
 * construction time instead of surfacing only after a configuration file has already
 * been rewritten.
 *
 * @param parser parser used to re-read the migrated file; must not be {@code null}
 * @param validator validator used to verify the migrated file; must not be {@code null}
 * @throws NullPointerException if {@code parser} or {@code validator} is {@code null}
 */
public LegacyConfigurationMigrator(MultiProviderConfigurationParser parser,
        MultiProviderConfigurationValidator validator) {
    this.parser = java.util.Objects.requireNonNull(parser, "parser must not be null");
    this.validator = java.util.Objects.requireNonNull(validator, "validator must not be null");
}
/**
 * Upgrades the configuration file at {@code configFilePath} when it still uses the
 * legacy flat-key layout.
 * <p>
 * The file is read and parsed first. When it is not in legacy form nothing is written.
 * Otherwise a backup is created, the migrated content is generated and written, and the
 * resulting file is validated — in exactly that order.
 *
 * @param configFilePath path to the configuration file; must exist and be readable
 * @throws ConfigurationLoadingException if the file cannot be read, the backup cannot be
 * created, the migrated file cannot be written, or post-migration validation fails
 */
public void migrateIfLegacy(Path configFilePath) {
    String legacyContent = readFile(configFilePath);
    boolean needsMigration = isLegacyForm(parsePropertiesFromContent(legacyContent));
    if (needsMigration) {
        LOG.info("Legacy configuration format detected. Migrating: {}", configFilePath);
        // The backup must exist before the original file is touched.
        createBakBackup(configFilePath, legacyContent);
        writeAtomically(configFilePath, generateMigratedContent(legacyContent));
        LOG.info("Configuration file migrated to multi-provider schema: {}", configFilePath);
        validateMigratedFile(configFilePath);
    }
}
/**
 * Decides whether the given properties still use the legacy flat-key layout.
 * <p>
 * The layout is legacy when {@code ai.provider.active} is absent and at least one of the
 * four flat legacy keys is present.
 *
 * @param props the parsed properties to inspect; must not be {@code null}
 * @return {@code true} if migration is required, {@code false} otherwise
 */
boolean isLegacyForm(Properties props) {
    String[] legacyKeys = {LEGACY_BASE_URL, LEGACY_MODEL, LEGACY_TIMEOUT, LEGACY_API_KEY};
    boolean legacyKeyPresent = false;
    for (String legacyKey : legacyKeys) {
        if (props.containsKey(legacyKey)) {
            legacyKeyPresent = true;
            break;
        }
    }
    if (!legacyKeyPresent) {
        return false;
    }
    // A file that already names an active provider counts as migrated.
    return !props.containsKey(MultiProviderConfigurationParser.PROP_ACTIVE_PROVIDER);
}
/**
 * Stores {@code content} as a backup next to the configuration file.
 * <p>
 * The first candidate is {@code <file>.bak}; while a candidate already exists, the next
 * numbered name ({@code .bak.1}, {@code .bak.2}, …) is tried. The content is written to the
 * first candidate that does not exist yet.
 *
 * @param configFilePath the configuration file the backup belongs to
 * @param content the original file content to preserve
 */
private void createBakBackup(Path configFilePath, String content) {
    String bakName = configFilePath.getFileName() + ".bak";
    Path candidate = configFilePath.resolveSibling(bakName);
    int suffix = 0;
    while (Files.exists(candidate)) {
        suffix++;
        candidate = configFilePath.resolveSibling(bakName + "." + suffix);
    }
    writeFile(candidate, content);
    LOG.info("Backup created: {}", candidate);
}
/**
 * Builds the migrated file content from the original content.
 * <p>
 * The original is split into lines on {@code \n} or {@code \r\n}; every line is passed
 * through {@code transformLine} and emitted followed by a single {@code \n}. A fixed footer
 * is then appended: a blank line, the {@code ai.provider.active=openai-compatible} entry
 * with its comment, another blank line, and a commented-out Claude provider section.
 *
 * @param originalContent the raw original file content; must not be {@code null}
 * @return the migrated content ready to be written to disk
 */
String generateMigratedContent(String originalContent) {
    StringBuilder migrated = new StringBuilder();
    // Limit -1 keeps trailing empty segments, so trailing line breaks are not dropped.
    for (String originalLine : originalContent.split("\\r?\\n", -1)) {
        migrated.append(transformLine(originalLine));
        migrated.append("\n");
    }
    String footer = "\n"
            + "# Aktiver KI-Provider: openai-compatible oder claude\n"
            + "ai.provider.active=openai-compatible\n"
            + "\n"
            + "# Anthropic Claude-Provider (nur benoetigt wenn ai.provider.active=claude)\n"
            + "# ai.provider.claude.model=\n"
            + "# ai.provider.claude.timeoutSeconds=\n"
            + "# ai.provider.claude.apiKey=\n";
    return migrated.append(footer).toString();
}
/**
 * Rewrites one properties-file line so that a legacy key is replaced by its new name.
 * <p>
 * The first mapping whose legacy key is defined by the line wins; the text before and
 * after the key is kept verbatim. Lines that define none of the legacy keys (including
 * comments and blank lines) are returned unchanged.
 *
 * @param line a single line of the original file, without its line terminator
 * @return the line with the legacy key replaced, or the line itself
 */
private String transformLine(String line) {
    for (String[] mapping : LEGACY_KEY_MAPPINGS) {
        String oldKey = mapping[0];
        if (!lineDefinesKey(line, oldKey)) {
            continue;
        }
        int keyOffset = line.indexOf(oldKey);
        String leading = line.substring(0, keyOffset);
        String remainder = line.substring(keyOffset + oldKey.length());
        return leading + mapping[1] + remainder;
    }
    return line;
}
/**
 * Decides whether {@code line} is a definition of exactly {@code key}.
 * <p>
 * Leading whitespace is ignored. Empty lines and lines whose first remaining character is
 * {@code #} or {@code !} never define a key. Otherwise the remaining text must begin with
 * {@code key}, and the key must either be the whole text or be followed directly by
 * {@code =}, {@code :}, or a whitespace character. This prevents a longer key that merely
 * shares the prefix from matching.
 *
 * @param line the line to inspect
 * @param key  the exact key to look for
 * @return {@code true} if the line defines {@code key}; {@code false} otherwise
 */
private boolean lineDefinesKey(String line, String key) {
    String candidate = line.stripLeading();
    boolean blankOrComment = candidate.isEmpty()
            || candidate.charAt(0) == '#'
            || candidate.charAt(0) == '!';
    if (blankOrComment || !candidate.startsWith(key)) {
        return false;
    }
    if (candidate.length() == key.length()) {
        // The key is the entire remaining text (a key without separator or value).
        return true;
    }
    char terminator = candidate.charAt(key.length());
    return terminator == '=' || terminator == ':' || Character.isWhitespace(terminator);
}
/**
 * Writes {@code content} to {@code target} via a temporary file and an atomic rename.
 * <p>
 * The content is first written completely, as UTF-8, to {@code <target>.tmp} in the same
 * directory. The temporary file is then moved onto {@code target} with
 * {@link StandardCopyOption#ATOMIC_MOVE} requested, so the target is never observed in a
 * partially written state. If the file system reports that it cannot perform an atomic
 * move, the method falls back to a regular replacing move.
 * <p>
 * If writing or moving fails, the temporary file is deleted on a best-effort basis so that
 * no stale {@code .tmp} file is left behind; a failure of that cleanup is attached to the
 * original exception as a suppressed exception.
 *
 * @param target  the file to replace
 * @param content the complete new file content
 * @throws ConfigurationLoadingException if the temporary file cannot be written or moved
 */
private void writeAtomically(Path target, String content) {
    Path tmpPath = target.resolveSibling(target.getFileName() + ".tmp");
    try {
        Files.writeString(tmpPath, content, StandardCharsets.UTF_8);
        try {
            Files.move(tmpPath, target,
                    StandardCopyOption.REPLACE_EXISTING, StandardCopyOption.ATOMIC_MOVE);
        } catch (java.nio.file.AtomicMoveNotSupportedException e) {
            // The file system cannot rename atomically; a plain replacing move is the
            // best remaining option and matches the previous behaviour.
            Files.move(tmpPath, target, StandardCopyOption.REPLACE_EXISTING);
        }
    } catch (IOException e) {
        try {
            Files.deleteIfExists(tmpPath);
        } catch (IOException cleanupFailure) {
            e.addSuppressed(cleanupFailure);
        }
        throw new ConfigurationLoadingException(
                "Failed to write migrated configuration to " + target, e);
    }
}
/**
 * Reads the migrated file back from disk and runs it through the injected parser and
 * validator.
 * <p>
 * A parser failure and a validator failure are each rethrown as a
 * {@link ConfigurationLoadingException} whose message names the failing stage and carries
 * the original exception as its cause. This method only reads; it does not modify or
 * delete any file, so a backup written earlier stays in place.
 *
 * @param configFilePath the path of the migrated configuration file
 * @throws ConfigurationLoadingException if the file cannot be read, parsed, or validated
 */
private void validateMigratedFile(Path configFilePath) {
    Properties migratedProps = parsePropertiesFromContent(readFile(configFilePath));

    final MultiProviderConfiguration parsedConfig;
    try {
        parsedConfig = parser.parse(migratedProps);
    } catch (ConfigurationLoadingException parseFailure) {
        String reason = "Migrated configuration failed to parse: " + parseFailure.getMessage();
        throw new ConfigurationLoadingException(reason, parseFailure);
    }

    try {
        validator.validate(parsedConfig);
    } catch (InvalidStartConfigurationException validationFailure) {
        String reason = "Migrated configuration failed validation (backup preserved): "
                + validationFailure.getMessage();
        throw new ConfigurationLoadingException(reason, validationFailure);
    }
}
/**
 * Reads the entire file at {@code path} as UTF-8 text.
 *
 * @param path the file to read
 * @return the file content
 * @throws ConfigurationLoadingException if the file cannot be read
 */
private String readFile(Path path) {
    final String text;
    try {
        text = Files.readString(path, StandardCharsets.UTF_8);
    } catch (IOException ioFailure) {
        throw new ConfigurationLoadingException("Failed to read file: " + path, ioFailure);
    }
    return text;
}
/**
 * Writes {@code content} to {@code path} as UTF-8 text, using the default open options
 * of {@link Files#writeString}.
 *
 * @param path    the file to write
 * @param content the text to store
 * @throws ConfigurationLoadingException if the file cannot be written
 */
private void writeFile(Path path, String content) {
    try {
        Files.writeString(path, content, StandardCharsets.UTF_8);
    } catch (IOException ioFailure) {
        String reason = "Failed to write file: " + path;
        throw new ConfigurationLoadingException(reason, ioFailure);
    }
}
/**
 * Loads the given text into a fresh {@link Properties} object using the standard
 * properties-file syntax.
 *
 * @param content the properties text to parse
 * @return the loaded properties
 * @throws ConfigurationLoadingException if loading reports an I/O error
 */
private Properties parsePropertiesFromContent(String content) {
    Properties loaded = new Properties();
    try (StringReader reader = new StringReader(content)) {
        loaded.load(reader);
    } catch (IOException ioFailure) {
        throw new ConfigurationLoadingException("Failed to parse properties content", ioFailure);
    }
    return loaded;
}
}
@@ -0,0 +1,203 @@
package de.gecheckt.pdf.umbenenner.adapter.out.configuration;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import java.util.Objects;
import java.util.Properties;
import java.util.function.Function;
/**
 * Parses the multi-provider configuration schema from a {@link Properties} object.
 * <p>
 * Recognises the following property keys:
 * <pre>
 * ai.provider.active                            required; must be "openai-compatible" or "claude"
 * ai.provider.openai-compatible.baseUrl         required for active OpenAI-compatible provider
 * ai.provider.openai-compatible.model           required for active OpenAI-compatible provider
 * ai.provider.openai-compatible.timeoutSeconds
 * ai.provider.openai-compatible.apiKey
 * ai.provider.claude.baseUrl                    optional; defaults to https://api.anthropic.com
 * ai.provider.claude.model                      required for active Claude provider
 * ai.provider.claude.timeoutSeconds
 * ai.provider.claude.apiKey
 * </pre>
 *
 * <h2>Environment-variable precedence for API keys</h2>
 * <ul>
 *   <li>{@code OPENAI_COMPATIBLE_API_KEY} overrides {@code ai.provider.openai-compatible.apiKey}</li>
 *   <li>{@code ANTHROPIC_API_KEY} overrides {@code ai.provider.claude.apiKey}</li>
 * </ul>
 * Each environment variable is applied only to its own provider family; the variables
 * of different families are never mixed.
 *
 * <h2>Error handling</h2>
 * <ul>
 *   <li>If {@code ai.provider.active} is absent or blank, a {@link ConfigurationLoadingException}
 *       is thrown.</li>
 *   <li>If {@code ai.provider.active} holds an unrecognised value, a
 *       {@link ConfigurationLoadingException} is thrown.</li>
 *   <li>If a {@code timeoutSeconds} property is present but not a valid integer, a
 *       {@link ConfigurationLoadingException} is thrown.</li>
 *   <li>A missing or blank {@code model} or OpenAI-compatible {@code baseUrl} is stored as
 *       {@code null}, a missing or blank {@code timeoutSeconds} as {@code 0}, and a missing
 *       API key as an empty string. The parser itself does not reject these values; that is
 *       left to the validator.</li>
 * </ul>
 *
 * <p>The returned {@link MultiProviderConfiguration} is not yet validated. Use
 * {@link MultiProviderConfigurationValidator} after parsing.
 */
public class MultiProviderConfigurationParser {

    /** Property key selecting the active provider family. */
    static final String PROP_ACTIVE_PROVIDER = "ai.provider.active";

    static final String PROP_OPENAI_BASE_URL = "ai.provider.openai-compatible.baseUrl";
    static final String PROP_OPENAI_MODEL = "ai.provider.openai-compatible.model";
    static final String PROP_OPENAI_TIMEOUT = "ai.provider.openai-compatible.timeoutSeconds";
    static final String PROP_OPENAI_API_KEY = "ai.provider.openai-compatible.apiKey";

    static final String PROP_CLAUDE_BASE_URL = "ai.provider.claude.baseUrl";
    static final String PROP_CLAUDE_MODEL = "ai.provider.claude.model";
    static final String PROP_CLAUDE_TIMEOUT = "ai.provider.claude.timeoutSeconds";
    static final String PROP_CLAUDE_API_KEY = "ai.provider.claude.apiKey";

    /** Environment variable for the OpenAI-compatible provider API key. */
    static final String ENV_OPENAI_API_KEY = "OPENAI_COMPATIBLE_API_KEY";

    /** Environment variable for the Anthropic Claude provider API key. */
    static final String ENV_CLAUDE_API_KEY = "ANTHROPIC_API_KEY";

    /** Default base URL for the Anthropic Claude provider when not explicitly configured. */
    static final String CLAUDE_DEFAULT_BASE_URL = "https://api.anthropic.com";

    private final Function<String, String> environmentLookup;

    /**
     * Creates a parser that uses the real system environment for API key resolution.
     */
    public MultiProviderConfigurationParser() {
        this(System::getenv);
    }

    /**
     * Creates a parser with a custom environment lookup function.
     * <p>
     * This constructor is intended for testing to allow deterministic control over
     * environment variable values without modifying the real process environment.
     *
     * @param environmentLookup a function that maps environment variable names to their values;
     *                          must not be {@code null}
     * @throws NullPointerException if {@code environmentLookup} is {@code null}
     */
    public MultiProviderConfigurationParser(Function<String, String> environmentLookup) {
        // Fail at construction time instead of later, deep inside API-key resolution.
        this.environmentLookup =
                Objects.requireNonNull(environmentLookup, "environmentLookup must not be null");
    }

    /**
     * Parses the multi-provider configuration from the given properties.
     * <p>
     * The Claude default base URL ({@code https://api.anthropic.com}) is applied when
     * {@code ai.provider.claude.baseUrl} is absent or blank. API keys are resolved with
     * environment variable precedence. The resulting configuration is not yet validated; call
     * {@link MultiProviderConfigurationValidator#validate(MultiProviderConfiguration)} afterward.
     *
     * @param props the properties to parse; must not be {@code null}
     * @return the parsed (but not yet validated) multi-provider configuration
     * @throws NullPointerException if {@code props} is {@code null}
     * @throws ConfigurationLoadingException if {@code ai.provider.active} is absent, blank,
     *         or holds an unrecognised value, or if any present timeout property is not a
     *         valid integer
     */
    public MultiProviderConfiguration parse(Properties props) {
        Objects.requireNonNull(props, "props must not be null");
        AiProviderFamily activeFamily = parseActiveProvider(props);
        ProviderConfiguration openAiConfig = parseOpenAiCompatibleConfig(props);
        ProviderConfiguration claudeConfig = parseClaudeConfig(props);
        return new MultiProviderConfiguration(activeFamily, openAiConfig, claudeConfig);
    }

    /**
     * Reads {@code ai.provider.active} and maps it to a provider family.
     *
     * @throws ConfigurationLoadingException if the property is absent, blank, or unknown
     */
    private AiProviderFamily parseActiveProvider(Properties props) {
        String raw = props.getProperty(PROP_ACTIVE_PROVIDER);
        if (raw == null || raw.isBlank()) {
            throw new ConfigurationLoadingException(
                    "Required property missing or blank: " + PROP_ACTIVE_PROVIDER
                            + ". Valid values: openai-compatible, claude");
        }
        String trimmed = raw.trim();
        return AiProviderFamily.fromIdentifier(trimmed).orElseThrow(() ->
                new ConfigurationLoadingException(
                        "Unknown provider identifier for " + PROP_ACTIVE_PROVIDER + ": '" + trimmed
                                + "'. Valid values: openai-compatible, claude"));
    }

    /** Collects the OpenAI-compatible provider settings; no default base URL is applied. */
    private ProviderConfiguration parseOpenAiCompatibleConfig(Properties props) {
        String model = getOptionalString(props, PROP_OPENAI_MODEL);
        int timeout = parseTimeoutSeconds(props, PROP_OPENAI_TIMEOUT);
        String baseUrl = getOptionalString(props, PROP_OPENAI_BASE_URL);
        String apiKey = resolveApiKey(props, PROP_OPENAI_API_KEY, ENV_OPENAI_API_KEY);
        return new ProviderConfiguration(model, timeout, baseUrl, apiKey);
    }

    /** Collects the Claude provider settings, applying the default base URL when none is set. */
    private ProviderConfiguration parseClaudeConfig(Properties props) {
        String model = getOptionalString(props, PROP_CLAUDE_MODEL);
        int timeout = parseTimeoutSeconds(props, PROP_CLAUDE_TIMEOUT);
        String baseUrl = getStringOrDefault(props, PROP_CLAUDE_BASE_URL, CLAUDE_DEFAULT_BASE_URL);
        String apiKey = resolveApiKey(props, PROP_CLAUDE_API_KEY, ENV_CLAUDE_API_KEY);
        return new ProviderConfiguration(model, timeout, baseUrl, apiKey);
    }

    /**
     * Returns the trimmed property value, or {@code null} if absent or blank.
     */
    private String getOptionalString(Properties props, String key) {
        String value = props.getProperty(key);
        return (value == null || value.isBlank()) ? null : value.trim();
    }

    /**
     * Returns the trimmed property value, or the {@code defaultValue} if absent or blank.
     */
    private String getStringOrDefault(Properties props, String key, String defaultValue) {
        String value = props.getProperty(key);
        return (value == null || value.isBlank()) ? defaultValue : value.trim();
    }

    /**
     * Parses a timeout property as an integer.
     * <p>
     * Returns {@code 0} when the property is absent or blank (indicating "not configured").
     * Any syntactically valid integer is returned as-is, including zero and negative values;
     * this method performs no range check.
     *
     * @throws ConfigurationLoadingException if the property is present but not parseable
     *         as an integer
     */
    private int parseTimeoutSeconds(Properties props, String key) {
        String value = props.getProperty(key);
        if (value == null || value.isBlank()) {
            return 0;
        }
        try {
            return Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            throw new ConfigurationLoadingException(
                    "Invalid integer value for property " + key + ": '" + value.trim() + "'", e);
        }
    }

    /**
     * Resolves the effective API key for a provider family.
     * <p>
     * The environment variable value takes precedence over the properties value.
     * If the environment variable is absent or blank, the properties value is used.
     * If the property is absent as well, an empty string is returned.
     *
     * @param props       the configuration properties
     * @param propertyKey the property key for the API key of this provider family
     * @param envVarName  the environment variable name for this provider family
     * @return the resolved, trimmed API key; never {@code null}, but may be empty
     */
    private String resolveApiKey(Properties props, String propertyKey, String envVarName) {
        String envValue = environmentLookup.apply(envVarName);
        if (envValue != null && !envValue.isBlank()) {
            return envValue.trim();
        }
        String propsValue = props.getProperty(propertyKey);
        return (propsValue != null) ? propsValue.trim() : "";
    }
}
@@ -0,0 +1,106 @@
package de.gecheckt.pdf.umbenenner.adapter.out.configuration;
import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.application.config.provider.AiProviderFamily;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.provider.ProviderConfiguration;
import java.util.ArrayList;
import java.util.List;
/**
 * Checks a {@link MultiProviderConfiguration} before the application run begins.
 * <p>
 * Only the configuration of the <em>active</em> provider is inspected. For it, the
 * following rules apply:
 * <ul>
 *   <li>the active provider family must be set;</li>
 *   <li>{@code model} must be non-blank;</li>
 *   <li>{@code timeoutSeconds} must be greater than zero;</li>
 *   <li>{@code baseUrl} must be non-blank (checked for every family; for the Claude family
 *       {@link MultiProviderConfigurationParser} supplies a default, so the check acts as
 *       a safety net there);</li>
 *   <li>{@code apiKey} must be non-blank, judged on the value already resolved by
 *       {@link MultiProviderConfigurationParser}.</li>
 * </ul>
 * The inactive provider may be incomplete; it is never looked at.
 * <p>
 * All violations found are collected and reported together in one
 * {@link InvalidStartConfigurationException}.
 */
public class MultiProviderConfigurationValidator {

    /**
     * Validates the given multi-provider configuration.
     *
     * @param config the configuration to validate; must not be {@code null}
     * @throws InvalidStartConfigurationException if at least one rule is violated; the
     *         message lists every problem on its own line
     */
    public void validate(MultiProviderConfiguration config) {
        List<String> problems = new ArrayList<>();
        collectActiveProviderProblems(config, problems);
        if (problems.isEmpty()) {
            return;
        }
        throw new InvalidStartConfigurationException(
                "Invalid AI provider configuration:\n" + String.join("\n", problems));
    }

    /**
     * Appends one message per violated rule of the active provider to {@code problems},
     * in the order model, timeout, base URL, API key.
     */
    private void collectActiveProviderProblems(MultiProviderConfiguration config,
                                               List<String> problems) {
        AiProviderFamily family = config.activeProviderFamily();
        if (family == null) {
            // The parser already rejects a missing or unknown ai.provider.active; this guard
            // covers configurations that were constructed directly, e.g. in tests.
            problems.add("- ai.provider.active: must be set to a supported provider "
                    + "(openai-compatible, claude)");
            return;
        }

        ProviderConfiguration active = config.activeProviderConfiguration();
        String providerLabel = "ai.provider." + family.getIdentifier();

        if (isNullOrBlank(active.model())) {
            problems.add("- " + providerLabel + ".model: must not be blank");
        }

        int timeoutSeconds = active.timeoutSeconds();
        if (timeoutSeconds <= 0) {
            problems.add("- " + providerLabel + ".timeoutSeconds: must be a positive integer, got: "
                    + timeoutSeconds);
        }

        if (isNullOrBlank(active.baseUrl())) {
            problems.add("- " + providerLabel + ".baseUrl: must not be blank");
        }

        if (isNullOrBlank(active.apiKey())) {
            problems.add("- " + providerLabel + ".apiKey: must not be blank "
                    + "(set via environment variable or properties)");
        }
    }

    /** Returns {@code true} when {@code value} is {@code null} or contains only whitespace. */
    private static boolean isNullOrBlank(String value) {
        return value == null || value.isBlank();
    }
}
@@ -2,8 +2,6 @@ package de.gecheckt.pdf.umbenenner.adapter.out.configuration;
import java.io.IOException;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
@@ -14,22 +12,24 @@ import java.util.function.Function;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import de.gecheckt.pdf.umbenenner.application.config.provider.MultiProviderConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
/**
* Properties-based implementation of {@link ConfigurationPort}.
* <p>
* Loads configuration from config/application.properties as the primary source.
* For sensitive values, environment variables take precedence: if the environment variable
* {@code PDF_UMBENENNER_API_KEY} is set, it overrides the {@code api.key} property from the file.
* This allows credentials to be managed securely without storing them in the configuration file.
* Loads configuration from {@code config/application.properties} as the primary source.
* The multi-provider AI configuration is parsed via {@link MultiProviderConfigurationParser}
* and validated via {@link MultiProviderConfigurationValidator}. Environment variables
* for API keys are resolved by the parser with provider-specific precedence rules:
* {@code OPENAI_COMPATIBLE_API_KEY} for the OpenAI-compatible family and
* {@code ANTHROPIC_API_KEY} for the Anthropic Claude family.
*/
public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
private static final Logger LOG = LogManager.getLogger(PropertiesConfigurationPortAdapter.class);
private static final String DEFAULT_CONFIG_FILE_PATH = "config/application.properties";
private static final String API_KEY_ENV_VAR = "PDF_UMBENENNER_API_KEY";
private final Function<String, String> environmentLookup;
private final Path configFilePath;
@@ -81,8 +81,9 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
@Override
public StartConfiguration loadConfiguration() {
Properties props = loadPropertiesFile();
String apiKey = getApiKey(props);
return buildStartConfiguration(props, apiKey);
MultiProviderConfiguration multiProviderConfig = parseAndValidateProviders(props);
boolean logAiSensitive = parseAiContentSensitivity(props);
return buildStartConfiguration(props, multiProviderConfig, logAiSensitive);
}
private Properties loadPropertiesFile() {
@@ -100,22 +101,28 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
return props;
}
private String escapeBackslashes(String content) {
// Escape backslashes to prevent Java Properties from interpreting them as escape sequences.
// This is needed because Windows paths use backslashes (e.g., C:\temp\...)
// and Java Properties interprets \t as tab, \n as newline, etc.
return content.replace("\\", "\\\\");
/**
 * Builds the multi-provider AI configuration from the given properties and validates it.
 * <p>
 * A {@link MultiProviderConfigurationParser} bound to this adapter's environment lookup
 * performs the parsing; a {@link MultiProviderConfigurationValidator} then checks the
 * result. Any exception raised by either step propagates to the caller, so a returned
 * configuration has passed both.
 *
 * @param props the loaded configuration properties
 * @return the parsed and validated multi-provider configuration
 */
private MultiProviderConfiguration parseAndValidateProviders(Properties props) {
    MultiProviderConfiguration parsed =
            new MultiProviderConfigurationParser(environmentLookup).parse(props);
    MultiProviderConfigurationValidator validator = new MultiProviderConfigurationValidator();
    validator.validate(parsed);
    return parsed;
}
private StartConfiguration buildStartConfiguration(Properties props, String apiKey) {
boolean logAiSensitive = parseAiContentSensitivity(props);
private StartConfiguration buildStartConfiguration(Properties props,
MultiProviderConfiguration multiProviderConfig,
boolean logAiSensitive) {
return new StartConfiguration(
Paths.get(getRequiredProperty(props, "source.folder")),
Paths.get(getRequiredProperty(props, "target.folder")),
Paths.get(getRequiredProperty(props, "sqlite.file")),
parseUri(getRequiredProperty(props, "api.baseUrl")),
getRequiredProperty(props, "api.model"),
parseInt(getRequiredProperty(props, "api.timeoutSeconds")),
multiProviderConfig,
parseInt(getRequiredProperty(props, "max.retries.transient")),
parseInt(getRequiredProperty(props, "max.pages")),
parseInt(getRequiredProperty(props, "max.text.characters")),
@@ -123,19 +130,15 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
Paths.get(getOptionalProperty(props, "runtime.lock.file", "")),
Paths.get(getOptionalProperty(props, "log.directory", "")),
getOptionalProperty(props, "log.level", "INFO"),
apiKey,
logAiSensitive
);
}
private String getApiKey(Properties props) {
String envApiKey = environmentLookup.apply(API_KEY_ENV_VAR);
if (envApiKey != null && !envApiKey.isBlank()) {
LOG.info("Using API key from environment variable {}", API_KEY_ENV_VAR);
return envApiKey;
}
String propsApiKey = props.getProperty("api.key");
return propsApiKey != null ? propsApiKey : "";
/**
 * Doubles every backslash in {@code content}.
 * <p>
 * Java's properties format treats a backslash as the start of an escape sequence
 * ({@code \t} becomes a tab, {@code \n} a newline, and so on). Windows paths such as
 * {@code C:\temp\...} would be altered by that; doubling each backslash beforehand keeps
 * them literal.
 *
 * @param content the raw properties text
 * @return the text with each backslash replaced by two backslashes
 */
private String escapeBackslashes(String content) {
    final String singleBackslash = "\\";
    final String escapedBackslash = "\\\\";
    return content.replace(singleBackslash, escapedBackslash);
}
private String getRequiredProperty(Properties props, String key) {
@@ -169,14 +172,6 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
}
}
private URI parseUri(String value) {
try {
return new URI(value.trim());
} catch (URISyntaxException e) {
throw new ConfigurationLoadingException("Invalid URI value for property: " + value, e);
}
}
/**
* Parses the {@code log.ai.sensitive} configuration property with strict validation.
* <p>
@@ -212,4 +207,4 @@ public class PropertiesConfigurationPortAdapter implements ConfigurationPort {
+ "Default is 'false' (sensitive content not logged).");
}
}
}
}
@@ -31,9 +31,9 @@ import de.gecheckt.pdf.umbenenner.domain.model.RunId;
* including all AI traceability fields added during schema evolution.
* <p>
* <strong>Schema compatibility:</strong> This adapter writes all columns including
* the AI traceability columns. When reading rows that were written before schema
* evolution, those columns contain {@code NULL} and are mapped to {@code null}
* in the Java record.
* the AI traceability columns and the provider-identifier column ({@code ai_provider}).
* When reading rows that were written before schema evolution, those columns contain
* {@code NULL} and are mapped to {@code null} in the Java record.
* <p>
* <strong>Architecture boundary:</strong> All JDBC and SQLite details are strictly
* confined to this class. No JDBC types appear in the port interface or in any
@@ -129,6 +129,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
failure_class,
failure_message,
retryable,
ai_provider,
model_name,
prompt_identifier,
processed_page_count,
@@ -139,7 +140,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
date_source,
validated_title,
final_target_file_name
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""";
try (Connection connection = getConnection();
@@ -157,19 +158,20 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
setNullableString(statement, 7, attempt.failureClass());
setNullableString(statement, 8, attempt.failureMessage());
statement.setBoolean(9, attempt.retryable());
// AI traceability fields
setNullableString(statement, 10, attempt.modelName());
setNullableString(statement, 11, attempt.promptIdentifier());
setNullableInteger(statement, 12, attempt.processedPageCount());
setNullableInteger(statement, 13, attempt.sentCharacterCount());
setNullableString(statement, 14, attempt.aiRawResponse());
setNullableString(statement, 15, attempt.aiReasoning());
setNullableString(statement, 16,
attempt.resolvedDate() != null ? attempt.resolvedDate().toString() : null);
// AI provider identifier and AI traceability fields
setNullableString(statement, 10, attempt.aiProvider());
setNullableString(statement, 11, attempt.modelName());
setNullableString(statement, 12, attempt.promptIdentifier());
setNullableInteger(statement, 13, attempt.processedPageCount());
setNullableInteger(statement, 14, attempt.sentCharacterCount());
setNullableString(statement, 15, attempt.aiRawResponse());
setNullableString(statement, 16, attempt.aiReasoning());
setNullableString(statement, 17,
attempt.resolvedDate() != null ? attempt.resolvedDate().toString() : null);
setNullableString(statement, 18,
attempt.dateSource() != null ? attempt.dateSource().name() : null);
setNullableString(statement, 18, attempt.validatedTitle());
setNullableString(statement, 19, attempt.finalTargetFileName());
setNullableString(statement, 19, attempt.validatedTitle());
setNullableString(statement, 20, attempt.finalTargetFileName());
int rowsAffected = statement.executeUpdate();
if (rowsAffected != 1) {
@@ -204,7 +206,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
SELECT
fingerprint, run_id, attempt_number, started_at, ended_at,
status, failure_class, failure_message, retryable,
model_name, prompt_identifier, processed_page_count, sent_character_count,
ai_provider, model_name, prompt_identifier, processed_page_count, sent_character_count,
ai_raw_response, ai_reasoning, resolved_date, date_source, validated_title,
final_target_file_name
FROM processing_attempt
@@ -255,7 +257,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
SELECT
fingerprint, run_id, attempt_number, started_at, ended_at,
status, failure_class, failure_message, retryable,
model_name, prompt_identifier, processed_page_count, sent_character_count,
ai_provider, model_name, prompt_identifier, processed_page_count, sent_character_count,
ai_raw_response, ai_reasoning, resolved_date, date_source, validated_title,
final_target_file_name
FROM processing_attempt
@@ -312,6 +314,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
rs.getString("failure_class"),
rs.getString("failure_message"),
rs.getBoolean("retryable"),
rs.getString("ai_provider"),
rs.getString("model_name"),
rs.getString("prompt_identifier"),
processedPageCount,
@@ -41,6 +41,9 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitiali
* <li>Target-copy columns ({@code last_target_path}, {@code last_target_file_name}) to
* {@code document_record}</li>
* <li>Target-copy column ({@code final_target_file_name}) to {@code processing_attempt}</li>
* <li>Provider-identifier column ({@code ai_provider}) to {@code processing_attempt};
* existing rows receive {@code NULL} as the default, which is the correct value for
* attempts recorded before provider tracking was introduced.</li>
* </ul>
*
* <h2>Legacy-state migration</h2>
@@ -150,6 +153,9 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
/**
* Columns to add idempotently to {@code processing_attempt}.
* Each entry is {@code [column_name, column_type]}.
* <p>
* {@code ai_provider} is nullable; existing rows receive {@code NULL}, which is the
* correct sentinel for attempts recorded before provider tracking was introduced.
*/
private static final String[][] EVOLUTION_ATTEMPT_COLUMNS = {
{"model_name", "TEXT"},
@@ -162,6 +168,7 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
{"date_source", "TEXT"},
{"validated_title", "TEXT"},
{"final_target_file_name", "TEXT"},
{"ai_provider", "TEXT"},
};
// -------------------------------------------------------------------------
@@ -229,7 +236,8 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
* <li>Create {@code document_record} table (if not exists).</li>
* <li>Create {@code processing_attempt} table (if not exists).</li>
* <li>Create all indexes (if not exist).</li>
* <li>Add AI-traceability columns to {@code processing_attempt} (idempotent evolution).</li>
* <li>Add AI-traceability and provider-identifier columns to {@code processing_attempt}
* (idempotent evolution).</li>
* <li>Migrate earlier positive intermediate state to {@code READY_FOR_AI} (idempotent).</li>
* </ol>
* <p>