M5 AP-001 Kernobjekte, Statusmodell und KI-Port-Verträge präzisiert

2026-04-06 23:05:12 +02:00
parent d1dfc75d4e
commit cd2389f3e1
19 changed files with 776 additions and 8 deletions
@@ -0,0 +1,77 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+
+/**
+ * Outbound port for invoking an AI service over an OpenAI-compatible HTTP boundary.
+ * <p>
+ * This interface abstracts AI service communication, allowing the Application layer
+ * to orchestrate AI-based naming without knowing about HTTP, authentication, or
+ * provider-specific details.
+ * <p>
+ * <strong>Design principles:</strong>
+ * <ul>
+ *   <li>Provider is configurable (OpenAI, Azure, local LLM, etc.), not hard-coded</li>
+ *   <li>Base URL, model name, and timeout are runtime configuration</li>
+ *   <li>Invocation outcomes are returned as structured types ({@link AiInvocationResult}),
+ *       never as exceptions; only a {@code null} argument is rejected by throwing</li>
+ *   <li>Technical success (HTTP 200) is distinct from response content validity</li>
+ * </ul>
+ * <p>
+ * <strong>Adapter responsibilities:</strong>
+ * <ul>
+ *   <li>Construct an HTTP request from the {@link AiRequestRepresentation}</li>
+ *   <li>Apply all transport-level configuration (base URL, model, timeout, authentication)</li>
+ *   <li>Execute the HTTP request against the configured endpoint</li>
+ *   <li>Distinguish between successful reception of a response body and technical failure</li>
+ *   <li>Return either an invocation success with raw response or a classified technical error</li>
+ *   <li>Encapsulate all HTTP, JSON serialization, and authentication details</li>
+ * </ul>
+ * <p>
+ * <strong>Non-goals of this port:</strong>
+ * <ul>
+ *   <li>JSON parsing of the response body (Application layer handles this)</li>
+ *   <li>Validation of response content against domain rules</li>
+ *   <li>Prompt construction or text formatting (Application layer does this)</li>
+ *   <li>Handling of provider-specific output formats or structured output schemas</li>
+ * </ul>
+ * <p>
+ * <strong>OpenAI compatibility:</strong> The adapter must support the OpenAI Chat
+ * Completions API or a compatible endpoint. The {@code AiRequestRepresentation}
+ * contains the prompt and document text; the adapter is responsible for formatting
+ * these as needed (e.g., system message + user message in the Chat API).
+ *
+ * @since M5
+ */
+public interface AiInvocationPort {
+
+    /**
+     * Invokes an AI service with the given request representation.
+     * <p>
+     * This method sends a request to the configured AI endpoint and returns the result.
+     * The request contains both the prompt and the document text, deterministically
+     * composed by the Application layer.
+     * <p>
+     * <strong>Outcome distinction:</strong>
+     * <ul>
+     *   <li>If the HTTP call succeeds and a response body is received,
+     *       {@link AiInvocationSuccess} is returned, even if the body is invalid JSON
+     *       or semantically problematic. The Application layer will parse and validate
+     *       the content.</li>
+     *   <li>If the HTTP call fails (timeout, network error, endpoint unreachable,
+     *       connection failure), {@link AiInvocationTechnicalFailure} is returned.</li>
+     * </ul>
+     *
+     * @param request the complete request to send to the AI service; must not be {@code null}
+     * @return an {@link AiInvocationResult} encoding either:
+     *         <ul>
+     *           <li>Success: response body was received (valid or not)</li>
+     *           <li>Technical failure: HTTP communication failed</li>
+     *         </ul>
+     * @throws NullPointerException if {@code request} is {@code null}
+     *
+     * @see AiInvocationSuccess
+     * @see AiInvocationTechnicalFailure
+     */
+    AiInvocationResult invoke(AiRequestRepresentation request);
+}
@@ -0,0 +1,28 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+/**
+ * Sealed interface representing the outcome of invoking an AI service.
+ * <p>
+ * Implementations allow the Application layer to distinguish between:
+ * <ul>
+ *   <li>Successful HTTP communication with a response body (which may still contain
+ *       functionally invalid content, but is at least technically received)</li>
+ *   <li>Technical failure (timeout, network error, endpoint unreachable, connection failure)</li>
+ * </ul>
+ * <p>
+ * Permitted implementations:
+ * <ul>
+ *   <li>{@link AiInvocationSuccess} — HTTP call completed with a response body</li>
+ *   <li>{@link AiInvocationTechnicalFailure} — HTTP call failed or no response body was received</li>
+ * </ul>
+ * <p>
+ * <strong>Critical distinction:</strong> A successful invocation means the HTTP request
+ * was sent and a response was received, but the response content may still be unparseable
+ * or semantically invalid. This is crucial for retry logic: a technical HTTP success
+ * with unparseable JSON is different from a timeout or network error.
+ *
+ * @since M5
+ */
+public sealed interface AiInvocationResult
+        permits AiInvocationSuccess, AiInvocationTechnicalFailure {
+}
@@ -0,0 +1,51 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+import java.util.Objects;
+
+/**
+ * Represents successful HTTP communication with an AI service.
+ * <p>
+ * The HTTP request was sent and a response body was received. This indicates
+ * technical success of the communication, but does NOT guarantee that the response
+ * content is valid, parseable, or functionally usable.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #request()} — the exact request that was sent to the AI service,
+ *       including prompt, document text, and character counts</li>
+ *   <li>{@link #rawResponse()} — the uninterpreted response body returned by the AI,
+ *       which may be valid JSON, malformed, empty, or otherwise problematic</li>
+ * </ul>
+ * <p>
+ * The Application layer is responsible for:
+ * <ul>
+ *   <li>Parsing the raw response (JSON extraction, field validation)</li>
+ *   <li>Distinguishing between parseable and unparseable responses</li>
+ *   <li>Validating the content against rules (title length, date format, etc.)</li>
+ *   <li>Classifying any failures as technical or functional</li>
+ * </ul>
+ * <p>
+ * <strong>Persistence:</strong> Both request and response are stored in the
+ * processing attempt history for debugging and audit.
+ *
+ * @param request     the AI request that was sent; never null
+ * @param rawResponse the uninterpreted response body; never null (but may be empty)
+ *
+ * @since M5
+ */
+public record AiInvocationSuccess(
+        AiRequestRepresentation request,
+        AiRawResponse rawResponse) implements AiInvocationResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if {@code request} or {@code rawResponse} is {@code null}
+     */
+    public AiInvocationSuccess {
+        Objects.requireNonNull(request, "request must not be null");
+        Objects.requireNonNull(rawResponse, "rawResponse must not be null");
+    }
+}
@@ -0,0 +1,53 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+import java.util.Objects;
+
+/**
+ * Represents a technical failure during AI service invocation.
+ * <p>
+ * The HTTP request could not be sent, or no response body was received.
+ * This covers network errors, timeouts, endpoint unreachability, connection failures,
+ * and other infrastructure-level problems.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #request()} — the request that was attempted to be sent. Stored for
+ *       debugging and audit, even though it may not have reached the AI service.</li>
+ *   <li>{@link #failureReason()} — a classification of the technical error
+ *       (e.g., "TIMEOUT", "ENDPOINT_UNREACHABLE", "CONNECTION_ERROR")</li>
+ *   <li>{@link #failureMessage()} — a human-readable description of the error,
+ *       suitable for logging and operational troubleshooting</li>
+ * </ul>
+ * <p>
+ * <strong>Retry semantics:</strong> Technical failures are retryable. The Application
+ * layer will record this as a transient error, and the document may be retried in
+ * a later batch run up to the configured maximum transient-error count.
+ * <p>
+ * <strong>Distinction from functional errors:</strong> A 200 OK response with an
+ * invalid JSON body is NOT a technical failure; it's an invocation success that
+ * contains a functional error. Only communication/transport errors are classified here.
+ *
+ * @param request        the request that was attempted (may not have been successfully sent);
+ *                       never null
+ * @param failureReason  classification of the error type; never null (may be empty)
+ * @param failureMessage human-readable error description; never null (may be empty)
+ *
+ * @since M5
+ */
+public record AiInvocationTechnicalFailure(
+        AiRequestRepresentation request,
+        String failureReason,
+        String failureMessage) implements AiInvocationResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if any of the three components is {@code null}
+     */
+    public AiInvocationTechnicalFailure {
+        Objects.requireNonNull(request, "request must not be null");
+        Objects.requireNonNull(failureReason, "failureReason must not be null");
+        Objects.requireNonNull(failureMessage, "failureMessage must not be null");
+    }
+}
@@ -0,0 +1,41 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import java.util.Objects;
+
+/**
+ * Represents failure to load an external prompt template.
+ * <p>
+ * The prompt could not be obtained from the configured external source,
+ * or the loaded content was technically invalid (e.g., empty after trimming).
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #failureReason()} — a classification of the failure type
+ *       (e.g., "FILE_NOT_FOUND", "IO_ERROR", "EMPTY_CONTENT")</li>
+ *   <li>{@link #failureMessage()} — a human-readable description of why
+ *       the load failed, suitable for logging and debugging</li>
+ * </ul>
+ * <p>
+ * This is a technical failure, not a domain-level validation error, and typically prevents
+ * the batch run from proceeding further (may lead to a {@code PROCESSING} status
+ * treated as {@code FAILED_RETRYABLE}).
+ *
+ * @param failureReason  classification of the failure; never null (may be empty)
+ * @param failureMessage human-readable failure description; never null (may be empty)
+ *
+ * @since M5
+ */
+public record PromptLoadingFailure(
+        String failureReason,
+        String failureMessage) implements PromptLoadingResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if {@code failureReason} or {@code failureMessage} is {@code null}
+     */
+    public PromptLoadingFailure {
+        Objects.requireNonNull(failureReason, "failureReason must not be null");
+        Objects.requireNonNull(failureMessage, "failureMessage must not be null");
+    }
+}
@@ -0,0 +1,19 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+/**
+ * Sealed interface representing the outcome of loading an external prompt template.
+ * <p>
+ * Implementations allow the Application layer to distinguish between a successful
+ * prompt load and various failure scenarios without using exceptions.
+ * <p>
+ * Permitted implementations:
+ * <ul>
+ *   <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li>
+ *   <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, empty content, etc.)</li>
+ * </ul>
+ *
+ * @since M5
+ */
+public sealed interface PromptLoadingResult
+        permits PromptLoadingSuccess, PromptLoadingFailure {
+}
@@ -0,0 +1,44 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
+import java.util.Objects;
+
+/**
+ * Represents successful loading of an external prompt template.
+ * <p>
+ * The prompt content and a stable identifier for the prompt have both been
+ * successfully obtained from the configured external source.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #promptIdentifier()} — a stable, deterministic identifier
+ *       (e.g., filename, semantic version, or hash) that will remain the same
+ *       across multiple batch runs if the prompt content hasn't changed.</li>
+ *   <li>{@link #promptContent()} — the actual prompt template text, ready to be
+ *       used in constructing an AI request. This record only rejects {@code null};
+ *       empty content is expected to be reported as {@link PromptLoadingFailure}.</li>
+ * </ul>
+ * <p>
+ * The identifier is crucial for historical traceability: each processing attempt
+ * records which prompt was used, allowing later investigation of why a particular
+ * decision was made.
+ *
+ * @param promptIdentifier stable identifier for this prompt version; never null
+ * @param promptContent    the prompt template text; never null
+ *
+ * @since M5
+ */
+public record PromptLoadingSuccess(
+        PromptIdentifier promptIdentifier,
+        String promptContent) implements PromptLoadingResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if {@code promptIdentifier} or {@code promptContent} is {@code null}
+     */
+    public PromptLoadingSuccess {
+        Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
+        Objects.requireNonNull(promptContent, "promptContent must not be null");
+    }
+}
@@ -0,0 +1,58 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+/**
+ * Outbound port for loading external prompt templates.
+ * <p>
+ * This interface abstracts the loading of prompt content from external sources
+ * (files, resources, databases, etc.), allowing the Application layer to remain
+ * independent of how or where prompts are stored.
+ * <p>
+ * <strong>Design principles:</strong>
+ * <ul>
+ *   <li>Prompt is not embedded in code; it is loaded from an external source</li>
+ *   <li>Each prompt receives a stable identifier for traceability across batch runs</li>
+ *   <li>Results are returned as structured types ({@link PromptLoadingResult}),
+ *       never as exceptions</li>
+ * </ul>
+ * <p>
+ * <strong>Adapter responsibilities:</strong>
+ * <ul>
+ *   <li>Locate and read the prompt file/resource from the configured source</li>
+ *   <li>Derive a stable prompt identifier (e.g., filename, semantic version, content hash)</li>
+ *   <li>Validate that the loaded content is not empty or otherwise invalid</li>
+ *   <li>Return either success or a classified failure</li>
+ *   <li>Encapsulate all file I/O, resource loading, and configuration details</li>
+ * </ul>
+ * <p>
+ * <strong>Non-goals of this port:</strong>
+ * <ul>
+ *   <li>Prompt parsing or templating logic</li>
+ *   <li>Combining prompt with document text (Application layer handles this)</li>
+ *   <li>Template variable substitution</li>
+ *   <li>Validation of prompt content against domain (business) rules</li>
+ * </ul>
+ *
+ * @since M5
+ */
+public interface PromptPort {
+
+    /**
+     * Loads the configured external prompt template.
+     * <p>
+     * This method is intended to be called once per batch run to obtain the current prompt.
+     * The prompt content and its stable identifier are returned together.
+     * <p>
+     * If loading fails for any reason (file not found, I/O error, failed content validation),
+     * a {@link PromptLoadingFailure} is returned rather than throwing an exception.
+     *
+     * @return a {@link PromptLoadingResult} encoding either:
+     *         <ul>
+     *           <li>Success: prompt content and identifier loaded successfully</li>
+     *           <li>Failure: prompt could not be loaded or is invalid</li>
+     *         </ul>
+     *
+     * @see PromptLoadingSuccess
+     * @see PromptLoadingFailure
+     */
+    PromptLoadingResult loadPrompt();
+}
@@ -22,6 +22,14 @@
 *       — Extract text content and page count from a single PDF</li>
 * </ul>
 * <p>
+ * AI-based naming ports (M5+):
+ * <ul>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort}
+ *       — Load an external prompt template and derive its stable identifier</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort}
+ *       — Invoke an AI service over an OpenAI-compatible HTTP boundary</li>
+ * </ul>
+ * <p>
 * Persistence and fingerprinting ports:
 * <ul>
 *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
@@ -46,6 +54,10 @@
 *       — Sealed result of a fingerprint computation (success or technical error)</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult}
 *       — Sealed result of a master record lookup (unknown / processable / terminal / failure)</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingResult}
+ *       — Sealed result of prompt loading (success with identifier and content, or failure)</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult}
+ *       — Sealed result of AI service invocation (success with raw response, or technical failure)</li>
 * </ul>
 * <p>
 * Exception types: