1
0

M5 AP-001 Kernobjekte, Statusmodell und KI-Port-Verträge präzisiert

This commit is contained in:
2026-04-06 23:05:12 +02:00
parent d1dfc75d4e
commit cd2389f3e1
19 changed files with 776 additions and 8 deletions

View File

@@ -0,0 +1,77 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
/**
* Outbound port for invoking an AI service over an OpenAI-compatible HTTP boundary.
* <p>
* This interface abstracts AI service communication, allowing the Application layer
* to orchestrate AI-based naming without knowing about HTTP, authentication, or
* provider-specific details.
* <p>
* <strong>Design principles:</strong>
* <ul>
* <li>Provider is configurable (OpenAI, Azure, local LLM, etc.), not hard-coded</li>
* <li>Base URL, model name, and timeout are runtime configuration</li>
* <li>Invocation outcomes are returned as structured types ({@link AiInvocationResult})
* rather than signalled through exceptions; the only exception documented on
* {@link #invoke(AiRequestRepresentation)} is the {@link NullPointerException} for a
* {@code null} request, i.e. a caller error rather than an invocation outcome</li>
* <li>Technical success (HTTP 200) is distinct from response content validity</li>
* </ul>
* <p>
* <strong>Adapter responsibilities:</strong>
* <ul>
* <li>Construct an HTTP request from the {@link AiRequestRepresentation}</li>
* <li>Apply all transport-level configuration (base URL, model, timeout, authentication)</li>
* <li>Execute the HTTP request against the configured endpoint</li>
* <li>Distinguish between successful reception of a response body and technical failure</li>
* <li>Return either an invocation success with the raw response or a classified technical error</li>
* <li>Encapsulate all HTTP, JSON serialization, and authentication details</li>
* </ul>
* <p>
* <strong>Non-goals of this port:</strong>
* <ul>
* <li>JSON parsing of the response body (Application layer handles this)</li>
* <li>Validation of response content against domain rules</li>
* <li>Prompt construction or text formatting (Application layer does this)</li>
* <li>Handling of provider-specific output formats or structured output schemas</li>
* </ul>
* <p>
* <strong>OpenAI compatibility:</strong> The adapter must support the OpenAI Chat
* Completions API or a compatible endpoint. The {@code AiRequestRepresentation}
* contains the prompt and document text; the adapter is responsible for formatting
* these as needed (e.g., system message + user message in the Chat API).
*
* @since M5
*/
public interface AiInvocationPort {
/**
* Invokes an AI service with the given request representation.
* <p>
* This method sends a request to the configured AI endpoint and returns the result.
* The request contains both the prompt and the document text, deterministically
* composed by the Application layer.
* <p>
* <strong>Outcome distinction:</strong>
* <ul>
* <li>If the HTTP call succeeds and a response body is received,
* {@link AiInvocationSuccess} is returned, even if the body is invalid JSON
* or semantically problematic. The Application layer will parse and validate
* the content.</li>
* <li>If the HTTP call fails (timeout, network error, endpoint unreachable,
* connection failure), {@link AiInvocationTechnicalFailure} is returned.</li>
* </ul>
* <p>
* NOTE(review): this contract does not state how an HTTP error status that still
* carries a response body is to be classified — confirm against the adapter
* implementation.
*
* @param request the complete request to send to the AI service; never null
* @return an {@link AiInvocationResult} encoding either:
* <ul>
* <li>Success: response body was received (valid or not)</li>
* <li>Technical failure: HTTP communication failed</li>
* </ul>
* @throws NullPointerException if request is null (caller error; not reported
* as an {@link AiInvocationResult})
*
* @see AiInvocationSuccess
* @see AiInvocationTechnicalFailure
*/
AiInvocationResult invoke(AiRequestRepresentation request);
}

View File

@@ -0,0 +1,28 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Sealed interface representing the outcome of invoking an AI service.
* <p>
* Implementations allow the Application layer to distinguish between:
* <ul>
* <li>Successful HTTP communication with a response body (which may still contain
* unparseable or functionally invalid content, but is at least technically received)</li>
* <li>Technical failure (timeout, network error, endpoint unreachable, connection
* failure, or no response body received)</li>
* </ul>
* <p>
* Permitted implementations (the hierarchy is closed; no other implementations are allowed):
* <ul>
* <li>{@link AiInvocationSuccess} — HTTP call completed with a response body</li>
* <li>{@link AiInvocationTechnicalFailure} — HTTP call failed or no response body was received</li>
* </ul>
* <p>
* <strong>Critical distinction:</strong> A successful invocation means the HTTP request
* was sent and a response was received, but the response content may still be unparseable
* or semantically invalid. A malformed response body is therefore reported as
* {@link AiInvocationSuccess}, not as a technical failure. This is crucial for retry
* logic: a technical HTTP success with unparseable JSON is different from a timeout
* or network error.
*
* @since M5
*/
public sealed interface AiInvocationResult
permits AiInvocationSuccess, AiInvocationTechnicalFailure {
}

View File

@@ -0,0 +1,51 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import java.util.Objects;
/**
 * Outcome of an AI invocation whose HTTP exchange completed.
 * <p>
 * A request went out and a response body came back. This is a statement about the
 * communication only: it does NOT imply that the returned content is valid,
 * parseable, or functionally usable.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #request()} — the exact request sent to the AI service, including
 *       prompt, document text, and character counts</li>
 *   <li>{@link #rawResponse()} — the response body as returned by the AI, without any
 *       interpretation; it may be valid JSON, malformed, empty, or otherwise problematic</li>
 * </ul>
 * <p>
 * What remains for the Application layer:
 * <ul>
 *   <li>parsing the raw response (JSON extraction, field validation)</li>
 *   <li>telling parseable responses apart from unparseable ones</li>
 *   <li>validating the content against rules (title length, date format, etc.)</li>
 *   <li>classifying any failure as technical or functional</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> request and response are both stored in the
 * processing attempt history for debugging and audit.
 *
 * @param request     the AI request that was sent; never null
 * @param rawResponse the uninterpreted response body; never null (but may be empty)
 *
 * @since M5
 */
public record AiInvocationSuccess(
        AiRequestRepresentation request,
        AiRawResponse rawResponse) implements AiInvocationResult {

    /**
     * Canonical constructor; rejects {@code null} for every component.
     * The components are checked in declaration order.
     *
     * @throws NullPointerException if either component is null
     */
    public AiInvocationSuccess(AiRequestRepresentation request, AiRawResponse rawResponse) {
        this.request = Objects.requireNonNull(request, "request must not be null");
        this.rawResponse = Objects.requireNonNull(rawResponse, "rawResponse must not be null");
    }
}

View File

@@ -0,0 +1,53 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import java.util.Objects;
/**
 * Outcome of an AI invocation that failed at the technical level.
 * <p>
 * Either the HTTP request could not be sent or no valid response body was received.
 * Network errors, timeouts, endpoint unreachability, connection failures, and other
 * infrastructure-level problems all fall into this category.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #request()} — the request whose delivery was attempted. It is kept for
 *       debugging and audit even though it may never have reached the AI service.</li>
 *   <li>{@link #failureReason()} — a classification of the technical error
 *       (e.g., "TIMEOUT", "ENDPOINT_UNREACHABLE", "CONNECTION_ERROR")</li>
 *   <li>{@link #failureMessage()} — a human-readable description of the error,
 *       suitable for logging and operational troubleshooting</li>
 * </ul>
 * <p>
 * <strong>Retry semantics:</strong> technical failures are retryable. The Application
 * layer records them as transient errors, and the document may be retried in a later
 * batch run up to the configured maximum transient-error count.
 * <p>
 * <strong>Not a functional error:</strong> a 200 OK response carrying an invalid JSON
 * body is NOT a technical failure; it is an invocation success that contains a
 * functional error. Only communication/transport errors are represented by this type.
 *
 * @param request        the request that was attempted (may not have been successfully sent);
 *                       never null
 * @param failureReason  classification of the error type; never null (may be empty)
 * @param failureMessage human-readable error description; never null (may be empty)
 *
 * @since M5
 */
public record AiInvocationTechnicalFailure(
        AiRequestRepresentation request,
        String failureReason,
        String failureMessage) implements AiInvocationResult {

    /**
     * Canonical constructor; rejects {@code null} for every component.
     * The components are checked in declaration order.
     *
     * @throws NullPointerException if any component is null
     */
    public AiInvocationTechnicalFailure(
            AiRequestRepresentation request,
            String failureReason,
            String failureMessage) {
        this.request = Objects.requireNonNull(request, "request must not be null");
        this.failureReason = Objects.requireNonNull(failureReason, "failureReason must not be null");
        this.failureMessage = Objects.requireNonNull(failureMessage, "failureMessage must not be null");
    }
}

View File

@@ -0,0 +1,41 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import java.util.Objects;
/**
 * Outcome of a prompt load that did not produce a usable prompt.
 * <p>
 * Either the prompt could not be obtained from the configured external source,
 * or what was loaded is technically invalid (e.g., empty after trimming).
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #failureReason()} — a classification of the failure type
 *       (e.g., "FILE_NOT_FOUND", "IO_ERROR", "EMPTY_CONTENT")</li>
 *   <li>{@link #failureMessage()} — a human-readable explanation of why the load
 *       failed, suitable for logging and debugging</li>
 * </ul>
 * <p>
 * This is a technical failure rather than a validation error. It typically prevents
 * the batch run from proceeding further (may lead to a {@code PROCESSING} status
 * treated as {@code FAILED_RETRYABLE}).
 *
 * @param failureReason  classification of the failure (non-null, may be empty)
 * @param failureMessage human-readable failure description (non-null, may be empty)
 *
 * @since M5
 */
public record PromptLoadingFailure(
        String failureReason,
        String failureMessage) implements PromptLoadingResult {

    /**
     * Canonical constructor; rejects {@code null} for every component.
     * The components are checked in declaration order.
     *
     * @throws NullPointerException if either component is null
     */
    public PromptLoadingFailure(String failureReason, String failureMessage) {
        this.failureReason = Objects.requireNonNull(failureReason, "failureReason must not be null");
        this.failureMessage = Objects.requireNonNull(failureMessage, "failureMessage must not be null");
    }
}

View File

@@ -0,0 +1,19 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Sealed interface representing the outcome of loading an external prompt template.
* <p>
* Implementations allow the Application layer to distinguish between a successful
* prompt load and various failure scenarios without using exceptions.
* <p>
* Permitted implementations (the hierarchy is closed; no other implementations are allowed):
* <ul>
* <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li>
* <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, etc.)
* or the loaded content was technically invalid (e.g., empty)</li>
* </ul>
*
* @since M5
*/
public sealed interface PromptLoadingResult
permits PromptLoadingSuccess, PromptLoadingFailure {
}

View File

@@ -0,0 +1,44 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
import java.util.Objects;
/**
 * Outcome of a prompt load that produced a prompt.
 * <p>
 * Both the prompt content and a stable identifier for that prompt were obtained
 * from the configured external source.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #promptIdentifier()} — a stable, deterministic identifier
 *       (e.g., filename, semantic version, or hash) that stays the same across
 *       multiple batch runs as long as the prompt content hasn't changed.</li>
 *   <li>{@link #promptContent()} — the prompt template text itself, ready to be used
 *       when constructing an AI request. May be empty in edge cases, though it is
 *       normally expected to be meaningful; this record only rejects {@code null}.</li>
 * </ul>
 * <p>
 * The identifier matters for historical traceability: every processing attempt
 * records which prompt was used, so that a particular decision can be investigated
 * later.
 *
 * @param promptIdentifier stable identifier for this prompt version; never null
 * @param promptContent    the prompt template text; never null
 *
 * @since M5
 */
public record PromptLoadingSuccess(
        PromptIdentifier promptIdentifier,
        String promptContent) implements PromptLoadingResult {

    /**
     * Canonical constructor; rejects {@code null} for every component.
     * The components are checked in declaration order.
     *
     * @throws NullPointerException if either component is null
     */
    public PromptLoadingSuccess(PromptIdentifier promptIdentifier, String promptContent) {
        this.promptIdentifier = Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
        this.promptContent = Objects.requireNonNull(promptContent, "promptContent must not be null");
    }
}

View File

@@ -0,0 +1,58 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Outbound port for loading external prompt templates.
* <p>
* This interface abstracts the loading of prompt content from external sources
* (files, resources, databases, etc.), allowing the Application layer to remain
* independent of how or where prompts are stored.
* <p>
* <strong>Design principles:</strong>
* <ul>
* <li>Prompt is not embedded in code; it is loaded from an external source</li>
* <li>Each prompt receives a stable identifier for traceability across batch runs</li>
* <li>Loading outcomes are returned as structured types ({@link PromptLoadingResult})
* rather than signalled through exceptions</li>
* </ul>
* <p>
* <strong>Adapter responsibilities:</strong>
* <ul>
* <li>Locate and read the prompt file/resource from the configured source</li>
* <li>Derive a stable prompt identifier (e.g., filename, semantic version, content hash)</li>
* <li>Validate that the loaded content is not empty or otherwise technically invalid</li>
* <li>Return either success or a classified failure</li>
* <li>Encapsulate all file I/O, resource loading, and configuration details</li>
* </ul>
* <p>
* <strong>Non-goals of this port:</strong>
* <ul>
* <li>Prompt parsing or templating logic</li>
* <li>Combining prompt with document text (Application layer handles this)</li>
* <li>Template variable substitution</li>
* <li>Validation of prompt content against domain (business) rules</li>
* </ul>
*
* @since M5
*/
public interface PromptPort {
/**
* Loads the configured external prompt template.
* <p>
* This method is called once per batch run to obtain the current prompt.
* The prompt content and its stable identifier are returned together.
* <p>
* If loading fails for any reason (file not found, I/O error, content validation),
* a {@link PromptLoadingFailure} is returned rather than throwing an exception.
*
* @return a {@link PromptLoadingResult} encoding either:
* <ul>
* <li>Success: prompt content and identifier loaded successfully</li>
* <li>Failure: prompt could not be loaded or is invalid</li>
* </ul>
*
* @see PromptLoadingSuccess
* @see PromptLoadingFailure
*/
PromptLoadingResult loadPrompt();
}

View File

@@ -22,6 +22,14 @@
* — Extract text content and page count from a single PDF</li>
* </ul>
* <p>
* AI-based naming ports (M5+):
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort}
* — Load an external prompt template and derive its stable identifier</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort}
* — Invoke an AI service over an OpenAI-compatible HTTP boundary</li>
* </ul>
* <p>
* Persistence and fingerprinting ports:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
@@ -46,6 +54,10 @@
* — Sealed result of a fingerprint computation (success or technical error)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult}
* — Sealed result of a master record lookup (unknown / processable / terminal / failure)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingResult}
* — Sealed result of prompt loading (success with identifier and content, or failure)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult}
* — Sealed result of AI service invocation (success with raw response, or technical failure)</li>
* </ul>
* <p>
* Exception types: