M5 AP-001 Kernobjekte, Statusmodell und KI-Port-Verträge präzisiert

2026-04-06 23:05:12 +02:00
parent d1dfc75d4e
commit cd2389f3e1
19 changed files with 776 additions and 8 deletions
@@ -103,7 +103,8 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
                    return switch (record.overallStatus()) {
                        case SUCCESS -> new DocumentTerminalSuccess(record);
                        case FAILED_FINAL -> new DocumentTerminalFinalFailure(record);
-                        case PROCESSING, FAILED_RETRYABLE, SKIPPED_ALREADY_PROCESSED, SKIPPED_FINAL_FAILURE ->
+                        case READY_FOR_AI, PROPOSAL_READY, PROCESSING, FAILED_RETRYABLE,
+                             SKIPPED_ALREADY_PROCESSED, SKIPPED_FINAL_FAILURE ->
                                new DocumentKnownProcessable(record);
                    };
                } else {
@@ -0,0 +1,77 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+
+/**
+ * Outbound port for invoking an AI service over an OpenAI-compatible HTTP boundary.
+ * <p>
+ * This interface abstracts AI service communication, allowing the Application layer
+ * to orchestrate AI-based naming without knowing about HTTP, authentication, or
+ * provider-specific details.
+ * <p>
+ * <strong>Design principles:</strong>
+ * <ul>
+ *   <li>Provider is configurable (OpenAI, Azure, local LLM, etc.), not hard-coded</li>
+ *   <li>Base URL, model name, and timeout are runtime configuration</li>
+ *   <li>Results are returned as structured types ({@link AiInvocationResult}),
+ *       never as exceptions</li>
+ *   <li>Technical success (HTTP 200) is distinct from response content validity</li>
+ * </ul>
+ * <p>
+ * <strong>Adapter responsibilities:</strong>
+ * <ul>
+ *   <li>Construct an HTTP request from the {@link AiRequestRepresentation}</li>
+ *   <li>Apply all transport-level configuration (base URL, model, timeout, authentication)</li>
+ *   <li>Execute the HTTP request against the configured endpoint</li>
+ *   <li>Distinguish between successful reception of a response body and technical failure</li>
+ *   <li>Return either an invocation success with raw response or a classified technical error</li>
+ *   <li>Encapsulate all HTTP, JSON serialization, and authentication details</li>
+ * </ul>
+ * <p>
+ * <strong>Non-goals of this port:</strong>
+ * <ul>
+ *   <li>JSON parsing of the response body (Application layer handles this)</li>
+ *   <li>Validation of response content against domain rules</li>
+ *   <li>Prompt construction or text formatting (Application layer does this)</li>
+ *   <li>Handling of provider-specific output formats or structured output schemas</li>
+ * </ul>
+ * <p>
+ * <strong>OpenAI compatibility:</strong> The adapter must support the OpenAI Chat
+ * Completions API or a compatible endpoint. The {@code AiRequestRepresentation}
+ * contains the prompt and document text; the adapter is responsible for formatting
+ * these as needed (e.g., system message + user message in the Chat API).
+ *
+ * @since M5
+ */
+public interface AiInvocationPort {
+
+    /**
+     * Invokes an AI service with the given request representation.
+     * <p>
+     * This method sends a request to the configured AI endpoint and returns the result.
+     * The request contains both the prompt and the document text, deterministically
+     * composed by the Application layer.
+     * <p>
+     * <strong>Outcome distinction:</strong>
+     * <ul>
+     *   <li>If the HTTP call succeeds and a response body is received,
+     *       {@link AiInvocationSuccess} is returned, even if the body is invalid JSON
+     *       or semantically problematic. The Application layer will parse and validate
+     *       the content.</li>
+     *   <li>If the HTTP call fails (timeout, network error, endpoint unreachable,
+     *       connection failure), {@link AiInvocationTechnicalFailure} is returned.</li>
+     * </ul>
+     *
+     * @param request the complete request to send to the AI service; never null
+     * @return an {@link AiInvocationResult} encoding either:
+     *         <ul>
+     *           <li>Success: response body was received (valid or not)</li>
+     *           <li>Technical failure: HTTP communication failed</li>
+     *         </ul>
+     * @throws NullPointerException if request is null
+     *
+     * @see AiInvocationSuccess
+     * @see AiInvocationTechnicalFailure
+     */
+    AiInvocationResult invoke(AiRequestRepresentation request);
+}
@@ -0,0 +1,28 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+/**
+ * Sealed interface representing the outcome of invoking an AI service.
+ * <p>
+ * Implementations allow the Application layer to distinguish between:
+ * <ul>
+ *   <li>Successful HTTP communication with a response body (which may still contain
+ *       functionally invalid content, but is at least technically received)</li>
+ *   <li>Technical failure (timeout, network error, endpoint unreachable, connection failure)</li>
+ * </ul>
+ * <p>
+ * Permitted implementations:
+ * <ul>
+ *   <li>{@link AiInvocationSuccess} — HTTP call completed with a response body</li>
+ *   <li>{@link AiInvocationTechnicalFailure} — HTTP call failed or no valid response was received</li>
+ * </ul>
+ * <p>
+ * <strong>Critical distinction:</strong> A successful invocation means the HTTP request
+ * was sent and a response was received, but the response content may still be unparseable
+ * or semantically invalid. This is crucial for retry logic: a technical HTTP success
+ * with unparseable JSON is different from a timeout or network error.
+ *
+ * @since M5
+ */
+public sealed interface AiInvocationResult
+        permits AiInvocationSuccess, AiInvocationTechnicalFailure {
+}
@@ -0,0 +1,51 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+import java.util.Objects;
+
+/**
+ * Represents successful HTTP communication with an AI service.
+ * <p>
+ * The HTTP request was sent and a response body was received. This indicates
+ * technical success of the communication, but does NOT guarantee that the response
+ * content is valid, parseable, or functionally usable.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #request()} — the exact request that was sent to the AI service,
+ *       including prompt, document text, and character counts</li>
+ *   <li>{@link #rawResponse()} — the uninterpreted response body returned by the AI,
+ *       which may be valid JSON, malformed, empty, or otherwise problematic</li>
+ * </ul>
+ * <p>
+ * The Application layer is responsible for:
+ * <ul>
+ *   <li>Parsing the raw response (JSON extraction, field validation)</li>
+ *   <li>Distinguishing between parseable and unparseable responses</li>
+ *   <li>Validating the content against rules (title length, date format, etc.)</li>
+ *   <li>Classifying any failures as technical or functional</li>
+ * </ul>
+ * <p>
+ * <strong>Persistence:</strong> Both request and response are stored in the
+ * processing attempt history for debugging and audit.
+ *
+ * @param request    the AI request that was sent; never null
+ * @param rawResponse the uninterpreted response body; never null (but may be empty)
+ *
+ * @since M5
+ */
+public record AiInvocationSuccess(
+        AiRequestRepresentation request,
+        AiRawResponse rawResponse) implements AiInvocationResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if either field is null
+     */
+    public AiInvocationSuccess {
+        Objects.requireNonNull(request, "request must not be null");
+        Objects.requireNonNull(rawResponse, "rawResponse must not be null");
+    }
+}
@@ -0,0 +1,53 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+import java.util.Objects;
+
+/**
+ * Represents a technical failure during AI service invocation.
+ * <p>
+ * The HTTP request could not be sent, or no valid response body was received.
+ * This covers network errors, timeouts, endpoint unreachability, connection failures,
+ * and other infrastructure-level problems.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #request()} — the request that was attempted to be sent. Stored for
+ *       debugging and audit, even though it may not have reached the AI service.</li>
+ *   <li>{@link #failureReason()} — a classification of the technical error
+ *       (e.g., "TIMEOUT", "ENDPOINT_UNREACHABLE", "CONNECTION_ERROR")</li>
+ *   <li>{@link #failureMessage()} — a human-readable description of the error,
+ *       suitable for logging and operational troubleshooting</li>
+ * </ul>
+ * <p>
+ * <strong>Retry semantics:</strong> Technical failures are retryable. The Application
+ * layer will record this as a transient error, and the document may be retried in
+ * a later batch run up to the configured maximum transient-error count.
+ * <p>
+ * <strong>Distinction from functional errors:</strong> A 200 OK response with an
+ * invalid JSON body is NOT a technical failure; it's an invocation success that
+ * contains a functional error. Only communication/transport errors are classified here.
+ *
+ * @param request        the request that was attempted (may not have been successfully sent);
+ *                       never null
+ * @param failureReason  classification of the error type; never null (may be empty)
+ * @param failureMessage human-readable error description; never null (may be empty)
+ *
+ * @since M5
+ */
+public record AiInvocationTechnicalFailure(
+        AiRequestRepresentation request,
+        String failureReason,
+        String failureMessage) implements AiInvocationResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if any field is null
+     */
+    public AiInvocationTechnicalFailure {
+        Objects.requireNonNull(request, "request must not be null");
+        Objects.requireNonNull(failureReason, "failureReason must not be null");
+        Objects.requireNonNull(failureMessage, "failureMessage must not be null");
+    }
+}
@@ -0,0 +1,41 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import java.util.Objects;
+
+/**
+ * Represents failure to load an external prompt template.
+ * <p>
+ * The prompt could not be obtained from the configured external source,
+ * or the loaded content was technically invalid (e.g., empty after trimming).
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #failureReason()} — a classification of the failure type
+ *       (e.g., "FILE_NOT_FOUND", "IO_ERROR", "EMPTY_CONTENT")</li>
+ *   <li>{@link #failureMessage()} — a human-readable description of why
+ *       the load failed, suitable for logging and debugging</li>
+ * </ul>
+ * <p>
+ * This is a technical failure, not a validation error, and typically prevents
+ * the batch run from proceeding further (may lead to a {@code PROCESSING} status
+ * treated as {@code FAILED_RETRYABLE}).
+ *
+ * @param failureReason  classification of the failure (non-null, may be empty)
+ * @param failureMessage human-readable failure description (non-null, may be empty)
+ *
+ * @since M5
+ */
+public record PromptLoadingFailure(
+        String failureReason,
+        String failureMessage) implements PromptLoadingResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if either field is null
+     */
+    public PromptLoadingFailure {
+        Objects.requireNonNull(failureReason, "failureReason must not be null");
+        Objects.requireNonNull(failureMessage, "failureMessage must not be null");
+    }
+}
@@ -0,0 +1,19 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+/**
+ * Sealed interface representing the outcome of loading an external prompt template.
+ * <p>
+ * Implementations allow the Application layer to distinguish between a successful
+ * prompt load and various failure scenarios without using exceptions.
+ * <p>
+ * Permitted implementations:
+ * <ul>
+ *   <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li>
+ *   <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, etc.)</li>
+ * </ul>
+ *
+ * @since M5
+ */
+public sealed interface PromptLoadingResult
+        permits PromptLoadingSuccess, PromptLoadingFailure {
+}
@@ -0,0 +1,44 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
+import java.util.Objects;
+
+/**
+ * Represents successful loading of an external prompt template.
+ * <p>
+ * The prompt content and a stable identifier for the prompt have both been
+ * successfully obtained from the configured external source.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #promptIdentifier()} — a stable, deterministic identifier
+ *       (e.g., filename, semantic version, or hash) that will remain the same
+ *       across multiple batch runs if the prompt content hasn't changed.</li>
+ *   <li>{@link #promptContent()} — the actual prompt template text, ready to be
+ *       used in constructing an AI request. This record only rejects {@code null};
+ *       empty content is expected to be reported as a {@link PromptLoadingFailure}.</li>
+ * </ul>
+ * <p>
+ * The identifier is crucial for historical traceability: each processing attempt
+ * records which prompt was used, allowing later investigation of why a particular
+ * decision was made.
+ *
+ * @param promptIdentifier stable identifier for this prompt version; never null
+ * @param promptContent    the prompt template text; never null
+ *
+ * @since M5
+ */
+public record PromptLoadingSuccess(
+        PromptIdentifier promptIdentifier,
+        String promptContent) implements PromptLoadingResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     *
+     * @throws NullPointerException if either field is null
+     */
+    public PromptLoadingSuccess {
+        Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
+        Objects.requireNonNull(promptContent, "promptContent must not be null");
+    }
+}
@@ -0,0 +1,58 @@
+package de.gecheckt.pdf.umbenenner.application.port.out;
+
+/**
+ * Outbound port for loading external prompt templates.
+ * <p>
+ * This interface abstracts the loading of prompt content from external sources
+ * (files, resources, databases, etc.), allowing the Application layer to remain
+ * independent of how or where prompts are stored.
+ * <p>
+ * <strong>Design principles:</strong>
+ * <ul>
+ *   <li>Prompt is not embedded in code; it is loaded from an external source</li>
+ *   <li>Each prompt receives a stable identifier for traceability across batch runs</li>
+ *   <li>Results are returned as structured types ({@link PromptLoadingResult}),
+ *       never as exceptions</li>
+ * </ul>
+ * <p>
+ * <strong>Adapter responsibilities:</strong>
+ * <ul>
+ *   <li>Locate and read the prompt file/resource from the configured source</li>
+ *   <li>Derive a stable prompt identifier (e.g., filename, semantic version, content hash)</li>
+ *   <li>Validate that the loaded content is not empty or otherwise invalid</li>
+ *   <li>Return either success or a classified failure</li>
+ *   <li>Encapsulate all file I/O, resource loading, and configuration details</li>
+ * </ul>
+ * <p>
+ * <strong>Non-goals of this port:</strong>
+ * <ul>
+ *   <li>Prompt parsing or templating logic</li>
+ *   <li>Combining prompt with document text (Application layer handles this)</li>
+ *   <li>Template variable substitution</li>
+ *   <li>Validation of prompt content against domain (business) rules</li>
+ * </ul>
+ *
+ * @since M5
+ */
+public interface PromptPort {
+
+    /**
+     * Loads the configured external prompt template.
+     * <p>
+     * This method is called once per batch run to obtain the current prompt.
+     * The prompt content and its stable identifier are returned together.
+     * <p>
+     * If loading fails for any reason (file not found, I/O error, content validation),
+     * a {@link PromptLoadingFailure} is returned rather than throwing an exception.
+     *
+     * @return a {@link PromptLoadingResult} encoding either:
+     *         <ul>
+     *           <li>Success: prompt content and identifier loaded successfully</li>
+     *           <li>Failure: prompt could not be loaded or is invalid</li>
+     *         </ul>
+     *
+     * @see PromptLoadingSuccess
+     * @see PromptLoadingFailure
+     */
+    PromptLoadingResult loadPrompt();
+}
@@ -22,6 +22,14 @@
 *       — Extract text content and page count from a single PDF</li>
 * </ul>
 * <p>
+ * AI-based naming ports (M5+):
+ * <ul>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort}
+ *       — Load an external prompt template and derive its stable identifier</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort}
+ *       — Invoke an AI service over an OpenAI-compatible HTTP boundary</li>
+ * </ul>
+ * <p>
 * Persistence and fingerprinting ports:
 * <ul>
 *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
@@ -46,6 +54,10 @@
 *       — Sealed result of a fingerprint computation (success or technical error)</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult}
 *       — Sealed result of a master record lookup (unknown / processable / terminal / failure)</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingResult}
+ *       — Sealed result of prompt loading (success with identifier and content, or failure)</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult}
+ *       — Sealed result of AI service invocation (success with raw response, or technical failure)</li>
 * </ul>
 * <p>
 * Exception types:
@@ -0,0 +1,47 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+/**
+ * Classification of AI-related errors into technical vs. functional categories.
+ * <p>
+ * This enumeration distinguishes between two fundamental error types that occur
+ * during AI-based naming proposal generation:
+ * <ul>
+ *   <li><strong>Technical errors</strong>: Infrastructure or communication failures
+ *       (e.g., API unreachable, timeout, unparseable response). These are typically
+ *       transient and retryable.</li>
+ *   <li><strong>Functional errors</strong>: Problems with the AI response content itself
+ *       (e.g., invalid title, unparseable date, generic placeholder title).
+ *       These are deterministic and typically not resolved by retry alone.</li>
+ * </ul>
+ * <p>
+ * The classification determines retry behavior: technical errors may be retried in
+ * a later run, while functional errors are subject to the deterministic failure rule
+ * (first occurrence retryable, second occurrence final).
+ *
+ * @since M5
+ */
+public enum AiErrorClassification {
+
+    /**
+     * A technical infrastructure or communication failure occurred.
+     * <p>
+     * Examples: API endpoint not reachable, HTTP timeout, malformed response structure,
+     * missing mandatory fields in otherwise-parseable JSON, network error.
+     * <p>
+     * These errors are typically transient and may be resolved by retry in a later
+     * batch run. The failure is recorded against the transient-error counter.
+     */
+    TECHNICAL,
+
+    /**
+     * A functional or content validation error occurred.
+     * <p>
+     * Examples: invalid or generic title (e.g., "Dokument"), unparseable date string,
+     * AI response violates documented rules (e.g., title contains prohibited characters).
+     * <p>
+     * These errors are deterministic and reflect issues with the AI-generated content
+     * itself or the document's content quality. The failure is recorded against the
+     * content-error counter, subject to the deterministic retry rule.
+     */
+    FUNCTIONAL
+}
@@ -0,0 +1,45 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Unvalidated, uninterpreted raw response body from an AI service.
+ * <p>
+ * This record holds the response body string exactly as returned by the AI HTTP endpoint,
+ * before any parsing, validation, or business-logic processing. It is used to:
+ * <ul>
+ *   <li>Preserve the original response for audit and debugging purposes.</li>
+ *   <li>Separate the technical success of the HTTP call from the semantic validity
+ *       of the response content.</li>
+ *   <li>Enable deterministic retry decisions: a 200 OK response is a technical success
+ *       even if the JSON is unparseable or semantically invalid.</li>
+ * </ul>
+ * <p>
+ * <strong>Persistence:</strong> The raw response is stored in SQLite history for
+ * traceability and future debugging. It may contain the full JSON structure or
+ * formatted text, depending on the AI service.
+ * <p>
+ * <strong>Example:</strong>
+ * <pre>
+ * {@code
+ * AiRawResponse response = new AiRawResponse(
+ *     "{\"date\": \"2026-03-05\", \"title\": \"Stromabrechnung\", \"reasoning\": \"...\"}"
+ * );
+ * }
+ * </pre>
+ *
+ * @param content the raw response body as a string (non-null, may be empty or malformed)
+ *
+ * @since M5
+ */
+public record AiRawResponse(String content) {
+
+    /**
+     * Compact constructor validating that content is not null.
+     *
+     * @throws NullPointerException if {@code content} is null
+     */
+    public AiRawResponse {
+        Objects.requireNonNull(content, "content must not be null");
+    }
+}
@@ -0,0 +1,73 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Deterministic, complete representation of the request sent to an AI service.
+ * <p>
+ * This record captures the exact prompt, text, and configuration that were sent
+ * to the AI in a single request, allowing for reproducibility and debugging.
+ * <p>
+ * <strong>Construction:</strong> The Application layer constructs this representation
+ * deterministically from:
+ * <ul>
+ *   <li>The loaded external prompt text</li>
+ *   <li>The prompt's stable identifier</li>
+ *   <li>The document's extracted PDF text (already limited to max characters)</li>
+ *   <li>Structural markers (delimiters, field names) that define the format</li>
+ * </ul>
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #promptIdentifier()} — stable identifier of the prompt template used</li>
+ *   <li>{@link #promptContent()} — the actual prompt text loaded from the external file</li>
+ *   <li>{@link #documentText()} — the PDF-extracted text, already limited to the configured
+ *       maximum characters before being placed in this representation</li>
+ *   <li>{@link #sentCharacterCount()} — the exact number of text characters from
+ *       documentText that were included in the request sent to the AI
+ *       (may be less than documentText.length() if truncation occurred)</li>
+ * </ul>
+ * <p>
+ * <strong>Persistence:</strong> Both prompt identifier and sent character count
+ * are recorded in the processing attempt history for traceability.
+ * <p>
+ * <strong>Not included:</strong>
+ * <ul>
+ *   <li>HTTP-specific details (headers, authentication, endpoint URL)</li>
+ *   <li>Serialization format (whether sent as JSON, multipart, etc.)</li>
+ *   <li>AI provider configuration details</li>
+ * </ul>
+ *
+ * @param promptIdentifier   stable identifier for the prompt template; never null
+ * @param promptContent      content of the prompt template; never null (may be empty,
+ *                           though typically meaningful)
+ * @param documentText       extracted PDF text (already limited to max characters);
+ *                           never null (may be empty)
+ * @param sentCharacterCount exact number of characters from documentText that were
+ *                           sent to the AI; must be &gt;= 0 and &lt;= documentText.length()
+ *
+ * @since M5
+ */
+public record AiRequestRepresentation(
+        PromptIdentifier promptIdentifier,
+        String promptContent,
+        String documentText,
+        int sentCharacterCount) {
+
+    /**
+     * Compact constructor validating all fields.
+     *
+     * @throws NullPointerException if promptIdentifier, promptContent, or documentText is null
+     * @throws IllegalArgumentException if sentCharacterCount is out of valid range
+     */
+    public AiRequestRepresentation {
+        Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
+        Objects.requireNonNull(promptContent, "promptContent must not be null");
+        Objects.requireNonNull(documentText, "documentText must not be null");
+        if (sentCharacterCount < 0 || sentCharacterCount > documentText.length()) {
+            throw new IllegalArgumentException(
+                    "sentCharacterCount must be >= 0 and <= documentText.length(); " +
+                    "got " + sentCharacterCount + " but documentText.length() = " + documentText.length());
+        }
+    }
+}
@@ -0,0 +1,38 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+/**
+ * Enumeration of valid sources for a resolved document date.
+ * <p>
+ * Each enum constant represents a specific origin or determination method for the date
+ * used in a naming proposal. The source is recorded for traceability.
+ * <p>
+ * <strong>Semantics:</strong>
+ * <ul>
+ *   <li>{@link #AI_PROVIDED} — the date was supplied by the AI in its response
+ *       as a valid {@code YYYY-MM-DD} string.</li>
+ *   <li>{@link #FALLBACK_CURRENT} — the date was set to the current system date
+ *       (from {@code ClockPort}) because the AI did not provide a usable date.</li>
+ * </ul>
+ * <p>
+ * The source is recorded in the processing attempt history for reproducibility
+ * and operational transparency.
+ *
+ * @since M5
+ */
+public enum DateSource {
+
+    /**
+     * The date was provided by the AI in its JSON response.
+     * <p>
+     * The AI explicitly supplied a {@code date} field in valid {@code YYYY-MM-DD} format.
+     */
+    AI_PROVIDED,
+
+    /**
+     * The date is the current system date used as fallback.
+     * <p>
+     * The AI either omitted the {@code date} field or provided no usable date.
+     * The application set the fallback to the current date from {@code ClockPort}.
+     */
+    FALLBACK_CURRENT
+}
@@ -0,0 +1,68 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.time.LocalDate;
+import java.util.Objects;
+
+/**
+ * A validated naming proposal derived from AI analysis of a document.
+ * <p>
+ * This record represents the core results of the AI-based naming stage:
+ * a proposed date, a proposed title, and the AI's reasoning. All three fields
+ * have been validated according to application rules at the time of creation.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #resolvedDate()} — the effective date for the naming proposal,
+ *       either supplied by the AI or derived as a fallback. Always non-null.</li>
+ *   <li>{@link #dateSource()} — indicates whether the date came from the AI
+ *       or is a fallback to the current system date.</li>
+ *   <li>{@link #validatedTitle()} — the title extracted and validated from
+ *       the AI response. Guaranteed to be non-null, non-empty, and compliant
+ *       with documented title rules (max 20 base characters, no prohibited
+ *       special characters, etc.).</li>
+ *   <li>{@link #aiReasoning()} — the justification or explanation provided
+ *       by the AI for its proposal. Non-null (may be empty in edge cases,
+ *       though normally expected to be meaningful).</li>
+ * </ul>
+ * <p>
+ * <strong>Not included in this proposal:</strong>
+ * <ul>
+ *   <li>Final target filename (e.g., with {@code (1)}, {@code (2)} duplicates)</li>
+ *   <li>Target file path or copy operation</li>
+ *   <li>Windows-specific filename character transformations</li>
+ * </ul>
+ * <p>
+ * <strong>Persistence:</strong> The naming proposal is persistently stored as part
+ * of the processing attempt history for reproducibility and audit.
+ *
+ * @param resolvedDate   the effective date (never null); derived from AI or fallback
+ * @param dateSource     origin of the date ({@link DateSource#AI_PROVIDED} or
+ *                       {@link DateSource#FALLBACK_CURRENT}); never null
+ * @param validatedTitle the title validated per application rules (non-null, non-empty,
+ *                       max 20 base characters as defined in requirements)
+ * @param aiReasoning    the AI's explanation for the proposal (non-null, may be empty)
+ *
+ * @since M5
+ */
+public record NamingProposal(
+        LocalDate resolvedDate,
+        DateSource dateSource,
+        String validatedTitle,
+        String aiReasoning) {
+
+    /**
+     * Compact constructor validating all mandatory fields.
+     *
+     * @throws NullPointerException if any field is null
+     * @throws IllegalArgumentException if validatedTitle is empty
+     */
+    public NamingProposal {
+        Objects.requireNonNull(resolvedDate, "resolvedDate must not be null");
+        Objects.requireNonNull(dateSource, "dateSource must not be null");
+        Objects.requireNonNull(validatedTitle, "validatedTitle must not be null");
+        if (validatedTitle.isEmpty()) {
+            throw new IllegalArgumentException("validatedTitle must not be empty");
+        }
+        Objects.requireNonNull(aiReasoning, "aiReasoning must not be null");
+    }
+}
@@ -9,7 +9,15 @@ package de.gecheckt.pdf.umbenenner.domain.model;
 * <p>
 * <strong>Overall-status semantics (master record):</strong>
 * <ul>
- *   <li>{@link #SUCCESS} — document was fully processed; skip in all future runs.</li>
+ *   <li>{@link #READY_FOR_AI} — document has passed all pre-checks and extracted text
+ *       has been validated; it is ready for AI-based naming proposal. This is a non-terminal
+ *       intermediate state; the document will be processed further in subsequent stages.</li>
+ *   <li>{@link #PROPOSAL_READY} — a valid, persistent AI-based naming proposal has been
+ *       generated and stored. The document is complete for the current stage but will be
+ *       processed further in the next stage (target copy, final filename generation).
+ *       This is a non-terminal intermediate state.</li>
+ *   <li>{@link #SUCCESS} — document was fully processed end-to-end and written to the
+ *       target location. Status is final and irreversible; skip in all future runs.</li>
 *   <li>{@link #FAILED_RETRYABLE} — last attempt failed but is retryable; process again
 *       in the next run according to the applicable retry rule.</li>
 *   <li>{@link #FAILED_FINAL} — all allowed retries exhausted; skip in all future runs.</li>
@@ -19,7 +27,9 @@ package de.gecheckt.pdf.umbenenner.domain.model;
 * <p>
 * <strong>Attempt-status semantics (attempt history):</strong>
 * <ul>
- *   <li>{@link #SUCCESS} — this attempt completed successfully.</li>
+ *   <li>{@link #READY_FOR_AI} — this attempt resulted in a document ready for AI processing.</li>
+ *   <li>{@link #PROPOSAL_READY} — this attempt resulted in a valid AI-based naming proposal.</li>
+ *   <li>{@link #SUCCESS} — this attempt completed successfully (end-to-end, including target copy).</li>
 *   <li>{@link #FAILED_RETRYABLE} — this attempt failed; a future attempt is allowed.</li>
 *   <li>{@link #FAILED_FINAL} — this attempt failed and no further attempts will be made.</li>
 *   <li>{@link #SKIPPED_ALREADY_PROCESSED} — this attempt was a skip because the
@@ -44,10 +54,42 @@ package de.gecheckt.pdf.umbenenner.domain.model;
 */
 public enum ProcessingStatus {

+    /**
+     * Document has passed all pre-checks and extracted PDF text has been validated.
+     * <p>
+     * This is a non-terminal intermediate state. The document is ready to be processed
+     * by AI-based naming in the next stage. A document with this status may be reprocessed
+     * in later runs; its status is then replaced by a subsequent one according to the
+     * outcome of the AI naming step.
+     * <p>
+     * Possible transitions: to {@link #PROPOSAL_READY} when AI-based naming succeeds, or to
+     * a failure status when the AI step fails.
+     *
+     * @since M5
+     */
+    READY_FOR_AI,
+
+    /**
+     * A valid, persistent AI-based naming proposal has been generated and stored.
+     * <p>
+     * This is a non-terminal intermediate state. The document is complete for its current
+     * processing stage, but will be processed further in the next stage (target copy,
+     * final filename generation with duplicate-suffix handling, and final success).
+     * <p>
+     * A document with this status will not be reprocessed by the AI step in future runs
+     * (idempotency rule), but may still be processed further by subsequent stages.
+     * The latest processing attempt with this status holds the authoritative naming proposal
+     * (resolved date, title, reasoning) for subsequent stages.
+     *
+     * @since M5
+     */
+    PROPOSAL_READY,
+
    /**
     * Document was successfully processed and written to the target location.
     * <p>
-     * A document with this status will be skipped in all future batch runs.
+     * As of M5, this status is reserved for the true end-to-end success after the target copy
+     * stage. A document with this status will be skipped in all future batch runs.
     * Status is final and irreversible.
     */
    SUCCESS,
@@ -0,0 +1,40 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Stable, unique identifier for an externally-loaded prompt template.
+ * <p>
+ * The prompt is not embedded in code but loaded from an external file or resource.
+ * The identifier provides traceability: it identifies which prompt version was used
+ * for a specific AI request and naming proposal.
+ * <p>
+ * <strong>Identity semantics:</strong> Two {@code PromptIdentifier} instances are
+ * equal if and only if their identifier strings are equal (by value, not by reference).
+ * <p>
+ * <strong>Typical examples:</strong>
+ * <ul>
+ *   <li>Prompt file name: {@code "prompt_de_v1.txt"}</li>
+ *   <li>Version string: {@code "2026-03-v2"}</li>
+ *   <li>Content hash: {@code "sha256:abc123..."}</li>
+ * </ul>
+ * <p>
+ * The choice of identifier scheme is flexible (filename, semantic version, hash, etc.),
+ * but must be stable and deterministic so that the same prompt always receives the
+ * same identifier across batch runs.
+ *
+ * @param identifier the stable, non-null identifier string (expected non-empty, but not enforced)
+ *
+ * @since M5
+ */
+public record PromptIdentifier(String identifier) {
+
+    /**
+     * Compact constructor that rejects a {@code null} identifier; empty strings are accepted.
+     *
+     * @throws NullPointerException if {@code identifier} is null
+     */
+    public PromptIdentifier {
+        Objects.requireNonNull(identifier, "identifier must not be null");
+    }
+}
@@ -3,7 +3,8 @@
 * <p>
 * This package contains the fundamental domain entities and status models required for document processing:
 * <ul>
- *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states,
+ *       including intermediate non-terminal states such as {@code READY_FOR_AI} and {@code PROPOSAL_READY}</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.RunId} — unique identifier for a batch run</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext} — technical context for a batch run</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} — content-based document identity (SHA-256 hex); primary key for persistence</li>
@@ -14,6 +15,18 @@
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li>
 * </ul>
 * <p>
+ * AI and naming proposal types (M5+):
+ * <ul>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources
+ *       for resolved document dates</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs.
+ *       functional AI errors</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li>
+ * </ul>
+ * <p>
 * Additional classes:
 * <ul>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason} — enumeration of pre-check failure reasons</li>
@@ -8,19 +8,23 @@ import static org.junit.jupiter.api.Assertions.*;
 * Unit tests for {@link ProcessingStatus} enumeration.
 * <p>
 * Verifies that all required status values are present and correctly defined
- * for M2 and future milestones.
+ * for M2, M5, and future milestones.
 */
 class ProcessingStatusTest {

    @Test
    void allRequiredStatusValuesExist() {
        // Verify all status values required by the architecture are present
+        // M2+ statuses
        assertNotNull(ProcessingStatus.SUCCESS);
        assertNotNull(ProcessingStatus.FAILED_RETRYABLE);
        assertNotNull(ProcessingStatus.FAILED_FINAL);
        assertNotNull(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
        assertNotNull(ProcessingStatus.SKIPPED_FINAL_FAILURE);
        assertNotNull(ProcessingStatus.PROCESSING);
+        // M5+ statuses
+        assertNotNull(ProcessingStatus.READY_FOR_AI);
+        assertNotNull(ProcessingStatus.PROPOSAL_READY);
    }

    @Test
@@ -59,6 +63,18 @@ class ProcessingStatusTest {
        assertEquals(ProcessingStatus.PROCESSING, status);
    }

+    @Test
+    void readyForAiStatus_isDefinedAndAccessible() { // guards the constant at compile time
+        ProcessingStatus status = ProcessingStatus.READY_FOR_AI;
+        assertEquals(ProcessingStatus.READY_FOR_AI, status); // tautological at runtime by design
+    }
+
+    @Test
+    void proposalReadyStatus_isDefinedAndAccessible() { // guards the constant at compile time
+        ProcessingStatus status = ProcessingStatus.PROPOSAL_READY;
+        assertEquals(ProcessingStatus.PROPOSAL_READY, status); // tautological at runtime by design
+    }
+
    @Test
    void statusEquality_worksByReference() {
        // Enums have identity-based equality
@@ -72,6 +88,8 @@ class ProcessingStatusTest {

        switch (status) {
            case SUCCESS -> result = "success";
+            case READY_FOR_AI -> result = "ready-for-ai";
+            case PROPOSAL_READY -> result = "proposal-ready";
            case FAILED_RETRYABLE -> result = "retryable";
            case FAILED_FINAL -> result = "final";
            case SKIPPED_ALREADY_PROCESSED -> result = "skip-processed";
@@ -83,8 +101,8 @@ class ProcessingStatusTest {
    }

    @Test
-    void statusValues_areSixInTotal() {
+    void statusValues_areEightInTotal() { // six pre-M5 statuses plus READY_FOR_AI and PROPOSAL_READY
        ProcessingStatus[] values = ProcessingStatus.values();
-        assertEquals(6, values.length, "ProcessingStatus should have exactly 6 values");
+        assertEquals(8, values.length, "ProcessingStatus should have exactly 8 values");
    }
 }