M5 AP-001 Parsebares KI-Antwortmodell ergänzt und Meilenstein-Bezüge entfernt
2026-04-06 23:17:16 +02:00
parent cd2389f3e1
commit c77a6f06af
22 changed files with 184 additions and 39 deletions
@@ -18,7 +18,6 @@ package de.gecheckt.pdf.umbenenner.domain.model;
 * a later run, while functional errors are subject to the deterministic failure rule
 * (first occurrence retryable, second occurrence final).
 *
- * @since M5
 */
 public enum AiErrorClassification {

@@ -29,8 +29,6 @@ import java.util.Objects;
 * </pre>
 *
 * @param content the raw response body as a string (non-null, may be empty or malformed)
- *
- * @since M5
 */
 public record AiRawResponse(String content) {

@@ -45,8 +45,6 @@ import java.util.Objects;
 *                           never null (may be empty)
 * @param sentCharacterCount exact number of characters from documentText that were
 *                           sent to the AI; must be &gt;= 0 and &lt;= documentText.length()
- *
- * @since M5
 */
 public record AiRequestRepresentation(
        PromptIdentifier promptIdentifier,
@@ -0,0 +1,46 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Represents failure to parse an AI response or extract required fields.
+ * <p>
+ * The response body was received, but either:
+ * <ul>
+ *   <li>The JSON could not be parsed (malformed JSON, invalid structure)</li>
+ *   <li>Required fields (title, reasoning) are missing or not present in the expected form</li>
+ * </ul>
+ * <p>
+ * This is a technical parsing error, distinct from semantic validation failures.
+ * A response with valid JSON but semantically invalid field values (e.g., overly long title)
+ * is NOT a parsing failure; it is a parsing success with a functional validation error.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #failureReason()} — a classification of the parsing failure
+ *       (e.g., "INVALID_JSON", "MISSING_TITLE", "MISSING_REASONING"). The value is a
+ *       free-form string; this record does not restrict it to the examples listed here.</li>
+ *   <li>{@link #failureMessage()} — a human-readable description, suitable for
+ *       logging and debugging</li>
+ * </ul>
+ * <p>
+ * <strong>Retry semantics:</strong> Parsing failures are technical and retryable.
+ * The document should be retried in a later batch run up to the configured maximum
+ * for transient errors.
+ *
+ * @param failureReason  classification of the parsing failure (non-null, may be empty)
+ * @param failureMessage human-readable error description (non-null, may be empty)
+ */
+public record AiResponseParsingFailure(
+        String failureReason,
+        String failureMessage) implements AiResponseParsingResult {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     * <p>
+     * Only {@code null} is rejected; empty strings are accepted for both components.
+     * {@code failureReason} is checked before {@code failureMessage}.
+     *
+     * @throws NullPointerException if {@code failureReason} or {@code failureMessage} is null
+     */
+    public AiResponseParsingFailure {
+        Objects.requireNonNull(failureReason, "failureReason must not be null");
+        Objects.requireNonNull(failureMessage, "failureMessage must not be null");
+    }
+}
@@ -0,0 +1,25 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+/**
+ * Sealed interface representing the outcome of parsing an AI response into its JSON structure.
+ * <p>
+ * Implementations distinguish between:
+ * <ul>
+ *   <li>{@link AiResponseParsingSuccess} — JSON structure was parsed and required fields extracted</li>
+ *   <li>{@link AiResponseParsingFailure} — JSON parsing failed or required fields are missing</li>
+ * </ul>
+ * <p>
+ * The interface declares no methods of its own; it serves as a closed marker type whose
+ * only permitted implementations are the two types listed above.
+ * <p>
+ * <strong>Usage in the processing pipeline:</strong>
+ * <ul>
+ *   <li>Input: {@link AiRawResponse} from a successful HTTP invocation</li>
+ *   <li>Operation: Parse the raw response body as JSON and extract required fields</li>
+ *   <li>Output: Either an {@link AiResponseParsingSuccess} carrying the
+ *       {@code ParsedAiResponse}, or an {@link AiResponseParsingFailure} carrying a
+ *       classified failure</li>
+ * </ul>
+ * <p>
+ * This distinction is crucial for error classification: an unparseable JSON response
+ * is a technical error (retryable), while a valid JSON with semantically invalid field
+ * values is a functional error (deterministic).
+ */
+public sealed interface AiResponseParsingResult
+        permits AiResponseParsingSuccess, AiResponseParsingFailure {
+}
@@ -0,0 +1,35 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Represents successful parsing of an AI response into its JSON structure.
+ * <p>
+ * The response body was parsed as JSON, the expected fields (title, reasoning, optional date)
+ * were extracted, and are now available for semantic validation.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #response()} — the parsed response with title, reasoning, and optional date
+ *       as string values. Not yet semantically validated.</li>
+ * </ul>
+ * <p>
+ * <strong>Next step:</strong> Semantic validation of the parsed fields (title length,
+ * date format, etc.) is performed separately, in the Application layer. This separation
+ * allows precise error classification: parse failures are technical, validation failures
+ * are functional/deterministic.
+ *
+ * @param response the parsed response structure; never null
+ */
+public record AiResponseParsingSuccess(
+        ParsedAiResponse response) implements AiResponseParsingResult {
+
+    /**
+     * Compact constructor validating the response field.
+     * <p>
+     * Rejects {@code null} so that a success result always carries a parsed payload;
+     * no further inspection of the payload happens here.
+     *
+     * @throws NullPointerException if {@code response} is null
+     */
+    public AiResponseParsingSuccess {
+        Objects.requireNonNull(response, "response must not be null");
+    }
+}
@@ -17,7 +17,6 @@ package de.gecheckt.pdf.umbenenner.domain.model;
 * The source is recorded in the processing attempt history for reproducibility
 * and operational transparency.
 *
- * @since M5
 */
 public enum DateSource {

@@ -41,8 +41,6 @@ import java.util.Objects;
 * @param validatedTitle the title validated per application rules (non-null, non-empty,
 *                       max 20 base characters as defined in requirements)
 * @param aiReasoning    the AI's explanation for the proposal (non-null, may be empty)
- *
- * @since M5
 */
 public record NamingProposal(
        LocalDate resolvedDate,
@@ -0,0 +1,67 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+import java.util.Optional;
+
+/**
+ * Represents a successfully parsed AI response with the expected JSON structure.
+ * <p>
+ * This record captures the three core fields that should be present in the AI's JSON response,
+ * extracted and structurally validated. The values are NOT yet semantically validated
+ * (e.g., title length, date format validity), but are known to exist and be parseable as strings.
+ * The record itself only enforces that no component is {@code null}.
+ * <p>
+ * <strong>Field semantics:</strong>
+ * <ul>
+ *   <li>{@link #title()} — the title string extracted from the AI response.
+ *       Non-null, but may require further validation (length, special characters, etc.).</li>
+ *   <li>{@link #reasoning()} — the AI's justification or explanation.
+ *       Non-null (may be empty), extracted as-is from the response.</li>
+ *   <li>{@link #dateString()} — the date string from the AI response, if present.
+ *       Optional; if present, will require format validation (e.g., YYYY-MM-DD).
+ *       If absent, the application may use a fallback date.</li>
+ * </ul>
+ * <p>
+ * <strong>Distinction from other types:</strong>
+ * <ul>
+ *   <li>{@link AiRawResponse} — entire uninterpreted response body</li>
+ *   <li>{@link ParsedAiResponse} — JSON structure extracted and fields present (this type)</li>
+ *   <li>{@link NamingProposal} — fields semantically validated and ready for use</li>
+ * </ul>
+ * <p>
+ * <strong>Usage:</strong> The Application layer receives an {@link AiInvocationSuccess},
+ * parses the raw response body, and produces either a {@code ParsedAiResponse} (if the JSON
+ * structure is sound) or an {@link AiResponseParsingFailure} (if parsing or mandatory field
+ * extraction fails). Later processing steps then validate the parsed values semantically.
+ *
+ * @param title       the title string extracted from the JSON; never null
+ * @param reasoning   the reasoning/explanation string extracted; never null (may be empty)
+ * @param dateString  optional date string (e.g., "2026-02-11"); empty Optional if not present
+ */
+public record ParsedAiResponse(
+        String title,
+        String reasoning,
+        Optional<String> dateString) {
+
+    /**
+     * Compact constructor validating mandatory fields.
+     * <p>
+     * The {@code dateString} component must be a non-null {@link Optional}; absence of a
+     * date is expressed with an empty Optional, not with {@code null}.
+     *
+     * @throws NullPointerException if {@code title}, {@code reasoning} or {@code dateString} is null
+     */
+    public ParsedAiResponse {
+        Objects.requireNonNull(title, "title must not be null");
+        Objects.requireNonNull(reasoning, "reasoning must not be null");
+        Objects.requireNonNull(dateString, "dateString must not be null (use empty Optional if no date)");
+    }
+
+    /**
+     * Static convenience factory that accepts a nullable date string.
+     * <p>
+     * The date string is wrapped with {@link Optional#ofNullable(Object)}, so {@code null}
+     * becomes an empty Optional. The instance is then created through the canonical
+     * constructor, which applies the null checks on {@code title} and {@code reasoning}.
+     *
+     * @param title       the title string (non-null)
+     * @param reasoning   the reasoning string (non-null)
+     * @param dateString  the date string, or null to represent "not present"
+     * @return a new ParsedAiResponse with the date wrapped in Optional
+     * @throws NullPointerException if {@code title} or {@code reasoning} is null
+     */
+    public static ParsedAiResponse of(String title, String reasoning, String dateString) {
+        return new ParsedAiResponse(title, reasoning, Optional.ofNullable(dateString));
+    }
+}
@@ -64,8 +64,6 @@ public enum ProcessingStatus {
     * <p>
     * The document may transition from this state to {@link #PROPOSAL_READY} on successful
     * AI-based naming, or to a failure status if the AI step fails.
-     *
-     * @since M5
     */
    READY_FOR_AI,

@@ -80,16 +78,14 @@ public enum ProcessingStatus {
     * (idempotency rule), but may still be processed further by subsequent stages.
     * The latest processing attempt with this status holds the authoritative naming proposal
     * (resolved date, title, reasoning) for subsequent stages.
-     *
-     * @since M5
     */
    PROPOSAL_READY,

    /**
     * Document was successfully processed and written to the target location.
     * <p>
-     * As of M5, this status is reserved for the true end-to-end success after the target copy
-     * stage. A document with this status will be skipped in all future batch runs.
+     * This status is reserved for the true end-to-end success after the target copy
+     * stage is complete. A document with this status will be skipped in all future batch runs.
     * Status is final and irreversible.
     */
    SUCCESS,
@@ -24,8 +24,6 @@ import java.util.Objects;
 * same identifier across batch runs.
 *
 * @param identifier the stable, non-null identifier string (typically non-empty)
- *
- * @since M5
 */
 public record PromptIdentifier(String identifier) {

@@ -15,14 +15,16 @@
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li>
 * </ul>
 * <p>
- * AI and naming proposal types (M5+):
+ * AI-based naming and proposal types:
 * <ul>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources
 *       for resolved document dates</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs.
 *       functional AI errors</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li>
- *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI service</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse} — successfully parsed JSON structure from AI response</li>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult} — sealed result of parsing the raw response</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li>
 *   <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li>
 * </ul>
@@ -8,21 +8,19 @@ import static org.junit.jupiter.api.Assertions.*;
 * Unit tests for {@link ProcessingStatus} enumeration.
 * <p>
 * Verifies that all required status values are present and correctly defined
- * for M2, M5, and future milestones.
+ * for the document processing pipeline.
 */
 class ProcessingStatusTest {

    @Test
    void allRequiredStatusValuesExist() {
        // Verify all status values required by the architecture are present
-        // M2+ statuses
        assertNotNull(ProcessingStatus.SUCCESS);
        assertNotNull(ProcessingStatus.FAILED_RETRYABLE);
        assertNotNull(ProcessingStatus.FAILED_FINAL);
        assertNotNull(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
        assertNotNull(ProcessingStatus.SKIPPED_FINAL_FAILURE);
        assertNotNull(ProcessingStatus.PROCESSING);
-        // M5+ statuses
        assertNotNull(ProcessingStatus.READY_FOR_AI);
        assertNotNull(ProcessingStatus.PROPOSAL_READY);
    }