1
0

M5 AP-001 Parsebares KI-Antwortmodell ergänzt und Meilenstein-Bezüge entfernt
This commit is contained in:
2026-04-06 23:17:16 +02:00
parent cd2389f3e1
commit c77a6f06af
22 changed files with 184 additions and 39 deletions

View File

@@ -40,8 +40,6 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
* Completions API or a compatible endpoint. The {@code AiRequestRepresentation} * Completions API or a compatible endpoint. The {@code AiRequestRepresentation}
* contains the prompt and document text; the adapter is responsible for formatting * contains the prompt and document text; the adapter is responsible for formatting
* these as needed (e.g., system message + user message in the Chat API). * these as needed (e.g., system message + user message in the Chat API).
*
* @since M5
*/ */
public interface AiInvocationPort { public interface AiInvocationPort {

View File

@@ -20,8 +20,6 @@ package de.gecheckt.pdf.umbenenner.application.port.out;
* was sent and a response was received, but the response content may still be unparseable * was sent and a response was received, but the response content may still be unparseable
* or semantically invalid. This is crucial for retry logic: a technical HTTP success * or semantically invalid. This is crucial for retry logic: a technical HTTP success
* with unparseable JSON is different from a timeout or network error. * with unparseable JSON is different from a timeout or network error.
*
* @since M5
*/ */
public sealed interface AiInvocationResult public sealed interface AiInvocationResult
permits AiInvocationSuccess, AiInvocationTechnicalFailure { permits AiInvocationSuccess, AiInvocationTechnicalFailure {

View File

@@ -32,8 +32,6 @@ import java.util.Objects;
* *
* @param request the AI request that was sent; never null * @param request the AI request that was sent; never null
* @param rawResponse the uninterpreted response body; never null (but may be empty) * @param rawResponse the uninterpreted response body; never null (but may be empty)
*
* @since M5
*/ */
public record AiInvocationSuccess( public record AiInvocationSuccess(
AiRequestRepresentation request, AiRequestRepresentation request,

View File

@@ -32,8 +32,6 @@ import java.util.Objects;
* never null * never null
* @param failureReason classification of the error type; never null (may be empty) * @param failureReason classification of the error type; never null (may be empty)
* @param failureMessage human-readable error description; never null (may be empty) * @param failureMessage human-readable error description; never null (may be empty)
*
* @since M5
*/ */
public record AiInvocationTechnicalFailure( public record AiInvocationTechnicalFailure(
AiRequestRepresentation request, AiRequestRepresentation request,

View File

@@ -22,8 +22,6 @@ import java.util.Objects;
* *
* @param failureReason classification of the failure (non-null, may be empty) * @param failureReason classification of the failure (non-null, may be empty)
* @param failureMessage human-readable failure description (non-null, may be empty) * @param failureMessage human-readable failure description (non-null, may be empty)
*
* @since M5
*/ */
public record PromptLoadingFailure( public record PromptLoadingFailure(
String failureReason, String failureReason,

View File

@@ -11,8 +11,6 @@ package de.gecheckt.pdf.umbenenner.application.port.out;
* <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li> * <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li>
* <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, etc.)</li> * <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, etc.)</li>
* </ul> * </ul>
*
* @since M5
*/ */
public sealed interface PromptLoadingResult public sealed interface PromptLoadingResult
permits PromptLoadingSuccess, PromptLoadingFailure { permits PromptLoadingSuccess, PromptLoadingFailure {

View File

@@ -25,8 +25,6 @@ import java.util.Objects;
* *
* @param promptIdentifier stable identifier for this prompt version; never null * @param promptIdentifier stable identifier for this prompt version; never null
* @param promptContent the prompt template text; never null * @param promptContent the prompt template text; never null
*
* @since M5
*/ */
public record PromptLoadingSuccess( public record PromptLoadingSuccess(
PromptIdentifier promptIdentifier, PromptIdentifier promptIdentifier,

View File

@@ -29,10 +29,8 @@ package de.gecheckt.pdf.umbenenner.application.port.out;
* <li>Prompt parsing or templating logic</li> * <li>Prompt parsing or templating logic</li>
* <li>Combining prompt with document text (Application layer handles this)</li> * <li>Combining prompt with document text (Application layer handles this)</li>
* <li>Template variable substitution</li> * <li>Template variable substitution</li>
* <li>Validation of prompt content against fachliche rules</li> * <li>Validation of prompt content against domain rules</li>
* </ul> * </ul>
*
* @since M5
*/ */
public interface PromptPort { public interface PromptPort {

View File

@@ -22,7 +22,7 @@
* — Extract text content and page count from a single PDF</li> * — Extract text content and page count from a single PDF</li>
* </ul> * </ul>
* <p> * <p>
* AI-based naming ports (M5+): * AI-based naming and invocation ports:
* <ul> * <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort} * <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort}
* — Load an external prompt template and derive its stable identifier</li> * — Load an external prompt template and derive its stable identifier</li>
@@ -58,6 +58,8 @@
* — Sealed result of prompt loading (success with identifier and content, or failure)</li> * — Sealed result of prompt loading (success with identifier and content, or failure)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult} * <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult}
* — Sealed result of AI service invocation (success with raw response, or technical failure)</li> * — Sealed result of AI service invocation (success with raw response, or technical failure)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult}
* — Sealed result of parsing raw response into JSON structure (success or parsing failure)</li>
* </ul> * </ul>
* <p> * <p>
* Exception types: * Exception types:

View File

@@ -18,7 +18,6 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* a later run, while functional errors are subject to the deterministic failure rule * a later run, while functional errors are subject to the deterministic failure rule
* (first occurrence retryable, second occurrence final). * (first occurrence retryable, second occurrence final).
* *
* @since M5
*/ */
public enum AiErrorClassification { public enum AiErrorClassification {

View File

@@ -29,8 +29,6 @@ import java.util.Objects;
* </pre> * </pre>
* *
* @param content the raw response body as a string (non-null, may be empty or malformed) * @param content the raw response body as a string (non-null, may be empty or malformed)
*
* @since M5
*/ */
public record AiRawResponse(String content) { public record AiRawResponse(String content) {

View File

@@ -45,8 +45,6 @@ import java.util.Objects;
* never null (may be empty) * never null (may be empty)
* @param sentCharacterCount exact number of characters from documentText that were * @param sentCharacterCount exact number of characters from documentText that were
* sent to the AI; must be &gt;= 0 and &lt;= documentText.length() * sent to the AI; must be &gt;= 0 and &lt;= documentText.length()
*
* @since M5
*/ */
public record AiRequestRepresentation( public record AiRequestRepresentation(
PromptIdentifier promptIdentifier, PromptIdentifier promptIdentifier,

View File

@@ -0,0 +1,46 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Failure outcome of turning an AI response body into its expected structure.
 * <p>
 * A response body did arrive, but one of the following went wrong:
 * <ul>
 *   <li>the body is not parseable JSON (malformed JSON, invalid structure), or</li>
 *   <li>a required field (title, reasoning) is missing or not present in the expected form.</li>
 * </ul>
 * <p>
 * This type models a <em>technical</em> parsing problem only. Valid JSON whose field values
 * are semantically unacceptable (for example an overly long title) does not belong here:
 * that case is a parsing success followed by a functional validation error.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #failureReason()} — classification of the parsing failure, e.g.
 *       {@code "INVALID_JSON"}, {@code "MISSING_TITLE"}, {@code "MISSING_REASONING"}</li>
 *   <li>{@link #failureMessage()} — human-readable description intended for logging
 *       and debugging</li>
 * </ul>
 * <p>
 * <strong>Retry semantics:</strong> parsing failures are technical and retryable. The
 * document should be picked up again in a later batch run, up to the configured maximum
 * for transient errors.
 *
 * @param failureReason  classification of the parsing failure; never {@code null}, may be empty
 * @param failureMessage human-readable error description; never {@code null}, may be empty
 */
public record AiResponseParsingFailure(
        String failureReason,
        String failureMessage) implements AiResponseParsingResult {

    /**
     * Rejects {@code null} for both components; empty strings are accepted.
     *
     * @throws NullPointerException if {@code failureReason} or {@code failureMessage} is {@code null}
     */
    public AiResponseParsingFailure {
        failureReason = Objects.requireNonNull(failureReason, "failureReason must not be null");
        failureMessage = Objects.requireNonNull(failureMessage, "failureMessage must not be null");
    }
}

View File

@@ -0,0 +1,25 @@
package de.gecheckt.pdf.umbenenner.domain.model;
/**
 * Closed set of outcomes for parsing an AI response into its JSON structure.
 * <p>
 * Exactly two implementations are permitted:
 * <ul>
 *   <li>{@link AiResponseParsingSuccess} — the JSON structure was parsed and the required
 *       fields were extracted</li>
 *   <li>{@link AiResponseParsingFailure} — JSON parsing failed or required fields are missing</li>
 * </ul>
 * <p>
 * <strong>Place in the processing pipeline:</strong>
 * <ul>
 *   <li>Input: an {@link AiRawResponse} obtained from a successful HTTP invocation</li>
 *   <li>Operation: parse the raw response body as JSON and extract the required fields</li>
 *   <li>Output: a {@code ParsedAiResponse} wrapped in the success variant, or a classified
 *       failure</li>
 * </ul>
 * <p>
 * Keeping these outcomes apart matters for error classification: a response that cannot be
 * parsed as JSON is a technical error (retryable), whereas valid JSON carrying semantically
 * invalid field values is a functional error (deterministic).
 */
public sealed interface AiResponseParsingResult
        permits AiResponseParsingSuccess, AiResponseParsingFailure {
}

View File

@@ -0,0 +1,35 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Success outcome of turning an AI response body into its expected structure.
 * <p>
 * The body was parsed as JSON and the expected fields (title, reasoning, optional date)
 * were extracted; they are now available for semantic validation.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #response()} — the parsed response carrying title, reasoning and optional
 *       date as string values; not yet semantically validated</li>
 * </ul>
 * <p>
 * <strong>What happens next:</strong> semantic validation of the parsed fields (title
 * length, date format, etc.) is a separate step performed in the Application layer.
 * Splitting the two steps keeps error classification precise: parse failures are
 * technical, validation failures are functional/deterministic.
 *
 * @param response the parsed response structure; never {@code null}
 */
public record AiResponseParsingSuccess(ParsedAiResponse response)
        implements AiResponseParsingResult {

    /**
     * Rejects a {@code null} parsed response.
     *
     * @throws NullPointerException if {@code response} is {@code null}
     */
    public AiResponseParsingSuccess {
        response = Objects.requireNonNull(response, "response must not be null");
    }
}

View File

@@ -17,7 +17,6 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* The source is recorded in the processing attempt history for reproducibility * The source is recorded in the processing attempt history for reproducibility
* and operational transparency. * and operational transparency.
* *
* @since M5
*/ */
public enum DateSource { public enum DateSource {

View File

@@ -41,8 +41,6 @@ import java.util.Objects;
* @param validatedTitle the title validated per application rules (non-null, non-empty, * @param validatedTitle the title validated per application rules (non-null, non-empty,
* max 20 base characters as defined in requirements) * max 20 base characters as defined in requirements)
* @param aiReasoning the AI's explanation for the proposal (non-null, may be empty) * @param aiReasoning the AI's explanation for the proposal (non-null, may be empty)
*
* @since M5
*/ */
public record NamingProposal( public record NamingProposal(
LocalDate resolvedDate, LocalDate resolvedDate,

View File

@@ -0,0 +1,67 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
import java.util.Optional;
/**
 * Structurally parsed form of an AI response: the fields pulled out of the response JSON.
 * <p>
 * The record holds the three core values as plain strings. They have been extracted and are
 * known to be present where mandatory, but they are <em>not</em> semantically validated yet
 * (e.g. title length or date format validity). The only invariant enforced by this record
 * itself is that no component is {@code null}.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #title()} — title text taken from the AI response. Never {@code null}; may
 *       still need further validation (length, special characters, etc.).</li>
 *   <li>{@link #reasoning()} — the AI's justification or explanation, kept as-is.
 *       Never {@code null}, may be empty.</li>
 *   <li>{@link #dateString()} — date text from the AI response, if any. When present it
 *       still requires format validation (e.g. YYYY-MM-DD); when absent the application
 *       may use a fallback date.</li>
 * </ul>
 * <p>
 * <strong>How this differs from neighbouring types:</strong>
 * <ul>
 *   <li>{@link AiRawResponse} — the entire, uninterpreted response body</li>
 *   <li>{@link ParsedAiResponse} — JSON structure extracted and fields present (this type)</li>
 *   <li>{@link NamingProposal} — fields semantically validated and ready for use</li>
 * </ul>
 * <p>
 * <strong>Usage:</strong> the Application layer takes a successful AI invocation result
 * ({@code AiInvocationSuccess}), parses its raw response body and yields either a
 * {@code ParsedAiResponse} (JSON structure is sound) or an {@link AiResponseParsingFailure}
 * (parsing or mandatory-field extraction failed). Semantic validation of the parsed values
 * happens in later processing steps.
 *
 * @param title      the title string extracted from the JSON; never {@code null}
 * @param reasoning  the reasoning/explanation string extracted; never {@code null}, may be empty
 * @param dateString optional date string (e.g. "2026-02-11"); empty {@code Optional} if not present
 */
public record ParsedAiResponse(
        String title,
        String reasoning,
        Optional<String> dateString) {

    /**
     * Rejects {@code null} for every component. An absent date must be passed as an empty
     * {@code Optional}, not as {@code null}.
     *
     * @throws NullPointerException if {@code title}, {@code reasoning} or {@code dateString}
     *                              is {@code null}
     */
    public ParsedAiResponse {
        title = Objects.requireNonNull(title, "title must not be null");
        reasoning = Objects.requireNonNull(reasoning, "reasoning must not be null");
        dateString = Objects.requireNonNull(
                dateString, "dateString must not be null (use empty Optional if no date)");
    }

    /**
     * Static factory for callers that hold the date as a plain, possibly {@code null} string.
     *
     * @param title      the title string (non-null)
     * @param reasoning  the reasoning string (non-null)
     * @param dateString the date string, or {@code null} to represent "not present"
     * @return a new {@code ParsedAiResponse} with the date wrapped in an {@code Optional}
     * @throws NullPointerException if {@code title} or {@code reasoning} is {@code null}
     */
    public static ParsedAiResponse of(String title, String reasoning, String dateString) {
        Optional<String> wrappedDate = Optional.ofNullable(dateString);
        return new ParsedAiResponse(title, reasoning, wrappedDate);
    }
}

View File

@@ -64,8 +64,6 @@ public enum ProcessingStatus {
* <p> * <p>
* The document may transition from this state to {@link #PROPOSAL_READY} on successful * The document may transition from this state to {@link #PROPOSAL_READY} on successful
* AI-based naming, or to a failure status if the AI step fails. * AI-based naming, or to a failure status if the AI step fails.
*
* @since M5
*/ */
READY_FOR_AI, READY_FOR_AI,
@@ -80,16 +78,14 @@ public enum ProcessingStatus {
* (idempotency rule), but may still be processed further by subsequent stages. * (idempotency rule), but may still be processed further by subsequent stages.
* The latest processing attempt with this status holds the authoritative naming proposal * The latest processing attempt with this status holds the authoritative naming proposal
* (resolved date, title, reasoning) for subsequent stages. * (resolved date, title, reasoning) for subsequent stages.
*
* @since M5
*/ */
PROPOSAL_READY, PROPOSAL_READY,
/** /**
* Document was successfully processed and written to the target location. * Document was successfully processed and written to the target location.
* <p> * <p>
* As of M5, this status is reserved for the true end-to-end success after the target copy * This status is reserved for the true end-to-end success after the target copy
* stage. A document with this status will be skipped in all future batch runs. * stage is complete. A document with this status will be skipped in all future batch runs.
* Status is final and irreversible. * Status is final and irreversible.
*/ */
SUCCESS, SUCCESS,

View File

@@ -24,8 +24,6 @@ import java.util.Objects;
* same identifier across batch runs. * same identifier across batch runs.
* *
* @param identifier the stable, non-null identifier string (typically non-empty) * @param identifier the stable, non-null identifier string (typically non-empty)
*
* @since M5
*/ */
public record PromptIdentifier(String identifier) { public record PromptIdentifier(String identifier) {

View File

@@ -15,14 +15,16 @@
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li> * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li>
* </ul> * </ul>
* <p> * <p>
* AI and naming proposal types (M5+): * AI-based naming and proposal types:
* <ul> * <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources
* for resolved document dates</li> * for resolved document dates</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs. * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs.
* functional AI errors</li> * functional AI errors</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li> * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI</li> * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI service</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse} — successfully parsed JSON structure from AI response</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult} — sealed result of parsing the raw response</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li> * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li> * <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li>
* </ul> * </ul>

View File

@@ -8,21 +8,19 @@ import static org.junit.jupiter.api.Assertions.*;
* Unit tests for {@link ProcessingStatus} enumeration. * Unit tests for {@link ProcessingStatus} enumeration.
* <p> * <p>
* Verifies that all required status values are present and correctly defined * Verifies that all required status values are present and correctly defined
* for M2, M5, and future milestones. * for the document processing pipeline.
*/ */
class ProcessingStatusTest { class ProcessingStatusTest {
@Test @Test
void allRequiredStatusValuesExist() { void allRequiredStatusValuesExist() {
// Verify all status values required by the architecture are present // Verify all status values required by the architecture are present
// M2+ statuses
assertNotNull(ProcessingStatus.SUCCESS); assertNotNull(ProcessingStatus.SUCCESS);
assertNotNull(ProcessingStatus.FAILED_RETRYABLE); assertNotNull(ProcessingStatus.FAILED_RETRYABLE);
assertNotNull(ProcessingStatus.FAILED_FINAL); assertNotNull(ProcessingStatus.FAILED_FINAL);
assertNotNull(ProcessingStatus.SKIPPED_ALREADY_PROCESSED); assertNotNull(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
assertNotNull(ProcessingStatus.SKIPPED_FINAL_FAILURE); assertNotNull(ProcessingStatus.SKIPPED_FINAL_FAILURE);
assertNotNull(ProcessingStatus.PROCESSING); assertNotNull(ProcessingStatus.PROCESSING);
// M5+ statuses
assertNotNull(ProcessingStatus.READY_FOR_AI); assertNotNull(ProcessingStatus.READY_FOR_AI);
assertNotNull(ProcessingStatus.PROPOSAL_READY); assertNotNull(ProcessingStatus.PROPOSAL_READY);
} }