1
0

M5 AP-001 Kernobjekte, Statusmodell und KI-Port-Verträge präzisiert

This commit is contained in:
2026-04-06 23:05:12 +02:00
parent d1dfc75d4e
commit cd2389f3e1
19 changed files with 776 additions and 8 deletions

View File

@@ -0,0 +1,47 @@
package de.gecheckt.pdf.umbenenner.domain.model;
/**
 * Categorizes a failure of the AI-based naming step as either technical or functional.
 * <p>
 * The two categories are:
 * <ul>
 *   <li><strong>Technical</strong> — the infrastructure or the communication with the AI
 *       service failed (for example: API unreachable, timeout, unparseable response).
 *       Such failures are typically transient and retryable.</li>
 *   <li><strong>Functional</strong> — the content of the AI response itself is the problem
 *       (for example: invalid title, unparseable date, generic placeholder title).
 *       Such failures are deterministic and typically not resolved by retry alone.</li>
 * </ul>
 * <p>
 * Retry behavior follows from the category: a technical error may be retried in a later
 * run, whereas a functional error is subject to the deterministic failure rule
 * (first occurrence retryable, second occurrence final).
 *
 * @since M5
 */
public enum AiErrorClassification {

    /**
     * Infrastructure or communication with the AI service failed.
     * <p>
     * Typical causes: the API endpoint cannot be reached, the HTTP call times out, the
     * response structure is malformed, mandatory fields are missing in JSON that is
     * otherwise parseable, or a network error occurred.
     * <p>
     * Errors of this kind are usually transient and may disappear when the document is
     * retried in a later batch run. They are recorded against the transient-error counter.
     */
    TECHNICAL,

    /**
     * The content delivered by the AI failed functional validation.
     * <p>
     * Typical causes: an invalid or generic title (e.g., "Dokument"), a date string that
     * cannot be parsed, or a response that violates the documented rules (e.g., a title
     * containing prohibited characters).
     * <p>
     * Errors of this kind are deterministic; they point at the AI-generated content itself
     * or at the quality of the document's content. They are recorded against the
     * content-error counter and are subject to the deterministic retry rule.
     */
    FUNCTIONAL;
}

View File

@@ -0,0 +1,45 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * The raw, uninterpreted and unvalidated response body received from an AI service.
 * <p>
 * The record carries the response body text exactly as it was returned by the AI HTTP
 * endpoint, before any parsing, validation, or business logic has touched it. Its purposes:
 * <ul>
 *   <li>Keep the original response available for audit and debugging.</li>
 *   <li>Keep the technical success of the HTTP call apart from the semantic validity of
 *       the response content.</li>
 *   <li>Support deterministic retry decisions: a 200 OK response counts as a technical
 *       success even when its JSON is unparseable or semantically invalid.</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> The raw response is stored in the SQLite history for
 * traceability and later debugging. Depending on the AI service it may hold a complete
 * JSON structure or formatted text.
 * <p>
 * <strong>Example:</strong>
 * <pre>
 * {@code
 * AiRawResponse response = new AiRawResponse(
 *     "{\"date\": \"2026-03-05\", \"title\": \"Stromabrechnung\", \"reasoning\": \"...\"}"
 * );
 * }
 * </pre>
 *
 * @param content the raw response body as a string (non-null, may be empty or malformed)
 *
 * @since M5
 */
public record AiRawResponse(String content) {

    /**
     * Creates a raw response, rejecting a {@code null} body.
     * <p>
     * No other check is performed: empty or malformed content is accepted as-is.
     *
     * @param content the raw response body; must not be null
     * @throws NullPointerException if {@code content} is null
     */
    public AiRawResponse(String content) {
        this.content = Objects.requireNonNull(content, "content must not be null");
    }
}

View File

@@ -0,0 +1,73 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Complete, deterministic description of a single request sent to an AI service.
 * <p>
 * The record captures the exact prompt, document text, and character count of one request
 * so that the request can be reproduced and debugged later.
 * <p>
 * <strong>Construction:</strong> The Application layer builds this representation
 * deterministically from:
 * <ul>
 *   <li>the loaded external prompt text,</li>
 *   <li>the stable identifier of that prompt,</li>
 *   <li>the text extracted from the PDF document (already limited to the maximum number
 *       of characters),</li>
 *   <li>structural markers (delimiters, field names) that define the format.</li>
 * </ul>
 * <p>
 * <strong>Field semantics:</strong>
 * <ul>
 *   <li>{@link #promptIdentifier()} — stable identifier of the prompt template used</li>
 *   <li>{@link #promptContent()} — the prompt text as loaded from the external file</li>
 *   <li>{@link #documentText()} — the PDF-extracted text, already limited to the configured
 *       maximum number of characters before it is placed in this representation</li>
 *   <li>{@link #sentCharacterCount()} — the exact number of characters of
 *       {@code documentText} that were included in the request sent to the AI
 *       (may be less than {@code documentText.length()} if truncation occurred)</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> The prompt identifier and the sent character count are both
 * recorded in the processing attempt history for traceability.
 * <p>
 * <strong>Not included:</strong>
 * <ul>
 *   <li>HTTP-specific details (headers, authentication, endpoint URL)</li>
 *   <li>Serialization format (whether sent as JSON, multipart, etc.)</li>
 *   <li>AI provider configuration details</li>
 * </ul>
 *
 * @param promptIdentifier   stable identifier for the prompt template; never null
 * @param promptContent      content of the prompt template; never null (may be empty,
 *                           though typically meaningful)
 * @param documentText       extracted PDF text (already limited to max characters);
 *                           never null (may be empty)
 * @param sentCharacterCount exact number of characters from documentText that were
 *                           sent to the AI; must be &gt;= 0 and &lt;= documentText.length()
 *
 * @since M5
 */
public record AiRequestRepresentation(
        PromptIdentifier promptIdentifier,
        String promptContent,
        String documentText,
        int sentCharacterCount) {

    /**
     * Creates the representation after validating every component.
     * <p>
     * The reference components are null-checked in declaration order; only then is
     * {@code sentCharacterCount} checked against the length of {@code documentText}.
     *
     * @param promptIdentifier   identifier of the prompt template; must not be null
     * @param promptContent      prompt text; must not be null
     * @param documentText       document text; must not be null
     * @param sentCharacterCount number of characters sent; must lie in
     *                           {@code [0, documentText.length()]}
     * @throws NullPointerException     if {@code promptIdentifier}, {@code promptContent},
     *                                  or {@code documentText} is null
     * @throws IllegalArgumentException if {@code sentCharacterCount} is negative or greater
     *                                  than {@code documentText.length()}
     */
    public AiRequestRepresentation(
            PromptIdentifier promptIdentifier,
            String promptContent,
            String documentText,
            int sentCharacterCount) {
        this.promptIdentifier =
                Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
        this.promptContent =
                Objects.requireNonNull(promptContent, "promptContent must not be null");
        this.documentText =
                Objects.requireNonNull(documentText, "documentText must not be null");

        // The count can never exceed the text it was taken from.
        final int documentLength = documentText.length();
        final boolean countInRange =
                sentCharacterCount >= 0 && sentCharacterCount <= documentLength;
        if (!countInRange) {
            throw new IllegalArgumentException(
                    "sentCharacterCount must be >= 0 and <= documentText.length(); got "
                            + sentCharacterCount
                            + " but documentText.length() = "
                            + documentLength);
        }
        this.sentCharacterCount = sentCharacterCount;
    }
}

View File

@@ -0,0 +1,38 @@
package de.gecheckt.pdf.umbenenner.domain.model;
/**
 * The possible origins of the resolved date used in a naming proposal.
 * <p>
 * Every constant names one specific way in which the date was determined. The source is
 * recorded for traceability.
 * <p>
 * <strong>Semantics:</strong>
 * <ul>
 *   <li>{@link #AI_PROVIDED} — the AI supplied the date in its response as a valid
 *       {@code YYYY-MM-DD} string.</li>
 *   <li>{@link #FALLBACK_CURRENT} — the AI did not provide a usable date, so the current
 *       system date (from {@code ClockPort}) was used instead.</li>
 * </ul>
 * <p>
 * The source is written to the processing attempt history for reproducibility and
 * operational transparency.
 *
 * @since M5
 */
public enum DateSource {

    /**
     * The AI delivered the date in its JSON response.
     * <p>
     * The response explicitly contained a {@code date} field in valid {@code YYYY-MM-DD}
     * format.
     */
    AI_PROVIDED,

    /**
     * The current system date was used as a fallback.
     * <p>
     * The AI either left out the {@code date} field or provided no usable date, so the
     * application fell back to the current date obtained from {@code ClockPort}.
     */
    FALLBACK_CURRENT;
}

View File

@@ -0,0 +1,68 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.time.LocalDate;
import java.util.Objects;
/**
 * A validated naming proposal obtained from the AI analysis of a document.
 * <p>
 * The record bundles the core results of the AI-based naming stage: a proposed date, a
 * proposed title, and the reasoning given by the AI. The values are expected to have been
 * validated according to the application rules at the time of creation.
 * <p>
 * <strong>Field semantics:</strong>
 * <ul>
 *   <li>{@link #resolvedDate()} — the effective date of the proposal, either supplied by
 *       the AI or derived as a fallback. Always non-null.</li>
 *   <li>{@link #dateSource()} — tells whether the date came from the AI or is a fallback
 *       to the current system date.</li>
 *   <li>{@link #validatedTitle()} — the title extracted from the AI response and validated
 *       against the documented title rules (max 20 base characters, no prohibited special
 *       characters, etc.). Always non-null and non-empty.</li>
 *   <li>{@link #aiReasoning()} — the justification or explanation the AI gave for its
 *       proposal. Non-null (may be empty in edge cases, though normally expected to be
 *       meaningful).</li>
 * </ul>
 * <p>
 * <strong>Not included in this proposal:</strong>
 * <ul>
 *   <li>Final target filename (e.g., with {@code (1)}, {@code (2)} duplicates)</li>
 *   <li>Target file path or copy operation</li>
 *   <li>Windows-specific filename character transformations</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> The naming proposal is stored persistently as part of the
 * processing attempt history for reproducibility and audit.
 *
 * @param resolvedDate   the effective date (never null); derived from AI or fallback
 * @param dateSource     origin of the date ({@link DateSource#AI_PROVIDED} or
 *                       {@link DateSource#FALLBACK_CURRENT}); never null
 * @param validatedTitle the title validated per application rules (non-null, non-empty,
 *                       max 20 base characters as defined in requirements)
 * @param aiReasoning    the AI's explanation for the proposal (non-null, may be empty)
 *
 * @since M5
 */
public record NamingProposal(
        LocalDate resolvedDate,
        DateSource dateSource,
        String validatedTitle,
        String aiReasoning) {

    /**
     * Creates the proposal after checking the mandatory components.
     * <p>
     * This constructor enforces only that no component is null and that the title is not
     * the empty string; the remaining title rules are not checked here. Checks run in
     * declaration order, with the emptiness check on the title preceding the null check
     * on the reasoning.
     *
     * @param resolvedDate   the effective date; must not be null
     * @param dateSource     origin of the date; must not be null
     * @param validatedTitle the validated title; must not be null or empty
     * @param aiReasoning    the AI's reasoning; must not be null (may be empty)
     * @throws NullPointerException     if any component is null
     * @throws IllegalArgumentException if {@code validatedTitle} is empty
     */
    public NamingProposal(
            LocalDate resolvedDate,
            DateSource dateSource,
            String validatedTitle,
            String aiReasoning) {
        this.resolvedDate = Objects.requireNonNull(resolvedDate, "resolvedDate must not be null");
        this.dateSource = Objects.requireNonNull(dateSource, "dateSource must not be null");
        this.validatedTitle =
                Objects.requireNonNull(validatedTitle, "validatedTitle must not be null");
        if (validatedTitle.isEmpty()) {
            throw new IllegalArgumentException("validatedTitle must not be empty");
        }
        this.aiReasoning = Objects.requireNonNull(aiReasoning, "aiReasoning must not be null");
    }
}

View File

@@ -9,7 +9,15 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* <p>
* <strong>Overall-status semantics (master record):</strong>
* <ul>
* <li>{@link #SUCCESS} — document was fully processed; skip in all future runs.</li>
* <li>{@link #READY_FOR_AI} — document has passed all pre-checks and extracted text
* has been validated; it is ready for AI-based naming proposal. This is a non-terminal
* intermediate state; the document will be processed further in subsequent stages.</li>
* <li>{@link #PROPOSAL_READY} — a valid, persistent AI-based naming proposal has been
* generated and stored. The document is complete for the current stage but will be
* processed further in the next stage (target copy, final filename generation).
* This is a non-terminal intermediate state.</li>
* <li>{@link #SUCCESS} — document was fully processed end-to-end and written to the
* target location. Status is final and irreversible; skip in all future runs.</li>
* <li>{@link #FAILED_RETRYABLE} — last attempt failed but is retryable; process again
* in the next run according to the applicable retry rule.</li>
* <li>{@link #FAILED_FINAL} — all allowed retries exhausted; skip in all future runs.</li>
@@ -19,7 +27,9 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* <p>
* <strong>Attempt-status semantics (attempt history):</strong>
* <ul>
* <li>{@link #SUCCESS} — this attempt completed successfully.</li>
* <li>{@link #READY_FOR_AI} — this attempt resulted in a document ready for AI processing.</li>
* <li>{@link #PROPOSAL_READY} — this attempt resulted in a valid AI-based naming proposal.</li>
* <li>{@link #SUCCESS} — this attempt completed successfully (end-to-end, including target copy).</li>
* <li>{@link #FAILED_RETRYABLE} — this attempt failed; a future attempt is allowed.</li>
* <li>{@link #FAILED_FINAL} — this attempt failed and no further attempts will be made.</li>
* <li>{@link #SKIPPED_ALREADY_PROCESSED} — this attempt was a skip because the
@@ -44,10 +54,42 @@ package de.gecheckt.pdf.umbenenner.domain.model;
*/
public enum ProcessingStatus {
/**
* Document has passed all pre-checks and extracted PDF text has been validated.
* <p>
* This is a non-terminal intermediate state. The document is ready to be processed
* by AI-based naming in the next stage. A document with this status may be reprocessed
* in later runs, and will be updated to a subsequent status (e.g., {@link #PROPOSAL_READY}
* or failure) based on the outcome of the AI naming step.
* <p>
* The document may transition from this state to {@link #PROPOSAL_READY} on successful
* AI-based naming, or to a failure status if the AI step fails.
*
* @since M5
*/
READY_FOR_AI,
/**
* A valid, persistent AI-based naming proposal has been generated and stored.
* <p>
* This is a non-terminal intermediate state. The document is complete for its current
* processing stage, but will be processed further in the next stage (target copy,
* final filename generation with duplicate-suffix handling, and final success).
* <p>
* A document with this status will not be reprocessed by the AI step in future runs
* (idempotency rule), but may still be processed further by subsequent stages.
* The latest processing attempt with this status holds the authoritative naming proposal
* (resolved date, title, reasoning) for subsequent stages.
*
* @since M5
*/
PROPOSAL_READY,
/**
* Document was successfully processed and written to the target location.
* <p>
* A document with this status will be skipped in all future batch runs.
* As of M5, this status is reserved for the true end-to-end success after the target copy
* stage. A document with this status will be skipped in all future batch runs.
* Status is final and irreversible.
*/
SUCCESS,

View File

@@ -0,0 +1,40 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Stable, unique identifier of a prompt template that is loaded from outside the code.
 * <p>
 * The prompt is not embedded in code; it is loaded from an external file or resource.
 * The identifier provides traceability by answering the question: which prompt version
 * was used for a given AI request and naming proposal?
 * <p>
 * <strong>Identity semantics:</strong> Two {@code PromptIdentifier} instances are equal
 * if and only if their identifier strings are equal (by value, not by reference).
 * <p>
 * <strong>Typical examples:</strong>
 * <ul>
 *   <li>Prompt file name: {@code "prompt_de_v1.txt"}</li>
 *   <li>Version string: {@code "2026-03-v2"}</li>
 *   <li>Content hash: {@code "sha256:abc123..."}</li>
 * </ul>
 * <p>
 * The identifier scheme may be chosen freely (filename, semantic version, hash, etc.),
 * but it must be stable and deterministic, so that the same prompt always receives the
 * same identifier across batch runs.
 *
 * @param identifier the stable, non-null identifier string (typically non-empty)
 *
 * @since M5
 */
public record PromptIdentifier(String identifier) {

    /**
     * Creates the identifier, rejecting a {@code null} string.
     * <p>
     * No other check is performed; in particular an empty string is accepted.
     *
     * @param identifier the identifier string; must not be null
     * @throws NullPointerException if {@code identifier} is null
     */
    public PromptIdentifier(String identifier) {
        this.identifier = Objects.requireNonNull(identifier, "identifier must not be null");
    }
}

View File

@@ -3,7 +3,8 @@
* <p>
* This package contains the fundamental domain entities and status models required for document processing:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states,
* including intermediate non-terminal states such as {@code READY_FOR_AI} and {@code PROPOSAL_READY}</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.RunId} — unique identifier for a batch run</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext} — technical context for a batch run</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} — content-based document identity (SHA-256 hex); primary key for persistence</li>
@@ -14,6 +15,18 @@
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li>
* </ul>
* <p>
* AI and naming proposal types (M5+):
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources
* for resolved document dates</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs.
* functional AI errors</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li>
* </ul>
* <p>
* Additional classes:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason} — enumeration of pre-check failure reasons</li>

View File

@@ -8,19 +8,23 @@ import static org.junit.jupiter.api.Assertions.*;
* Unit tests for {@link ProcessingStatus} enumeration.
* <p>
* Verifies that all required status values are present and correctly defined
* for M2 and future milestones.
* for M2, M5, and future milestones.
*/
class ProcessingStatusTest {
@Test
void allRequiredStatusValuesExist() {
// Verify all status values required by the architecture are present
// M2+ statuses
assertNotNull(ProcessingStatus.SUCCESS);
assertNotNull(ProcessingStatus.FAILED_RETRYABLE);
assertNotNull(ProcessingStatus.FAILED_FINAL);
assertNotNull(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
assertNotNull(ProcessingStatus.SKIPPED_FINAL_FAILURE);
assertNotNull(ProcessingStatus.PROCESSING);
// M5+ statuses
assertNotNull(ProcessingStatus.READY_FOR_AI);
assertNotNull(ProcessingStatus.PROPOSAL_READY);
}
@Test
@@ -59,6 +63,18 @@ class ProcessingStatusTest {
assertEquals(ProcessingStatus.PROCESSING, status);
}
@Test
void readyForAiStatus_isDefinedAndAccessible() {
ProcessingStatus status = ProcessingStatus.READY_FOR_AI;
assertEquals(ProcessingStatus.READY_FOR_AI, status);
}
@Test
void proposalReadyStatus_isDefinedAndAccessible() {
ProcessingStatus status = ProcessingStatus.PROPOSAL_READY;
assertEquals(ProcessingStatus.PROPOSAL_READY, status);
}
@Test
void statusEquality_worksByReference() {
// Enums have identity-based equality
@@ -72,6 +88,8 @@ class ProcessingStatusTest {
switch (status) {
case SUCCESS -> result = "success";
case READY_FOR_AI -> result = "ready-for-ai";
case PROPOSAL_READY -> result = "proposal-ready";
case FAILED_RETRYABLE -> result = "retryable";
case FAILED_FINAL -> result = "final";
case SKIPPED_ALREADY_PROCESSED -> result = "skip-processed";
@@ -83,8 +101,8 @@ class ProcessingStatusTest {
}
@Test
void statusValues_areSixInTotal() {
void statusValues_areEightInTotal() {
ProcessingStatus[] values = ProcessingStatus.values();
assertEquals(6, values.length, "ProcessingStatus should have exactly 6 values");
assertEquals(8, values.length, "ProcessingStatus should have exactly 8 values");
}
}