1
0

M5 AP-001 Kernobjekte, Statusmodell und KI-Port-Verträge präzisiert

This commit is contained in:
2026-04-06 23:05:12 +02:00
parent d1dfc75d4e
commit cd2389f3e1
19 changed files with 776 additions and 8 deletions

View File

@@ -103,7 +103,8 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
return switch (record.overallStatus()) {
case SUCCESS -> new DocumentTerminalSuccess(record);
case FAILED_FINAL -> new DocumentTerminalFinalFailure(record);
case PROCESSING, FAILED_RETRYABLE, SKIPPED_ALREADY_PROCESSED, SKIPPED_FINAL_FAILURE ->
case READY_FOR_AI, PROPOSAL_READY, PROCESSING, FAILED_RETRYABLE,
SKIPPED_ALREADY_PROCESSED, SKIPPED_FINAL_FAILURE ->
new DocumentKnownProcessable(record);
};
} else {

View File

@@ -0,0 +1,77 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
/**
* Outbound port for invoking an AI service over an OpenAI-compatible HTTP boundary.
* <p>
* This interface abstracts AI service communication, allowing the Application layer
* to orchestrate AI-based naming without knowing about HTTP, authentication, or
* provider-specific details.
* <p>
* <strong>Design principles:</strong>
* <ul>
* <li>Provider is configurable (OpenAI, Azure, local LLM, etc.), not hard-coded</li>
* <li>Base URL, model name, and timeout are runtime configuration</li>
* <li>Invocation outcomes are returned as structured types ({@link AiInvocationResult}),
* never signalled by exceptions; the only documented exception is the
* {@link NullPointerException} for a {@code null} request, which is a caller error</li>
* <li>Technical success (a response body was received, e.g. HTTP 200) is distinct
* from response content validity</li>
* </ul>
* <p>
* <strong>Adapter responsibilities:</strong>
* <ul>
* <li>Construct an HTTP request from the {@link AiRequestRepresentation}</li>
* <li>Apply all transport-level configuration (base URL, model, timeout, authentication)</li>
* <li>Execute the HTTP request against the configured endpoint</li>
* <li>Distinguish between successful reception of a response body and technical failure</li>
* <li>Return either an invocation success with raw response or a classified technical error</li>
* <li>Encapsulate all HTTP, JSON serialization, and authentication details</li>
* </ul>
* <p>
* <strong>Non-goals of this port:</strong>
* <ul>
* <li>JSON parsing of the response body (Application layer handles this)</li>
* <li>Validation of response content against domain rules</li>
* <li>Prompt construction or text formatting (Application layer does this)</li>
* <li>Handling of provider-specific output formats or structured output schemas</li>
* </ul>
* <p>
* <strong>OpenAI compatibility:</strong> The adapter must support the OpenAI Chat
* Completions API or a compatible endpoint. The {@code AiRequestRepresentation}
* contains the prompt and document text; the adapter is responsible for formatting
* these as needed (e.g., system message + user message in the Chat API).
* <p>
* NOTE(review): this contract does not state how a non-2xx HTTP response that still
* carries a body is to be classified (success with raw body vs. technical failure);
* confirm against the adapter implementation.
*
* @since M5
*/
public interface AiInvocationPort {
/**
* Invokes an AI service with the given request representation.
* <p>
* This method sends a request to the configured AI endpoint and returns the result.
* The request contains both the prompt and the document text, deterministically
* composed by the Application layer.
* <p>
* <strong>Outcome distinction:</strong>
* <ul>
* <li>If the HTTP call succeeds and a response body is received,
* {@link AiInvocationSuccess} is returned, even if the body is invalid JSON
* or semantically problematic. The Application layer will parse and validate
* the content.</li>
* <li>If the HTTP call fails (timeout, network error, endpoint unreachable,
* connection failure), {@link AiInvocationTechnicalFailure} is returned.</li>
* </ul>
*
* @param request the complete request to send to the AI service; never null
* @return an {@link AiInvocationResult} encoding either:
* <ul>
* <li>Success: response body was received (valid or not)</li>
* <li>Technical failure: HTTP communication failed</li>
* </ul>
* @throws NullPointerException if request is null (caller error; all other
* outcomes are reported through the returned result)
*
* @see AiInvocationSuccess
* @see AiInvocationTechnicalFailure
*/
AiInvocationResult invoke(AiRequestRepresentation request);
}

View File

@@ -0,0 +1,28 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Sealed interface representing the outcome of invoking an AI service.
* <p>
* Implementations allow the Application layer to distinguish between:
* <ul>
* <li>Successful HTTP communication with a response body (which may still contain
* functionally invalid content, but is at least technically received)</li>
* <li>Technical failure (timeout, network error, endpoint unreachable, connection failure)</li>
* </ul>
* <p>
* Permitted implementations:
* <ul>
* <li>{@link AiInvocationSuccess} — HTTP call completed with a response body</li>
* <li>{@link AiInvocationTechnicalFailure} — HTTP call failed or no response body was received</li>
* </ul>
* <p>
* <strong>Critical distinction:</strong> A successful invocation means the HTTP request
* was sent and a response was received, but the response content may still be unparseable
* or semantically invalid. This is crucial for retry logic: a technical HTTP success
* with unparseable JSON is different from a timeout or network error. A received body
* that is merely malformed is therefore reported as {@link AiInvocationSuccess}, not as
* a technical failure of this port.
*
* @since M5
*/
public sealed interface AiInvocationResult
permits AiInvocationSuccess, AiInvocationTechnicalFailure {
}

View File

@@ -0,0 +1,51 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import java.util.Objects;
/**
 * Outcome of an AI invocation in which a response body was received.
 * <p>
 * This type only states that the HTTP exchange worked on a technical level. It makes
 * no promise that the body is valid, parseable, or usable for naming.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #request()} — the exact request that was sent to the AI service,
 *       including prompt, document text, and character counts.</li>
 *   <li>{@link #rawResponse()} — the uninterpreted body returned by the AI; it may be
 *       valid JSON, malformed, empty, or otherwise problematic.</li>
 * </ul>
 * <p>
 * <strong>Left to the Application layer:</strong>
 * <ul>
 *   <li>parsing the raw response (JSON extraction, field validation),</li>
 *   <li>telling parseable from unparseable responses,</li>
 *   <li>validating the content against rules (title length, date format, etc.),</li>
 *   <li>classifying any resulting failure as technical or functional.</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> request and response are both stored in the processing
 * attempt history for debugging and audit.
 *
 * @param request     the AI request that was sent; never null
 * @param rawResponse the uninterpreted response body; never null (its content may be empty)
 *
 * @since M5
 */
public record AiInvocationSuccess(
        AiRequestRepresentation request,
        AiRawResponse rawResponse) implements AiInvocationResult {

    /**
     * Canonical constructor; rejects {@code null} components in declaration order.
     *
     * @param request     the AI request that was sent
     * @param rawResponse the uninterpreted response body
     * @throws NullPointerException if either argument is null
     */
    public AiInvocationSuccess(AiRequestRepresentation request, AiRawResponse rawResponse) {
        this.request = Objects.requireNonNull(request, "request must not be null");
        this.rawResponse = Objects.requireNonNull(rawResponse, "rawResponse must not be null");
    }
}

View File

@@ -0,0 +1,53 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import java.util.Objects;
/**
 * Outcome of an AI invocation that failed on the transport level.
 * <p>
 * Either the HTTP request could not be sent or no response body came back. Typical
 * causes are network errors, timeouts, an unreachable endpoint, connection failures,
 * and similar infrastructure problems.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #request()} — the request whose transmission was attempted. It is kept
 *       for debugging and audit even though it may never have reached the AI service.</li>
 *   <li>{@link #failureReason()} — a classification of the technical error
 *       (e.g., "TIMEOUT", "ENDPOINT_UNREACHABLE", "CONNECTION_ERROR").</li>
 *   <li>{@link #failureMessage()} — a human-readable description suitable for logging
 *       and operational troubleshooting.</li>
 * </ul>
 * <p>
 * <strong>Retry semantics:</strong> technical failures are retryable. The Application
 * layer records them as transient errors, and the document may be retried in a later
 * batch run up to the configured maximum transient-error count.
 * <p>
 * <strong>What does not belong here:</strong> a 200 OK response whose body is invalid
 * JSON is not reported through this type. It is an {@link AiInvocationSuccess} whose
 * content the Application layer classifies afterwards. Only communication and
 * transport errors are represented by this record.
 *
 * @param request        the request that was attempted (may not have been sent
 *                       successfully); never null
 * @param failureReason  classification of the error type; never null (may be empty)
 * @param failureMessage human-readable error description; never null (may be empty)
 *
 * @since M5
 */
public record AiInvocationTechnicalFailure(
        AiRequestRepresentation request,
        String failureReason,
        String failureMessage) implements AiInvocationResult {

    /**
     * Canonical constructor; rejects {@code null} components in declaration order.
     *
     * @param request        the request that was attempted
     * @param failureReason  classification of the error type
     * @param failureMessage human-readable error description
     * @throws NullPointerException if any argument is null
     */
    public AiInvocationTechnicalFailure(
            AiRequestRepresentation request,
            String failureReason,
            String failureMessage) {
        this.request = Objects.requireNonNull(request, "request must not be null");
        this.failureReason = Objects.requireNonNull(failureReason, "failureReason must not be null");
        this.failureMessage = Objects.requireNonNull(failureMessage, "failureMessage must not be null");
    }
}

View File

@@ -0,0 +1,41 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import java.util.Objects;
/**
 * Outcome of a prompt load that did not produce a usable prompt.
 * <p>
 * Either the prompt could not be obtained from the configured external source, or the
 * loaded content was technically invalid (e.g., empty after trimming).
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #failureReason()} — a classification of the failure type
 *       (e.g., "FILE_NOT_FOUND", "IO_ERROR", "EMPTY_CONTENT").</li>
 *   <li>{@link #failureMessage()} — a human-readable explanation of why the load
 *       failed, suitable for logging and debugging.</li>
 * </ul>
 * <p>
 * This is a technical failure rather than a validation error, and it typically
 * prevents the batch run from proceeding further (may lead to a {@code PROCESSING}
 * status treated as {@code FAILED_RETRYABLE}).
 *
 * @param failureReason  classification of the failure (non-null, may be empty)
 * @param failureMessage human-readable failure description (non-null, may be empty)
 *
 * @since M5
 */
public record PromptLoadingFailure(
        String failureReason,
        String failureMessage) implements PromptLoadingResult {

    /**
     * Canonical constructor; rejects {@code null} components in declaration order.
     *
     * @param failureReason  classification of the failure
     * @param failureMessage human-readable failure description
     * @throws NullPointerException if either argument is null
     */
    public PromptLoadingFailure(String failureReason, String failureMessage) {
        this.failureReason = Objects.requireNonNull(failureReason, "failureReason must not be null");
        this.failureMessage = Objects.requireNonNull(failureMessage, "failureMessage must not be null");
    }
}

View File

@@ -0,0 +1,19 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Sealed interface representing the outcome of loading an external prompt template.
* <p>
* Implementations allow the Application layer to distinguish between a successful
* prompt load and various failure scenarios without using exceptions.
* <p>
* Permitted implementations:
* <ul>
* <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li>
* <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, etc.)
* or the loaded content was technically invalid (e.g., empty)</li>
* </ul>
*
* @since M5
*/
public sealed interface PromptLoadingResult
permits PromptLoadingSuccess, PromptLoadingFailure {
}

View File

@@ -0,0 +1,44 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
import java.util.Objects;
/**
 * Outcome of a prompt load that produced both the prompt text and its identifier.
 * <p>
 * The prompt content and a stable identifier for it were obtained from the configured
 * external source.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #promptIdentifier()} — a stable, deterministic identifier (e.g.,
 *       filename, semantic version, or hash) that stays the same across batch runs
 *       as long as the prompt content has not changed.</li>
 *   <li>{@link #promptContent()} — the prompt template text, ready to be used when
 *       constructing an AI request. This record only rejects {@code null}; it does
 *       not check for emptiness, so the text may be empty in edge cases even though
 *       meaningful content is normally expected.</li>
 * </ul>
 * <p>
 * The identifier matters for historical traceability: every processing attempt records
 * which prompt was used, so a later investigation can reconstruct why a particular
 * decision was made.
 *
 * @param promptIdentifier stable identifier for this prompt version; never null
 * @param promptContent    the prompt template text; never null
 *
 * @since M5
 */
public record PromptLoadingSuccess(
        PromptIdentifier promptIdentifier,
        String promptContent) implements PromptLoadingResult {

    /**
     * Canonical constructor; rejects {@code null} components in declaration order.
     *
     * @param promptIdentifier stable identifier for this prompt version
     * @param promptContent    the prompt template text
     * @throws NullPointerException if either argument is null
     */
    public PromptLoadingSuccess(PromptIdentifier promptIdentifier, String promptContent) {
        this.promptIdentifier = Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
        this.promptContent = Objects.requireNonNull(promptContent, "promptContent must not be null");
    }
}

View File

@@ -0,0 +1,58 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Outbound port for loading external prompt templates.
* <p>
* This interface abstracts the loading of prompt content from external sources
* (files, resources, databases, etc.), allowing the Application layer to remain
* independent of how or where prompts are stored.
* <p>
* <strong>Design principles:</strong>
* <ul>
* <li>Prompt is not embedded in code; it is loaded from an external source</li>
* <li>Each prompt receives a stable identifier for traceability across batch runs</li>
* <li>Results are returned as structured types ({@link PromptLoadingResult}),
* never as exceptions</li>
* </ul>
* <p>
* <strong>Adapter responsibilities:</strong>
* <ul>
* <li>Locate and read the prompt file/resource from the configured source</li>
* <li>Derive a stable prompt identifier (e.g., filename, semantic version, content hash)</li>
* <li>Validate that the loaded content is not empty or otherwise invalid</li>
* <li>Return either success or a classified failure</li>
* <li>Encapsulate all file I/O, resource loading, and configuration details</li>
* </ul>
* <p>
* <strong>Non-goals of this port:</strong>
* <ul>
* <li>Prompt parsing or templating logic</li>
* <li>Combining prompt with document text (Application layer handles this)</li>
* <li>Template variable substitution</li>
* <li>Validation of prompt content against domain (business) rules</li>
* </ul>
*
* @since M5
*/
public interface PromptPort {
/**
* Loads the configured external prompt template.
* <p>
* This method is intended to be called once per batch run to obtain the current prompt.
* The prompt content and its stable identifier are returned together.
* <p>
* If loading fails for any reason (file not found, I/O error, content validation),
* a {@link PromptLoadingFailure} is returned rather than throwing an exception.
*
* @return a {@link PromptLoadingResult} encoding either:
* <ul>
* <li>Success: prompt content and identifier loaded successfully</li>
* <li>Failure: prompt could not be loaded or is invalid</li>
* </ul>
*
* @see PromptLoadingSuccess
* @see PromptLoadingFailure
*/
PromptLoadingResult loadPrompt();
}

View File

@@ -22,6 +22,14 @@
* — Extract text content and page count from a single PDF</li>
* </ul>
* <p>
* AI-based naming ports (M5+):
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort}
* — Load an external prompt template and derive its stable identifier</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort}
* — Invoke an AI service over an OpenAI-compatible HTTP boundary</li>
* </ul>
* <p>
* Persistence and fingerprinting ports:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
@@ -46,6 +54,10 @@
* — Sealed result of a fingerprint computation (success or technical error)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult}
* — Sealed result of a master record lookup (unknown / processable / terminal / failure)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingResult}
* — Sealed result of prompt loading (success with identifier and content, or failure)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult}
* — Sealed result of AI service invocation (success with raw response, or technical failure)</li>
* </ul>
* <p>
* Exception types:

View File

@@ -0,0 +1,47 @@
package de.gecheckt.pdf.umbenenner.domain.model;
/**
* Classification of AI-related errors into technical vs. functional categories.
* <p>
* This enumeration distinguishes between two fundamental error types that occur
* during AI-based naming proposal generation:
* <ul>
* <li><strong>Technical errors</strong>: Infrastructure or communication failures
* (e.g., API unreachable, timeout, unparseable response). These are typically
* transient and retryable.</li>
* <li><strong>Functional errors</strong>: Problems with the AI response content itself
* (e.g., invalid title, unparseable date, generic placeholder title).
* These are deterministic and typically not resolved by retry alone.</li>
* </ul>
* <p>
* The classification determines retry behavior: technical errors may be retried in
* a later run, while functional errors are subject to the deterministic failure rule
* (first occurrence retryable, second occurrence final).
* <p>
* NOTE(review): this classification appears to be independent of the port-level result
* type. An unparseable response body is listed here as {@link #TECHNICAL}, although the
* AI invocation port reports any received body as an invocation success. Confirm that
* this is the intended mapping.
*
* @since M5
*/
public enum AiErrorClassification {
/**
* A technical infrastructure or communication failure occurred.
* <p>
* Examples: API endpoint not reachable, HTTP timeout, malformed response structure,
* missing mandatory fields in otherwise-parseable JSON, network error.
* <p>
* These errors are typically transient and may be resolved by retry in a later
* batch run. The failure is recorded against the transient-error counter.
*/
TECHNICAL,
/**
* A functional or content validation error occurred.
* <p>
* Examples: invalid or generic title (e.g., "Dokument"), unparseable date string,
* AI response violates documented rules (e.g., title contains prohibited characters).
* <p>
* These errors are deterministic and reflect issues with the AI-generated content
* itself or the document's content quality. The failure is recorded against the
* content-error counter, subject to the deterministic retry rule
* (first occurrence retryable, second occurrence final).
*/
FUNCTIONAL
}

View File

@@ -0,0 +1,45 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Raw, uninterpreted response body received from an AI service.
 * <p>
 * The record carries the body as a string, exactly as it came back from the AI HTTP
 * endpoint, before any parsing, validation, or business-logic processing. It exists to:
 * <ul>
 *   <li>keep the original response available for audit and debugging,</li>
 *   <li>separate the technical success of the HTTP call from the semantic validity of
 *       the response content,</li>
 *   <li>support deterministic retry decisions: a 200 OK response is a technical
 *       success even if the JSON is unparseable or semantically invalid.</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> The raw response is stored in SQLite history for
 * traceability and future debugging. Depending on the AI service it may hold a full
 * JSON structure or formatted text.
 * <p>
 * <strong>Example:</strong>
 * <pre>
 * {@code
 * AiRawResponse response = new AiRawResponse(
 *     "{\"date\": \"2026-03-05\", \"title\": \"Stromabrechnung\", \"reasoning\": \"...\"}"
 * );
 * }
 * </pre>
 *
 * @param content the raw response body as a string (non-null, may be empty or malformed)
 *
 * @since M5
 */
public record AiRawResponse(String content) {

    /**
     * Canonical constructor; the only check performed is the null check.
     *
     * @param content the raw response body
     * @throws NullPointerException if {@code content} is null
     */
    public AiRawResponse(String content) {
        this.content = Objects.requireNonNull(content, "content must not be null");
    }
}

View File

@@ -0,0 +1,73 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Complete, deterministic description of one request sent to an AI service.
 * <p>
 * The record captures the prompt, the document text, and the amount of text sent in a
 * single request, so that the request can be reproduced and debugged later.
 * <p>
 * <strong>Construction:</strong> The Application layer builds this representation
 * deterministically from:
 * <ul>
 *   <li>the loaded external prompt text,</li>
 *   <li>the prompt's stable identifier,</li>
 *   <li>the document's extracted PDF text (already limited to max characters),</li>
 *   <li>structural markers (delimiters, field names) that define the format.</li>
 * </ul>
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #promptIdentifier()} — stable identifier of the prompt template used.</li>
 *   <li>{@link #promptContent()} — the prompt text loaded from the external file.</li>
 *   <li>{@link #documentText()} — the PDF-extracted text, already limited to the
 *       configured maximum characters before being placed in this representation.</li>
 *   <li>{@link #sentCharacterCount()} — the exact number of characters from
 *       {@code documentText} that were included in the request (may be less than
 *       {@code documentText.length()} if truncation occurred).</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> prompt identifier and sent character count are both
 * recorded in the processing attempt history for traceability.
 * <p>
 * <strong>Deliberately not part of this record:</strong>
 * <ul>
 *   <li>HTTP-specific details (headers, authentication, endpoint URL),</li>
 *   <li>serialization format (whether sent as JSON, multipart, etc.),</li>
 *   <li>AI provider configuration details.</li>
 * </ul>
 *
 * @param promptIdentifier   stable identifier for the prompt template; never null
 * @param promptContent      content of the prompt template; never null (may be empty,
 *                           though typically meaningful)
 * @param documentText       extracted PDF text (already limited to max characters);
 *                           never null (may be empty)
 * @param sentCharacterCount exact number of characters from documentText that were
 *                           sent to the AI; must satisfy
 *                           {@code 0 <= sentCharacterCount <= documentText.length()}
 *
 * @since M5
 */
public record AiRequestRepresentation(
        PromptIdentifier promptIdentifier,
        String promptContent,
        String documentText,
        int sentCharacterCount) {

    /**
     * Canonical constructor; checks the reference components for {@code null} in
     * declaration order and then the range of {@code sentCharacterCount}.
     *
     * @param promptIdentifier   stable identifier for the prompt template
     * @param promptContent      content of the prompt template
     * @param documentText       extracted PDF text
     * @param sentCharacterCount number of characters from documentText that were sent
     * @throws NullPointerException     if promptIdentifier, promptContent, or
     *                                  documentText is null
     * @throws IllegalArgumentException if sentCharacterCount is negative or greater
     *                                  than {@code documentText.length()}
     */
    public AiRequestRepresentation(
            PromptIdentifier promptIdentifier,
            String promptContent,
            String documentText,
            int sentCharacterCount) {
        this.promptIdentifier = Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
        this.promptContent = Objects.requireNonNull(promptContent, "promptContent must not be null");
        this.documentText = Objects.requireNonNull(documentText, "documentText must not be null");

        // The count can never exceed the text it was taken from.
        final int availableLength = documentText.length();
        final boolean countWithinText = sentCharacterCount >= 0 && sentCharacterCount <= availableLength;
        if (!countWithinText) {
            throw new IllegalArgumentException(
                    "sentCharacterCount must be >= 0 and <= documentText.length(); "
                            + "got " + sentCharacterCount
                            + " but documentText.length() = " + availableLength);
        }
        this.sentCharacterCount = sentCharacterCount;
    }
}

View File

@@ -0,0 +1,38 @@
package de.gecheckt.pdf.umbenenner.domain.model;
/**
* Enumeration of valid sources for a resolved document date.
* <p>
* Each enum constant represents a specific origin or determination method for the date
* used in a naming proposal. The source is recorded for traceability.
* <p>
* <strong>Semantics:</strong>
* <ul>
* <li>{@link #AI_PROVIDED} — the date was supplied by the AI in its response
* as a valid {@code YYYY-MM-DD} string.</li>
* <li>{@link #FALLBACK_CURRENT} — the date was set to the current system date
* (from {@code ClockPort}) because the AI did not provide a usable date.</li>
* </ul>
* <p>
* The source is recorded in the processing attempt history for reproducibility
* and operational transparency.
*
* @since M5
*/
public enum DateSource {
/**
* The date was provided by the AI in its JSON response.
* <p>
* The AI explicitly supplied a {@code date} field in valid {@code YYYY-MM-DD} format.
*/
AI_PROVIDED,
/**
* The date is the current system date used as fallback.
* <p>
* The AI either omitted the {@code date} field or provided no usable date.
* The application therefore fell back to the current date obtained from {@code ClockPort}.
*/
FALLBACK_CURRENT
}

View File

@@ -0,0 +1,68 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.time.LocalDate;
import java.util.Objects;
/**
 * Naming proposal derived from the AI analysis of a document.
 * <p>
 * The record bundles the core results of the AI-based naming stage: a proposed date,
 * its origin, a proposed title, and the AI's reasoning. The values are expected to
 * have been validated against the application rules before the record is created.
 * The record itself only enforces non-null components and a non-empty title.
 * <p>
 * <strong>Components:</strong>
 * <ul>
 *   <li>{@link #resolvedDate()} — the effective date for the naming proposal, either
 *       supplied by the AI or derived as a fallback. Always non-null.</li>
 *   <li>{@link #dateSource()} — tells whether the date came from the AI or is a
 *       fallback to the current system date.</li>
 *   <li>{@link #validatedTitle()} — the title extracted and validated from the AI
 *       response. Non-null and non-empty; compliance with the documented title rules
 *       (max 20 base characters, no prohibited special characters, etc.) is the
 *       responsibility of the code that creates the proposal.</li>
 *   <li>{@link #aiReasoning()} — the justification given by the AI for its proposal.
 *       Non-null (may be empty in edge cases, though normally expected to be
 *       meaningful).</li>
 * </ul>
 * <p>
 * <strong>Not part of this proposal:</strong>
 * <ul>
 *   <li>final target filename (e.g., with {@code (1)}, {@code (2)} duplicates),</li>
 *   <li>target file path or copy operation,</li>
 *   <li>Windows-specific filename character transformations.</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> The naming proposal is stored as part of the processing
 * attempt history for reproducibility and audit.
 *
 * @param resolvedDate   the effective date (never null); derived from AI or fallback
 * @param dateSource     origin of the date ({@link DateSource#AI_PROVIDED} or
 *                       {@link DateSource#FALLBACK_CURRENT}); never null
 * @param validatedTitle the title validated per application rules (non-null, non-empty,
 *                       max 20 base characters as defined in requirements)
 * @param aiReasoning    the AI's explanation for the proposal (non-null, may be empty)
 *
 * @since M5
 */
public record NamingProposal(
        LocalDate resolvedDate,
        DateSource dateSource,
        String validatedTitle,
        String aiReasoning) {

    /**
     * Canonical constructor; checks the components in declaration order.
     *
     * @param resolvedDate   the effective date
     * @param dateSource     origin of the date
     * @param validatedTitle the validated title
     * @param aiReasoning    the AI's explanation for the proposal
     * @throws NullPointerException     if any argument is null
     * @throws IllegalArgumentException if validatedTitle is empty
     */
    public NamingProposal(
            LocalDate resolvedDate,
            DateSource dateSource,
            String validatedTitle,
            String aiReasoning) {
        this.resolvedDate = Objects.requireNonNull(resolvedDate, "resolvedDate must not be null");
        this.dateSource = Objects.requireNonNull(dateSource, "dateSource must not be null");
        this.validatedTitle = Objects.requireNonNull(validatedTitle, "validatedTitle must not be null");
        // The emptiness check runs before the reasoning null check, so the order of
        // reported problems stays title first, reasoning second.
        if (validatedTitle.length() == 0) {
            throw new IllegalArgumentException("validatedTitle must not be empty");
        }
        this.aiReasoning = Objects.requireNonNull(aiReasoning, "aiReasoning must not be null");
    }
}

View File

@@ -9,7 +9,15 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* <p>
* <strong>Overall-status semantics (master record):</strong>
* <ul>
* <li>{@link #SUCCESS} — document was fully processed; skip in all future runs.</li>
* <li>{@link #READY_FOR_AI} — document has passed all pre-checks and extracted text
* has been validated; it is ready for AI-based naming proposal. This is a non-terminal
* intermediate state; the document will be processed further in subsequent stages.</li>
* <li>{@link #PROPOSAL_READY} — a valid, persistent AI-based naming proposal has been
* generated and stored. The document is complete for the current stage but will be
* processed further in the next stage (target copy, final filename generation).
* This is a non-terminal intermediate state.</li>
* <li>{@link #SUCCESS} — document was fully processed end-to-end and written to the
* target location. Status is final and irreversible; skip in all future runs.</li>
* <li>{@link #FAILED_RETRYABLE} — last attempt failed but is retryable; process again
* in the next run according to the applicable retry rule.</li>
* <li>{@link #FAILED_FINAL} — all allowed retries exhausted; skip in all future runs.</li>
@@ -19,7 +27,9 @@ package de.gecheckt.pdf.umbenenner.domain.model;
* <p>
* <strong>Attempt-status semantics (attempt history):</strong>
* <ul>
* <li>{@link #SUCCESS} — this attempt completed successfully.</li>
* <li>{@link #READY_FOR_AI} — this attempt resulted in a document ready for AI processing.</li>
* <li>{@link #PROPOSAL_READY} — this attempt resulted in a valid AI-based naming proposal.</li>
* <li>{@link #SUCCESS} — this attempt completed successfully (end-to-end, including target copy).</li>
* <li>{@link #FAILED_RETRYABLE} — this attempt failed; a future attempt is allowed.</li>
* <li>{@link #FAILED_FINAL} — this attempt failed and no further attempts will be made.</li>
* <li>{@link #SKIPPED_ALREADY_PROCESSED} — this attempt was a skip because the
@@ -44,10 +54,42 @@ package de.gecheckt.pdf.umbenenner.domain.model;
*/
public enum ProcessingStatus {
/**
* Document has passed all pre-checks and extracted PDF text has been validated.
* <p>
* This is a non-terminal intermediate state. The document is ready to be processed
* by AI-based naming in the next stage. A document with this status may be reprocessed
* in later runs, and will be updated to a subsequent status (e.g., {@link #PROPOSAL_READY}
* or failure) based on the outcome of the AI naming step.
* <p>
* The document may transition from this state to {@link #PROPOSAL_READY} on successful
* AI-based naming, or to a failure status if the AI step fails.
*
* @since M5
*/
READY_FOR_AI,
/**
* A valid, persistent AI-based naming proposal has been generated and stored.
* <p>
* This is a non-terminal intermediate state. The document is complete for its current
* processing stage, but will be processed further in the next stage (target copy,
* final filename generation with duplicate-suffix handling, and final success).
* <p>
* A document with this status will not be reprocessed by the AI step in future runs
* (idempotency rule), but may still be processed further by subsequent stages.
* The latest processing attempt with this status holds the authoritative naming proposal
* (resolved date, title, reasoning) for subsequent stages.
*
* @since M5
*/
PROPOSAL_READY,
/**
* Document was successfully processed and written to the target location.
* <p>
* A document with this status will be skipped in all future batch runs.
* As of M5, this status is reserved for the true end-to-end success after the target copy
* stage. A document with this status will be skipped in all future batch runs.
* Status is final and irreversible.
*/
SUCCESS,

View File

@@ -0,0 +1,40 @@
package de.gecheckt.pdf.umbenenner.domain.model;
import java.util.Objects;
/**
 * Stable identifier of an externally loaded prompt template.
 * <p>
 * The prompt lives in an external file or resource rather than in code. The identifier
 * provides traceability: it answers which prompt version was used for a specific AI
 * request and naming proposal.
 * <p>
 * <strong>Identity semantics:</strong> two {@code PromptIdentifier} instances are equal
 * if and only if their identifier strings are equal (by value, not by reference).
 * <p>
 * <strong>Typical examples:</strong>
 * <ul>
 *   <li>prompt file name: {@code "prompt_de_v1.txt"}</li>
 *   <li>version string: {@code "2026-03-v2"}</li>
 *   <li>content hash: {@code "sha256:abc123..."}</li>
 * </ul>
 * <p>
 * The identifier scheme is free to choose (filename, semantic version, hash, etc.), but
 * it must be stable and deterministic, so that the same prompt always receives the same
 * identifier across batch runs.
 *
 * @param identifier the stable, non-null identifier string (typically non-empty)
 *
 * @since M5
 */
public record PromptIdentifier(String identifier) {

    /**
     * Canonical constructor; the only check performed is the null check.
     *
     * @param identifier the identifier string
     * @throws NullPointerException if {@code identifier} is null
     */
    public PromptIdentifier(String identifier) {
        this.identifier = Objects.requireNonNull(identifier, "identifier must not be null");
    }
}

View File

@@ -3,7 +3,8 @@
* <p>
* This package contains the fundamental domain entities and status models required for document processing:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states,
* including intermediate non-terminal states such as {@code READY_FOR_AI} and {@code PROPOSAL_READY}</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.RunId} — unique identifier for a batch run</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext} — technical context for a batch run</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} — content-based document identity (SHA-256 hex); primary key for persistence</li>
@@ -14,6 +15,18 @@
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li>
* </ul>
* <p>
* AI and naming proposal types (M5+):
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources
* for resolved document dates</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs.
* functional AI errors</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li>
* </ul>
* <p>
* Additional classes:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason} — enumeration of pre-check failure reasons</li>

View File

@@ -8,19 +8,23 @@ import static org.junit.jupiter.api.Assertions.*;
* Unit tests for {@link ProcessingStatus} enumeration.
* <p>
* Verifies that all required status values are present and correctly defined
* for M2, M5, and future milestones.
*/
class ProcessingStatusTest {
@Test
void allRequiredStatusValuesExist() {
    // Every status the architecture requires must resolve to a non-null constant.
    // Statuses available since M2
    ProcessingStatus[] sinceM2 = {
        ProcessingStatus.SUCCESS,
        ProcessingStatus.FAILED_RETRYABLE,
        ProcessingStatus.FAILED_FINAL,
        ProcessingStatus.SKIPPED_ALREADY_PROCESSED,
        ProcessingStatus.SKIPPED_FINAL_FAILURE,
        ProcessingStatus.PROCESSING
    };
    for (ProcessingStatus required : sinceM2) {
        assertNotNull(required);
    }

    // Intermediate statuses introduced with M5
    ProcessingStatus[] sinceM5 = {
        ProcessingStatus.READY_FOR_AI,
        ProcessingStatus.PROPOSAL_READY
    };
    for (ProcessingStatus required : sinceM5) {
        assertNotNull(required);
    }
}
@Test
@@ -59,6 +63,18 @@ class ProcessingStatusTest {
assertEquals(ProcessingStatus.PROCESSING, status);
}
@Test
void readyForAiStatus_isDefinedAndAccessible() {
    // The M5 constant must be referenceable and equal to itself.
    final ProcessingStatus expected = ProcessingStatus.READY_FOR_AI;
    final ProcessingStatus actual = ProcessingStatus.READY_FOR_AI;
    assertEquals(expected, actual);
}
@Test
void proposalReadyStatus_isDefinedAndAccessible() {
    // The M5 constant must be referenceable and equal to itself.
    final ProcessingStatus expected = ProcessingStatus.PROPOSAL_READY;
    final ProcessingStatus actual = ProcessingStatus.PROPOSAL_READY;
    assertEquals(expected, actual);
}
@Test
void statusEquality_worksByReference() {
// Enums have identity-based equality
@@ -72,6 +88,8 @@ class ProcessingStatusTest {
switch (status) {
case SUCCESS -> result = "success";
case READY_FOR_AI -> result = "ready-for-ai";
case PROPOSAL_READY -> result = "proposal-ready";
case FAILED_RETRYABLE -> result = "retryable";
case FAILED_FINAL -> result = "final";
case SKIPPED_ALREADY_PROCESSED -> result = "skip-processed";
@@ -83,8 +101,8 @@ class ProcessingStatusTest {
}
@Test
void statusValues_areSixInTotal() {
void statusValues_areEightInTotal() {
ProcessingStatus[] values = ProcessingStatus.values();
assertEquals(6, values.length, "ProcessingStatus should have exactly 6 values");
assertEquals(8, values.length, "ProcessingStatus should have exactly 8 values");
}
}