M5 AP-001 Kernobjekte, Statusmodell und KI-Port-Verträge präzisiert
This commit is contained in:
@@ -103,7 +103,8 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
|
||||
return switch (record.overallStatus()) {
|
||||
case SUCCESS -> new DocumentTerminalSuccess(record);
|
||||
case FAILED_FINAL -> new DocumentTerminalFinalFailure(record);
|
||||
case PROCESSING, FAILED_RETRYABLE, SKIPPED_ALREADY_PROCESSED, SKIPPED_FINAL_FAILURE ->
|
||||
case READY_FOR_AI, PROPOSAL_READY, PROCESSING, FAILED_RETRYABLE,
|
||||
SKIPPED_ALREADY_PROCESSED, SKIPPED_FINAL_FAILURE ->
|
||||
new DocumentKnownProcessable(record);
|
||||
};
|
||||
} else {
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
|
||||
|
||||
/**
|
||||
* Outbound port for invoking an AI service over an OpenAI-compatible HTTP boundary.
|
||||
* <p>
|
||||
* This interface abstracts AI service communication, allowing the Application layer
|
||||
* to orchestrate AI-based naming without knowing about HTTP, authentication, or
|
||||
* provider-specific details.
|
||||
* <p>
|
||||
* <strong>Design principles:</strong>
|
||||
* <ul>
|
||||
* <li>Provider is configurable (OpenAI, Azure, local LLM, etc.), not hard-coded</li>
|
||||
* <li>Base URL, model name, and timeout are runtime configuration</li>
|
||||
* <li>Results are returned as structured types ({@link AiInvocationResult}),
|
||||
* never as exceptions</li>
|
||||
* <li>Technical success (HTTP 200) is distinct from response content validity</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Adapter responsibilities:</strong>
|
||||
* <ul>
|
||||
* <li>Construct an HTTP request from the {@link AiRequestRepresentation}</li>
|
||||
* <li>Apply all transport-level configuration (base URL, model, timeout, authentication)</li>
|
||||
* <li>Execute the HTTP request against the configured endpoint</li>
|
||||
* <li>Distinguish between successful reception of a response body and technical failure</li>
|
||||
* <li>Return either an invocation success with raw response or a classified technical error</li>
|
||||
* <li>Encapsulate all HTTP, JSON serialization, and authentication details</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Non-goals of this port:</strong>
|
||||
* <ul>
|
||||
* <li>JSON parsing of the response body (Application layer handles this)</li>
|
||||
* <li>Validation of response content against domain rules</li>
|
||||
* <li>Prompt construction or text formatting (Application layer does this)</li>
|
||||
* <li>Handling of provider-specific output formats or structured output schemas</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>OpenAI compatibility:</strong> The adapter must support the OpenAI Chat
|
||||
* Completions API or a compatible endpoint. The {@code AiRequestRepresentation}
|
||||
* contains the prompt and document text; the adapter is responsible for formatting
|
||||
* these as needed (e.g., system message + user message in the Chat API).
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public interface AiInvocationPort {
|
||||
|
||||
/**
|
||||
* Invokes an AI service with the given request representation.
|
||||
* <p>
|
||||
* This method sends a request to the configured AI endpoint and returns the result.
|
||||
* The request contains both the prompt and the document text, deterministically
|
||||
* composed by the Application layer.
|
||||
* <p>
|
||||
* <strong>Outcome distinction:</strong>
|
||||
* <ul>
|
||||
* <li>If the HTTP call succeeds and a response body is received,
|
||||
* {@link AiInvocationSuccess} is returned, even if the body is invalid JSON
|
||||
* or semantically problematic. The Application layer will parse and validate
|
||||
* the content.</li>
|
||||
* <li>If the HTTP call fails (timeout, network error, endpoint unreachable,
|
||||
* connection failure), {@link AiInvocationTechnicalFailure} is returned.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param request the complete request to send to the AI service; never null
|
||||
* @return an {@link AiInvocationResult} encoding either:
|
||||
* <ul>
|
||||
* <li>Success: response body was received (valid or not)</li>
|
||||
* <li>Technical failure: HTTP communication failed</li>
|
||||
* </ul>
|
||||
* @throws NullPointerException if request is null
|
||||
*
|
||||
* @see AiInvocationSuccess
|
||||
* @see AiInvocationTechnicalFailure
|
||||
*/
|
||||
AiInvocationResult invoke(AiRequestRepresentation request);
|
||||
}
|
||||
@@ -0,0 +1,28 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of invoking an AI service.
|
||||
* <p>
|
||||
* Implementations allow the Application layer to distinguish between:
|
||||
* <ul>
|
||||
* <li>Successful HTTP communication with a response body (which may still contain
|
||||
* functionally invalid content, but is at least technically received)</li>
|
||||
* <li>Technical failure (timeout, network error, endpoint unreachable, malformed response)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Permitted implementations:
|
||||
* <ul>
|
||||
* <li>{@link AiInvocationSuccess} — HTTP call completed with a response body</li>
|
||||
* <li>{@link AiInvocationTechnicalFailure} — HTTP call failed or no valid response was received</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Critical distinction:</strong> A successful invocation means the HTTP request
|
||||
* was sent and a response was received, but the response content may still be unparseable
|
||||
* or semantically invalid. This is crucial for retry logic: a technical HTTP success
|
||||
* with unparseable JSON is different from a timeout or network error.
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public sealed interface AiInvocationResult
|
||||
permits AiInvocationSuccess, AiInvocationTechnicalFailure {
|
||||
}
|
||||
@@ -0,0 +1,51 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents successful HTTP communication with an AI service.
|
||||
* <p>
|
||||
* The HTTP request was sent and a response body was received. This indicates
|
||||
* technical success of the communication, but does NOT guarantee that the response
|
||||
* content is valid, parseable, or functionally usable.
|
||||
* <p>
|
||||
* <strong>Field semantics:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #request()} — the exact request that was sent to the AI service,
|
||||
* including prompt, document text, and character counts</li>
|
||||
* <li>{@link #rawResponse()} — the uninterpreted response body returned by the AI,
|
||||
* which may be valid JSON, malformed, empty, or otherwise problematic</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The Application layer is responsible for:
|
||||
* <ul>
|
||||
* <li>Parsing the raw response (JSON extraction, field validation)</li>
|
||||
* <li>Distinguishing between parseable and unparseable responses</li>
|
||||
* <li>Validating the content against rules (title length, date format, etc.)</li>
|
||||
* <li>Classifying any failures as technical or functional</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Persistence:</strong> Both request and response are stored in the
|
||||
* processing attempt history for debugging and audit.
|
||||
*
|
||||
* @param request the AI request that was sent; never null
|
||||
* @param rawResponse the uninterpreted response body; never null (but may be empty)
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public record AiInvocationSuccess(
|
||||
AiRequestRepresentation request,
|
||||
AiRawResponse rawResponse) implements AiInvocationResult {
|
||||
|
||||
/**
|
||||
* Compact constructor validating mandatory fields.
|
||||
*
|
||||
* @throws NullPointerException if either field is null
|
||||
*/
|
||||
public AiInvocationSuccess {
|
||||
Objects.requireNonNull(request, "request must not be null");
|
||||
Objects.requireNonNull(rawResponse, "rawResponse must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,53 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a technical failure during AI service invocation.
|
||||
* <p>
|
||||
* The HTTP request could not be sent, or no valid response body was received.
|
||||
* This covers network errors, timeouts, endpoint unreachability, connection failures,
|
||||
* and other infrastructure-level problems.
|
||||
* <p>
|
||||
* <strong>Field semantics:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #request()} — the request that was attempted to be sent. Stored for
|
||||
* debugging and audit, even though it may not have reached the AI service.</li>
|
||||
* <li>{@link #failureReason()} — a classification of the technical error
|
||||
* (e.g., "TIMEOUT", "ENDPOINT_UNREACHABLE", "CONNECTION_ERROR")</li>
|
||||
* <li>{@link #failureMessage()} — a human-readable description of the error,
|
||||
* suitable for logging and operational troubleshooting</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Retry semantics:</strong> Technical failures are retryable. The Application
|
||||
* layer will record this as a transient error, and the document may be retried in
|
||||
* a later batch run up to the configured maximum transient-error count.
|
||||
* <p>
|
||||
* <strong>Distinction from functional errors:</strong> A 200 OK response with an
|
||||
* invalid JSON body is NOT a technical failure; it's an invocation success that
|
||||
* contains a functional error. Only communication/transport errors are classified here.
|
||||
*
|
||||
* @param request the request that was attempted (may not have been successfully sent);
|
||||
* never null
|
||||
* @param failureReason classification of the error type; never null (may be empty)
|
||||
* @param failureMessage human-readable error description; never null (may be empty)
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public record AiInvocationTechnicalFailure(
|
||||
AiRequestRepresentation request,
|
||||
String failureReason,
|
||||
String failureMessage) implements AiInvocationResult {
|
||||
|
||||
/**
|
||||
* Compact constructor validating mandatory fields.
|
||||
*
|
||||
* @throws NullPointerException if any field is null
|
||||
*/
|
||||
public AiInvocationTechnicalFailure {
|
||||
Objects.requireNonNull(request, "request must not be null");
|
||||
Objects.requireNonNull(failureReason, "failureReason must not be null");
|
||||
Objects.requireNonNull(failureMessage, "failureMessage must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,41 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents failure to load an external prompt template.
|
||||
* <p>
|
||||
* The prompt could not be obtained from the configured external source,
|
||||
* or the loaded content was technically invalid (e.g., empty after trimming).
|
||||
* <p>
|
||||
* <strong>Field semantics:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #failureReason()} — a classification of the failure type
|
||||
* (e.g., "FILE_NOT_FOUND", "IO_ERROR", "EMPTY_CONTENT")</li>
|
||||
* <li>{@link #failureMessage()} — a human-readable description of why
|
||||
* the load failed, suitable for logging and debugging</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* This is a technical failure, not a validation error, and typically prevents
|
||||
* the batch run from proceeding further (may lead to a {@code PROCESSING} status
|
||||
* treated as {@code FAILED_RETRYABLE}).
|
||||
*
|
||||
* @param failureReason classification of the failure (non-null, may be empty)
|
||||
* @param failureMessage human-readable failure description (non-null, may be empty)
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public record PromptLoadingFailure(
|
||||
String failureReason,
|
||||
String failureMessage) implements PromptLoadingResult {
|
||||
|
||||
/**
|
||||
* Compact constructor validating mandatory fields.
|
||||
*
|
||||
* @throws NullPointerException if either field is null
|
||||
*/
|
||||
public PromptLoadingFailure {
|
||||
Objects.requireNonNull(failureReason, "failureReason must not be null");
|
||||
Objects.requireNonNull(failureMessage, "failureMessage must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,19 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of loading an external prompt template.
|
||||
* <p>
|
||||
* Implementations allow the Application layer to distinguish between a successful
|
||||
* prompt load and various failure scenarios without using exceptions.
|
||||
* <p>
|
||||
* Permitted implementations:
|
||||
* <ul>
|
||||
* <li>{@link PromptLoadingSuccess} — prompt was successfully loaded from the external source</li>
|
||||
* <li>{@link PromptLoadingFailure} — prompt could not be loaded (file not found, read error, etc.)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public sealed interface PromptLoadingResult
|
||||
permits PromptLoadingSuccess, PromptLoadingFailure {
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents successful loading of an external prompt template.
|
||||
* <p>
|
||||
* The prompt content and a stable identifier for the prompt have both been
|
||||
* successfully obtained from the configured external source.
|
||||
* <p>
|
||||
* <strong>Field semantics:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #promptIdentifier()} — a stable, deterministic identifier
|
||||
* (e.g., filename, semantic version, or hash) that will remain the same
|
||||
* across multiple batch runs if the prompt content hasn't changed.</li>
|
||||
* <li>{@link #promptContent()} — the actual prompt template text, ready to be
|
||||
* used in constructing an AI request. May be empty in edge cases, though
|
||||
* normally expected to be meaningful.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The identifier is crucial for historical traceability: each processing attempt
|
||||
* records which prompt was used, allowing later investigation of why a particular
|
||||
* decision was made.
|
||||
*
|
||||
* @param promptIdentifier stable identifier for this prompt version; never null
|
||||
* @param promptContent the prompt template text; never null
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public record PromptLoadingSuccess(
|
||||
PromptIdentifier promptIdentifier,
|
||||
String promptContent) implements PromptLoadingResult {
|
||||
|
||||
/**
|
||||
* Compact constructor validating mandatory fields.
|
||||
*
|
||||
* @throws NullPointerException if either field is null
|
||||
*/
|
||||
public PromptLoadingSuccess {
|
||||
Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
|
||||
Objects.requireNonNull(promptContent, "promptContent must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
|
||||
* Outbound port for loading external prompt templates.
|
||||
* <p>
|
||||
* This interface abstracts the loading of prompt content from external sources
|
||||
* (files, resources, databases, etc.), allowing the Application layer to remain
|
||||
* independent of how or where prompts are stored.
|
||||
* <p>
|
||||
* <strong>Design principles:</strong>
|
||||
* <ul>
|
||||
* <li>Prompt is not embedded in code; it is loaded from an external source</li>
|
||||
* <li>Each prompt receives a stable identifier for traceability across batch runs</li>
|
||||
* <li>Results are returned as structured types ({@link PromptLoadingResult}),
|
||||
* never as exceptions</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Adapter responsibilities:</strong>
|
||||
* <ul>
|
||||
* <li>Locate and read the prompt file/resource from the configured source</li>
|
||||
* <li>Derive a stable prompt identifier (e.g., filename, semantic version, content hash)</li>
|
||||
* <li>Validate that the loaded content is not empty or otherwise invalid</li>
|
||||
* <li>Return either success or a classified failure</li>
|
||||
* <li>Encapsulate all file I/O, resource loading, and configuration details</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Non-goals of this port:</strong>
|
||||
* <ul>
|
||||
* <li>Prompt parsing or templating logic</li>
|
||||
* <li>Combining prompt with document text (Application layer handles this)</li>
|
||||
* <li>Template variable substitution</li>
|
||||
* <li>Validation of prompt content against fachliche rules</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public interface PromptPort {
|
||||
|
||||
/**
|
||||
* Loads the configured external prompt template.
|
||||
* <p>
|
||||
* This method is called once per batch run to obtain the current prompt.
|
||||
* The prompt content and its stable identifier are returned together.
|
||||
* <p>
|
||||
* If loading fails for any reason (file not found, I/O error, content validation),
|
||||
* a {@link PromptLoadingFailure} is returned rather than throwing an exception.
|
||||
*
|
||||
* @return a {@link PromptLoadingResult} encoding either:
|
||||
* <ul>
|
||||
* <li>Success: prompt content and identifier loaded successfully</li>
|
||||
* <li>Failure: prompt could not be loaded or is invalid</li>
|
||||
* </ul>
|
||||
*
|
||||
* @see PromptLoadingSuccess
|
||||
* @see PromptLoadingFailure
|
||||
*/
|
||||
PromptLoadingResult loadPrompt();
|
||||
}
|
||||
@@ -22,6 +22,14 @@
|
||||
* — Extract text content and page count from a single PDF</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* AI-based naming ports (M5+):
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptPort}
|
||||
* — Load an external prompt template and derive its stable identifier</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort}
|
||||
* — Invoke an AI service over an OpenAI-compatible HTTP boundary</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Persistence and fingerprinting ports:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
|
||||
@@ -46,6 +54,10 @@
|
||||
* — Sealed result of a fingerprint computation (success or technical error)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult}
|
||||
* — Sealed result of a master record lookup (unknown / processable / terminal / failure)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingResult}
|
||||
* — Sealed result of prompt loading (success with identifier and content, or failure)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult}
|
||||
* — Sealed result of AI service invocation (success with raw response, or technical failure)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Exception types:
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
/**
 * Classification of AI-related errors into technical vs. functional categories.
 * <p>
 * This enumeration distinguishes between two fundamental error types that occur
 * during AI-based naming proposal generation:
 * <ul>
 *   <li><strong>Technical errors</strong>: Infrastructure or communication failures
 *       (e.g., API unreachable, timeout, unparseable response). These are typically
 *       transient and retryable.</li>
 *   <li><strong>Functional errors</strong>: Problems with the AI response content itself
 *       (e.g., invalid title, unparseable date, generic placeholder title).
 *       These are deterministic and typically not resolved by retry alone.</li>
 * </ul>
 * <p>
 * The classification determines retry behavior: technical errors may be retried in
 * a later run, while functional errors are subject to the deterministic failure rule
 * (first occurrence retryable, second occurrence final).
 *
 * @since M5
 */
public enum AiErrorClassification {

    // NOTE(review): an unparseable or structurally malformed response is listed here as
    // TECHNICAL, while the AiInvocationTechnicalFailure documentation describes a 200 OK
    // response with invalid JSON as carrying a functional error. Confirm which
    // classification is intended and align the two descriptions.

    /**
     * A technical infrastructure or communication failure occurred.
     * <p>
     * Examples: API endpoint not reachable, HTTP timeout, malformed response structure,
     * missing mandatory fields in otherwise-parseable JSON, network error.
     * <p>
     * These errors are typically transient and may be resolved by retry in a later
     * batch run. The failure is recorded against the transient-error counter.
     */
    TECHNICAL,

    /**
     * A functional or content validation error occurred.
     * <p>
     * Examples: invalid or generic title (e.g., "Dokument"), unparseable date string,
     * AI response violates documented rules (e.g., title contains prohibited characters).
     * <p>
     * These errors are deterministic and reflect issues with the AI-generated content
     * itself or the document's content quality. The failure is recorded against the
     * content-error counter, subject to the deterministic retry rule.
     */
    FUNCTIONAL
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * Unvalidated, uninterpreted raw response body from an AI service.
 * <p>
 * This record holds the response body returned by the AI HTTP endpoint as a string,
 * before any parsing, validation, or business-logic processing. It is used to:
 * <ul>
 *   <li>Preserve the original response for audit and debugging purposes.</li>
 *   <li>Separate the technical success of the HTTP call from the semantic validity
 *       of the response content.</li>
 *   <li>Enable deterministic retry decisions: a 200 OK response is a technical success
 *       even if the JSON is unparseable or semantically invalid.</li>
 * </ul>
 * <p>
 * <strong>Persistence:</strong> The raw response is stored in SQLite history for
 * traceability and future debugging. It may contain the full JSON structure or
 * formatted text, depending on the AI service.
 * <p>
 * <strong>Example:</strong>
 * <pre>
 * {@code
 * AiRawResponse response = new AiRawResponse(
 *     "{\"date\": \"2026-03-05\", \"title\": \"Stromabrechnung\", \"reasoning\": \"...\"}"
 * );
 * }
 * </pre>
 *
 * @param content the raw response body as a string (non-null, may be empty or malformed)
 *
 * @since M5
 */
public record AiRawResponse(String content) {

    /**
     * Compact constructor validating that content is not null.
     *
     * @throws NullPointerException if {@code content} is null
     */
    public AiRawResponse {
        Objects.requireNonNull(content, "content must not be null");
    }
}
|
||||
@@ -0,0 +1,73 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Deterministic, complete representation of the request sent to an AI service.
|
||||
* <p>
|
||||
* This record captures the exact prompt, text, and configuration that were sent
|
||||
* to the AI in a single request, allowing for reproducibility and debugging.
|
||||
* <p>
|
||||
* <strong>Construction:</strong> The Application layer constructs this representation
|
||||
* deterministically from:
|
||||
* <ul>
|
||||
* <li>The loaded external prompt text</li>
|
||||
* <li>The prompt's stable identifier</li>
|
||||
* <li>The document's extracted PDF text (already limited to max characters)</li>
|
||||
* <li>Structural markers (delimiters, field names) that define the format</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Field semantics:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #promptIdentifier()} — stable identifier of the prompt template used</li>
|
||||
* <li>{@link #promptContent()} — the actual prompt text loaded from the external file</li>
|
||||
* <li>{@link #documentText()} — the PDF-extracted text, already limited to the configured
|
||||
* maximum characters before being placed in this representation</li>
|
||||
* <li>{@link #sentCharacterCount()} — the exact number of text characters from
|
||||
* documentText that were included in the request sent to the AI
|
||||
* (may be less than documentText.length() if truncation occurred)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Persistence:</strong> Both prompt identifier and sent character count
|
||||
* are recorded in the processing attempt history for traceability.
|
||||
* <p>
|
||||
* <strong>Not included:</strong>
|
||||
* <ul>
|
||||
* <li>HTTP-specific details (headers, authentication, endpoint URL)</li>
|
||||
* <li>Serialization format (whether sent as JSON, multipart, etc.)</li>
|
||||
* <li>AI provider configuration details</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param promptIdentifier stable identifier for the prompt template; never null
|
||||
* @param promptContent content of the prompt template; never null (may be empty,
|
||||
* though typically meaningful)
|
||||
* @param documentText extracted PDF text (already limited to max characters);
|
||||
* never null (may be empty)
|
||||
* @param sentCharacterCount exact number of characters from documentText that were
|
||||
* sent to the AI; must be >= 0 and <= documentText.length()
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public record AiRequestRepresentation(
|
||||
PromptIdentifier promptIdentifier,
|
||||
String promptContent,
|
||||
String documentText,
|
||||
int sentCharacterCount) {
|
||||
|
||||
/**
|
||||
* Compact constructor validating all fields.
|
||||
*
|
||||
* @throws NullPointerException if any field except possibly documentText is null
|
||||
* @throws IllegalArgumentException if sentCharacterCount is out of valid range
|
||||
*/
|
||||
public AiRequestRepresentation {
|
||||
Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null");
|
||||
Objects.requireNonNull(promptContent, "promptContent must not be null");
|
||||
Objects.requireNonNull(documentText, "documentText must not be null");
|
||||
if (sentCharacterCount < 0 || sentCharacterCount > documentText.length()) {
|
||||
throw new IllegalArgumentException(
|
||||
"sentCharacterCount must be >= 0 and <= documentText.length(); " +
|
||||
"got " + sentCharacterCount + " but documentText.length() = " + documentText.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,38 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
/**
 * Enumeration of valid sources for a resolved document date.
 * <p>
 * Each enum constant represents a specific origin or determination method for the date
 * used in a naming proposal. The source is recorded for traceability.
 * <p>
 * <strong>Semantics:</strong>
 * <ul>
 *   <li>{@link #AI_PROVIDED} — the date was supplied by the AI in its response
 *       as a valid {@code YYYY-MM-DD} string.</li>
 *   <li>{@link #FALLBACK_CURRENT} — the date was set to the current system date
 *       (from {@code ClockPort}) because the AI did not provide a usable date.</li>
 * </ul>
 * <p>
 * The source is recorded in the processing attempt history for reproducibility
 * and operational transparency.
 *
 * @since M5
 */
public enum DateSource {

    /**
     * The date was provided by the AI in its JSON response.
     * <p>
     * The AI explicitly supplied a {@code date} field in valid {@code YYYY-MM-DD} format.
     */
    AI_PROVIDED,

    /**
     * The date is the current system date used as fallback.
     * <p>
     * The AI either omitted the {@code date} field or provided no usable date.
     * The application set the fallback to the current date from {@code ClockPort}.
     */
    FALLBACK_CURRENT
}
|
||||
@@ -0,0 +1,68 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* A validated naming proposal derived from AI analysis of a document.
|
||||
* <p>
|
||||
* This record represents the core results of the AI-based naming stage:
|
||||
* a proposed date, a proposed title, and the AI's reasoning. All three fields
|
||||
* have been validated according to application rules at the time of creation.
|
||||
* <p>
|
||||
* <strong>Field semantics:</strong>
|
||||
* <ul>
|
||||
* <li>{@link #resolvedDate()} — the effective date for the naming proposal,
|
||||
* either supplied by the AI or derived as a fallback. Always non-null.</li>
|
||||
* <li>{@link #dateSource()} — indicates whether the date came from the AI
|
||||
* or is a fallback to the current system date.</li>
|
||||
* <li>{@link #validatedTitle()} — the title extracted and validated from
|
||||
* the AI response. Guaranteed to be non-null, non-empty, and compliant
|
||||
* with documented title rules (max 20 base characters, no prohibited
|
||||
* special characters, etc.).</li>
|
||||
* <li>{@link #aiReasoning()} — the justification or explanation provided
|
||||
* by the AI for its proposal. Non-null (may be empty in edge cases,
|
||||
* though normally expected to be meaningful).</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Not included in this proposal:</strong>
|
||||
* <ul>
|
||||
* <li>Final target filename (e.g., with {@code (1)}, {@code (2)} duplicates)</li>
|
||||
* <li>Target file path or copy operation</li>
|
||||
* <li>Windows-specific filename character transformations</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Persistence:</strong> The naming proposal is persistently stored as part
|
||||
* of the processing attempt history for reproducibility and audit.
|
||||
*
|
||||
* @param resolvedDate the effective date (never null); derived from AI or fallback
|
||||
* @param dateSource origin of the date ({@link DateSource#AI_PROVIDED} or
|
||||
* {@link DateSource#FALLBACK_CURRENT}); never null
|
||||
* @param validatedTitle the title validated per application rules (non-null, non-empty,
|
||||
* max 20 base characters as defined in requirements)
|
||||
* @param aiReasoning the AI's explanation for the proposal (non-null, may be empty)
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
public record NamingProposal(
|
||||
LocalDate resolvedDate,
|
||||
DateSource dateSource,
|
||||
String validatedTitle,
|
||||
String aiReasoning) {
|
||||
|
||||
/**
|
||||
* Compact constructor validating all mandatory fields.
|
||||
*
|
||||
* @throws NullPointerException if any field is null
|
||||
* @throws IllegalArgumentException if validatedTitle is empty
|
||||
*/
|
||||
public NamingProposal {
|
||||
Objects.requireNonNull(resolvedDate, "resolvedDate must not be null");
|
||||
Objects.requireNonNull(dateSource, "dateSource must not be null");
|
||||
Objects.requireNonNull(validatedTitle, "validatedTitle must not be null");
|
||||
if (validatedTitle.isEmpty()) {
|
||||
throw new IllegalArgumentException("validatedTitle must not be empty");
|
||||
}
|
||||
Objects.requireNonNull(aiReasoning, "aiReasoning must not be null");
|
||||
}
|
||||
}
|
||||
@@ -9,7 +9,15 @@ package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
* <p>
|
||||
* <strong>Overall-status semantics (master record):</strong>
|
||||
* <ul>
|
||||
* <li>{@link #READY_FOR_AI} — document has passed all pre-checks and extracted text
* has been validated; it is ready for the AI-based naming step. This is a non-terminal
* intermediate state; the document will be processed further in subsequent stages.</li>
* <li>{@link #PROPOSAL_READY} — a valid, persistent AI-based naming proposal has been
* generated and stored. The document is complete for the current stage but will be
* processed further in the next stage (target copy, final filename generation).
* This is a non-terminal intermediate state.</li>
* <li>{@link #SUCCESS} — document was fully processed end-to-end and written to the
* target location. Status is final and irreversible; skip in all future runs.</li>
|
||||
* <li>{@link #FAILED_RETRYABLE} — last attempt failed but is retryable; process again
|
||||
* in the next run according to the applicable retry rule.</li>
|
||||
* <li>{@link #FAILED_FINAL} — all allowed retries exhausted; skip in all future runs.</li>
|
||||
@@ -19,7 +27,9 @@ package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
* <p>
|
||||
* <strong>Attempt-status semantics (attempt history):</strong>
|
||||
* <ul>
|
||||
* <li>{@link #READY_FOR_AI} — this attempt resulted in a document ready for AI processing.</li>
* <li>{@link #PROPOSAL_READY} — this attempt resulted in a valid AI-based naming proposal.</li>
* <li>{@link #SUCCESS} — this attempt completed successfully (end-to-end, including target copy).</li>
|
||||
* <li>{@link #FAILED_RETRYABLE} — this attempt failed; a future attempt is allowed.</li>
|
||||
* <li>{@link #FAILED_FINAL} — this attempt failed and no further attempts will be made.</li>
|
||||
* <li>{@link #SKIPPED_ALREADY_PROCESSED} — this attempt was a skip because the
|
||||
@@ -44,10 +54,42 @@ package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
*/
|
||||
public enum ProcessingStatus {
|
||||
|
||||
/**
|
||||
* Document has passed all pre-checks and extracted PDF text has been validated.
|
||||
* <p>
|
||||
* This is a non-terminal intermediate state. The document is ready to be processed
|
||||
* by AI-based naming in the next stage. A document with this status may be reprocessed
|
||||
* in later runs, and will be updated to a subsequent status (e.g., {@link #PROPOSAL_READY}
|
||||
* or failure) based on the outcome of the AI naming step.
|
||||
* <p>
|
||||
* The document may transition from this state to {@link #PROPOSAL_READY} on successful
|
||||
* AI-based naming, or to a failure status if the AI step fails.
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
READY_FOR_AI,
|
||||
|
||||
/**
|
||||
* A valid, persistent AI-based naming proposal has been generated and stored.
|
||||
* <p>
|
||||
* This is a non-terminal intermediate state. The document is complete for its current
|
||||
* processing stage, but will be processed further in the next stage (target copy,
|
||||
* final filename generation with duplicate-suffix handling, and final success).
|
||||
* <p>
|
||||
* A document with this status will not be reprocessed by the AI step in future runs
|
||||
* (idempotency rule), but may still be processed further by subsequent stages.
|
||||
* The latest processing attempt with this status holds the authoritative naming proposal
|
||||
* (resolved date, title, reasoning) for subsequent stages.
|
||||
*
|
||||
* @since M5
|
||||
*/
|
||||
PROPOSAL_READY,
|
||||
|
||||
/**
|
||||
* Document was successfully processed and written to the target location.
|
||||
* <p>
|
||||
* As of M5, this status is reserved for true end-to-end success after the target copy
* stage. A document with this status will be skipped in all future batch runs.
* The status is final and irreversible.
|
||||
*/
|
||||
SUCCESS,
|
||||
|
||||
@@ -0,0 +1,40 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * Stable, unique identifier for an externally-loaded prompt template.
 * <p>
 * The prompt is not embedded in code but loaded from an external file or resource.
 * The identifier provides traceability: it records which prompt version was used
 * for a specific AI request and naming proposal.
 * <p>
 * <strong>Identity semantics:</strong> Two {@code PromptIdentifier} instances are
 * equal if and only if their identifier strings are equal (by value, not by reference).
 * <p>
 * <strong>Typical examples:</strong>
 * <ul>
 *   <li>Prompt file name: {@code "prompt_de_v1.txt"}</li>
 *   <li>Version string: {@code "2026-03-v2"}</li>
 *   <li>Content hash: {@code "sha256:abc123..."}</li>
 * </ul>
 * <p>
 * The choice of identifier scheme is flexible (filename, semantic version, hash, etc.),
 * but must be stable and deterministic so that the same prompt always receives the
 * same identifier across batch runs.
 *
 * @param identifier the stable, non-null identifier string (typically non-empty;
 *                   an empty string is accepted by the constructor)
 *
 * @since M5
 */
public record PromptIdentifier(String identifier) {

    /**
     * Compact constructor validating that the identifier is not null.
     * <p>
     * No further constraints (such as non-emptiness) are enforced here.
     *
     * @throws NullPointerException if {@code identifier} is null
     */
    public PromptIdentifier {
        Objects.requireNonNull(identifier, "identifier must not be null");
    }
}
|
||||
@@ -3,7 +3,8 @@
|
||||
* <p>
|
||||
* This package contains the fundamental domain entities and status models required for document processing:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states,
* including intermediate non-terminal states such as {@code READY_FOR_AI} and {@code PROPOSAL_READY}</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.RunId} — unique identifier for a batch run</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext} — technical context for a batch run</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} — content-based document identity (SHA-256 hex); primary key for persistence</li>
|
||||
@@ -14,6 +15,18 @@
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingDecision} — sealed result of pre-checks</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* AI and naming proposal types (M5+):
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.DateSource} — enumeration of valid date sources
|
||||
* for resolved document dates</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification} — distinguishes technical vs.
|
||||
* functional AI errors</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier} — stable identifier for an external prompt</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse} — unvalidated raw response from AI</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.NamingProposal} — validated naming proposal derived from AI result</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation} — deterministic representation of an AI request</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Additional classes:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason} — enumeration of pre-check failure reasons</li>
|
||||
|
||||
@@ -8,19 +8,23 @@ import static org.junit.jupiter.api.Assertions.*;
|
||||
* Unit tests for {@link ProcessingStatus} enumeration.
|
||||
* <p>
|
||||
* Verifies that all required status values are present and correctly defined
|
||||
* for M2, M5, and future milestones.
|
||||
*/
|
||||
class ProcessingStatusTest {
|
||||
|
||||
@Test
|
||||
void allRequiredStatusValuesExist() {
|
||||
// Verify all status values required by the architecture are present
|
||||
// M2+ statuses
|
||||
assertNotNull(ProcessingStatus.SUCCESS);
|
||||
assertNotNull(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertNotNull(ProcessingStatus.FAILED_FINAL);
|
||||
assertNotNull(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
|
||||
assertNotNull(ProcessingStatus.SKIPPED_FINAL_FAILURE);
|
||||
assertNotNull(ProcessingStatus.PROCESSING);
|
||||
// M5+ statuses
|
||||
assertNotNull(ProcessingStatus.READY_FOR_AI);
|
||||
assertNotNull(ProcessingStatus.PROPOSAL_READY);
|
||||
}
|
||||
|
||||
@Test
|
||||
@@ -59,6 +63,18 @@ class ProcessingStatusTest {
|
||||
assertEquals(ProcessingStatus.PROCESSING, status);
|
||||
}
|
||||
|
||||
@Test
|
||||
void readyForAiStatus_isDefinedAndAccessible() {
|
||||
ProcessingStatus status = ProcessingStatus.READY_FOR_AI;
|
||||
assertEquals(ProcessingStatus.READY_FOR_AI, status);
|
||||
}
|
||||
|
||||
@Test
|
||||
void proposalReadyStatus_isDefinedAndAccessible() {
|
||||
ProcessingStatus status = ProcessingStatus.PROPOSAL_READY;
|
||||
assertEquals(ProcessingStatus.PROPOSAL_READY, status);
|
||||
}
|
||||
|
||||
@Test
|
||||
void statusEquality_worksByReference() {
|
||||
// Enums have identity-based equality
|
||||
@@ -72,6 +88,8 @@ class ProcessingStatusTest {
|
||||
|
||||
switch (status) {
|
||||
case SUCCESS -> result = "success";
|
||||
case READY_FOR_AI -> result = "ready-for-ai";
|
||||
case PROPOSAL_READY -> result = "proposal-ready";
|
||||
case FAILED_RETRYABLE -> result = "retryable";
|
||||
case FAILED_FINAL -> result = "final";
|
||||
case SKIPPED_ALREADY_PROCESSED -> result = "skip-processed";
|
||||
@@ -83,8 +101,8 @@ class ProcessingStatusTest {
|
||||
}
|
||||
|
||||
@Test
|
||||
void statusValues_areSixInTotal() {
|
||||
void statusValues_areEightInTotal() {
|
||||
ProcessingStatus[] values = ProcessingStatus.values();
|
||||
assertEquals(6, values.length, "ProcessingStatus should have exactly 6 values");
|
||||
assertEquals(8, values.length, "ProcessingStatus should have exactly 8 values");
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user