1
0

M5 komplett umgesetzt

This commit is contained in:
2026-04-07 12:26:14 +02:00
parent 506f5ac32e
commit 9874fdb1ba
51 changed files with 5960 additions and 536 deletions

View File

@@ -19,6 +19,12 @@
<version>${project.version}</version>
</dependency>
<!-- JSON parsing for AI response parsing -->
<dependency>
<groupId>org.json</groupId>
<artifactId>json</artifactId>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>org.junit.jupiter</groupId>

View File

@@ -37,7 +37,11 @@ import java.util.Objects;
* <li>{@link #updatedAt()} — timestamp of the most recent update to this master record.</li>
* </ul>
* <p>
* <strong>Not yet included:</strong> target path, target file name, AI-related fields.
* <strong>Target location fields:</strong> {@link #lastTargetPath()} and
* {@link #lastTargetFileName()} are populated only after the document reaches
* {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#SUCCESS}. Both
* fields are {@code null} for documents that have not yet been successfully copied
* to the target folder.
*
* @param fingerprint content-based identity; never null
* @param lastKnownSourceLocator opaque locator to the physical source file; never null
@@ -48,6 +52,10 @@ import java.util.Objects;
* @param lastSuccessInstant timestamp of the successful processing, or {@code null}
* @param createdAt timestamp when this record was first created; never null
* @param updatedAt timestamp of the most recent update; never null
* @param lastTargetPath opaque locator of the target folder where the last
* successful copy was written, or {@code null}
* @param lastTargetFileName filename of the last successfully written target copy
* (including any duplicate suffix), or {@code null}
*/
public record DocumentRecord(
DocumentFingerprint fingerprint,
@@ -58,7 +66,9 @@ public record DocumentRecord(
Instant lastFailureInstant,
Instant lastSuccessInstant,
Instant createdAt,
Instant updatedAt) {
Instant updatedAt,
String lastTargetPath,
String lastTargetFileName) {
/**
* Compact constructor validating mandatory non-null fields.

View File

@@ -1,10 +1,12 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import java.time.Instant;
import java.time.LocalDate;
import java.util.Objects;
/**
@@ -40,20 +42,49 @@ import java.util.Objects;
* successful or skip attempts.</li>
* <li>{@link #retryable()} — {@code true} if the failure is considered retryable in a
* later run; {@code false} for final failures, successes, and skip attempts.</li>
* <li>{@link #modelName()} — the AI model name used in this attempt; {@code null} if
* no AI call was made (e.g. pre-check failures or skip attempts).</li>
* <li>{@link #promptIdentifier()} — stable identifier of the prompt template used;
* {@code null} if no AI call was made.</li>
* <li>{@link #processedPageCount()} — number of PDF pages processed; {@code null} if
* pages were not extracted (e.g. pre-fingerprint or skip attempts).</li>
* <li>{@link #sentCharacterCount()} — number of characters sent to the AI; {@code null}
* if no AI call was made.</li>
* <li>{@link #aiRawResponse()} — the complete raw AI response body; {@code null} if no
* AI call was made. Stored in SQLite but not written to log files by default.</li>
* <li>{@link #aiReasoning()} — the reasoning extracted from the AI response; {@code null}
* if no valid AI response was obtained.</li>
* <li>{@link #resolvedDate()} — the date resolved for the naming proposal; {@code null}
* if no naming proposal was produced.</li>
* <li>{@link #dateSource()} — the origin of the resolved date; {@code null} if no
* naming proposal was produced.</li>
* <li>{@link #validatedTitle()} — the validated title from the naming proposal;
* {@code null} if no naming proposal was produced.</li>
* <li>{@link #finalTargetFileName()} — the final filename written to the target folder
* (including any duplicate suffix); set only for
* {@link ProcessingStatus#SUCCESS} attempts, {@code null} otherwise.</li>
* </ul>
* <p>
* <strong>Not yet included:</strong> model name, prompt identifier, AI raw response,
* AI reasoning, resolved date, date source, final title, final target file name.
*
* @param fingerprint content-based document identity; never null
* @param runId identifier of the batch run; never null
* @param attemptNumber monotonic sequence number per fingerprint; must be &gt;= 1
* @param startedAt start of this processing attempt; never null
* @param endedAt end of this processing attempt; never null
* @param status outcome status of this attempt; never null
* @param failureClass failure classification, or {@code null} for non-failure statuses
* @param failureMessage failure description, or {@code null} for non-failure statuses
* @param retryable whether this failure should be retried in a later run
* @param fingerprint content-based document identity; never null
* @param runId identifier of the batch run; never null
* @param attemptNumber monotonic sequence number per fingerprint; must be &gt;= 1
* @param startedAt start of this processing attempt; never null
* @param endedAt end of this processing attempt; never null
* @param status outcome status of this attempt; never null
* @param failureClass failure classification, or {@code null} for non-failure statuses
* @param failureMessage failure description, or {@code null} for non-failure statuses
* @param retryable whether this failure should be retried in a later run
* @param modelName AI model name, or {@code null} if no AI call was made
* @param promptIdentifier prompt identifier, or {@code null} if no AI call was made
* @param processedPageCount number of PDF pages processed, or {@code null}
* @param sentCharacterCount number of characters sent to AI, or {@code null}
* @param aiRawResponse full raw AI response, or {@code null}
* @param aiReasoning AI reasoning text, or {@code null}
* @param resolvedDate resolved date for naming proposal, or {@code null}
* @param dateSource origin of resolved date, or {@code null}
* @param validatedTitle validated title, or {@code null}
* @param finalTargetFileName filename written to the target folder for SUCCESS attempts,
* or {@code null}
*/
public record ProcessingAttempt(
DocumentFingerprint fingerprint,
@@ -64,7 +95,19 @@ public record ProcessingAttempt(
ProcessingStatus status,
String failureClass,
String failureMessage,
boolean retryable) {
boolean retryable,
// AI traceability fields (null for non-AI attempts)
String modelName,
String promptIdentifier,
Integer processedPageCount,
Integer sentCharacterCount,
String aiRawResponse,
String aiReasoning,
LocalDate resolvedDate,
DateSource dateSource,
String validatedTitle,
// Target copy traceability (null for non-SUCCESS attempts)
String finalTargetFileName) {
/**
* Compact constructor validating mandatory non-null fields and numeric constraints.
@@ -83,4 +126,37 @@ public record ProcessingAttempt(
Objects.requireNonNull(endedAt, "endedAt must not be null");
Objects.requireNonNull(status, "status must not be null");
}
/**
* Creates a {@link ProcessingAttempt} with no AI traceability fields set.
* <p>
* Convenience factory for pre-check failures, skip events, and any attempt
* that does not involve an AI call.
*
* @param fingerprint document identity; must not be null
* @param runId batch run identifier; must not be null
* @param attemptNumber monotonic attempt number; must be &gt;= 1
* @param startedAt start instant; must not be null
* @param endedAt end instant; must not be null
* @param status outcome status; must not be null
* @param failureClass failure class name, or {@code null}
* @param failureMessage failure description, or {@code null}
* @param retryable whether retryable in a later run
* @return a new attempt with all AI fields set to {@code null}
*/
public static ProcessingAttempt withoutAiFields(
DocumentFingerprint fingerprint,
RunId runId,
int attemptNumber,
Instant startedAt,
Instant endedAt,
ProcessingStatus status,
String failureClass,
String failureMessage,
boolean retryable) {
return new ProcessingAttempt(
fingerprint, runId, attemptNumber, startedAt, endedAt,
status, failureClass, failureMessage, retryable,
null, null, null, null, null, null, null, null, null, null);
}
}

View File

@@ -1,6 +1,7 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import java.util.List;
@@ -66,4 +67,25 @@ public interface ProcessingAttemptRepository {
* @throws DocumentPersistenceException if the query fails due to a technical error
*/
List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint);
/**
* Returns the most recent attempt with status {@link ProcessingStatus#PROPOSAL_READY}
* for the given fingerprint, or {@code null} if no such attempt exists.
* <p>
* <strong>Leading source for subsequent processing stages:</strong>
* The most recent {@code PROPOSAL_READY} attempt is the authoritative source for
* the validated naming proposal (resolved date, date source, validated title, and
* AI reasoning) consumed by subsequent stages. The document master record does not
* carry redundant proposal data; this method is the only correct way to retrieve it.
* <p>
* If the overall document status is {@code PROPOSAL_READY} but this method returns
* {@code null}, or if the returned attempt is missing mandatory proposal fields, the
* state is considered an inconsistent persistence state and must be treated as a
* document-level technical error — not silently healed.
*
* @param fingerprint the document identity; must not be null
* @return the most recent {@code PROPOSAL_READY} attempt, or {@code null} if none exists
* @throws DocumentPersistenceException if the query fails due to a technical error
*/
ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint);
}

View File

@@ -0,0 +1,26 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import java.util.Objects;
/**
 * Successful outcome of {@link TargetFolderPort#resolveUniqueFilename(String)}.
 * <p>
 * Wraps the first filename found to be available in the target folder. The name
 * always carries the {@code .pdf} extension and, where the base name was already
 * taken, a numeric duplicate-avoidance suffix placed immediately before
 * {@code .pdf} (e.g., {@code "2024-01-15 - Rechnung(1).pdf"}).
 *
 * @param resolvedFilename the available filename including extension; never null or blank
 */
public record ResolvedTargetFilename(String resolvedFilename) implements TargetFilenameResolutionResult {

    /**
     * Validates the wrapped filename.
     *
     * @throws NullPointerException if {@code resolvedFilename} is null
     * @throws IllegalArgumentException if {@code resolvedFilename} is blank
     */
    public ResolvedTargetFilename {
        // Null check and blank check fused: requireNonNull returns its argument,
        // so the NPE (with message) still fires before isBlank() is reached.
        if (Objects.requireNonNull(resolvedFilename, "resolvedFilename must not be null").isBlank()) {
            throw new IllegalArgumentException("resolvedFilename must not be blank");
        }
    }
}

View File

@@ -0,0 +1,45 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
/**
* Outbound port for copying a source PDF to the target folder.
* <p>
* The physical copy is the final step in the successful document processing path.
* Copying is performed via a temporary file in the target context with a subsequent
* atomic move/rename to the final target filename, minimising the risk of incomplete
* target files being visible.
*
* <h2>Source integrity</h2>
* <p>
* The source file identified by the {@link SourceDocumentLocator} is <strong>never</strong>
* modified, moved, or deleted by this port. Only a copy is written.
*
* <h2>No immediate retry</h2>
* <p>
* This port performs exactly one copy attempt per invocation. No automatic retry within
* the same call is performed; retry decisions belong to higher-level orchestration.
*
* <h2>Architecture boundary</h2>
* <p>
* No {@code Path}, {@code File}, or NIO types appear in this interface.
*/
public interface TargetFileCopyPort {
/**
* Copies the source document to the target folder under the given resolved filename.
* <p>
* The implementation writes to a temporary file first and then performs a
* move/rename to the final {@code resolvedFilename}. If the move fails, a
* best-effort cleanup of the temporary file is attempted before returning the
* failure result.
*
* @param sourceLocator opaque locator identifying the source file; must not be null
* @param resolvedFilename the final filename (not full path) to write in the target
* folder; must not be null or blank; must have been obtained
* from {@link TargetFolderPort#resolveUniqueFilename(String)}
* @return {@link TargetFileCopySuccess} if the copy completed successfully, or
* {@link TargetFileCopyTechnicalFailure} if any step failed
*/
TargetFileCopyResult copyToTarget(SourceDocumentLocator sourceLocator, String resolvedFilename);
}

View File

@@ -0,0 +1,14 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Sealed result type for {@link TargetFileCopyPort#copyToTarget}.
* <p>
* Permits exactly two outcomes:
* <ul>
* <li>{@link TargetFileCopySuccess} — the source was successfully copied to the target.</li>
* <li>{@link TargetFileCopyTechnicalFailure} — a technical failure occurred during copying.</li>
* </ul>
*/
public sealed interface TargetFileCopyResult
permits TargetFileCopySuccess, TargetFileCopyTechnicalFailure {
}

View File

@@ -0,0 +1,10 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Successful outcome of {@link TargetFileCopyPort#copyToTarget}.
* <p>
* Indicates that the source file was successfully copied to the target folder and the
* final move/rename completed. The target file is now visible under the resolved filename.
*/
public record TargetFileCopySuccess() implements TargetFileCopyResult {
}

View File

@@ -0,0 +1,30 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import java.util.Objects;
/**
* Technical failure outcome of {@link TargetFileCopyPort#copyToTarget}.
* <p>
* Indicates that copying the source file to the target folder failed. The failure is
* always treated as a transient, retryable document-level technical error.
* <p>
* The {@code targetFileCleanedUp} flag records whether a best-effort cleanup of any
* partially written temporary target file was successful. A value of {@code false}
* means a stale temporary file may remain in the target folder; a value of {@code true}
* means cleanup succeeded (or no temporary file had been created at all).
*
* @param errorMessage human-readable description of the failure; never null
* @param targetFileCleanedUp {@code true} if cleanup of any temporary file succeeded;
* {@code false} if cleanup failed or was not attempted
*/
public record TargetFileCopyTechnicalFailure(
String errorMessage,
boolean targetFileCleanedUp) implements TargetFileCopyResult {
/**
* @throws NullPointerException if {@code errorMessage} is null
*/
public TargetFileCopyTechnicalFailure {
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
}
}

View File

@@ -0,0 +1,14 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Sealed result type for {@link TargetFolderPort#resolveUniqueFilename(String)}.
* <p>
* Permits exactly two outcomes:
* <ul>
* <li>{@link ResolvedTargetFilename} — the first available unique filename was determined.</li>
* <li>{@link TargetFolderTechnicalFailure} — the target folder could not be accessed.</li>
* </ul>
*/
public sealed interface TargetFilenameResolutionResult
permits ResolvedTargetFilename, TargetFolderTechnicalFailure {
}

View File

@@ -0,0 +1,71 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
/**
* Outbound port for target folder access: duplicate resolution and best-effort cleanup.
* <p>
* The target folder is the directory where the renamed PDF copy is written. This port
* encapsulates all target-folder concerns so that the application layer never handles
* filesystem types ({@code Path}, {@code File}) directly.
*
* <h2>Duplicate resolution</h2>
* <p>
* When the base filename is already taken in the target folder, the port determines
* the first available name by appending a numeric suffix directly before {@code .pdf}:
* <pre>
* 2024-01-15 - Rechnung.pdf
* 2024-01-15 - Rechnung(1).pdf
* 2024-01-15 - Rechnung(2).pdf
* ...
* </pre>
* The base filename must already include the {@code .pdf} extension. The suffix is
* purely a technical collision-avoidance mechanism and introduces no new fachliche
* title interpretation.
*
* <h2>Architecture boundary</h2>
* <p>
* No {@code Path}, {@code File}, or NIO types appear in this interface. The concrete
* adapter implementation translates the opaque folder locator string to actual
* filesystem operations.
*/
public interface TargetFolderPort {
/**
* Returns an opaque string that identifies the target folder managed by this port.
* <p>
* The application layer treats this as an opaque locator and stores it in the
* document master record ({@code lastTargetPath}) for traceability. It must not
* be interpreted by the application layer.
*
* @return a non-null, non-blank string identifying the target folder
*/
String getTargetFolderLocator();
/**
* Resolves the first available unique filename in the target folder for the given base name.
* <p>
* If the base name is not yet taken, it is returned unchanged. Otherwise the method
* appends {@code (1)}, {@code (2)}, etc. directly before {@code .pdf} until a free
* name is found.
* <p>
* The returned filename contains only the file name, not the full path. It is safe
* to use as the {@code resolvedFilename} parameter of
* {@link TargetFileCopyPort#copyToTarget(de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator, String)}.
*
* @param baseName the desired filename including the {@code .pdf} extension;
* must not be null or blank
* @return a {@link ResolvedTargetFilename} with the first available name, or a
* {@link TargetFolderTechnicalFailure} if the target folder is not accessible
*/
TargetFilenameResolutionResult resolveUniqueFilename(String baseName);
/**
* Best-effort attempt to delete a file previously written to the target folder.
* <p>
* Intended for rollback after a successful target copy when subsequent persistence
* fails. This method must not throw; if deletion fails for any reason, the failure
* is silently ignored.
*
* @param resolvedFilename the filename (not full path) to delete; must not be null
*/
void tryDeleteTargetFile(String resolvedFilename);
}

View File

@@ -0,0 +1,22 @@
package de.gecheckt.pdf.umbenenner.application.port.out;
import java.util.Objects;
/**
* Technical failure outcome of {@link TargetFolderPort#resolveUniqueFilename(String)}.
* <p>
* Indicates that the target folder could not be accessed when attempting to determine
* a unique filename. This is a transient infrastructure error; the calling use case
* should treat it as a retryable document-level technical error.
*
* @param errorMessage human-readable description of the failure; never null
*/
public record TargetFolderTechnicalFailure(String errorMessage) implements TargetFilenameResolutionResult {
/**
* @throws NullPointerException if {@code errorMessage} is null
*/
public TargetFolderTechnicalFailure {
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
}
}

View File

@@ -0,0 +1,242 @@
package de.gecheckt.pdf.umbenenner.application.service;
import java.util.Objects;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
import de.gecheckt.pdf.umbenenner.domain.model.AiAttemptContext;
import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
/**
* Orchestrates the complete AI naming pipeline for a single document.
* <p>
* This service is called after pre-checks have passed (i.e. after extraction
* and content quality checks) and performs exactly these steps in order:
* <ol>
* <li>Load the external prompt template via {@link PromptPort}.</li>
* <li>Limit the extracted document text to the configured maximum character count.</li>
* <li>Compose a deterministic AI request from the prompt and limited text.</li>
* <li>Invoke the AI service via {@link AiInvocationPort}.</li>
* <li>Parse the raw AI response for structural correctness.</li>
* <li>Validate the parsed response for semantic correctness (title, date).</li>
* <li>Return a typed {@link DocumentProcessingOutcome} encoding success or failure.</li>
* </ol>
*
* <h2>Outcome classification</h2>
* <ul>
* <li>{@link NamingProposalReady} — AI responded with a structurally and semantically
* valid naming proposal; document status will become {@code PROPOSAL_READY}.</li>
* <li>{@link AiTechnicalFailure} — transient infrastructure problem (prompt load failure,
* HTTP error, timeout, connection problem, or unparseable JSON response); transient
* error counter is incremented and the document is retryable in a later run.</li>
* <li>{@link AiFunctionalFailure} — the AI responded successfully but the content
* fails deterministic validation (title too long, prohibited characters, generic
* placeholder, unparseable date); content error counter is incremented and the
* one-retry rule applies.</li>
* </ul>
*
* <h2>AI traceability</h2>
* <p>
* Every returned outcome carries an {@link AiAttemptContext} with the model name,
* prompt identifier, page count, sent character count, and raw response (null on
* connection failure). This context is persisted verbatim in the processing attempt
* history by the coordinator.
*
* <h2>Thread safety</h2>
* <p>
* This service is stateless with respect to individual documents. It is safe to
* reuse a single instance across documents within the same batch run, provided the
* injected dependencies are thread-safe.
*/
public class AiNamingService {
private final AiInvocationPort aiInvocationPort;
private final PromptPort promptPort;
private final AiResponseValidator aiResponseValidator;
private final String modelName;
private final int maxTextCharacters;
/**
* Creates the AI naming service with all required dependencies.
*
* @param aiInvocationPort port for invoking the AI over HTTP; must not be null
* @param promptPort port for loading the external prompt template; must not be null
* @param aiResponseValidator semantic validator for parsed AI responses; must not be null
* @param modelName the AI model name to record in attempt history; must not be null
* @param maxTextCharacters the maximum number of document-text characters to send;
* must be &gt;= 1
* @throws NullPointerException if any reference parameter is null
* @throws IllegalArgumentException if {@code maxTextCharacters} is less than 1
*/
public AiNamingService(
AiInvocationPort aiInvocationPort,
PromptPort promptPort,
AiResponseValidator aiResponseValidator,
String modelName,
int maxTextCharacters) {
this.aiInvocationPort = Objects.requireNonNull(aiInvocationPort, "aiInvocationPort must not be null");
this.promptPort = Objects.requireNonNull(promptPort, "promptPort must not be null");
this.aiResponseValidator = Objects.requireNonNull(aiResponseValidator, "aiResponseValidator must not be null");
this.modelName = Objects.requireNonNull(modelName, "modelName must not be null");
if (maxTextCharacters < 1) {
throw new IllegalArgumentException(
"maxTextCharacters must be >= 1, but was: " + maxTextCharacters);
}
this.maxTextCharacters = maxTextCharacters;
}
/**
* Runs the AI naming pipeline for a document that passed all pre-checks.
* <p>
* The extraction result embedded in {@code preCheckPassed} supplies the
* document text and page count needed for the AI request. The candidate is
* carried through for correct outcome construction (correlation, logging).
*
* @param preCheckPassed the pre-check result carrying the candidate and extraction;
* must not be null
* @return a {@link DocumentProcessingOutcome} encoding the AI pipeline result;
* one of {@link NamingProposalReady}, {@link AiTechnicalFailure}, or
* {@link AiFunctionalFailure}; never null
* @throws NullPointerException if {@code preCheckPassed} is null
*/
public DocumentProcessingOutcome invoke(PreCheckPassed preCheckPassed) {
Objects.requireNonNull(preCheckPassed, "preCheckPassed must not be null");
SourceDocumentCandidate candidate = preCheckPassed.candidate();
int pageCount = preCheckPassed.extraction().pageCount().value();
String rawText = preCheckPassed.extraction().extractedText();
// Step 1: Load the external prompt template
return switch (promptPort.loadPrompt()) {
case PromptLoadingFailure promptFailure ->
// Prompt is unavailable — transient infrastructure failure; retryable
new AiTechnicalFailure(
candidate,
"Prompt loading failed [" + promptFailure.failureReason() + "]: "
+ promptFailure.failureMessage(),
null,
new AiAttemptContext(modelName, "prompt-load-failed", pageCount, 0, null));
case PromptLoadingSuccess promptSuccess ->
invokeWithPrompt(candidate, rawText, pageCount, promptSuccess);
};
}
// -------------------------------------------------------------------------
// Private helpers
// -------------------------------------------------------------------------
/**
* Continues the AI pipeline after the prompt has been loaded successfully.
*/
private DocumentProcessingOutcome invokeWithPrompt(
SourceDocumentCandidate candidate,
String rawText,
int pageCount,
PromptLoadingSuccess promptSuccess) {
String promptIdentifier = promptSuccess.promptIdentifier().identifier();
String promptContent = promptSuccess.promptContent();
// Step 2: Limit the document text to the configured maximum
String limitedText = DocumentTextLimiter.limit(rawText, maxTextCharacters);
int sentCharacterCount = limitedText.length();
// Step 3: Compose a deterministic AI request
AiRequestRepresentation request = AiRequestComposer.compose(
promptSuccess.promptIdentifier(),
promptContent,
limitedText);
// Step 4: Invoke the AI service
return switch (aiInvocationPort.invoke(request)) {
case AiInvocationTechnicalFailure invocationFailure ->
// Transient infrastructure failure: timeout, network error, etc.
new AiTechnicalFailure(
candidate,
"AI invocation failed [" + invocationFailure.failureReason() + "]: "
+ invocationFailure.failureMessage(),
null,
new AiAttemptContext(
modelName, promptIdentifier, pageCount, sentCharacterCount, null));
case AiInvocationSuccess invocationSuccess ->
processSuccessfulInvocation(
candidate, pageCount, sentCharacterCount, promptIdentifier,
invocationSuccess);
};
}
/**
* Processes a technically successful AI invocation: parses and validates the response.
*/
private DocumentProcessingOutcome processSuccessfulInvocation(
SourceDocumentCandidate candidate,
int pageCount,
int sentCharacterCount,
String promptIdentifier,
AiInvocationSuccess invocationSuccess) {
String rawResponseBody = invocationSuccess.rawResponse().content();
// Step 5: Parse the raw response for structural correctness
return switch (AiResponseParser.parse(invocationSuccess.rawResponse())) {
case AiResponseParsingFailure parsingFailure ->
// Unparseable JSON or structurally invalid response: transient technical error
new AiTechnicalFailure(
candidate,
"AI response could not be parsed [" + parsingFailure.failureReason() + "]: "
+ parsingFailure.failureMessage(),
null,
new AiAttemptContext(
modelName, promptIdentifier, pageCount, sentCharacterCount,
rawResponseBody));
case AiResponseParsingSuccess parsingSuccess ->
// Step 6: Validate semantics (title rules, date format)
validateAndBuildOutcome(
candidate, pageCount, sentCharacterCount, promptIdentifier,
rawResponseBody, parsingSuccess.response());
};
}
/**
* Validates the parsed AI response and builds the final outcome.
*/
private DocumentProcessingOutcome validateAndBuildOutcome(
SourceDocumentCandidate candidate,
int pageCount,
int sentCharacterCount,
String promptIdentifier,
String rawResponseBody,
ParsedAiResponse parsedResponse) {
AiAttemptContext aiContext = new AiAttemptContext(
modelName, promptIdentifier, pageCount, sentCharacterCount, rawResponseBody);
return switch (aiResponseValidator.validate(parsedResponse)) {
case AiResponseValidator.AiValidationResult.Invalid invalid ->
// Deterministic semantic failure: bad title, bad date, generic placeholder
new AiFunctionalFailure(candidate, invalid.errorMessage(), aiContext);
case AiResponseValidator.AiValidationResult.Valid valid -> {
NamingProposal proposal = valid.proposal();
yield new NamingProposalReady(candidate, proposal, aiContext);
}
};
}
}

View File

@@ -0,0 +1,107 @@
package de.gecheckt.pdf.umbenenner.application.service;
import java.util.Objects;

import org.json.JSONException;
import org.json.JSONObject;
import org.json.JSONTokener;

import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse;
/**
 * Parses the raw AI response body into a structurally validated {@link ParsedAiResponse}.
 * <p>
 * This parser enforces the technical contract: the AI must respond with exactly one
 * parseable JSON object containing the mandatory fields {@code title} and {@code reasoning},
 * and an optional {@code date} field. Any extra free-text outside the JSON object makes
 * the response technically invalid.
 *
 * <h2>Parsing rules</h2>
 * <ul>
 *   <li>The response body must be a valid JSON object (no arrays, no primitives).</li>
 *   <li>{@code title} must be present, a JSON string, and non-empty.</li>
 *   <li>{@code reasoning} must be present and a JSON string (may be empty in degenerate
 *       cases, but must exist).</li>
 *   <li>{@code date} is optional; if absent the field is modelled as an empty Optional.
 *       If present it must be a JSON string.</li>
 *   <li>Additional JSON fields are tolerated and silently ignored.</li>
 *   <li>Any free-text outside the outermost JSON object makes the response technically
 *       unacceptable; this is detected by parsing via a tokener and rejecting inputs
 *       with non-whitespace content remaining after the closing brace.</li>
 *   <li>A mandatory or optional field with a non-string JSON type (number, boolean,
 *       object, array) is a structural failure, never an exception.</li>
 * </ul>
 *
 * <h2>Architecture boundary</h2>
 * <p>
 * Only structural parsing is performed here. Semantic validation (title length,
 * special characters, date format, generic placeholder detection) is the responsibility
 * of {@link AiResponseValidator}.
 */
public final class AiResponseParser {

    private AiResponseParser() {
        // Static utility no instances
    }

    /**
     * Attempts to parse {@code rawResponse} into a {@link ParsedAiResponse}.
     * <p>
     * Returns {@link AiResponseParsingSuccess} if the response body is a valid JSON object
     * containing the mandatory fields. Returns {@link AiResponseParsingFailure} for any
     * structural problem: non-JSON content, JSON that is not an object, missing mandatory
     * fields, wrongly typed fields, or extra free-text surrounding the JSON object.
     * This method never propagates {@link JSONException} to the caller.
     *
     * @param rawResponse the raw AI response body; must not be null
     * @return a parsing result indicating success or failure; never null
     * @throws NullPointerException if {@code rawResponse} is null
     */
    public static AiResponseParsingResult parse(AiRawResponse rawResponse) {
        Objects.requireNonNull(rawResponse, "rawResponse must not be null");
        String body = rawResponse.content();
        if (body == null || body.isBlank()) {
            return new AiResponseParsingFailure("EMPTY_RESPONSE", "AI response body is empty or blank");
        }
        String trimmed = body.trim();
        // Fast pre-check: a pure JSON object must start with '{' and end with '}'.
        // This rejects responses that wrap the object in leading/trailing prose.
        if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) {
            return new AiResponseParsingFailure(
                    "NOT_JSON_OBJECT",
                    "AI response is not a pure JSON object (contains extra text or is not an object)");
        }
        JSONObject json;
        try {
            // Parse via a tokener so trailing content can be detected explicitly:
            // new JSONObject(String) silently ignores characters after the outermost
            // object, which would let "{...} extra {" slip through the pre-check above.
            JSONTokener tokener = new JSONTokener(trimmed);
            json = new JSONObject(tokener);
            if (tokener.nextClean() != 0) {
                // nextClean() returns 0 ('\0') only at end of input; anything else is
                // non-whitespace free-text after the JSON object.
                return new AiResponseParsingFailure(
                        "NOT_JSON_OBJECT",
                        "AI response is not a pure JSON object (contains extra text or is not an object)");
            }
        } catch (JSONException e) {
            return new AiResponseParsingFailure("INVALID_JSON", "AI response is not valid JSON: " + e.getMessage());
        }
        return extractFields(json);
    }

    /**
     * Extracts and structurally validates the mandatory and optional fields from an
     * already-parsed JSON object.
     */
    private static AiResponseParsingResult extractFields(JSONObject json) {
        try {
            // Validate mandatory field: title
            if (!json.has("title") || json.isNull("title")) {
                return new AiResponseParsingFailure("MISSING_TITLE", "AI response missing mandatory field 'title'");
            }
            String title = json.getString("title");
            if (title.isBlank()) {
                return new AiResponseParsingFailure("BLANK_TITLE", "AI response field 'title' is blank");
            }
            // Validate mandatory field: reasoning
            if (!json.has("reasoning") || json.isNull("reasoning")) {
                return new AiResponseParsingFailure("MISSING_REASONING", "AI response missing mandatory field 'reasoning'");
            }
            String reasoning = json.getString("reasoning");
            // Optional field: date
            String dateString = null;
            if (json.has("date") && !json.isNull("date")) {
                dateString = json.getString("date");
            }
            ParsedAiResponse parsed = ParsedAiResponse.of(title, reasoning, dateString);
            return new AiResponseParsingSuccess(parsed);
        } catch (JSONException e) {
            // getString throws when a field is present but not a JSON string (e.g.
            // {"title": 123}). Previously this escaped as an unchecked exception;
            // the class contract requires a typed structural failure instead.
            return new AiResponseParsingFailure(
                    "INVALID_FIELD_TYPE",
                    "AI response field has an invalid type: " + e.getMessage());
        }
    }
}

View File

@@ -0,0 +1,215 @@
package de.gecheckt.pdf.umbenenner.application.service;
import java.time.LocalDate;
import java.time.format.DateTimeParseException;
import java.util.Locale;
import java.util.Objects;
import java.util.Set;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
import de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse;
/**
* Validates the semantics of a structurally parsed AI response and produces a
* {@link NamingProposal} or a classified validation error.
*
* <h2>What this validator checks</h2>
* <p>
* All objectively computable rules are enforced here. Rules that depend on linguistic
* judgement (German language, comprehensibility, treatment of proper nouns) are
* delegated to the AI via the prompt contract and are not verified programmatically.
*
* <h3>Title rules (objective)</h3>
* <ul>
* <li>Base title must not exceed 20 characters.</li>
* <li>Title must not contain characters other than letters, digits, and space
* (Umlauts and ß are permitted).</li>
* <li>Title must not be a generic placeholder (e.g., "Dokument", "Datei", "Scan",
* "PDF", "Seite", "Unbekannt").</li>
* </ul>
*
* <h3>Date rules (objective)</h3>
* <ul>
* <li>If the AI provides a {@code date}, it must be interpretable as ISO-8601
* {@code YYYY-MM-DD}. A provided but unparseable date is a
* {@link AiErrorClassification#FUNCTIONAL functional} error.</li>
* <li>If the AI provides no {@code date}, the current date from {@link ClockPort} is
* used as a fallback ({@link DateSource#FALLBACK_CURRENT}).</li>
* </ul>
*
* <h2>Result</h2>
* <ul>
* <li>{@link AiValidationResult.Valid} — semantically sound; contains a ready
* {@link NamingProposal}.</li>
* <li>{@link AiValidationResult.Invalid} — contains an error message and the
* {@link AiErrorClassification} (always {@link AiErrorClassification#FUNCTIONAL}
* for validation failures from this class).</li>
* </ul>
*/
public final class AiResponseValidator {

    /** Maximum allowed length of the base title, in characters. */
    private static final int MAX_TITLE_LENGTH = 20;

    /**
     * Known generic placeholder titles that are not acceptable as document names.
     * These are case-insensitive matches (see {@link #isGenericTitle(String)}).
     */
    private static final Set<String> GENERIC_TITLES = Set.of(
            "dokument", "datei", "scan", "pdf", "seite", "unbekannt",
            "document", "file", "unknown", "page"
    );

    private final ClockPort clockPort;

    /**
     * Creates the validator with the given clock for date fallback.
     *
     * @param clockPort the clock for current-date fallback; must not be null
     * @throws NullPointerException if {@code clockPort} is null
     */
    public AiResponseValidator(ClockPort clockPort) {
        this.clockPort = Objects.requireNonNull(clockPort, "clockPort must not be null");
    }

    /**
     * Validates the parsed AI response and produces a {@link NamingProposal} on success.
     * <p>
     * Title rules: at most {@value #MAX_TITLE_LENGTH} characters; only letters, digits,
     * and spaces; no generic placeholder. Date rules: an AI-provided date must be valid
     * ISO-8601 {@code YYYY-MM-DD}; an absent date falls back to the current date from
     * the injected {@link ClockPort}. All failures are classified as
     * {@link AiErrorClassification#FUNCTIONAL}.
     *
     * @param parsed the structurally parsed AI response; must not be null
     * @return a {@link AiValidationResult} indicating validity or the specific failure;
     *         never null
     * @throws NullPointerException if {@code parsed} is null
     */
    public AiValidationResult validate(ParsedAiResponse parsed) {
        Objects.requireNonNull(parsed, "parsed must not be null");
        // --- Title validation ---
        String title = parsed.title().trim();
        if (title.length() > MAX_TITLE_LENGTH) {
            return AiValidationResult.invalid(
                    "Title exceeds 20 characters (base title): '" + title + "'",
                    AiErrorClassification.FUNCTIONAL);
        }
        if (!isAllowedTitleCharacters(title)) {
            return AiValidationResult.invalid(
                    "Title contains disallowed characters (only letters, digits, and spaces are permitted): '"
                            + title + "'",
                    AiErrorClassification.FUNCTIONAL);
        }
        if (isGenericTitle(title)) {
            return AiValidationResult.invalid(
                    "Title is a generic placeholder and not acceptable: '" + title + "'",
                    AiErrorClassification.FUNCTIONAL);
        }
        // --- Date validation / fallback ---
        LocalDate resolvedDate;
        DateSource dateSource;
        if (parsed.dateString().isPresent()) {
            String dateStr = parsed.dateString().get();
            try {
                // LocalDate.parse uses strict ISO_LOCAL_DATE (YYYY-MM-DD).
                resolvedDate = LocalDate.parse(dateStr);
                dateSource = DateSource.AI_PROVIDED;
            } catch (DateTimeParseException e) {
                return AiValidationResult.invalid(
                        "AI-provided date '" + dateStr + "' is not a valid YYYY-MM-DD date: " + e.getMessage(),
                        AiErrorClassification.FUNCTIONAL);
            }
        } else {
            // No date provided by the AI → fall back to current date from the clock
            resolvedDate = clockPort.now().atZone(java.time.ZoneOffset.UTC).toLocalDate();
            dateSource = DateSource.FALLBACK_CURRENT;
        }
        NamingProposal proposal = new NamingProposal(resolvedDate, dateSource, title, parsed.reasoning());
        return AiValidationResult.valid(proposal);
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /**
     * Returns {@code true} if every character in the title is a letter, digit, or space.
     * <p>
     * Permits Unicode letters including German Umlauts (ä, ö, ü, Ä, Ö, Ü) and ß.
     */
    private static boolean isAllowedTitleCharacters(String title) {
        for (int i = 0; i < title.length(); i++) {
            char c = title.charAt(i);
            if (!Character.isLetter(c) && !Character.isDigit(c) && c != ' ') {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns {@code true} if the title is a known generic placeholder.
     * Comparison is case-insensitive.
     */
    private static boolean isGenericTitle(String title) {
        // Locale.ROOT keeps the case mapping locale-independent; with the default
        // locale (e.g. Turkish dotless i) uppercase placeholders such as "DATEI"
        // would not lower-case to "datei" and would slip past this check.
        return GENERIC_TITLES.contains(title.toLowerCase(Locale.ROOT));
    }

    // -------------------------------------------------------------------------
    // Result type
    // -------------------------------------------------------------------------

    /**
     * The result of a semantic AI response validation.
     */
    public sealed interface AiValidationResult permits AiValidationResult.Valid, AiValidationResult.Invalid {

        /**
         * Returns a valid result containing the produced {@link NamingProposal}.
         *
         * @param proposal the validated naming proposal; must not be null
         * @return a valid result; never null
         */
        static AiValidationResult valid(NamingProposal proposal) {
            return new Valid(proposal);
        }

        /**
         * Returns an invalid result with an error message and classification.
         *
         * @param errorMessage human-readable description of the validation failure;
         *        must not be null
         * @param classification always {@link AiErrorClassification#FUNCTIONAL} for
         *        semantic title/date violations
         * @return an invalid result; never null
         */
        static AiValidationResult invalid(String errorMessage, AiErrorClassification classification) {
            return new Invalid(errorMessage, classification);
        }

        /**
         * A successful validation result containing the ready {@link NamingProposal}.
         *
         * @param proposal the validated and complete naming proposal; never null
         */
        record Valid(NamingProposal proposal) implements AiValidationResult {
            public Valid {
                Objects.requireNonNull(proposal, "proposal must not be null");
            }
        }

        /**
         * A failed validation result carrying the error details.
         *
         * @param errorMessage the reason for the failure; never null
         * @param classification the error category; never null
         */
        record Invalid(String errorMessage, AiErrorClassification classification)
                implements AiValidationResult {
            public Invalid {
                Objects.requireNonNull(errorMessage, "errorMessage must not be null");
                Objects.requireNonNull(classification, "classification must not be null");
            }
        }
    }
}

View File

@@ -13,15 +13,26 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnica
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
import de.gecheckt.pdf.umbenenner.domain.model.AiAttemptContext;
import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
import java.time.Instant;
import java.util.Objects;
@@ -32,7 +43,8 @@ import java.util.function.Function;
* Application-level service that implements the per-document processing logic.
* <p>
* This service is the single authoritative place for the decision rules:
* idempotency checks, status/counter mapping, and consistent two-level persistence.
* idempotency checks, status/counter mapping, target-copy finalization, and consistent
* two-level persistence.
*
* <h2>Processing order per candidate</h2>
* <ol>
@@ -41,56 +53,81 @@ import java.util.function.Function;
* a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist
* a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.</li>
* <li>Otherwise execute the flow (already done by the caller) and map the result
* into status, counters and retryable flag.</li>
* <li>If the overall status is {@link ProcessingStatus#PROPOSAL_READY} → load the
* leading proposal attempt and execute the target-copy finalization flow:
* build the base filename, resolve duplicates, write the copy, persist SUCCESS or
* FAILED_RETRYABLE.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming) and map
* the result into status, counters, and retryable flag.</li>
* <li>Persist exactly one historised processing attempt for the identified document.</li>
* <li>Persist the updated document master record.</li>
* </ol>
*
* <h2>Minimal rules</h2>
* <h2>Status transitions</h2>
* <ul>
* <li>Already successful documents are skipped in later runs.</li>
* <li>Already finally failed documents are skipped in later runs.</li>
* <li>First historised deterministic content failure from processing →
* {@link ProcessingStatus#FAILED_RETRYABLE}, content error counter becomes 1,
* {@code retryable=true}.</li>
* <li>Second historised deterministic content failure in a later run →
* {@link ProcessingStatus#FAILED_FINAL}, content error counter becomes 2,
* {@code retryable=false}.</li>
* <li>Document-related technical failures after successful fingerprinting remain
* {@link ProcessingStatus#FAILED_RETRYABLE}, increment transient error counter,
* {@code retryable=true}.</li>
* <li>Skip events do not change error counters.</li>
* <li>Pre-check passed + AI naming proposal ready → {@link ProcessingStatus#PROPOSAL_READY}</li>
* <li>First deterministic content failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
* <li>Second deterministic content failure → {@link ProcessingStatus#FAILED_FINAL}</li>
* <li>Technical infrastructure failure → {@link ProcessingStatus#FAILED_RETRYABLE}</li>
* <li>{@link ProcessingStatus#PROPOSAL_READY} + successful target copy + consistent
* persistence → {@link ProcessingStatus#SUCCESS}</li>
* <li>{@link ProcessingStatus#PROPOSAL_READY} + technical failure → {@link ProcessingStatus#FAILED_RETRYABLE},
* transient error counter +1</li>
* <li>{@link ProcessingStatus#SUCCESS} → {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} skip</li>
* <li>{@link ProcessingStatus#FAILED_FINAL} → {@link ProcessingStatus#SKIPPED_FINAL_FAILURE} skip</li>
* </ul>
*
* <h2>Leading source for the naming proposal (authoritative rule)</h2>
* <p>
* When a document is in {@code PROPOSAL_READY} state, the authoritative source for the
* validated title, resolved date, date source, and AI reasoning is the most recent
* {@code PROPOSAL_READY} attempt in the history. This coordinator never reconstructs
* proposal data from the document master record or re-invokes the AI when a valid
* {@code PROPOSAL_READY} attempt already exists.
*
* <h2>SUCCESS condition (binding rule)</h2>
* <p>
* {@code SUCCESS} is set only after:
* <ol>
* <li>The target copy has been successfully written.</li>
* <li>The final target filename is determined.</li>
* <li>The persistence (attempt + master record) has been consistently committed.</li>
* </ol>
* If persistence fails after a successful target copy, a best-effort rollback of the
* newly written copy is attempted before the error is recorded.
*
* <h2>Persistence consistency</h2>
* <p>
* For every identified document, both the processing attempt and the master record are
* written atomically using a unit of work pattern. If either write fails, both writes
* are rolled back and the failure is logged. The batch run continues with the next
* candidate.
* For every identified document (except PROPOSAL_READY that fails before producing any
* persistent artifact), both the processing attempt and the master record are written
* atomically via a unit of work. If either write fails, both writes are rolled back and
* the failure is logged. The batch run continues with the next candidate.
*
* <h2>Pre-fingerprint failures</h2>
* <p>
* Failures that occur before a successful fingerprint is available are <em>not</em>
* historised in SQLite. They are handled by the caller and logged as non-identifiable
* run events.
* historised in SQLite. They are handled by the caller.
*/
public class DocumentProcessingCoordinator {
private final DocumentRecordRepository documentRecordRepository;
private final ProcessingAttemptRepository processingAttemptRepository;
private final UnitOfWorkPort unitOfWorkPort;
private final TargetFolderPort targetFolderPort;
private final TargetFileCopyPort targetFileCopyPort;
private final ProcessingLogger logger;
/**
* Creates the document processor with the required persistence ports and logger.
* Creates the document processing coordinator with all required ports and the logger.
*
* @param documentRecordRepository port for reading and writing the document master record;
* must not be null
* @param processingAttemptRepository port for writing and reading the attempt history;
* must not be null
* @param unitOfWorkPort port for executing operations atomically;
* @param unitOfWorkPort port for executing operations atomically; must not be null
* @param targetFolderPort port for target folder duplicate resolution and cleanup;
* must not be null
* @param targetFileCopyPort port for copying source files to the target folder;
* must not be null
* @param logger for processing-related logging; must not be null
* @throws NullPointerException if any parameter is null
@@ -99,6 +136,8 @@ public class DocumentProcessingCoordinator {
DocumentRecordRepository documentRecordRepository,
ProcessingAttemptRepository processingAttemptRepository,
UnitOfWorkPort unitOfWorkPort,
TargetFolderPort targetFolderPort,
TargetFileCopyPort targetFileCopyPort,
ProcessingLogger logger) {
this.documentRecordRepository =
Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null");
@@ -106,31 +145,25 @@ public class DocumentProcessingCoordinator {
Objects.requireNonNull(processingAttemptRepository, "processingAttemptRepository must not be null");
this.unitOfWorkPort =
Objects.requireNonNull(unitOfWorkPort, "unitOfWorkPort must not be null");
this.targetFolderPort =
Objects.requireNonNull(targetFolderPort, "targetFolderPort must not be null");
this.targetFileCopyPort =
Objects.requireNonNull(targetFileCopyPort, "targetFileCopyPort must not be null");
this.logger = Objects.requireNonNull(logger, "logger must not be null");
}
/**
* Applies the full processing logic for one identified document candidate.
* <p>
* The caller must have already computed a valid {@link DocumentFingerprint} for the
* candidate. The outcome (from the PDF extraction and pre-check pipeline) is
* provided as {@code outcome} and is used only when the document is not in a
* terminal state.
* <p>
* This method never throws. All persistence failures are caught, logged, and
* treated as controlled per-document failures so the batch run can continue.
* Convenience overload that accepts a pre-computed outcome (for callers that have
* already determined the outcome before calling this method).
*
* @param candidate the source document candidate being processed; must not be null
* @param fingerprint the successfully computed fingerprint for this candidate;
* must not be null
* @param outcome the result of the extraction and pre-check pipeline;
* must not be null
* @param context the current batch run context (for run ID and timing);
* must not be null
* @param attemptStart the instant at which processing of this candidate began;
* must not be null
* @return true if processing and persistence succeeded for this document, false if a
* persistence failure occurred
* @param candidate the source document candidate being processed; must not be null
* @param fingerprint the successfully computed fingerprint; must not be null
* @param outcome the pipeline result; must not be null
* @param context the current batch run context; must not be null
* @param attemptStart the instant at which processing began; must not be null
* @return true if processing and persistence succeeded, false if persistence failed
*/
public boolean process(
SourceDocumentCandidate candidate,
@@ -149,33 +182,32 @@ public class DocumentProcessingCoordinator {
}
/**
* Applies the full processing logic for one identified document candidate.
* <p>
* The caller must have already computed a valid {@link DocumentFingerprint} for the
* candidate. This method handles the complete processing flow:
* <ol>
* <li>Load document master record.</li>
* <li>Handle terminal SUCCESS / FAILED_FINAL skip cases first.</li>
* <li>Only if not terminal: execute the flow (PDF extraction + pre-checks).</li>
* <li>Map outcome to status, counters and retryable flag.</li>
* <li>Persist exactly one historised processing attempt.</li>
* <li>Persist the updated document master record.</li>
* </ol>
* Applies the full processing logic for one identified document candidate,
* loading the document master record internally and deferring pipeline execution
* until the terminal-state check passes.
* <p>
* This method never throws. All persistence failures are caught, logged, and
* treated as controlled per-document failures so the batch run can continue.
*
* @param candidate the source document candidate being processed; must not be null
* @param fingerprint the successfully computed fingerprint for this candidate;
* must not be null
* @param context the current batch run context (for run ID and timing);
* must not be null
* @param attemptStart the instant at which processing of this candidate began;
* must not be null
* @param pipelineExecutor functional interface that executes the extraction and pre-check
* pipeline when needed; must not be null
* @return true if processing and persistence succeeded for this document, false if a
* persistence failure occurred (lookup, attempt write, or record write)
* <h2>Processing order</h2>
* <ol>
* <li>Load the document master record.</li>
* <li>If the status is {@code SUCCESS} → persist
* {@code SKIPPED_ALREADY_PROCESSED}.</li>
* <li>If the status is {@code FAILED_FINAL} → persist
* {@code SKIPPED_FINAL_FAILURE}.</li>
* <li>If the status is {@code PROPOSAL_READY} → execute the target-copy
* finalization without invoking the AI pipeline again.</li>
* <li>Otherwise execute the pipeline (extraction + pre-checks + AI naming) and
* persist the outcome.</li>
* </ol>
*
* @param candidate the source document candidate; must not be null
* @param fingerprint the successfully computed fingerprint; must not be null
* @param context the current batch run context; must not be null
* @param attemptStart the instant at which processing began; must not be null
* @param pipelineExecutor executes the extraction + AI pipeline when needed; must not be null
* @return true if processing and persistence succeeded, false if a persistence failure occurred
*/
public boolean processDeferredOutcome(
SourceDocumentCandidate candidate,
@@ -194,7 +226,7 @@ public class DocumentProcessingCoordinator {
DocumentRecordLookupResult lookupResult =
documentRecordRepository.findByFingerprint(fingerprint);
// Step 2: Handle persistence lookup failure cannot safely proceed
// Step 2: Handle persistence lookup failure
if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) {
logger.error("Cannot process '{}': master record lookup failed: {}",
candidate.uniqueIdentifier(), failure.errorMessage());
@@ -204,7 +236,6 @@ public class DocumentProcessingCoordinator {
// Step 3: Determine the action based on the lookup result
return switch (lookupResult) {
case DocumentTerminalSuccess terminalSuccess -> {
// Document already successfully processed → skip
logger.info("Skipping '{}': already successfully processed (fingerprint: {}).",
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
yield persistSkipAttempt(
@@ -214,7 +245,6 @@ public class DocumentProcessingCoordinator {
}
case DocumentTerminalFinalFailure terminalFailure -> {
// Document finally failed → skip
logger.info("Skipping '{}': already finally failed (fingerprint: {}).",
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
yield persistSkipAttempt(
@@ -223,14 +253,23 @@ public class DocumentProcessingCoordinator {
context, attemptStart);
}
case DocumentKnownProcessable knownProcessable
when knownProcessable.record().overallStatus() == ProcessingStatus.PROPOSAL_READY -> {
// Naming proposal is present — execute the target-copy finalization
// without triggering a new AI call
logger.info("Finalizing '{}': naming proposal present, proceeding to target copy "
+ "(fingerprint: {}).",
candidate.uniqueIdentifier(), fingerprint.sha256Hex());
yield finalizeProposalReady(
candidate, fingerprint, knownProcessable.record(), context, attemptStart);
}
case DocumentUnknown ignored -> {
// New document execute pipeline and process
DocumentProcessingOutcome outcome = pipelineExecutor.apply(candidate);
yield processAndPersistNewDocument(candidate, fingerprint, outcome, context, attemptStart);
}
case DocumentKnownProcessable knownProcessable -> {
// Known but not terminal execute pipeline and process
DocumentProcessingOutcome outcome = pipelineExecutor.apply(candidate);
yield processAndPersistKnownDocument(
candidate, fingerprint, outcome, knownProcessable.record(),
@@ -238,7 +277,6 @@ public class DocumentProcessingCoordinator {
}
default -> {
// Exhaustive sealed hierarchy; this branch is unreachable
logger.error("Unexpected lookup result type for '{}': {}",
candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName());
yield false;
@@ -246,24 +284,259 @@ public class DocumentProcessingCoordinator {
};
}
// -------------------------------------------------------------------------
// =========================================================================
// M6 target-copy finalization path
// =========================================================================
/**
 * Finalizes a document whose overall status is {@code PROPOSAL_READY}.
 * <p>
 * Processing order:
 * <ol>
 *   <li>Load the leading {@code PROPOSAL_READY} attempt — the authoritative source
 *       of the naming proposal; the AI pipeline is never re-invoked here.</li>
 *   <li>Build the base filename from the proposal's date and title.</li>
 *   <li>Resolve the first available unique filename in the target folder.</li>
 *   <li>Copy the source file to the target folder.</li>
 *   <li>Persist a new {@code SUCCESS} attempt and update the master record.</li>
 *   <li>If persistence fails after a successful copy: attempt best-effort rollback
 *       of the copy and persist {@code FAILED_RETRYABLE} instead (delegated to
 *       {@code persistTargetCopySuccess}).</li>
 * </ol>
 * <p>
 * Every failure before the copy (history lookup, inconsistent proposal state,
 * duplicate resolution, copy itself) is treated as a document-level technical error:
 * {@code FAILED_RETRYABLE} with the transient error counter incremented.
 * A missing or inconsistent {@code PROPOSAL_READY} attempt is likewise treated as a
 * document-level technical error (retryable, transient counter +1).
 *
 * @param candidate      the source document candidate being finalized; must not be null
 * @param fingerprint    the fingerprint identifying the document; must not be null
 * @param existingRecord the current master record (status {@code PROPOSAL_READY});
 *                       must not be null
 * @param context        the current batch run context (run ID); must not be null
 * @param attemptStart   the instant at which processing of this candidate began
 * @return true if SUCCESS was persisted, false if a persistence failure occurred
 */
private boolean finalizeProposalReady(
        SourceDocumentCandidate candidate,
        DocumentFingerprint fingerprint,
        DocumentRecord existingRecord,
        BatchRunContext context,
        Instant attemptStart) {
    // NOTE(review): captured before the copy is performed, so this timestamp marks the
    // start of finalization, not its completion — confirm that is the intended
    // semantics of the attempt end time passed to the persist helpers.
    Instant now = Instant.now();
    // --- Step 1: Load the leading PROPOSAL_READY attempt ---
    ProcessingAttempt proposalAttempt;
    try {
        proposalAttempt = processingAttemptRepository.findLatestProposalReadyAttempt(fingerprint);
    } catch (DocumentPersistenceException e) {
        logger.error("Failed to load leading PROPOSAL_READY attempt for '{}': {}",
                candidate.uniqueIdentifier(), e.getMessage(), e);
        return persistTransientError(
                candidate, fingerprint, existingRecord, context, attemptStart, now,
                "Failed to load naming proposal from history: " + e.getMessage());
    }
    if (proposalAttempt == null) {
        // Master record says PROPOSAL_READY but the history has no such attempt —
        // an inconsistent persistence state; record it as a retryable error.
        logger.error("Document '{}' has PROPOSAL_READY status but no matching attempt "
                + "found in history. Inconsistent persistence state.",
                candidate.uniqueIdentifier());
        return persistTransientError(
                candidate, fingerprint, existingRecord, context, attemptStart, now,
                "Status is PROPOSAL_READY but no PROPOSAL_READY attempt exists in history");
    }
    // --- Step 2: Build base filename from the proposal ---
    TargetFilenameBuildingService.BaseFilenameResult filenameResult =
            TargetFilenameBuildingService.buildBaseFilename(proposalAttempt);
    if (filenameResult instanceof TargetFilenameBuildingService.InconsistentProposalState inconsistent) {
        logger.error("Inconsistent proposal state for '{}': {}",
                candidate.uniqueIdentifier(), inconsistent.reason());
        return persistTransientError(
                candidate, fingerprint, existingRecord, context, attemptStart, now,
                "Inconsistent proposal state: " + inconsistent.reason());
    }
    // Safe cast: the result type is two-valued, and the failure case returned above.
    String baseFilename = ((TargetFilenameBuildingService.BaseFilenameReady) filenameResult).baseFilename();
    // --- Step 3: Resolve unique filename in target folder ---
    TargetFilenameResolutionResult resolutionResult =
            targetFolderPort.resolveUniqueFilename(baseFilename);
    if (resolutionResult instanceof TargetFolderTechnicalFailure folderFailure) {
        logger.error("Duplicate resolution failed for '{}': {}",
                candidate.uniqueIdentifier(), folderFailure.errorMessage());
        return persistTransientError(
                candidate, fingerprint, existingRecord, context, attemptStart, now,
                "Target folder duplicate resolution failed: " + folderFailure.errorMessage());
    }
    String resolvedFilename =
            ((ResolvedTargetFilename) resolutionResult).resolvedFilename();
    logger.info("Resolved target filename for '{}': '{}'.",
            candidate.uniqueIdentifier(), resolvedFilename);
    // --- Step 4: Copy file to target ---
    TargetFileCopyResult copyResult =
            targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename);
    if (copyResult instanceof TargetFileCopyTechnicalFailure copyFailure) {
        logger.error("Target copy failed for '{}': {}",
                candidate.uniqueIdentifier(), copyFailure.errorMessage());
        return persistTransientError(
                candidate, fingerprint, existingRecord, context, attemptStart, now,
                "Target file copy failed: " + copyFailure.errorMessage());
    }
    // Copy succeeded — attempt to persist SUCCESS. If persistence fails, the helper
    // rolls back the copy (best-effort) and persists FAILED_RETRYABLE instead.
    String targetFolderLocator = targetFolderPort.getTargetFolderLocator();
    return persistTargetCopySuccess(
            candidate, fingerprint, existingRecord, context, attemptStart, now,
            resolvedFilename, targetFolderLocator);
}
/**
 * Persists the SUCCESS attempt and updated master record after a successful target copy.
 * <p>
 * Both writes happen atomically in one unit of work. If the atomic persistence fails
 * after the copy has already been written, a best-effort rollback of the target file
 * is attempted (via {@code targetFolderPort.tryDeleteTargetFile}) and a
 * {@link ProcessingStatus#FAILED_RETRYABLE} attempt is persisted instead, so the
 * incident is recorded in the history.
 *
 * @param candidate           the source document candidate; must not be null
 * @param fingerprint         the document fingerprint; must not be null
 * @param existingRecord      the master record prior to this update; must not be null
 * @param context             the current batch run context (run ID); must not be null
 * @param attemptStart        the instant processing of this candidate began
 * @param now                 the attempt end timestamp used for the SUCCESS attempt
 * @param resolvedFilename    the final (duplicate-resolved) target filename
 * @param targetFolderLocator opaque locator of the target folder for the master record
 * @return true if SUCCESS was persisted; false if persistence itself failed
 */
private boolean persistTargetCopySuccess(
        SourceDocumentCandidate candidate,
        DocumentFingerprint fingerprint,
        DocumentRecord existingRecord,
        BatchRunContext context,
        Instant attemptStart,
        Instant now,
        String resolvedFilename,
        String targetFolderLocator) {
    try {
        int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
        // NOTE(review): positional constructor with a long run of nulls — presumably
        // the AI-related and error-related attempt fields, all absent for a SUCCESS
        // attempt; verify the positions against the ProcessingAttempt declaration.
        ProcessingAttempt successAttempt = new ProcessingAttempt(
                fingerprint, context.runId(), attemptNumber, attemptStart, now,
                ProcessingStatus.SUCCESS, null, null, false,
                null, null, null, null, null, null, null, null, null,
                resolvedFilename);
        DocumentRecord successRecord = buildSuccessRecord(
                existingRecord, candidate, now, targetFolderLocator, resolvedFilename);
        // Attempt and master record must be committed together (all-or-nothing).
        unitOfWorkPort.executeInTransaction(txOps -> {
            txOps.saveProcessingAttempt(successAttempt);
            txOps.updateDocumentRecord(successRecord);
        });
        logger.info("Document '{}' successfully processed. Target: '{}'.",
                candidate.uniqueIdentifier(), resolvedFilename);
        return true;
    } catch (DocumentPersistenceException e) {
        // Persistence failed after a successful copy — rollback the copy (best-effort)
        logger.error("Persistence failed after successful target copy for '{}': {}. "
                + "Attempting best-effort rollback of target file '{}'.",
                candidate.uniqueIdentifier(), e.getMessage(), resolvedFilename);
        targetFolderPort.tryDeleteTargetFile(resolvedFilename);
        // Persist FAILED_RETRYABLE to record the incident; this secondary persistence
        // may itself fail, which is logged inside the helper but does not change the
        // return value of this method.
        persistTransientErrorAfterPersistenceFailure(
                candidate, fingerprint, existingRecord, context, attemptStart,
                Instant.now(),
                "Persistence failed after successful target copy (best-effort rollback attempted): "
                + e.getMessage());
        return false;
    }
}
/**
 * Records a document-level technical error from the target-copy finalization stage
 * as a {@code FAILED_RETRYABLE} attempt and bumps the transient error counter on the
 * document master record. Attempt and record are written atomically in one unit of
 * work; skip/content counters are left untouched.
 *
 * @param candidate      the source document candidate; must not be null
 * @param fingerprint    the document fingerprint; must not be null
 * @param existingRecord the master record prior to this update; must not be null
 * @param context        the current batch run context (run ID); must not be null
 * @param attemptStart   the instant processing of this candidate began
 * @param now            the attempt end timestamp
 * @param errorMessage   human-readable description of the technical error
 * @return true if the error was persisted; false if the error persistence itself failed
 */
private boolean persistTransientError(
        SourceDocumentCandidate candidate,
        DocumentFingerprint fingerprint,
        DocumentRecord existingRecord,
        BatchRunContext context,
        Instant attemptStart,
        Instant now,
        String errorMessage) {
    FailureCounters bumpedCounters =
            existingRecord.failureCounters().withIncrementedTransientErrorCount();
    try {
        int nextAttemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
        // Build both persistence artifacts up front, then commit them together.
        DocumentRecord failedRecord = buildTransientErrorRecord(
                existingRecord, candidate, bumpedCounters, now);
        ProcessingAttempt failedAttempt = ProcessingAttempt.withoutAiFields(
                fingerprint, context.runId(), nextAttemptNumber, attemptStart, now,
                ProcessingStatus.FAILED_RETRYABLE,
                ProcessingStatus.FAILED_RETRYABLE.name(),
                errorMessage, true);
        unitOfWorkPort.executeInTransaction(tx -> {
            tx.saveProcessingAttempt(failedAttempt);
            tx.updateDocumentRecord(failedRecord);
        });
        logger.debug("Transient error persisted for '{}': status=FAILED_RETRYABLE, "
                + "transientErrors={}.",
                candidate.uniqueIdentifier(),
                bumpedCounters.transientErrorCount());
        return true;
    } catch (DocumentPersistenceException e) {
        // Even recording the error failed; log it and let the batch run continue.
        logger.error("Failed to persist transient error for '{}': {}",
                candidate.uniqueIdentifier(), e.getMessage(), e);
        return false;
    }
}
/**
* Attempts to persist a {@code FAILED_RETRYABLE} attempt after a persistence failure
* that occurred following a successful target copy. This is a secondary persistence
* effort; its failure is logged but does not change the return value.
*/
private void persistTransientErrorAfterPersistenceFailure(
SourceDocumentCandidate candidate,
DocumentFingerprint fingerprint,
DocumentRecord existingRecord,
BatchRunContext context,
Instant attemptStart,
Instant now,
String errorMessage) {
FailureCounters updatedCounters =
existingRecord.failureCounters().withIncrementedTransientErrorCount();
try {
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields(
fingerprint, context.runId(), attemptNumber, attemptStart, now,
ProcessingStatus.FAILED_RETRYABLE,
ProcessingStatus.FAILED_RETRYABLE.name(),
errorMessage, true);
DocumentRecord errorRecord = buildTransientErrorRecord(
existingRecord, candidate, updatedCounters, now);
unitOfWorkPort.executeInTransaction(txOps -> {
txOps.saveProcessingAttempt(errorAttempt);
txOps.updateDocumentRecord(errorRecord);
});
} catch (DocumentPersistenceException secondaryEx) {
logger.error("Secondary persistence failure for '{}' after target copy rollback: {}",
candidate.uniqueIdentifier(), secondaryEx.getMessage(), secondaryEx);
}
}
// =========================================================================
// Skip path
// -------------------------------------------------------------------------
// =========================================================================
/**
* Persists a skip attempt and updates the master record's {@code updatedAt} timestamp.
* <p>
* Skip events do not change any failure counter. The master record's overall status
* remains unchanged (terminal).
*
* @param candidate the candidate being skipped
* @param fingerprint the document fingerprint
* @param existingRecord the current master record (already terminal)
* @param skipStatus the skip status to record ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}
* or {@link ProcessingStatus#SKIPPED_FINAL_FAILURE})
* @param context the current batch run context
* @param attemptStart the start instant of this processing attempt
     * @return true if persistence succeeded, false if a persistence exception occurred
     */
private boolean persistSkipAttempt(
SourceDocumentCandidate candidate,
@@ -278,21 +551,13 @@ public class DocumentProcessingCoordinator {
try {
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt skipAttempt = new ProcessingAttempt(
fingerprint,
context.runId(),
attemptNumber,
attemptStart,
now,
skipStatus,
null, // no failure class for skip
null, // no failure message for skip
false // not retryable
);
ProcessingAttempt skipAttempt = ProcessingAttempt.withoutAiFields(
fingerprint, context.runId(), attemptNumber,
attemptStart, now, skipStatus,
null, null, false);
DocumentRecord skipRecord = buildSkipRecord(existingRecord, candidate, now);
// Write attempt and master record atomically
unitOfWorkPort.executeInTransaction(txOps -> {
txOps.saveProcessingAttempt(skipAttempt);
txOps.updateDocumentRecord(skipRecord);
@@ -309,11 +574,10 @@ public class DocumentProcessingCoordinator {
}
}
// -------------------------------------------------------------------------
// =========================================================================
// New document path
// -------------------------------------------------------------------------
// =========================================================================
/** Maps the pipeline outcome for a new document and persists attempt + new master record. */
private boolean processAndPersistNewDocument(
SourceDocumentCandidate candidate,
DocumentFingerprint fingerprint,
@@ -325,14 +589,13 @@ public class DocumentProcessingCoordinator {
ProcessingOutcomeTransition.ProcessingOutcome outcome = mapOutcomeForNewDocument(pipelineOutcome);
DocumentRecord newRecord = buildNewDocumentRecord(fingerprint, candidate, outcome, now);
return persistAttemptAndRecord(candidate, fingerprint, context, attemptStart, now, outcome,
txOps -> txOps.createDocumentRecord(newRecord));
pipelineOutcome, txOps -> txOps.createDocumentRecord(newRecord));
}
// -------------------------------------------------------------------------
// Known processable document path
// -------------------------------------------------------------------------
// =========================================================================
// Known processable document path (non-PROPOSAL_READY)
// =========================================================================
/** Maps the pipeline outcome for a known document and persists attempt + updated master record. */
private boolean processAndPersistKnownDocument(
SourceDocumentCandidate candidate,
DocumentFingerprint fingerprint,
@@ -342,62 +605,50 @@ public class DocumentProcessingCoordinator {
Instant attemptStart) {
Instant now = Instant.now();
ProcessingOutcomeTransition.ProcessingOutcome outcome = mapOutcomeForKnownDocument(pipelineOutcome, existingRecord.failureCounters());
ProcessingOutcomeTransition.ProcessingOutcome outcome =
mapOutcomeForKnownDocument(pipelineOutcome, existingRecord.failureCounters());
DocumentRecord updatedRecord = buildUpdatedDocumentRecord(existingRecord, candidate, outcome, now);
return persistAttemptAndRecord(candidate, fingerprint, context, attemptStart, now, outcome,
txOps -> txOps.updateDocumentRecord(updatedRecord));
pipelineOutcome, txOps -> txOps.updateDocumentRecord(updatedRecord));
}
// -------------------------------------------------------------------------
// =========================================================================
// Extraction outcome mapping
// -------------------------------------------------------------------------
// =========================================================================
    /**
     * Maps an outcome to status, counters, and retryable flag for a brand-new
     * document (no prior history, counters start at zero).
     * <p>
     * Pure delegation to {@link ProcessingOutcomeTransition#forNewDocument}; kept as a
     * private method so the coordinator reads symmetrically for new and known documents.
     *
     * @param pipelineOutcome the pipeline result
     * @return the outcome with status, counters and retryable flag
     */
    private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForNewDocument(
            DocumentProcessingOutcome pipelineOutcome) {
        return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome);
    }
    /**
     * Maps an outcome to status, counters, and retryable flag, taking the
     * existing failure counters into account.
     * <p>
     * Pure delegation to {@link ProcessingOutcomeTransition#forKnownDocument}; the
     * transition policy itself is stateless and lives in that class.
     *
     * @param pipelineOutcome the pipeline result
     * @param existingCounters the current failure counters from the master record
     * @return the outcome with updated status, counters and retryable flag
     */
    private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForKnownDocument(
            DocumentProcessingOutcome pipelineOutcome,
            FailureCounters existingCounters) {
        return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters);
    }
// -------------------------------------------------------------------------
// =========================================================================
// Record assembly helpers
// -------------------------------------------------------------------------
// =========================================================================
private DocumentRecord buildNewDocumentRecord(
DocumentFingerprint fingerprint,
SourceDocumentCandidate candidate,
ProcessingOutcomeTransition.ProcessingOutcome outcome,
Instant now) {
boolean success = outcome.overallStatus() == ProcessingStatus.SUCCESS;
boolean isProposalReady = outcome.overallStatus() == ProcessingStatus.PROPOSAL_READY;
return new DocumentRecord(
fingerprint,
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
outcome.overallStatus(),
outcome.counters(),
success ? null : now, // lastFailureInstant
success ? now : null, // lastSuccessInstant
now, // createdAt
now // updatedAt
isProposalReady ? null : now, // lastFailureInstant
null, // lastSuccessInstant (only on final SUCCESS)
now, // createdAt
now, // updatedAt
null, // lastTargetPath (not yet set)
null // lastTargetFileName (not yet set)
);
}
@@ -406,21 +657,22 @@ public class DocumentProcessingCoordinator {
SourceDocumentCandidate candidate,
ProcessingOutcomeTransition.ProcessingOutcome outcome,
Instant now) {
boolean success = outcome.overallStatus() == ProcessingStatus.SUCCESS;
boolean isProposalReady = outcome.overallStatus() == ProcessingStatus.PROPOSAL_READY;
return new DocumentRecord(
existingRecord.fingerprint(),
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
outcome.overallStatus(),
outcome.counters(),
success ? existingRecord.lastFailureInstant() : now,
success ? now : existingRecord.lastSuccessInstant(),
isProposalReady ? existingRecord.lastFailureInstant() : now,
existingRecord.lastSuccessInstant(), // success only set by target-copy finalization
existingRecord.createdAt(),
now // updatedAt
now, // updatedAt
existingRecord.lastTargetPath(), // carry over, not changed here
existingRecord.lastTargetFileName() // carry over, not changed here
);
}
/** Builds a skip record: only {@code updatedAt} advances; status and counters are unchanged. */
private DocumentRecord buildSkipRecord(
DocumentRecord existingRecord,
SourceDocumentCandidate candidate,
@@ -434,21 +686,60 @@ public class DocumentProcessingCoordinator {
existingRecord.lastFailureInstant(),
existingRecord.lastSuccessInstant(),
existingRecord.createdAt(),
now // updatedAt
now, // updatedAt
existingRecord.lastTargetPath(),
existingRecord.lastTargetFileName()
);
}
// -------------------------------------------------------------------------
// Common persistence flow (non-skip paths)
// -------------------------------------------------------------------------
private DocumentRecord buildSuccessRecord(
DocumentRecord existingRecord,
SourceDocumentCandidate candidate,
Instant now,
String targetFolderLocator,
String resolvedFilename) {
return new DocumentRecord(
existingRecord.fingerprint(),
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
ProcessingStatus.SUCCESS,
existingRecord.failureCounters(), // counters unchanged on success
existingRecord.lastFailureInstant(),
now, // lastSuccessInstant
existingRecord.createdAt(),
now, // updatedAt
targetFolderLocator, // lastTargetPath
resolvedFilename // lastTargetFileName
);
}
private DocumentRecord buildTransientErrorRecord(
DocumentRecord existingRecord,
SourceDocumentCandidate candidate,
FailureCounters updatedCounters,
Instant now) {
return new DocumentRecord(
existingRecord.fingerprint(),
new SourceDocumentLocator(candidate.locator().value()),
candidate.uniqueIdentifier(),
ProcessingStatus.FAILED_RETRYABLE,
updatedCounters,
now, // lastFailureInstant
existingRecord.lastSuccessInstant(),
existingRecord.createdAt(),
now, // updatedAt
existingRecord.lastTargetPath(), // carry over
existingRecord.lastTargetFileName() // carry over
);
}
// =========================================================================
// Common persistence flow (AI pipeline path)
// =========================================================================
/**
* Loads the next attempt number, builds and persists the attempt together with the
* document record atomically, then logs the result.
* <p>
* {@code recordWriter} performs either {@code createDocumentRecord} or
* {@code updateDocumentRecord} depending on whether the document is new or known.
* All persistence failures are caught and logged; the batch run continues.
* document record atomically.
*
* @return true if persistence succeeded, false if a persistence exception occurred
*/
@@ -459,12 +750,14 @@ public class DocumentProcessingCoordinator {
Instant attemptStart,
Instant now,
ProcessingOutcomeTransition.ProcessingOutcome outcome,
DocumentProcessingOutcome pipelineOutcome,
Consumer<UnitOfWorkPort.TransactionOperations> recordWriter) {
try {
int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint);
ProcessingAttempt attempt =
buildAttempt(fingerprint, context, attemptNumber, attemptStart, now, outcome);
buildAttempt(fingerprint, context, attemptNumber, attemptStart, now,
outcome, pipelineOutcome);
unitOfWorkPort.executeInTransaction(txOps -> {
txOps.saveProcessingAttempt(attempt);
@@ -485,20 +778,14 @@ public class DocumentProcessingCoordinator {
}
}
// -------------------------------------------------------------------------
// Helper: build ProcessingAttempt
// -------------------------------------------------------------------------
// =========================================================================
// Attempt builder (AI pipeline path)
// =========================================================================
/**
* Constructs a {@link ProcessingAttempt} from the given parameters and outcome.
*
* @param fingerprint the document fingerprint
* @param context the current batch run context
* @param attemptNumber the monotonic attempt number
* @param startedAt the start instant of this attempt
* @param endedAt the end instant of this attempt
* @param outcome the outcome (status, counters, retryable)
* @return the constructed processing attempt
* Constructs a {@link ProcessingAttempt} from the pipeline outcome, including AI
* traceability fields when available. The {@code finalTargetFileName} is null for
* all pipeline-path attempts (target copy is handled separately).
*/
private ProcessingAttempt buildAttempt(
DocumentFingerprint fingerprint,
@@ -506,7 +793,8 @@ public class DocumentProcessingCoordinator {
int attemptNumber,
Instant startedAt,
Instant endedAt,
ProcessingOutcomeTransition.ProcessingOutcome outcome) {
ProcessingOutcomeTransition.ProcessingOutcome outcome,
DocumentProcessingOutcome pipelineOutcome) {
String failureClass = null;
String failureMessage = null;
@@ -514,38 +802,80 @@ public class DocumentProcessingCoordinator {
if (outcome.overallStatus() == ProcessingStatus.FAILED_RETRYABLE
|| outcome.overallStatus() == ProcessingStatus.FAILED_FINAL) {
failureClass = outcome.overallStatus().name();
failureMessage = buildFailureMessage(outcome);
failureMessage = buildFailureMessage(pipelineOutcome, outcome);
}
return new ProcessingAttempt(
fingerprint,
context.runId(),
attemptNumber,
startedAt,
endedAt,
outcome.overallStatus(),
failureClass,
failureMessage,
outcome.retryable()
);
}
/**
* Builds a human-readable failure message from the outcome.
*
* @param outcome the outcome
* @return a non-null failure message string
*/
private String buildFailureMessage(ProcessingOutcomeTransition.ProcessingOutcome outcome) {
return switch (outcome.overallStatus()) {
case FAILED_RETRYABLE -> "Processing failed (retryable). "
+ "ContentErrors=" + outcome.counters().contentErrorCount()
+ ", TransientErrors=" + outcome.counters().transientErrorCount();
case FAILED_FINAL -> "Processing failed finally (not retryable). "
+ "ContentErrors=" + outcome.counters().contentErrorCount()
+ ", TransientErrors=" + outcome.counters().transientErrorCount();
default -> outcome.overallStatus().name();
return switch (pipelineOutcome) {
case NamingProposalReady proposalReady -> {
AiAttemptContext ctx = proposalReady.aiContext();
NamingProposal proposal = proposalReady.proposal();
yield new ProcessingAttempt(
fingerprint, context.runId(), attemptNumber, startedAt, endedAt,
outcome.overallStatus(), failureClass, failureMessage, outcome.retryable(),
ctx.modelName(), ctx.promptIdentifier(),
ctx.processedPageCount(), ctx.sentCharacterCount(),
ctx.aiRawResponse(),
proposal.aiReasoning(),
proposal.resolvedDate(), proposal.dateSource(), proposal.validatedTitle(),
null // finalTargetFileName — set only on SUCCESS attempts
);
}
case AiTechnicalFailure techFail -> {
AiAttemptContext ctx = techFail.aiContext();
yield new ProcessingAttempt(
fingerprint, context.runId(), attemptNumber, startedAt, endedAt,
outcome.overallStatus(), failureClass, failureMessage, outcome.retryable(),
ctx.modelName(), ctx.promptIdentifier(),
ctx.processedPageCount(), ctx.sentCharacterCount(),
ctx.aiRawResponse(),
null, null, null, null,
null // finalTargetFileName
);
}
case AiFunctionalFailure funcFail -> {
AiAttemptContext ctx = funcFail.aiContext();
yield new ProcessingAttempt(
fingerprint, context.runId(), attemptNumber, startedAt, endedAt,
outcome.overallStatus(), failureClass, failureMessage, outcome.retryable(),
ctx.modelName(), ctx.promptIdentifier(),
ctx.processedPageCount(), ctx.sentCharacterCount(),
ctx.aiRawResponse(),
null, null, null, null,
null // finalTargetFileName
);
}
default -> ProcessingAttempt.withoutAiFields(
fingerprint, context.runId(), attemptNumber, startedAt, endedAt,
outcome.overallStatus(), failureClass, failureMessage, outcome.retryable()
);
};
}
}
/**
* Builds a human-readable failure message from the pipeline outcome and status outcome.
*/
private String buildFailureMessage(
DocumentProcessingOutcome pipelineOutcome,
ProcessingOutcomeTransition.ProcessingOutcome outcome) {
String base = switch (outcome.overallStatus()) {
case FAILED_RETRYABLE -> "Processing failed (retryable). ";
case FAILED_FINAL -> "Processing failed finally (not retryable). ";
default -> outcome.overallStatus().name() + ". ";
};
String detail = switch (pipelineOutcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed pf ->
"Reason: " + pf.failureReasonDescription();
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError te ->
"Technical: " + te.errorMessage();
case AiTechnicalFailure ai ->
"AI technical error: " + ai.errorMessage();
case AiFunctionalFailure ai ->
"AI functional error: " + ai.errorMessage();
default -> "ContentErrors=" + outcome.counters().contentErrorCount()
+ ", TransientErrors=" + outcome.counters().transientErrorCount();
};
return base + detail;
}
}

View File

@@ -0,0 +1,55 @@
package de.gecheckt.pdf.umbenenner.application.service;
import java.util.Objects;
/**
* Utility for limiting extracted document text to the configured maximum character count.
* <p>
* The limitation is applied strictly <em>before</em> an AI request is composed.
* It operates on the extracted text as a character-count boundary without considering
* word or sentence boundaries, which is intentional: the AI is expected to handle
* partial text gracefully.
*
* <h2>Semantics</h2>
* <ul>
* <li>If the text length does not exceed the configured maximum, it is returned unchanged.</li>
* <li>If the text length exceeds the maximum, it is truncated to exactly
* {@code maxCharacters} characters.</li>
* </ul>
*
* <h2>Architecture boundary</h2>
* <p>
* This limiter does <em>not</em> modify the originally extracted document text stored
* elsewhere in the pipeline. It produces a new, potentially shorter copy suitable
* for inclusion in the AI request. The caller is responsible for recording the
* effective character count (i.e., the length of the returned string) for persistence.
*/
public final class DocumentTextLimiter {

    private DocumentTextLimiter() {
        // Static utility — no instances.
    }

    /**
     * Returns the document text limited to at most {@code maxCharacters} characters.
     * <p>
     * If {@code text.length() <= maxCharacters} the original text is returned unchanged.
     * Otherwise the text is truncated to {@code maxCharacters} characters — unless the
     * cut would fall between the two halves of a surrogate pair (a supplementary
     * Unicode code point such as an emoji), in which case the truncation backs off by
     * one char so that no malformed, unpaired surrogate is ever included in the
     * AI request.
     *
     * @param text the extracted document text; must not be null
     * @param maxCharacters the maximum number of characters to include; must be &gt;= 1
     * @return the text limited to at most {@code maxCharacters} characters; never null
     * @throws NullPointerException if {@code text} is null
     * @throws IllegalArgumentException if {@code maxCharacters} is less than 1
     */
    public static String limit(String text, int maxCharacters) {
        Objects.requireNonNull(text, "text must not be null");
        if (maxCharacters < 1) {
            throw new IllegalArgumentException("maxCharacters must be >= 1, but was: " + maxCharacters);
        }
        if (text.length() <= maxCharacters) {
            return text;
        }
        int cut = maxCharacters;
        // Do not split a surrogate pair: if the char just before the cut is a high
        // surrogate and the char at the cut is its low surrogate, move the cut back
        // by one so the pair stays intact (or is dropped entirely).
        if (Character.isHighSurrogate(text.charAt(cut - 1))
                && Character.isLowSurrogate(text.charAt(cut))) {
            cut--;
        }
        return text.substring(0, cut);
    }
}

View File

@@ -1,7 +1,10 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
@@ -10,7 +13,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
* Pure status and counter transition policy for document processing outcomes.
* <p>
* This class encapsulates the deterministic rules for mapping a pipeline outcome
* (success, content error, or technical error) to a processing status, updated
* (pre-check, naming proposal, or failure) to a processing status, updated
* failure counters, and retryability flag.
* <p>
* The transition logic is independent of persistence, orchestration, or any
@@ -18,15 +21,23 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
*
* <h2>Transition rules</h2>
* <ul>
* <li><strong>Success:</strong> Status becomes {@link ProcessingStatus#SUCCESS},
* counters remain unchanged, {@code retryable=false}.</li>
* <li><strong>Deterministic content error (first occurrence):</strong>
* <li><strong>Naming proposal ready:</strong> Status becomes
* {@link ProcessingStatus#PROPOSAL_READY}, counters unchanged,
* {@code retryable=false}.</li>
* <li><strong>Pre-check content error (first occurrence):</strong>
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
* content error counter incremented by 1, {@code retryable=true}.</li>
* <li><strong>Deterministic content error (second or later occurrence):</strong>
* <li><strong>Pre-check content error (second or later occurrence):</strong>
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
* content error counter incremented by 1, {@code retryable=false}.</li>
* <li><strong>Technical error:</strong> Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
* <li><strong>AI functional failure (first occurrence):</strong>
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
* content error counter incremented by 1, {@code retryable=true}.</li>
* <li><strong>AI functional failure (second or later occurrence):</strong>
* Status becomes {@link ProcessingStatus#FAILED_FINAL},
* content error counter incremented by 1, {@code retryable=false}.</li>
* <li><strong>Technical error (pre-fingerprint / extraction / AI infrastructure):</strong>
* Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
* transient error counter incremented by 1, {@code retryable=true}.</li>
* </ul>
*/
@@ -41,7 +52,7 @@ final class ProcessingOutcomeTransition {
* <p>
* For new documents, all failure counters start at zero.
*
* @param pipelineOutcome the outcome from the extraction and pre-check pipeline
* @param pipelineOutcome the outcome from the processing pipeline
* @return the mapped outcome with status, counters, and retryability
*/
static ProcessingOutcome forNewDocument(DocumentProcessingOutcome pipelineOutcome) {
@@ -51,11 +62,8 @@ final class ProcessingOutcomeTransition {
/**
* Maps a pipeline outcome to a processing outcome, considering the existing
* failure counter state from a known document's history.
* <p>
* This method applies the deterministic transition rules to produce an updated
* status, counters, and retryable flag.
*
* @param pipelineOutcome the outcome from the extraction and pre-check pipeline
* @param pipelineOutcome the outcome from the processing pipeline
* @param existingCounters the current failure counter values from the document's master record
* @return the mapped outcome with updated status, counters, and retryability
*/
@@ -64,39 +72,61 @@ final class ProcessingOutcomeTransition {
FailureCounters existingCounters) {
return switch (pipelineOutcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored -> {
// Success: document passed all pre-checks
case NamingProposalReady ignored -> {
// AI naming proposal produced → PROPOSAL_READY (not yet SUCCESS)
yield new ProcessingOutcome(
ProcessingStatus.SUCCESS,
existingCounters, // counters unchanged on success
ProcessingStatus.PROPOSAL_READY,
existingCounters, // counters unchanged on proposal success
false // not retryable
);
}
case PreCheckFailed contentError -> {
// Deterministic content error: apply the 1-retry rule
case PreCheckFailed ignored2 -> {
// Deterministic content error from pre-check: apply the 1-retry rule
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
if (isFirstOccurrence) {
// First content error → FAILED_RETRYABLE
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
updatedCounters,
true
);
yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true);
} else {
// Second (or later) content error → FAILED_FINAL
yield new ProcessingOutcome(
ProcessingStatus.FAILED_FINAL,
updatedCounters,
false
);
yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false);
}
}
case TechnicalDocumentError technicalError -> {
// Technical error after fingerprinting: always FAILED_RETRYABLE, increment transient counter
case AiFunctionalFailure ignored3 -> {
// Deterministic content error from AI validation: apply the 1-retry rule
FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount();
boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0;
if (isFirstOccurrence) {
yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true);
} else {
yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false);
}
}
case TechnicalDocumentError ignored4 -> {
// Technical error (extraction / infrastructure): retryable, transient counter +1
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),
true
);
}
case AiTechnicalFailure ignored5 -> {
// Technical AI error (timeout, unreachable, bad JSON): retryable, transient counter +1
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),
true
);
}
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored6 -> {
// Pre-check passed without AI step: in normal flow this should not appear at
// the outcome transition level once the AI pipeline is fully wired. Treat it
// as a technical error to avoid silent inconsistency.
yield new ProcessingOutcome(
ProcessingStatus.FAILED_RETRYABLE,
existingCounters.withIncrementedTransientErrorCount(),

View File

@@ -0,0 +1,159 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import java.time.LocalDate;
import java.util.Objects;
/**
* Stateless service for building the base target filename from a leading naming proposal.
* <p>
 * The base filename follows the mandatory target format (German: "verbindliches Zielformat"):
* <pre>
* YYYY-MM-DD - Titel.pdf
* </pre>
*
* <h2>Input source</h2>
* <p>
* The sole authoritative source for date and title is the most recent
* {@code PROPOSAL_READY} processing attempt. This service reads directly from a
* {@link ProcessingAttempt} whose
* {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#PROPOSAL_READY}
* status was confirmed by the caller.
*
* <h2>Consistency checks</h2>
* <p>
* This service does not silently heal inconsistent persistence states. If the proposal
* attempt carries a title or date that violates the rules that were enforced during
* AI response validation, the state is treated as an inconsistent persistence state
* and the caller receives an {@link InconsistentProposalState} result. Such states
* must be surfaced as document-level technical errors.
*
* <h2>No new fachliche interpretation</h2>
* <p>
* This service never re-evaluates or reinterprets the title: it uses the already-validated
* title from the proposal attempt unchanged.
*/
public final class TargetFilenameBuildingService {
private TargetFilenameBuildingService() {
// static utility, no instances
}
// -------------------------------------------------------------------------
// Result type
// -------------------------------------------------------------------------
/**
* Sealed result of {@link #buildBaseFilename(ProcessingAttempt)}.
*/
public sealed interface BaseFilenameResult
permits BaseFilenameReady, InconsistentProposalState {
}
/**
* Successful result containing the ready base filename.
*
* @param baseFilename the filename in {@code YYYY-MM-DD - Titel.pdf} format;
* never null or blank
*/
public record BaseFilenameReady(String baseFilename) implements BaseFilenameResult {
public BaseFilenameReady {
Objects.requireNonNull(baseFilename, "baseFilename must not be null");
if (baseFilename.isBlank()) {
throw new IllegalArgumentException("baseFilename must not be blank");
}
}
}
/**
* Failure result indicating that the loaded proposal attempt contains data that
* violates the rules that were applied during naming-proposal validation, making
* the persistence state inconsistent.
*
* @param reason human-readable description of the inconsistency; never null
*/
public record InconsistentProposalState(String reason) implements BaseFilenameResult {
public InconsistentProposalState {
Objects.requireNonNull(reason, "reason must not be null");
}
}
// -------------------------------------------------------------------------
// Main method
// -------------------------------------------------------------------------
/**
 * Builds the base target filename ({@code YYYY-MM-DD - Titel.pdf}) from the
 * resolved date and validated title stored in the given {@code PROPOSAL_READY}
 * attempt.
 * <p>
 * The following rules are re-checked defensively (they were already enforced
 * during AI response validation); any violation yields an
 * {@link InconsistentProposalState} instead of a filename:
 * <ul>
 *   <li>resolved date present,</li>
 *   <li>validated title present and non-blank,</li>
 *   <li>title at most 20 characters,</li>
 *   <li>title restricted to letters, digits, and spaces.</li>
 * </ul>
 * The 20-character limit covers only the base title; a duplicate-avoidance
 * suffix (e.g. {@code (1)}) appended later by the target folder adapter is not
 * counted against it.
 *
 * @param proposalAttempt the leading {@code PROPOSAL_READY} attempt; must not be null
 * @return a {@link BaseFilenameReady} carrying the complete filename, or an
 *         {@link InconsistentProposalState} describing the consistency violation
 */
public static BaseFilenameResult buildBaseFilename(ProcessingAttempt proposalAttempt) {
    Objects.requireNonNull(proposalAttempt, "proposalAttempt must not be null");

    LocalDate resolvedDate = proposalAttempt.resolvedDate();
    String validatedTitle = proposalAttempt.validatedTitle();

    if (resolvedDate == null) {
        return new InconsistentProposalState(
                "Leading PROPOSAL_READY attempt has no resolved date");
    }
    if (validatedTitle == null || validatedTitle.isBlank()) {
        return new InconsistentProposalState(
                "Leading PROPOSAL_READY attempt has no validated title");
    }
    if (validatedTitle.length() > 20) {
        return new InconsistentProposalState(
                "Leading PROPOSAL_READY attempt has title exceeding 20 characters: '"
                        + validatedTitle + "'");
    }
    if (!isAllowedTitleCharacters(validatedTitle)) {
        return new InconsistentProposalState(
                "Leading PROPOSAL_READY attempt has title with disallowed characters "
                        + "(only letters, digits, and spaces are permitted): '"
                        + validatedTitle + "'");
    }

    // LocalDate.toString() is ISO-8601, i.e. exactly YYYY-MM-DD.
    return new BaseFilenameReady(resolvedDate + " - " + validatedTitle + ".pdf");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
/**
 * Checks that the title consists solely of letters, digits, and spaces.
 * Unicode letters (including German umlauts and {@code ß}) are permitted.
 * Evaluated per UTF-16 code unit, so lone surrogates are rejected as well.
 */
private static boolean isAllowedTitleCharacters(String title) {
    return title.chars().allMatch(ch -> ch == ' ' || Character.isLetterOrDigit(ch));
}
}

View File

@@ -13,12 +13,14 @@ import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.AiNamingService;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import java.time.Instant;
@@ -67,13 +69,6 @@ import java.util.Objects;
* written in sequence by {@link DocumentProcessingCoordinator}. Persistence failures for a single
* document are caught and logged; the batch run continues with the remaining candidates.
*
* <h2>Non-Goals (not implemented)</h2>
* <ul>
* <li>No KI/AI integration or prompt loading.</li>
* <li>No filename generation or target file copy.</li>
* <li>No retry rules for KI or target copy failures.</li>
* </ul>
*
*/
public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCase {
@@ -83,6 +78,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
private final PdfTextExtractionPort pdfTextExtractionPort;
private final FingerprintPort fingerprintPort;
private final DocumentProcessingCoordinator documentProcessingCoordinator;
private final AiNamingService aiNamingService;
private final ProcessingLogger logger;
/**
@@ -102,6 +98,8 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
* must not be null
* @param documentProcessingCoordinator for applying decision logic and persisting results;
* must not be null
* @param aiNamingService for running the AI naming pipeline after pre-checks;
* must not be null
* @param logger for processing-related logging; must not be null
* @throws NullPointerException if any parameter is null
*/
@@ -112,6 +110,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
PdfTextExtractionPort pdfTextExtractionPort,
FingerprintPort fingerprintPort,
DocumentProcessingCoordinator documentProcessingCoordinator,
AiNamingService aiNamingService,
ProcessingLogger logger) {
this.runtimeConfiguration = Objects.requireNonNull(runtimeConfiguration, "runtimeConfiguration must not be null");
this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null");
@@ -122,6 +121,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
this.fingerprintPort = Objects.requireNonNull(fingerprintPort, "fingerprintPort must not be null");
this.documentProcessingCoordinator = Objects.requireNonNull(
documentProcessingCoordinator, "documentProcessingCoordinator must not be null");
this.aiNamingService = Objects.requireNonNull(aiNamingService, "aiNamingService must not be null");
this.logger = Objects.requireNonNull(logger, "logger must not be null");
}
@@ -302,14 +302,24 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
}
/**
* Runs the pipeline (PDF text extraction + pre-checks) for the given candidate.
* Runs the full pipeline for the given candidate: extraction, pre-checks, and AI naming.
* <p>
* This method is called after a successful fingerprint computation. The result is
* passed to {@link DocumentProcessingCoordinator}, which applies it only when the document is
* not in a terminal state.
* <p>
* Processing order:
* <ol>
* <li>Extract PDF text and page count via the extraction port.</li>
* <li>Evaluate pre-checks (text quality, page limit). If any pre-check fails,
* return the failure outcome immediately — no AI call is made.</li>
* <li>If pre-checks pass, run the AI naming pipeline to obtain a naming proposal
* or classify the AI result as a technical or functional failure.</li>
* </ol>
*
* @param candidate the candidate to run through the pipeline
* @return the pipeline outcome (pre-check passed, pre-check failed, or technical error)
* @return the pipeline outcome; one of {@code PreCheckFailed}, {@code TechnicalDocumentError},
* {@code NamingProposalReady}, {@code AiTechnicalFailure}, or {@code AiFunctionalFailure}
*/
private DocumentProcessingOutcome runExtractionPipeline(SourceDocumentCandidate candidate) {
PdfExtractionResult extractionResult =
@@ -317,12 +327,22 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
logExtractionResult(candidate, extractionResult);
DocumentProcessingOutcome outcome =
DocumentProcessingOutcome preCheckOutcome =
DocumentProcessingService.processDocument(candidate, extractionResult, runtimeConfiguration);
logProcessingOutcome(candidate, outcome);
// If pre-checks did not pass, return the failure outcome immediately.
// This avoids an AI call for documents that cannot be processed.
if (!(preCheckOutcome instanceof PreCheckPassed preCheckPassed)) {
logProcessingOutcome(candidate, preCheckOutcome);
return preCheckOutcome;
}
return outcome;
// Pre-checks passed — run the AI naming pipeline
logger.info("Pre-checks passed for '{}'. Invoking AI naming pipeline.",
candidate.uniqueIdentifier());
DocumentProcessingOutcome aiOutcome = aiNamingService.invoke(preCheckPassed);
logProcessingOutcome(candidate, aiOutcome);
return aiOutcome;
}
/**
@@ -361,21 +381,24 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
*/
private void logProcessingOutcome(SourceDocumentCandidate candidate, DocumentProcessingOutcome outcome) {
switch (outcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed -> {
logger.info("Pre-checks PASSED for '{}'. Candidate ready for persistence.",
candidate.uniqueIdentifier());
}
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed -> {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
logger.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
candidate.uniqueIdentifier(), failed.failureReasonDescription());
}
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError -> {
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
logger.warn("Processing FAILED for '{}': {} (Technical error retryable).",
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
default -> {
// Handle any other cases
}
case de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady ready ->
logger.info("AI naming proposal ready for '{}': title='{}', date={}.",
candidate.uniqueIdentifier(),
ready.proposal().validatedTitle(),
ready.proposal().resolvedDate());
case de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure aiTechnical ->
logger.warn("AI technical failure for '{}': {} (Transient retryable).",
candidate.uniqueIdentifier(), aiTechnical.errorMessage());
case de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure aiFunctional ->
logger.info("AI functional failure for '{}': {} (Deterministic content error).",
candidate.uniqueIdentifier(), aiFunctional.errorMessage());
default -> { /* other outcomes are handled elsewhere */ }
}
}
}

View File

@@ -0,0 +1,317 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.extension.ExtendWith;
import org.mockito.Mock;
import org.mockito.junit.jupiter.MockitoExtension;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneOffset;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
import static org.mockito.ArgumentMatchers.any;
import static org.mockito.Mockito.when;
/**
* Unit tests for {@link AiNamingService}.
* <p>
* Covers: prompt load failure, AI invocation failure, unparseable response,
* functional validation failure, and the successful naming proposal path.
*/
@ExtendWith(MockitoExtension.class)
class AiNamingServiceTest {

    private static final String MODEL_NAME = "gpt-4";
    private static final int MAX_CHARS = 1000;
    private static final Instant FIXED_INSTANT = Instant.parse("2026-04-07T10:00:00Z");

    @Mock
    private AiInvocationPort aiInvocationPort;

    @Mock
    private PromptPort promptPort;

    private AiResponseValidator validator;
    private AiNamingService service;
    private SourceDocumentCandidate candidate;
    private PreCheckPassed preCheckPassed;

    @BeforeEach
    void setUp() {
        // A fixed clock makes the FALLBACK_CURRENT date deterministic.
        validator = new AiResponseValidator(() -> FIXED_INSTANT);
        service = new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, MAX_CHARS);
        candidate = new SourceDocumentCandidate(
                "test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf"));
        preCheckPassed = new PreCheckPassed(
                candidate, new PdfExtractionSuccess("Document text content", new PdfPageCount(2)));
    }

    // -------------------------------------------------------------------------
    // Stubbing helpers
    // -------------------------------------------------------------------------

    /** Request placeholder carried by stubbed port results; its content is irrelevant here. */
    private static AiRequestRepresentation dummyRequest() {
        return new AiRequestRepresentation(
                new PromptIdentifier("prompt.txt"), "Prompt content", "Document text", 13);
    }

    /** Stubs the prompt port to load the given prompt successfully. */
    private void givenPromptLoaded(String promptId, String promptText) {
        when(promptPort.loadPrompt()).thenReturn(
                new PromptLoadingSuccess(new PromptIdentifier(promptId), promptText));
    }

    /** Stubs the AI port to answer any request with the given raw response body. */
    private void givenAiAnswers(String jsonBody) {
        when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn(
                new AiInvocationSuccess(dummyRequest(), new AiRawResponse(jsonBody)));
    }

    /** Stubs the AI port to fail technically with the given reason and message. */
    private void givenAiFailsTechnically(String reason, String message) {
        when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn(
                new AiInvocationTechnicalFailure(dummyRequest(), reason, message));
    }

    // -------------------------------------------------------------------------
    // Prompt load failure
    // -------------------------------------------------------------------------

    @Test
    void invoke_promptLoadFailure_returnsAiTechnicalFailure() {
        when(promptPort.loadPrompt()).thenReturn(
                new PromptLoadingFailure("FILE_NOT_FOUND", "Prompt file missing"));

        var result = service.invoke(preCheckPassed);

        assertThat(result).isInstanceOf(AiTechnicalFailure.class);
        var failure = (AiTechnicalFailure) result;
        assertThat(failure.errorMessage()).contains("Prompt loading failed");
        assertThat(failure.aiContext().modelName()).isEqualTo(MODEL_NAME);
    }

    // -------------------------------------------------------------------------
    // AI invocation failure
    // -------------------------------------------------------------------------

    @Test
    void invoke_aiInvocationTimeout_returnsAiTechnicalFailure() {
        givenPromptLoaded("prompt-v1.txt", "Analyze this document.");
        givenAiFailsTechnically("TIMEOUT", "Request timed out after 30s");

        var result = service.invoke(preCheckPassed);

        assertThat(result).isInstanceOf(AiTechnicalFailure.class);
        assertThat(((AiTechnicalFailure) result).errorMessage()).contains("TIMEOUT");
    }

    @Test
    void invoke_aiInvocationConnectionError_returnsAiTechnicalFailure() {
        givenPromptLoaded("prompt.txt", "Prompt content");
        givenAiFailsTechnically("CONNECTION_ERROR", "Connection refused");

        assertThat(service.invoke(preCheckPassed)).isInstanceOf(AiTechnicalFailure.class);
    }

    // -------------------------------------------------------------------------
    // Response parsing failure (unparseable JSON -> technical failure)
    // -------------------------------------------------------------------------

    @Test
    void invoke_unparseableAiResponse_returnsAiTechnicalFailure() {
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("This is not JSON at all");

        var result = service.invoke(preCheckPassed);

        assertThat(result).isInstanceOf(AiTechnicalFailure.class);
        assertThat(((AiTechnicalFailure) result).aiContext().aiRawResponse())
                .isEqualTo("This is not JSON at all");
    }

    @Test
    void invoke_aiResponseMissingTitle_returnsAiTechnicalFailure() {
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("{\"reasoning\":\"No title provided\"}");

        assertThat(service.invoke(preCheckPassed)).isInstanceOf(AiTechnicalFailure.class);
    }

    // -------------------------------------------------------------------------
    // Functional validation failure (parseable but semantically invalid)
    // -------------------------------------------------------------------------

    @Test
    void invoke_aiResponseTitleTooLong_returnsAiFunctionalFailure() {
        givenPromptLoaded("prompt.txt", "Prompt");
        // 21-char title: "TitleThatIsTooLongXXX"
        givenAiAnswers("{\"title\":\"TitleThatIsTooLongXXX\",\"reasoning\":\"Too long\",\"date\":\"2026-01-15\"}");

        assertThat(service.invoke(preCheckPassed)).isInstanceOf(AiFunctionalFailure.class);
    }

    @Test
    void invoke_aiResponseGenericTitle_returnsAiFunctionalFailure() {
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("{\"title\":\"Dokument\",\"reasoning\":\"Generic\"}");

        assertThat(service.invoke(preCheckPassed)).isInstanceOf(AiFunctionalFailure.class);
    }

    @Test
    void invoke_aiResponseInvalidDateFormat_returnsAiFunctionalFailure() {
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("{\"title\":\"Rechnung\",\"reasoning\":\"OK\",\"date\":\"15.01.2026\"}");

        assertThat(service.invoke(preCheckPassed)).isInstanceOf(AiFunctionalFailure.class);
    }

    // -------------------------------------------------------------------------
    // Successful naming proposal
    // -------------------------------------------------------------------------

    @Test
    void invoke_validAiResponse_returnsNamingProposalReady() {
        givenPromptLoaded("prompt-v1.txt", "Analyze the document.");
        givenAiAnswers("{\"title\":\"Stromabrechnung\",\"reasoning\":\"Electricity invoice\",\"date\":\"2026-01-15\"}");

        var result = service.invoke(preCheckPassed);

        assertThat(result).isInstanceOf(NamingProposalReady.class);
        var ready = (NamingProposalReady) result;
        assertThat(ready.proposal().validatedTitle()).isEqualTo("Stromabrechnung");
        assertThat(ready.proposal().resolvedDate()).isEqualTo(LocalDate.of(2026, 1, 15));
        assertThat(ready.proposal().dateSource()).isEqualTo(DateSource.AI_PROVIDED);
        assertThat(ready.aiContext().modelName()).isEqualTo(MODEL_NAME);
        assertThat(ready.aiContext().promptIdentifier()).isEqualTo("prompt-v1.txt");
    }

    @Test
    void invoke_validAiResponseWithoutDate_usesFallbackDate() {
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("{\"title\":\"Kontoauszug\",\"reasoning\":\"No date in document\"}");

        var result = service.invoke(preCheckPassed);

        assertThat(result).isInstanceOf(NamingProposalReady.class);
        var ready = (NamingProposalReady) result;
        assertThat(ready.proposal().dateSource()).isEqualTo(DateSource.FALLBACK_CURRENT);
        assertThat(ready.proposal().resolvedDate())
                .isEqualTo(FIXED_INSTANT.atZone(ZoneOffset.UTC).toLocalDate());
    }

    @Test
    void invoke_documentTextLongerThanMax_sendsLimitedText() {
        // max chars is 1000, document text is 2000 chars -> sent chars should be 1000
        var longDoc = new PreCheckPassed(
                candidate, new PdfExtractionSuccess("X".repeat(2000), new PdfPageCount(5)));
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("{\"title\":\"Rechnung\",\"reasoning\":\"Invoice\",\"date\":\"2026-03-01\"}");

        var result = service.invoke(longDoc);

        assertThat(result).isInstanceOf(NamingProposalReady.class);
        assertThat(((NamingProposalReady) result).aiContext().sentCharacterCount())
                .isEqualTo(MAX_CHARS);
    }

    @Test
    void invoke_documentTextShorterThanMax_sendsFullText() {
        var shortText = "Short document";
        var shortDoc = new PreCheckPassed(
                candidate, new PdfExtractionSuccess(shortText, new PdfPageCount(1)));
        givenPromptLoaded("prompt.txt", "Prompt");
        givenAiAnswers("{\"title\":\"Rechnung\",\"reasoning\":\"Invoice\",\"date\":\"2026-03-01\"}");

        var result = service.invoke(shortDoc);

        assertThat(result).isInstanceOf(NamingProposalReady.class);
        assertThat(((NamingProposalReady) result).aiContext().sentCharacterCount())
                .isEqualTo(shortText.length());
    }

    // -------------------------------------------------------------------------
    // Null handling
    // -------------------------------------------------------------------------

    @Test
    void invoke_nullPreCheckPassed_throwsNullPointerException() {
        assertThatThrownBy(() -> service.invoke(null))
                .isInstanceOf(NullPointerException.class)
                .hasMessage("preCheckPassed must not be null");
    }

    @Test
    void constructor_nullAiPort_throwsNullPointerException() {
        assertThatThrownBy(() -> new AiNamingService(null, promptPort, validator, MODEL_NAME, MAX_CHARS))
                .isInstanceOf(NullPointerException.class);
    }

    @Test
    void constructor_nullPromptPort_throwsNullPointerException() {
        assertThatThrownBy(() -> new AiNamingService(aiInvocationPort, null, validator, MODEL_NAME, MAX_CHARS))
                .isInstanceOf(NullPointerException.class);
    }

    @Test
    void constructor_nullValidator_throwsNullPointerException() {
        assertThatThrownBy(() -> new AiNamingService(aiInvocationPort, promptPort, null, MODEL_NAME, MAX_CHARS))
                .isInstanceOf(NullPointerException.class);
    }

    @Test
    void constructor_maxTextCharactersZero_throwsIllegalArgumentException() {
        assertThatThrownBy(() -> new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, 0))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("maxTextCharacters must be >= 1");
    }
}

View File

@@ -0,0 +1,217 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult;
import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
* Unit tests for {@link AiResponseParser}.
* <p>
* Covers structural parsing rules: valid JSON objects, mandatory fields,
* optional date, extra fields, and rejection of non-JSON or mixed-content responses.
*/
class AiResponseParserTest {

    // -------------------------------------------------------------------------
    // Test helpers
    // -------------------------------------------------------------------------

    /** Runs the parser on the given raw response body. */
    private static AiResponseParsingResult parseBody(String body) {
        return AiResponseParser.parse(new AiRawResponse(body));
    }

    /** Asserts a successful parse and returns the structured payload. */
    private static ParsedAiResponse expectSuccess(AiResponseParsingResult result) {
        assertThat(result).isInstanceOf(AiResponseParsingSuccess.class);
        return ((AiResponseParsingSuccess) result).response();
    }

    /** Asserts a failed parse and returns the machine-readable reason code. */
    private static String expectFailureReason(AiResponseParsingResult result) {
        assertThat(result).isInstanceOf(AiResponseParsingFailure.class);
        return ((AiResponseParsingFailure) result).failureReason();
    }

    // -------------------------------------------------------------------------
    // Success cases
    // -------------------------------------------------------------------------

    @Test
    void parse_validJsonWithAllFields_returnsSuccess() {
        var parsed = expectSuccess(parseBody(
                "{\"title\":\"Stromabrechnung\",\"reasoning\":\"Found bill dated 2026-01-15\",\"date\":\"2026-01-15\"}"));

        assertThat(parsed.title()).isEqualTo("Stromabrechnung");
        assertThat(parsed.reasoning()).isEqualTo("Found bill dated 2026-01-15");
        assertThat(parsed.dateString()).contains("2026-01-15");
    }

    @Test
    void parse_validJsonWithoutDate_returnsSuccessWithEmptyOptional() {
        var parsed = expectSuccess(parseBody(
                "{\"title\":\"Kontoauszug\",\"reasoning\":\"No date found in document\"}"));

        assertThat(parsed.title()).isEqualTo("Kontoauszug");
        assertThat(parsed.dateString()).isEmpty();
    }

    @Test
    void parse_validJsonWithAdditionalFields_toleratesExtraFields() {
        var parsed = expectSuccess(parseBody(
                "{\"title\":\"Rechnung\",\"reasoning\":\"Invoice\",\"confidence\":0.95,\"lang\":\"de\"}"));

        assertThat(parsed.title()).isEqualTo("Rechnung");
    }

    @Test
    void parse_validJsonWithLeadingAndTrailingWhitespace_trimsAndSucceeds() {
        assertThat(parseBody(" {\"title\":\"Vertrag\",\"reasoning\":\"Contract document\"} "))
                .isInstanceOf(AiResponseParsingSuccess.class);
    }

    @Test
    void parse_emptyReasoningField_isAccepted() {
        var parsed = expectSuccess(parseBody("{\"title\":\"Mahnung\",\"reasoning\":\"\"}"));

        assertThat(parsed.reasoning()).isEmpty();
    }

    @Test
    void parse_nullDateField_treatedAsAbsent() {
        var parsed = expectSuccess(parseBody(
                "{\"title\":\"Bescheid\",\"reasoning\":\"Administrative notice\",\"date\":null}"));

        assertThat(parsed.dateString()).isEmpty();
    }

    // -------------------------------------------------------------------------
    // Failure cases (structural)
    // -------------------------------------------------------------------------

    @Test
    void parse_emptyBody_returnsFailure() {
        assertThat(expectFailureReason(parseBody(""))).isEqualTo("EMPTY_RESPONSE");
    }

    @Test
    void parse_blankBody_returnsFailure() {
        assertThat(parseBody(" \t\n ")).isInstanceOf(AiResponseParsingFailure.class);
    }

    @Test
    void parse_plainText_returnsFailure() {
        assertThat(expectFailureReason(parseBody("Sure, here is the title: Rechnung")))
                .isEqualTo("NOT_JSON_OBJECT");
    }

    @Test
    void parse_jsonEmbeddedInProse_returnsFailure() {
        assertThat(expectFailureReason(parseBody(
                "Here is the result: {\"title\":\"Rechnung\",\"reasoning\":\"r\"} Hope that helps!")))
                .isEqualTo("NOT_JSON_OBJECT");
    }

    @Test
    void parse_jsonArray_returnsFailure() {
        assertThat(expectFailureReason(parseBody("[{\"title\":\"Rechnung\",\"reasoning\":\"r\"}]")))
                .isEqualTo("NOT_JSON_OBJECT");
    }

    @Test
    void parse_invalidJson_returnsFailure() {
        assertThat(expectFailureReason(parseBody("{\"title\":\"Rechnung\",\"reasoning\":}")))
                .isEqualTo("INVALID_JSON");
    }

    @Test
    void parse_missingTitle_returnsFailure() {
        assertThat(expectFailureReason(parseBody("{\"reasoning\":\"Some reasoning without title\"}")))
                .isEqualTo("MISSING_TITLE");
    }

    @Test
    void parse_nullTitle_returnsFailure() {
        assertThat(expectFailureReason(parseBody("{\"title\":null,\"reasoning\":\"r\"}")))
                .isEqualTo("MISSING_TITLE");
    }

    @Test
    void parse_blankTitle_returnsFailure() {
        assertThat(expectFailureReason(parseBody("{\"title\":\" \",\"reasoning\":\"r\"}")))
                .isEqualTo("BLANK_TITLE");
    }

    @Test
    void parse_missingReasoning_returnsFailure() {
        assertThat(expectFailureReason(parseBody("{\"title\":\"Rechnung\"}")))
                .isEqualTo("MISSING_REASONING");
    }

    @Test
    void parse_nullRawResponse_throwsNullPointerException() {
        assertThatThrownBy(() -> AiResponseParser.parse(null))
                .isInstanceOf(NullPointerException.class)
                .hasMessage("rawResponse must not be null");
    }
}

View File

@@ -0,0 +1,236 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.time.Instant;
import java.time.LocalDate;
import java.time.ZoneOffset;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
* Unit tests for {@link AiResponseValidator}.
* <p>
* Covers: title character rules, length limit, generic placeholder detection,
* date parsing, date fallback via {@link ClockPort}, and null handling.
*/
class AiResponseValidatorTest {
private static final Instant FIXED_INSTANT = Instant.parse("2026-04-07T10:00:00Z");
private static final LocalDate FIXED_DATE = FIXED_INSTANT.atZone(ZoneOffset.UTC).toLocalDate();
private AiResponseValidator validator;
@BeforeEach
void setUp() {
    // A fixed clock makes the FALLBACK_CURRENT date deterministic in every test.
    validator = new AiResponseValidator(() -> FIXED_INSTANT);
}
// -------------------------------------------------------------------------
// Valid cases
// -------------------------------------------------------------------------
@Test
void validate_validTitleAndAiDate_returnsValidWithAiProvided() {
    var result = validator.validate(
            ParsedAiResponse.of("Stromabrechnung", "Electricity bill", "2026-01-15"));

    assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
    var proposal = ((AiResponseValidator.AiValidationResult.Valid) result).proposal();
    assertThat(proposal.validatedTitle()).isEqualTo("Stromabrechnung");
    assertThat(proposal.resolvedDate()).isEqualTo(LocalDate.of(2026, 1, 15));
    assertThat(proposal.dateSource()).isEqualTo(DateSource.AI_PROVIDED);
    assertThat(proposal.aiReasoning()).isEqualTo("Electricity bill");
}
@Test
void validate_validTitleNoDate_usesFallbackCurrentDate() {
    var result = validator.validate(
            ParsedAiResponse.of("Kontoauszug", "No date in document", null));

    assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
    var proposal = ((AiResponseValidator.AiValidationResult.Valid) result).proposal();
    assertThat(proposal.resolvedDate()).isEqualTo(FIXED_DATE);
    assertThat(proposal.dateSource()).isEqualTo(DateSource.FALLBACK_CURRENT);
}
@Test
void validate_titleWithUmlauts_isAccepted() {
ParsedAiResponse parsed = ParsedAiResponse.of("Mietvertrag Müller", "Rental contract", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
}
@Test
void validate_titleWithSzligChar_isAccepted() {
ParsedAiResponse parsed = ParsedAiResponse.of("Straßenrechnung", "Street bill", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
}
@Test
void validate_titleWithDigits_isAccepted() {
ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung 2026", "Invoice 2026", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
}
@Test
void validate_titleExactly20Chars_isAccepted() {
String title = "12345678901234567890"; // exactly 20 chars
ParsedAiResponse parsed = ParsedAiResponse.of(title, "test", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
}
@Test
void validate_emptyReasoning_isAccepted() {
ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class);
}
// -------------------------------------------------------------------------
// Title validation failures
// -------------------------------------------------------------------------
@Test
void validate_title21Chars_returnsInvalid() {
String title = "1234567890123456789A1"; // 21 chars
ParsedAiResponse parsed = ParsedAiResponse.of(title, "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage())
.contains("20");
}
@Test
void validate_titleWithSpecialChar_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung!", "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage())
.containsIgnoringCase("disallowed");
}
@Test
void validate_titleWithHyphen_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("Strom-Rechnung", "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
}
@Test
void validate_genericTitleDokument_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("Dokument", "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage())
.containsIgnoringCase("placeholder");
}
@Test
void validate_genericTitleDateiCaseInsensitive_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("DATEI", "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
}
@Test
void validate_genericTitleScan_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("scan", "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
}
@Test
void validate_genericTitlePdf_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("PDF", "reasoning", null);
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
}
// -------------------------------------------------------------------------
// Date validation failures
// -------------------------------------------------------------------------
@Test
void validate_aiProvidesUnparseableDate_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "reasoning", "not-a-date");
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage())
.contains("not-a-date");
}
@Test
void validate_aiProvidesWrongDateFormat_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "reasoning", "15.01.2026");
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
}
@Test
void validate_aiProvidesPartialDate_returnsInvalid() {
ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "reasoning", "2026-01");
AiResponseValidator.AiValidationResult result = validator.validate(parsed);
assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class);
}
// -------------------------------------------------------------------------
// Null handling
// -------------------------------------------------------------------------
@Test
void validate_nullParsedResponse_throwsNullPointerException() {
assertThatThrownBy(() -> validator.validate(null))
.isInstanceOf(NullPointerException.class)
.hasMessage("parsed must not be null");
}
@Test
void constructor_nullClockPort_throwsNullPointerException() {
assertThatThrownBy(() -> new AiResponseValidator(null))
.isInstanceOf(NullPointerException.class)
.hasMessage("clockPort must not be null");
}
}

View File

@@ -13,10 +13,22 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnica
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
import de.gecheckt.pdf.umbenenner.domain.model.AiAttemptContext;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
@@ -32,6 +44,7 @@ import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.time.Instant;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Consumer;
@@ -72,7 +85,8 @@ class DocumentProcessingCoordinatorTest {
recordRepo = new CapturingDocumentRecordRepository();
attemptRepo = new CapturingProcessingAttemptRepository();
unitOfWorkPort = new CapturingUnitOfWorkPort(recordRepo, attemptRepo);
processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, new NoOpProcessingLogger());
processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
candidate = new SourceDocumentCandidate(
"test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf"));
@@ -86,17 +100,16 @@ class DocumentProcessingCoordinatorTest {
// -------------------------------------------------------------------------
@Test
void process_newDocument_preCheckPassed_persistsSuccessStatus() {
void process_newDocument_namingProposalReady_persistsProposalReadyStatus() {
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
DocumentProcessingOutcome outcome = buildNamingProposalOutcome();
processor.process(candidate, fingerprint, outcome, context, attemptStart);
// One attempt written
assertEquals(1, attemptRepo.savedAttempts.size());
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
assertEquals(ProcessingStatus.SUCCESS, attempt.status());
assertEquals(ProcessingStatus.PROPOSAL_READY, attempt.status());
assertFalse(attempt.retryable());
assertNull(attempt.failureClass());
assertNull(attempt.failureMessage());
@@ -104,10 +117,11 @@ class DocumentProcessingCoordinatorTest {
// One master record created
assertEquals(1, recordRepo.createdRecords.size());
DocumentRecord record = recordRepo.createdRecords.get(0);
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
assertEquals(ProcessingStatus.PROPOSAL_READY, record.overallStatus());
assertEquals(0, record.failureCounters().contentErrorCount());
assertEquals(0, record.failureCounters().transientErrorCount());
assertNotNull(record.lastSuccessInstant());
// lastSuccessInstant is null in M5; it is set by the target-copy stage (M6)
assertNull(record.lastSuccessInstant());
assertNull(record.lastFailureInstant());
}
@@ -203,24 +217,24 @@ class DocumentProcessingCoordinatorTest {
}
@Test
void process_knownDocument_preCheckPassed_persistsSuccess() {
void process_knownDocument_namingProposalReady_persistsProposalReadyStatus() {
DocumentRecord existingRecord = buildRecord(
ProcessingStatus.FAILED_RETRYABLE,
new FailureCounters(0, 1));
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
DocumentProcessingOutcome outcome = buildNamingProposalOutcome();
processor.process(candidate, fingerprint, outcome, context, attemptStart);
assertEquals(1, recordRepo.updatedRecords.size());
DocumentRecord record = recordRepo.updatedRecords.get(0);
assertEquals(ProcessingStatus.SUCCESS, record.overallStatus());
// Counters unchanged on success
assertEquals(ProcessingStatus.PROPOSAL_READY, record.overallStatus());
// Counters unchanged on naming proposal success
assertEquals(0, record.failureCounters().contentErrorCount());
assertEquals(1, record.failureCounters().transientErrorCount());
assertNotNull(record.lastSuccessInstant());
// lastSuccessInstant is null in M5; it is set by the target-copy stage (M6)
assertNull(record.lastSuccessInstant());
}
// -------------------------------------------------------------------------
@@ -469,8 +483,7 @@ class DocumentProcessingCoordinatorTest {
// -------------------------------------------------------------------------
@Test
void process_newDocument_firstContentError_failureMessageContainsContentErrorCount() {
// Prüft, dass die Fehlermeldung die Fehleranzahl enthält (nicht leer ist)
void process_newDocument_firstContentError_failureMessageContainsFailureReason() {
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new PreCheckFailed(
candidate, PreCheckFailureReason.NO_USABLE_TEXT);
@@ -481,13 +494,13 @@ class DocumentProcessingCoordinatorTest {
assertNotNull(attempt.failureMessage(), "Fehlermeldung darf nicht null sein bei FAILED_RETRYABLE");
assertFalse(attempt.failureMessage().isBlank(),
"Fehlermeldung darf nicht leer sein bei FAILED_RETRYABLE");
assertTrue(attempt.failureMessage().contains("ContentErrors=1"),
"Fehlermeldung muss den Inhaltsfehler-Zähler enthalten: " + attempt.failureMessage());
assertTrue(attempt.failureMessage().contains("No usable text in extracted PDF content"),
"Fehlermeldung muss den Fehlergrund enthalten: " + attempt.failureMessage());
}
@Test
void process_knownDocument_secondContentError_failureMessageContainsFinalStatus() {
// Prüft, dass die Fehlermeldung bei FAILED_FINAL den Endzustand enthält
// Prüft, dass die Fehlermeldung bei FAILED_FINAL den Fehlergrund enthält
DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, new FailureCounters(1, 0));
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckFailed(
@@ -499,13 +512,12 @@ class DocumentProcessingCoordinatorTest {
assertNotNull(attempt.failureMessage(), "Fehlermeldung darf nicht null sein bei FAILED_FINAL");
assertFalse(attempt.failureMessage().isBlank(),
"Fehlermeldung darf nicht leer sein bei FAILED_FINAL");
assertTrue(attempt.failureMessage().contains("ContentErrors=2"),
"Fehlermeldung muss den aktualisierten Inhaltsfehler-Zähler enthalten: " + attempt.failureMessage());
assertTrue(attempt.failureMessage().contains("Document page count exceeds configured limit"),
"Fehlermeldung muss den Fehlergrund enthalten: " + attempt.failureMessage());
}
@Test
void process_newDocument_technicalError_failureMessageContainsTransientErrorCount() {
// Prüft, dass die Fehlermeldung bei transientem Fehler den Transient-Zähler enthält
void process_newDocument_technicalError_failureMessageContainsTechnicalDetail() {
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new TechnicalDocumentError(candidate, "Timeout", null);
@@ -513,22 +525,21 @@ class DocumentProcessingCoordinatorTest {
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
assertNotNull(attempt.failureMessage());
assertTrue(attempt.failureMessage().contains("TransientErrors=1"),
"Fehlermeldung muss den Transient-Fehler-Zähler enthalten: " + attempt.failureMessage());
assertTrue(attempt.failureMessage().contains("Timeout"),
"Fehlermeldung muss den technischen Fehlerdetail enthalten: " + attempt.failureMessage());
}
@Test
void process_newDocument_preCheckPassed_failureClassAndMessageAreNull() {
// Prüft, dass bei Erfolg failureClass und failureMessage null sind
void process_newDocument_namingProposalReady_failureClassAndMessageAreNull() {
// Prüft, dass bei PROPOSAL_READY failureClass und failureMessage null sind
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
DocumentProcessingOutcome outcome = buildNamingProposalOutcome();
processor.process(candidate, fingerprint, outcome, context, attemptStart);
ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0);
assertNull(attempt.failureClass(), "Bei Erfolg muss failureClass null sein");
assertNull(attempt.failureMessage(), "Bei Erfolg muss failureMessage null sein");
assertNull(attempt.failureClass(), "Bei PROPOSAL_READY muss failureClass null sein");
assertNull(attempt.failureMessage(), "Bei PROPOSAL_READY muss failureMessage null sein");
}
// -------------------------------------------------------------------------
@@ -536,9 +547,9 @@ class DocumentProcessingCoordinatorTest {
// -------------------------------------------------------------------------
@Test
void process_knownDocument_preCheckPassed_lastSuccessInstantSetAndLastFailureInstantFromPreviousRecord() {
// Prüft, dass bei SUCCESS am known-Dokument lastSuccessInstant gesetzt
// und lastFailureInstant aus dem Vorgänger-Datensatz übernommen wird
void process_knownDocument_namingProposalReady_lastSuccessInstantNullAndLastFailureInstantFromPreviousRecord() {
// Prüft, dass bei PROPOSAL_READY am known-Dokument lastSuccessInstant null bleibt
// (M6 setzt ihn erst nach der Zielkopie) und lastFailureInstant aus dem Vorgänger übernommen wird
Instant previousFailureInstant = Instant.parse("2025-01-15T10:00:00Z");
DocumentRecord existingRecord = new DocumentRecord(
fingerprint,
@@ -549,19 +560,20 @@ class DocumentProcessingCoordinatorTest {
previousFailureInstant, // lastFailureInstant vorhanden
null, // noch kein Erfolgszeitpunkt
Instant.now(),
Instant.now()
Instant.now(),
null,
null
);
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
DocumentProcessingOutcome outcome = buildNamingProposalOutcome();
processor.process(candidate, fingerprint, outcome, context, attemptStart);
DocumentRecord updated = recordRepo.updatedRecords.get(0);
assertNotNull(updated.lastSuccessInstant(),
"lastSuccessInstant muss nach erfolgreichem Verarbeiten gesetzt sein");
assertNull(updated.lastSuccessInstant(),
"lastSuccessInstant muss nach PROPOSAL_READY null bleiben (wird erst von M6 gesetzt)");
assertEquals(previousFailureInstant, updated.lastFailureInstant(),
"lastFailureInstant muss bei SUCCESS den Vorgänger-Wert beibehalten");
"lastFailureInstant muss bei PROPOSAL_READY den Vorgänger-Wert beibehalten");
}
@Test
@@ -578,7 +590,9 @@ class DocumentProcessingCoordinatorTest {
null, // noch keine Fehlzeit
previousSuccessInstant, // vorheriger Erfolg vorhanden
Instant.now(),
Instant.now()
Instant.now(),
null,
null
);
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckFailed(
@@ -602,7 +616,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass bei Lookup-Fehler ein Fehler-Log-Eintrag erzeugt wird
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("Datenbank nicht erreichbar", null));
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
@@ -618,7 +633,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass beim Überspringen eines bereits erfolgreich verarbeiteten Dokuments geloggt wird
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckPassed(
@@ -635,7 +651,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass beim Überspringen eines final fehlgeschlagenen Dokuments geloggt wird
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0));
recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckFailed(
@@ -652,7 +669,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass nach erfolgreichem Persistieren einer neuen Datei geloggt wird
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
recordRepo.setLookupResult(new DocumentUnknown());
DocumentProcessingOutcome outcome = new PreCheckPassed(
candidate, new PdfExtractionSuccess("text", new PdfPageCount(1)));
@@ -668,7 +686,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass bei Persistenzfehler ein Fehler-Log-Eintrag erzeugt wird
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
recordRepo.setLookupResult(new DocumentUnknown());
unitOfWorkPort.failOnExecute = true;
DocumentProcessingOutcome outcome = new PreCheckPassed(
@@ -685,7 +704,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass nach erfolgreichem Skip-Persistieren ein Debug-Log erzeugt wird (persistSkipAttempt L301)
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
DocumentProcessingOutcome outcome = new PreCheckPassed(
@@ -702,7 +722,8 @@ class DocumentProcessingCoordinatorTest {
// Prüft, dass bei Persistenzfehler im Skip-Pfad ein Fehler geloggt wird (persistSkipAttempt L306)
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
DocumentProcessingCoordinator coordinatorWithCapturingLogger =
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger);
new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger);
DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));
unitOfWorkPort.failOnExecute = true;
@@ -715,10 +736,192 @@ class DocumentProcessingCoordinatorTest {
"Bei Persistenzfehler im Skip-Pfad muss ein Fehler geloggt werden");
}
// -------------------------------------------------------------------------
// PROPOSAL_READY finalization path
// -------------------------------------------------------------------------
// Happy path of the deferred finalization: an existing PROPOSAL_READY record plus a
// stored proposal attempt must lead to a SUCCESS attempt carrying the target filename,
// and the master record must gain target path, target filename, and lastSuccessInstant.
@Test
void processDeferredOutcome_proposalReady_successfulCopy_persistsSuccessWithTargetFileName() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
// The pipeline callback must never be invoked for PROPOSAL_READY documents.
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart,
c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
assertTrue(result, "Finalization should succeed");
ProcessingAttempt successAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.SUCCESS)
.findFirst()
.orElse(null);
assertNotNull(successAttempt, "A SUCCESS attempt must be persisted");
assertNotNull(successAttempt.finalTargetFileName(), "SUCCESS attempt must carry the final target filename");
DocumentRecord updated = recordRepo.updatedRecords.get(0);
assertEquals(ProcessingStatus.SUCCESS, updated.overallStatus());
assertNotNull(updated.lastTargetFileName(), "Master record must carry the final target filename");
assertNotNull(updated.lastTargetPath(), "Master record must carry the target folder path");
assertNotNull(updated.lastSuccessInstant(), "lastSuccessInstant must be set on SUCCESS");
}
// If the record says PROPOSAL_READY but no proposal attempt exists in the attempt store,
// the coordinator must treat this as a retryable (transient) inconsistency.
@Test
void processDeferredOutcome_proposalReady_missingProposalAttempt_persistsTransientError() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
// No PROPOSAL_READY attempt pre-populated
// persistTransientError returns true when the error record was persisted successfully
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
.orElse(null);
assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted");
assertTrue(errorAttempt.retryable(), "Transient error must be retryable");
}
// A stored proposal attempt missing its resolved date is an inconsistent persistence
// state; finalization must persist a FAILED_RETRYABLE attempt instead of crashing.
@Test
void processDeferredOutcome_proposalReady_inconsistentProposalNullDate_persistsTransientError() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
// Proposal attempt with a null resolved date (the LocalDate argument) — invalid for finalization.
ProcessingAttempt badProposal = new ProcessingAttempt(
fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
ProcessingStatus.PROPOSAL_READY, null, null, false,
"model", "prompt", 1, 100, "{}", "reason",
null, DateSource.AI_PROVIDED, "Rechnung", null);
attemptRepo.savedAttempts.add(badProposal);
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
.orElse(null);
assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted for inconsistent proposal state");
}
// Failure of the target-folder duplicate-name resolution must be recorded as a
// retryable error; a coordinator wired with a failing TargetFolderPort drives this path.
@Test
void processDeferredOutcome_proposalReady_duplicateResolutionFailure_persistsTransientError() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
DocumentProcessingCoordinator coordinatorWithFailingFolder = new DocumentProcessingCoordinator(
recordRepo, attemptRepo, unitOfWorkPort,
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
.orElse(null);
assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when duplicate resolution fails");
}
// Failure of the physical file copy must be recorded as a retryable error;
// a coordinator wired with a failing TargetFileCopyPort drives this path.
@Test
void processDeferredOutcome_proposalReady_copyFailure_persistsTransientError() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
DocumentProcessingCoordinator coordinatorWithFailingCopy = new DocumentProcessingCoordinator(
recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger());
coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
.orElse(null);
assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when file copy fails");
}
// A stored proposal whose title exceeds the 20-character base-title rule should never
// have been persisted; finalization must flag it as a retryable inconsistency.
@Test
void processDeferredOutcome_proposalReady_inconsistentProposalTitleExceeds20Chars_persistsTransientError() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
// Title of 21 characters violates the 20-char base-title rule — inconsistent persistence state
ProcessingAttempt badProposal = new ProcessingAttempt(
fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
ProcessingStatus.PROPOSAL_READY, null, null, false,
"model", "prompt", 1, 100, "{}", "reason",
LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED,
"A".repeat(21), null);
attemptRepo.savedAttempts.add(badProposal);
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
.orElse(null);
assertNotNull(errorAttempt,
"A FAILED_RETRYABLE attempt must be persisted when the proposal title is inconsistent");
assertTrue(errorAttempt.retryable(), "Inconsistent proposal error must be retryable");
}
// A stored proposal whose title contains characters disallowed by the title rule
// (here a hyphen) must likewise be flagged as a retryable inconsistency.
@Test
void processDeferredOutcome_proposalReady_inconsistentProposalTitleWithDisallowedChars_persistsTransientError() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
// Hyphen is a disallowed character in the fachliche Titelregel
ProcessingAttempt badProposal = new ProcessingAttempt(
fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
ProcessingStatus.PROPOSAL_READY, null, null, false,
"model", "prompt", 1, 100, "{}", "reason",
LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED,
"Rechnung-2026", null);
attemptRepo.savedAttempts.add(badProposal);
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
.orElse(null);
assertNotNull(errorAttempt,
"A FAILED_RETRYABLE attempt must be persisted when the proposal title has disallowed characters");
}
// If persistence of the SUCCESS outcome fails after the copy, the method must
// signal failure to the caller by returning false.
@Test
void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_returnsFalse() {
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
// Simulate the unit-of-work throwing on execute, i.e. the write transaction fails.
unitOfWorkPort.failOnExecute = true;
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
assertFalse(result, "Should return false when persistence fails after successful copy");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
/** Builds a PROPOSAL_READY attempt whose date and title satisfy the filename rules. */
private ProcessingAttempt buildValidProposalAttempt() {
    LocalDate resolvedDate = LocalDate.of(2026, 1, 15);
    return new ProcessingAttempt(
            fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
            ProcessingStatus.PROPOSAL_READY, null, null, false,
            "gpt-4", "prompt-v1.txt", 1, 500, "{}", "reason",
            resolvedDate, DateSource.AI_PROVIDED, "Rechnung", null);
}
/** Builds a ready naming-proposal outcome with a fixed date, title, and AI context. */
private DocumentProcessingOutcome buildNamingProposalOutcome() {
    NamingProposal proposal = new NamingProposal(
            LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED, "Rechnung", "AI reasoning");
    AiAttemptContext aiContext = new AiAttemptContext(
            "gpt-4", "prompt-v1.txt", 1, 500, "{\"title\":\"Rechnung\",\"reasoning\":\"r\"}");
    return new NamingProposalReady(candidate, proposal, aiContext);
}
private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) {
Instant now = Instant.now();
return new DocumentRecord(
@@ -730,7 +933,9 @@ class DocumentProcessingCoordinatorTest {
status == ProcessingStatus.SUCCESS ? null : now,
status == ProcessingStatus.SUCCESS ? now : null,
now,
now
now,
null,
null
);
}
@@ -785,8 +990,16 @@ class DocumentProcessingCoordinatorTest {
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
    // Unmodifiable snapshot so callers never observe later mutations of savedAttempts.
    return savedAttempts.stream().toList();
}
@Override
public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) {
    // Walk backwards so the most recently saved PROPOSAL_READY attempt wins.
    for (int i = savedAttempts.size() - 1; i >= 0; i--) {
        ProcessingAttempt attempt = savedAttempts.get(i);
        if (attempt.status() == de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus.PROPOSAL_READY) {
            return attempt;
        }
    }
    return null;
}
}
private static class CapturingUnitOfWorkPort implements UnitOfWorkPort {
private final CapturingDocumentRecordRepository recordRepo;
private final CapturingProcessingAttemptRepository attemptRepo;
@@ -850,6 +1063,58 @@ class DocumentProcessingCoordinatorTest {
}
}
/** TargetFolderPort stub whose filename resolution always reports a technical failure. */
private static class FailingTargetFolderPort implements TargetFolderPort {

    @Override
    public String getTargetFolderLocator() {
        return "/tmp/target";
    }

    @Override
    public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
        // Every resolution attempt fails, regardless of the requested base name.
        return new TargetFolderTechnicalFailure("Simulated folder resolution failure");
    }

    @Override
    public void tryDeleteTargetFile(String resolvedFilename) {
        // Intentionally empty: nothing is ever written, so there is nothing to delete.
    }
}
/** TargetFileCopyPort stub that fails every copy with a technical failure. */
private static class FailingTargetFileCopyPort implements TargetFileCopyPort {

    @Override
    public TargetFileCopyResult copyToTarget(
            de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator,
            String resolvedFilename) {
        // NOTE(review): the boolean flag presumably marks partial-write cleanup state — confirm
        // against TargetFileCopyTechnicalFailure before relying on it.
        return new TargetFileCopyTechnicalFailure("Simulated copy failure", false);
    }
}
/** TargetFolderPort stub that resolves every base name to itself and never deletes. */
private static class NoOpTargetFolderPort implements TargetFolderPort {

    @Override
    public String getTargetFolderLocator() {
        return "/tmp/target";
    }

    @Override
    public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
        // No duplicate handling: the requested name is always considered free.
        return new ResolvedTargetFilename(baseName);
    }

    @Override
    public void tryDeleteTargetFile(String resolvedFilename) {
        // Intentionally empty: deletion is irrelevant for these tests.
    }
}
/** TargetFileCopyPort stub that reports success for every copy without doing any I/O. */
private static class NoOpTargetFileCopyPort implements TargetFileCopyPort {

    @Override
    public TargetFileCopyResult copyToTarget(
            de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator,
            String resolvedFilename) {
        return new TargetFileCopySuccess();
    }
}
/** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */
private static class CapturingProcessingLogger implements ProcessingLogger {
int infoCallCount = 0;

View File

@@ -0,0 +1,94 @@
package de.gecheckt.pdf.umbenenner.application.service;
import org.junit.jupiter.api.Test;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
 * Unit tests for {@link DocumentTextLimiter}.
 * <p>
 * Covers the happy path (shorter, equal, longer than the limit), boundary values for
 * {@code maxCharacters}, empty input, Unicode character counting, and argument validation.
 */
class DocumentTextLimiterTest {

    @Test
    void limit_textShorterThanMax_returnsTextUnchanged() {
        String input = "short text";
        assertThat(DocumentTextLimiter.limit(input, 100)).isEqualTo(input);
    }

    @Test
    void limit_textExactlyMax_returnsTextUnchanged() {
        String input = "exactly ten"; // 11 chars (despite its wording)
        assertThat(DocumentTextLimiter.limit(input, 11))
                .isEqualTo(input)
                .hasSize(11);
    }

    @Test
    void limit_textLongerThanMax_returnsTruncatedText() {
        assertThat(DocumentTextLimiter.limit("Hello, World!", 5))
                .isEqualTo("Hello")
                .hasSize(5);
    }

    @Test
    void limit_maxCharactersOne_returnsSingleChar() {
        assertThat(DocumentTextLimiter.limit("ABC", 1)).isEqualTo("A");
    }

    @Test
    void limit_emptyText_returnsEmptyString() {
        assertThat(DocumentTextLimiter.limit("", 100)).isEmpty();
    }

    @Test
    void limit_emptyTextWithMinMax_returnsEmptyString() {
        assertThat(DocumentTextLimiter.limit("", 1)).isEmpty();
    }

    @Test
    void limit_textWithUnicodeCharacters_respectsCharCount() {
        // German umlauts are single chars in Java, so they count like any other character.
        String input = "Rechnungsübersicht"; // 18 chars
        assertThat(DocumentTextLimiter.limit(input, 10))
                .hasSize(10)
                .startsWith("Rechnungs");
    }

    @Test
    void limit_nullText_throwsNullPointerException() {
        assertThatThrownBy(() -> DocumentTextLimiter.limit(null, 100))
                .isInstanceOf(NullPointerException.class)
                .hasMessage("text must not be null");
    }

    @Test
    void limit_maxCharactersZero_throwsIllegalArgumentException() {
        assertThatThrownBy(() -> DocumentTextLimiter.limit("text", 0))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("maxCharacters must be >= 1");
    }

    @Test
    void limit_negativeMaxCharacters_throwsIllegalArgumentException() {
        assertThatThrownBy(() -> DocumentTextLimiter.limit("text", -5))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("maxCharacters must be >= 1");
    }

    @Test
    void limit_doesNotModifyOriginalText() {
        String original = "This is the original document text that is long";
        String truncated = DocumentTextLimiter.limit(original, 10);
        // Java Strings are immutable; truncation must yield a distinct, shorter instance.
        assertThat(truncated)
                .isNotSameAs(original)
                .hasSize(10);
    }
}

View File

@@ -0,0 +1,264 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.service.TargetFilenameBuildingService.BaseFilenameReady;
import de.gecheckt.pdf.umbenenner.application.service.TargetFilenameBuildingService.BaseFilenameResult;
import de.gecheckt.pdf.umbenenner.application.service.TargetFilenameBuildingService.InconsistentProposalState;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import org.junit.jupiter.api.Test;
import java.time.Instant;
import java.time.LocalDate;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatNullPointerException;
/**
 * Unit tests for {@link TargetFilenameBuildingService}.
 * <p>
 * Covers the verbindliches Zielformat {@code YYYY-MM-DD - Titel.pdf}, the 20-character
 * base-title rule, the fachliche Titelregel (only letters, digits, and spaces), and the
 * detection of inconsistent persistence states.
 */
class TargetFilenameBuildingServiceTest {

    private static final DocumentFingerprint FINGERPRINT =
            new DocumentFingerprint("a".repeat(64));
    private static final RunId RUN_ID = new RunId("run-test");

    // -------------------------------------------------------------------------
    // Null guard
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_rejectsNullAttempt() {
        assertThatNullPointerException()
                .isThrownBy(() -> TargetFilenameBuildingService.buildBaseFilename(null));
    }

    // -------------------------------------------------------------------------
    // Happy path — correct format
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_validProposal_returnsCorrectFormat() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 15), "Rechnung");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
        assertThat(((BaseFilenameReady) result).baseFilename())
                .isEqualTo("2026-01-15 - Rechnung.pdf");
    }

    @Test
    void buildBaseFilename_dateWithLeadingZeros_formatsCorrectly() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 3, 5), "Kontoauszug");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
        assertThat(((BaseFilenameReady) result).baseFilename())
                .isEqualTo("2026-03-05 - Kontoauszug.pdf");
    }

    @Test
    void buildBaseFilename_titleWithDigits_isAccepted() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 6, 1), "Rechnung 2026");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
        assertThat(((BaseFilenameReady) result).baseFilename())
                .isEqualTo("2026-06-01 - Rechnung 2026.pdf");
    }

    @Test
    void buildBaseFilename_titleWithSpace_isAccepted() {
        // Spaces are explicitly allowed by the fachliche Titelregel.
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 4, 7), "Strom Abr");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
    }

    @Test
    void buildBaseFilename_titleWithUmlautsAndSzlig_isAccepted() {
        // ä, ö, ü, ß are Unicode letters and must be accepted
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 4, 7), "Bürogröße");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
        assertThat(((BaseFilenameReady) result).baseFilename())
                .isEqualTo("2026-04-07 - Bürogröße.pdf");
    }

    @Test
    void buildBaseFilename_titleExactly20Chars_isAccepted() {
        String title = "A".repeat(20); // exactly 20 characters — the upper bound
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), title);
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
    }

    // -------------------------------------------------------------------------
    // 20-character rule applies only to base title; format structure is separate
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_format_separatorAndExtensionAreNotCountedAgainstTitle() {
        // A 20-char title produces "YYYY-MM-DD - <20chars>.pdf" — total > 20 chars, which is fine
        String title = "Stromabrechnung 2026"; // 20 chars
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 3, 31), title);
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(BaseFilenameReady.class);
        String filename = ((BaseFilenameReady) result).baseFilename();
        assertThat(filename).isEqualTo("2026-03-31 - Stromabrechnung 2026.pdf");
        // The service does not append duplicate suffixes; those are added by the target folder adapter
        assertThat(filename).doesNotContain("(");
    }

    // -------------------------------------------------------------------------
    // InconsistentProposalState — null/invalid date
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_nullDate_returnsInconsistentProposalState() {
        ProcessingAttempt attempt = proposalAttempt(null, "Rechnung");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
        assertThat(((InconsistentProposalState) result).reason())
                .contains("no resolved date");
    }

    // -------------------------------------------------------------------------
    // InconsistentProposalState — null/blank title
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_nullTitle_returnsInconsistentProposalState() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), null);
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
        assertThat(((InconsistentProposalState) result).reason())
                .contains("no validated title");
    }

    @Test
    void buildBaseFilename_blankTitle_returnsInconsistentProposalState() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "   ");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
        assertThat(((InconsistentProposalState) result).reason())
                .contains("no validated title");
    }

    // -------------------------------------------------------------------------
    // InconsistentProposalState — title exceeds 20 characters
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_titleExceeds20Chars_returnsInconsistentProposalState() {
        String title = "A".repeat(21); // 21 characters — one over the limit
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), title);
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
        assertThat(((InconsistentProposalState) result).reason())
                .contains("exceeding 20 characters");
    }

    // -------------------------------------------------------------------------
    // InconsistentProposalState — disallowed characters in title
    // -------------------------------------------------------------------------

    @Test
    void buildBaseFilename_titleWithHyphen_returnsInconsistentProposalState() {
        // Hyphens are not letters, digits, or spaces — disallowed by fachliche Titelregel
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "Rechnung-2026");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
        assertThat(((InconsistentProposalState) result).reason())
                .contains("disallowed characters");
    }

    @Test
    void buildBaseFilename_titleWithSlash_returnsInconsistentProposalState() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "Rg/Strom");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
    }

    @Test
    void buildBaseFilename_titleWithDot_returnsInconsistentProposalState() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "Rechnung.pdf");
        BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(result).isInstanceOf(InconsistentProposalState.class);
    }

    // -------------------------------------------------------------------------
    // InconsistentProposalState — reason field is non-null
    // -------------------------------------------------------------------------

    @Test
    void inconsistentProposalState_reason_isNeverNull() {
        ProcessingAttempt attempt = proposalAttempt(null, "Rechnung");
        InconsistentProposalState state =
                (InconsistentProposalState) TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(state.reason()).isNotNull();
    }

    // -------------------------------------------------------------------------
    // BaseFilenameReady — result record is non-null and non-blank
    // -------------------------------------------------------------------------

    @Test
    void baseFilenameReady_baseFilename_isNeverNullOrBlank() {
        ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 7, 4), "Bescheid");
        BaseFilenameReady ready =
                (BaseFilenameReady) TargetFilenameBuildingService.buildBaseFilename(attempt);
        assertThat(ready.baseFilename()).isNotNull().isNotBlank();
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /**
     * Builds a PROPOSAL_READY attempt with the given resolved date and title; all other
     * fields carry fixed, irrelevant values.
     */
    private ProcessingAttempt proposalAttempt(LocalDate date, String title) {
        return new ProcessingAttempt(
                FINGERPRINT, RUN_ID, 1,
                Instant.now(), Instant.now(),
                ProcessingStatus.PROPOSAL_READY,
                null, null, false,
                "gpt-4", "prompt-v1.txt", 1, 100,
                "{}", "reasoning text",
                date, DateSource.AI_PROVIDED, title,
                null);
    }
}

View File

@@ -2,6 +2,9 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
@@ -14,12 +17,23 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
import de.gecheckt.pdf.umbenenner.application.service.AiNamingService;
import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
@@ -445,7 +459,8 @@ class BatchRunProcessingUseCaseTest {
// Use a coordinator that always fails persistence
DocumentProcessingCoordinator failingProcessor = new DocumentProcessingCoordinator(
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
new NoOpUnitOfWorkPort(), new NoOpProcessingLogger()) {
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
new NoOpProcessingLogger()) {
@Override
public boolean processDeferredOutcome(
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
@@ -488,7 +503,8 @@ class BatchRunProcessingUseCaseTest {
// Coordinator that succeeds for first document, fails persistence for second
DocumentProcessingCoordinator selectiveFailingProcessor = new DocumentProcessingCoordinator(
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
new NoOpUnitOfWorkPort(), new NoOpProcessingLogger()) {
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
new NoOpProcessingLogger()) {
private int callCount = 0;
@Override
@@ -535,7 +551,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), candidatesPort, new NoOpExtractionPort(),
alwaysFailingFingerprintPort, new NoOpDocumentProcessingCoordinator(),
capturingLogger);
buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("fp-warn"), Instant.now()));
@@ -556,7 +572,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), failingPort, new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpDocumentProcessingCoordinator(),
capturingLogger);
buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("source-err"), Instant.now()));
@@ -578,7 +594,8 @@ class BatchRunProcessingUseCaseTest {
// Coordinator der immer Persistenzfehler zurückgibt
DocumentProcessingCoordinator failingCoordinator = new DocumentProcessingCoordinator(
new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(),
new NoOpUnitOfWorkPort(), new NoOpProcessingLogger()) {
new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(),
new NoOpProcessingLogger()) {
@Override
public boolean processDeferredOutcome(
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate c,
@@ -592,7 +609,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), failingCoordinator, capturingLogger);
new AlwaysSuccessFingerprintPort(), failingCoordinator, buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("persist-warn"), Instant.now()));
@@ -610,7 +627,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpDocumentProcessingCoordinator(),
capturingLogger);
buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("start-log"), Instant.now()));
@@ -630,7 +647,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpDocumentProcessingCoordinator(),
capturingLogger);
buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("lock-warn"), Instant.now()));
@@ -659,11 +676,11 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), processor, capturingLogger);
new AlwaysSuccessFingerprintPort(), processor, buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("log-precheck"), Instant.now()));
// Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5
// Ohne logExtractionResult wären es mindestens 4 debug()-Aufrufe; mit logExtractionResult 5
assertTrue(capturingLogger.debugCallCount >= 5,
"logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 5, war: "
+ capturingLogger.debugCallCount + ")");
@@ -689,7 +706,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), processor, capturingLogger);
new AlwaysSuccessFingerprintPort(), processor, buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("log-content-error"), Instant.now()));
@@ -718,7 +735,7 @@ class BatchRunProcessingUseCaseTest {
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, new MockRunLockPort(), candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), processor, capturingLogger);
new AlwaysSuccessFingerprintPort(), processor, buildStubAiNamingService(), capturingLogger);
useCase.execute(new BatchRunContext(new RunId("log-tech-error"), Instant.now()));
@@ -735,6 +752,20 @@ class BatchRunProcessingUseCaseTest {
// Helpers
// -------------------------------------------------------------------------
/**
 * Creates a stub {@link AiNamingService} whose AI port always reports a technical failure.
 * Intended for tests that exercise the batch run without caring about the AI pipeline outcome.
 */
private static AiNamingService buildStubAiNamingService() {
    AiInvocationPort failingAiPort =
            request -> new AiInvocationTechnicalFailure(request, "STUBBED", "Stubbed AI for test");
    PromptPort fixedPromptPort =
            () -> new PromptLoadingSuccess(new PromptIdentifier("stub-prompt"), "stub prompt content");
    // Frozen clock keeps validation deterministic across test runs.
    ClockPort epochClock = () -> java.time.Instant.EPOCH;
    return new AiNamingService(
            failingAiPort, fixedPromptPort, new AiResponseValidator(epochClock), "stub-model", 1000);
}
private static DefaultBatchRunProcessingUseCase buildUseCase(
RuntimeConfiguration runtimeConfig,
RunLockPort lockPort,
@@ -744,7 +775,7 @@ class BatchRunProcessingUseCaseTest {
DocumentProcessingCoordinator processor) {
return new DefaultBatchRunProcessingUseCase(
runtimeConfig, lockPort, candidatesPort, extractionPort, fingerprintPort, processor,
new NoOpProcessingLogger());
buildStubAiNamingService(), new NoOpProcessingLogger());
}
private static RuntimeConfiguration buildConfig(Path tempDir) throws Exception {
@@ -906,7 +937,7 @@ class BatchRunProcessingUseCaseTest {
private static class NoOpDocumentProcessingCoordinator extends DocumentProcessingCoordinator {
NoOpDocumentProcessingCoordinator() {
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(),
new NoOpProcessingLogger());
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
}
}
@@ -918,7 +949,7 @@ class BatchRunProcessingUseCaseTest {
TrackingDocumentProcessingCoordinator() {
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(),
new NoOpProcessingLogger());
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger());
}
@Override
@@ -948,6 +979,32 @@ class BatchRunProcessingUseCaseTest {
int processCallCount() { return processCallCount; }
}
/** TargetFolderPort stub: resolves every base name to itself and ignores deletions. */
private static class NoOpTargetFolderPort implements TargetFolderPort {

    @Override
    public String getTargetFolderLocator() {
        return "/tmp/target";
    }

    @Override
    public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
        // The requested name is always considered free — no duplicate suffixing here.
        return new ResolvedTargetFilename(baseName);
    }

    @Override
    public void tryDeleteTargetFile(String resolvedFilename) {
        // Intentionally empty: cleanup is irrelevant for these tests.
    }
}
/** TargetFileCopyPort stub: every copy succeeds without touching the file system. */
private static class NoOpTargetFileCopyPort implements TargetFileCopyPort {

    @Override
    public TargetFileCopyResult copyToTarget(
            de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator,
            String resolvedFilename) {
        return new TargetFileCopySuccess();
    }
}
/** No-op DocumentRecordRepository for use in test instances. */
private static class NoOpDocumentRecordRepository implements DocumentRecordRepository {
@Override
@@ -983,8 +1040,13 @@ class BatchRunProcessingUseCaseTest {
// No-op stub: the attempt history is never consulted by tests using this repository.
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
return List.of();
}
// No-op stub: pretends no PROPOSAL_READY attempt was ever recorded.
@Override
public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) {
return null;
}
}
/** No-op UnitOfWorkPort for use in test instances. */
private static class NoOpUnitOfWorkPort implements UnitOfWorkPort {
@Override