diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 8e26047..305b857 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -17,7 +17,8 @@ "Bash(grep \"\\\\.java$\")", "Bash(mvn -q clean compile -DskipTests)", "Bash(mvn -q test)", - "Bash(mvn -q clean test)" + "Bash(mvn -q clean test)", + "Bash(./mvnw.cmd:*)" ] } } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentKnownProcessable.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentKnownProcessable.java new file mode 100644 index 0000000..a7c14d0 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentKnownProcessable.java @@ -0,0 +1,30 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Lookup result indicating that a master record exists and the document is not yet terminal. + *
+ * The document is known (fingerprint exists in the persistence store) but its overall + * status is neither {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#SUCCESS} + * nor {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#FAILED_FINAL}. + * The use case may continue with normal M4 processing using the provided record. + *
+ * The existing {@link DocumentRecord} is supplied so the use case can inspect the + * current status, failure counters, and other fields required to apply M4 retry rules + * without an additional lookup. + * + * @param record the current master record for this document; never null + * @since M4-AP-001 + */ +public record DocumentKnownProcessable(DocumentRecord record) implements DocumentRecordLookupResult { + + /** + * Compact constructor rejecting a {@code null} record; the record's overall status is not verified here. + * + * @throws NullPointerException if {@code record} is null + */ + public DocumentKnownProcessable { + Objects.requireNonNull(record, "record must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentPersistenceException.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentPersistenceException.java new file mode 100644 index 0000000..0b2a5ad --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentPersistenceException.java @@ -0,0 +1,48 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Unchecked exception thrown by persistence write operations when a technical + * infrastructure failure prevents the operation from completing. + *
+ * This exception is thrown by {@link DocumentRecordRepository} and + * {@link ProcessingAttemptRepository} write methods, and by + * {@link PersistenceSchemaInitializationPort#initializeSchema()}, when the underlying + * persistence layer (SQLite) cannot be reached or returns an unrecoverable error. + *
+ * Batch run impact: + *
+ * The exception is not used for master record lookups; those read failures are modelled + * as {@link PersistenceLookupTechnicalFailure} in the sealed + * {@link DocumentRecordLookupResult} hierarchy to allow exhaustive pattern matching + * at the call site. The query {@link ProcessingAttemptRepository#loadNextAttemptNumber} does declare this exception. + * + * @since M4-AP-001 + */ +public class DocumentPersistenceException extends RuntimeException { + + /** + * Constructs a new {@code DocumentPersistenceException} with the given message. + * + * @param message human-readable description of the persistence failure + */ + public DocumentPersistenceException(String message) { + super(message); + } + + /** + * Constructs a new {@code DocumentPersistenceException} with message and cause. + * + * @param message human-readable description of the persistence failure + * @param cause the underlying throwable that caused this failure + */ + public DocumentPersistenceException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java new file mode 100644 index 0000000..2a88afe --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java @@ -0,0 +1,83 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import java.time.Instant; +import java.util.Objects; + +/** + * Application-facing representation of the document master record (Dokument-Stammsatz). + *
+ * One {@code DocumentRecord} exists per unique {@link DocumentFingerprint}. It carries + * the current overall status, failure counters, and the most recently known source + * location of the document. + *
+ * Architecture boundary: This type contains no SQLite or JDBC types. + * Mapping between {@code DocumentRecord} and the persistence layer is performed + * exclusively by the repository adapter in {@code adapter-out}. + *
+ * M4 field semantics: + *
+ * Not included in M4: target path, target file name, AI-related fields. + * These are added in later milestones. + * + * @param fingerprint content-based identity; never null + * @param lastKnownSourceLocator opaque locator to the physical source file; never null + * @param lastKnownSourceFileName file name at the time of the last known access; never null or blank + * @param overallStatus current processing status; never null + * @param failureCounters counters for content and transient errors; never null + * @param lastFailureInstant timestamp of the most recent failure, or {@code null} + * @param lastSuccessInstant timestamp of the successful processing, or {@code null} + * @param createdAt timestamp when this record was first created; never null + * @param updatedAt timestamp of the most recent update; never null + * @since M4-AP-001 + */ +public record DocumentRecord( + DocumentFingerprint fingerprint, + SourceDocumentLocator lastKnownSourceLocator, + String lastKnownSourceFileName, + ProcessingStatus overallStatus, + FailureCounters failureCounters, + Instant lastFailureInstant, + Instant lastSuccessInstant, + Instant createdAt, + Instant updatedAt) { + + /** + * Compact constructor validating the mandatory fields; {@code lastFailureInstant} and {@code lastSuccessInstant} are optional and may be {@code null}.
+ * + * @throws NullPointerException if any mandatory field is null + * @throws IllegalArgumentException if {@code lastKnownSourceFileName} is blank + */ + public DocumentRecord { + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); + Objects.requireNonNull(lastKnownSourceLocator, "lastKnownSourceLocator must not be null"); + Objects.requireNonNull(lastKnownSourceFileName, "lastKnownSourceFileName must not be null"); + if (lastKnownSourceFileName.isBlank()) { + throw new IllegalArgumentException("lastKnownSourceFileName must not be blank"); + } + Objects.requireNonNull(overallStatus, "overallStatus must not be null"); + Objects.requireNonNull(failureCounters, "failureCounters must not be null"); + Objects.requireNonNull(createdAt, "createdAt must not be null"); + Objects.requireNonNull(updatedAt, "updatedAt must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecordLookupResult.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecordLookupResult.java new file mode 100644 index 0000000..b35c371 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecordLookupResult.java @@ -0,0 +1,32 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Sealed result type for a document master record lookup via {@link DocumentRecordRepository}. + *
+ * The use case uses this result to make the per-document processing decision in M4 + * without additional assumptions; the outcome is exactly one of {@link DocumentUnknown}, {@link DocumentKnownProcessable}, {@link DocumentTerminalSuccess}, {@link DocumentTerminalFinalFailure}, or {@link PersistenceLookupTechnicalFailure}: + *
+ * Architecture boundary: No JDBC, SQLite, or filesystem types appear + * in this sealed hierarchy or in any of its implementations. + * + * @since M4-AP-001 + */ +public sealed interface DocumentRecordLookupResult + permits DocumentUnknown, + DocumentKnownProcessable, + DocumentTerminalSuccess, + DocumentTerminalFinalFailure, + PersistenceLookupTechnicalFailure { +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecordRepository.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecordRepository.java new file mode 100644 index 0000000..34d92fe --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecordRepository.java @@ -0,0 +1,72 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; + +/** + * Outbound port for reading and writing the document master record (Dokument-Stammsatz). + *
+ * One master record exists per unique {@link DocumentFingerprint}. The repository is + * responsible for the persistence of {@link DocumentRecord} values; it holds no + * business logic about retry rules, skip decisions, or status transitions. + *
+ * Lookup semantics: + * {@link #findByFingerprint(DocumentFingerprint)} returns a sealed + * {@link DocumentRecordLookupResult} that allows the use case to distinguish exhaustively + * between an unknown document, a known processable document, a terminal success, a + * terminal final failure, and a technical persistence failure — without additional + * assumptions or null checks. + *
+ * Write semantics: + *
+ * Architecture boundary: No JDBC, SQLite, or filesystem types appear + * in this interface or in any type it references. Mapping to and from the persistence + * schema is the exclusive responsibility of the adapter implementation. + * + * @since M4-AP-001 + */ +public interface DocumentRecordRepository { + + /** + * Looks up the master record for the given fingerprint. + *
+ * Returns a {@link DocumentRecordLookupResult} that encodes all possible outcomes + * including technical failures; this method never throws. + * + * @param fingerprint the content-based document identity to look up; must not be null + * @return {@link DocumentUnknown} if no record exists, + * {@link DocumentKnownProcessable} if the document is known but not terminal, + * {@link DocumentTerminalSuccess} if the document succeeded, + * {@link DocumentTerminalFinalFailure} if the document finally failed, or + * {@link PersistenceLookupTechnicalFailure} if the lookup itself failed + */ + DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint); + + /** + * Persists a new master record for a previously unknown document. + *
+ * The fingerprint within {@code record} must not yet exist in the persistence store; this contract does not state what happens if it does (NOTE(review): confirm the adapter's behaviour for duplicates). + * + * @param record the new master record to persist; must not be null + * @throws DocumentPersistenceException if the insert fails due to a technical error + */ + void create(DocumentRecord record); + + /** + * Updates the mutable fields of an existing master record. + *
+ * The record is identified by its {@link DocumentFingerprint}; the fingerprint + * itself is never changed. Mutable fields include the overall status, failure + * counters, last known source location, and all timestamp fields. + * + * @param record the updated master record; must not be null; fingerprint must exist + * @throws DocumentPersistenceException if the update fails due to a technical error + */ + void update(DocumentRecord record); +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentTerminalFinalFailure.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentTerminalFinalFailure.java new file mode 100644 index 0000000..6ac00d0 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentTerminalFinalFailure.java @@ -0,0 +1,30 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Lookup result indicating that the document has finally and irrecoverably failed. + *
+ * The master record's overall status is + * {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#FAILED_FINAL}. + * The use case must skip further processing and historise a + * {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#SKIPPED_FINAL_FAILURE} + * attempt. No failure counters are changed. + *
+ * The existing {@link DocumentRecord} is supplied so the use case can read the + * current record for the skip attempt historisation without an additional lookup. + * + * @param record the current (finally failed) master record for this document; never null + * @since M4-AP-001 + */ +public record DocumentTerminalFinalFailure(DocumentRecord record) implements DocumentRecordLookupResult { + + /** + * Compact constructor rejecting a {@code null} record; it does not verify that the record's status is {@code FAILED_FINAL}. + * + * @throws NullPointerException if {@code record} is null + */ + public DocumentTerminalFinalFailure { + Objects.requireNonNull(record, "record must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentTerminalSuccess.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentTerminalSuccess.java new file mode 100644 index 0000000..9fc28ed --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentTerminalSuccess.java @@ -0,0 +1,30 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Lookup result indicating that the document was already successfully processed. + *
+ * The master record's overall status is + * {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#SUCCESS}. + * The use case must skip further processing and historise a + * {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#SKIPPED_ALREADY_PROCESSED} + * attempt. No failure counters are changed. + *
+ * The existing {@link DocumentRecord} is supplied so the use case can read the + * current record for the skip attempt historisation without an additional lookup. + * + * @param record the current (successful) master record for this document; never null + * @since M4-AP-001 + */ +public record DocumentTerminalSuccess(DocumentRecord record) implements DocumentRecordLookupResult { + + /** + * Compact constructor rejecting a {@code null} record; it does not verify that the record's status is {@code SUCCESS}. + * + * @throws NullPointerException if {@code record} is null + */ + public DocumentTerminalSuccess { + Objects.requireNonNull(record, "record must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentUnknown.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentUnknown.java new file mode 100644 index 0000000..2bbc0ce --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentUnknown.java @@ -0,0 +1,14 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Lookup result indicating that the fingerprint is not yet present in the persistence store. + *
+ * No master record is stored for this content, so the document is treated as never processed before. The use case must create a new + * {@link DocumentRecord} and proceed with normal M4 processing. + *
+ * This variant carries no data because there is no existing record to return. + * + * @since M4-AP-001 + */ +public record DocumentUnknown() implements DocumentRecordLookupResult { +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java new file mode 100644 index 0000000..468f37a --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FailureCounters.java @@ -0,0 +1,75 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Immutable snapshot of the two independent failure counters maintained per document. + *
+ * M4 tracks two distinct counters separately because they drive different retry rules: + *
+ * A freshly discovered document starts with both counters at zero. + * Counters are only written by the repository layer on the instructions of the + * application use case; they never change as a side-effect of a read operation. + * + * @param contentErrorCount number of deterministic content errors recorded so far; + * must be >= 0 + * @param transientErrorCount number of transient technical errors recorded so far; + * must be >= 0 + * @since M4-AP-001 + */ +public record FailureCounters(int contentErrorCount, int transientErrorCount) { + + /** + * Compact constructor validating that neither counter is negative. + * + * @throws IllegalArgumentException if either counter is negative + */ + public FailureCounters { + if (contentErrorCount < 0) { + throw new IllegalArgumentException( + "contentErrorCount must be >= 0, but was: " + contentErrorCount); + } + if (transientErrorCount < 0) { + throw new IllegalArgumentException( + "transientErrorCount must be >= 0, but was: " + transientErrorCount); + } + } + + /** + * Returns a {@code FailureCounters} instance with both counters at zero. + * Use this when initialising a master record for a newly discovered document. + * + * @return zero-value counters + */ + public static FailureCounters zero() { + return new FailureCounters(0, 0); + } + + /** + * Returns a copy with the content error counter incremented by one. + * + * @return new instance with {@code contentErrorCount + 1}; at {@code Integer.MAX_VALUE} the {@code int} sum wraps to a negative value and the constructor throws {@code IllegalArgumentException} + */ + public FailureCounters withIncrementedContentErrorCount() { + return new FailureCounters(contentErrorCount + 1, transientErrorCount); + } + + /** + * Returns a copy with the transient error counter incremented by one.
+ * + * @return new instance with {@code transientErrorCount + 1}; at {@code Integer.MAX_VALUE} the {@code int} sum wraps to a negative value and the constructor throws {@code IllegalArgumentException} + */ + public FailureCounters withIncrementedTransientErrorCount() { + return new FailureCounters(contentErrorCount, transientErrorCount + 1); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintPort.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintPort.java new file mode 100644 index 0000000..5a01cf9 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintPort.java @@ -0,0 +1,40 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; + +/** + * Outbound port for computing the content-based fingerprint of exactly one + * processing candidate. + *
+ * Implementations must derive the fingerprint exclusively from the binary + * content of the file referenced by the candidate. File name, path, and metadata must + * not influence the result. + *
+ * Architecture boundary: All hashing logic and file I/O are confined + * to the {@code adapter-out} implementation. This interface exposes no + * {@code java.nio.file.Path}, {@code java.io.File}, or cryptographic types to Domain + * or Application. + *
+ * Failure semantics: Technical failures (unreadable file, I/O error) + * are returned as {@link FingerprintTechnicalError} rather than thrown as exceptions. + * A {@link FingerprintTechnicalError} result means no + * {@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} is available + * and the candidate cannot be identified; consequently no SQLite attempt record is + * created for this candidate in M4. + * + * @since M4-AP-001 + */ +public interface FingerprintPort { + + /** + * Computes the fingerprint for the given candidate. + *
+ * This method never throws. All outcomes, including technical failures, are + * encoded in the returned {@link FingerprintResult}. + * + * @param candidate the candidate whose file content is to be hashed; must not be null (NOTE(review): the outcome for a {@code null} candidate is not defined by this contract — confirm) + * @return {@link FingerprintSuccess} on success, or {@link FingerprintTechnicalError} + * on any infrastructure failure + */ + FingerprintResult computeFingerprint(SourceDocumentCandidate candidate); +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintResult.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintResult.java new file mode 100644 index 0000000..ae2b9a5 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintResult.java @@ -0,0 +1,20 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Sealed result type for a fingerprint computation attempt via {@link FingerprintPort}. + *
+ * Exhaustive variants ({@link FingerprintSuccess} and {@link FingerprintTechnicalError}): + *
+ * Historisation impact: If the result is {@link FingerprintTechnicalError}, + * the document cannot be identified and no SQLite attempt record is created. + * The failure is treated as a non-identifiable run event. + * + * @since M4-AP-001 + */ +public sealed interface FingerprintResult permits FingerprintSuccess, FingerprintTechnicalError { +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintSuccess.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintSuccess.java new file mode 100644 index 0000000..3230b3d --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintSuccess.java @@ -0,0 +1,27 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; + +import java.util.Objects; + +/** + * Successful outcome of a fingerprint computation. + *
+ * Carries the computed {@link DocumentFingerprint} that uniquely identifies the + * document by its content. The fingerprint can now be used as the primary key + * for all subsequent persistence operations in M4. + * + * @param fingerprint the successfully computed fingerprint; never null + * @since M4-AP-001 + */ +public record FingerprintSuccess(DocumentFingerprint fingerprint) implements FingerprintResult { + + /** + * Compact constructor rejecting a {@code null} fingerprint, so a success result always carries one. + * + * @throws NullPointerException if {@code fingerprint} is null + */ + public FingerprintSuccess { + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintTechnicalError.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintTechnicalError.java new file mode 100644 index 0000000..43b7938 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/FingerprintTechnicalError.java @@ -0,0 +1,34 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Technical failure during fingerprint computation. + *
+ * Returned by {@link FingerprintPort} when the adapter cannot read the file content + * to compute the SHA-256 hash. Typical causes include the file no longer being + * accessible between candidate discovery and hashing, I/O errors, or permission issues. + *
+ * Historisation impact: Because no {@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} + * could be produced, this failure is not historised in SQLite. No + * {@link ProcessingAttempt} is created. + * + * @param errorMessage human-readable description of the failure; never null or blank + * @param cause the underlying throwable, or {@code null} if not available + * @since M4-AP-001 + */ +public record FingerprintTechnicalError(String errorMessage, Throwable cause) implements FingerprintResult { + + /** + * Compact constructor validating the error message; {@code cause} is optional and is not checked. + * + * @throws NullPointerException if {@code errorMessage} is null + * @throws IllegalArgumentException if {@code errorMessage} is blank + */ + public FingerprintTechnicalError { + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + if (errorMessage.isBlank()) { + throw new IllegalArgumentException("errorMessage must not be blank"); + } + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/PersistenceLookupTechnicalFailure.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/PersistenceLookupTechnicalFailure.java new file mode 100644 index 0000000..e9cb7f6 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/PersistenceLookupTechnicalFailure.java @@ -0,0 +1,36 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Lookup result indicating that the master record lookup itself failed due to a + * technical infrastructure problem. + *
+ * The persistence layer (SQLite) could not be reached or returned an unexpected error. + * The document state is unknown; the use case must treat this candidate as a + * transient technical failure for this run and must not attempt to write any attempt + * record (since the underlying persistence is unavailable). + *
+ * This variant is distinct from a business-level "document not found" outcome + * ({@link DocumentUnknown}): here, the lookup operation itself failed. + * + * @param errorMessage human-readable description of the persistence failure; never null or blank + * @param cause the underlying throwable, or {@code null} if not available + * @since M4-AP-001 + */ +public record PersistenceLookupTechnicalFailure(String errorMessage, Throwable cause) + implements DocumentRecordLookupResult { + + /** + * Compact constructor validating the error message; {@code cause} is optional and is not checked. + * + * @throws NullPointerException if {@code errorMessage} is null + * @throws IllegalArgumentException if {@code errorMessage} is blank + */ + public PersistenceLookupTechnicalFailure { + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + if (errorMessage.isBlank()) { + throw new IllegalArgumentException("errorMessage must not be blank"); + } + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/PersistenceSchemaInitializationPort.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/PersistenceSchemaInitializationPort.java new file mode 100644 index 0000000..815d6d2 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/PersistenceSchemaInitializationPort.java @@ -0,0 +1,40 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Outbound port for initialising the SQLite persistence schema at program startup. + *
+ * This port is invoked exactly once per program run, before the batch + * document processing loop begins. The initialisation must ensure that all tables, + * indices, and constraints required for M4 persistence are present in the SQLite file. + *
+ * Timing: The adapter implementation must perform the schema + * initialisation eagerly and synchronously. Lazy or deferred initialisation during + * the document processing loop is not the intent of this port. + *
+ * Failure handling: If the schema cannot be initialised, the + * implementation must throw {@link DocumentPersistenceException}. The bootstrap + * layer must catch this exception and abort the run with exit code 1. + *
+ * Idempotency: Calling {@link #initializeSchema()} on a database + * that already has the correct schema must succeed without error (e.g. via + * {@code CREATE TABLE IF NOT EXISTS} semantics). + *
+ * Architecture boundary: No JDBC, SQLite, or filesystem types appear + * in this interface. All schema DDL and connection management are confined to the + * {@code adapter-out} implementation. + * + * @since M4-AP-001 + */ +public interface PersistenceSchemaInitializationPort { + + /** + * Creates or verifies the M4 persistence schema. + *
+ * Must be called once at program start, before any document processing begins. + * The method must be idempotent: calling it on an already-initialised database + * (for example one that already has the correct schema when the program starts) must not fail or alter existing data. + * + * @throws DocumentPersistenceException if the schema cannot be created or verified + */ + void initializeSchema(); +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java new file mode 100644 index 0000000..29d0d75 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java @@ -0,0 +1,88 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; + +import java.time.Instant; +import java.util.Objects; + +/** + * Application-facing representation of exactly one historised processing attempt + * (Versuchshistorie-Eintrag) for an identified document. + *
+ * Historisation boundary (M4): Only attempts for documents whose + * {@link DocumentFingerprint} was successfully computed are historised. Failures that + * occur before the fingerprint is available (e.g. the source file is + * unreadable before hashing) are not represented by a {@code ProcessingAttempt} + * and are not written to SQLite. + *
+ * Attempt number semantics: The attempt number starts at 1 for the + * first historised attempt per fingerprint and increases monotonically by 1 for every + * subsequent attempt, including skip attempts + * ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}, + * {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}). + *
+ * Field semantics: + *
+ * Not included in M4: model name, prompt identifier, AI raw response, + * AI reasoning, resolved date, date source, final title, final target file name. + * These fields are added in later milestones (M5+). + * + * @param fingerprint content-based document identity; never null + * @param runId identifier of the batch run; never null + * @param attemptNumber monotonic sequence number per fingerprint; must be >= 1 + * @param startedAt start of this processing attempt; never null + * @param endedAt end of this processing attempt; never null + * @param status outcome status of this attempt; never null + * @param failureClass failure classification, or {@code null} for non-failure statuses + * @param failureMessage failure description, or {@code null} for non-failure statuses + * @param retryable whether this failure should be retried in a later run + * @since M4-AP-001 + */ +public record ProcessingAttempt( + DocumentFingerprint fingerprint, + RunId runId, + int attemptNumber, + Instant startedAt, + Instant endedAt, + ProcessingStatus status, + String failureClass, + String failureMessage, + boolean retryable) { + + /** + * Compact constructor validating mandatory non-null fields and the attempt number; {@code failureClass} and {@code failureMessage} may be {@code null}, and the order of {@code startedAt} and {@code endedAt} is not checked.
+ * + * @throws NullPointerException if any mandatory field is null + * @throws IllegalArgumentException if {@code attemptNumber} is less than 1 + */ + public ProcessingAttempt { + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); + Objects.requireNonNull(runId, "runId must not be null"); + if (attemptNumber < 1) { + throw new IllegalArgumentException( + "attemptNumber must be >= 1, but was: " + attemptNumber); + } + Objects.requireNonNull(startedAt, "startedAt must not be null"); + Objects.requireNonNull(endedAt, "endedAt must not be null"); + Objects.requireNonNull(status, "status must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java new file mode 100644 index 0000000..cd30361 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java @@ -0,0 +1,70 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; + +import java.util.List; + +/** + * Outbound port for writing and reading the processing attempt history + * (Versuchshistorie). + *
+ * Every historisable processing attempt for an identified document results + * in exactly one {@link ProcessingAttempt} record written via {@link #save(ProcessingAttempt)}. + *
+ * Historisation boundary: Only attempts with a successfully computed + * {@link de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint} are historised. + * Failures that occur before the fingerprint is available are not recorded + * through this port. + *
+ * Attempt number semantics: + * Attempt numbers start at 1 per fingerprint and increase monotonically by 1 + * for every saved attempt, including skip attempts. The use case calls + * {@link #loadNextAttemptNumber(DocumentFingerprint)} to obtain the correct sequence + * number before constructing a {@link ProcessingAttempt}. + *
+ * Architecture boundary: No JDBC, SQLite, or filesystem types appear + * in this interface. Mapping to and from the persistence schema is the exclusive + * responsibility of the adapter implementation. + * + * @since M4-AP-001 + */ +public interface ProcessingAttemptRepository { + + /** + * Returns the attempt number to assign to the next attempt for the given + * fingerprint. + *
+ * If no prior attempts exist for the fingerprint, returns 1. + * Otherwise returns the current maximum attempt number plus 1. + * + * @param fingerprint the document identity; must not be null + * @return the next monotonic attempt number; always >= 1 + * @throws DocumentPersistenceException if the query fails due to a technical error + */ + int loadNextAttemptNumber(DocumentFingerprint fingerprint); + + /** + * Persists exactly one processing attempt record. + *
+ * The {@link ProcessingAttempt#attemptNumber()} must have been obtained from + * {@link #loadNextAttemptNumber(DocumentFingerprint)} in the same run to guarantee + * monotonic ordering. + * + * @param attempt the attempt to persist; must not be null + * @throws DocumentPersistenceException if the insert fails due to a technical error + */ + void save(ProcessingAttempt attempt); + + /** + * Returns all historised attempts for the given fingerprint, ordered by + * {@link ProcessingAttempt#attemptNumber()} ascending. + *
+ * Returns an empty list if no attempts have been recorded yet.
+ * Intended for use in tests and diagnostics; not required on the primary batch path.
+ *
+ * @param fingerprint the document identity; must not be null
+ * @return immutable list of attempts, ordered by attempt number; never null
+ * @throws DocumentPersistenceException if the query fails due to a technical error
+ */
+ List
+ * M4-AP-001 ports:
+ *
+ * M4-AP-001 value types and result types:
+ *
* Exception types:
*
* Architecture Rule: Outbound ports are implementation-agnostic and contain no business logic.
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentFingerprint.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentFingerprint.java
new file mode 100644
index 0000000..9e9a3ab
--- /dev/null
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentFingerprint.java
@@ -0,0 +1,51 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+import java.util.regex.Pattern;
+
+/**
+ * Unique, stable identity of a document derived exclusively from its binary content.
+ *
+ * A {@code DocumentFingerprint} is computed once per file read and used as the primary
+ * key for all subsequent persistence lookups and history entries. It is independent of
+ * the file name, path, or any metadata — only the raw file content determines the value.
+ *
+ * Identification semantics (M4):
+ *
+ * Architecture boundary: The hashing algorithm (SHA-256) and all
+ * file I/O required to compute the fingerprint are strictly confined to the
+ * {@code adapter-out} layer. Domain and Application only hold and compare the resulting
+ * hex string; they never access the filesystem or perform cryptographic operations.
+ *
+ * Pre-fingerprint failures: If computing the fingerprint fails
+ * (e.g. due to an I/O error), no {@code DocumentFingerprint} is created and the failure
+ * is not historised in SQLite. The attempt is treated as a non-identifiable run event,
+ * not as a documentable processing attempt.
+ *
+ * @param sha256Hex lowercase hex encoding of the SHA-256 digest (exactly 64 characters,
+ * characters {@code [0-9a-f]})
+ * @since M4-AP-001
+ */
+public record DocumentFingerprint(String sha256Hex) {
+
+    /**
+     * Precompiled pattern for a lowercase hex SHA-256 digest (exactly 64 characters).
+     * Compiled once so the regex is not rebuilt on every construction.
+     */
+    private static final Pattern SHA256_HEX = Pattern.compile("[0-9a-f]{64}");
+
+    /**
+     * Compact constructor that validates the hex string format.
+     *
+     * @param sha256Hex lowercase hex encoding of the SHA-256 digest
+     * @throws NullPointerException     if {@code sha256Hex} is null
+     * @throws IllegalArgumentException if {@code sha256Hex} is not exactly 64 lowercase hex characters
+     */
+    public DocumentFingerprint {
+        Objects.requireNonNull(sha256Hex, "sha256Hex must not be null");
+        // matches() requires the whole input to match, so the {64} quantifier
+        // also enforces the exact length; no separate length check is needed.
+        if (!SHA256_HEX.matcher(sha256Hex).matches()) {
+            throw new IllegalArgumentException(
+                    "sha256Hex must be a 64-character lowercase hex string, but was: '"
+                            + sha256Hex + "'");
+        }
+    }
+}
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java
index d3c7088..dfd552c 100644
--- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/ProcessingStatus.java
@@ -1,20 +1,44 @@
package de.gecheckt.pdf.umbenenner.domain.model;
/**
- * Enumeration of all valid processing status values for a document within a batch run.
+ * Enumeration of all valid processing status values for a document.
*
- * Each status reflects the outcome or current state of a document processing attempt.
- * Status transitions follow the rules defined in the architecture specification and persist
- * across multiple batch runs via the repository layer.
+ * Each status reflects the outcome or current state of a document in the
+ * master record ({@code DocumentRecord}) or in a single attempt record
+ * ({@code ProcessingAttempt}).
*
- * Status Categories:
+ * Overall-status semantics (master record, M4):
*
+ * Attempt-status semantics (attempt history, M4):
+ *
+ * M4 counter rules:
+ *
- * Examples: PDF has no extractable text, page limit exceeded, document is ambiguous.
+ * This status is reached after all allowed retries for a document are exhausted.
+ * For deterministic content errors (no usable text, page limit exceeded) this means
+ * the second occurrence of the error. For other error types, it means the configured
+ * maximum retry count has been reached.
*
- * A document with this status receives exactly one retry in a later batch run.
- * After that retry, if it still fails, status becomes {@link #FAILED_FINAL}.
- * No further retries are attempted.
+ * A document with this overall status is skipped in all future batch runs and
+ * a {@link #SKIPPED_FINAL_FAILURE} attempt is historised.
*/
FAILED_FINAL,
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java
index f0c785f..3b289ab 100644
--- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/package-info.java
@@ -6,6 +6,7 @@
*
+ *
+ *
+ *
+ *
*
*
+ *
+ *
- *
+ *
+ *
+ *
+ *
*
* @since M2-AP-001
@@ -40,13 +64,15 @@ public enum ProcessingStatus {
FAILED_RETRYABLE,
/**
- * Processing failed with a deterministic content error (non-recoverable problem).
+ * Processing has failed finally and irrecoverably — no further retries will be attempted.
*