diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java index 9d031f5..6ef1ac0 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidator.java @@ -4,6 +4,7 @@ import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; @@ -16,8 +17,20 @@ import java.util.List; * and basic path existence checks. Throws {@link InvalidStartConfigurationException} * if any validation rule fails. *

- * Supports injected source folder validation for testability + * Supports injected source and target folder validation for testability * (allows mocking of platform-dependent filesystem checks). + * + *

Target folder validation

+ *

+ * The target folder is validated as "present or technically creatable": + *

+ *

+ * This behaviour ensures the target write path is technically usable before any + * document processing begins, without requiring the operator to create the folder manually. */ public class StartConfigurationValidator { @@ -48,22 +61,64 @@ public class StartConfigurationValidator { String checkSourceFolder(Path path); } + /** + * Abstraction for target folder existence, creatability, and write-access checks. + *

+ * Separates filesystem operations from validation logic to enable + * platform-independent unit testing (mocking) of write-access and creation edge cases. + *

+ * The default implementation attempts to create the folder via + * {@code Files.createDirectories} if it does not yet exist, then verifies it is a + * directory and writable. Tests can substitute alternative implementations. + */ + @FunctionalInterface + public interface TargetFolderChecker { + /** + * Checks target folder usability and returns a validation error message, or null if valid. + *

+ * Checks (in order): + *

    + *
+     * <ol>
+     *   <li>If folder does not exist: attempt to create it via {@code createDirectories}.</li>
+     *   <li>Is a directory.</li>
+     *   <li>Is writable (required for the file-copy write path).</li>
+     * </ol>
+ * + * @param path the target folder path + * @return error message string, or null if all checks pass + */ + String checkTargetFolder(Path path); + } + private final SourceFolderChecker sourceFolderChecker; + private final TargetFolderChecker targetFolderChecker; /** - * Creates a validator with the default source folder checker (NIO-based). + * Creates a validator with default NIO-based source and target folder checkers. */ public StartConfigurationValidator() { - this(new DefaultSourceFolderChecker()); + this(new DefaultSourceFolderChecker(), new DefaultTargetFolderChecker()); } /** * Creates a validator with a custom source folder checker (primarily for testing). + * Uses the default NIO-based target folder checker. * - * @param sourceFolderChecker the checker to use (must not be null) + * @param sourceFolderChecker the source folder checker to use (must not be null) */ public StartConfigurationValidator(SourceFolderChecker sourceFolderChecker) { + this(sourceFolderChecker, new DefaultTargetFolderChecker()); + } + + /** + * Creates a validator with custom source and target folder checkers (primarily for testing). 
+ * + * @param sourceFolderChecker the source folder checker to use (must not be null) + * @param targetFolderChecker the target folder checker to use (must not be null) + */ + public StartConfigurationValidator(SourceFolderChecker sourceFolderChecker, + TargetFolderChecker targetFolderChecker) { this.sourceFolderChecker = sourceFolderChecker; + this.targetFolderChecker = targetFolderChecker; } /** @@ -130,7 +185,14 @@ public class StartConfigurationValidator { } private void validateTargetFolder(Path targetFolder, List errors) { - validateRequiredExistingDirectory(targetFolder, "target.folder", errors); + if (targetFolder == null) { + errors.add("- target.folder: must not be null"); + return; + } + String checkError = targetFolderChecker.checkTargetFolder(targetFolder); + if (checkError != null) { + errors.add(checkError); + } } private void validateSqliteFile(Path sqliteFile, List errors) { @@ -321,4 +383,38 @@ public class StartConfigurationValidator { return null; // All checks passed } } + + /** + * Default NIO-based implementation of {@link TargetFolderChecker}. + *

+ * Validates that the target folder is present and writable for the file-copy write path. + * If the folder does not yet exist, creation is attempted via {@code Files.createDirectories}. + *

+ * This satisfies the "present or technically creatable" requirement: the folder need not + * exist before the application starts, but must be reachable at startup time. + *

+ * This separation allows unit tests to inject alternative implementations + * that control the outcome of write-access or creation checks without relying on actual + * filesystem permissions (which are platform-dependent). + */ + private static class DefaultTargetFolderChecker implements TargetFolderChecker { + @Override + public String checkTargetFolder(Path path) { + if (!Files.exists(path)) { + try { + Files.createDirectories(path); + } catch (IOException e) { + return "- target.folder: path does not exist and could not be created: " + + path + " (" + e.getMessage() + ")"; + } + } + if (!Files.isDirectory(path)) { + return "- target.folder: path is not a directory: " + path; + } + if (!Files.isWritable(path)) { + return "- target.folder: directory is not writable: " + path; + } + return null; // All checks passed + } + } } diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/SystemClockAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/SystemClockAdapter.java new file mode 100644 index 0000000..81a67b0 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/SystemClockAdapter.java @@ -0,0 +1,24 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.clock; + +import java.time.Instant; + +import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; + +/** + * System clock implementation of {@link ClockPort}. + *

+ * Returns the current wall-clock time from the JVM system clock. + * Intended for production use; tests should inject a controlled clock implementation. + */ +public class SystemClockAdapter implements ClockPort { + + /** + * Returns the current system time as an {@link Instant}. + * + * @return the current UTC instant; never null + */ + @Override + public Instant now() { + return Instant.now(); + } +} diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapter.java index d99a262..4770494 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapter.java @@ -76,7 +76,7 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo } String sql = """ - SELECT + SELECT last_known_source_locator, last_known_source_file_name, overall_status, @@ -85,7 +85,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo last_failure_instant, last_success_instant, created_at, - updated_at + updated_at, + last_target_path, + last_target_file_name FROM document_record WHERE fingerprint = ? """; @@ -146,8 +148,10 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo last_failure_instant, last_success_instant, created_at, - updated_at - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + updated_at, + last_target_path, + last_target_file_name + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
"""; try (Connection connection = getConnection(); @@ -163,6 +167,8 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo statement.setString(8, instantToString(record.lastSuccessInstant())); statement.setString(9, instantToString(record.createdAt())); statement.setString(10, instantToString(record.updatedAt())); + statement.setString(11, record.lastTargetPath()); + statement.setString(12, record.lastTargetFileName()); int rowsAffected = statement.executeUpdate(); if (rowsAffected != 1) { @@ -205,7 +211,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo transient_error_count = ?, last_failure_instant = ?, last_success_instant = ?, - updated_at = ? + updated_at = ?, + last_target_path = ?, + last_target_file_name = ? WHERE fingerprint = ? """; @@ -220,7 +228,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo statement.setString(6, instantToString(record.lastFailureInstant())); statement.setString(7, instantToString(record.lastSuccessInstant())); statement.setString(8, instantToString(record.updatedAt())); - statement.setString(9, record.fingerprint().sha256Hex()); + statement.setString(9, record.lastTargetPath()); + statement.setString(10, record.lastTargetFileName()); + statement.setString(11, record.fingerprint().sha256Hex()); int rowsAffected = statement.executeUpdate(); if (rowsAffected != 1) { @@ -260,7 +270,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo stringToInstant(rs.getString("last_failure_instant")), stringToInstant(rs.getString("last_success_instant")), stringToInstant(rs.getString("created_at")), - stringToInstant(rs.getString("updated_at")) + stringToInstant(rs.getString("updated_at")), + rs.getString("last_target_path"), + rs.getString("last_target_file_name") ); } diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java 
b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java index cdb129a..f3f6dc9 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java @@ -6,7 +6,9 @@ import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; +import java.sql.Types; import java.time.Instant; +import java.time.LocalDate; import java.util.ArrayList; import java.util.List; import java.util.Objects; @@ -17,13 +19,21 @@ import org.apache.logging.log4j.Logger; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; /** * SQLite implementation of {@link ProcessingAttemptRepository}. *

* Provides CRUD operations for the processing attempt history (Versuchshistorie) - * with explicit mapping between application types and the SQLite schema. + * including all AI traceability fields added during schema evolution. + *

+ * Schema compatibility: This adapter writes all columns including + * the AI traceability columns. When reading rows that were written before schema + * evolution, those columns contain {@code NULL} and are mapped to {@code null} + * in the Java record. *

* Architecture boundary: All JDBC and SQLite details are strictly * confined to this class. No JDBC types appear in the port interface or in any @@ -65,9 +75,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem */ @Override public int loadNextAttemptNumber(DocumentFingerprint fingerprint) { - if (fingerprint == null) { - throw new NullPointerException("fingerprint must not be null"); - } + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); String sql = """ SELECT COALESCE(MAX(attempt_number), 0) + 1 AS next_attempt_number @@ -78,7 +86,6 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem try (Connection connection = getConnection(); PreparedStatement statement = connection.prepareStatement(sql)) { - // Enable foreign key enforcement for this connection try (Statement pragmaStmt = connection.createStatement()) { pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON); } @@ -89,34 +96,27 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem if (rs.next()) { return rs.getInt("next_attempt_number"); } else { - // This should not happen, but fallback to 1 return 1; } } } catch (SQLException e) { - String message = "Failed to load next attempt number for fingerprint '" + - fingerprint.sha256Hex() + "': " + e.getMessage(); + String message = "Failed to load next attempt number for fingerprint '" + + fingerprint.sha256Hex() + "': " + e.getMessage(); logger.error(message, e); throw new DocumentPersistenceException(message, e); } } /** - * Persists exactly one processing attempt record. - *

- * The {@link ProcessingAttempt#attemptNumber()} must have been obtained from - * {@link #loadNextAttemptNumber(DocumentFingerprint)} in the same run to guarantee - * monotonic ordering. + * Persists exactly one processing attempt record including all AI traceability fields. * * @param attempt the attempt to persist; must not be null * @throws DocumentPersistenceException if the insert fails due to a technical error */ @Override public void save(ProcessingAttempt attempt) { - if (attempt == null) { - throw new NullPointerException("attempt must not be null"); - } + Objects.requireNonNull(attempt, "attempt must not be null"); String sql = """ INSERT INTO processing_attempt ( @@ -128,15 +128,24 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem status, failure_class, failure_message, - retryable - ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + retryable, + model_name, + prompt_identifier, + processed_page_count, + sent_character_count, + ai_raw_response, + ai_reasoning, + resolved_date, + date_source, + validated_title, + final_target_file_name + ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 
"""; try (Connection connection = getConnection(); Statement pragmaStmt = connection.createStatement(); PreparedStatement statement = connection.prepareStatement(sql)) { - // Enable foreign key enforcement for this connection pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON); statement.setString(1, attempt.fingerprint().sha256Hex()); @@ -145,11 +154,22 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem statement.setString(4, attempt.startedAt().toString()); statement.setString(5, attempt.endedAt().toString()); statement.setString(6, attempt.status().name()); - - // Handle nullable fields - statement.setString(7, attempt.failureClass()); - statement.setString(8, attempt.failureMessage()); + setNullableString(statement, 7, attempt.failureClass()); + setNullableString(statement, 8, attempt.failureMessage()); statement.setBoolean(9, attempt.retryable()); + // AI traceability fields + setNullableString(statement, 10, attempt.modelName()); + setNullableString(statement, 11, attempt.promptIdentifier()); + setNullableInteger(statement, 12, attempt.processedPageCount()); + setNullableInteger(statement, 13, attempt.sentCharacterCount()); + setNullableString(statement, 14, attempt.aiRawResponse()); + setNullableString(statement, 15, attempt.aiReasoning()); + setNullableString(statement, 16, + attempt.resolvedDate() != null ? attempt.resolvedDate().toString() : null); + setNullableString(statement, 17, + attempt.dateSource() != null ? 
attempt.dateSource().name() : null); + setNullableString(statement, 18, attempt.validatedTitle()); + setNullableString(statement, 19, attempt.finalTargetFileName()); int rowsAffected = statement.executeUpdate(); if (rowsAffected != 1) { @@ -157,12 +177,12 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem "Expected to insert 1 row but affected " + rowsAffected + " rows"); } - logger.debug("Saved processing attempt #{} for fingerprint: {}", - attempt.attemptNumber(), attempt.fingerprint().sha256Hex()); + logger.debug("Saved processing attempt #{} for fingerprint: {}", + attempt.attemptNumber(), attempt.fingerprint().sha256Hex()); } catch (SQLException e) { - String message = "Failed to save processing attempt #" + attempt.attemptNumber() + - " for fingerprint '" + attempt.fingerprint().sha256Hex() + "': " + e.getMessage(); + String message = "Failed to save processing attempt #" + attempt.attemptNumber() + + " for fingerprint '" + attempt.fingerprint().sha256Hex() + "': " + e.getMessage(); logger.error(message, e); throw new DocumentPersistenceException(message, e); } @@ -171,31 +191,22 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem /** * Returns all historised attempts for the given fingerprint, ordered by * {@link ProcessingAttempt#attemptNumber()} ascending. - *

- * Returns an empty list if no attempts have been recorded yet. - * Intended for use in tests and diagnostics; not required on the primary batch path. * * @param fingerprint the document identity; must not be null - * @return immutable list of attempts, ordered by attempt number; never null - * @throws DocumentPersistenceException if the query fails due to a technical error + * @return immutable list of attempts; never null + * @throws DocumentPersistenceException if the query fails */ @Override public List findAllByFingerprint(DocumentFingerprint fingerprint) { - if (fingerprint == null) { - throw new NullPointerException("fingerprint must not be null"); - } + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); String sql = """ - SELECT - fingerprint, - run_id, - attempt_number, - started_at, - ended_at, - status, - failure_class, - failure_message, - retryable + SELECT + fingerprint, run_id, attempt_number, started_at, ended_at, + status, failure_class, failure_message, retryable, + model_name, prompt_identifier, processed_page_count, sent_character_count, + ai_raw_response, ai_reasoning, resolved_date, date_source, validated_title, + final_target_file_name FROM processing_attempt WHERE fingerprint = ? 
ORDER BY attempt_number ASC @@ -205,67 +216,151 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem Statement pragmaStmt = connection.createStatement(); PreparedStatement statement = connection.prepareStatement(sql)) { - // Enable foreign key enforcement for this connection pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON); - statement.setString(1, fingerprint.sha256Hex()); try (ResultSet rs = statement.executeQuery()) { List attempts = new ArrayList<>(); while (rs.next()) { - ProcessingAttempt attempt = mapResultSetToProcessingAttempt(rs); - attempts.add(attempt); + attempts.add(mapResultSetToProcessingAttempt(rs)); } - return List.copyOf(attempts); // Return immutable copy + return List.copyOf(attempts); } } catch (SQLException e) { - String message = "Failed to find processing attempts for fingerprint '" + - fingerprint.sha256Hex() + "': " + e.getMessage(); + String message = "Failed to find processing attempts for fingerprint '" + + fingerprint.sha256Hex() + "': " + e.getMessage(); logger.error(message, e); throw new DocumentPersistenceException(message, e); } } /** - * Maps a ResultSet row to a ProcessingAttempt. + * Returns the most recent attempt with status {@code PROPOSAL_READY} for the given + * fingerprint, or {@code null} if no such attempt exists. + *

+ * This is the leading source for the naming proposal: the most recent + * {@code PROPOSAL_READY} attempt carries the validated date, title, and reasoning + * that subsequent processing steps consume. * - * @param rs the ResultSet positioned at the current row - * @return the mapped ProcessingAttempt - * @throws SQLException if reading from the ResultSet fails + * @param fingerprint the document identity; must not be null + * @return the most recent {@code PROPOSAL_READY} attempt, or {@code null} + * @throws DocumentPersistenceException if the query fails */ + public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) { + Objects.requireNonNull(fingerprint, "fingerprint must not be null"); + + String sql = """ + SELECT + fingerprint, run_id, attempt_number, started_at, ended_at, + status, failure_class, failure_message, retryable, + model_name, prompt_identifier, processed_page_count, sent_character_count, + ai_raw_response, ai_reasoning, resolved_date, date_source, validated_title, + final_target_file_name + FROM processing_attempt + WHERE fingerprint = ? 
+ AND status = 'PROPOSAL_READY' + ORDER BY attempt_number DESC + LIMIT 1 + """; + + try (Connection connection = getConnection(); + Statement pragmaStmt = connection.createStatement(); + PreparedStatement statement = connection.prepareStatement(sql)) { + + pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON); + statement.setString(1, fingerprint.sha256Hex()); + + try (ResultSet rs = statement.executeQuery()) { + if (rs.next()) { + return mapResultSetToProcessingAttempt(rs); + } + return null; + } + + } catch (SQLException e) { + String message = "Failed to find latest PROPOSAL_READY attempt for fingerprint '" + + fingerprint.sha256Hex() + "': " + e.getMessage(); + logger.error(message, e); + throw new DocumentPersistenceException(message, e); + } + } + + // ------------------------------------------------------------------------- + // Mapping helpers + // ------------------------------------------------------------------------- + private ProcessingAttempt mapResultSetToProcessingAttempt(ResultSet rs) throws SQLException { + String resolvedDateStr = rs.getString("resolved_date"); + LocalDate resolvedDate = resolvedDateStr != null ? LocalDate.parse(resolvedDateStr) : null; + + String dateSourceStr = rs.getString("date_source"); + DateSource dateSource = dateSourceStr != null ? 
DateSource.valueOf(dateSourceStr) : null; + + Integer processedPageCount = (Integer) getNullableInt(rs, "processed_page_count"); + Integer sentCharacterCount = (Integer) getNullableInt(rs, "sent_character_count"); + return new ProcessingAttempt( new DocumentFingerprint(rs.getString("fingerprint")), - new de.gecheckt.pdf.umbenenner.domain.model.RunId(rs.getString("run_id")), + new RunId(rs.getString("run_id")), rs.getInt("attempt_number"), Instant.parse(rs.getString("started_at")), Instant.parse(rs.getString("ended_at")), - de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus.valueOf(rs.getString("status")), + ProcessingStatus.valueOf(rs.getString("status")), rs.getString("failure_class"), rs.getString("failure_message"), - rs.getBoolean("retryable") + rs.getBoolean("retryable"), + rs.getString("model_name"), + rs.getString("prompt_identifier"), + processedPageCount, + sentCharacterCount, + rs.getString("ai_raw_response"), + rs.getString("ai_reasoning"), + resolvedDate, + dateSource, + rs.getString("validated_title"), + rs.getString("final_target_file_name") ); } + // ------------------------------------------------------------------------- + // JDBC nullable helpers + // ------------------------------------------------------------------------- + + private static void setNullableString(PreparedStatement stmt, int index, String value) + throws SQLException { + if (value == null) { + stmt.setNull(index, Types.VARCHAR); + } else { + stmt.setString(index, value); + } + } + + private static void setNullableInteger(PreparedStatement stmt, int index, Integer value) + throws SQLException { + if (value == null) { + stmt.setNull(index, Types.INTEGER); + } else { + stmt.setInt(index, value); + } + } + + private static Object getNullableInt(ResultSet rs, String column) throws SQLException { + int value = rs.getInt(column); + return rs.wasNull() ? null : value; + } + /** - * Returns the JDBC URL this adapter uses to connect to the SQLite database. - *

- * Intended for logging and diagnostics only. + * Returns the JDBC URL this adapter uses. * * @return the JDBC URL; never null or blank */ public String getJdbcUrl() { return jdbcUrl; } - + /** - * Gets a connection to the database. - *

- * This method can be overridden by subclasses to provide a shared connection. - * - * @return a new database connection - * @throws SQLException if the connection cannot be established + * Returns a JDBC connection. May be overridden in tests to provide shared connections. */ protected Connection getConnection() throws SQLException { return DriverManager.getConnection(jdbcUrl); diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java index 0182f10..8bfbdc5 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java @@ -2,6 +2,7 @@ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; import java.sql.Connection; import java.sql.DriverManager; +import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.util.Objects; @@ -16,9 +17,8 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitiali * SQLite implementation of {@link PersistenceSchemaInitializationPort}. *

* Creates or verifies the two-level persistence schema in the configured SQLite - * database file. All DDL uses {@code IF NOT EXISTS} semantics, making the operation - * fully idempotent: calling {@link #initializeSchema()} on an already-initialised - * database succeeds without error and without modifying existing data. + * database file, and performs a controlled schema evolution from an earlier schema + * version to the current one. * *

Two-level schema

*

The schema consists of exactly two tables: @@ -30,10 +30,29 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitiali * the master record via fingerprint. * * + *

Schema evolution

+ *

+ * When upgrading from an earlier schema, this adapter uses idempotent + * {@code ALTER TABLE ... ADD COLUMN} statements for both tables. Columns that already + * exist are silently skipped, making the evolution safe to run on both fresh and existing + * databases. The current evolution adds: + *

+ * + *

M4→current-schema status migration

+ *

+ * Documents in an earlier positive intermediate state ({@code SUCCESS} recorded without + * a validated naming proposal) are idempotently migrated to {@code READY_FOR_AI} so that + * the AI naming pipeline processes them in the next run. Terminal negative states + * ({@code FAILED_RETRYABLE}, {@code FAILED_FINAL}, skip states) are left unchanged. + * *

Initialisation timing

*

This adapter must be invoked once at program startup, before the batch - * document processing loop begins. It is wired by the bootstrap module and called - * explicitly through the port. There is no lazy or deferred initialisation. + * document processing loop begins. * *

Architecture boundary

*

All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined @@ -44,34 +63,17 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class); + // ------------------------------------------------------------------------- + // DDL — document_record table + // ------------------------------------------------------------------------- + /** * DDL for the document master record table. *

- * Columns (mandatory fields): - *

- *

- * Not included (M5+ fields): target path, target file name, - * AI-related fields. + * Columns: id (PK), fingerprint (unique), last_known_source_locator, + * last_known_source_file_name, overall_status, content_error_count, + * transient_error_count, last_failure_instant, last_success_instant, + * created_at, updated_at. */ private static final String DDL_CREATE_DOCUMENT_RECORD = """ CREATE TABLE IF NOT EXISTS document_record ( @@ -90,36 +92,18 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti ) """; + // ------------------------------------------------------------------------- + // DDL — processing_attempt table (base schema, without AI traceability cols) + // ------------------------------------------------------------------------- + /** - * DDL for the processing attempt history table. + * DDL for the base processing attempt history table. *

- * Columns (mandatory fields): - *

+ * Base columns (present in all schema versions): id, fingerprint, run_id, + * attempt_number, started_at, ended_at, status, failure_class, failure_message, retryable. *

- * Skip attempts: Skip statuses ({@code SKIPPED_ALREADY_PROCESSED}, - * {@code SKIPPED_FINAL_FAILURE}) are stored as regular rows with {@code retryable = 0} - * and null failure fields. - *

- * Not included (M5+ fields): model name, prompt identifier, - * AI raw response, AI reasoning, resolved date, date source, final title, - * final target file name. + * AI traceability columns are added separately via {@code ALTER TABLE} to support + * idempotent evolution from earlier schemas. */ private static final String DDL_CREATE_PROCESSING_ATTEMPT = """ CREATE TABLE IF NOT EXISTS processing_attempt ( @@ -140,6 +124,10 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti ) """; + // ------------------------------------------------------------------------- + // DDL — indexes + // ------------------------------------------------------------------------- + /** Index on {@code processing_attempt.fingerprint} for fast per-document lookups. */ private static final String DDL_IDX_ATTEMPT_FINGERPRINT = "CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint " @@ -155,14 +143,69 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti "CREATE INDEX IF NOT EXISTS idx_document_record_overall_status " + "ON document_record (overall_status)"; + // ------------------------------------------------------------------------- + // DDL — columns added to processing_attempt via schema evolution + // ------------------------------------------------------------------------- + + /** + * Columns to add idempotently to {@code processing_attempt}. + * Each entry is {@code [column_name, column_type]}. 
+ */ + private static final String[][] EVOLUTION_ATTEMPT_COLUMNS = { + {"model_name", "TEXT"}, + {"prompt_identifier", "TEXT"}, + {"processed_page_count", "INTEGER"}, + {"sent_character_count", "INTEGER"}, + {"ai_raw_response", "TEXT"}, + {"ai_reasoning", "TEXT"}, + {"resolved_date", "TEXT"}, + {"date_source", "TEXT"}, + {"validated_title", "TEXT"}, + {"final_target_file_name", "TEXT"}, + }; + + // ------------------------------------------------------------------------- + // DDL — columns added to document_record via schema evolution + // ------------------------------------------------------------------------- + + /** + * Columns to add idempotently to {@code document_record}. + * Each entry is {@code [column_name, column_type]}. + */ + private static final String[][] EVOLUTION_RECORD_COLUMNS = { + {"last_target_path", "TEXT"}, + {"last_target_file_name", "TEXT"}, + }; + + // ------------------------------------------------------------------------- + // M4→current-schema status migration + // ------------------------------------------------------------------------- + + /** + * Migrates earlier positive intermediate states in {@code document_record} that were + * recorded as {@code SUCCESS} without a validated naming proposal to {@code READY_FOR_AI}, + * so the AI naming pipeline processes them in the next run. + *

+ * Only rows with {@code overall_status = 'SUCCESS'} that have no corresponding + * {@code processing_attempt} with {@code status = 'PROPOSAL_READY'} are updated. + * This migration is idempotent. + */ + private static final String SQL_MIGRATE_LEGACY_SUCCESS_TO_READY_FOR_AI = """ + UPDATE document_record + SET overall_status = 'READY_FOR_AI', + updated_at = datetime('now') + WHERE overall_status = 'SUCCESS' + AND NOT EXISTS ( + SELECT 1 FROM processing_attempt pa + WHERE pa.fingerprint = document_record.fingerprint + AND pa.status = 'PROPOSAL_READY' + ) + """; + private final String jdbcUrl; /** * Constructs the adapter with the JDBC URL of the SQLite database file. - *

- * The JDBC URL must be in the form {@code jdbc:sqlite:/path/to/file.db}. - * The file and its parent directories need not exist at construction time; - * SQLite creates them when the connection is first opened. * * @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank * @throws NullPointerException if {@code jdbcUrl} is null @@ -177,26 +220,22 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti } /** - * Creates or verifies the persistence schema in the SQLite database. + * Creates or verifies the persistence schema and performs schema evolution and + * status migration. *

- * Executes the following DDL statements in order: + * Execution order: *

-     *   <li>Enable foreign key enforcement ({@code PRAGMA foreign_keys = ON})</li>
-     *   <li>Create {@code document_record} table (if not exists)</li>
-     *   <li>Create {@code processing_attempt} table (if not exists)</li>
-     *   <li>Create indexes on {@code processing_attempt.fingerprint},
-     *       {@code processing_attempt.run_id}, and
-     *       {@code document_record.overall_status}</li>
+     *   <li>Enable foreign key enforcement.</li>
+     *   <li>Create {@code document_record} table (if not exists).</li>
+     *   <li>Create {@code processing_attempt} table (if not exists).</li>
+     *   <li>Create all indexes (if not exist).</li>
+     *   <li>Add AI-traceability columns to {@code processing_attempt} (idempotent evolution).</li>
+     *   <li>Migrate earlier positive intermediate state to {@code READY_FOR_AI} (idempotent).</li>
      * </ol>
      * <p>

- * All statements use {@code IF NOT EXISTS} semantics. Calling this method on an - * already-initialised database is safe and produces no changes. - *

- * Timing: Must be called once at program startup, before the - * batch document processing loop begins. + * All steps are safe to run on both fresh and existing databases. * - * @throws DocumentPersistenceException if the schema cannot be created or verified - * due to a JDBC or SQLite error + * @throws DocumentPersistenceException if any DDL or migration step fails */ @Override public void initializeSchema() { @@ -211,7 +250,7 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti statement.execute(DDL_CREATE_DOCUMENT_RECORD); logger.debug("Table 'document_record' created or already present."); - // Level 2: processing attempt history + // Level 2: processing attempt history (base columns only) statement.execute(DDL_CREATE_PROCESSING_ATTEMPT); logger.debug("Table 'processing_attempt' created or already present."); @@ -221,7 +260,20 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti statement.execute(DDL_IDX_RECORD_STATUS); logger.debug("Indexes created or already present."); - logger.info("M4 SQLite schema initialisation completed successfully."); + // Schema evolution: add AI-traceability + target-copy columns (idempotent) + evolveTableColumns(connection, "processing_attempt", EVOLUTION_ATTEMPT_COLUMNS); + evolveTableColumns(connection, "document_record", EVOLUTION_RECORD_COLUMNS); + + // Status migration: earlier positive intermediate state → READY_FOR_AI + int migrated = statement.executeUpdate(SQL_MIGRATE_LEGACY_SUCCESS_TO_READY_FOR_AI); + if (migrated > 0) { + logger.info("Status migration: {} document(s) migrated from legacy SUCCESS state to READY_FOR_AI.", + migrated); + } else { + logger.debug("Status migration: no documents required migration."); + } + + logger.info("SQLite schema initialisation and migration completed successfully."); } catch (SQLException e) { String message = "Failed to initialise SQLite persistence schema at '" + jdbcUrl + "': " + e.getMessage(); @@ -231,9 +283,43 @@ 
public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti } /** - * Returns the JDBC URL this adapter uses to connect to the SQLite database. + * Idempotently adds the given columns to the specified table. *

- * Intended for logging and diagnostics only. + * For each column that does not yet exist, an {@code ALTER TABLE ... ADD COLUMN} + * statement is executed. Columns that already exist are silently skipped. + * + * @param connection an open JDBC connection to the database + * @param tableName the name of the table to evolve + * @param columns array of {@code [column_name, column_type]} pairs to add + * @throws SQLException if a column addition fails for a reason other than duplicate column + */ + private void evolveTableColumns(Connection connection, String tableName, String[][] columns) + throws SQLException { + java.util.Set existingColumns = new java.util.HashSet<>(); + try (ResultSet rs = connection.getMetaData().getColumns(null, null, tableName, null)) { + while (rs.next()) { + existingColumns.add(rs.getString("COLUMN_NAME").toLowerCase()); + } + } + + for (String[] col : columns) { + String columnName = col[0]; + String columnType = col[1]; + if (!existingColumns.contains(columnName.toLowerCase())) { + String alterSql = "ALTER TABLE " + tableName + " ADD COLUMN " + columnName + " " + columnType; + try (Statement stmt = connection.createStatement()) { + stmt.execute(alterSql); + } + logger.debug("Schema evolution: added column '{}' to '{}'.", columnName, tableName); + } else { + logger.debug("Schema evolution: column '{}' in '{}' already present, skipped.", + columnName, tableName); + } + } + } + + /** + * Returns the JDBC URL this adapter uses to connect to the SQLite database. 
* * @return the JDBC URL; never null or blank */ diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/FilesystemTargetFileCopyAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/FilesystemTargetFileCopyAdapter.java new file mode 100644 index 0000000..697455c --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/FilesystemTargetFileCopyAdapter.java @@ -0,0 +1,141 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy; + +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.IOException; +import java.nio.file.AtomicMoveNotSupportedException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; +import java.util.Objects; + +/** + * Filesystem-based implementation of {@link TargetFileCopyPort}. + *

+ * Copies a source PDF to the configured target folder using a two-step approach: + *

+ * <ol>
+ *   <li>Write the source content to a temporary file in the target folder.</li>
+ *   <li>Rename/move the temporary file to the final resolved filename.</li>
+ * </ol>
+ * The atomic-move option is attempted first. If the filesystem does not support atomic + * moves (e.g., across different volumes), a standard move is used as a fallback. + * + *

<h2>Source integrity</h2>
+ * <p>
+ * The source file is never modified, moved, or deleted. Only a copy is created. + * + *

<h2>Temporary file naming</h2>
+ * <p>
+ * The temporary file uses the suffix {@code .tmp} appended to the resolved filename + * and is placed in the same target folder. This ensures the final rename is typically + * an intra-filesystem operation, maximising atomicity. + * + *

<h2>Architecture boundary</h2>
+ * <p>
+ * All NIO operations are confined to this adapter. No {@code Path} or {@code File} + * types appear in the port interface. + */ +public class FilesystemTargetFileCopyAdapter implements TargetFileCopyPort { + + private static final Logger logger = LogManager.getLogger(FilesystemTargetFileCopyAdapter.class); + + private final Path targetFolderPath; + + /** + * Creates the adapter for the given target folder. + * + * @param targetFolderPath the target folder path; must not be null + * @throws NullPointerException if {@code targetFolderPath} is null + */ + public FilesystemTargetFileCopyAdapter(Path targetFolderPath) { + this.targetFolderPath = Objects.requireNonNull(targetFolderPath, "targetFolderPath must not be null"); + } + + /** + * Copies the source document to the target folder under the given resolved filename. + *

+ * The copy is performed via a temporary file ({@code resolvedFilename + ".tmp"}) in + * the target folder followed by a move/rename to the final name. + *

+ * If any step fails, a best-effort cleanup of the temporary file is attempted + * before returning the failure result. + * + * @param sourceLocator opaque locator identifying the source file; must not be null + * @param resolvedFilename the final filename in the target folder; must not be null or blank + * @return {@link TargetFileCopySuccess} on success, or + * {@link TargetFileCopyTechnicalFailure} on any failure + */ + @Override + public TargetFileCopyResult copyToTarget(SourceDocumentLocator sourceLocator, String resolvedFilename) { + Objects.requireNonNull(sourceLocator, "sourceLocator must not be null"); + Objects.requireNonNull(resolvedFilename, "resolvedFilename must not be null"); + + Path sourcePath = Paths.get(sourceLocator.value()); + Path finalTargetPath = targetFolderPath.resolve(resolvedFilename); + Path tempTargetPath = targetFolderPath.resolve(resolvedFilename + ".tmp"); + + boolean tempCreated = false; + + try { + // Step 1: Copy source to temporary file in target folder + Files.copy(sourcePath, tempTargetPath, StandardCopyOption.REPLACE_EXISTING); + tempCreated = true; + logger.debug("Copied source '{}' to temporary file '{}'.", + sourceLocator.value(), tempTargetPath.getFileName()); + + // Step 2: Atomic move/rename to final target filename + moveToFinalTarget(tempTargetPath, finalTargetPath); + + logger.debug("Target copy completed: '{}'.", resolvedFilename); + return new TargetFileCopySuccess(); + + } catch (Exception e) { + String message = "Failed to copy source '" + sourceLocator.value() + + "' to target '" + resolvedFilename + "': " + e.getMessage(); + logger.error(message, e); + + boolean cleaned = tempCreated && tryDeletePath(tempTargetPath); + return new TargetFileCopyTechnicalFailure(message, cleaned); + } + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + /** + * Moves the temporary file to the final 
target path. + * Attempts an atomic move first; falls back to a standard move if the filesystem + * does not support atomic moves. + */ + private void moveToFinalTarget(Path tempPath, Path finalPath) throws IOException { + try { + Files.move(tempPath, finalPath, StandardCopyOption.ATOMIC_MOVE); + } catch (AtomicMoveNotSupportedException e) { + logger.debug("Atomic move not supported, falling back to standard move."); + Files.move(tempPath, finalPath, StandardCopyOption.REPLACE_EXISTING); + } + } + + /** + * Best-effort deletion of a path. Returns {@code true} if deletion succeeded + * or the file did not exist; {@code false} if an exception occurred. + */ + private boolean tryDeletePath(Path path) { + try { + Files.deleteIfExists(path); + return true; + } catch (IOException e) { + logger.warn("Best-effort cleanup: could not delete temporary file '{}': {}", + path, e.getMessage()); + return false; + } + } +} diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/FilesystemTargetFolderAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/FilesystemTargetFolderAdapter.java new file mode 100644 index 0000000..0f54635 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/FilesystemTargetFolderAdapter.java @@ -0,0 +1,140 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder; + +import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Objects; + 
+/** + * Filesystem-based implementation of {@link TargetFolderPort}. + *

+ * Resolves unique filenames for the configured target folder by checking for existing + * files and appending a numeric collision-avoidance suffix when necessary. + * + *

<h2>Duplicate resolution algorithm</h2>
+ * <p>
+ * Given a base name such as {@code 2024-01-15 - Rechnung.pdf}, the adapter checks: + *

+ * <ol>
+ *   <li>{@code 2024-01-15 - Rechnung.pdf} — if free, return it.</li>
+ *   <li>{@code 2024-01-15 - Rechnung(1).pdf} — if free, return it.</li>
+ *   <li>{@code 2024-01-15 - Rechnung(2).pdf} — and so on.</li>
+ * </ol>
+ * The suffix is inserted immediately before {@code .pdf}. + * The 20-character base-title limit does not apply to the suffix. + * + *

<h2>Architecture boundary</h2>
+ * <p>
+ * All NIO operations are confined to this adapter. No {@code Path} or {@code File} types + * appear in the port interface. + */ +public class FilesystemTargetFolderAdapter implements TargetFolderPort { + + private static final Logger logger = LogManager.getLogger(FilesystemTargetFolderAdapter.class); + + /** Maximum number of duplicate suffixes attempted before giving up. */ + private static final int MAX_SUFFIX_ATTEMPTS = 9999; + + private final Path targetFolderPath; + + /** + * Creates the adapter for the given target folder. + * + * @param targetFolderPath the target folder path; must not be null + * @throws NullPointerException if {@code targetFolderPath} is null + */ + public FilesystemTargetFolderAdapter(Path targetFolderPath) { + this.targetFolderPath = Objects.requireNonNull(targetFolderPath, "targetFolderPath must not be null"); + } + + /** + * Returns the absolute string representation of the target folder path. + *

+ * Used by the application layer as an opaque target-folder locator for persistence. + * + * @return absolute path string of the target folder; never null or blank + */ + @Override + public String getTargetFolderLocator() { + return targetFolderPath.toAbsolutePath().toString(); + } + + /** + * Resolves the first available unique filename in the target folder for the given base name. + *

+ * Checks for {@code baseName} first; if taken, appends {@code (1)}, {@code (2)}, etc. + * directly before {@code .pdf} until a free name is found. + * + * @param baseName the desired filename including {@code .pdf} extension; + * must not be null or blank + * @return a {@link ResolvedTargetFilename} with the first available name, or a + * {@link TargetFolderTechnicalFailure} if folder access fails + */ + @Override + public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) { + Objects.requireNonNull(baseName, "baseName must not be null"); + + try { + // Try without suffix first + if (!Files.exists(targetFolderPath.resolve(baseName))) { + logger.debug("Resolved target filename without suffix: '{}'", baseName); + return new ResolvedTargetFilename(baseName); + } + + // Determine split point: everything before the final ".pdf" + if (!baseName.toLowerCase().endsWith(".pdf")) { + return new TargetFolderTechnicalFailure( + "Base name does not end with .pdf: '" + baseName + "'"); + } + String nameWithoutExt = baseName.substring(0, baseName.length() - 4); + + // Try (1), (2), ... + for (int i = 1; i <= MAX_SUFFIX_ATTEMPTS; i++) { + String candidate = nameWithoutExt + "(" + i + ").pdf"; + if (!Files.exists(targetFolderPath.resolve(candidate))) { + logger.debug("Resolved target filename with suffix ({}): '{}'", i, candidate); + return new ResolvedTargetFilename(candidate); + } + } + + return new TargetFolderTechnicalFailure( + "Too many duplicate files for base name '" + baseName + + "': checked up to suffix (" + MAX_SUFFIX_ATTEMPTS + ")"); + + } catch (Exception e) { + String message = "Failed to check target folder for duplicate resolution: " + e.getMessage(); + logger.error(message, e); + return new TargetFolderTechnicalFailure(message); + } + } + + /** + * Best-effort deletion of a file in the target folder. + *

+ * Used for rollback after a successful copy when subsequent persistence fails. + * Never throws; all exceptions are caught and logged at warn level. + * + * @param resolvedFilename the filename (not full path) to delete; must not be null + */ + @Override + public void tryDeleteTargetFile(String resolvedFilename) { + Objects.requireNonNull(resolvedFilename, "resolvedFilename must not be null"); + try { + boolean deleted = Files.deleteIfExists(targetFolderPath.resolve(resolvedFilename)); + if (deleted) { + logger.debug("Best-effort rollback: deleted target file '{}'.", resolvedFilename); + } else { + logger.debug("Best-effort rollback: target file '{}' did not exist.", resolvedFilename); + } + } catch (IOException e) { + logger.warn("Best-effort rollback: could not delete target file '{}': {}", + resolvedFilename, e.getMessage()); + } + } +} diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java index f0c4b67..605de0a 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java @@ -464,14 +464,16 @@ class StartConfigurationValidatorTest { } @Test - void validate_failsWhenTargetFolderDoesNotExist() throws Exception { + void validate_succeedsWhenTargetFolderDoesNotExistButParentExists() throws Exception { + // target.folder is "anlegbar" (creatable): parent tempDir exists, folder itself does not. + // The validator must create the folder and accept the configuration. 
Path sourceFolder = Files.createDirectory(tempDir.resolve("source")); Path sqliteFile = Files.createFile(tempDir.resolve("db.sqlite")); Path promptTemplateFile = Files.createFile(tempDir.resolve("prompt.txt")); StartConfiguration config = new StartConfiguration( sourceFolder, - tempDir.resolve("nonexistent"), + tempDir.resolve("nonexistent-target"), sqliteFile, URI.create("https://api.example.com"), "gpt-4", @@ -486,11 +488,43 @@ class StartConfigurationValidatorTest { "test-api-key" ); + assertDoesNotThrow(() -> validator.validate(config), + "Validator must accept a target folder that does not yet exist but can be created"); + assertTrue(Files.isDirectory(tempDir.resolve("nonexistent-target")), + "Target folder must have been created by the validator"); + } + + @Test + void validate_failsWhenTargetFolderCannotBeCreated() { + // Inject a TargetFolderChecker that simulates a creation failure. + StartConfigurationValidator validatorWithFailingChecker = new StartConfigurationValidator( + path -> null, // source folder checker always passes + path -> "- target.folder: path does not exist and could not be created: " + path + " (Permission denied)" + ); + + StartConfiguration config = new StartConfiguration( + tempDir.resolve("source"), + tempDir.resolve("uncreatable-target"), + tempDir.resolve("db.sqlite"), + URI.create("https://api.example.com"), + "gpt-4", + 30, + 3, + 100, + 50000, + tempDir.resolve("prompt.txt"), + null, + null, + "INFO", + "test-api-key" + ); + InvalidStartConfigurationException exception = assertThrows( InvalidStartConfigurationException.class, - () -> validator.validate(config) + () -> validatorWithFailingChecker.validate(config) ); - assertTrue(exception.getMessage().contains("target.folder: path does not exist")); + assertTrue(exception.getMessage().contains("target.folder: path does not exist and could not be created"), + "Error message must indicate that the target folder could not be created"); } @Test diff --git 
a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapterTest.java index 12e7cfa..4ddc0e3 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteDocumentRecordRepositoryAdapterTest.java @@ -74,7 +74,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, Instant.now().truncatedTo(ChronoUnit.MICROS), - Instant.now().truncatedTo(ChronoUnit.MICROS) + Instant.now().truncatedTo(ChronoUnit.MICROS), + null, + null ); // When @@ -111,7 +113,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, Instant.now().minusSeconds(60).truncatedTo(ChronoUnit.MICROS), - Instant.now().minusSeconds(60).truncatedTo(ChronoUnit.MICROS) + Instant.now().minusSeconds(60).truncatedTo(ChronoUnit.MICROS), + null, + null ); repository.create(initialRecord); @@ -127,7 +131,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, now, initialRecord.createdAt(), - now + now, + null, + null ); // When @@ -160,7 +166,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, Instant.now().truncatedTo(ChronoUnit.MICROS), - Instant.now().truncatedTo(ChronoUnit.MICROS) + Instant.now().truncatedTo(ChronoUnit.MICROS), + null, + null ); repository.create(record1); @@ -174,7 +182,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, Instant.now().truncatedTo(ChronoUnit.MICROS), - Instant.now().truncatedTo(ChronoUnit.MICROS) + Instant.now().truncatedTo(ChronoUnit.MICROS), + null, + null ); // When / Then @@ -196,7 +206,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, Instant.now().truncatedTo(ChronoUnit.MICROS), - 
Instant.now().truncatedTo(ChronoUnit.MICROS) + Instant.now().truncatedTo(ChronoUnit.MICROS), + null, + null ); // When / Then @@ -221,7 +233,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, now.minusSeconds(120), - now.minusSeconds(120) + now.minusSeconds(120), + null, + null ); repository.create(initialRecord); @@ -236,7 +250,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { failureInstant, null, now.minusSeconds(120), - failureInstant + failureInstant, + null, + null ); repository.update(failedFinalRecord); @@ -269,7 +285,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, createdAt, - createdAt + createdAt, + null, + null ); repository.create(initialRecord); @@ -284,7 +302,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { failureInstant, null, createdAt, - failureInstant + failureInstant, + null, + null ); // When @@ -321,7 +341,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { firstFailureAt, null, createdAt, - firstFailureAt + firstFailureAt, + null, + null ); repository.create(initialRecord); @@ -336,7 +358,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { secondFailureAt, null, createdAt, - secondFailureAt + secondFailureAt, + null, + null ); // When @@ -369,7 +393,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, createdAt, - createdAt + createdAt, + null, + null ); repository.create(initialRecord); @@ -384,7 +410,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { failureInstant, null, createdAt, - failureInstant + failureInstant, + null, + null ); // When @@ -439,7 +467,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, now, - now + now, + null, + null ); repository.create(record); @@ -467,7 +497,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { now.minusSeconds(60), null, now, - now + now, + null, + null ); repository.create(record); @@ -495,7 +527,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, // lastFailureInstant is null null, // 
lastSuccessInstant is null now, - now + now, + null, + null ); repository.create(record); @@ -509,6 +543,76 @@ class SqliteDocumentRecordRepositoryAdapterTest { assertThat(known.record().lastSuccessInstant()).isNull(); } + @Test + void create_and_update_shouldPersistAndReadTargetPathAndTargetFileName() { + // Given: create a record with null target fields initially + DocumentFingerprint fingerprint = new DocumentFingerprint( + "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + DocumentRecord initialRecord = new DocumentRecord( + fingerprint, + new SourceDocumentLocator("/source/doc.pdf"), + "doc.pdf", + ProcessingStatus.PROCESSING, + FailureCounters.zero(), + null, null, + now, now, + null, null + ); + repository.create(initialRecord); + + // Update with target path and filename + DocumentRecord successRecord = new DocumentRecord( + fingerprint, + new SourceDocumentLocator("/source/doc.pdf"), + "doc.pdf", + ProcessingStatus.SUCCESS, + FailureCounters.zero(), + null, now, + now, now, + "/target/folder", + "2026-01-15 - Rechnung.pdf" + ); + + // When + repository.update(successRecord); + DocumentRecordLookupResult result = repository.findByFingerprint(fingerprint); + + // Then + assertThat(result).isInstanceOf(DocumentTerminalSuccess.class); + DocumentRecord found = ((DocumentTerminalSuccess) result).record(); + assertThat(found.lastTargetPath()).isEqualTo("/target/folder"); + assertThat(found.lastTargetFileName()).isEqualTo("2026-01-15 - Rechnung.pdf"); + } + + @Test + void update_shouldPersistNullTargetFields_whenNotYetCopied() { + // Given: a record with null target path and filename (not yet in SUCCESS) + DocumentFingerprint fingerprint = new DocumentFingerprint( + "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + DocumentRecord record = new DocumentRecord( + fingerprint, + new 
SourceDocumentLocator("/source/pending.pdf"), + "pending.pdf", + ProcessingStatus.FAILED_RETRYABLE, + new FailureCounters(0, 1), + now, null, + now, now, + null, null + ); + repository.create(record); + + // When + DocumentRecordLookupResult result = repository.findByFingerprint(fingerprint); + + // Then + assertThat(result).isInstanceOf(DocumentKnownProcessable.class); + DocumentRecord found = ((DocumentKnownProcessable) result).record(); + assertThat(found.lastTargetPath()).isNull(); + assertThat(found.lastTargetFileName()).isNull(); + } + @Test void update_shouldPreserveCreatedAtTimestamp() { // Given: create with specific createdAt @@ -526,7 +630,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, null, createdAt, // Much older createdAt - createdAt + createdAt, + null, + null ); repository.create(initialRecord); @@ -540,7 +646,9 @@ class SqliteDocumentRecordRepositoryAdapterTest { null, now, createdAt, // createdAt should remain unchanged - now + now, + null, + null ); // When diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapterTest.java index 21b1a02..7a6c07e 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapterTest.java @@ -8,6 +8,7 @@ import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; import java.time.Instant; +import java.time.LocalDate; import java.time.temporal.ChronoUnit; import java.util.List; @@ -17,14 +18,16 @@ import org.junit.jupiter.api.io.TempDir; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; import 
de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import de.gecheckt.pdf.umbenenner.domain.model.RunId; /** * Tests for {@link SqliteProcessingAttemptRepositoryAdapter}. - * - * @since M4-AP-005 + *

+ * Covers base attempt persistence, AI traceability field round-trips, + * proposal-ready lookup, and non-AI-attempt status storability. */ class SqliteProcessingAttemptRepositoryAdapterTest { @@ -101,7 +104,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { insertDocumentRecord(fingerprint); // Insert first attempt - ProcessingAttempt firstAttempt = new ProcessingAttempt( + ProcessingAttempt firstAttempt = ProcessingAttempt.withoutAiFields( fingerprint, runId, 1, @@ -134,7 +137,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { // Insert multiple attempts for (int i = 1; i <= 5; i++) { - ProcessingAttempt attempt = new ProcessingAttempt( + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( fingerprint, runId, i, @@ -178,7 +181,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { // Insert a document record first (FK constraint) insertDocumentRecord(fingerprint); - ProcessingAttempt attempt = new ProcessingAttempt( + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( fingerprint, runId, 1, @@ -221,7 +224,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { // Insert a document record first (FK constraint) insertDocumentRecord(fingerprint); - ProcessingAttempt attempt = new ProcessingAttempt( + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( fingerprint, runId, 1, @@ -283,7 +286,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { insertDocumentRecord(fingerprint); // Insert attempts out of order to verify sorting - ProcessingAttempt attempt3 = new ProcessingAttempt( + ProcessingAttempt attempt3 = ProcessingAttempt.withoutAiFields( fingerprint, runId2, 3, @@ -296,7 +299,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { ); repository.save(attempt3); - ProcessingAttempt attempt1 = new ProcessingAttempt( + ProcessingAttempt attempt1 = ProcessingAttempt.withoutAiFields( fingerprint, runId1, 1, @@ -309,7 +312,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { ); repository.save(attempt1); 
- ProcessingAttempt attempt2 = new ProcessingAttempt( + ProcessingAttempt attempt2 = ProcessingAttempt.withoutAiFields( fingerprint, runId1, 2, @@ -368,6 +371,388 @@ class SqliteProcessingAttemptRepositoryAdapterTest { .hasMessageContaining("fingerprint"); } + // ------------------------------------------------------------------------- + // AI traceability fields — round-trip persistence + // ------------------------------------------------------------------------- + + @Test + void save_persistsAllAiTraceabilityFields_andFindAllReadsThemBack() { + // Given + DocumentFingerprint fingerprint = new DocumentFingerprint( + "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"); + RunId runId = new RunId("ai-run-1"); + Instant startedAt = Instant.now().minusSeconds(30).truncatedTo(ChronoUnit.MICROS); + Instant endedAt = Instant.now().truncatedTo(ChronoUnit.MICROS); + LocalDate resolvedDate = LocalDate.of(2026, 3, 15); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = new ProcessingAttempt( + fingerprint, runId, 1, startedAt, endedAt, + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "gpt-4o", "prompt-v1.txt", + 5, 1234, + "{\"date\":\"2026-03-15\",\"title\":\"Stromabrechnung\",\"reasoning\":\"Invoice date found.\"}", + "Invoice date found.", + resolvedDate, DateSource.AI_PROVIDED, + "Stromabrechnung", + null + ); + + // When + repository.save(attempt); + + // Then + List saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + ProcessingAttempt result = saved.get(0); + + assertThat(result.modelName()).isEqualTo("gpt-4o"); + assertThat(result.promptIdentifier()).isEqualTo("prompt-v1.txt"); + assertThat(result.processedPageCount()).isEqualTo(5); + assertThat(result.sentCharacterCount()).isEqualTo(1234); + assertThat(result.aiRawResponse()).contains("Stromabrechnung"); + assertThat(result.aiReasoning()).isEqualTo("Invoice date found."); + assertThat(result.resolvedDate()).isEqualTo(resolvedDate); + 
 assertThat(result.dateSource()).isEqualTo(DateSource.AI_PROVIDED); + assertThat(result.validatedTitle()).isEqualTo("Stromabrechnung"); + } + + @Test + void save_persistsAiFieldsWithFallbackDateSource() { + // Given + DocumentFingerprint fingerprint = new DocumentFingerprint( + "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb"); + RunId runId = new RunId("ai-run-2"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + LocalDate fallbackDate = LocalDate.of(2026, 4, 7); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = new ProcessingAttempt( + fingerprint, runId, 1, now, now.plusSeconds(5), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "claude-sonnet-4-6", "prompt-v2.txt", + 3, 800, + "{\"title\":\"Kontoauszug\",\"reasoning\":\"No date in document.\"}", + "No date in document.", + fallbackDate, DateSource.FALLBACK_CURRENT, + "Kontoauszug", + null + ); + + repository.save(attempt); + + List<ProcessingAttempt> saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + ProcessingAttempt result = saved.get(0); + + assertThat(result.dateSource()).isEqualTo(DateSource.FALLBACK_CURRENT); + assertThat(result.resolvedDate()).isEqualTo(fallbackDate); + } + + @Test + void save_persistsNullAiFields_whenNoAiCallWasMade() { + // Given + DocumentFingerprint fingerprint = new DocumentFingerprint( + "cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc"); + RunId runId = new RunId("no-ai-run"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( + fingerprint, runId, 1, now, now.plusSeconds(1), + ProcessingStatus.FAILED_RETRYABLE, + "NoTextError", "No extractable text", true + ); + + repository.save(attempt); + + List<ProcessingAttempt> saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + ProcessingAttempt result = saved.get(0); + + assertThat(result.modelName()).isNull(); 
+ assertThat(result.promptIdentifier()).isNull(); + assertThat(result.processedPageCount()).isNull(); + assertThat(result.sentCharacterCount()).isNull(); + assertThat(result.aiRawResponse()).isNull(); + assertThat(result.aiReasoning()).isNull(); + assertThat(result.resolvedDate()).isNull(); + assertThat(result.dateSource()).isNull(); + assertThat(result.validatedTitle()).isNull(); + } + + // ------------------------------------------------------------------------- + // findLatestProposalReadyAttempt + // ------------------------------------------------------------------------- + + @Test + void findLatestProposalReadyAttempt_returnsNull_whenNoAttemptsExist() { + DocumentFingerprint fingerprint = new DocumentFingerprint( + "dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddd"); + + ProcessingAttempt result = repository.findLatestProposalReadyAttempt(fingerprint); + + assertThat(result).isNull(); + } + + @Test + void findLatestProposalReadyAttempt_returnsNull_whenNoProposalReadyAttemptExists() { + DocumentFingerprint fingerprint = new DocumentFingerprint( + "eeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + + insertDocumentRecord(fingerprint); + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( + fingerprint, new RunId("run-x"), 1, now, now.plusSeconds(1), + ProcessingStatus.FAILED_RETRYABLE, "Err", "msg", true + ); + repository.save(attempt); + + ProcessingAttempt result = repository.findLatestProposalReadyAttempt(fingerprint); + + assertThat(result).isNull(); + } + + @Test + void findLatestProposalReadyAttempt_returnsSingleProposalReadyAttempt() { + DocumentFingerprint fingerprint = new DocumentFingerprint( + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + LocalDate date = LocalDate.of(2026, 2, 1); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = new 
ProcessingAttempt( + fingerprint, new RunId("run-p"), 1, now, now.plusSeconds(2), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "gpt-4o", "prompt-v1.txt", 2, 500, + "{\"title\":\"Rechnung\",\"reasoning\":\"Found.\"}", + "Found.", date, DateSource.AI_PROVIDED, "Rechnung", + null + ); + repository.save(attempt); + + ProcessingAttempt result = repository.findLatestProposalReadyAttempt(fingerprint); + + assertThat(result).isNotNull(); + assertThat(result.status()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(result.validatedTitle()).isEqualTo("Rechnung"); + assertThat(result.resolvedDate()).isEqualTo(date); + assertThat(result.dateSource()).isEqualTo(DateSource.AI_PROVIDED); + } + + @Test + void findLatestProposalReadyAttempt_returnsLatest_whenMultipleExist() { + DocumentFingerprint fingerprint = new DocumentFingerprint( + "1111111111111111111111111111111111111111111111111111111111111112"); + Instant base = Instant.now().truncatedTo(ChronoUnit.MICROS); + + insertDocumentRecord(fingerprint); + + // First PROPOSAL_READY attempt + repository.save(new ProcessingAttempt( + fingerprint, new RunId("run-1"), 1, base, base.plusSeconds(1), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "model-a", "prompt-v1.txt", 1, 100, + "{}", "First.", LocalDate.of(2026, 1, 1), DateSource.AI_PROVIDED, "TitelEins", + null + )); + + // Subsequent FAILED attempt + repository.save(ProcessingAttempt.withoutAiFields( + fingerprint, new RunId("run-2"), 2, + base.plusSeconds(10), base.plusSeconds(11), + ProcessingStatus.FAILED_RETRYABLE, "Err", "msg", true + )); + + // Second PROPOSAL_READY attempt (newer) + repository.save(new ProcessingAttempt( + fingerprint, new RunId("run-3"), 3, base.plusSeconds(20), base.plusSeconds(21), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "model-b", "prompt-v2.txt", 2, 200, + "{}", "Second.", LocalDate.of(2026, 2, 2), DateSource.AI_PROVIDED, "TitelZwei", + null + )); + + ProcessingAttempt result = 
 repository.findLatestProposalReadyAttempt(fingerprint); + + assertThat(result).isNotNull(); + assertThat(result.attemptNumber()).isEqualTo(3); + assertThat(result.validatedTitle()).isEqualTo("TitelZwei"); + assertThat(result.modelName()).isEqualTo("model-b"); + } + + @Test + void save_persistsFinalTargetFileName_forSuccessAttempt() { + // Given + DocumentFingerprint fingerprint = new DocumentFingerprint( + "4444444444444444444444444444444444444444444444444444444444444445"); + RunId runId = new RunId("success-run"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + LocalDate date = LocalDate.of(2026, 1, 15); + String expectedFileName = "2026-01-15 - Rechnung.pdf"; + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = new ProcessingAttempt( + fingerprint, runId, 1, now, now.plusSeconds(3), + ProcessingStatus.SUCCESS, + null, null, false, + "gpt-4", "prompt-v1.txt", 2, 600, + "{\"title\":\"Rechnung\",\"reasoning\":\"Invoice.\"}", + "Invoice.", + date, DateSource.AI_PROVIDED, + "Rechnung", + expectedFileName + ); + + // When + repository.save(attempt); + + // Then + List<ProcessingAttempt> saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + assertThat(saved.get(0).finalTargetFileName()).isEqualTo(expectedFileName); + assertThat(saved.get(0).status()).isEqualTo(ProcessingStatus.SUCCESS); + } + + @Test + void save_persistsNullFinalTargetFileName_forNonSuccessAttempt() { + // finalTargetFileName must remain null for PROPOSAL_READY and non-SUCCESS attempts + DocumentFingerprint fingerprint = new DocumentFingerprint( + "5555555555555555555555555555555555555555555555555555555555555556"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = new ProcessingAttempt( + fingerprint, new RunId("run-prop"), 1, now, now.plusSeconds(1), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "gpt-4", "prompt-v1.txt", 1, 200, + "{}", "reason", + 
 LocalDate.of(2026, 3, 1), DateSource.AI_PROVIDED, + "Kontoauszug", + null // no target filename yet + ); + + repository.save(attempt); + + List<ProcessingAttempt> saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + assertThat(saved.get(0).finalTargetFileName()).isNull(); + } + + @Test + void save_proposalAttemptNotOverwrittenBySubsequentSuccessAttempt() { + // Verifies that the leading PROPOSAL_READY attempt remains unchanged when + // a subsequent SUCCESS attempt is added (no update, only new insert). + DocumentFingerprint fingerprint = new DocumentFingerprint( + "6666666666666666666666666666666666666666666666666666666666666667"); + Instant base = Instant.now().truncatedTo(ChronoUnit.MICROS); + LocalDate date = LocalDate.of(2026, 2, 10); + + insertDocumentRecord(fingerprint); + + // First attempt: PROPOSAL_READY + ProcessingAttempt proposalAttempt = new ProcessingAttempt( + fingerprint, new RunId("run-1"), 1, base, base.plusSeconds(2), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "model-a", "prompt-v1.txt", 3, 700, + "{}", "reason.", date, DateSource.AI_PROVIDED, "Bescheid", null + ); + repository.save(proposalAttempt); + + // Second attempt: SUCCESS (target copy completed) + ProcessingAttempt successAttempt = new ProcessingAttempt( + fingerprint, new RunId("run-1"), 2, + base.plusSeconds(5), base.plusSeconds(6), + ProcessingStatus.SUCCESS, + null, null, false, + null, null, null, null, null, null, + null, null, null, + "2026-02-10 - Bescheid.pdf" + ); + repository.save(successAttempt); + + // Both attempts must be present + List<ProcessingAttempt> all = repository.findAllByFingerprint(fingerprint); + assertThat(all).hasSize(2); + + // The original PROPOSAL_READY attempt must remain unchanged + ProcessingAttempt first = all.get(0); + assertThat(first.status()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(first.validatedTitle()).isEqualTo("Bescheid"); + assertThat(first.finalTargetFileName()).isNull(); + + // The SUCCESS attempt carries the 
final filename + ProcessingAttempt second = all.get(1); + assertThat(second.status()).isEqualTo(ProcessingStatus.SUCCESS); + assertThat(second.finalTargetFileName()).isEqualTo("2026-02-10 - Bescheid.pdf"); + } + + @Test + void findLatestProposalReadyAttempt_rejectsNullFingerprint() { + assertThatThrownBy(() -> repository.findLatestProposalReadyAttempt(null)) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("fingerprint"); + } + + // ------------------------------------------------------------------------- + // READY_FOR_AI and PROPOSAL_READY status storability + // ------------------------------------------------------------------------- + + @Test + void save_canPersistReadyForAiStatus() { + DocumentFingerprint fingerprint = new DocumentFingerprint( + "2222222222222222222222222222222222222222222222222222222222222223"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( + fingerprint, new RunId("run-r"), 1, now, now.plusSeconds(1), + ProcessingStatus.READY_FOR_AI, null, null, false + ); + repository.save(attempt); + + List<ProcessingAttempt> saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + assertThat(saved.get(0).status()).isEqualTo(ProcessingStatus.READY_FOR_AI); + } + + @Test + void save_canPersistProposalReadyStatus() { + DocumentFingerprint fingerprint = new DocumentFingerprint( + "3333333333333333333333333333333333333333333333333333333333333334"); + Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); + + insertDocumentRecord(fingerprint); + + ProcessingAttempt attempt = new ProcessingAttempt( + fingerprint, new RunId("run-p2"), 1, now, now.plusSeconds(1), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "model-x", "prompt-v1.txt", 1, 50, + "{}", "Reasoning.", LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED, "Titel", + null + ); + repository.save(attempt); + + List<ProcessingAttempt> saved = repository.findAllByFingerprint(fingerprint); + assertThat(saved).hasSize(1); + assertThat(saved.get(0).status()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + } + // ------------------------------------------------------------------------- // Integration with document records (FK constraints) // ------------------------------------------------------------------------- @@ -380,7 +765,7 @@ class SqliteProcessingAttemptRepositoryAdapterTest { RunId runId = new RunId("test-run-7"); Instant now = Instant.now().truncatedTo(ChronoUnit.MICROS); - ProcessingAttempt attempt = new ProcessingAttempt( + ProcessingAttempt attempt = ProcessingAttempt.withoutAiFields( fingerprint, runId, 1, diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java index 032bbdc..47186ae 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java @@ -18,12 +18,12 @@ import org.junit.jupiter.api.io.TempDir; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; /** - * Unit tests for {@link SqliteSchemaInitializationAdapter}. + * Tests for {@link SqliteSchemaInitializationAdapter}. *

- * Verifies that the M4 two-level schema is created correctly, that the operation - * is idempotent, and that invalid configuration is rejected. - * - * @since M4-AP-003 + * Verifies that the two-level schema is created correctly, that schema evolution + * (idempotent addition of AI traceability columns) works, that the idempotent + * status migration of earlier positive intermediate states to {@code READY_FOR_AI} + * is correct, and that invalid configuration is rejected. */ class SqliteSchemaInitializationAdapterTest { @@ -87,7 +87,9 @@ class SqliteSchemaInitializationAdapterTest { "last_failure_instant", "last_success_instant", "created_at", - "updated_at" + "updated_at", + "last_target_path", + "last_target_file_name" ); } @@ -107,7 +109,17 @@ class SqliteSchemaInitializationAdapterTest { "status", "failure_class", "failure_message", - "retryable" + "retryable", + "model_name", + "prompt_identifier", + "processed_page_count", + "sent_character_count", + "ai_raw_response", + "ai_reasoning", + "resolved_date", + "date_source", + "validated_title", + "final_target_file_name" ); } @@ -239,6 +251,130 @@ class SqliteSchemaInitializationAdapterTest { } } + // ------------------------------------------------------------------------- + // Schema evolution — AI traceability columns + // ------------------------------------------------------------------------- + + @Test + void initializeSchema_addsAiTraceabilityColumnsToExistingSchema(@TempDir Path dir) + throws SQLException { + // Simulate a pre-evolution schema: create the base tables without AI columns + String jdbcUrl = jdbcUrl(dir, "evolution_test.db"); + try (Connection conn = DriverManager.getConnection(jdbcUrl); + var stmt = conn.createStatement()) { + stmt.execute(""" + CREATE TABLE IF NOT EXISTS document_record ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + last_known_source_locator TEXT NOT NULL, + last_known_source_file_name TEXT NOT NULL, + overall_status TEXT NOT NULL, + 
 content_error_count INTEGER NOT NULL DEFAULT 0, + transient_error_count INTEGER NOT NULL DEFAULT 0, + last_failure_instant TEXT, + last_success_instant TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint) + ) + """); + stmt.execute(""" + CREATE TABLE IF NOT EXISTS processing_attempt ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + run_id TEXT NOT NULL, + attempt_number INTEGER NOT NULL, + started_at TEXT NOT NULL, + ended_at TEXT NOT NULL, + status TEXT NOT NULL, + failure_class TEXT, + failure_message TEXT, + retryable INTEGER NOT NULL DEFAULT 0 + ) + """); + } + + // Running initializeSchema on the existing base schema must succeed (evolution) + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + Set<String> columns = readColumnNames(jdbcUrl, "processing_attempt"); + assertThat(columns).contains( + "model_name", "prompt_identifier", "processed_page_count", + "sent_character_count", "ai_raw_response", "ai_reasoning", + "resolved_date", "date_source", "validated_title"); + } + + // ------------------------------------------------------------------------- + // Status migration — earlier positive intermediate state → READY_FOR_AI + // ------------------------------------------------------------------------- + + @Test + void initializeSchema_migrates_legacySuccessWithoutProposal_toReadyForAi(@TempDir Path dir) + throws SQLException { + String jdbcUrl = jdbcUrl(dir, "migration_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + // Insert a document with SUCCESS status and no PROPOSAL_READY attempt + String fp = "d".repeat(64); + insertDocumentRecordWithStatus(jdbcUrl, fp, "SUCCESS"); + + // Run schema initialisation again (migration step runs every time) + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String status = readOverallStatus(jdbcUrl, fp); + assertThat(status).isEqualTo("READY_FOR_AI"); + } + + 
@Test + void initializeSchema_migration_isIdempotent(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "migration_idempotent_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String fp = "e".repeat(64); + insertDocumentRecordWithStatus(jdbcUrl, fp, "SUCCESS"); + + // Run migration twice — must not corrupt data or throw + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String status = readOverallStatus(jdbcUrl, fp); + assertThat(status).isEqualTo("READY_FOR_AI"); + } + + @Test + void initializeSchema_doesNotMigrate_successWithProposalReadyAttempt(@TempDir Path dir) + throws SQLException { + String jdbcUrl = jdbcUrl(dir, "migration_proposal_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String fp = "f".repeat(64); + // SUCCESS document that already has a PROPOSAL_READY attempt must NOT be migrated + insertDocumentRecordWithStatus(jdbcUrl, fp, "SUCCESS"); + insertAttemptWithStatus(jdbcUrl, fp, "PROPOSAL_READY"); + + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String status = readOverallStatus(jdbcUrl, fp); + assertThat(status).isEqualTo("SUCCESS"); + } + + @Test + void initializeSchema_doesNotMigrate_terminalFailureStates(@TempDir Path dir) + throws SQLException { + String jdbcUrl = jdbcUrl(dir, "migration_failure_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String fpRetryable = "1".repeat(64); + String fpFinal = "2".repeat(64); + insertDocumentRecordWithStatus(jdbcUrl, fpRetryable, "FAILED_RETRYABLE"); + insertDocumentRecordWithStatus(jdbcUrl, fpFinal, "FAILED_FINAL"); + + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + assertThat(readOverallStatus(jdbcUrl, fpRetryable)).isEqualTo("FAILED_RETRYABLE"); + assertThat(readOverallStatus(jdbcUrl, fpFinal)).isEqualTo("FAILED_FINAL"); + } + // 
------------------------------------------------------------------------- // Error handling // ------------------------------------------------------------------------- @@ -286,4 +422,47 @@ class SqliteSchemaInitializationAdapterTest { } return columns; } + + private static void insertDocumentRecordWithStatus(String jdbcUrl, String fingerprint, + String status) throws SQLException { + try (Connection conn = DriverManager.getConnection(jdbcUrl); + var ps = conn.prepareStatement(""" + INSERT INTO document_record + (fingerprint, last_known_source_locator, last_known_source_file_name, + overall_status, created_at, updated_at) + VALUES (?, '/src', 'doc.pdf', ?, '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') + """)) { + ps.setString(1, fingerprint); + ps.setString(2, status); + ps.executeUpdate(); + } + } + + private static void insertAttemptWithStatus(String jdbcUrl, String fingerprint, + String status) throws SQLException { + try (Connection conn = DriverManager.getConnection(jdbcUrl); + var ps = conn.prepareStatement(""" + INSERT INTO processing_attempt + (fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable) + VALUES (?, 'run-1', 1, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', ?, 0) + """)) { + ps.setString(1, fingerprint); + ps.setString(2, status); + ps.executeUpdate(); + } + } + + private static String readOverallStatus(String jdbcUrl, String fingerprint) throws SQLException { + try (Connection conn = DriverManager.getConnection(jdbcUrl); + var ps = conn.prepareStatement( + "SELECT overall_status FROM document_record WHERE fingerprint = ?")) { + ps.setString(1, fingerprint); + try (ResultSet rs = ps.executeQuery()) { + if (rs.next()) { + return rs.getString("overall_status"); + } + throw new IllegalStateException("No document record found for fingerprint: " + fingerprint); + } + } + } } diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java 
b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java index b37dea7..7673f2f 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java @@ -112,7 +112,9 @@ class SqliteUnitOfWorkAdapterTest { null, null, now, - now + now, + null, + null ); // Create repositories for verification @@ -151,7 +153,9 @@ class SqliteUnitOfWorkAdapterTest { null, null, now, - now + now, + null, + null ); RuntimeException customException = new RuntimeException("Custom runtime error"); diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/FilesystemTargetFileCopyAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/FilesystemTargetFileCopyAdapterTest.java new file mode 100644 index 0000000..168884b --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/FilesystemTargetFileCopyAdapterTest.java @@ -0,0 +1,229 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy; + +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatNullPointerException; + +/** + * Tests for {@link 
FilesystemTargetFileCopyAdapter}. + *

+ * Covers the happy path (copy via temp file and final move), source integrity, + * technical failure cases, and cleanup after failure. + */ +class FilesystemTargetFileCopyAdapterTest { + + @TempDir + Path sourceFolder; + + @TempDir + Path targetFolder; + + private FilesystemTargetFileCopyAdapter adapter; + + @BeforeEach + void setUp() { + adapter = new FilesystemTargetFileCopyAdapter(targetFolder); + } + + // ------------------------------------------------------------------------- + // Happy path – successful copy + // ------------------------------------------------------------------------- + + @Test + void copyToTarget_success_returnsTargetFileCopySuccess() throws IOException { + Path sourceFile = createSourceFile("source.pdf", "PDF content"); + String resolvedFilename = "2026-01-15 - Rechnung.pdf"; + + TargetFileCopyResult result = adapter.copyToTarget( + new SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + resolvedFilename); + + assertThat(result).isInstanceOf(TargetFileCopySuccess.class); + } + + @Test + void copyToTarget_success_targetFileCreatedWithCorrectContent() throws IOException { + byte[] content = "PDF content bytes".getBytes(); + Path sourceFile = sourceFolder.resolve("invoice.pdf"); + Files.write(sourceFile, content); + String resolvedFilename = "2026-01-15 - Rechnung.pdf"; + + adapter.copyToTarget( + new SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + resolvedFilename); + + Path targetFile = targetFolder.resolve(resolvedFilename); + assertThat(targetFile).exists(); + assertThat(Files.readAllBytes(targetFile)).isEqualTo(content); + } + + @Test + void copyToTarget_success_sourceFileRemainsUnchanged() throws IOException { + byte[] originalContent = "original PDF content".getBytes(); + Path sourceFile = sourceFolder.resolve("source.pdf"); + Files.write(sourceFile, originalContent); + String resolvedFilename = "2026-01-15 - Rechnung.pdf"; + + adapter.copyToTarget( + new 
SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + resolvedFilename); + + // Source must remain completely unchanged + assertThat(Files.readAllBytes(sourceFile)).isEqualTo(originalContent); + assertThat(sourceFile).exists(); + } + + @Test + void copyToTarget_success_noTempFileRemainsInTargetFolder() throws IOException { + Path sourceFile = createSourceFile("source.pdf", "content"); + String resolvedFilename = "2026-04-07 - Bescheid.pdf"; + + adapter.copyToTarget( + new SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + resolvedFilename); + + // The .tmp file must not remain after a successful copy + Path tempFile = targetFolder.resolve(resolvedFilename + ".tmp"); + assertThat(tempFile).doesNotExist(); + } + + @Test + void copyToTarget_success_finalFileNameIsResolved() throws IOException { + Path sourceFile = createSourceFile("source.pdf", "data"); + String resolvedFilename = "2026-03-05 - Kontoauszug.pdf"; + + adapter.copyToTarget( + new SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + resolvedFilename); + + assertThat(targetFolder.resolve(resolvedFilename)).exists(); + } + + // ------------------------------------------------------------------------- + // Technical failure – source file does not exist + // ------------------------------------------------------------------------- + + @Test + void copyToTarget_sourceDoesNotExist_returnsTargetFileCopyTechnicalFailure() { + String nonExistentSource = sourceFolder.resolve("nonexistent.pdf").toAbsolutePath().toString(); + + TargetFileCopyResult result = adapter.copyToTarget( + new SourceDocumentLocator(nonExistentSource), + "2026-01-01 - Rechnung.pdf"); + + assertThat(result).isInstanceOf(TargetFileCopyTechnicalFailure.class); + } + + @Test + void copyToTarget_sourceDoesNotExist_failureContainsSourcePath() { + String nonExistentSource = sourceFolder.resolve("nonexistent.pdf").toAbsolutePath().toString(); + + TargetFileCopyResult result = adapter.copyToTarget( + new 
SourceDocumentLocator(nonExistentSource), + "2026-01-01 - Rechnung.pdf"); + + assertThat(result).isInstanceOf(TargetFileCopyTechnicalFailure.class); + TargetFileCopyTechnicalFailure failure = (TargetFileCopyTechnicalFailure) result; + assertThat(failure.errorMessage()).contains(nonExistentSource); + } + + // ------------------------------------------------------------------------- + // Technical failure – target folder does not exist + // ------------------------------------------------------------------------- + + @Test + void copyToTarget_targetFolderDoesNotExist_returnsTargetFileCopyTechnicalFailure() + throws IOException { + Path sourceFile = createSourceFile("source.pdf", "content"); + Path nonExistentTargetFolder = targetFolder.resolve("nonexistent-subfolder"); + FilesystemTargetFileCopyAdapter adapterWithMissingFolder = + new FilesystemTargetFileCopyAdapter(nonExistentTargetFolder); + + TargetFileCopyResult result = adapterWithMissingFolder.copyToTarget( + new SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + "2026-01-01 - Rechnung.pdf"); + + assertThat(result).isInstanceOf(TargetFileCopyTechnicalFailure.class); + } + + // ------------------------------------------------------------------------- + // Cleanup after failure – no temp file left + // ------------------------------------------------------------------------- + + @Test + void copyToTarget_sourceDoesNotExist_noTempFileLeftInTargetFolder() { + String nonExistentSource = sourceFolder.resolve("missing.pdf").toAbsolutePath().toString(); + String resolvedFilename = "2026-01-01 - Test.pdf"; + + adapter.copyToTarget( + new SourceDocumentLocator(nonExistentSource), + resolvedFilename); + + // Even though the copy failed, no temp file should remain + Path tempFile = targetFolder.resolve(resolvedFilename + ".tmp"); + assertThat(tempFile).doesNotExist(); + } + + // ------------------------------------------------------------------------- + // TargetFileCopyTechnicalFailure semantics + // 
------------------------------------------------------------------------- + + @Test + void copyToTarget_failure_messageIsNonNull() { + String nonExistentSource = sourceFolder.resolve("ghost.pdf").toAbsolutePath().toString(); + + TargetFileCopyTechnicalFailure failure = (TargetFileCopyTechnicalFailure) + adapter.copyToTarget( + new SourceDocumentLocator(nonExistentSource), + "2026-01-01 - Test.pdf"); + + assertThat(failure.errorMessage()).isNotNull(); + } + + // ------------------------------------------------------------------------- + // Null guards + // ------------------------------------------------------------------------- + + @Test + void copyToTarget_rejectsNullSourceLocator() throws IOException { + assertThatNullPointerException() + .isThrownBy(() -> adapter.copyToTarget(null, "2026-01-01 - Test.pdf")); + } + + @Test + void copyToTarget_rejectsNullResolvedFilename() throws IOException { + Path sourceFile = createSourceFile("source.pdf", "content"); + assertThatNullPointerException() + .isThrownBy(() -> adapter.copyToTarget( + new SourceDocumentLocator(sourceFile.toAbsolutePath().toString()), + null)); + } + + @Test + void constructor_rejectsNullTargetFolderPath() { + assertThatNullPointerException() + .isThrownBy(() -> new FilesystemTargetFileCopyAdapter(null)); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private Path createSourceFile(String filename, String content) throws IOException { + Path file = sourceFolder.resolve(filename); + Files.writeString(file, content); + return file; + } +} diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/FilesystemTargetFolderAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/FilesystemTargetFolderAdapterTest.java new file mode 100644 index 0000000..b519299 --- /dev/null 
+++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/FilesystemTargetFolderAdapterTest.java @@ -0,0 +1,259 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder; + +import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatNullPointerException; + +/** + * Tests for {@link FilesystemTargetFolderAdapter}. + *

+ * Covers duplicate resolution (no conflict, single conflict, multiple conflicts), + * suffix placement, rollback deletion, and error handling. + */ +class FilesystemTargetFolderAdapterTest { + + @TempDir + Path targetFolder; + + private FilesystemTargetFolderAdapter adapter; + + @BeforeEach + void setUp() { + adapter = new FilesystemTargetFolderAdapter(targetFolder); + } + + // ------------------------------------------------------------------------- + // getTargetFolderLocator + // ------------------------------------------------------------------------- + + @Test + void getTargetFolderLocator_returnsAbsolutePath() { + String locator = adapter.getTargetFolderLocator(); + + assertThat(locator).isEqualTo(targetFolder.toAbsolutePath().toString()); + } + + @Test + void getTargetFolderLocator_isNeverNullOrBlank() { + assertThat(adapter.getTargetFolderLocator()).isNotNull().isNotBlank(); + } + + // ------------------------------------------------------------------------- + // resolveUniqueFilename – no conflict + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_noConflict_returnsBaseName() { + String baseName = "2026-01-15 - Rechnung.pdf"; + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(baseName); + + assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + assertThat(((ResolvedTargetFilename) result).resolvedFilename()).isEqualTo(baseName); + } + + // ------------------------------------------------------------------------- + // resolveUniqueFilename – collision with base name + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_baseNameTaken_returnsSuffixOne() throws IOException { + String baseName = "2026-01-15 - Rechnung.pdf"; + Files.createFile(targetFolder.resolve(baseName)); + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(baseName); + + 
assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + assertThat(((ResolvedTargetFilename) result).resolvedFilename()) + .isEqualTo("2026-01-15 - Rechnung(1).pdf"); + } + + @Test + void resolveUniqueFilename_baseAndOneTaken_returnsSuffixTwo() throws IOException { + String baseName = "2026-01-15 - Rechnung.pdf"; + Files.createFile(targetFolder.resolve(baseName)); + Files.createFile(targetFolder.resolve("2026-01-15 - Rechnung(1).pdf")); + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(baseName); + + assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + assertThat(((ResolvedTargetFilename) result).resolvedFilename()) + .isEqualTo("2026-01-15 - Rechnung(2).pdf"); + } + + @Test + void resolveUniqueFilename_multipleTaken_returnsFirstFree() throws IOException { + String baseName = "2026-03-31 - Stromabrechnung.pdf"; + // Create base + (1), (2), (3) + Files.createFile(targetFolder.resolve(baseName)); + Files.createFile(targetFolder.resolve("2026-03-31 - Stromabrechnung(1).pdf")); + Files.createFile(targetFolder.resolve("2026-03-31 - Stromabrechnung(2).pdf")); + Files.createFile(targetFolder.resolve("2026-03-31 - Stromabrechnung(3).pdf")); + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(baseName); + + assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + assertThat(((ResolvedTargetFilename) result).resolvedFilename()) + .isEqualTo("2026-03-31 - Stromabrechnung(4).pdf"); + } + + // ------------------------------------------------------------------------- + // Suffix placement: immediately before .pdf + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_suffixPlacedImmediatelyBeforePdf() throws IOException { + String baseName = "2026-04-07 - Bescheid.pdf"; + Files.createFile(targetFolder.resolve(baseName)); + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(baseName); + + 
assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + String resolved = ((ResolvedTargetFilename) result).resolvedFilename(); + // Must end with "(1).pdf", not ".pdf(1)" + assertThat(resolved).endsWith("(1).pdf"); + assertThat(resolved).doesNotContain(".pdf("); + } + + // ------------------------------------------------------------------------- + // Suffix does not count against 20-char base title + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_20CharTitle_suffixDoesNotViolateTitleLimit() throws IOException { + // Base title has exactly 20 chars; with (1) suffix the title exceeds 20, but that is expected + String title = "A".repeat(20); // 20-char title + String baseName = "2026-01-01 - " + title + ".pdf"; + Files.createFile(targetFolder.resolve(baseName)); + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(baseName); + + assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + String resolved = ((ResolvedTargetFilename) result).resolvedFilename(); + // The resolved filename must contain (1) even though overall length > 20 chars + assertThat(resolved).contains("(1)"); + } + + // ------------------------------------------------------------------------- + // resolveUniqueFilename – base name without .pdf extension + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_baseNameWithoutPdfExtension_whenConflict_returnsFailure() + throws IOException { + // When there is no conflict (file does not exist), the adapter returns the name as-is + // because it only checks the extension when it needs to insert a suffix. 
+ String nameWithoutExt = "2026-01-15 - Rechnung"; + + // Create a file with that name (no extension) to trigger conflict handling + Files.createFile(targetFolder.resolve(nameWithoutExt)); + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(nameWithoutExt); + + // Without .pdf extension, suffix insertion fails + assertThat(result).isInstanceOf(TargetFolderTechnicalFailure.class); + } + + // ------------------------------------------------------------------------- + // resolveUniqueFilename – no conflict, name without .pdf (edge: no conflict → ok) + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_baseNameWithoutPdfExtension_whenNoConflict_returnsIt() { + // If the name does not exist, the adapter returns it without checking the extension + String nameWithoutExt = "2026-01-15 - Rechnung"; + + TargetFilenameResolutionResult result = adapter.resolveUniqueFilename(nameWithoutExt); + + assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + assertThat(((ResolvedTargetFilename) result).resolvedFilename()).isEqualTo(nameWithoutExt); + } + + // ------------------------------------------------------------------------- + // resolveUniqueFilename – null guard + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_rejectsNullBaseName() { + assertThatNullPointerException() + .isThrownBy(() -> adapter.resolveUniqueFilename(null)); + } + + // ------------------------------------------------------------------------- + // tryDeleteTargetFile – file exists, gets deleted + // ------------------------------------------------------------------------- + + @Test + void tryDeleteTargetFile_fileExists_deletesFile() throws IOException { + String filename = "2026-01-15 - Rechnung.pdf"; + Files.createFile(targetFolder.resolve(filename)); + assertThat(targetFolder.resolve(filename)).exists(); + + adapter.tryDeleteTargetFile(filename); 
+ + assertThat(targetFolder.resolve(filename)).doesNotExist(); + } + + // ------------------------------------------------------------------------- + // tryDeleteTargetFile – file does not exist, no error + // ------------------------------------------------------------------------- + + @Test + void tryDeleteTargetFile_fileDoesNotExist_doesNotThrow() { + // Must not throw even if the file is absent + adapter.tryDeleteTargetFile("nonexistent.pdf"); + } + + // ------------------------------------------------------------------------- + // tryDeleteTargetFile – null guard + // ------------------------------------------------------------------------- + + @Test + void tryDeleteTargetFile_rejectsNullFilename() { + assertThatNullPointerException() + .isThrownBy(() -> adapter.tryDeleteTargetFile(null)); + } + + // ------------------------------------------------------------------------- + // resolveUniqueFilename – non-existent target folder + // ------------------------------------------------------------------------- + + @Test + void resolveUniqueFilename_nonExistentTargetFolder_returnsFailure() { + Path nonExistentFolder = targetFolder.resolve("does-not-exist"); + FilesystemTargetFolderAdapter adapterWithMissingFolder = + new FilesystemTargetFolderAdapter(nonExistentFolder); + + String baseName = "2026-01-01 - Test.pdf"; + + // Files.exists() on a file in a non-existent folder does not throw; + // it simply returns false, so the adapter returns the base name. + // This is consistent behaviour: no folder access error when just checking existence. 
+ TargetFilenameResolutionResult result = adapterWithMissingFolder.resolveUniqueFilename(baseName); + + // Adapter returns the base name since no conflict is detected for a non-existent folder + assertThat(result).isInstanceOf(ResolvedTargetFilename.class); + assertThat(((ResolvedTargetFilename) result).resolvedFilename()).isEqualTo(baseName); + } + + // ------------------------------------------------------------------------- + // Construction – null guard + // ------------------------------------------------------------------------- + + @Test + void constructor_rejectsNullTargetFolderPath() { + assertThatNullPointerException() + .isThrownBy(() -> new FilesystemTargetFolderAdapter(null)); + } +} diff --git a/pdf-umbenenner-application/pom.xml b/pdf-umbenenner-application/pom.xml index f3ed695..f62532d 100644 --- a/pdf-umbenenner-application/pom.xml +++ b/pdf-umbenenner-application/pom.xml @@ -19,6 +19,12 @@ ${project.version} + + + org.json + json + + org.junit.jupiter diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java index fc0cb3f..72b966c 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/DocumentRecord.java @@ -37,7 +37,11 @@ import java.util.Objects; *
 *   <li>{@link #updatedAt()} — timestamp of the most recent update to this master record.</li>
 * </ul>
 *
 * <p>
    - * Not yet included: target path, target file name, AI-related fields. + * Target location fields: {@link #lastTargetPath()} and + * {@link #lastTargetFileName()} are populated only after the document reaches + * {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#SUCCESS}. Both + * fields are {@code null} for documents that have not yet been successfully copied + * to the target folder. * * @param fingerprint content-based identity; never null * @param lastKnownSourceLocator opaque locator to the physical source file; never null @@ -48,6 +52,10 @@ import java.util.Objects; * @param lastSuccessInstant timestamp of the successful processing, or {@code null} * @param createdAt timestamp when this record was first created; never null * @param updatedAt timestamp of the most recent update; never null + * @param lastTargetPath opaque locator of the target folder where the last + * successful copy was written, or {@code null} + * @param lastTargetFileName filename of the last successfully written target copy + * (including any duplicate suffix), or {@code null} */ public record DocumentRecord( DocumentFingerprint fingerprint, @@ -58,7 +66,9 @@ public record DocumentRecord( Instant lastFailureInstant, Instant lastSuccessInstant, Instant createdAt, - Instant updatedAt) { + Instant updatedAt, + String lastTargetPath, + String lastTargetFileName) { /** * Compact constructor validating mandatory non-null fields. 
diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java index 07dd78f..8b3628a 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttempt.java @@ -1,10 +1,12 @@ package de.gecheckt.pdf.umbenenner.application.port.out; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import de.gecheckt.pdf.umbenenner.domain.model.RunId; import java.time.Instant; +import java.time.LocalDate; import java.util.Objects; /** @@ -40,20 +42,49 @@ import java.util.Objects; * successful or skip attempts. *
 *   <li>{@link #retryable()} — {@code true} if the failure is considered retryable in a
 *       later run; {@code false} for final failures, successes, and skip attempts.</li>
+ *   <li>{@link #modelName()} — the AI model name used in this attempt; {@code null} if
+ *       no AI call was made (e.g. pre-check failures or skip attempts).</li>
+ *   <li>{@link #promptIdentifier()} — stable identifier of the prompt template used;
+ *       {@code null} if no AI call was made.</li>
+ *   <li>{@link #processedPageCount()} — number of PDF pages processed; {@code null} if
+ *       pages were not extracted (e.g. pre-fingerprint or skip attempts).</li>
+ *   <li>{@link #sentCharacterCount()} — number of characters sent to the AI; {@code null}
+ *       if no AI call was made.</li>
+ *   <li>{@link #aiRawResponse()} — the complete raw AI response body; {@code null} if no
+ *       AI call was made. Stored in SQLite but not written to log files by default.</li>
+ *   <li>{@link #aiReasoning()} — the reasoning extracted from the AI response; {@code null}
+ *       if no valid AI response was obtained.</li>
+ *   <li>{@link #resolvedDate()} — the date resolved for the naming proposal; {@code null}
+ *       if no naming proposal was produced.</li>
+ *   <li>{@link #dateSource()} — the origin of the resolved date; {@code null} if no
+ *       naming proposal was produced.</li>
+ *   <li>{@link #validatedTitle()} — the validated title from the naming proposal;
+ *       {@code null} if no naming proposal was produced.</li>
+ *   <li>{@link #finalTargetFileName()} — the final filename written to the target folder
+ *       (including any duplicate suffix); set only for
+ *       {@link ProcessingStatus#SUCCESS} attempts, {@code null} otherwise.</li>
 * </ul>
 *
- * <p>
    - * Not yet included: model name, prompt identifier, AI raw response, - * AI reasoning, resolved date, date source, final title, final target file name. * - * @param fingerprint content-based document identity; never null - * @param runId identifier of the batch run; never null - * @param attemptNumber monotonic sequence number per fingerprint; must be >= 1 - * @param startedAt start of this processing attempt; never null - * @param endedAt end of this processing attempt; never null - * @param status outcome status of this attempt; never null - * @param failureClass failure classification, or {@code null} for non-failure statuses - * @param failureMessage failure description, or {@code null} for non-failure statuses - * @param retryable whether this failure should be retried in a later run + * @param fingerprint content-based document identity; never null + * @param runId identifier of the batch run; never null + * @param attemptNumber monotonic sequence number per fingerprint; must be >= 1 + * @param startedAt start of this processing attempt; never null + * @param endedAt end of this processing attempt; never null + * @param status outcome status of this attempt; never null + * @param failureClass failure classification, or {@code null} for non-failure statuses + * @param failureMessage failure description, or {@code null} for non-failure statuses + * @param retryable whether this failure should be retried in a later run + * @param modelName AI model name, or {@code null} if no AI call was made + * @param promptIdentifier prompt identifier, or {@code null} if no AI call was made + * @param processedPageCount number of PDF pages processed, or {@code null} + * @param sentCharacterCount number of characters sent to AI, or {@code null} + * @param aiRawResponse full raw AI response, or {@code null} + * @param aiReasoning AI reasoning text, or {@code null} + * @param resolvedDate resolved date for naming proposal, or {@code null} + * @param dateSource origin of 
resolved date, or {@code null} + * @param validatedTitle validated title, or {@code null} + * @param finalTargetFileName filename written to the target folder for SUCCESS attempts, + * or {@code null} */ public record ProcessingAttempt( DocumentFingerprint fingerprint, @@ -64,7 +95,19 @@ public record ProcessingAttempt( ProcessingStatus status, String failureClass, String failureMessage, - boolean retryable) { + boolean retryable, + // AI traceability fields (null for non-AI attempts) + String modelName, + String promptIdentifier, + Integer processedPageCount, + Integer sentCharacterCount, + String aiRawResponse, + String aiReasoning, + LocalDate resolvedDate, + DateSource dateSource, + String validatedTitle, + // Target copy traceability (null for non-SUCCESS attempts) + String finalTargetFileName) { /** * Compact constructor validating mandatory non-null fields and numeric constraints. @@ -83,4 +126,37 @@ public record ProcessingAttempt( Objects.requireNonNull(endedAt, "endedAt must not be null"); Objects.requireNonNull(status, "status must not be null"); } + + /** + * Creates a {@link ProcessingAttempt} with no AI traceability fields set. + *

    + * Convenience factory for pre-check failures, skip events, and any attempt + * that does not involve an AI call. + * + * @param fingerprint document identity; must not be null + * @param runId batch run identifier; must not be null + * @param attemptNumber monotonic attempt number; must be >= 1 + * @param startedAt start instant; must not be null + * @param endedAt end instant; must not be null + * @param status outcome status; must not be null + * @param failureClass failure class name, or {@code null} + * @param failureMessage failure description, or {@code null} + * @param retryable whether retryable in a later run + * @return a new attempt with all AI fields set to {@code null} + */ + public static ProcessingAttempt withoutAiFields( + DocumentFingerprint fingerprint, + RunId runId, + int attemptNumber, + Instant startedAt, + Instant endedAt, + ProcessingStatus status, + String failureClass, + String failureMessage, + boolean retryable) { + return new ProcessingAttempt( + fingerprint, runId, attemptNumber, startedAt, endedAt, + status, failureClass, failureMessage, retryable, + null, null, null, null, null, null, null, null, null, null); + } } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java index 4f1b9b7..f509e66 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ProcessingAttemptRepository.java @@ -1,6 +1,7 @@ package de.gecheckt.pdf.umbenenner.application.port.out; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import java.util.List; @@ -66,4 +67,25 @@ public interface 
ProcessingAttemptRepository { * @throws DocumentPersistenceException if the query fails due to a technical error */ List findAllByFingerprint(DocumentFingerprint fingerprint); + + /** + * Returns the most recent attempt with status {@link ProcessingStatus#PROPOSAL_READY} + * for the given fingerprint, or {@code null} if no such attempt exists. + *

    + * Leading source for subsequent processing stages: + * The most recent {@code PROPOSAL_READY} attempt is the authoritative source for + * the validated naming proposal (resolved date, date source, validated title, and + * AI reasoning) consumed by subsequent stages. The document master record does not + * carry redundant proposal data; this method is the only correct way to retrieve it. + *

    + * If the overall document status is {@code PROPOSAL_READY} but this method returns + * {@code null}, or if the returned attempt is missing mandatory proposal fields, the + * state is considered an inconsistent persistence state and must be treated as a + * document-level technical error — not silently healed. + * + * @param fingerprint the document identity; must not be null + * @return the most recent {@code PROPOSAL_READY} attempt, or {@code null} if none exists + * @throws DocumentPersistenceException if the query fails due to a technical error + */ + ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint); } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ResolvedTargetFilename.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ResolvedTargetFilename.java new file mode 100644 index 0000000..bfba57d --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/ResolvedTargetFilename.java @@ -0,0 +1,26 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Successful outcome of {@link TargetFolderPort#resolveUniqueFilename(String)}. + *

    + * Carries the first available filename in the target folder. The filename includes + * the {@code .pdf} extension and, if needed, a numeric duplicate-avoidance suffix + * inserted directly before {@code .pdf} (e.g., {@code "2024-01-15 - Rechnung(1).pdf"}). + * + * @param resolvedFilename the available filename including extension; never null or blank + */ +public record ResolvedTargetFilename(String resolvedFilename) implements TargetFilenameResolutionResult { + + /** + * @throws NullPointerException if {@code resolvedFilename} is null + * @throws IllegalArgumentException if {@code resolvedFilename} is blank + */ + public ResolvedTargetFilename { + Objects.requireNonNull(resolvedFilename, "resolvedFilename must not be null"); + if (resolvedFilename.isBlank()) { + throw new IllegalArgumentException("resolvedFilename must not be blank"); + } + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyPort.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyPort.java new file mode 100644 index 0000000..2ee1619 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyPort.java @@ -0,0 +1,45 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +/** + * Outbound port for copying a source PDF to the target folder. + *

    + * The physical copy is the final step in the successful document processing path. + * Copying is performed via a temporary file in the target context with a subsequent + * atomic move/rename to the final target filename, minimising the risk of incomplete + * target files being visible. + * + *

    Source integrity

    + *

    + * The source file identified by the {@link SourceDocumentLocator} is never + * modified, moved, or deleted by this port. Only a copy is written. + * + *

    No immediate retry

    + *

    + * This port performs exactly one copy attempt per invocation. No automatic retry within + * the same call is performed; retry decisions belong to higher-level orchestration. + * + *

    Architecture boundary

    + *

    + * No {@code Path}, {@code File}, or NIO types appear in this interface. + */ +public interface TargetFileCopyPort { + + /** + * Copies the source document to the target folder under the given resolved filename. + *

    + * The implementation writes to a temporary file first and then performs a + * move/rename to the final {@code resolvedFilename}. If the move fails, a + * best-effort cleanup of the temporary file is attempted before returning the + * failure result. + * + * @param sourceLocator opaque locator identifying the source file; must not be null + * @param resolvedFilename the final filename (not full path) to write in the target + * folder; must not be null or blank; must have been obtained + * from {@link TargetFolderPort#resolveUniqueFilename(String)} + * @return {@link TargetFileCopySuccess} if the copy completed successfully, or + * {@link TargetFileCopyTechnicalFailure} if any step failed + */ + TargetFileCopyResult copyToTarget(SourceDocumentLocator sourceLocator, String resolvedFilename); +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyResult.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyResult.java new file mode 100644 index 0000000..d4de44b --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyResult.java @@ -0,0 +1,14 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Sealed result type for {@link TargetFileCopyPort#copyToTarget}. + *

+ * Permits exactly two outcomes:
+ * <ul>
+ *   <li>{@link TargetFileCopySuccess} — the copy and final move/rename completed</li>
+ *   <li>{@link TargetFileCopyTechnicalFailure} — a step failed; retryable technical error</li>
+ * </ul>

    + */ +public sealed interface TargetFileCopyResult + permits TargetFileCopySuccess, TargetFileCopyTechnicalFailure { +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopySuccess.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopySuccess.java new file mode 100644 index 0000000..67da250 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopySuccess.java @@ -0,0 +1,10 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Successful outcome of {@link TargetFileCopyPort#copyToTarget}. + *

    + * Indicates that the source file was successfully copied to the target folder and the + * final move/rename completed. The target file is now visible under the resolved filename. + */ +public record TargetFileCopySuccess() implements TargetFileCopyResult { +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyTechnicalFailure.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyTechnicalFailure.java new file mode 100644 index 0000000..e8d32f5 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFileCopyTechnicalFailure.java @@ -0,0 +1,30 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Technical failure outcome of {@link TargetFileCopyPort#copyToTarget}. + *

    + * Indicates that copying the source file to the target folder failed. The failure is + * always treated as a transient, retryable document-level technical error. + *

    + * The {@code targetFileCleanedUp} flag records whether a best-effort cleanup of any + * partially written temporary target file was successful. A value of {@code false} + * means a stale temporary file may remain in the target folder; a value of {@code true} + * means cleanup succeeded (or no temporary file had been created at all). + * + * @param errorMessage human-readable description of the failure; never null + * @param targetFileCleanedUp {@code true} if cleanup of any temporary file succeeded; + * {@code false} if cleanup failed or was not attempted + */ +public record TargetFileCopyTechnicalFailure( + String errorMessage, + boolean targetFileCleanedUp) implements TargetFileCopyResult { + + /** + * @throws NullPointerException if {@code errorMessage} is null + */ + public TargetFileCopyTechnicalFailure { + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFilenameResolutionResult.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFilenameResolutionResult.java new file mode 100644 index 0000000..afbf423 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFilenameResolutionResult.java @@ -0,0 +1,14 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Sealed result type for {@link TargetFolderPort#resolveUniqueFilename(String)}. + *

+ * Permits exactly two outcomes:
+ * <ul>
+ *   <li>{@link ResolvedTargetFilename} — the first available filename was determined</li>
+ *   <li>{@link TargetFolderTechnicalFailure} — the target folder could not be accessed</li>
+ * </ul>

    + */ +public sealed interface TargetFilenameResolutionResult + permits ResolvedTargetFilename, TargetFolderTechnicalFailure { +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFolderPort.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFolderPort.java new file mode 100644 index 0000000..19e7971 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFolderPort.java @@ -0,0 +1,71 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +/** + * Outbound port for target folder access: duplicate resolution and best-effort cleanup. + *

    + * The target folder is the directory where the renamed PDF copy is written. This port + * encapsulates all target-folder concerns so that the application layer never handles + * filesystem types ({@code Path}, {@code File}) directly. + * + *

    Duplicate resolution

    + *

    + * When the base filename is already taken in the target folder, the port determines + * the first available name by appending a numeric suffix directly before {@code .pdf}: + *

    + *   2024-01-15 - Rechnung.pdf
    + *   2024-01-15 - Rechnung(1).pdf
    + *   2024-01-15 - Rechnung(2).pdf
    + *   ...
    + * 
+ * The base filename must already include the {@code .pdf} extension. The suffix is
+ * purely a technical collision-avoidance mechanism and introduces no new functional
+ * title interpretation.
+ *

    Architecture boundary

    + *

    + * No {@code Path}, {@code File}, or NIO types appear in this interface. The concrete + * adapter implementation translates the opaque folder locator string to actual + * filesystem operations. + */ +public interface TargetFolderPort { + + /** + * Returns an opaque string that identifies the target folder managed by this port. + *

    + * The application layer treats this as an opaque locator and stores it in the + * document master record ({@code lastTargetPath}) for traceability. It must not + * be interpreted by the application layer. + * + * @return a non-null, non-blank string identifying the target folder + */ + String getTargetFolderLocator(); + + /** + * Resolves the first available unique filename in the target folder for the given base name. + *

    + * If the base name is not yet taken, it is returned unchanged. Otherwise the method + * appends {@code (1)}, {@code (2)}, etc. directly before {@code .pdf} until a free + * name is found. + *

    + * The returned filename contains only the file name, not the full path. It is safe + * to use as the {@code resolvedFilename} parameter of + * {@link TargetFileCopyPort#copyToTarget(de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator, String)}. + * + * @param baseName the desired filename including the {@code .pdf} extension; + * must not be null or blank + * @return a {@link ResolvedTargetFilename} with the first available name, or a + * {@link TargetFolderTechnicalFailure} if the target folder is not accessible + */ + TargetFilenameResolutionResult resolveUniqueFilename(String baseName); + + /** + * Best-effort attempt to delete a file previously written to the target folder. + *

    + * Intended for rollback after a successful target copy when subsequent persistence + * fails. This method must not throw; if deletion fails for any reason, the failure + * is silently ignored. + * + * @param resolvedFilename the filename (not full path) to delete; must not be null + */ + void tryDeleteTargetFile(String resolvedFilename); +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFolderTechnicalFailure.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFolderTechnicalFailure.java new file mode 100644 index 0000000..ce3fd5d --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/port/out/TargetFolderTechnicalFailure.java @@ -0,0 +1,22 @@ +package de.gecheckt.pdf.umbenenner.application.port.out; + +import java.util.Objects; + +/** + * Technical failure outcome of {@link TargetFolderPort#resolveUniqueFilename(String)}. + *

    + * Indicates that the target folder could not be accessed when attempting to determine + * a unique filename. This is a transient infrastructure error; the calling use case + * should treat it as a retryable document-level technical error. + * + * @param errorMessage human-readable description of the failure; never null + */ +public record TargetFolderTechnicalFailure(String errorMessage) implements TargetFilenameResolutionResult { + + /** + * @throws NullPointerException if {@code errorMessage} is null + */ + public TargetFolderTechnicalFailure { + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingService.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingService.java new file mode 100644 index 0000000..08b104e --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingService.java @@ -0,0 +1,242 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import java.util.Objects; + +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort; +import de.gecheckt.pdf.umbenenner.domain.model.AiAttemptContext; +import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingSuccess; +import 
de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady; +import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; + +/** + * Orchestrates the complete AI naming pipeline for a single document. + *

    + * This service is called after pre-checks have passed (i.e. after extraction + * and content quality checks) and performs exactly these steps in order: + *

+ * <ol>
+ *   <li>Load the external prompt template via {@link PromptPort}.</li>
+ *   <li>Limit the extracted document text to the configured maximum character count.</li>
+ *   <li>Compose a deterministic AI request from the prompt and limited text.</li>
+ *   <li>Invoke the AI service via {@link AiInvocationPort}.</li>
+ *   <li>Parse the raw AI response for structural correctness.</li>
+ *   <li>Validate the parsed response for semantic correctness (title, date).</li>
+ *   <li>Return a typed {@link DocumentProcessingOutcome} encoding success or failure.</li>
+ * </ol>
    + * + *

    Outcome classification

    + * + * + *

    AI traceability

    + *

    + * Every returned outcome carries an {@link AiAttemptContext} with the model name, + * prompt identifier, page count, sent character count, and raw response (null on + * connection failure). This context is persisted verbatim in the processing attempt + * history by the coordinator. + * + *

    Thread safety

    + *

    + * This service is stateless with respect to individual documents. It is safe to + * reuse a single instance across documents within the same batch run, provided the + * injected dependencies are thread-safe. + */ +public class AiNamingService { + + private final AiInvocationPort aiInvocationPort; + private final PromptPort promptPort; + private final AiResponseValidator aiResponseValidator; + private final String modelName; + private final int maxTextCharacters; + + /** + * Creates the AI naming service with all required dependencies. + * + * @param aiInvocationPort port for invoking the AI over HTTP; must not be null + * @param promptPort port for loading the external prompt template; must not be null + * @param aiResponseValidator semantic validator for parsed AI responses; must not be null + * @param modelName the AI model name to record in attempt history; must not be null + * @param maxTextCharacters the maximum number of document-text characters to send; + * must be >= 1 + * @throws NullPointerException if any reference parameter is null + * @throws IllegalArgumentException if {@code maxTextCharacters} is less than 1 + */ + public AiNamingService( + AiInvocationPort aiInvocationPort, + PromptPort promptPort, + AiResponseValidator aiResponseValidator, + String modelName, + int maxTextCharacters) { + this.aiInvocationPort = Objects.requireNonNull(aiInvocationPort, "aiInvocationPort must not be null"); + this.promptPort = Objects.requireNonNull(promptPort, "promptPort must not be null"); + this.aiResponseValidator = Objects.requireNonNull(aiResponseValidator, "aiResponseValidator must not be null"); + this.modelName = Objects.requireNonNull(modelName, "modelName must not be null"); + if (maxTextCharacters < 1) { + throw new IllegalArgumentException( + "maxTextCharacters must be >= 1, but was: " + maxTextCharacters); + } + this.maxTextCharacters = maxTextCharacters; + } + + /** + * Runs the AI naming pipeline for a document that passed all pre-checks. + *

    + * The extraction result embedded in {@code preCheckPassed} supplies the + * document text and page count needed for the AI request. The candidate is + * carried through for correct outcome construction (correlation, logging). + * + * @param preCheckPassed the pre-check result carrying the candidate and extraction; + * must not be null + * @return a {@link DocumentProcessingOutcome} encoding the AI pipeline result; + * one of {@link NamingProposalReady}, {@link AiTechnicalFailure}, or + * {@link AiFunctionalFailure}; never null + * @throws NullPointerException if {@code preCheckPassed} is null + */ + public DocumentProcessingOutcome invoke(PreCheckPassed preCheckPassed) { + Objects.requireNonNull(preCheckPassed, "preCheckPassed must not be null"); + + SourceDocumentCandidate candidate = preCheckPassed.candidate(); + int pageCount = preCheckPassed.extraction().pageCount().value(); + String rawText = preCheckPassed.extraction().extractedText(); + + // Step 1: Load the external prompt template + return switch (promptPort.loadPrompt()) { + case PromptLoadingFailure promptFailure -> + // Prompt is unavailable — transient infrastructure failure; retryable + new AiTechnicalFailure( + candidate, + "Prompt loading failed [" + promptFailure.failureReason() + "]: " + + promptFailure.failureMessage(), + null, + new AiAttemptContext(modelName, "prompt-load-failed", pageCount, 0, null)); + + case PromptLoadingSuccess promptSuccess -> + invokeWithPrompt(candidate, rawText, pageCount, promptSuccess); + }; + } + + // ------------------------------------------------------------------------- + // Private helpers + // ------------------------------------------------------------------------- + + /** + * Continues the AI pipeline after the prompt has been loaded successfully. 
+ */ + private DocumentProcessingOutcome invokeWithPrompt( + SourceDocumentCandidate candidate, + String rawText, + int pageCount, + PromptLoadingSuccess promptSuccess) { + + String promptIdentifier = promptSuccess.promptIdentifier().identifier(); + String promptContent = promptSuccess.promptContent(); + + // Step 2: Limit the document text to the configured maximum + String limitedText = DocumentTextLimiter.limit(rawText, maxTextCharacters); + int sentCharacterCount = limitedText.length(); + + // Step 3: Compose a deterministic AI request + AiRequestRepresentation request = AiRequestComposer.compose( + promptSuccess.promptIdentifier(), + promptContent, + limitedText); + + // Step 4: Invoke the AI service + return switch (aiInvocationPort.invoke(request)) { + case AiInvocationTechnicalFailure invocationFailure -> + // Transient infrastructure failure: timeout, network error, etc. + new AiTechnicalFailure( + candidate, + "AI invocation failed [" + invocationFailure.failureReason() + "]: " + + invocationFailure.failureMessage(), + null, + new AiAttemptContext( + modelName, promptIdentifier, pageCount, sentCharacterCount, null)); + + case AiInvocationSuccess invocationSuccess -> + processSuccessfulInvocation( + candidate, pageCount, sentCharacterCount, promptIdentifier, + invocationSuccess); + }; + } + + /** + * Processes a technically successful AI invocation: parses and validates the response. 
+ */ + private DocumentProcessingOutcome processSuccessfulInvocation( + SourceDocumentCandidate candidate, + int pageCount, + int sentCharacterCount, + String promptIdentifier, + AiInvocationSuccess invocationSuccess) { + + String rawResponseBody = invocationSuccess.rawResponse().content(); + + // Step 5: Parse the raw response for structural correctness + return switch (AiResponseParser.parse(invocationSuccess.rawResponse())) { + case AiResponseParsingFailure parsingFailure -> + // Unparseable JSON or structurally invalid response: transient technical error + new AiTechnicalFailure( + candidate, + "AI response could not be parsed [" + parsingFailure.failureReason() + "]: " + + parsingFailure.failureMessage(), + null, + new AiAttemptContext( + modelName, promptIdentifier, pageCount, sentCharacterCount, + rawResponseBody)); + + case AiResponseParsingSuccess parsingSuccess -> + // Step 6: Validate semantics (title rules, date format) + validateAndBuildOutcome( + candidate, pageCount, sentCharacterCount, promptIdentifier, + rawResponseBody, parsingSuccess.response()); + }; + } + + /** + * Validates the parsed AI response and builds the final outcome. 
+ */ + private DocumentProcessingOutcome validateAndBuildOutcome( + SourceDocumentCandidate candidate, + int pageCount, + int sentCharacterCount, + String promptIdentifier, + String rawResponseBody, + ParsedAiResponse parsedResponse) { + + AiAttemptContext aiContext = new AiAttemptContext( + modelName, promptIdentifier, pageCount, sentCharacterCount, rawResponseBody); + + return switch (aiResponseValidator.validate(parsedResponse)) { + case AiResponseValidator.AiValidationResult.Invalid invalid -> + // Deterministic semantic failure: bad title, bad date, generic placeholder + new AiFunctionalFailure(candidate, invalid.errorMessage(), aiContext); + + case AiResponseValidator.AiValidationResult.Valid valid -> { + NamingProposal proposal = valid.proposal(); + yield new NamingProposalReady(candidate, proposal, aiContext); + } + }; + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseParser.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseParser.java new file mode 100644 index 0000000..e84e65d --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseParser.java @@ -0,0 +1,107 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import java.util.Objects; + +import org.json.JSONException; +import org.json.JSONObject; + +import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse; + +/** + * Parses the raw AI response body into a structurally validated {@link ParsedAiResponse}. + *

    + * This parser enforces the technical contract: the AI must respond with exactly one + * parseable JSON object containing the mandatory fields {@code title} and {@code reasoning}, + * and an optional {@code date} field. Any extra free-text outside the JSON object makes + * the response technically invalid. + * + *

    Parsing rules

    + * + * + *

    Architecture boundary

    + *

    + * Only structural parsing is performed here. Semantic validation (title length, + * special characters, date format, generic placeholder detection) is the responsibility + * of {@link AiResponseValidator}. + */ +public final class AiResponseParser { + + private AiResponseParser() { + // Static utility – no instances + } + + /** + * Attempts to parse {@code rawResponse} into a {@link ParsedAiResponse}. + *

    + * Returns {@link AiResponseParsingSuccess} if the response body is a valid JSON object + * containing the mandatory fields. Returns {@link AiResponseParsingFailure} for any + * structural problem: non-JSON content, JSON that is not an object, missing mandatory + * fields, or extra free-text surrounding the JSON object. + * + * @param rawResponse the raw AI response body; must not be null + * @return a parsing result indicating success or failure; never null + * @throws NullPointerException if {@code rawResponse} is null + */ + public static AiResponseParsingResult parse(AiRawResponse rawResponse) { + Objects.requireNonNull(rawResponse, "rawResponse must not be null"); + + String body = rawResponse.content(); + if (body == null || body.isBlank()) { + return new AiResponseParsingFailure("EMPTY_RESPONSE", "AI response body is empty or blank"); + } + + String trimmed = body.trim(); + + // Reject if the body does not start with '{' and end with '}' (i.e., not a pure JSON object). + // This catches responses that embed a JSON object within surrounding prose. 
+ if (!trimmed.startsWith("{") || !trimmed.endsWith("}")) { + return new AiResponseParsingFailure( + "NOT_JSON_OBJECT", + "AI response is not a pure JSON object (contains extra text or is not an object)"); + } + + JSONObject json; + try { + json = new JSONObject(trimmed); + } catch (JSONException e) { + return new AiResponseParsingFailure("INVALID_JSON", "AI response is not valid JSON: " + e.getMessage()); + } + + // Validate mandatory field: title + if (!json.has("title") || json.isNull("title")) { + return new AiResponseParsingFailure("MISSING_TITLE", "AI response missing mandatory field 'title'"); + } + String title = json.getString("title"); + if (title.isBlank()) { + return new AiResponseParsingFailure("BLANK_TITLE", "AI response field 'title' is blank"); + } + + // Validate mandatory field: reasoning + if (!json.has("reasoning") || json.isNull("reasoning")) { + return new AiResponseParsingFailure("MISSING_REASONING", "AI response missing mandatory field 'reasoning'"); + } + String reasoning = json.getString("reasoning"); + + // Optional field: date + String dateString = null; + if (json.has("date") && !json.isNull("date")) { + dateString = json.getString("date"); + } + + ParsedAiResponse parsed = ParsedAiResponse.of(title, reasoning, dateString); + return new AiResponseParsingSuccess(parsed); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseValidator.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseValidator.java new file mode 100644 index 0000000..34b8821 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseValidator.java @@ -0,0 +1,215 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import java.time.LocalDate; +import java.time.format.DateTimeParseException; +import java.util.Objects; +import java.util.Set; + +import 
de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; +import de.gecheckt.pdf.umbenenner.domain.model.AiErrorClassification; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal; +import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse; + +/** + * Validates the semantics of a structurally parsed AI response and produces a + * {@link NamingProposal} or a classified validation error. + * + *

    What this validator checks

    + *

    + * All objectively computable rules are enforced here. Rules that depend on linguistic + * judgement (German language, comprehensibility, treatment of proper nouns) are + * delegated to the AI via the prompt contract and are not verified programmatically. + * + *

    Title rules (objective)

    + * + * + *

    Date rules (objective)

    + * + * + *

    Result

    + * + */ +public final class AiResponseValidator { + + /** + * Known generic placeholder titles that are not acceptable as document names. + * These are case-insensitive matches. + */ + private static final Set GENERIC_TITLES = Set.of( + "dokument", "datei", "scan", "pdf", "seite", "unbekannt", + "document", "file", "unknown", "page" + ); + + private final ClockPort clockPort; + + /** + * Creates the validator with the given clock for date fallback. + * + * @param clockPort the clock for current-date fallback; must not be null + * @throws NullPointerException if {@code clockPort} is null + */ + public AiResponseValidator(ClockPort clockPort) { + this.clockPort = Objects.requireNonNull(clockPort, "clockPort must not be null"); + } + + /** + * Validates the parsed AI response and produces a {@link NamingProposal} on success. + * + * @param parsed the structurally parsed AI response; must not be null + * @return a {@link AiValidationResult} indicating validity or the specific failure; + * never null + * @throws NullPointerException if {@code parsed} is null + */ + public AiValidationResult validate(ParsedAiResponse parsed) { + Objects.requireNonNull(parsed, "parsed must not be null"); + + // --- Title validation --- + String title = parsed.title().trim(); + + if (title.length() > 20) { + return AiValidationResult.invalid( + "Title exceeds 20 characters (base title): '" + title + "'", + AiErrorClassification.FUNCTIONAL); + } + + if (!isAllowedTitleCharacters(title)) { + return AiValidationResult.invalid( + "Title contains disallowed characters (only letters, digits, and spaces are permitted): '" + + title + "'", + AiErrorClassification.FUNCTIONAL); + } + + if (isGenericTitle(title)) { + return AiValidationResult.invalid( + "Title is a generic placeholder and not acceptable: '" + title + "'", + AiErrorClassification.FUNCTIONAL); + } + + // --- Date validation / fallback --- + LocalDate resolvedDate; + DateSource dateSource; + + if (parsed.dateString().isPresent()) { 
+ String dateStr = parsed.dateString().get(); + try { + resolvedDate = LocalDate.parse(dateStr); + dateSource = DateSource.AI_PROVIDED; + } catch (DateTimeParseException e) { + return AiValidationResult.invalid( + "AI-provided date '" + dateStr + "' is not a valid YYYY-MM-DD date: " + e.getMessage(), + AiErrorClassification.FUNCTIONAL); + } + } else { + // No date provided by the AI → fall back to current date from the clock + resolvedDate = clockPort.now().atZone(java.time.ZoneOffset.UTC).toLocalDate(); + dateSource = DateSource.FALLBACK_CURRENT; + } + + NamingProposal proposal = new NamingProposal(resolvedDate, dateSource, title, parsed.reasoning()); + return AiValidationResult.valid(proposal); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + /** + * Returns {@code true} if every character in the title is a letter, digit, or space. + *

    + * Permits Unicode letters including German Umlauts (ä, ö, ü, Ä, Ö, Ü) and ß. + */ + private static boolean isAllowedTitleCharacters(String title) { + for (int i = 0; i < title.length(); i++) { + char c = title.charAt(i); + if (!Character.isLetter(c) && !Character.isDigit(c) && c != ' ') { + return false; + } + } + return true; + } + + /** + * Returns {@code true} if the title is a known generic placeholder. + * Comparison is case-insensitive. + */ + private static boolean isGenericTitle(String title) { + return GENERIC_TITLES.contains(title.toLowerCase()); + } + + // ------------------------------------------------------------------------- + // Result type + // ------------------------------------------------------------------------- + + /** + * The result of a semantic AI response validation. + */ + public sealed interface AiValidationResult permits AiValidationResult.Valid, AiValidationResult.Invalid { + + /** + * Returns a valid result containing the produced {@link NamingProposal}. + * + * @param proposal the validated naming proposal; must not be null + * @return a valid result; never null + */ + static AiValidationResult valid(NamingProposal proposal) { + return new Valid(proposal); + } + + /** + * Returns an invalid result with an error message and classification. + * + * @param errorMessage human-readable description of the validation failure; + * must not be null + * @param classification always {@link AiErrorClassification#FUNCTIONAL} for + * semantic title/date violations + * @return an invalid result; never null + */ + static AiValidationResult invalid(String errorMessage, AiErrorClassification classification) { + return new Invalid(errorMessage, classification); + } + + /** + * A successful validation result containing the ready {@link NamingProposal}. 
+ * + * @param proposal the validated and complete naming proposal; never null + */ + record Valid(NamingProposal proposal) implements AiValidationResult { + public Valid { + Objects.requireNonNull(proposal, "proposal must not be null"); + } + } + + /** + * A failed validation result carrying the error details. + * + * @param errorMessage the reason for the failure; never null + * @param classification the error category; never null + */ + record Invalid(String errorMessage, AiErrorClassification classification) + implements AiValidationResult { + public Invalid { + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + Objects.requireNonNull(classification, "classification must not be null"); + } + } + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinator.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinator.java index 0cc653a..4311111 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinator.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinator.java @@ -13,15 +13,26 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnica import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger; +import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess; +import 
de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure; import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort; +import de.gecheckt.pdf.umbenenner.domain.model.AiAttemptContext; +import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; -import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady; import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; -import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import java.time.Instant; import java.util.Objects; @@ -32,7 +43,8 @@ import java.util.function.Function; * Application-level service that implements the per-document processing logic. *

    * This service is the single authoritative place for the decision rules: - * idempotency checks, status/counter mapping, and consistent two-level persistence. + * idempotency checks, status/counter mapping, target-copy finalization, and consistent + * two-level persistence. * *

    Processing order per candidate

    *
      @@ -41,56 +53,81 @@ import java.util.function.Function; * a skip attempt with {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED}. *
    1. If the overall status is {@link ProcessingStatus#FAILED_FINAL} → create and persist * a skip attempt with {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}.
    2. - *
    3. Otherwise execute the flow (already done by the caller) and map the result - * into status, counters and retryable flag.
    4. + *
    5. If the overall status is {@link ProcessingStatus#PROPOSAL_READY} → load the + * leading proposal attempt and execute the target-copy finalization flow: + * build the base filename, resolve duplicates, write the copy, persist SUCCESS or + * FAILED_RETRYABLE.
    6. + *
    7. Otherwise execute the pipeline (extraction + pre-checks + AI naming) and map + * the result into status, counters, and retryable flag.
    8. *
    9. Persist exactly one historised processing attempt for the identified document.
    10. *
    11. Persist the updated document master record.
    12. *
    * - *

    Minimal rules

    + *

    Status transitions

    *
      - *
    • Already successful documents are skipped in later runs.
    • - *
    • Already finally failed documents are skipped in later runs.
    • - *
    • First historised deterministic content failure from processing → - * {@link ProcessingStatus#FAILED_RETRYABLE}, content error counter becomes 1, - * {@code retryable=true}.
    • - *
    • Second historised deterministic content failure in a later run → - * {@link ProcessingStatus#FAILED_FINAL}, content error counter becomes 2, - * {@code retryable=false}.
    • - *
    • Document-related technical failures after successful fingerprinting remain - * {@link ProcessingStatus#FAILED_RETRYABLE}, increment transient error counter, - * {@code retryable=true}.
    • - *
    • Skip events do not change error counters.
    • + *
    • Pre-check passed + AI naming proposal ready → {@link ProcessingStatus#PROPOSAL_READY}
    • + *
    • First deterministic content failure → {@link ProcessingStatus#FAILED_RETRYABLE}
    • + *
    • Second deterministic content failure → {@link ProcessingStatus#FAILED_FINAL}
    • + *
    • Technical infrastructure failure → {@link ProcessingStatus#FAILED_RETRYABLE}
    • + *
    • {@link ProcessingStatus#PROPOSAL_READY} + successful target copy + consistent + * persistence → {@link ProcessingStatus#SUCCESS}
    • + *
    • {@link ProcessingStatus#PROPOSAL_READY} + technical failure → {@link ProcessingStatus#FAILED_RETRYABLE}, + * transient error counter +1
    • + *
    • {@link ProcessingStatus#SUCCESS} → {@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} skip
    • + *
    • {@link ProcessingStatus#FAILED_FINAL} → {@link ProcessingStatus#SKIPPED_FINAL_FAILURE} skip
    • *
    * + *

    Leading source for the naming proposal (binding)

    + *

    + * When a document is in {@code PROPOSAL_READY} state, the authoritative source for the + * validated title, resolved date, date source, and AI reasoning is the most recent + * {@code PROPOSAL_READY} attempt in the history. This coordinator never reconstructs + * proposal data from the document master record or re-invokes the AI when a valid + * {@code PROPOSAL_READY} attempt already exists. + * + *

    SUCCESS condition (binding)

    + *

    + * {@code SUCCESS} is set only after: + *

      + *
    1. The target copy has been successfully written.
    2. + *
    3. The final target filename is determined.
    4. + *
    5. The persistence (attempt + master record) has been consistently committed.
    6. + *
    + * If persistence fails after a successful target copy, a best-effort rollback of the + * newly written copy is attempted before the error is recorded. + * *

    Persistence consistency

    *

    - * For every identified document, both the processing attempt and the master record are - * written atomically using a unit of work pattern. If either write fails, both writes - * are rolled back and the failure is logged. The batch run continues with the next - * candidate. + * For every identified document (except PROPOSAL_READY that fails before producing any + * persistent artifact), both the processing attempt and the master record are written + * atomically via a unit of work. If either write fails, both writes are rolled back and + * the failure is logged. The batch run continues with the next candidate. * *

    Pre-fingerprint failures

    *

    * Failures that occur before a successful fingerprint is available are not - * historised in SQLite. They are handled by the caller and logged as non-identifiable - * run events. + * historised in SQLite. They are handled by the caller. */ public class DocumentProcessingCoordinator { private final DocumentRecordRepository documentRecordRepository; private final ProcessingAttemptRepository processingAttemptRepository; private final UnitOfWorkPort unitOfWorkPort; + private final TargetFolderPort targetFolderPort; + private final TargetFileCopyPort targetFileCopyPort; private final ProcessingLogger logger; /** - * Creates the document processor with the required persistence ports and logger. + * Creates the document processing coordinator with all required ports and the logger. * * @param documentRecordRepository port for reading and writing the document master record; * must not be null * @param processingAttemptRepository port for writing and reading the attempt history; * must not be null - * @param unitOfWorkPort port for executing operations atomically; + * @param unitOfWorkPort port for executing operations atomically; must not be null + * @param targetFolderPort port for target folder duplicate resolution and cleanup; + * must not be null + * @param targetFileCopyPort port for copying source files to the target folder; * must not be null * @param logger for processing-related logging; must not be null * @throws NullPointerException if any parameter is null @@ -99,6 +136,8 @@ public class DocumentProcessingCoordinator { DocumentRecordRepository documentRecordRepository, ProcessingAttemptRepository processingAttemptRepository, UnitOfWorkPort unitOfWorkPort, + TargetFolderPort targetFolderPort, + TargetFileCopyPort targetFileCopyPort, ProcessingLogger logger) { this.documentRecordRepository = Objects.requireNonNull(documentRecordRepository, "documentRecordRepository must not be null"); @@ -106,31 +145,25 @@ public class DocumentProcessingCoordinator { 
Objects.requireNonNull(processingAttemptRepository, "processingAttemptRepository must not be null"); this.unitOfWorkPort = Objects.requireNonNull(unitOfWorkPort, "unitOfWorkPort must not be null"); + this.targetFolderPort = + Objects.requireNonNull(targetFolderPort, "targetFolderPort must not be null"); + this.targetFileCopyPort = + Objects.requireNonNull(targetFileCopyPort, "targetFileCopyPort must not be null"); this.logger = Objects.requireNonNull(logger, "logger must not be null"); } /** * Applies the full processing logic for one identified document candidate. *

    - * The caller must have already computed a valid {@link DocumentFingerprint} for the - * candidate. The outcome (from the PDF extraction and pre-check pipeline) is - * provided as {@code outcome} and is used only when the document is not in a - * terminal state. - *

    - * This method never throws. All persistence failures are caught, logged, and - * treated as controlled per-document failures so the batch run can continue. + * Convenience overload that accepts a pre-computed outcome (for callers that have + * already determined the outcome before calling this method). * - * @param candidate the source document candidate being processed; must not be null - * @param fingerprint the successfully computed fingerprint for this candidate; - * must not be null - * @param outcome the result of the extraction and pre-check pipeline; - * must not be null - * @param context the current batch run context (for run ID and timing); - * must not be null - * @param attemptStart the instant at which processing of this candidate began; - * must not be null - * @return true if processing and persistence succeeded for this document, false if a - * persistence failure occurred + * @param candidate the source document candidate being processed; must not be null + * @param fingerprint the successfully computed fingerprint; must not be null + * @param outcome the pipeline result; must not be null + * @param context the current batch run context; must not be null + * @param attemptStart the instant at which processing began; must not be null + * @return true if processing and persistence succeeded, false if persistence failed */ public boolean process( SourceDocumentCandidate candidate, @@ -149,33 +182,32 @@ public class DocumentProcessingCoordinator { } /** - * Applies the full processing logic for one identified document candidate. - *

    - * The caller must have already computed a valid {@link DocumentFingerprint} for the - * candidate. This method handles the complete processing flow: - *

      - *
    1. Load document master record.
    2. - *
    3. Handle terminal SUCCESS / FAILED_FINAL skip cases first.
    4. - *
    5. Only if not terminal: execute the flow (PDF extraction + pre-checks).
    6. - *
    7. Map outcome to status, counters and retryable flag.
    8. - *
    9. Persist exactly one historised processing attempt.
    10. - *
    11. Persist the updated document master record.
    12. - *
    + * Applies the full processing logic for one identified document candidate, + * loading the document master record internally and deferring pipeline execution + * until the terminal-state check passes. *

    * This method never throws. All persistence failures are caught, logged, and * treated as controlled per-document failures so the batch run can continue. * - * @param candidate the source document candidate being processed; must not be null - * @param fingerprint the successfully computed fingerprint for this candidate; - * must not be null - * @param context the current batch run context (for run ID and timing); - * must not be null - * @param attemptStart the instant at which processing of this candidate began; - * must not be null - * @param pipelineExecutor functional interface that executes the extraction and pre-check - * pipeline when needed; must not be null - * @return true if processing and persistence succeeded for this document, false if a - * persistence failure occurred (lookup, attempt write, or record write) + *

    Processing order

    + *
      + *
    1. Load the document master record.
    2. + *
    3. If the status is {@code SUCCESS} → persist + * {@code SKIPPED_ALREADY_PROCESSED}.
    4. + *
    5. If the status is {@code FAILED_FINAL} → persist + * {@code SKIPPED_FINAL_FAILURE}.
    6. + *
    7. If the status is {@code PROPOSAL_READY} → execute the target-copy + * finalization without invoking the AI pipeline again.
    8. + *
    9. Otherwise execute the pipeline (extraction + pre-checks + AI naming) and + * persist the outcome.
    10. + *
    + * + * @param candidate the source document candidate; must not be null + * @param fingerprint the successfully computed fingerprint; must not be null + * @param context the current batch run context; must not be null + * @param attemptStart the instant at which processing began; must not be null + * @param pipelineExecutor executes the extraction + AI pipeline when needed; must not be null + * @return true if processing and persistence succeeded, false if a persistence failure occurred */ public boolean processDeferredOutcome( SourceDocumentCandidate candidate, @@ -194,7 +226,7 @@ public class DocumentProcessingCoordinator { DocumentRecordLookupResult lookupResult = documentRecordRepository.findByFingerprint(fingerprint); - // Step 2: Handle persistence lookup failure – cannot safely proceed + // Step 2: Handle persistence lookup failure if (lookupResult instanceof PersistenceLookupTechnicalFailure failure) { logger.error("Cannot process '{}': master record lookup failed: {}", candidate.uniqueIdentifier(), failure.errorMessage()); @@ -204,7 +236,6 @@ public class DocumentProcessingCoordinator { // Step 3: Determine the action based on the lookup result return switch (lookupResult) { case DocumentTerminalSuccess terminalSuccess -> { - // Document already successfully processed → skip logger.info("Skipping '{}': already successfully processed (fingerprint: {}).", candidate.uniqueIdentifier(), fingerprint.sha256Hex()); yield persistSkipAttempt( @@ -214,7 +245,6 @@ public class DocumentProcessingCoordinator { } case DocumentTerminalFinalFailure terminalFailure -> { - // Document finally failed → skip logger.info("Skipping '{}': already finally failed (fingerprint: {}).", candidate.uniqueIdentifier(), fingerprint.sha256Hex()); yield persistSkipAttempt( @@ -223,14 +253,23 @@ public class DocumentProcessingCoordinator { context, attemptStart); } + case DocumentKnownProcessable knownProcessable + when knownProcessable.record().overallStatus() == 
ProcessingStatus.PROPOSAL_READY -> { + // Naming proposal is present — execute the target-copy finalization + // without triggering a new AI call + logger.info("Finalizing '{}': naming proposal present, proceeding to target copy " + + "(fingerprint: {}).", + candidate.uniqueIdentifier(), fingerprint.sha256Hex()); + yield finalizeProposalReady( + candidate, fingerprint, knownProcessable.record(), context, attemptStart); + } + case DocumentUnknown ignored -> { - // New document – execute pipeline and process DocumentProcessingOutcome outcome = pipelineExecutor.apply(candidate); yield processAndPersistNewDocument(candidate, fingerprint, outcome, context, attemptStart); } case DocumentKnownProcessable knownProcessable -> { - // Known but not terminal – execute pipeline and process DocumentProcessingOutcome outcome = pipelineExecutor.apply(candidate); yield processAndPersistKnownDocument( candidate, fingerprint, outcome, knownProcessable.record(), @@ -238,7 +277,6 @@ public class DocumentProcessingCoordinator { } default -> { - // Exhaustive sealed hierarchy; this branch is unreachable logger.error("Unexpected lookup result type for '{}': {}", candidate.uniqueIdentifier(), lookupResult.getClass().getSimpleName()); yield false; @@ -246,24 +284,259 @@ public class DocumentProcessingCoordinator { }; } - // ------------------------------------------------------------------------- + // ========================================================================= + // M6 target-copy finalization path + // ========================================================================= + + /** + * Finalizes a document whose status is {@code PROPOSAL_READY}. + *

    + * Processing order: + *

      + *
    1. Load the leading {@code PROPOSAL_READY} attempt (authoritative proposal source).
    2. + *
    3. Build the base filename from the proposal's date and title.
    4. + *
    5. Resolve the first available unique filename in the target folder.
    6. + *
    7. Copy the source file to the target folder.
    8. + *
    9. Persist a new {@code SUCCESS} attempt and update the master record.
    10. + *
    11. If persistence fails after a successful copy: attempt best-effort rollback + * of the copy and persist {@code FAILED_RETRYABLE} instead.
    12. + *
    + *

    + * A missing or inconsistent {@code PROPOSAL_READY} attempt is treated as a + * document-level technical error (retryable, transient counter +1). + * + * @return true if SUCCESS was persisted, false if a persistence failure occurred + */ + private boolean finalizeProposalReady( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentRecord existingRecord, + BatchRunContext context, + Instant attemptStart) { + + Instant now = Instant.now(); + + // --- Step 1: Load the leading PROPOSAL_READY attempt --- + ProcessingAttempt proposalAttempt; + try { + proposalAttempt = processingAttemptRepository.findLatestProposalReadyAttempt(fingerprint); + } catch (DocumentPersistenceException e) { + logger.error("Failed to load leading PROPOSAL_READY attempt for '{}': {}", + candidate.uniqueIdentifier(), e.getMessage(), e); + return persistTransientError( + candidate, fingerprint, existingRecord, context, attemptStart, now, + "Failed to load naming proposal from history: " + e.getMessage()); + } + + if (proposalAttempt == null) { + logger.error("Document '{}' has PROPOSAL_READY status but no matching attempt " + + "found in history. 
Inconsistent persistence state.", + candidate.uniqueIdentifier()); + return persistTransientError( + candidate, fingerprint, existingRecord, context, attemptStart, now, + "Status is PROPOSAL_READY but no PROPOSAL_READY attempt exists in history"); + } + + // --- Step 2: Build base filename from the proposal --- + TargetFilenameBuildingService.BaseFilenameResult filenameResult = + TargetFilenameBuildingService.buildBaseFilename(proposalAttempt); + + if (filenameResult instanceof TargetFilenameBuildingService.InconsistentProposalState inconsistent) { + logger.error("Inconsistent proposal state for '{}': {}", + candidate.uniqueIdentifier(), inconsistent.reason()); + return persistTransientError( + candidate, fingerprint, existingRecord, context, attemptStart, now, + "Inconsistent proposal state: " + inconsistent.reason()); + } + + String baseFilename = ((TargetFilenameBuildingService.BaseFilenameReady) filenameResult).baseFilename(); + + // --- Step 3: Resolve unique filename in target folder --- + TargetFilenameResolutionResult resolutionResult = + targetFolderPort.resolveUniqueFilename(baseFilename); + + if (resolutionResult instanceof TargetFolderTechnicalFailure folderFailure) { + logger.error("Duplicate resolution failed for '{}': {}", + candidate.uniqueIdentifier(), folderFailure.errorMessage()); + return persistTransientError( + candidate, fingerprint, existingRecord, context, attemptStart, now, + "Target folder duplicate resolution failed: " + folderFailure.errorMessage()); + } + + String resolvedFilename = + ((ResolvedTargetFilename) resolutionResult).resolvedFilename(); + logger.info("Resolved target filename for '{}': '{}'.", + candidate.uniqueIdentifier(), resolvedFilename); + + // --- Step 4: Copy file to target --- + TargetFileCopyResult copyResult = + targetFileCopyPort.copyToTarget(candidate.locator(), resolvedFilename); + + if (copyResult instanceof TargetFileCopyTechnicalFailure copyFailure) { + logger.error("Target copy failed for '{}': {}", + 
candidate.uniqueIdentifier(), copyFailure.errorMessage()); + return persistTransientError( + candidate, fingerprint, existingRecord, context, attemptStart, now, + "Target file copy failed: " + copyFailure.errorMessage()); + } + + // Copy succeeded — attempt to persist SUCCESS + // If persistence fails: rollback the copy (best-effort) and persist FAILED_RETRYABLE + String targetFolderLocator = targetFolderPort.getTargetFolderLocator(); + + return persistTargetCopySuccess( + candidate, fingerprint, existingRecord, context, attemptStart, now, + resolvedFilename, targetFolderLocator); + } + + /** + * Persists the SUCCESS attempt and updated master record after a successful target copy. + *

    + * If the atomic persistence fails after the copy has already been written, a + * best-effort rollback of the target file is attempted and + * {@link ProcessingStatus#FAILED_RETRYABLE} is persisted instead. + * + * @return true if SUCCESS was persisted; false if persistence itself failed + */ + private boolean persistTargetCopySuccess( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentRecord existingRecord, + BatchRunContext context, + Instant attemptStart, + Instant now, + String resolvedFilename, + String targetFolderLocator) { + + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + + ProcessingAttempt successAttempt = new ProcessingAttempt( + fingerprint, context.runId(), attemptNumber, attemptStart, now, + ProcessingStatus.SUCCESS, null, null, false, + null, null, null, null, null, null, null, null, null, + resolvedFilename); + + DocumentRecord successRecord = buildSuccessRecord( + existingRecord, candidate, now, targetFolderLocator, resolvedFilename); + + unitOfWorkPort.executeInTransaction(txOps -> { + txOps.saveProcessingAttempt(successAttempt); + txOps.updateDocumentRecord(successRecord); + }); + + logger.info("Document '{}' successfully processed. Target: '{}'.", + candidate.uniqueIdentifier(), resolvedFilename); + return true; + + } catch (DocumentPersistenceException e) { + // Persistence failed after a successful copy — rollback the copy (best-effort) + logger.error("Persistence failed after successful target copy for '{}': {}. 
" + + "Attempting best-effort rollback of target file '{}'.", + candidate.uniqueIdentifier(), e.getMessage(), resolvedFilename); + targetFolderPort.tryDeleteTargetFile(resolvedFilename); + + // Persist FAILED_RETRYABLE to record the incident + persistTransientErrorAfterPersistenceFailure( + candidate, fingerprint, existingRecord, context, attemptStart, + Instant.now(), + "Persistence failed after successful target copy (best-effort rollback attempted): " + + e.getMessage()); + return false; + } + } + + /** + * Persists a {@code FAILED_RETRYABLE} attempt with an incremented transient error counter + * for a document-level technical error during the target-copy finalization stage. + * + * @return true if the error was persisted; false if the error persistence itself failed + */ + private boolean persistTransientError( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentRecord existingRecord, + BatchRunContext context, + Instant attemptStart, + Instant now, + String errorMessage) { + + FailureCounters updatedCounters = + existingRecord.failureCounters().withIncrementedTransientErrorCount(); + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields( + fingerprint, context.runId(), attemptNumber, attemptStart, now, + ProcessingStatus.FAILED_RETRYABLE, + ProcessingStatus.FAILED_RETRYABLE.name(), + errorMessage, true); + + DocumentRecord errorRecord = buildTransientErrorRecord( + existingRecord, candidate, updatedCounters, now); + + unitOfWorkPort.executeInTransaction(txOps -> { + txOps.saveProcessingAttempt(errorAttempt); + txOps.updateDocumentRecord(errorRecord); + }); + + logger.debug("Transient error persisted for '{}': status=FAILED_RETRYABLE, " + + "transientErrors={}.", + candidate.uniqueIdentifier(), + updatedCounters.transientErrorCount()); + return true; + + } catch (DocumentPersistenceException persistEx) { + 
logger.error("Failed to persist transient error for '{}': {}", + candidate.uniqueIdentifier(), persistEx.getMessage(), persistEx); + return false; + } + } + + /** + * Attempts to persist a {@code FAILED_RETRYABLE} attempt after a persistence failure + * that occurred following a successful target copy. This is a secondary persistence + * effort; its failure is logged but does not change the return value. + */ + private void persistTransientErrorAfterPersistenceFailure( + SourceDocumentCandidate candidate, + DocumentFingerprint fingerprint, + DocumentRecord existingRecord, + BatchRunContext context, + Instant attemptStart, + Instant now, + String errorMessage) { + + FailureCounters updatedCounters = + existingRecord.failureCounters().withIncrementedTransientErrorCount(); + try { + int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); + ProcessingAttempt errorAttempt = ProcessingAttempt.withoutAiFields( + fingerprint, context.runId(), attemptNumber, attemptStart, now, + ProcessingStatus.FAILED_RETRYABLE, + ProcessingStatus.FAILED_RETRYABLE.name(), + errorMessage, true); + + DocumentRecord errorRecord = buildTransientErrorRecord( + existingRecord, candidate, updatedCounters, now); + + unitOfWorkPort.executeInTransaction(txOps -> { + txOps.saveProcessingAttempt(errorAttempt); + txOps.updateDocumentRecord(errorRecord); + }); + + } catch (DocumentPersistenceException secondaryEx) { + logger.error("Secondary persistence failure for '{}' after target copy rollback: {}", + candidate.uniqueIdentifier(), secondaryEx.getMessage(), secondaryEx); + } + } + + // ========================================================================= // Skip path - // ------------------------------------------------------------------------- + // ========================================================================= /** * Persists a skip attempt and updates the master record's {@code updatedAt} timestamp. - *

    - * Skip events do not change any failure counter. The master record's overall status - * remains unchanged (terminal). - * - * @param candidate the candidate being skipped - * @param fingerprint the document fingerprint - * @param existingRecord the current master record (already terminal) - * @param skipStatus the skip status to record ({@link ProcessingStatus#SKIPPED_ALREADY_PROCESSED} - * or {@link ProcessingStatus#SKIPPED_FINAL_FAILURE}) - * @param context the current batch run context - * @param attemptStart the start instant of this processing attempt - * @return true if persistence succeeded, false if a persistence exception occurred + * Skip events do not change any failure counter or overall status. */ private boolean persistSkipAttempt( SourceDocumentCandidate candidate, @@ -278,21 +551,13 @@ public class DocumentProcessingCoordinator { try { int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); - ProcessingAttempt skipAttempt = new ProcessingAttempt( - fingerprint, - context.runId(), - attemptNumber, - attemptStart, - now, - skipStatus, - null, // no failure class for skip - null, // no failure message for skip - false // not retryable - ); + ProcessingAttempt skipAttempt = ProcessingAttempt.withoutAiFields( + fingerprint, context.runId(), attemptNumber, + attemptStart, now, skipStatus, + null, null, false); DocumentRecord skipRecord = buildSkipRecord(existingRecord, candidate, now); - // Write attempt and master record atomically unitOfWorkPort.executeInTransaction(txOps -> { txOps.saveProcessingAttempt(skipAttempt); txOps.updateDocumentRecord(skipRecord); @@ -309,11 +574,10 @@ public class DocumentProcessingCoordinator { } } - // ------------------------------------------------------------------------- + // ========================================================================= // New document path - // ------------------------------------------------------------------------- + // 
========================================================================= - /** Maps the pipeline outcome for a new document and persists attempt + new master record. */ private boolean processAndPersistNewDocument( SourceDocumentCandidate candidate, DocumentFingerprint fingerprint, @@ -325,14 +589,13 @@ public class DocumentProcessingCoordinator { ProcessingOutcomeTransition.ProcessingOutcome outcome = mapOutcomeForNewDocument(pipelineOutcome); DocumentRecord newRecord = buildNewDocumentRecord(fingerprint, candidate, outcome, now); return persistAttemptAndRecord(candidate, fingerprint, context, attemptStart, now, outcome, - txOps -> txOps.createDocumentRecord(newRecord)); + pipelineOutcome, txOps -> txOps.createDocumentRecord(newRecord)); } - // ------------------------------------------------------------------------- - // Known processable document path - // ------------------------------------------------------------------------- + // ========================================================================= + // Known processable document path (non-PROPOSAL_READY) + // ========================================================================= - /** Maps the pipeline outcome for a known document and persists attempt + updated master record. 
*/ private boolean processAndPersistKnownDocument( SourceDocumentCandidate candidate, DocumentFingerprint fingerprint, @@ -342,62 +605,50 @@ public class DocumentProcessingCoordinator { Instant attemptStart) { Instant now = Instant.now(); - ProcessingOutcomeTransition.ProcessingOutcome outcome = mapOutcomeForKnownDocument(pipelineOutcome, existingRecord.failureCounters()); + ProcessingOutcomeTransition.ProcessingOutcome outcome = + mapOutcomeForKnownDocument(pipelineOutcome, existingRecord.failureCounters()); DocumentRecord updatedRecord = buildUpdatedDocumentRecord(existingRecord, candidate, outcome, now); return persistAttemptAndRecord(candidate, fingerprint, context, attemptStart, now, outcome, - txOps -> txOps.updateDocumentRecord(updatedRecord)); + pipelineOutcome, txOps -> txOps.updateDocumentRecord(updatedRecord)); } - // ------------------------------------------------------------------------- + // ========================================================================= // Extraction outcome mapping - // ------------------------------------------------------------------------- + // ========================================================================= - /** - * Maps an outcome to status, counters, and retryable flag for a brand-new - * document (no prior history, counters start at zero). - * - * @param pipelineOutcome the pipeline result - * @return the outcome with status, counters and retryable flag - */ private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForNewDocument( DocumentProcessingOutcome pipelineOutcome) { return ProcessingOutcomeTransition.forNewDocument(pipelineOutcome); } - /** - * Maps an outcome to status, counters, and retryable flag, taking the - * existing failure counters into account. 
- * - * @param pipelineOutcome the pipeline result - * @param existingCounters the current failure counters from the master record - * @return the outcome with updated status, counters and retryable flag - */ private ProcessingOutcomeTransition.ProcessingOutcome mapOutcomeForKnownDocument( DocumentProcessingOutcome pipelineOutcome, FailureCounters existingCounters) { return ProcessingOutcomeTransition.forKnownDocument(pipelineOutcome, existingCounters); } - // ------------------------------------------------------------------------- + // ========================================================================= // Record assembly helpers - // ------------------------------------------------------------------------- + // ========================================================================= private DocumentRecord buildNewDocumentRecord( DocumentFingerprint fingerprint, SourceDocumentCandidate candidate, ProcessingOutcomeTransition.ProcessingOutcome outcome, Instant now) { - boolean success = outcome.overallStatus() == ProcessingStatus.SUCCESS; + boolean isProposalReady = outcome.overallStatus() == ProcessingStatus.PROPOSAL_READY; return new DocumentRecord( fingerprint, new SourceDocumentLocator(candidate.locator().value()), candidate.uniqueIdentifier(), outcome.overallStatus(), outcome.counters(), - success ? null : now, // lastFailureInstant - success ? now : null, // lastSuccessInstant - now, // createdAt - now // updatedAt + isProposalReady ? 
null : now, // lastFailureInstant + null, // lastSuccessInstant (only on final SUCCESS) + now, // createdAt + now, // updatedAt + null, // lastTargetPath (not yet set) + null // lastTargetFileName (not yet set) ); } @@ -406,21 +657,22 @@ public class DocumentProcessingCoordinator { SourceDocumentCandidate candidate, ProcessingOutcomeTransition.ProcessingOutcome outcome, Instant now) { - boolean success = outcome.overallStatus() == ProcessingStatus.SUCCESS; + boolean isProposalReady = outcome.overallStatus() == ProcessingStatus.PROPOSAL_READY; return new DocumentRecord( existingRecord.fingerprint(), new SourceDocumentLocator(candidate.locator().value()), candidate.uniqueIdentifier(), outcome.overallStatus(), outcome.counters(), - success ? existingRecord.lastFailureInstant() : now, - success ? now : existingRecord.lastSuccessInstant(), + isProposalReady ? existingRecord.lastFailureInstant() : now, + existingRecord.lastSuccessInstant(), // success only set by target-copy finalization existingRecord.createdAt(), - now // updatedAt + now, // updatedAt + existingRecord.lastTargetPath(), // carry over, not changed here + existingRecord.lastTargetFileName() // carry over, not changed here ); } - /** Builds a skip record: only {@code updatedAt} advances; status and counters are unchanged. 
*/ private DocumentRecord buildSkipRecord( DocumentRecord existingRecord, SourceDocumentCandidate candidate, @@ -434,21 +686,60 @@ public class DocumentProcessingCoordinator { existingRecord.lastFailureInstant(), existingRecord.lastSuccessInstant(), existingRecord.createdAt(), - now // updatedAt + now, // updatedAt + existingRecord.lastTargetPath(), + existingRecord.lastTargetFileName() ); } - // ------------------------------------------------------------------------- - // Common persistence flow (non-skip paths) - // ------------------------------------------------------------------------- + private DocumentRecord buildSuccessRecord( + DocumentRecord existingRecord, + SourceDocumentCandidate candidate, + Instant now, + String targetFolderLocator, + String resolvedFilename) { + return new DocumentRecord( + existingRecord.fingerprint(), + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + ProcessingStatus.SUCCESS, + existingRecord.failureCounters(), // counters unchanged on success + existingRecord.lastFailureInstant(), + now, // lastSuccessInstant + existingRecord.createdAt(), + now, // updatedAt + targetFolderLocator, // lastTargetPath + resolvedFilename // lastTargetFileName + ); + } + + private DocumentRecord buildTransientErrorRecord( + DocumentRecord existingRecord, + SourceDocumentCandidate candidate, + FailureCounters updatedCounters, + Instant now) { + return new DocumentRecord( + existingRecord.fingerprint(), + new SourceDocumentLocator(candidate.locator().value()), + candidate.uniqueIdentifier(), + ProcessingStatus.FAILED_RETRYABLE, + updatedCounters, + now, // lastFailureInstant + existingRecord.lastSuccessInstant(), + existingRecord.createdAt(), + now, // updatedAt + existingRecord.lastTargetPath(), // carry over + existingRecord.lastTargetFileName() // carry over + ); + } + + // ========================================================================= + // Common persistence flow (AI pipeline path) + // 
========================================================================= /** * Loads the next attempt number, builds and persists the attempt together with the - * document record atomically, then logs the result. - *

    - * {@code recordWriter} performs either {@code createDocumentRecord} or - * {@code updateDocumentRecord} depending on whether the document is new or known. - * All persistence failures are caught and logged; the batch run continues. + * document record atomically. * * @return true if persistence succeeded, false if a persistence exception occurred */ @@ -459,12 +750,14 @@ public class DocumentProcessingCoordinator { Instant attemptStart, Instant now, ProcessingOutcomeTransition.ProcessingOutcome outcome, + DocumentProcessingOutcome pipelineOutcome, Consumer recordWriter) { try { int attemptNumber = processingAttemptRepository.loadNextAttemptNumber(fingerprint); ProcessingAttempt attempt = - buildAttempt(fingerprint, context, attemptNumber, attemptStart, now, outcome); + buildAttempt(fingerprint, context, attemptNumber, attemptStart, now, + outcome, pipelineOutcome); unitOfWorkPort.executeInTransaction(txOps -> { txOps.saveProcessingAttempt(attempt); @@ -485,20 +778,14 @@ public class DocumentProcessingCoordinator { } } - // ------------------------------------------------------------------------- - // Helper: build ProcessingAttempt - // ------------------------------------------------------------------------- + // ========================================================================= + // Attempt builder (AI pipeline path) + // ========================================================================= /** - * Constructs a {@link ProcessingAttempt} from the given parameters and outcome. 
- * - * @param fingerprint the document fingerprint - * @param context the current batch run context - * @param attemptNumber the monotonic attempt number - * @param startedAt the start instant of this attempt - * @param endedAt the end instant of this attempt - * @param outcome the outcome (status, counters, retryable) - * @return the constructed processing attempt + * Constructs a {@link ProcessingAttempt} from the pipeline outcome, including AI + * traceability fields when available. The {@code finalTargetFileName} is null for + * all pipeline-path attempts (target copy is handled separately). */ private ProcessingAttempt buildAttempt( DocumentFingerprint fingerprint, @@ -506,7 +793,8 @@ public class DocumentProcessingCoordinator { int attemptNumber, Instant startedAt, Instant endedAt, - ProcessingOutcomeTransition.ProcessingOutcome outcome) { + ProcessingOutcomeTransition.ProcessingOutcome outcome, + DocumentProcessingOutcome pipelineOutcome) { String failureClass = null; String failureMessage = null; @@ -514,38 +802,80 @@ public class DocumentProcessingCoordinator { if (outcome.overallStatus() == ProcessingStatus.FAILED_RETRYABLE || outcome.overallStatus() == ProcessingStatus.FAILED_FINAL) { failureClass = outcome.overallStatus().name(); - failureMessage = buildFailureMessage(outcome); + failureMessage = buildFailureMessage(pipelineOutcome, outcome); } - return new ProcessingAttempt( - fingerprint, - context.runId(), - attemptNumber, - startedAt, - endedAt, - outcome.overallStatus(), - failureClass, - failureMessage, - outcome.retryable() - ); - } - - /** - * Builds a human-readable failure message from the outcome. - * - * @param outcome the outcome - * @return a non-null failure message string - */ - private String buildFailureMessage(ProcessingOutcomeTransition.ProcessingOutcome outcome) { - return switch (outcome.overallStatus()) { - case FAILED_RETRYABLE -> "Processing failed (retryable). 
" - + "ContentErrors=" + outcome.counters().contentErrorCount() - + ", TransientErrors=" + outcome.counters().transientErrorCount(); - case FAILED_FINAL -> "Processing failed finally (not retryable). " - + "ContentErrors=" + outcome.counters().contentErrorCount() - + ", TransientErrors=" + outcome.counters().transientErrorCount(); - default -> outcome.overallStatus().name(); + return switch (pipelineOutcome) { + case NamingProposalReady proposalReady -> { + AiAttemptContext ctx = proposalReady.aiContext(); + NamingProposal proposal = proposalReady.proposal(); + yield new ProcessingAttempt( + fingerprint, context.runId(), attemptNumber, startedAt, endedAt, + outcome.overallStatus(), failureClass, failureMessage, outcome.retryable(), + ctx.modelName(), ctx.promptIdentifier(), + ctx.processedPageCount(), ctx.sentCharacterCount(), + ctx.aiRawResponse(), + proposal.aiReasoning(), + proposal.resolvedDate(), proposal.dateSource(), proposal.validatedTitle(), + null // finalTargetFileName — set only on SUCCESS attempts + ); + } + case AiTechnicalFailure techFail -> { + AiAttemptContext ctx = techFail.aiContext(); + yield new ProcessingAttempt( + fingerprint, context.runId(), attemptNumber, startedAt, endedAt, + outcome.overallStatus(), failureClass, failureMessage, outcome.retryable(), + ctx.modelName(), ctx.promptIdentifier(), + ctx.processedPageCount(), ctx.sentCharacterCount(), + ctx.aiRawResponse(), + null, null, null, null, + null // finalTargetFileName + ); + } + case AiFunctionalFailure funcFail -> { + AiAttemptContext ctx = funcFail.aiContext(); + yield new ProcessingAttempt( + fingerprint, context.runId(), attemptNumber, startedAt, endedAt, + outcome.overallStatus(), failureClass, failureMessage, outcome.retryable(), + ctx.modelName(), ctx.promptIdentifier(), + ctx.processedPageCount(), ctx.sentCharacterCount(), + ctx.aiRawResponse(), + null, null, null, null, + null // finalTargetFileName + ); + } + default -> ProcessingAttempt.withoutAiFields( + fingerprint, 
context.runId(), attemptNumber, startedAt, endedAt, + outcome.overallStatus(), failureClass, failureMessage, outcome.retryable() + ); }; } -} \ No newline at end of file + /** + * Builds a human-readable failure message from the pipeline outcome and status outcome. + */ + private String buildFailureMessage( + DocumentProcessingOutcome pipelineOutcome, + ProcessingOutcomeTransition.ProcessingOutcome outcome) { + String base = switch (outcome.overallStatus()) { + case FAILED_RETRYABLE -> "Processing failed (retryable). "; + case FAILED_FINAL -> "Processing failed finally (not retryable). "; + default -> outcome.overallStatus().name() + ". "; + }; + + String detail = switch (pipelineOutcome) { + case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed pf -> + "Reason: " + pf.failureReasonDescription(); + case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError te -> + "Technical: " + te.errorMessage(); + case AiTechnicalFailure ai -> + "AI technical error: " + ai.errorMessage(); + case AiFunctionalFailure ai -> + "AI functional error: " + ai.errorMessage(); + default -> "ContentErrors=" + outcome.counters().contentErrorCount() + + ", TransientErrors=" + outcome.counters().transientErrorCount(); + }; + + return base + detail; + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentTextLimiter.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentTextLimiter.java new file mode 100644 index 0000000..4f66355 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentTextLimiter.java @@ -0,0 +1,55 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import java.util.Objects; + +/** + * Utility for limiting extracted document text to the configured maximum character count. + *

    + * The limitation is applied strictly before an AI request is composed. + * It operates on the extracted text as a character-count boundary without considering + * word or sentence boundaries, which is intentional: the AI is expected to handle + * partial text gracefully. + * + *

+ * <h2>Semantics</h2>
+ * <ul>
+ *   <li>If the text length does not exceed the configured maximum, it is returned unchanged.</li>
+ *   <li>If the text length exceeds the maximum, it is truncated to exactly
+ * {@code maxCharacters} characters.</li>
+ * </ul>
+ *
+ * <h2>Architecture boundary</h2>
+ * <p>

    + * This limiter does not modify the originally extracted document text stored + * elsewhere in the pipeline. It produces a new, potentially shorter copy suitable + * for inclusion in the AI request. The caller is responsible for recording the + * effective character count (i.e., the length of the returned string) for persistence. + */ +public final class DocumentTextLimiter { + + private DocumentTextLimiter() { + // Static utility – no instances + } + + /** + * Returns the document text limited to {@code maxCharacters} characters. + *

    + * If {@code text.length() <= maxCharacters} the original text is returned unchanged. + * Otherwise the first {@code maxCharacters} characters are returned as a new string. + * + * @param text the extracted document text; must not be null + * @param maxCharacters the maximum number of characters to include; must be >= 1 + * @return the text limited to {@code maxCharacters} characters; never null + * @throws NullPointerException if {@code text} is null + * @throws IllegalArgumentException if {@code maxCharacters} is less than 1 + */ + public static String limit(String text, int maxCharacters) { + Objects.requireNonNull(text, "text must not be null"); + if (maxCharacters < 1) { + throw new IllegalArgumentException("maxCharacters must be >= 1, but was: " + maxCharacters); + } + if (text.length() <= maxCharacters) { + return text; + } + return text.substring(0, maxCharacters); + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java index 454edcc..6a94e41 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransition.java @@ -1,7 +1,10 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; +import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import 
de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; @@ -10,7 +13,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; * Pure status and counter transition policy for document processing outcomes. *

    * This class encapsulates the deterministic rules for mapping a pipeline outcome - * (success, content error, or technical error) to a processing status, updated + * (pre-check, naming proposal, or failure) to a processing status, updated * failure counters, and retryability flag. *

    * The transition logic is independent of persistence, orchestration, or any @@ -18,15 +21,23 @@ import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; * *

 * <h2>Transition rules</h2>
 * <ul>
- * <li>Success: Status becomes {@link ProcessingStatus#SUCCESS},
- * counters remain unchanged, {@code retryable=false}.</li>
- * <li>Deterministic content error (first occurrence):
+ * <li>Naming proposal ready: Status becomes
+ * {@link ProcessingStatus#PROPOSAL_READY}, counters unchanged,
+ * {@code retryable=false}.</li>
+ * <li>Pre-check content error (first occurrence):
 * Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
 * content error counter incremented by 1, {@code retryable=true}.</li>
- * <li>Deterministic content error (second or later occurrence):
+ * <li>Pre-check content error (second or later occurrence):
 * Status becomes {@link ProcessingStatus#FAILED_FINAL},
 * content error counter incremented by 1, {@code retryable=false}.</li>
- * <li>Technical error: Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
+ * <li>AI functional failure (first occurrence):
+ * Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
+ * content error counter incremented by 1, {@code retryable=true}.</li>
+ * <li>AI functional failure (second or later occurrence):
+ * Status becomes {@link ProcessingStatus#FAILED_FINAL},
+ * content error counter incremented by 1, {@code retryable=false}.</li>
+ * <li>Technical error (pre-fingerprint / extraction / AI infrastructure):
+ * Status becomes {@link ProcessingStatus#FAILED_RETRYABLE},
 * transient error counter incremented by 1, {@code retryable=true}.</li>
 * </ul>
    */ @@ -41,7 +52,7 @@ final class ProcessingOutcomeTransition { *

    * For new documents, all failure counters start at zero. * - * @param pipelineOutcome the outcome from the extraction and pre-check pipeline + * @param pipelineOutcome the outcome from the processing pipeline * @return the mapped outcome with status, counters, and retryability */ static ProcessingOutcome forNewDocument(DocumentProcessingOutcome pipelineOutcome) { @@ -51,11 +62,8 @@ final class ProcessingOutcomeTransition { /** * Maps a pipeline outcome to a processing outcome, considering the existing * failure counter state from a known document's history. - *

    - * This method applies the deterministic transition rules to produce an updated - * status, counters, and retryable flag. * - * @param pipelineOutcome the outcome from the extraction and pre-check pipeline + * @param pipelineOutcome the outcome from the processing pipeline * @param existingCounters the current failure counter values from the document's master record * @return the mapped outcome with updated status, counters, and retryability */ @@ -64,39 +72,61 @@ final class ProcessingOutcomeTransition { FailureCounters existingCounters) { return switch (pipelineOutcome) { - case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored -> { - // Success: document passed all pre-checks + case NamingProposalReady ignored -> { + // AI naming proposal produced → PROPOSAL_READY (not yet SUCCESS) yield new ProcessingOutcome( - ProcessingStatus.SUCCESS, - existingCounters, // counters unchanged on success + ProcessingStatus.PROPOSAL_READY, + existingCounters, // counters unchanged on proposal success false // not retryable ); } - case PreCheckFailed contentError -> { - // Deterministic content error: apply the 1-retry rule + case PreCheckFailed ignored2 -> { + // Deterministic content error from pre-check: apply the 1-retry rule FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount(); boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0; if (isFirstOccurrence) { - // First content error → FAILED_RETRYABLE - yield new ProcessingOutcome( - ProcessingStatus.FAILED_RETRYABLE, - updatedCounters, - true - ); + yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true); } else { - // Second (or later) content error → FAILED_FINAL - yield new ProcessingOutcome( - ProcessingStatus.FAILED_FINAL, - updatedCounters, - false - ); + yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false); } } - case TechnicalDocumentError technicalError -> { - // Technical error after 
fingerprinting: always FAILED_RETRYABLE, increment transient counter + case AiFunctionalFailure ignored3 -> { + // Deterministic content error from AI validation: apply the 1-retry rule + FailureCounters updatedCounters = existingCounters.withIncrementedContentErrorCount(); + boolean isFirstOccurrence = existingCounters.contentErrorCount() == 0; + + if (isFirstOccurrence) { + yield new ProcessingOutcome(ProcessingStatus.FAILED_RETRYABLE, updatedCounters, true); + } else { + yield new ProcessingOutcome(ProcessingStatus.FAILED_FINAL, updatedCounters, false); + } + } + + case TechnicalDocumentError ignored4 -> { + // Technical error (extraction / infrastructure): retryable, transient counter +1 + yield new ProcessingOutcome( + ProcessingStatus.FAILED_RETRYABLE, + existingCounters.withIncrementedTransientErrorCount(), + true + ); + } + + case AiTechnicalFailure ignored5 -> { + // Technical AI error (timeout, unreachable, bad JSON): retryable, transient counter +1 + yield new ProcessingOutcome( + ProcessingStatus.FAILED_RETRYABLE, + existingCounters.withIncrementedTransientErrorCount(), + true + ); + } + + case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed ignored6 -> { + // Pre-check passed without AI step: in normal flow this should not appear at + // the outcome transition level once the AI pipeline is fully wired. Treat it + // as a technical error to avoid silent inconsistency. 
yield new ProcessingOutcome( ProcessingStatus.FAILED_RETRYABLE, existingCounters.withIncrementedTransientErrorCount(), diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/TargetFilenameBuildingService.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/TargetFilenameBuildingService.java new file mode 100644 index 0000000..6113639 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/TargetFilenameBuildingService.java @@ -0,0 +1,159 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; + +import java.time.LocalDate; +import java.util.Objects; + +/** + * Stateless service for building the base target filename from a leading naming proposal. + *

+ * The base filename follows the verbindliches Zielformat:
+ * <pre>
+ *   YYYY-MM-DD - Titel.pdf
+ * </pre>
+ *

+ * <h2>Input source</h2>
+ * <p>
    + * The sole authoritative source for date and title is the most recent + * {@code PROPOSAL_READY} processing attempt. This service reads directly from a + * {@link ProcessingAttempt} whose + * {@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus#PROPOSAL_READY} + * status was confirmed by the caller. + * + *

+ * <h2>Consistency checks</h2>
+ * <p>
    + * This service does not silently heal inconsistent persistence states. If the proposal + * attempt carries a title or date that violates the rules that were enforced during + * AI response validation, the state is treated as an inconsistent persistence state + * and the caller receives an {@link InconsistentProposalState} result. Such states + * must be surfaced as document-level technical errors. + * + *

+ * <h2>No new fachliche interpretation</h2>
+ * <p>
    + * This service never re-evaluates or reinterprets the title: it uses the already-validated + * title from the proposal attempt unchanged. + */ +public final class TargetFilenameBuildingService { + + private TargetFilenameBuildingService() { + // static utility, no instances + } + + // ------------------------------------------------------------------------- + // Result type + // ------------------------------------------------------------------------- + + /** + * Sealed result of {@link #buildBaseFilename(ProcessingAttempt)}. + */ + public sealed interface BaseFilenameResult + permits BaseFilenameReady, InconsistentProposalState { + } + + /** + * Successful result containing the ready base filename. + * + * @param baseFilename the filename in {@code YYYY-MM-DD - Titel.pdf} format; + * never null or blank + */ + public record BaseFilenameReady(String baseFilename) implements BaseFilenameResult { + public BaseFilenameReady { + Objects.requireNonNull(baseFilename, "baseFilename must not be null"); + if (baseFilename.isBlank()) { + throw new IllegalArgumentException("baseFilename must not be blank"); + } + } + } + + /** + * Failure result indicating that the loaded proposal attempt contains data that + * violates the rules that were applied during naming-proposal validation, making + * the persistence state inconsistent. + * + * @param reason human-readable description of the inconsistency; never null + */ + public record InconsistentProposalState(String reason) implements BaseFilenameResult { + public InconsistentProposalState { + Objects.requireNonNull(reason, "reason must not be null"); + } + } + + // ------------------------------------------------------------------------- + // Main method + // ------------------------------------------------------------------------- + + /** + * Builds the base target filename from the resolved date and validated title stored + * in the given {@code PROPOSAL_READY} attempt. + *

+ * Validation rules applied defensively (already enforced during AI response validation):
+ * <ul>
+ *   <li>Resolved date must be non-null.</li>
+ *   <li>Validated title must be non-null and non-blank.</li>
+ *   <li>Validated title must not exceed 20 characters.</li>
+ *   <li>Validated title must contain only letters, digits, and spaces.</li>
+ * </ul>
    + * If any rule is violated, the state is treated as an + * {@link InconsistentProposalState}. + *

    + * The 20-character limit applies exclusively to the base title. A duplicate-avoidance + * suffix (e.g., {@code (1)}) may be appended by the target folder adapter after this + * method returns and is not counted against the 20 characters. + * + * @param proposalAttempt the leading {@code PROPOSAL_READY} attempt; must not be null + * @return a {@link BaseFilenameReady} with the complete filename, or an + * {@link InconsistentProposalState} describing the consistency violation + */ + public static BaseFilenameResult buildBaseFilename(ProcessingAttempt proposalAttempt) { + Objects.requireNonNull(proposalAttempt, "proposalAttempt must not be null"); + + LocalDate date = proposalAttempt.resolvedDate(); + String title = proposalAttempt.validatedTitle(); + + if (date == null) { + return new InconsistentProposalState( + "Leading PROPOSAL_READY attempt has no resolved date"); + } + + if (title == null || title.isBlank()) { + return new InconsistentProposalState( + "Leading PROPOSAL_READY attempt has no validated title"); + } + + if (title.length() > 20) { + return new InconsistentProposalState( + "Leading PROPOSAL_READY attempt has title exceeding 20 characters: '" + + title + "'"); + } + + if (!isAllowedTitleCharacters(title)) { + return new InconsistentProposalState( + "Leading PROPOSAL_READY attempt has title with disallowed characters " + + "(only letters, digits, and spaces are permitted): '" + + title + "'"); + } + + // Build: YYYY-MM-DD - Titel.pdf + String baseFilename = date + " - " + title + ".pdf"; + return new BaseFilenameReady(baseFilename); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + /** + * Returns {@code true} if every character in the title is a letter, a digit, or a space. + * Unicode letters (including German Umlauts and ß) are permitted. 
+ */ + private static boolean isAllowedTitleCharacters(String title) { + for (int i = 0; i < title.length(); i++) { + char c = title.charAt(i); + if (!Character.isLetter(c) && !Character.isDigit(c) && c != ' ') { + return false; + } + } + return true; + } +} diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java index daa7a7e..c669a7e 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java @@ -13,12 +13,14 @@ import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; +import de.gecheckt.pdf.umbenenner.application.service.AiNamingService; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; import java.time.Instant; @@ -67,13 +69,6 @@ import java.util.Objects; * written in sequence by {@link DocumentProcessingCoordinator}. 
Persistence failures for a single * document are caught and logged; the batch run continues with the remaining candidates. * - *

- * <h2>Non-Goals (not implemented)</h2>
- *
- * <ul>
- * <li>No KI/AI integration or prompt loading.</li>
- * <li>No filename generation or target file copy.</li>
- * <li>No retry rules for KI or target copy failures.</li>
- * </ul>
    - * */ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCase { @@ -83,6 +78,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa private final PdfTextExtractionPort pdfTextExtractionPort; private final FingerprintPort fingerprintPort; private final DocumentProcessingCoordinator documentProcessingCoordinator; + private final AiNamingService aiNamingService; private final ProcessingLogger logger; /** @@ -102,6 +98,8 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa * must not be null * @param documentProcessingCoordinator for applying decision logic and persisting results; * must not be null + * @param aiNamingService for running the AI naming pipeline after pre-checks; + * must not be null * @param logger for processing-related logging; must not be null * @throws NullPointerException if any parameter is null */ @@ -112,6 +110,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa PdfTextExtractionPort pdfTextExtractionPort, FingerprintPort fingerprintPort, DocumentProcessingCoordinator documentProcessingCoordinator, + AiNamingService aiNamingService, ProcessingLogger logger) { this.runtimeConfiguration = Objects.requireNonNull(runtimeConfiguration, "runtimeConfiguration must not be null"); this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null"); @@ -122,6 +121,7 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa this.fingerprintPort = Objects.requireNonNull(fingerprintPort, "fingerprintPort must not be null"); this.documentProcessingCoordinator = Objects.requireNonNull( documentProcessingCoordinator, "documentProcessingCoordinator must not be null"); + this.aiNamingService = Objects.requireNonNull(aiNamingService, "aiNamingService must not be null"); this.logger = Objects.requireNonNull(logger, "logger must not be null"); } @@ -302,14 +302,24 @@ public class 
DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa } /** - * Runs the pipeline (PDF text extraction + pre-checks) for the given candidate. + * Runs the full pipeline for the given candidate: extraction, pre-checks, and AI naming. *

    * This method is called after a successful fingerprint computation. The result is * passed to {@link DocumentProcessingCoordinator}, which applies it only when the document is * not in a terminal state. + *

    + * Processing order: + *

      + * <ol>
 + * <li>Extract PDF text and page count via the extraction port.</li>
 + * <li>Evaluate pre-checks (text quality, page limit). If any pre-check fails,
 + *     return the failure outcome immediately — no AI call is made.</li>
 + * <li>If pre-checks pass, run the AI naming pipeline to obtain a naming proposal
 + *     or classify the AI result as a technical or functional failure.</li>
 + * </ol>
    * * @param candidate the candidate to run through the pipeline - * @return the pipeline outcome (pre-check passed, pre-check failed, or technical error) + * @return the pipeline outcome; one of {@code PreCheckFailed}, {@code TechnicalDocumentError}, + * {@code NamingProposalReady}, {@code AiTechnicalFailure}, or {@code AiFunctionalFailure} */ private DocumentProcessingOutcome runExtractionPipeline(SourceDocumentCandidate candidate) { PdfExtractionResult extractionResult = @@ -317,12 +327,22 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa logExtractionResult(candidate, extractionResult); - DocumentProcessingOutcome outcome = + DocumentProcessingOutcome preCheckOutcome = DocumentProcessingService.processDocument(candidate, extractionResult, runtimeConfiguration); - logProcessingOutcome(candidate, outcome); + // If pre-checks did not pass, return the failure outcome immediately. + // This avoids an AI call for documents that cannot be processed. + if (!(preCheckOutcome instanceof PreCheckPassed preCheckPassed)) { + logProcessingOutcome(candidate, preCheckOutcome); + return preCheckOutcome; + } - return outcome; + // Pre-checks passed — run the AI naming pipeline + logger.info("Pre-checks passed for '{}'. Invoking AI naming pipeline.", + candidate.uniqueIdentifier()); + DocumentProcessingOutcome aiOutcome = aiNamingService.invoke(preCheckPassed); + logProcessingOutcome(candidate, aiOutcome); + return aiOutcome; } /** @@ -361,21 +381,24 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa */ private void logProcessingOutcome(SourceDocumentCandidate candidate, DocumentProcessingOutcome outcome) { switch (outcome) { - case de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed passed -> { - logger.info("Pre-checks PASSED for '{}'. 
Candidate ready for persistence.", - candidate.uniqueIdentifier()); - } - case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed -> { + case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed -> logger.info("Pre-checks FAILED for '{}': {} (Deterministic content error).", candidate.uniqueIdentifier(), failed.failureReasonDescription()); - } - case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError -> { + case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError -> logger.warn("Processing FAILED for '{}': {} (Technical error – retryable).", candidate.uniqueIdentifier(), technicalError.errorMessage()); - } - default -> { - // Handle any other cases - } + case de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady ready -> + logger.info("AI naming proposal ready for '{}': title='{}', date={}.", + candidate.uniqueIdentifier(), + ready.proposal().validatedTitle(), + ready.proposal().resolvedDate()); + case de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure aiTechnical -> + logger.warn("AI technical failure for '{}': {} (Transient – retryable).", + candidate.uniqueIdentifier(), aiTechnical.errorMessage()); + case de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure aiFunctional -> + logger.info("AI functional failure for '{}': {} (Deterministic content error).", + candidate.uniqueIdentifier(), aiFunctional.errorMessage()); + default -> { /* other outcomes are handled elsewhere */ } } } } \ No newline at end of file diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java new file mode 100644 index 0000000..e103be3 --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java @@ -0,0 +1,317 @@ +package 
de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort; +import de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse; +import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation; +import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; +import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.Mock; +import org.mockito.junit.jupiter.MockitoExtension; + +import java.time.Instant; +import java.time.LocalDate; +import java.time.ZoneOffset; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; + +/** + * Unit tests for {@link AiNamingService}. + *

    + * Covers: prompt load failure, AI invocation failure, unparseable response, + * functional validation failure, and the successful naming proposal path. + */ +@ExtendWith(MockitoExtension.class) +class AiNamingServiceTest { + + private static final String MODEL_NAME = "gpt-4"; + private static final int MAX_CHARS = 1000; + private static final Instant FIXED_INSTANT = Instant.parse("2026-04-07T10:00:00Z"); + + @Mock + private AiInvocationPort aiInvocationPort; + + @Mock + private PromptPort promptPort; + + private AiResponseValidator validator; + private AiNamingService service; + + private SourceDocumentCandidate candidate; + private PreCheckPassed preCheckPassed; + + @BeforeEach + void setUp() { + validator = new AiResponseValidator(() -> FIXED_INSTANT); + service = new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, MAX_CHARS); + + candidate = new SourceDocumentCandidate( + "test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf")); + preCheckPassed = new PreCheckPassed( + candidate, new PdfExtractionSuccess("Document text content", new PdfPageCount(2))); + } + + // ------------------------------------------------------------------------- + // Helper + // ------------------------------------------------------------------------- + + private static AiRequestRepresentation dummyRequest() { + return new AiRequestRepresentation( + new PromptIdentifier("prompt.txt"), "Prompt content", "Document text", 13); + } + + private static AiInvocationSuccess successWith(String jsonBody) { + return new AiInvocationSuccess(dummyRequest(), new AiRawResponse(jsonBody)); + } + + private static AiInvocationTechnicalFailure technicalFailure(String reason, String message) { + return new AiInvocationTechnicalFailure(dummyRequest(), reason, message); + } + + // ------------------------------------------------------------------------- + // Prompt load failure + // ------------------------------------------------------------------------- + + @Test + void 
invoke_promptLoadFailure_returnsAiTechnicalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingFailure("FILE_NOT_FOUND", "Prompt file missing")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiTechnicalFailure.class); + AiTechnicalFailure failure = (AiTechnicalFailure) result; + assertThat(failure.errorMessage()).contains("Prompt loading failed"); + assertThat(failure.aiContext().modelName()).isEqualTo(MODEL_NAME); + } + + // ------------------------------------------------------------------------- + // AI invocation failure + // ------------------------------------------------------------------------- + + @Test + void invoke_aiInvocationTimeout_returnsAiTechnicalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt-v1.txt"), "Analyze this document.")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + technicalFailure("TIMEOUT", "Request timed out after 30s")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiTechnicalFailure.class); + assertThat(((AiTechnicalFailure) result).errorMessage()).contains("TIMEOUT"); + } + + @Test + void invoke_aiInvocationConnectionError_returnsAiTechnicalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt content")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + technicalFailure("CONNECTION_ERROR", "Connection refused")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiTechnicalFailure.class); + } + + // ------------------------------------------------------------------------- + // Response parsing failure (unparseable JSON → technical failure) + // ------------------------------------------------------------------------- + + @Test + 
void invoke_unparseableAiResponse_returnsAiTechnicalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("This is not JSON at all")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiTechnicalFailure.class); + assertThat(((AiTechnicalFailure) result).aiContext().aiRawResponse()) + .isEqualTo("This is not JSON at all"); + } + + @Test + void invoke_aiResponseMissingTitle_returnsAiTechnicalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"reasoning\":\"No title provided\"}")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiTechnicalFailure.class); + } + + // ------------------------------------------------------------------------- + // Functional validation failure (parseable but semantically invalid) + // ------------------------------------------------------------------------- + + @Test + void invoke_aiResponseTitleTooLong_returnsAiFunctionalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + // 21-char title: "TitleThatIsTooLongXXX" + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"TitleThatIsTooLongXXX\",\"reasoning\":\"Too long\",\"date\":\"2026-01-15\"}")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiFunctionalFailure.class); + } + + @Test + void invoke_aiResponseGenericTitle_returnsAiFunctionalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new 
PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"Dokument\",\"reasoning\":\"Generic\"}")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiFunctionalFailure.class); + } + + @Test + void invoke_aiResponseInvalidDateFormat_returnsAiFunctionalFailure() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"Rechnung\",\"reasoning\":\"OK\",\"date\":\"15.01.2026\"}")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(AiFunctionalFailure.class); + } + + // ------------------------------------------------------------------------- + // Successful naming proposal + // ------------------------------------------------------------------------- + + @Test + void invoke_validAiResponse_returnsNamingProposalReady() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt-v1.txt"), "Analyze the document.")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"Stromabrechnung\",\"reasoning\":\"Electricity invoice\",\"date\":\"2026-01-15\"}")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(NamingProposalReady.class); + NamingProposalReady ready = (NamingProposalReady) result; + assertThat(ready.proposal().validatedTitle()).isEqualTo("Stromabrechnung"); + assertThat(ready.proposal().resolvedDate()).isEqualTo(LocalDate.of(2026, 1, 15)); + assertThat(ready.proposal().dateSource()).isEqualTo(DateSource.AI_PROVIDED); + assertThat(ready.aiContext().modelName()).isEqualTo(MODEL_NAME); + 
assertThat(ready.aiContext().promptIdentifier()).isEqualTo("prompt-v1.txt"); + } + + @Test + void invoke_validAiResponseWithoutDate_usesFallbackDate() { + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"Kontoauszug\",\"reasoning\":\"No date in document\"}")); + + DocumentProcessingOutcome result = service.invoke(preCheckPassed); + + assertThat(result).isInstanceOf(NamingProposalReady.class); + NamingProposalReady ready = (NamingProposalReady) result; + assertThat(ready.proposal().dateSource()).isEqualTo(DateSource.FALLBACK_CURRENT); + assertThat(ready.proposal().resolvedDate()) + .isEqualTo(FIXED_INSTANT.atZone(ZoneOffset.UTC).toLocalDate()); + } + + @Test + void invoke_documentTextLongerThanMax_sendsLimitedText() { + // max chars is 1000, document text is 2000 chars → sent chars should be 1000 + String longText = "X".repeat(2000); + PreCheckPassed longDoc = new PreCheckPassed( + candidate, new PdfExtractionSuccess(longText, new PdfPageCount(5))); + + when(promptPort.loadPrompt()).thenReturn( + new PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"Rechnung\",\"reasoning\":\"Invoice\",\"date\":\"2026-03-01\"}")); + + DocumentProcessingOutcome result = service.invoke(longDoc); + + assertThat(result).isInstanceOf(NamingProposalReady.class); + NamingProposalReady ready = (NamingProposalReady) result; + assertThat(ready.aiContext().sentCharacterCount()).isEqualTo(MAX_CHARS); + } + + @Test + void invoke_documentTextShorterThanMax_sendsFullText() { + String shortText = "Short document"; + PreCheckPassed shortDoc = new PreCheckPassed( + candidate, new PdfExtractionSuccess(shortText, new PdfPageCount(1))); + + when(promptPort.loadPrompt()).thenReturn( + new 
PromptLoadingSuccess(new PromptIdentifier("prompt.txt"), "Prompt")); + when(aiInvocationPort.invoke(any(AiRequestRepresentation.class))).thenReturn( + successWith("{\"title\":\"Rechnung\",\"reasoning\":\"Invoice\",\"date\":\"2026-03-01\"}")); + + DocumentProcessingOutcome result = service.invoke(shortDoc); + + assertThat(result).isInstanceOf(NamingProposalReady.class); + NamingProposalReady ready = (NamingProposalReady) result; + assertThat(ready.aiContext().sentCharacterCount()).isEqualTo(shortText.length()); + } + + // ------------------------------------------------------------------------- + // Null handling + // ------------------------------------------------------------------------- + + @Test + void invoke_nullPreCheckPassed_throwsNullPointerException() { + assertThatThrownBy(() -> service.invoke(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("preCheckPassed must not be null"); + } + + @Test + void constructor_nullAiPort_throwsNullPointerException() { + assertThatThrownBy(() -> new AiNamingService(null, promptPort, validator, MODEL_NAME, MAX_CHARS)) + .isInstanceOf(NullPointerException.class); + } + + @Test + void constructor_nullPromptPort_throwsNullPointerException() { + assertThatThrownBy(() -> new AiNamingService(aiInvocationPort, null, validator, MODEL_NAME, MAX_CHARS)) + .isInstanceOf(NullPointerException.class); + } + + @Test + void constructor_nullValidator_throwsNullPointerException() { + assertThatThrownBy(() -> new AiNamingService(aiInvocationPort, promptPort, null, MODEL_NAME, MAX_CHARS)) + .isInstanceOf(NullPointerException.class); + } + + @Test + void constructor_maxTextCharactersZero_throwsIllegalArgumentException() { + assertThatThrownBy(() -> new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, 0)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxTextCharacters must be >= 1"); + } +} diff --git 
a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseParserTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseParserTest.java new file mode 100644 index 0000000..c5e74c5 --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseParserTest.java @@ -0,0 +1,217 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingResult; +import de.gecheckt.pdf.umbenenner.domain.model.AiResponseParsingSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse; +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link AiResponseParser}. + *

    + * Covers structural parsing rules: valid JSON objects, mandatory fields, + * optional date, extra fields, and rejection of non-JSON or mixed-content responses. + */ +class AiResponseParserTest { + + // ------------------------------------------------------------------------- + // Success cases + // ------------------------------------------------------------------------- + + @Test + void parse_validJsonWithAllFields_returnsSuccess() { + AiRawResponse raw = new AiRawResponse( + "{\"title\":\"Stromabrechnung\",\"reasoning\":\"Found bill dated 2026-01-15\",\"date\":\"2026-01-15\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingSuccess.class); + ParsedAiResponse parsed = ((AiResponseParsingSuccess) result).response(); + assertThat(parsed.title()).isEqualTo("Stromabrechnung"); + assertThat(parsed.reasoning()).isEqualTo("Found bill dated 2026-01-15"); + assertThat(parsed.dateString()).contains("2026-01-15"); + } + + @Test + void parse_validJsonWithoutDate_returnsSuccessWithEmptyOptional() { + AiRawResponse raw = new AiRawResponse( + "{\"title\":\"Kontoauszug\",\"reasoning\":\"No date found in document\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingSuccess.class); + ParsedAiResponse parsed = ((AiResponseParsingSuccess) result).response(); + assertThat(parsed.title()).isEqualTo("Kontoauszug"); + assertThat(parsed.dateString()).isEmpty(); + } + + @Test + void parse_validJsonWithAdditionalFields_toleratesExtraFields() { + AiRawResponse raw = new AiRawResponse( + "{\"title\":\"Rechnung\",\"reasoning\":\"Invoice\",\"confidence\":0.95,\"lang\":\"de\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingSuccess.class); + ParsedAiResponse parsed = ((AiResponseParsingSuccess) result).response(); + assertThat(parsed.title()).isEqualTo("Rechnung"); + } + + 
@Test + void parse_validJsonWithLeadingAndTrailingWhitespace_trimsAndSucceeds() { + AiRawResponse raw = new AiRawResponse( + " {\"title\":\"Vertrag\",\"reasoning\":\"Contract document\"} "); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingSuccess.class); + } + + @Test + void parse_emptyReasoningField_isAccepted() { + AiRawResponse raw = new AiRawResponse( + "{\"title\":\"Mahnung\",\"reasoning\":\"\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingSuccess.class); + ParsedAiResponse parsed = ((AiResponseParsingSuccess) result).response(); + assertThat(parsed.reasoning()).isEmpty(); + } + + @Test + void parse_nullDateField_treatedAsAbsent() { + AiRawResponse raw = new AiRawResponse( + "{\"title\":\"Bescheid\",\"reasoning\":\"Administrative notice\",\"date\":null}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingSuccess.class); + ParsedAiResponse parsed = ((AiResponseParsingSuccess) result).response(); + assertThat(parsed.dateString()).isEmpty(); + } + + // ------------------------------------------------------------------------- + // Failure cases – structural + // ------------------------------------------------------------------------- + + @Test + void parse_emptyBody_returnsFailure() { + AiRawResponse raw = new AiRawResponse(""); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("EMPTY_RESPONSE"); + } + + @Test + void parse_blankBody_returnsFailure() { + AiRawResponse raw = new AiRawResponse(" \t\n "); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + } + + @Test + void parse_plainText_returnsFailure() { 
+ AiRawResponse raw = new AiRawResponse("Sure, here is the title: Rechnung"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("NOT_JSON_OBJECT"); + } + + @Test + void parse_jsonEmbeddedInProse_returnsFailure() { + AiRawResponse raw = new AiRawResponse( + "Here is the result: {\"title\":\"Rechnung\",\"reasoning\":\"r\"} Hope that helps!"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("NOT_JSON_OBJECT"); + } + + @Test + void parse_jsonArray_returnsFailure() { + AiRawResponse raw = new AiRawResponse("[{\"title\":\"Rechnung\",\"reasoning\":\"r\"}]"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("NOT_JSON_OBJECT"); + } + + @Test + void parse_invalidJson_returnsFailure() { + AiRawResponse raw = new AiRawResponse("{\"title\":\"Rechnung\",\"reasoning\":}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("INVALID_JSON"); + } + + @Test + void parse_missingTitle_returnsFailure() { + AiRawResponse raw = new AiRawResponse("{\"reasoning\":\"Some reasoning without title\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("MISSING_TITLE"); + } + + @Test + void parse_nullTitle_returnsFailure() { + AiRawResponse raw = new 
AiRawResponse("{\"title\":null,\"reasoning\":\"r\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("MISSING_TITLE"); + } + + @Test + void parse_blankTitle_returnsFailure() { + AiRawResponse raw = new AiRawResponse("{\"title\":\" \",\"reasoning\":\"r\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("BLANK_TITLE"); + } + + @Test + void parse_missingReasoning_returnsFailure() { + AiRawResponse raw = new AiRawResponse("{\"title\":\"Rechnung\"}"); + + AiResponseParsingResult result = AiResponseParser.parse(raw); + + assertThat(result).isInstanceOf(AiResponseParsingFailure.class); + assertThat(((AiResponseParsingFailure) result).failureReason()) + .isEqualTo("MISSING_REASONING"); + } + + @Test + void parse_nullRawResponse_throwsNullPointerException() { + assertThatThrownBy(() -> AiResponseParser.parse(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("rawResponse must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseValidatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseValidatorTest.java new file mode 100644 index 0000000..22b14a8 --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiResponseValidatorTest.java @@ -0,0 +1,236 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal; +import 
de.gecheckt.pdf.umbenenner.domain.model.ParsedAiResponse; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.LocalDate; +import java.time.ZoneOffset; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link AiResponseValidator}. + *

    + * Covers: title character rules, length limit, generic placeholder detection, + * date parsing, date fallback via {@link ClockPort}, and null handling. + */ +class AiResponseValidatorTest { + + private static final Instant FIXED_INSTANT = Instant.parse("2026-04-07T10:00:00Z"); + private static final LocalDate FIXED_DATE = FIXED_INSTANT.atZone(ZoneOffset.UTC).toLocalDate(); + + private AiResponseValidator validator; + + @BeforeEach + void setUp() { + ClockPort fixedClock = () -> FIXED_INSTANT; + validator = new AiResponseValidator(fixedClock); + } + + // ------------------------------------------------------------------------- + // Valid cases + // ------------------------------------------------------------------------- + + @Test + void validate_validTitleAndAiDate_returnsValidWithAiProvided() { + ParsedAiResponse parsed = ParsedAiResponse.of("Stromabrechnung", "Electricity bill", "2026-01-15"); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + NamingProposal proposal = ((AiResponseValidator.AiValidationResult.Valid) result).proposal(); + assertThat(proposal.validatedTitle()).isEqualTo("Stromabrechnung"); + assertThat(proposal.resolvedDate()).isEqualTo(LocalDate.of(2026, 1, 15)); + assertThat(proposal.dateSource()).isEqualTo(DateSource.AI_PROVIDED); + assertThat(proposal.aiReasoning()).isEqualTo("Electricity bill"); + } + + @Test + void validate_validTitleNoDate_usesFallbackCurrentDate() { + ParsedAiResponse parsed = ParsedAiResponse.of("Kontoauszug", "No date in document", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + NamingProposal proposal = ((AiResponseValidator.AiValidationResult.Valid) result).proposal(); + assertThat(proposal.resolvedDate()).isEqualTo(FIXED_DATE); + 
assertThat(proposal.dateSource()).isEqualTo(DateSource.FALLBACK_CURRENT); + } + + @Test + void validate_titleWithUmlauts_isAccepted() { + ParsedAiResponse parsed = ParsedAiResponse.of("Mietvertrag Müller", "Rental contract", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + } + + @Test + void validate_titleWithSzligChar_isAccepted() { + ParsedAiResponse parsed = ParsedAiResponse.of("Straßenrechnung", "Street bill", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + } + + @Test + void validate_titleWithDigits_isAccepted() { + ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung 2026", "Invoice 2026", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + } + + @Test + void validate_titleExactly20Chars_isAccepted() { + String title = "12345678901234567890"; // exactly 20 chars + ParsedAiResponse parsed = ParsedAiResponse.of(title, "test", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + } + + @Test + void validate_emptyReasoning_isAccepted() { + ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Valid.class); + } + + // ------------------------------------------------------------------------- + // Title validation failures + // ------------------------------------------------------------------------- + + @Test + void validate_title21Chars_returnsInvalid() { + String title = 
"1234567890123456789A1"; // 21 chars + ParsedAiResponse parsed = ParsedAiResponse.of(title, "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage()) + .contains("20"); + } + + @Test + void validate_titleWithSpecialChar_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung!", "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage()) + .containsIgnoringCase("disallowed"); + } + + @Test + void validate_titleWithHyphen_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("Strom-Rechnung", "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + } + + @Test + void validate_genericTitleDokument_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("Dokument", "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage()) + .containsIgnoringCase("placeholder"); + } + + @Test + void validate_genericTitleDateiCaseInsensitive_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("DATEI", "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + } + + @Test + void 
validate_genericTitleScan_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("scan", "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + } + + @Test + void validate_genericTitlePdf_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("PDF", "reasoning", null); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + } + + // ------------------------------------------------------------------------- + // Date validation failures + // ------------------------------------------------------------------------- + + @Test + void validate_aiProvidesUnparseableDate_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "reasoning", "not-a-date"); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + assertThat(((AiResponseValidator.AiValidationResult.Invalid) result).errorMessage()) + .contains("not-a-date"); + } + + @Test + void validate_aiProvidesWrongDateFormat_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "reasoning", "15.01.2026"); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + } + + @Test + void validate_aiProvidesPartialDate_returnsInvalid() { + ParsedAiResponse parsed = ParsedAiResponse.of("Rechnung", "reasoning", "2026-01"); + + AiResponseValidator.AiValidationResult result = validator.validate(parsed); + + assertThat(result).isInstanceOf(AiResponseValidator.AiValidationResult.Invalid.class); + } + + // ------------------------------------------------------------------------- + // Null 
handling + // ------------------------------------------------------------------------- + + @Test + void validate_nullParsedResponse_throwsNullPointerException() { + assertThatThrownBy(() -> validator.validate(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("parsed must not be null"); + } + + @Test + void constructor_nullClockPort_throwsNullPointerException() { + assertThatThrownBy(() -> new AiResponseValidator(null)) + .isInstanceOf(NullPointerException.class) + .hasMessage("clockPort must not be null"); + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java index 4fa732b..3790d17 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java @@ -13,10 +13,22 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnica import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger; +import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort; 
+import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure; import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort; +import de.gecheckt.pdf.umbenenner.domain.model.AiAttemptContext; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal; +import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; @@ -32,6 +44,7 @@ import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import java.time.Instant; +import java.time.LocalDate; import java.util.ArrayList; import java.util.List; import java.util.function.Consumer; @@ -72,7 +85,8 @@ class DocumentProcessingCoordinatorTest { recordRepo = new CapturingDocumentRecordRepository(); attemptRepo = new CapturingProcessingAttemptRepository(); unitOfWorkPort = new CapturingUnitOfWorkPort(recordRepo, attemptRepo); - processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, new NoOpProcessingLogger()); + processor = new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); candidate = new SourceDocumentCandidate( "test.pdf", 1024L, new SourceDocumentLocator("/tmp/test.pdf")); @@ -86,17 +100,16 @@ class DocumentProcessingCoordinatorTest { // ------------------------------------------------------------------------- @Test - void process_newDocument_preCheckPassed_persistsSuccessStatus() { + void process_newDocument_namingProposalReady_persistsProposalReadyStatus() { 
recordRepo.setLookupResult(new DocumentUnknown()); - DocumentProcessingOutcome outcome = new PreCheckPassed( - candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + DocumentProcessingOutcome outcome = buildNamingProposalOutcome(); processor.process(candidate, fingerprint, outcome, context, attemptStart); // One attempt written assertEquals(1, attemptRepo.savedAttempts.size()); ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); - assertEquals(ProcessingStatus.SUCCESS, attempt.status()); + assertEquals(ProcessingStatus.PROPOSAL_READY, attempt.status()); assertFalse(attempt.retryable()); assertNull(attempt.failureClass()); assertNull(attempt.failureMessage()); @@ -104,10 +117,11 @@ class DocumentProcessingCoordinatorTest { // One master record created assertEquals(1, recordRepo.createdRecords.size()); DocumentRecord record = recordRepo.createdRecords.get(0); - assertEquals(ProcessingStatus.SUCCESS, record.overallStatus()); + assertEquals(ProcessingStatus.PROPOSAL_READY, record.overallStatus()); assertEquals(0, record.failureCounters().contentErrorCount()); assertEquals(0, record.failureCounters().transientErrorCount()); - assertNotNull(record.lastSuccessInstant()); + // lastSuccessInstant is null in M5; it is set by the target-copy stage (M6) + assertNull(record.lastSuccessInstant()); assertNull(record.lastFailureInstant()); } @@ -203,24 +217,24 @@ class DocumentProcessingCoordinatorTest { } @Test - void process_knownDocument_preCheckPassed_persistsSuccess() { + void process_knownDocument_namingProposalReady_persistsProposalReadyStatus() { DocumentRecord existingRecord = buildRecord( ProcessingStatus.FAILED_RETRYABLE, new FailureCounters(0, 1)); recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); - DocumentProcessingOutcome outcome = new PreCheckPassed( - candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + DocumentProcessingOutcome outcome = buildNamingProposalOutcome(); processor.process(candidate, 
fingerprint, outcome, context, attemptStart); assertEquals(1, recordRepo.updatedRecords.size()); DocumentRecord record = recordRepo.updatedRecords.get(0); - assertEquals(ProcessingStatus.SUCCESS, record.overallStatus()); - // Counters unchanged on success + assertEquals(ProcessingStatus.PROPOSAL_READY, record.overallStatus()); + // Counters unchanged on naming proposal success assertEquals(0, record.failureCounters().contentErrorCount()); assertEquals(1, record.failureCounters().transientErrorCount()); - assertNotNull(record.lastSuccessInstant()); + // lastSuccessInstant is null in M5; it is set by the target-copy stage (M6) + assertNull(record.lastSuccessInstant()); } // ------------------------------------------------------------------------- @@ -469,8 +483,7 @@ class DocumentProcessingCoordinatorTest { // ------------------------------------------------------------------------- @Test - void process_newDocument_firstContentError_failureMessageContainsContentErrorCount() { - // Prüft, dass die Fehlermeldung die Fehleranzahl enthält (nicht leer ist) + void process_newDocument_firstContentError_failureMessageContainsFailureReason() { recordRepo.setLookupResult(new DocumentUnknown()); DocumentProcessingOutcome outcome = new PreCheckFailed( candidate, PreCheckFailureReason.NO_USABLE_TEXT); @@ -481,13 +494,13 @@ class DocumentProcessingCoordinatorTest { assertNotNull(attempt.failureMessage(), "Fehlermeldung darf nicht null sein bei FAILED_RETRYABLE"); assertFalse(attempt.failureMessage().isBlank(), "Fehlermeldung darf nicht leer sein bei FAILED_RETRYABLE"); - assertTrue(attempt.failureMessage().contains("ContentErrors=1"), - "Fehlermeldung muss den Inhaltsfehler-Zähler enthalten: " + attempt.failureMessage()); + assertTrue(attempt.failureMessage().contains("No usable text in extracted PDF content"), + "Fehlermeldung muss den Fehlergrund enthalten: " + attempt.failureMessage()); } @Test void process_knownDocument_secondContentError_failureMessageContainsFinalStatus() { 
- // Prüft, dass die Fehlermeldung bei FAILED_FINAL den Endzustand enthält + // Prüft, dass die Fehlermeldung bei FAILED_FINAL den Fehlergrund enthält DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, new FailureCounters(1, 0)); recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckFailed( @@ -499,13 +512,12 @@ class DocumentProcessingCoordinatorTest { assertNotNull(attempt.failureMessage(), "Fehlermeldung darf nicht null sein bei FAILED_FINAL"); assertFalse(attempt.failureMessage().isBlank(), "Fehlermeldung darf nicht leer sein bei FAILED_FINAL"); - assertTrue(attempt.failureMessage().contains("ContentErrors=2"), - "Fehlermeldung muss den aktualisierten Inhaltsfehler-Zähler enthalten: " + attempt.failureMessage()); + assertTrue(attempt.failureMessage().contains("Document page count exceeds configured limit"), + "Fehlermeldung muss den Fehlergrund enthalten: " + attempt.failureMessage()); } @Test - void process_newDocument_technicalError_failureMessageContainsTransientErrorCount() { - // Prüft, dass die Fehlermeldung bei transientem Fehler den Transient-Zähler enthält + void process_newDocument_technicalError_failureMessageContainsTechnicalDetail() { recordRepo.setLookupResult(new DocumentUnknown()); DocumentProcessingOutcome outcome = new TechnicalDocumentError(candidate, "Timeout", null); @@ -513,22 +525,21 @@ class DocumentProcessingCoordinatorTest { ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); assertNotNull(attempt.failureMessage()); - assertTrue(attempt.failureMessage().contains("TransientErrors=1"), - "Fehlermeldung muss den Transient-Fehler-Zähler enthalten: " + attempt.failureMessage()); + assertTrue(attempt.failureMessage().contains("Timeout"), + "Fehlermeldung muss den technischen Fehlerdetail enthalten: " + attempt.failureMessage()); } @Test - void process_newDocument_preCheckPassed_failureClassAndMessageAreNull() { - // Prüft, dass bei 
Erfolg failureClass und failureMessage null sind + void process_newDocument_namingProposalReady_failureClassAndMessageAreNull() { + // Prüft, dass bei PROPOSAL_READY failureClass und failureMessage null sind recordRepo.setLookupResult(new DocumentUnknown()); - DocumentProcessingOutcome outcome = new PreCheckPassed( - candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + DocumentProcessingOutcome outcome = buildNamingProposalOutcome(); processor.process(candidate, fingerprint, outcome, context, attemptStart); ProcessingAttempt attempt = attemptRepo.savedAttempts.get(0); - assertNull(attempt.failureClass(), "Bei Erfolg muss failureClass null sein"); - assertNull(attempt.failureMessage(), "Bei Erfolg muss failureMessage null sein"); + assertNull(attempt.failureClass(), "Bei PROPOSAL_READY muss failureClass null sein"); + assertNull(attempt.failureMessage(), "Bei PROPOSAL_READY muss failureMessage null sein"); } // ------------------------------------------------------------------------- @@ -536,9 +547,9 @@ class DocumentProcessingCoordinatorTest { // ------------------------------------------------------------------------- @Test - void process_knownDocument_preCheckPassed_lastSuccessInstantSetAndLastFailureInstantFromPreviousRecord() { - // Prüft, dass bei SUCCESS am known-Dokument lastSuccessInstant gesetzt - // und lastFailureInstant aus dem Vorgänger-Datensatz übernommen wird + void process_knownDocument_namingProposalReady_lastSuccessInstantNullAndLastFailureInstantFromPreviousRecord() { + // Prüft, dass bei PROPOSAL_READY am known-Dokument lastSuccessInstant null bleibt + // (M6 setzt ihn erst nach der Zielkopie) und lastFailureInstant aus dem Vorgänger übernommen wird Instant previousFailureInstant = Instant.parse("2025-01-15T10:00:00Z"); DocumentRecord existingRecord = new DocumentRecord( fingerprint, @@ -549,19 +560,20 @@ class DocumentProcessingCoordinatorTest { previousFailureInstant, // lastFailureInstant vorhanden null, // noch kein 
Erfolgszeitpunkt Instant.now(), - Instant.now() + Instant.now(), + null, + null ); recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); - DocumentProcessingOutcome outcome = new PreCheckPassed( - candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); + DocumentProcessingOutcome outcome = buildNamingProposalOutcome(); processor.process(candidate, fingerprint, outcome, context, attemptStart); DocumentRecord updated = recordRepo.updatedRecords.get(0); - assertNotNull(updated.lastSuccessInstant(), - "lastSuccessInstant muss nach erfolgreichem Verarbeiten gesetzt sein"); + assertNull(updated.lastSuccessInstant(), + "lastSuccessInstant muss nach PROPOSAL_READY null bleiben (wird erst von M6 gesetzt)"); assertEquals(previousFailureInstant, updated.lastFailureInstant(), - "lastFailureInstant muss bei SUCCESS den Vorgänger-Wert beibehalten"); + "lastFailureInstant muss bei PROPOSAL_READY den Vorgänger-Wert beibehalten"); } @Test @@ -578,7 +590,9 @@ class DocumentProcessingCoordinatorTest { null, // noch keine Fehlzeit previousSuccessInstant, // vorheriger Erfolg vorhanden Instant.now(), - Instant.now() + Instant.now(), + null, + null ); recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckFailed( @@ -602,7 +616,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass bei Lookup-Fehler ein Fehler-Log-Eintrag erzeugt wird CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("Datenbank nicht erreichbar", null)); DocumentProcessingOutcome outcome = new PreCheckPassed( candidate, 
new PdfExtractionSuccess("text", new PdfPageCount(1))); @@ -618,7 +633,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass beim Überspringen eines bereits erfolgreich verarbeiteten Dokuments geloggt wird CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()); recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckPassed( @@ -635,7 +651,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass beim Überspringen eines final fehlgeschlagenen Dokuments geloggt wird CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); DocumentRecord existingRecord = buildRecord(ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0)); recordRepo.setLookupResult(new DocumentTerminalFinalFailure(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckFailed( @@ -652,7 +669,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass nach erfolgreichem Persistieren einer neuen Datei geloggt wird CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new 
DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); recordRepo.setLookupResult(new DocumentUnknown()); DocumentProcessingOutcome outcome = new PreCheckPassed( candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))); @@ -668,7 +686,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass bei Persistenzfehler ein Fehler-Log-Eintrag erzeugt wird CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); recordRepo.setLookupResult(new DocumentUnknown()); unitOfWorkPort.failOnExecute = true; DocumentProcessingOutcome outcome = new PreCheckPassed( @@ -685,7 +704,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass nach erfolgreichem Skip-Persistieren ein Debug-Log erzeugt wird (persistSkipAttempt L301) CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()); recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); DocumentProcessingOutcome outcome = new PreCheckPassed( @@ -702,7 +722,8 @@ class DocumentProcessingCoordinatorTest { // Prüft, dass bei Persistenzfehler im Skip-Pfad ein Fehler geloggt wird (persistSkipAttempt L306) CapturingProcessingLogger capturingLogger = new 
CapturingProcessingLogger(); DocumentProcessingCoordinator coordinatorWithCapturingLogger = - new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, capturingLogger); + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger); DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()); recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord)); unitOfWorkPort.failOnExecute = true; @@ -715,10 +736,192 @@ class DocumentProcessingCoordinatorTest { "Bei Persistenzfehler im Skip-Pfad muss ein Fehler geloggt werden"); } + // ------------------------------------------------------------------------- + // PROPOSAL_READY finalization path + // ------------------------------------------------------------------------- + + @Test + void processDeferredOutcome_proposalReady_successfulCopy_persistsSuccessWithTargetFileName() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, + c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); }); + + assertTrue(result, "Finalization should succeed"); + + ProcessingAttempt successAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.SUCCESS) + .findFirst() + .orElse(null); + assertNotNull(successAttempt, "A SUCCESS attempt must be persisted"); + assertNotNull(successAttempt.finalTargetFileName(), "SUCCESS attempt must carry the final target filename"); + + DocumentRecord updated = recordRepo.updatedRecords.get(0); + assertEquals(ProcessingStatus.SUCCESS, updated.overallStatus()); + assertNotNull(updated.lastTargetFileName(), "Master 
record must carry the final target filename"); + assertNotNull(updated.lastTargetPath(), "Master record must carry the target folder path"); + assertNotNull(updated.lastSuccessInstant(), "lastSuccessInstant must be set on SUCCESS"); + } + + @Test + void processDeferredOutcome_proposalReady_missingProposalAttempt_persistsTransientError() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + // No PROPOSAL_READY attempt pre-populated + + // persistTransientError returns true when the error record was persisted successfully + processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted"); + assertTrue(errorAttempt.retryable(), "Transient error must be retryable"); + } + + @Test + void processDeferredOutcome_proposalReady_inconsistentProposalNullDate_persistsTransientError() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + ProcessingAttempt badProposal = new ProcessingAttempt( + fingerprint, context.runId(), 1, Instant.now(), Instant.now(), + ProcessingStatus.PROPOSAL_READY, null, null, false, + "model", "prompt", 1, 100, "{}", "reason", + null, DateSource.AI_PROVIDED, "Rechnung", null); + attemptRepo.savedAttempts.add(badProposal); + + processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, "A 
FAILED_RETRYABLE attempt must be persisted for inconsistent proposal state"); + } + + @Test + void processDeferredOutcome_proposalReady_duplicateResolutionFailure_persistsTransientError() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + DocumentProcessingCoordinator coordinatorWithFailingFolder = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); + + coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when duplicate resolution fails"); + } + + @Test + void processDeferredOutcome_proposalReady_copyFailure_persistsTransientError() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + DocumentProcessingCoordinator coordinatorWithFailingCopy = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger()); + + coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, "A FAILED_RETRYABLE attempt must be persisted when file 
copy fails"); + } + + @Test + void processDeferredOutcome_proposalReady_inconsistentProposalTitleExceeds20Chars_persistsTransientError() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + // Title of 21 characters violates the 20-char base-title rule — inconsistent persistence state + ProcessingAttempt badProposal = new ProcessingAttempt( + fingerprint, context.runId(), 1, Instant.now(), Instant.now(), + ProcessingStatus.PROPOSAL_READY, null, null, false, + "model", "prompt", 1, 100, "{}", "reason", + LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED, + "A".repeat(21), null); + attemptRepo.savedAttempts.add(badProposal); + + processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, + "A FAILED_RETRYABLE attempt must be persisted when the proposal title is inconsistent"); + assertTrue(errorAttempt.retryable(), "Inconsistent proposal error must be retryable"); + } + + @Test + void processDeferredOutcome_proposalReady_inconsistentProposalTitleWithDisallowedChars_persistsTransientError() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + + // Hyphen is a disallowed character in the fachliche Titelregel + ProcessingAttempt badProposal = new ProcessingAttempt( + fingerprint, context.runId(), 1, Instant.now(), Instant.now(), + ProcessingStatus.PROPOSAL_READY, null, null, false, + "model", "prompt", 1, 100, "{}", "reason", + LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED, + "Rechnung-2026", null); + attemptRepo.savedAttempts.add(badProposal); + + 
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() + .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) + .findFirst() + .orElse(null); + assertNotNull(errorAttempt, + "A FAILED_RETRYABLE attempt must be persisted when the proposal title has disallowed characters"); + } + + @Test + void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_returnsFalse() { + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + unitOfWorkPort.failOnExecute = true; + + boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertFalse(result, "Should return false when persistence fails after successful copy"); + } + // ------------------------------------------------------------------------- // Helpers // ------------------------------------------------------------------------- + private ProcessingAttempt buildValidProposalAttempt() { + return new ProcessingAttempt( + fingerprint, context.runId(), 1, Instant.now(), Instant.now(), + ProcessingStatus.PROPOSAL_READY, null, null, false, + "gpt-4", "prompt-v1.txt", 1, 500, "{}", "reason", + LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED, "Rechnung", null); + } + + private DocumentProcessingOutcome buildNamingProposalOutcome() { + AiAttemptContext ctx = new AiAttemptContext( + "gpt-4", "prompt-v1.txt", 1, 500, "{\"title\":\"Rechnung\",\"reasoning\":\"r\"}"); + NamingProposal proposal = new NamingProposal( + LocalDate.of(2026, 1, 15), DateSource.AI_PROVIDED, "Rechnung", "AI reasoning"); + return new NamingProposalReady(candidate, proposal, ctx); + } + private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) { Instant now = 
Instant.now(); return new DocumentRecord( @@ -730,7 +933,9 @@ class DocumentProcessingCoordinatorTest { status == ProcessingStatus.SUCCESS ? null : now, status == ProcessingStatus.SUCCESS ? now : null, now, - now + now, + null, + null ); } @@ -785,8 +990,16 @@ class DocumentProcessingCoordinatorTest { public List findAllByFingerprint(DocumentFingerprint fingerprint) { return List.copyOf(savedAttempts); } + + @Override + public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) { + return savedAttempts.stream() + .filter(a -> a.status() == de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus.PROPOSAL_READY) + .reduce((first, second) -> second) + .orElse(null); + } } - + private static class CapturingUnitOfWorkPort implements UnitOfWorkPort { private final CapturingDocumentRecordRepository recordRepo; private final CapturingProcessingAttemptRepository attemptRepo; @@ -850,6 +1063,58 @@ class DocumentProcessingCoordinatorTest { } } + private static class FailingTargetFolderPort implements TargetFolderPort { + @Override + public String getTargetFolderLocator() { + return "/tmp/target"; + } + + @Override + public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) { + return new TargetFolderTechnicalFailure("Simulated folder resolution failure"); + } + + @Override + public void tryDeleteTargetFile(String resolvedFilename) { + // No-op + } + } + + private static class FailingTargetFileCopyPort implements TargetFileCopyPort { + @Override + public TargetFileCopyResult copyToTarget( + de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator, + String resolvedFilename) { + return new TargetFileCopyTechnicalFailure("Simulated copy failure", false); + } + } + + private static class NoOpTargetFolderPort implements TargetFolderPort { + @Override + public String getTargetFolderLocator() { + return "/tmp/target"; + } + + @Override + public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) { 
+ return new ResolvedTargetFilename(baseName); + } + + @Override + public void tryDeleteTargetFile(String resolvedFilename) { + // No-op + } + } + + private static class NoOpTargetFileCopyPort implements TargetFileCopyPort { + @Override + public TargetFileCopyResult copyToTarget( + de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator, + String resolvedFilename) { + return new TargetFileCopySuccess(); + } + } + /** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */ private static class CapturingProcessingLogger implements ProcessingLogger { int infoCallCount = 0; diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentTextLimiterTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentTextLimiterTest.java new file mode 100644 index 0000000..1181a8a --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentTextLimiterTest.java @@ -0,0 +1,94 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import org.junit.jupiter.api.Test; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link DocumentTextLimiter}. 
+ */ +class DocumentTextLimiterTest { + + @Test + void limit_textShorterThanMax_returnsTextUnchanged() { + String text = "short text"; + String result = DocumentTextLimiter.limit(text, 100); + assertThat(result).isEqualTo(text); + } + + @Test + void limit_textExactlyMax_returnsTextUnchanged() { + String text = "exactly ten"; // 11 chars + String result = DocumentTextLimiter.limit(text, 11); + assertThat(result).isEqualTo(text); + assertThat(result).hasSize(11); + } + + @Test + void limit_textLongerThanMax_returnsTruncatedText() { + String text = "Hello, World!"; + String result = DocumentTextLimiter.limit(text, 5); + assertThat(result).isEqualTo("Hello"); + assertThat(result).hasSize(5); + } + + @Test + void limit_maxCharactersOne_returnsSingleChar() { + String text = "ABC"; + String result = DocumentTextLimiter.limit(text, 1); + assertThat(result).isEqualTo("A"); + } + + @Test + void limit_emptyText_returnsEmptyString() { + String result = DocumentTextLimiter.limit("", 100); + assertThat(result).isEmpty(); + } + + @Test + void limit_emptyTextWithMinMax_returnsEmptyString() { + String result = DocumentTextLimiter.limit("", 1); + assertThat(result).isEmpty(); + } + + @Test + void limit_textWithUnicodeCharacters_respectsCharCount() { + // German umlauts are single chars in Java + String text = "Rechnungsübersicht"; // 18 chars + String result = DocumentTextLimiter.limit(text, 10); + assertThat(result).hasSize(10); + assertThat(result).startsWith("Rechnungs"); + } + + @Test + void limit_nullText_throwsNullPointerException() { + assertThatThrownBy(() -> DocumentTextLimiter.limit(null, 100)) + .isInstanceOf(NullPointerException.class) + .hasMessage("text must not be null"); + } + + @Test + void limit_maxCharactersZero_throwsIllegalArgumentException() { + assertThatThrownBy(() -> DocumentTextLimiter.limit("text", 0)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxCharacters must be >= 1"); + } + + @Test + void 
limit_negativeMaxCharacters_throwsIllegalArgumentException() { + assertThatThrownBy(() -> DocumentTextLimiter.limit("text", -5)) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("maxCharacters must be >= 1"); + } + + @Test + void limit_doesNotModifyOriginalText() { + String original = "This is the original document text that is long"; + String limited = DocumentTextLimiter.limit(original, 10); + + // The original String object is unchanged (Java Strings are immutable) + assertThat(limited).isNotSameAs(original); + assertThat(limited).hasSize(10); + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/TargetFilenameBuildingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/TargetFilenameBuildingServiceTest.java new file mode 100644 index 0000000..8eaafbf --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/TargetFilenameBuildingServiceTest.java @@ -0,0 +1,264 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.service.TargetFilenameBuildingService.BaseFilenameReady; +import de.gecheckt.pdf.umbenenner.application.service.TargetFilenameBuildingService.BaseFilenameResult; +import de.gecheckt.pdf.umbenenner.application.service.TargetFilenameBuildingService.InconsistentProposalState; +import de.gecheckt.pdf.umbenenner.domain.model.DateSource; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; + +import org.junit.jupiter.api.Test; + +import java.time.Instant; +import java.time.LocalDate; + +import static org.assertj.core.api.Assertions.assertThat; +import static 
org.assertj.core.api.Assertions.assertThatNullPointerException; + +/** + * Unit tests for {@link TargetFilenameBuildingService}. + *

    + * Covers the verbindliches Zielformat {@code YYYY-MM-DD - Titel.pdf}, the 20-character + * base-title rule, the fachliche Titelregel (only letters, digits, and spaces), and the + * detection of inconsistent persistence states. + */ +class TargetFilenameBuildingServiceTest { + + private static final DocumentFingerprint FINGERPRINT = + new DocumentFingerprint("a".repeat(64)); + private static final RunId RUN_ID = new RunId("run-test"); + + // ------------------------------------------------------------------------- + // Null guard + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_rejectsNullAttempt() { + assertThatNullPointerException() + .isThrownBy(() -> TargetFilenameBuildingService.buildBaseFilename(null)); + } + + // ------------------------------------------------------------------------- + // Happy path – correct format + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_validProposal_returnsCorrectFormat() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 15), "Rechnung"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + assertThat(((BaseFilenameReady) result).baseFilename()) + .isEqualTo("2026-01-15 - Rechnung.pdf"); + } + + @Test + void buildBaseFilename_dateWithLeadingZeros_formatsCorrectly() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 3, 5), "Kontoauszug"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + assertThat(((BaseFilenameReady) result).baseFilename()) + .isEqualTo("2026-03-05 - Kontoauszug.pdf"); + } + + @Test + void buildBaseFilename_titleWithDigits_isAccepted() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 6, 1), "Rechnung 2026"); + + 
BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + assertThat(((BaseFilenameReady) result).baseFilename()) + .isEqualTo("2026-06-01 - Rechnung 2026.pdf"); + } + + @Test + void buildBaseFilename_titleWithGermanUmlauts_isAccepted() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 4, 7), "Strom Abr"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + } + + @Test + void buildBaseFilename_titleWithUmlautsAndSzlig_isAccepted() { + // ä, ö, ü, ß are Unicode letters and must be accepted + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 4, 7), "Büroausgabe"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + assertThat(((BaseFilenameReady) result).baseFilename()) + .isEqualTo("2026-04-07 - Büroausgabe.pdf"); + } + + @Test + void buildBaseFilename_titleExactly20Chars_isAccepted() { + String title = "A".repeat(20); // exactly 20 characters + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), title); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + } + + // ------------------------------------------------------------------------- + // 20-character rule applies only to base title; format structure is separate + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_format_separatorAndExtensionAreNotCountedAgainstTitle() { + // A 20-char title produces "YYYY-MM-DD - <20chars>.pdf" — total > 20 chars, which is fine + String title = "Stromabrechnung 2026"; // 20 chars + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 3, 31), title); + + BaseFilenameResult 
result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(BaseFilenameReady.class); + String filename = ((BaseFilenameReady) result).baseFilename(); + assertThat(filename).isEqualTo("2026-03-31 - Stromabrechnung 2026.pdf"); + // The service does not append duplicate suffixes; those are added by the target folder adapter + assertThat(filename).doesNotContain("("); + } + + // ------------------------------------------------------------------------- + // InconsistentProposalState – null/invalid date + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_nullDate_returnsInconsistentProposalState() { + ProcessingAttempt attempt = proposalAttempt(null, "Rechnung"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + assertThat(((InconsistentProposalState) result).reason()) + .contains("no resolved date"); + } + + // ------------------------------------------------------------------------- + // InconsistentProposalState – null/blank title + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_nullTitle_returnsInconsistentProposalState() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), null); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + assertThat(((InconsistentProposalState) result).reason()) + .contains("no validated title"); + } + + @Test + void buildBaseFilename_blankTitle_returnsInconsistentProposalState() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), " "); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + 
assertThat(((InconsistentProposalState) result).reason()) + .contains("no validated title"); + } + + // ------------------------------------------------------------------------- + // InconsistentProposalState – title exceeds 20 characters + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_titleExceeds20Chars_returnsInconsistentProposalState() { + String title = "A".repeat(21); // 21 characters + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), title); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + assertThat(((InconsistentProposalState) result).reason()) + .contains("exceeding 20 characters"); + } + + // ------------------------------------------------------------------------- + // InconsistentProposalState – disallowed characters in title + // ------------------------------------------------------------------------- + + @Test + void buildBaseFilename_titleWithHyphen_returnsInconsistentProposalState() { + // Hyphens are not letters, digits, or spaces — disallowed by fachliche Titelregel + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "Rechnung-2026"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + assertThat(((InconsistentProposalState) result).reason()) + .contains("disallowed characters"); + } + + @Test + void buildBaseFilename_titleWithSlash_returnsInconsistentProposalState() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "Rg/Strom"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + } + + @Test + void buildBaseFilename_titleWithDot_returnsInconsistentProposalState() { + ProcessingAttempt 
attempt = proposalAttempt(LocalDate.of(2026, 1, 1), "Rechnung.pdf"); + + BaseFilenameResult result = TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(result).isInstanceOf(InconsistentProposalState.class); + } + + // ------------------------------------------------------------------------- + // InconsistentProposalState reason field is non-null + // ------------------------------------------------------------------------- + + @Test + void inconsistentProposalState_reason_isNeverNull() { + ProcessingAttempt attempt = proposalAttempt(null, "Rechnung"); + + InconsistentProposalState state = + (InconsistentProposalState) TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(state.reason()).isNotNull(); + } + + // ------------------------------------------------------------------------- + // BaseFilenameReady – result record is non-null and non-blank + // ------------------------------------------------------------------------- + + @Test + void baseFilenameReady_baseFilename_isNeverNullOrBlank() { + ProcessingAttempt attempt = proposalAttempt(LocalDate.of(2026, 7, 4), "Bescheid"); + + BaseFilenameReady ready = + (BaseFilenameReady) TargetFilenameBuildingService.buildBaseFilename(attempt); + + assertThat(ready.baseFilename()).isNotNull().isNotBlank(); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private ProcessingAttempt proposalAttempt(LocalDate date, String title) { + return new ProcessingAttempt( + FINGERPRINT, RUN_ID, 1, + Instant.now(), Instant.now(), + ProcessingStatus.PROPOSAL_READY, + null, null, false, + "gpt-4", "prompt-v1.txt", 1, 100, + "{}", "reasoning text", + date, DateSource.AI_PROVIDED, title, + null); + } +} diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java 
b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java index ac87573..4219a76 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java @@ -2,6 +2,9 @@ package de.gecheckt.pdf.umbenenner.application.usecase; import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; @@ -14,12 +17,23 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptLoadingSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort; +import 
de.gecheckt.pdf.umbenenner.application.port.out.PromptPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort; +import de.gecheckt.pdf.umbenenner.application.service.AiNamingService; +import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator; +import de.gecheckt.pdf.umbenenner.domain.model.PromptIdentifier; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; @@ -445,7 +459,8 @@ class BatchRunProcessingUseCaseTest { // Use a coordinator that always fails persistence DocumentProcessingCoordinator failingProcessor = new DocumentProcessingCoordinator( new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), - new NoOpUnitOfWorkPort(), new NoOpProcessingLogger()) { + new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), + new NoOpProcessingLogger()) { @Override public boolean processDeferredOutcome( de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate, @@ -488,7 +503,8 @@ class BatchRunProcessingUseCaseTest { // Coordinator that succeeds for first document, fails persistence for second DocumentProcessingCoordinator selectiveFailingProcessor = new DocumentProcessingCoordinator( new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), - new NoOpUnitOfWorkPort(), new NoOpProcessingLogger()) { + new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), + new 
NoOpProcessingLogger()) { private int callCount = 0; @Override @@ -535,7 +551,7 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), candidatesPort, new NoOpExtractionPort(), alwaysFailingFingerprintPort, new NoOpDocumentProcessingCoordinator(), - capturingLogger); + buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("fp-warn"), Instant.now())); @@ -556,7 +572,7 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), failingPort, new NoOpExtractionPort(), new AlwaysSuccessFingerprintPort(), new NoOpDocumentProcessingCoordinator(), - capturingLogger); + buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("source-err"), Instant.now())); @@ -578,7 +594,8 @@ class BatchRunProcessingUseCaseTest { // Coordinator der immer Persistenzfehler zurückgibt DocumentProcessingCoordinator failingCoordinator = new DocumentProcessingCoordinator( new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), - new NoOpUnitOfWorkPort(), new NoOpProcessingLogger()) { + new NoOpUnitOfWorkPort(), new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), + new NoOpProcessingLogger()) { @Override public boolean processDeferredOutcome( de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate c, @@ -592,7 +609,7 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), candidatesPort, extractionPort, - new AlwaysSuccessFingerprintPort(), failingCoordinator, capturingLogger); + new AlwaysSuccessFingerprintPort(), failingCoordinator, buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("persist-warn"), Instant.now())); @@ -610,7 +627,7 @@ class 
BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), new EmptyCandidatesPort(), new NoOpExtractionPort(), new AlwaysSuccessFingerprintPort(), new NoOpDocumentProcessingCoordinator(), - capturingLogger); + buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("start-log"), Instant.now())); @@ -630,7 +647,7 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(), new AlwaysSuccessFingerprintPort(), new NoOpDocumentProcessingCoordinator(), - capturingLogger); + buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("lock-warn"), Instant.now())); @@ -659,11 +676,11 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), candidatesPort, extractionPort, - new AlwaysSuccessFingerprintPort(), processor, capturingLogger); + new AlwaysSuccessFingerprintPort(), processor, buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("log-precheck"), Instant.now())); - // Ohne logExtractionResult wären es 4 debug()-Aufrufe; mit logExtractionResult 5 + // Ohne logExtractionResult wären es mindestens 4 debug()-Aufrufe; mit logExtractionResult 5 assertTrue(capturingLogger.debugCallCount >= 5, "logExtractionResult muss bei PdfExtractionSuccess debug() aufrufen (erwartet >= 5, war: " + capturingLogger.debugCallCount + ")"); @@ -689,7 +706,7 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), candidatesPort, extractionPort, - new AlwaysSuccessFingerprintPort(), processor, capturingLogger); + new AlwaysSuccessFingerprintPort(), processor, 
buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("log-content-error"), Instant.now())); @@ -718,7 +735,7 @@ class BatchRunProcessingUseCaseTest { DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase( config, new MockRunLockPort(), candidatesPort, extractionPort, - new AlwaysSuccessFingerprintPort(), processor, capturingLogger); + new AlwaysSuccessFingerprintPort(), processor, buildStubAiNamingService(), capturingLogger); useCase.execute(new BatchRunContext(new RunId("log-tech-error"), Instant.now())); @@ -735,6 +752,20 @@ class BatchRunProcessingUseCaseTest { // Helpers // ------------------------------------------------------------------------- + /** + * Builds a minimal stub {@link AiNamingService} that always returns an AI technical failure. + * Suitable for tests that do not care about the AI pipeline outcome. + */ + private static AiNamingService buildStubAiNamingService() { + AiInvocationPort stubAiPort = request -> + new AiInvocationTechnicalFailure(request, "STUBBED", "Stubbed AI for test"); + PromptPort stubPromptPort = () -> + new PromptLoadingSuccess(new PromptIdentifier("stub-prompt"), "stub prompt content"); + ClockPort stubClock = () -> java.time.Instant.EPOCH; + AiResponseValidator validator = new AiResponseValidator(stubClock); + return new AiNamingService(stubAiPort, stubPromptPort, validator, "stub-model", 1000); + } + private static DefaultBatchRunProcessingUseCase buildUseCase( RuntimeConfiguration runtimeConfig, RunLockPort lockPort, @@ -744,7 +775,7 @@ class BatchRunProcessingUseCaseTest { DocumentProcessingCoordinator processor) { return new DefaultBatchRunProcessingUseCase( runtimeConfig, lockPort, candidatesPort, extractionPort, fingerprintPort, processor, - new NoOpProcessingLogger()); + buildStubAiNamingService(), new NoOpProcessingLogger()); } private static RuntimeConfiguration buildConfig(Path tempDir) throws Exception { @@ -906,7 +937,7 @@ class 
BatchRunProcessingUseCaseTest { private static class NoOpDocumentProcessingCoordinator extends DocumentProcessingCoordinator { NoOpDocumentProcessingCoordinator() { super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), - new NoOpProcessingLogger()); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); } } @@ -918,7 +949,7 @@ class BatchRunProcessingUseCaseTest { TrackingDocumentProcessingCoordinator() { super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository(), new NoOpUnitOfWorkPort(), - new NoOpProcessingLogger()); + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger()); } @Override @@ -948,6 +979,32 @@ class BatchRunProcessingUseCaseTest { int processCallCount() { return processCallCount; } } + private static class NoOpTargetFolderPort implements TargetFolderPort { + @Override + public String getTargetFolderLocator() { + return "/tmp/target"; + } + + @Override + public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) { + return new ResolvedTargetFilename(baseName); + } + + @Override + public void tryDeleteTargetFile(String resolvedFilename) { + // No-op + } + } + + private static class NoOpTargetFileCopyPort implements TargetFileCopyPort { + @Override + public TargetFileCopyResult copyToTarget( + de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator sourceLocator, + String resolvedFilename) { + return new TargetFileCopySuccess(); + } + } + /** No-op DocumentRecordRepository for use in test instances. 
*/ private static class NoOpDocumentRecordRepository implements DocumentRecordRepository { @Override @@ -983,8 +1040,13 @@ class BatchRunProcessingUseCaseTest { public List findAllByFingerprint(DocumentFingerprint fingerprint) { return List.of(); } + + @Override + public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) { + return null; + } } - + /** No-op UnitOfWorkPort for use in test instances. */ private static class NoOpUnitOfWorkPort implements UnitOfWorkPort { @Override diff --git a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java index 20a339d..beedbaf 100644 --- a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java +++ b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java @@ -9,31 +9,43 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand; +import de.gecheckt.pdf.umbenenner.adapter.out.ai.OpenAiHttpAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.InvalidStartConfigurationException; import de.gecheckt.pdf.umbenenner.adapter.out.bootstrap.validation.StartConfigurationValidator; +import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.configuration.ConfigurationLoadingException; import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.prompt.FilesystemPromptPortAdapter; 
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter; import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter; import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort; 
+import de.gecheckt.pdf.umbenenner.application.service.AiNamingService; +import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator; import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator; import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase; import de.gecheckt.pdf.umbenenner.bootstrap.adapter.Log4jProcessingLogger; @@ -71,11 +83,15 @@ import de.gecheckt.pdf.umbenenner.domain.model.RunId; *

      *
    • {@link PropertiesConfigurationPortAdapter} — loads configuration from properties and environment.
    • *
    • {@link FilesystemRunLockPortAdapter} — ensures exclusive execution via a lock file.
    • - *
    • {@link SqliteSchemaInitializationAdapter} — initializes SQLite schema at startup.
    • + *
    • {@link SqliteSchemaInitializationAdapter} — initializes SQLite schema (including target-copy + * schema evolution) at startup.
    • *
    • {@link Sha256FingerprintAdapter} — provides content-based document identification.
    • *
    • {@link SqliteDocumentRecordRepositoryAdapter} — manages document master records.
    • *
    • {@link SqliteProcessingAttemptRepositoryAdapter} — maintains attempt history.
    • *
    • {@link SqliteUnitOfWorkAdapter} — coordinates atomic persistence operations.
    • + *
    • {@link FilesystemTargetFolderAdapter} — resolves unique filenames in the configured target folder.
    • + *
    • {@link FilesystemTargetFileCopyAdapter} — copies source documents to the target folder via + * a temporary file and final move/rename.
    • *
    *

    * Schema initialization is performed exactly once in {@link #run()} before the batch processing loop @@ -162,12 +178,22 @@ public class BootstrapRunner { *

  • {@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery.
  • *
  • {@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction.
  • *
  • {@link Sha256FingerprintAdapter} for SHA-256 content fingerprinting.
  • - *
  • {@link SqliteSchemaInitializationAdapter} for SQLite schema DDL at startup.
  • + *
  • {@link SqliteSchemaInitializationAdapter} for SQLite schema DDL and target-copy schema + * evolution at startup.
  • *
  • {@link SqliteDocumentRecordRepositoryAdapter} for document master record CRUD.
  • *
  • {@link SqliteProcessingAttemptRepositoryAdapter} for attempt history CRUD.
  • *
  • {@link SqliteUnitOfWorkAdapter} for atomic persistence operations.
  • + *
  • {@link FilesystemTargetFolderAdapter} for duplicate-safe filename resolution in the + * configured {@code target.folder}.
  • + *
  • {@link FilesystemTargetFileCopyAdapter} for copying source documents to the target folder + * via a temporary file and final atomic move/rename.
  • * *

    + * Target folder availability and write access are validated in + * {@link #loadAndValidateConfiguration()} via {@link StartConfigurationValidator} before + * schema initialisation and batch processing begin. If the target folder does not yet exist, + * the validator creates it; failure to do so is a hard startup error. + *

    * Schema initialisation is performed explicitly in {@link #run()} before the batch loop * begins. Failure during initialisation aborts the run with exit code 1. */ @@ -189,8 +215,23 @@ public class BootstrapRunner { UnitOfWorkPort unitOfWorkPort = new SqliteUnitOfWorkAdapter(jdbcUrl); ProcessingLogger coordinatorLogger = new Log4jProcessingLogger(DocumentProcessingCoordinator.class); + TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(startConfig.targetFolder()); + TargetFileCopyPort targetFileCopyPort = new FilesystemTargetFileCopyAdapter(startConfig.targetFolder()); DocumentProcessingCoordinator documentProcessingCoordinator = - new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository, unitOfWorkPort, coordinatorLogger); + new DocumentProcessingCoordinator(documentRecordRepository, processingAttemptRepository, unitOfWorkPort, targetFolderPort, targetFileCopyPort, coordinatorLogger); + + // Wire AI naming pipeline + AiInvocationPort aiInvocationPort = new OpenAiHttpAdapter(startConfig); + PromptPort promptPort = new FilesystemPromptPortAdapter(startConfig.promptTemplateFile()); + ClockPort clockPort = new SystemClockAdapter(); + AiResponseValidator aiResponseValidator = new AiResponseValidator(clockPort); + AiNamingService aiNamingService = new AiNamingService( + aiInvocationPort, + promptPort, + aiResponseValidator, + startConfig.apiModel(), + startConfig.maxTextCharacters()); + ProcessingLogger useCaseLogger = new Log4jProcessingLogger(DefaultBatchRunProcessingUseCase.class); return new DefaultBatchRunProcessingUseCase( runtimeConfig, @@ -199,6 +240,7 @@ public class BootstrapRunner { new PdfTextExtractionPortAdapter(), fingerprintPort, documentProcessingCoordinator, + aiNamingService, useCaseLogger); }; this.commandFactory = SchedulerBatchCommand::new; @@ -272,8 +314,17 @@ public class BootstrapRunner { /** * Loads configuration via {@link ConfigurationPort} and validates it via - * {@link 
StartConfigurationValidator}. Validation includes checking that the - * {@code sqlite.file} parent directory exists or is technically creatable. + * {@link StartConfigurationValidator}. + *

    + * Validation includes: + *

      + *
    • {@code source.folder}: must exist, be a directory, and be readable.
    • + *
    • {@code target.folder}: must exist as a writable directory, or be technically + * creatable (validator attempts {@code Files.createDirectories} if absent; + * failure here is a hard startup error).
    • + *
    • {@code sqlite.file}: parent directory must exist.
    • + *
    • All numeric and URI constraints.
    • + *
    */ private StartConfiguration loadAndValidateConfiguration() { ConfigurationPort configPort = configPortFactory.create(); diff --git a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/package-info.java b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/package-info.java index 777414a..83dc24f 100644 --- a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/package-info.java +++ b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/package-info.java @@ -28,11 +28,15 @@ * Startup sequence: *
      *
    • Load and validate complete startup configuration from properties file and environment variables
    • - *
    • Initialize SQLite persistence schema via {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort}, + *
    • Validate target folder availability and write access; create target folder if absent + * (failure is a hard startup error)
    • + *
    • Initialize SQLite persistence schema (including target-copy schema evolution) via + * {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort}, * ensuring the database is ready before any batch processing
    • *
    • Schema initialization failure is treated as a hard bootstrap error and causes exit code 1
    • *
    • Create run lock adapter and acquire exclusive lock
    • - *
    • Wire all outbound adapters (document candidates, PDF extraction, fingerprint, persistence, logging)
    • + *
    • Wire all outbound adapters (document candidates, PDF extraction, fingerprint, persistence, + * target folder duplicate resolution, target file copy, logging)
    • *
    • Wire and invoke the batch processing CLI adapter
    • *
    • Map batch outcome to process exit code
    • *
    diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiAttemptContext.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiAttemptContext.java new file mode 100644 index 0000000..8091140 --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiAttemptContext.java @@ -0,0 +1,53 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import java.util.Objects; + +/** + * Carries the AI-related traceability data produced during an AI naming attempt. + *

    + * This record aggregates the metadata required to persist full AI traceability in + * the processing attempt history: + *

      + *
    • AI infrastructure details (model name, prompt identifier)
    • + *
    • Request size metrics (processed pages, sent character count)
    • + *
    • Raw AI output (for audit and diagnostics; stored in SQLite, not in log files)
    • + *
    + *

    + * This context is produced whenever an AI call is attempted, regardless of whether + * the call succeeded or failed. Fields that could not be determined (e.g. raw response + * on connection failure) may be {@code null}. + * + * @param modelName the AI model name used in the request; never null + * @param promptIdentifier stable identifier of the prompt template; never null + * @param processedPageCount number of PDF pages included in the extraction; must be >= 1 + * @param sentCharacterCount number of document-text characters sent to the AI; must be >= 0 + * @param aiRawResponse the complete raw AI response body; {@code null} if the call did + * not return a response body (e.g. timeout or connection error) + */ +public record AiAttemptContext( + String modelName, + String promptIdentifier, + int processedPageCount, + int sentCharacterCount, + String aiRawResponse) { + + /** + * Compact constructor validating mandatory fields. + * + * @throws NullPointerException if {@code modelName} or {@code promptIdentifier} is null + * @throws IllegalArgumentException if {@code processedPageCount} < 1 or + * {@code sentCharacterCount} < 0 + */ + public AiAttemptContext { + Objects.requireNonNull(modelName, "modelName must not be null"); + Objects.requireNonNull(promptIdentifier, "promptIdentifier must not be null"); + if (processedPageCount < 1) { + throw new IllegalArgumentException( + "processedPageCount must be >= 1, but was: " + processedPageCount); + } + if (sentCharacterCount < 0) { + throw new IllegalArgumentException( + "sentCharacterCount must be >= 0, but was: " + sentCharacterCount); + } + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiFunctionalFailure.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiFunctionalFailure.java new file mode 100644 index 0000000..c3a9d29 --- /dev/null +++ 
b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiFunctionalFailure.java @@ -0,0 +1,40 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import java.util.Objects; + +/** + * Outcome indicating a deterministic functional (content) failure in the AI naming pipeline. + *

+ * Functional failures occur when the AI returns a structurally valid response but the + * content violates the applicable business (functional) rules, for example: + *

+ * <ul>
+ *   <li>Title exceeds 20 characters</li>
+ *   <li>Title contains prohibited special characters</li>
+ *   <li>Title is a generic placeholder (e.g., "Dokument", "Scan")</li>
+ *   <li>AI-provided date is present but not a valid YYYY-MM-DD string</li>
+ * </ul>

    + * These failures are deterministic: retrying the same document against the same AI + * and prompt is unlikely to resolve the issue without a document or prompt change. + * The content error counter is incremented, and the standard one-retry rule applies. + * + * @param candidate the source document candidate; never null + * @param errorMessage human-readable description of the validation failure; never null + * @param aiContext AI traceability context for the attempt record; never null + */ +public record AiFunctionalFailure( + SourceDocumentCandidate candidate, + String errorMessage, + AiAttemptContext aiContext) implements DocumentProcessingOutcome { + + /** + * Compact constructor validating mandatory fields. + * + * @throws NullPointerException if any field is null + */ + public AiFunctionalFailure { + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + Objects.requireNonNull(aiContext, "aiContext must not be null"); + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiTechnicalFailure.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiTechnicalFailure.java new file mode 100644 index 0000000..9d6a0d6 --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/AiTechnicalFailure.java @@ -0,0 +1,40 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import java.util.Objects; + +/** + * Outcome indicating a transient technical failure during the AI naming pipeline. + *

    + * Technical failures include: + *

+ * <ul>
+ *   <li>AI service not reachable</li>
+ *   <li>HTTP timeout</li>
+ *   <li>Connection error</li>
+ *   <li>Unparseable or structurally invalid AI response (missing mandatory fields, invalid JSON)</li>
+ * </ul>

    + * These failures are retryable. The transient error counter is incremented. + * + * @param candidate the source document candidate; never null + * @param errorMessage human-readable description of the failure; never null + * @param cause the underlying exception, or {@code null} if not applicable + * @param aiContext AI traceability context captured before or during the failure; never null + */ +public record AiTechnicalFailure( + SourceDocumentCandidate candidate, + String errorMessage, + Throwable cause, + AiAttemptContext aiContext) implements DocumentProcessingOutcome { + + /** + * Compact constructor validating mandatory fields. + * + * @throws NullPointerException if {@code candidate}, {@code errorMessage}, or + * {@code aiContext} is null + */ + public AiTechnicalFailure { + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(errorMessage, "errorMessage must not be null"); + Objects.requireNonNull(aiContext, "aiContext must not be null"); + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcome.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcome.java index f26a40a..43f0489 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcome.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcome.java @@ -26,6 +26,7 @@ package de.gecheckt.pdf.umbenenner.domain.model; * */ public sealed interface DocumentProcessingOutcome - permits PreCheckPassed, PreCheckFailed, TechnicalDocumentError { + permits PreCheckPassed, PreCheckFailed, TechnicalDocumentError, + NamingProposalReady, AiTechnicalFailure, AiFunctionalFailure { // Marker interface; concrete implementations define structure } \ No newline at end of file diff --git 
a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/NamingProposalReady.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/NamingProposalReady.java new file mode 100644 index 0000000..f09329b --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/NamingProposalReady.java @@ -0,0 +1,39 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import java.util.Objects; + +/** + * Outcome indicating that an AI naming pipeline completed successfully and produced + * a validated naming proposal ready for persistence. + *

    + * This outcome is returned when: + *

+ * <ol>
+ *   <li>PDF text extraction and pre-checks passed.</li>
+ *   <li>The AI was invoked and returned a parseable response.</li>
+ *   <li>The response passed all semantic validation rules.</li>
+ *   <li>A {@link NamingProposal} was produced.</li>
+ * </ol>

    + * The document master record will be updated to {@link ProcessingStatus#PROPOSAL_READY}; + * a physical target copy is not yet produced at this stage. + * + * @param candidate the source document candidate; never null + * @param proposal the validated naming proposal ready for persistence; never null + * @param aiContext AI traceability data required for the processing attempt record; never null + */ +public record NamingProposalReady( + SourceDocumentCandidate candidate, + NamingProposal proposal, + AiAttemptContext aiContext) implements DocumentProcessingOutcome { + + /** + * Compact constructor validating all fields. + * + * @throws NullPointerException if any field is null + */ + public NamingProposalReady { + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(proposal, "proposal must not be null"); + Objects.requireNonNull(aiContext, "aiContext must not be null"); + } +} diff --git a/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/AiResponseTypesTest.java b/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/AiResponseTypesTest.java new file mode 100644 index 0000000..2fa59a8 --- /dev/null +++ b/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/AiResponseTypesTest.java @@ -0,0 +1,201 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import org.junit.jupiter.api.Test; + +import java.util.Optional; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for AI-related domain types: + * {@link ParsedAiResponse}, {@link AiRequestRepresentation}, + * {@link AiResponseParsingFailure}, {@link AiResponseParsingSuccess}, + * and {@link AiErrorClassification}. 
+ */ +class AiResponseTypesTest { + + // ------------------------------------------------------------------------- + // ParsedAiResponse + // ------------------------------------------------------------------------- + + @Test + void parsedAiResponse_withDate_fieldsAreSet() { + var parsed = new ParsedAiResponse("Rechnung", "AI reasoning", Optional.of("2026-01-15")); + + assertEquals("Rechnung", parsed.title()); + assertEquals("AI reasoning", parsed.reasoning()); + assertTrue(parsed.dateString().isPresent()); + assertEquals("2026-01-15", parsed.dateString().get()); + } + + @Test + void parsedAiResponse_withoutDate_dateStringIsEmpty() { + var parsed = new ParsedAiResponse("Rechnung", "AI reasoning", Optional.empty()); + + assertFalse(parsed.dateString().isPresent()); + } + + @Test + void parsedAiResponse_factoryMethod_withDate_wrapsDate() { + var parsed = ParsedAiResponse.of("Vertrag", "reasoning", "2026-03-01"); + + assertEquals("Vertrag", parsed.title()); + assertTrue(parsed.dateString().isPresent()); + assertEquals("2026-03-01", parsed.dateString().get()); + } + + @Test + void parsedAiResponse_factoryMethod_withNullDate_producesEmpty() { + var parsed = ParsedAiResponse.of("Vertrag", "reasoning", null); + + assertFalse(parsed.dateString().isPresent()); + } + + @Test + void parsedAiResponse_nullTitle_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new ParsedAiResponse(null, "reasoning", Optional.empty())); + } + + @Test + void parsedAiResponse_nullReasoning_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new ParsedAiResponse("title", null, Optional.empty())); + } + + @Test + void parsedAiResponse_nullDateString_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new ParsedAiResponse("title", "reasoning", null)); + } + + // ------------------------------------------------------------------------- + // AiRequestRepresentation + // ------------------------------------------------------------------------- + + @Test + void 
aiRequestRepresentation_validConstruction_fieldsAreSet() { + var promptId = new PromptIdentifier("prompt-v1"); + var repr = new AiRequestRepresentation(promptId, "Prompt text", "Document content", 16); + + assertEquals(promptId, repr.promptIdentifier()); + assertEquals("Prompt text", repr.promptContent()); + assertEquals("Document content", repr.documentText()); + assertEquals(16, repr.sentCharacterCount()); + } + + @Test + void aiRequestRepresentation_zeroSentCharacterCount_isValid() { + var promptId = new PromptIdentifier("p"); + var repr = new AiRequestRepresentation(promptId, "prompt", "text", 0); + assertEquals(0, repr.sentCharacterCount()); + } + + @Test + void aiRequestRepresentation_sentCharCountEqualsTextLength_isValid() { + var promptId = new PromptIdentifier("p"); + String text = "hello"; + var repr = new AiRequestRepresentation(promptId, "prompt", text, text.length()); + assertEquals(text.length(), repr.sentCharacterCount()); + } + + @Test + void aiRequestRepresentation_negativeSentCharCount_throwsIllegalArgument() { + var promptId = new PromptIdentifier("p"); + assertThrows(IllegalArgumentException.class, + () -> new AiRequestRepresentation(promptId, "prompt", "text", -1)); + } + + @Test + void aiRequestRepresentation_sentCharCountExceedsTextLength_throwsIllegalArgument() { + var promptId = new PromptIdentifier("p"); + assertThrows(IllegalArgumentException.class, + () -> new AiRequestRepresentation(promptId, "prompt", "short", 100)); + } + + @Test + void aiRequestRepresentation_nullPromptIdentifier_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiRequestRepresentation(null, "prompt", "text", 4)); + } + + @Test + void aiRequestRepresentation_nullPromptContent_throwsNPE() { + var promptId = new PromptIdentifier("p"); + assertThrows(NullPointerException.class, + () -> new AiRequestRepresentation(promptId, null, "text", 4)); + } + + @Test + void aiRequestRepresentation_nullDocumentText_throwsNPE() { + var promptId = new 
PromptIdentifier("p"); + assertThrows(NullPointerException.class, + () -> new AiRequestRepresentation(promptId, "prompt", null, 0)); + } + + // ------------------------------------------------------------------------- + // AiResponseParsingFailure + // ------------------------------------------------------------------------- + + @Test + void aiResponseParsingFailure_fieldsAreSet() { + var failure = new AiResponseParsingFailure("MISSING_TITLE", "AI response missing mandatory field 'title'"); + + assertEquals("MISSING_TITLE", failure.failureReason()); + assertEquals("AI response missing mandatory field 'title'", failure.failureMessage()); + assertInstanceOf(AiResponseParsingResult.class, failure); + } + + @Test + void aiResponseParsingFailure_nullFailureReason_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiResponseParsingFailure(null, "message")); + } + + @Test + void aiResponseParsingFailure_nullFailureMessage_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiResponseParsingFailure("REASON", null)); + } + + // ------------------------------------------------------------------------- + // AiResponseParsingSuccess + // ------------------------------------------------------------------------- + + @Test + void aiResponseParsingSuccess_fieldsAreSet() { + var parsed = ParsedAiResponse.of("Rechnung", "reasoning", "2026-01-01"); + var success = new AiResponseParsingSuccess(parsed); + + assertEquals(parsed, success.response()); + assertInstanceOf(AiResponseParsingResult.class, success); + } + + @Test + void aiResponseParsingSuccess_nullResponse_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiResponseParsingSuccess(null)); + } + + // ------------------------------------------------------------------------- + // AiErrorClassification + // ------------------------------------------------------------------------- + + @Test + void aiErrorClassification_hasTwoValues() { + AiErrorClassification[] values = 
AiErrorClassification.values(); + assertEquals(2, values.length); + } + + @Test + void aiErrorClassification_technical_isEnumValue() { + assertEquals(AiErrorClassification.TECHNICAL, + AiErrorClassification.valueOf("TECHNICAL")); + } + + @Test + void aiErrorClassification_functional_isEnumValue() { + assertEquals(AiErrorClassification.FUNCTIONAL, + AiErrorClassification.valueOf("FUNCTIONAL")); + } +} diff --git a/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java b/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java index 68e9256..02d0650 100644 --- a/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java +++ b/pdf-umbenenner-domain/src/test/java/de/gecheckt/pdf/umbenenner/domain/model/DocumentProcessingOutcomeTest.java @@ -6,6 +6,7 @@ import org.junit.jupiter.api.io.TempDir; import java.nio.file.Files; import java.nio.file.Path; +import java.time.LocalDate; import static org.junit.jupiter.api.Assertions.*; @@ -108,4 +109,166 @@ class DocumentProcessingOutcomeTest { assertInstanceOf(DocumentProcessingOutcome.class, outcome); } } + + // ------------------------------------------------------------------------- + // AiAttemptContext + // ------------------------------------------------------------------------- + + @Test + void aiAttemptContext_validConstruction_fieldsAreSet() { + var ctx = new AiAttemptContext("gpt-4", "prompt-v1", 2, 500, "{\"title\":\"Test\"}"); + + assertEquals("gpt-4", ctx.modelName()); + assertEquals("prompt-v1", ctx.promptIdentifier()); + assertEquals(2, ctx.processedPageCount()); + assertEquals(500, ctx.sentCharacterCount()); + assertEquals("{\"title\":\"Test\"}", ctx.aiRawResponse()); + } + + @Test + void aiAttemptContext_nullRawResponse_isAllowed() { + var ctx = new AiAttemptContext("gpt-4", "prompt-v1", 1, 0, null); + assertNull(ctx.aiRawResponse()); + } + + @Test + 
void aiAttemptContext_nullModelName_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiAttemptContext(null, "prompt-v1", 1, 0, null)); + } + + @Test + void aiAttemptContext_nullPromptIdentifier_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiAttemptContext("gpt-4", null, 1, 0, null)); + } + + @Test + void aiAttemptContext_zeroPageCount_throwsIllegalArgument() { + assertThrows(IllegalArgumentException.class, + () -> new AiAttemptContext("gpt-4", "prompt-v1", 0, 0, null)); + } + + @Test + void aiAttemptContext_negativeCharCount_throwsIllegalArgument() { + assertThrows(IllegalArgumentException.class, + () -> new AiAttemptContext("gpt-4", "prompt-v1", 1, -1, null)); + } + + // ------------------------------------------------------------------------- + // AiFunctionalFailure + // ------------------------------------------------------------------------- + + @Test + void aiFunctionalFailure_validConstruction_fieldsAreSet() { + var ctx = new AiAttemptContext("gpt-4", "prompt-v1", 1, 100, "{}"); + var failure = new AiFunctionalFailure(candidate, "Title too long", ctx); + + assertEquals(candidate, failure.candidate()); + assertEquals("Title too long", failure.errorMessage()); + assertEquals(ctx, failure.aiContext()); + assertInstanceOf(DocumentProcessingOutcome.class, failure); + } + + @Test + void aiFunctionalFailure_nullCandidate_throwsNPE() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + assertThrows(NullPointerException.class, + () -> new AiFunctionalFailure(null, "error", ctx)); + } + + @Test + void aiFunctionalFailure_nullErrorMessage_throwsNPE() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + assertThrows(NullPointerException.class, + () -> new AiFunctionalFailure(candidate, null, ctx)); + } + + @Test + void aiFunctionalFailure_nullAiContext_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiFunctionalFailure(candidate, "error", null)); + } + + // 
------------------------------------------------------------------------- + // AiTechnicalFailure + // ------------------------------------------------------------------------- + + @Test + void aiTechnicalFailure_validConstruction_fieldsAreSet() { + var ctx = new AiAttemptContext("gpt-4", "prompt-v1", 1, 100, null); + var cause = new RuntimeException("timeout"); + var failure = new AiTechnicalFailure(candidate, "HTTP timeout", cause, ctx); + + assertEquals(candidate, failure.candidate()); + assertEquals("HTTP timeout", failure.errorMessage()); + assertEquals(cause, failure.cause()); + assertEquals(ctx, failure.aiContext()); + assertInstanceOf(DocumentProcessingOutcome.class, failure); + } + + @Test + void aiTechnicalFailure_nullCause_isAllowed() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + var failure = new AiTechnicalFailure(candidate, "error", null, ctx); + assertNull(failure.cause()); + } + + @Test + void aiTechnicalFailure_nullCandidate_throwsNPE() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + assertThrows(NullPointerException.class, + () -> new AiTechnicalFailure(null, "error", null, ctx)); + } + + @Test + void aiTechnicalFailure_nullErrorMessage_throwsNPE() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + assertThrows(NullPointerException.class, + () -> new AiTechnicalFailure(candidate, null, null, ctx)); + } + + @Test + void aiTechnicalFailure_nullAiContext_throwsNPE() { + assertThrows(NullPointerException.class, + () -> new AiTechnicalFailure(candidate, "error", null, null)); + } + + // ------------------------------------------------------------------------- + // NamingProposalReady + // ------------------------------------------------------------------------- + + @Test + void namingProposalReady_validConstruction_fieldsAreSet() { + var ctx = new AiAttemptContext("gpt-4", "prompt-v1", 1, 100, "{\"title\":\"Rechnung\"}"); + var proposal = new NamingProposal(LocalDate.of(2026, 1, 15), 
DateSource.AI_PROVIDED, "Rechnung", "AI reasoning"); + var ready = new NamingProposalReady(candidate, proposal, ctx); + + assertEquals(candidate, ready.candidate()); + assertEquals(proposal, ready.proposal()); + assertEquals(ctx, ready.aiContext()); + assertInstanceOf(DocumentProcessingOutcome.class, ready); + } + + @Test + void namingProposalReady_nullCandidate_throwsNPE() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + var proposal = new NamingProposal(LocalDate.now(), DateSource.AI_PROVIDED, "Test", "r"); + assertThrows(NullPointerException.class, + () -> new NamingProposalReady(null, proposal, ctx)); + } + + @Test + void namingProposalReady_nullProposal_throwsNPE() { + var ctx = new AiAttemptContext("gpt-4", "p", 1, 0, null); + assertThrows(NullPointerException.class, + () -> new NamingProposalReady(candidate, null, ctx)); + } + + @Test + void namingProposalReady_nullAiContext_throwsNPE() { + var proposal = new NamingProposal(LocalDate.now(), DateSource.AI_PROVIDED, "Test", "r"); + assertThrows(NullPointerException.class, + () -> new NamingProposalReady(candidate, proposal, null)); + } } \ No newline at end of file diff --git a/run-m6.ps1 b/run-m6.ps1 new file mode 100644 index 0000000..b406303 --- /dev/null +++ b/run-m6.ps1 @@ -0,0 +1,78 @@ +# run-m6.ps1 +# Fuehrt alle M6-Arbeitspakete sequenziell aus. +# Nach jedem AP wird der Build geprueft. Bei Fehler wird abgebrochen. +# Ausfuehren im Projektroot: .\run-m6.ps1 + +param( + [int]$StartAp = 1, + [int]$EndAp = 8, + [string]$Model = "claude-sonnet-4-6", + [string]$Workpackage = "M6" +) + +$ErrorActionPreference = "Stop" + +$BuildCmd = ".\mvnw.cmd clean verify -pl pdf-umbenenner-domain,pdf-umbenenner-application,pdf-umbenenner-adapter-out,pdf-umbenenner-adapter-in-cli,pdf-umbenenner-bootstrap --also-make" + +function Get-Prompt { + param([int]$Ap) + + $baseline = "" + if ($Ap -gt 1) { + $prev = $Ap - 1 + $baseline = "AP-001 bis AP-00$prev sind bereits abgeschlossen und bilden die Baseline. 
" + } + + $apFormatted = $Ap.ToString("D3") + + return "Lies CLAUDE.md und 'docs/workpackages/$Workpackage - Arbeitspakete.md' vollständig. ${baseline}Implementiere ausschließlich AP-$apFormatted gemäß WORKFLOW.md." +} + +Write-Host "" +Write-Host "========================================" -ForegroundColor Cyan +Write-Host " $Workpackage Automatisierung" -ForegroundColor Cyan +Write-Host " Modell : $Model" -ForegroundColor Cyan +Write-Host " APs : $StartAp bis $EndAp" -ForegroundColor Cyan +Write-Host "========================================" -ForegroundColor Cyan +Write-Host "" + +for ($ap = $StartAp; $ap -le $EndAp; $ap++) { + $apFormatted = $ap.ToString("D3") + + Write-Host "----------------------------------------" -ForegroundColor Yellow + Write-Host " Starte AP-$apFormatted" -ForegroundColor Yellow + Write-Host "----------------------------------------" -ForegroundColor Yellow + + $prompt = Get-Prompt -Ap $ap + + # Claude Code ausfuehren + $claudeArgs = @("--model", $Model, "--dangerously-skip-permissions", "--print", $prompt) + & claude @claudeArgs + + if ($LASTEXITCODE -ne 0) { + Write-Host "" + Write-Host "[FEHLER] Claude Code ist bei AP-$apFormatted fehlgeschlagen (Exit $LASTEXITCODE)." -ForegroundColor Red + Write-Host "Bitte AP-$apFormatted manuell pruefen und das Skript mit -StartAp $ap neu starten." -ForegroundColor Red + exit 1 + } + + # Build pruefen + Write-Host "" + Write-Host "[BUILD] Pruefe Build nach AP-$apFormatted ..." -ForegroundColor Cyan + Invoke-Expression $BuildCmd + + if ($LASTEXITCODE -ne 0) { + Write-Host "" + Write-Host "[FEHLER] Build nach AP-$apFormatted fehlgeschlagen." -ForegroundColor Red + Write-Host "Bitte den Stand pruefen und das Skript mit -StartAp $ap neu starten." -ForegroundColor Red + exit 1 + } + + Write-Host "" + Write-Host "[OK] AP-$apFormatted abgeschlossen und Build gruen." 
-ForegroundColor Green + Write-Host "" +} + +Write-Host "========================================" -ForegroundColor Green +Write-Host " $Workpackage vollstaendig abgeschlossen!" -ForegroundColor Green +Write-Host "========================================" -ForegroundColor Green