M6 komplett umgesetzt

This commit is contained in:
2026-04-07 12:26:14 +02:00
parent 506f5ac32e
commit 8bcd80d70a
51 changed files with 5960 additions and 536 deletions
@@ -4,6 +4,7 @@ import de.gecheckt.pdf.umbenenner.application.config.startup.StartConfiguration;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
@@ -16,8 +17,20 @@ import java.util.List;
* and basic path existence checks. Throws {@link InvalidStartConfigurationException}
* if any validation rule fails.
* <p>
* Supports injected source folder validation for testability
* Supports injected source and target folder validation for testability
* (allows mocking of platform-dependent filesystem checks).
*
* <h2>Target folder validation</h2>
* <p>
* The target folder is validated as "present or technically creatable":
* <ul>
* <li>If it already exists: must be a directory and writable.</li>
* <li>If it does not yet exist: the {@link TargetFolderChecker} attempts to create it
* via {@code Files.createDirectories}. Creation failure is a hard validation error.</li>
* </ul>
* <p>
* This behaviour ensures the target write path is technically usable before any
* document processing begins, without requiring the operator to create the folder manually.
*/
public class StartConfigurationValidator {
@@ -48,22 +61,64 @@ public class StartConfigurationValidator {
String checkSourceFolder(Path path);
}
/**
* Abstraction over the filesystem checks applied to the configured target folder:
* existence, creatability, and write access.
* <p>
* Keeping these filesystem operations behind an interface separates them from the
* validation logic, so unit tests can substitute an implementation that simulates
* creation failures or missing write access without depending on platform-specific
* filesystem permissions.
* <p>
* The default implementation creates the folder via {@code Files.createDirectories}
* when it does not yet exist, and then verifies that the path is a writable directory.
*/
@FunctionalInterface
public interface TargetFolderChecker {
/**
* Checks whether the target folder is usable and reports the first problem found.
* <p>
* Expected checks, in order:
* <ol>
* <li>If the folder does not exist, attempt to create it via {@code createDirectories}.</li>
* <li>The path is a directory.</li>
* <li>The directory is writable (required for the file-copy write path).</li>
* </ol>
*
* @param path the target folder path; the validator only calls this with a non-null path
* @return a human-readable validation error message, or {@code null} if all checks pass
*/
String checkTargetFolder(Path path);
}
private final SourceFolderChecker sourceFolderChecker;
private final TargetFolderChecker targetFolderChecker;
/**
* Creates a validator with the default source folder checker (NIO-based).
* Creates a validator with default NIO-based source and target folder checkers.
*/
public StartConfigurationValidator() {
this(new DefaultSourceFolderChecker());
this(new DefaultSourceFolderChecker(), new DefaultTargetFolderChecker());
}
/**
* Creates a validator with a custom source folder checker (primarily for testing).
* Uses the default NIO-based target folder checker.
*
* @param sourceFolderChecker the checker to use (must not be null)
* @param sourceFolderChecker the source folder checker to use (must not be null)
*/
public StartConfigurationValidator(SourceFolderChecker sourceFolderChecker) {
// Delegates to the two-argument constructor, pairing the supplied source folder
// checker with the default NIO-based target folder checker.
this(sourceFolderChecker, new DefaultTargetFolderChecker());
}
/**
 * Creates a validator with custom source and target folder checkers (primarily for testing).
 * <p>
 * Both checkers are required collaborators. They are verified eagerly so that a
 * misconfigured validator fails at construction time instead of failing later,
 * in the middle of a validation run, with a less obvious {@code NullPointerException}.
 *
 * @param sourceFolderChecker the source folder checker to use (must not be null)
 * @param targetFolderChecker the target folder checker to use (must not be null)
 * @throws NullPointerException if either checker is {@code null}
 */
public StartConfigurationValidator(SourceFolderChecker sourceFolderChecker,
        TargetFolderChecker targetFolderChecker) {
    // Fully qualified so this constructor does not depend on an additional import.
    this.sourceFolderChecker = java.util.Objects.requireNonNull(
            sourceFolderChecker, "sourceFolderChecker must not be null");
    this.targetFolderChecker = java.util.Objects.requireNonNull(
            targetFolderChecker, "targetFolderChecker must not be null");
}
/**
@@ -130,7 +185,14 @@ public class StartConfigurationValidator {
}
/**
 * Validates the configured target folder and appends at most one error message.
 * <p>
 * A missing ({@code null}) value is reported directly. Every other case is delegated
 * to the injected {@link TargetFolderChecker}, which decides whether the folder is
 * present or can be created and whether it is writable. No separate
 * "must already exist" check is performed here: the target folder is allowed to be
 * absent at startup as long as the checker can create it, and a second check would
 * report the same problem twice.
 *
 * @param targetFolder the configured target folder; may be {@code null}
 * @param errors       the mutable list that collects validation error messages
 */
private void validateTargetFolder(Path targetFolder, List<String> errors) {
    if (targetFolder == null) {
        errors.add("- target.folder: must not be null");
        return;
    }
    String checkError = targetFolderChecker.checkTargetFolder(targetFolder);
    if (checkError != null) {
        errors.add(checkError);
    }
}
private void validateSqliteFile(Path sqliteFile, List<String> errors) {
@@ -321,4 +383,38 @@ public class StartConfigurationValidator {
return null; // All checks passed
}
}
/**
 * NIO-backed {@link TargetFolderChecker} used when no custom checker is injected.
 * <p>
 * The check proceeds in three steps and stops at the first failure:
 * <ol>
 * <li>a folder that does not exist yet is created with {@code Files.createDirectories};</li>
 * <li>the path must then be a directory;</li>
 * <li>the directory must be writable.</li>
 * </ol>
 * <p>
 * Because creation is attempted during the check, the target folder does not have to
 * exist before the application starts; it only has to be creatable at startup time.
 */
private static class DefaultTargetFolderChecker implements TargetFolderChecker {

    @Override
    public String checkTargetFolder(Path path) {
        String creationProblem = createIfAbsent(path);
        if (creationProblem != null) {
            return creationProblem;
        }
        if (Files.isDirectory(path)) {
            return Files.isWritable(path)
                    ? null // All checks passed
                    : "- target.folder: directory is not writable: " + path;
        }
        return "- target.folder: path is not a directory: " + path;
    }

    /**
     * Creates the folder (including missing parents) when nothing exists at the path.
     *
     * @param folder the path to create if absent
     * @return an error message if creation failed, otherwise {@code null}
     */
    private static String createIfAbsent(Path folder) {
        if (Files.exists(folder)) {
            return null;
        }
        try {
            Files.createDirectories(folder);
            return null;
        } catch (IOException creationFailure) {
            return "- target.folder: path does not exist and could not be created: "
                    + folder + " (" + creationFailure.getMessage() + ")";
        }
    }
}
}
@@ -0,0 +1,24 @@
package de.gecheckt.pdf.umbenenner.adapter.out.clock;
import java.time.Instant;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
/**
 * {@link ClockPort} implementation backed by the JVM system clock.
 * <p>
 * Every call reads the current wall-clock time. This adapter is meant for production
 * wiring; tests should supply their own {@link ClockPort} so that time is controllable.
 */
public class SystemClockAdapter implements ClockPort {

    /**
     * Reads the current time from the system clock.
     *
     * @return the current instant; never null
     */
    @Override
    public Instant now() {
        final Instant currentInstant = Instant.now();
        return currentInstant;
    }
}
@@ -76,7 +76,7 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
}
String sql = """
SELECT
SELECT
last_known_source_locator,
last_known_source_file_name,
overall_status,
@@ -85,7 +85,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
last_failure_instant,
last_success_instant,
created_at,
updated_at
updated_at,
last_target_path,
last_target_file_name
FROM document_record
WHERE fingerprint = ?
""";
@@ -146,8 +148,10 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
last_failure_instant,
last_success_instant,
created_at,
updated_at
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
updated_at,
last_target_path,
last_target_file_name
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""";
try (Connection connection = getConnection();
@@ -163,6 +167,8 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
statement.setString(8, instantToString(record.lastSuccessInstant()));
statement.setString(9, instantToString(record.createdAt()));
statement.setString(10, instantToString(record.updatedAt()));
statement.setString(11, record.lastTargetPath());
statement.setString(12, record.lastTargetFileName());
int rowsAffected = statement.executeUpdate();
if (rowsAffected != 1) {
@@ -205,7 +211,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
transient_error_count = ?,
last_failure_instant = ?,
last_success_instant = ?,
updated_at = ?
updated_at = ?,
last_target_path = ?,
last_target_file_name = ?
WHERE fingerprint = ?
""";
@@ -220,7 +228,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
statement.setString(6, instantToString(record.lastFailureInstant()));
statement.setString(7, instantToString(record.lastSuccessInstant()));
statement.setString(8, instantToString(record.updatedAt()));
statement.setString(9, record.fingerprint().sha256Hex());
statement.setString(9, record.lastTargetPath());
statement.setString(10, record.lastTargetFileName());
statement.setString(11, record.fingerprint().sha256Hex());
int rowsAffected = statement.executeUpdate();
if (rowsAffected != 1) {
@@ -260,7 +270,9 @@ public class SqliteDocumentRecordRepositoryAdapter implements DocumentRecordRepo
stringToInstant(rs.getString("last_failure_instant")),
stringToInstant(rs.getString("last_success_instant")),
stringToInstant(rs.getString("created_at")),
stringToInstant(rs.getString("updated_at"))
stringToInstant(rs.getString("updated_at")),
rs.getString("last_target_path"),
rs.getString("last_target_file_name")
);
}
@@ -6,7 +6,9 @@ import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.sql.Types;
import java.time.Instant;
import java.time.LocalDate;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
@@ -17,13 +19,21 @@ import org.apache.logging.log4j.Logger;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
/**
* SQLite implementation of {@link ProcessingAttemptRepository}.
* <p>
* Provides CRUD operations for the processing attempt history (Versuchshistorie)
* with explicit mapping between application types and the SQLite schema,
* including all AI traceability fields added during schema evolution.
* <p>
* <strong>Schema compatibility:</strong> This adapter writes all columns including
* the AI traceability columns. When reading rows that were written before schema
* evolution, those columns contain {@code NULL} and are mapped to {@code null}
* in the Java record.
* <p>
* <strong>Architecture boundary:</strong> All JDBC and SQLite details are strictly
* confined to this class. No JDBC types appear in the port interface or in any
@@ -65,9 +75,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
*/
@Override
public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
if (fingerprint == null) {
throw new NullPointerException("fingerprint must not be null");
}
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
String sql = """
SELECT COALESCE(MAX(attempt_number), 0) + 1 AS next_attempt_number
@@ -78,7 +86,6 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
try (Connection connection = getConnection();
PreparedStatement statement = connection.prepareStatement(sql)) {
// Enable foreign key enforcement for this connection
try (Statement pragmaStmt = connection.createStatement()) {
pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON);
}
@@ -89,34 +96,27 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
if (rs.next()) {
return rs.getInt("next_attempt_number");
} else {
// This should not happen, but fallback to 1
return 1;
}
}
} catch (SQLException e) {
String message = "Failed to load next attempt number for fingerprint '" +
fingerprint.sha256Hex() + "': " + e.getMessage();
String message = "Failed to load next attempt number for fingerprint '"
+ fingerprint.sha256Hex() + "': " + e.getMessage();
logger.error(message, e);
throw new DocumentPersistenceException(message, e);
}
}
/**
* Persists exactly one processing attempt record.
* <p>
* The {@link ProcessingAttempt#attemptNumber()} must have been obtained from
* {@link #loadNextAttemptNumber(DocumentFingerprint)} in the same run to guarantee
* monotonic ordering.
* Persists exactly one processing attempt record including all AI traceability fields.
*
* @param attempt the attempt to persist; must not be null
* @throws DocumentPersistenceException if the insert fails due to a technical error
*/
@Override
public void save(ProcessingAttempt attempt) {
if (attempt == null) {
throw new NullPointerException("attempt must not be null");
}
Objects.requireNonNull(attempt, "attempt must not be null");
String sql = """
INSERT INTO processing_attempt (
@@ -128,15 +128,24 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
status,
failure_class,
failure_message,
retryable
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
retryable,
model_name,
prompt_identifier,
processed_page_count,
sent_character_count,
ai_raw_response,
ai_reasoning,
resolved_date,
date_source,
validated_title,
final_target_file_name
) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""";
try (Connection connection = getConnection();
Statement pragmaStmt = connection.createStatement();
PreparedStatement statement = connection.prepareStatement(sql)) {
// Enable foreign key enforcement for this connection
pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON);
statement.setString(1, attempt.fingerprint().sha256Hex());
@@ -145,11 +154,22 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
statement.setString(4, attempt.startedAt().toString());
statement.setString(5, attempt.endedAt().toString());
statement.setString(6, attempt.status().name());
// Handle nullable fields
statement.setString(7, attempt.failureClass());
statement.setString(8, attempt.failureMessage());
setNullableString(statement, 7, attempt.failureClass());
setNullableString(statement, 8, attempt.failureMessage());
statement.setBoolean(9, attempt.retryable());
// AI traceability fields
setNullableString(statement, 10, attempt.modelName());
setNullableString(statement, 11, attempt.promptIdentifier());
setNullableInteger(statement, 12, attempt.processedPageCount());
setNullableInteger(statement, 13, attempt.sentCharacterCount());
setNullableString(statement, 14, attempt.aiRawResponse());
setNullableString(statement, 15, attempt.aiReasoning());
setNullableString(statement, 16,
attempt.resolvedDate() != null ? attempt.resolvedDate().toString() : null);
setNullableString(statement, 17,
attempt.dateSource() != null ? attempt.dateSource().name() : null);
setNullableString(statement, 18, attempt.validatedTitle());
setNullableString(statement, 19, attempt.finalTargetFileName());
int rowsAffected = statement.executeUpdate();
if (rowsAffected != 1) {
@@ -157,12 +177,12 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
"Expected to insert 1 row but affected " + rowsAffected + " rows");
}
logger.debug("Saved processing attempt #{} for fingerprint: {}",
attempt.attemptNumber(), attempt.fingerprint().sha256Hex());
logger.debug("Saved processing attempt #{} for fingerprint: {}",
attempt.attemptNumber(), attempt.fingerprint().sha256Hex());
} catch (SQLException e) {
String message = "Failed to save processing attempt #" + attempt.attemptNumber() +
" for fingerprint '" + attempt.fingerprint().sha256Hex() + "': " + e.getMessage();
String message = "Failed to save processing attempt #" + attempt.attemptNumber()
+ " for fingerprint '" + attempt.fingerprint().sha256Hex() + "': " + e.getMessage();
logger.error(message, e);
throw new DocumentPersistenceException(message, e);
}
@@ -171,31 +191,22 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
/**
* Returns all historised attempts for the given fingerprint, ordered by
* {@link ProcessingAttempt#attemptNumber()} ascending.
* <p>
* Returns an empty list if no attempts have been recorded yet.
* Intended for use in tests and diagnostics; not required on the primary batch path.
*
* @param fingerprint the document identity; must not be null
* @return immutable list of attempts, ordered by attempt number; never null
* @throws DocumentPersistenceException if the query fails due to a technical error
* @return immutable list of attempts; never null
* @throws DocumentPersistenceException if the query fails
*/
@Override
public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
if (fingerprint == null) {
throw new NullPointerException("fingerprint must not be null");
}
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
String sql = """
SELECT
fingerprint,
run_id,
attempt_number,
started_at,
ended_at,
status,
failure_class,
failure_message,
retryable
SELECT
fingerprint, run_id, attempt_number, started_at, ended_at,
status, failure_class, failure_message, retryable,
model_name, prompt_identifier, processed_page_count, sent_character_count,
ai_raw_response, ai_reasoning, resolved_date, date_source, validated_title,
final_target_file_name
FROM processing_attempt
WHERE fingerprint = ?
ORDER BY attempt_number ASC
@@ -205,67 +216,151 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
Statement pragmaStmt = connection.createStatement();
PreparedStatement statement = connection.prepareStatement(sql)) {
// Enable foreign key enforcement for this connection
pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON);
statement.setString(1, fingerprint.sha256Hex());
try (ResultSet rs = statement.executeQuery()) {
List<ProcessingAttempt> attempts = new ArrayList<>();
while (rs.next()) {
ProcessingAttempt attempt = mapResultSetToProcessingAttempt(rs);
attempts.add(attempt);
attempts.add(mapResultSetToProcessingAttempt(rs));
}
return List.copyOf(attempts); // Return immutable copy
return List.copyOf(attempts);
}
} catch (SQLException e) {
String message = "Failed to find processing attempts for fingerprint '" +
fingerprint.sha256Hex() + "': " + e.getMessage();
String message = "Failed to find processing attempts for fingerprint '"
+ fingerprint.sha256Hex() + "': " + e.getMessage();
logger.error(message, e);
throw new DocumentPersistenceException(message, e);
}
}
/**
* Maps a ResultSet row to a ProcessingAttempt.
* Returns the most recent attempt with status {@code PROPOSAL_READY} for the given
* fingerprint, or {@code null} if no such attempt exists.
* <p>
* This is the <em>leading source</em> for the naming proposal: the most recent
* {@code PROPOSAL_READY} attempt carries the validated date, title, and reasoning
* that subsequent processing steps consume.
*
* @param rs the ResultSet positioned at the current row
* @return the mapped ProcessingAttempt
* @throws SQLException if reading from the ResultSet fails
* @param fingerprint the document identity; must not be null
* @return the most recent {@code PROPOSAL_READY} attempt, or {@code null}
* @throws DocumentPersistenceException if the query fails
*/
public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) {
    Objects.requireNonNull(fingerprint, "fingerprint must not be null");

    // Newest PROPOSAL_READY row first; LIMIT 1 keeps only the most recent attempt.
    final String latestProposalQuery = """
            SELECT
            fingerprint, run_id, attempt_number, started_at, ended_at,
            status, failure_class, failure_message, retryable,
            model_name, prompt_identifier, processed_page_count, sent_character_count,
            ai_raw_response, ai_reasoning, resolved_date, date_source, validated_title,
            final_target_file_name
            FROM processing_attempt
            WHERE fingerprint = ?
            AND status = 'PROPOSAL_READY'
            ORDER BY attempt_number DESC
            LIMIT 1
            """;

    try (Connection conn = getConnection();
            Statement foreignKeyPragma = conn.createStatement();
            PreparedStatement query = conn.prepareStatement(latestProposalQuery)) {

        // Foreign key enforcement is switched on for this connection before querying.
        foreignKeyPragma.execute(PRAGMA_FOREIGN_KEYS_ON);
        query.setString(1, fingerprint.sha256Hex());

        try (ResultSet row = query.executeQuery()) {
            // No matching row means no proposal has been recorded for this document yet.
            return row.next() ? mapResultSetToProcessingAttempt(row) : null;
        }
    } catch (SQLException sqlFailure) {
        String message = "Failed to find latest PROPOSAL_READY attempt for fingerprint '"
                + fingerprint.sha256Hex() + "': " + sqlFailure.getMessage();
        logger.error(message, sqlFailure);
        throw new DocumentPersistenceException(message, sqlFailure);
    }
}
// -------------------------------------------------------------------------
// Mapping helpers
// -------------------------------------------------------------------------
private ProcessingAttempt mapResultSetToProcessingAttempt(ResultSet rs) throws SQLException {
String resolvedDateStr = rs.getString("resolved_date");
LocalDate resolvedDate = resolvedDateStr != null ? LocalDate.parse(resolvedDateStr) : null;
String dateSourceStr = rs.getString("date_source");
DateSource dateSource = dateSourceStr != null ? DateSource.valueOf(dateSourceStr) : null;
Integer processedPageCount = (Integer) getNullableInt(rs, "processed_page_count");
Integer sentCharacterCount = (Integer) getNullableInt(rs, "sent_character_count");
return new ProcessingAttempt(
new DocumentFingerprint(rs.getString("fingerprint")),
new de.gecheckt.pdf.umbenenner.domain.model.RunId(rs.getString("run_id")),
new RunId(rs.getString("run_id")),
rs.getInt("attempt_number"),
Instant.parse(rs.getString("started_at")),
Instant.parse(rs.getString("ended_at")),
de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus.valueOf(rs.getString("status")),
ProcessingStatus.valueOf(rs.getString("status")),
rs.getString("failure_class"),
rs.getString("failure_message"),
rs.getBoolean("retryable")
rs.getBoolean("retryable"),
rs.getString("model_name"),
rs.getString("prompt_identifier"),
processedPageCount,
sentCharacterCount,
rs.getString("ai_raw_response"),
rs.getString("ai_reasoning"),
resolvedDate,
dateSource,
rs.getString("validated_title"),
rs.getString("final_target_file_name")
);
}
// -------------------------------------------------------------------------
// JDBC nullable helpers
// -------------------------------------------------------------------------
/**
 * Binds a possibly-null string to a statement parameter.
 * A {@code null} value is bound as SQL NULL of type VARCHAR.
 *
 * @param stmt  the statement to bind on
 * @param index the 1-based parameter index
 * @param value the value to bind; may be null
 * @throws SQLException if the JDBC driver rejects the binding
 */
private static void setNullableString(PreparedStatement stmt, int index, String value)
        throws SQLException {
    if (value != null) {
        stmt.setString(index, value);
        return;
    }
    stmt.setNull(index, Types.VARCHAR);
}
/**
 * Binds a possibly-null integer to a statement parameter.
 * A {@code null} value is bound as SQL NULL of type INTEGER.
 *
 * @param stmt  the statement to bind on
 * @param index the 1-based parameter index
 * @param value the value to bind; may be null
 * @throws SQLException if the JDBC driver rejects the binding
 */
private static void setNullableInteger(PreparedStatement stmt, int index, Integer value)
        throws SQLException {
    if (value != null) {
        stmt.setInt(index, value.intValue());
        return;
    }
    stmt.setNull(index, Types.INTEGER);
}
/**
 * Reads an integer column that may contain SQL NULL.
 * <p>
 * The return type is {@code Integer} rather than {@code Object}, so callers get a
 * typed result without an unchecked cast. Existing {@code (Integer)} casts at call
 * sites remain valid.
 *
 * @param rs     the result set positioned on the row to read
 * @param column the column label to read
 * @return the column value, or {@code null} if the column holds SQL NULL
 * @throws SQLException if reading from the result set fails
 */
private static Integer getNullableInt(ResultSet rs, String column) throws SQLException {
    int value = rs.getInt(column);
    // getInt() reports SQL NULL as 0, so wasNull() must be consulted right after the read
    // to tell a stored 0 apart from a missing value.
    return rs.wasNull() ? null : Integer.valueOf(value);
}
/**
* Returns the JDBC URL this adapter uses to connect to the SQLite database.
* <p>
* Intended for logging and diagnostics only.
* Returns the JDBC URL this adapter uses.
*
* @return the JDBC URL; never null or blank
*/
public String getJdbcUrl() {
// Plain accessor: returns the jdbcUrl field unchanged.
return jdbcUrl;
}
/**
* Gets a connection to the database.
* <p>
* This method can be overridden by subclasses to provide a shared connection.
*
* @return a new database connection
* @throws SQLException if the connection cannot be established
* Returns a JDBC connection. May be overridden in tests to provide shared connections.
*/
protected Connection getConnection() throws SQLException {
return DriverManager.getConnection(jdbcUrl);
@@ -2,6 +2,7 @@ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Objects;
@@ -16,9 +17,8 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitiali
* SQLite implementation of {@link PersistenceSchemaInitializationPort}.
* <p>
* Creates or verifies the two-level persistence schema in the configured SQLite
* database file. All DDL uses {@code IF NOT EXISTS} semantics, making the operation
* fully idempotent: calling {@link #initializeSchema()} on an already-initialised
* database succeeds without error and without modifying existing data.
* database file, and performs a controlled schema evolution from an earlier schema
* version to the current one.
*
* <h2>Two-level schema</h2>
* <p>The schema consists of exactly two tables:
@@ -30,10 +30,29 @@ import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitiali
* the master record via fingerprint.</li>
* </ol>
*
* <h2>Schema evolution</h2>
* <p>
* When upgrading from an earlier schema, this adapter uses idempotent
* {@code ALTER TABLE ... ADD COLUMN} statements for both tables. Columns that already
* exist are silently skipped, making the evolution safe to run on both fresh and existing
* databases. The current evolution adds:
* <ul>
* <li>AI-traceability columns to {@code processing_attempt}</li>
* <li>Target-copy columns ({@code last_target_path}, {@code last_target_file_name}) to
* {@code document_record}</li>
* <li>Target-copy column ({@code final_target_file_name}) to {@code processing_attempt}</li>
* </ul>
*
* <h2>M4→current-schema status migration</h2>
* <p>
* Documents in an earlier positive intermediate state ({@code SUCCESS} recorded without
* a validated naming proposal) are idempotently migrated to {@code READY_FOR_AI} so that
* the AI naming pipeline processes them in the next run. Terminal negative states
* ({@code FAILED_RETRYABLE}, {@code FAILED_FINAL}, skip states) are left unchanged.
*
* <h2>Initialisation timing</h2>
* <p>This adapter must be invoked <em>once</em> at program startup, before the batch
* document processing loop begins. It is wired by the bootstrap module and called
* explicitly through the port. There is no lazy or deferred initialisation.
* document processing loop begins.
*
* <h2>Architecture boundary</h2>
* <p>All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined
@@ -44,34 +63,17 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class);
// -------------------------------------------------------------------------
// DDL — document_record table
// -------------------------------------------------------------------------
/**
* DDL for the document master record table.
* <p>
* <strong>Columns (mandatory fields):</strong>
* <ul>
* <li>{@code id} — internal surrogate primary key (auto-increment).</li>
* <li>{@code fingerprint} — SHA-256 hex string; unique natural key; never null.</li>
* <li>{@code last_known_source_locator} — opaque locator value (file path string);
* never null.</li>
* <li>{@code last_known_source_file_name} — human-readable file name for logging;
* never null.</li>
* <li>{@code overall_status} — current processing status as enum name string;
* never null.</li>
* <li>{@code content_error_count} — count of deterministic content errors;
* default 0; never negative.</li>
* <li>{@code transient_error_count} — count of transient technical errors;
* default 0; never negative.</li>
* <li>{@code last_failure_instant} — ISO-8601 UTC timestamp of the most recent
* failure; nullable.</li>
* <li>{@code last_success_instant} — ISO-8601 UTC timestamp of the successful
* processing; nullable.</li>
* <li>{@code created_at} — ISO-8601 UTC timestamp of record creation; never null.</li>
* <li>{@code updated_at} — ISO-8601 UTC timestamp of the most recent update;
* never null.</li>
* </ul>
* <p>
* <strong>Not included (M5+ fields):</strong> target path, target file name,
* AI-related fields.
* Columns: id (PK), fingerprint (unique), last_known_source_locator,
* last_known_source_file_name, overall_status, content_error_count,
* transient_error_count, last_failure_instant, last_success_instant,
* created_at, updated_at.
*/
private static final String DDL_CREATE_DOCUMENT_RECORD = """
CREATE TABLE IF NOT EXISTS document_record (
@@ -90,36 +92,18 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
)
""";
// -------------------------------------------------------------------------
// DDL — processing_attempt table (base schema, without AI traceability cols)
// -------------------------------------------------------------------------
/**
* DDL for the processing attempt history table.
* DDL for the base processing attempt history table.
* <p>
* <strong>Columns (mandatory fields):</strong>
* <ul>
* <li>{@code id} — internal surrogate primary key (auto-increment).</li>
* <li>{@code fingerprint} — foreign key reference to
* {@code document_record.fingerprint}; never null.</li>
* <li>{@code run_id} — identifier of the batch run; never null.</li>
* <li>{@code attempt_number} — monotonically increasing per fingerprint, starting
* at 1; never null. The unique constraint on {@code (fingerprint, attempt_number)}
* enforces uniqueness per document.</li>
* <li>{@code started_at} — ISO-8601 UTC timestamp of attempt start; never null.</li>
* <li>{@code ended_at} — ISO-8601 UTC timestamp of attempt end; never null.</li>
* <li>{@code status} — outcome status as enum name string; never null.</li>
* <li>{@code failure_class} — short failure classification; nullable (null for
* success and skip attempts).</li>
* <li>{@code failure_message} — human-readable failure description; nullable
* (null for success and skip attempts).</li>
* <li>{@code retryable} — 1 if the failure is retryable in a later run, 0 otherwise;
* never null. Always 0 for success and skip attempts.</li>
* </ul>
* Base columns (present in all schema versions): id, fingerprint, run_id,
* attempt_number, started_at, ended_at, status, failure_class, failure_message, retryable.
* <p>
* <strong>Skip attempts:</strong> Skip statuses ({@code SKIPPED_ALREADY_PROCESSED},
* {@code SKIPPED_FINAL_FAILURE}) are stored as regular rows with {@code retryable = 0}
* and null failure fields.
* <p>
* <strong>Not included (M5+ fields):</strong> model name, prompt identifier,
* AI raw response, AI reasoning, resolved date, date source, final title,
* final target file name.
* AI traceability columns are added separately via {@code ALTER TABLE} to support
* idempotent evolution from earlier schemas.
*/
private static final String DDL_CREATE_PROCESSING_ATTEMPT = """
CREATE TABLE IF NOT EXISTS processing_attempt (
@@ -140,6 +124,10 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
)
""";
// -------------------------------------------------------------------------
// DDL — indexes
// -------------------------------------------------------------------------
/** Index on {@code processing_attempt.fingerprint} for fast per-document lookups. */
private static final String DDL_IDX_ATTEMPT_FINGERPRINT =
"CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint "
@@ -155,14 +143,69 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
"CREATE INDEX IF NOT EXISTS idx_document_record_overall_status "
+ "ON document_record (overall_status)";
// -------------------------------------------------------------------------
// DDL — columns added to processing_attempt via schema evolution
// -------------------------------------------------------------------------
/**
* Columns to add idempotently to {@code processing_attempt} during schema evolution.
* <p>
* Each entry is a two-element array {@code [column_name, column_type]}. The list covers
* the AI traceability fields (model, prompt, page and character counts, raw response,
* reasoning, resolved date and its source, validated title) plus the final target file
* name. No entry carries a {@code NOT NULL} constraint or default value in its type.
* <p>
* NOTE(review): the code that turns these entries into {@code ALTER TABLE ... ADD COLUMN}
* statements is not shown here; confirm how already-existing columns are detected.
*/
private static final String[][] EVOLUTION_ATTEMPT_COLUMNS = {
{"model_name", "TEXT"},
{"prompt_identifier", "TEXT"},
{"processed_page_count", "INTEGER"},
{"sent_character_count", "INTEGER"},
{"ai_raw_response", "TEXT"},
{"ai_reasoning", "TEXT"},
{"resolved_date", "TEXT"},
{"date_source", "TEXT"},
{"validated_title", "TEXT"},
{"final_target_file_name", "TEXT"},
};
// -------------------------------------------------------------------------
// DDL — columns added to document_record via schema evolution
// -------------------------------------------------------------------------
/**
* Columns to add idempotently to {@code document_record} during schema evolution.
* <p>
* Each entry is a two-element array {@code [column_name, column_type]}. Both entries
* are target-copy columns: the last target path and the last target file name.
* No entry carries a {@code NOT NULL} constraint or default value in its type.
*/
private static final String[][] EVOLUTION_RECORD_COLUMNS = {
{"last_target_path", "TEXT"},
{"last_target_file_name", "TEXT"},
};
// -------------------------------------------------------------------------
// M4→current-schema status migration
// -------------------------------------------------------------------------
/**
 * Status migration statement that resets legacy {@code SUCCESS} rows in
 * {@code document_record} to {@code READY_FOR_AI}.
 * <p>
 * A row is updated only if its {@code overall_status} is {@code 'SUCCESS'} and no
 * {@code processing_attempt} row with the same {@code fingerprint} has
 * {@code status = 'PROPOSAL_READY'}. For every updated row, {@code updated_at} is set to
 * the value of SQLite's {@code datetime('now')}.
 * <p>
 * Re-running the statement does not touch already migrated rows again, because they no
 * longer satisfy {@code overall_status = 'SUCCESS'}.
 * <p>
 * NOTE(review): the statement is executed on every schema initialisation. It presumably
 * relies on every genuinely completed document having at least one {@code PROPOSAL_READY}
 * attempt; a {@code SUCCESS} row without such an attempt would be reset at the next
 * start-up. Confirm against the write path of the processing pipeline.
 */
private static final String SQL_MIGRATE_LEGACY_SUCCESS_TO_READY_FOR_AI = """
UPDATE document_record
SET overall_status = 'READY_FOR_AI',
updated_at = datetime('now')
WHERE overall_status = 'SUCCESS'
AND NOT EXISTS (
SELECT 1 FROM processing_attempt pa
WHERE pa.fingerprint = document_record.fingerprint
AND pa.status = 'PROPOSAL_READY'
)
""";
private final String jdbcUrl;
/**
* Constructs the adapter with the JDBC URL of the SQLite database file.
* <p>
* The JDBC URL must be in the form {@code jdbc:sqlite:/path/to/file.db}.
* The file and its parent directories need not exist at construction time;
* SQLite creates them when the connection is first opened.
*
* @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank
* @throws NullPointerException if {@code jdbcUrl} is null
@@ -177,26 +220,22 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
}
/**
* Creates or verifies the persistence schema in the SQLite database.
* Creates or verifies the persistence schema and performs schema evolution and
* status migration.
* <p>
* Executes the following DDL statements in order:
* Execution order:
* <ol>
* <li>Enable foreign key enforcement ({@code PRAGMA foreign_keys = ON})</li>
* <li>Create {@code document_record} table (if not exists)</li>
* <li>Create {@code processing_attempt} table (if not exists)</li>
* <li>Create indexes on {@code processing_attempt.fingerprint},
* {@code processing_attempt.run_id}, and
* {@code document_record.overall_status}</li>
* <li>Enable foreign key enforcement.</li>
* <li>Create {@code document_record} table (if not exists).</li>
* <li>Create {@code processing_attempt} table (if not exists).</li>
* <li>Create all indexes (if not exist).</li>
* <li>Add AI-traceability columns to {@code processing_attempt} (idempotent evolution).</li>
* <li>Migrate earlier positive intermediate state to {@code READY_FOR_AI} (idempotent).</li>
* </ol>
* <p>
* All statements use {@code IF NOT EXISTS} semantics. Calling this method on an
* already-initialised database is safe and produces no changes.
* <p>
* <strong>Timing:</strong> Must be called once at program startup, before the
* batch document processing loop begins.
* All steps are safe to run on both fresh and existing databases.
*
* @throws DocumentPersistenceException if the schema cannot be created or verified
* due to a JDBC or SQLite error
* @throws DocumentPersistenceException if any DDL or migration step fails
*/
@Override
public void initializeSchema() {
@@ -211,7 +250,7 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
statement.execute(DDL_CREATE_DOCUMENT_RECORD);
logger.debug("Table 'document_record' created or already present.");
// Level 2: processing attempt history
// Level 2: processing attempt history (base columns only)
statement.execute(DDL_CREATE_PROCESSING_ATTEMPT);
logger.debug("Table 'processing_attempt' created or already present.");
@@ -221,7 +260,20 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
statement.execute(DDL_IDX_RECORD_STATUS);
logger.debug("Indexes created or already present.");
logger.info("M4 SQLite schema initialisation completed successfully.");
// Schema evolution: add AI-traceability + target-copy columns (idempotent)
evolveTableColumns(connection, "processing_attempt", EVOLUTION_ATTEMPT_COLUMNS);
evolveTableColumns(connection, "document_record", EVOLUTION_RECORD_COLUMNS);
// Status migration: earlier positive intermediate state → READY_FOR_AI
int migrated = statement.executeUpdate(SQL_MIGRATE_LEGACY_SUCCESS_TO_READY_FOR_AI);
if (migrated > 0) {
logger.info("Status migration: {} document(s) migrated from legacy SUCCESS state to READY_FOR_AI.",
migrated);
} else {
logger.debug("Status migration: no documents required migration.");
}
logger.info("SQLite schema initialisation and migration completed successfully.");
} catch (SQLException e) {
String message = "Failed to initialise SQLite persistence schema at '" + jdbcUrl + "': " + e.getMessage();
@@ -231,9 +283,43 @@ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaIniti
}
/**
* Returns the JDBC URL this adapter uses to connect to the SQLite database.
* Idempotently adds the given columns to the specified table.
* <p>
* Intended for logging and diagnostics only.
* For each column that does not yet exist, an {@code ALTER TABLE ... ADD COLUMN}
* statement is executed. Columns that already exist are silently skipped.
*
* @param connection an open JDBC connection to the database
* @param tableName the name of the table to evolve
* @param columns array of {@code [column_name, column_type]} pairs to add
* @throws SQLException if a column addition fails for a reason other than duplicate column
*/
private void evolveTableColumns(Connection connection, String tableName, String[][] columns)
        throws SQLException {
    // Snapshot of the column names the table currently has, normalised to lower case.
    // Locale.ROOT keeps the normalisation independent of the JVM default locale; with a
    // Turkish default locale, for example, "I" would lower-case to a dotless "ı" and an
    // existing column could be mistaken for a missing one.
    java.util.Set<String> knownColumns = new java.util.HashSet<>();
    try (ResultSet rs = connection.getMetaData().getColumns(null, null, tableName, null)) {
        while (rs.next()) {
            knownColumns.add(rs.getString("COLUMN_NAME").toLowerCase(java.util.Locale.ROOT));
        }
    }

    for (String[] col : columns) {
        String columnName = col[0];
        String columnType = col[1];

        // Set.add returns false when the name is already known. Registering the name here
        // also protects against the same column being listed twice in 'columns'.
        if (!knownColumns.add(columnName.toLowerCase(java.util.Locale.ROOT))) {
            logger.debug("Schema evolution: column '{}' in '{}' already present, skipped.",
                    columnName, tableName);
            continue;
        }

        // Identifiers cannot be bound as JDBC parameters, so the statement is assembled
        // by concatenation. Callers must only pass trusted, constant names and types.
        String alterSql = "ALTER TABLE " + tableName + " ADD COLUMN " + columnName + " " + columnType;
        try (Statement stmt = connection.createStatement()) {
            stmt.execute(alterSql);
        }
        logger.debug("Schema evolution: added column '{}' to '{}'.", columnName, tableName);
    }
}
/**
* Returns the JDBC URL this adapter uses to connect to the SQLite database.
*
* @return the JDBC URL; never null or blank
*/
@@ -0,0 +1,141 @@
package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.file.AtomicMoveNotSupportedException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.util.Objects;
/**
 * Filesystem-based implementation of {@link TargetFileCopyPort}.
 * <p>
 * Copies a source PDF to the configured target folder using a two-step approach:
 * <ol>
 *   <li>Write the source content to a temporary file in the target folder.</li>
 *   <li>Rename/move the temporary file to the final resolved filename.</li>
 * </ol>
 * The atomic-move option is attempted first. If the filesystem does not support atomic
 * moves, a standard move is used as a fallback.
 *
 * <h2>Source integrity</h2>
 * <p>
 * The source file is never modified, moved, or deleted. Only a copy is created.
 *
 * <h2>Temporary file naming</h2>
 * <p>
 * The temporary file uses the suffix {@code .tmp} appended to the resolved filename
 * and is placed in the same target folder, so the final rename stays inside one folder.
 *
 * <h2>Failure handling</h2>
 * <p>
 * Every failure, including an invalid source locator or an invalid target filename,
 * is reported as a {@link TargetFileCopyTechnicalFailure}. A temporary file that exists
 * after a failure (fully or partially written) is removed on a best-effort basis.
 *
 * <h2>Architecture boundary</h2>
 * <p>
 * All NIO operations are confined to this adapter. No {@code Path} or {@code File}
 * types appear in the port interface.
 */
public class FilesystemTargetFileCopyAdapter implements TargetFileCopyPort {

    private static final Logger logger = LogManager.getLogger(FilesystemTargetFileCopyAdapter.class);

    private final Path targetFolderPath;

    /**
     * Creates the adapter for the given target folder.
     *
     * @param targetFolderPath the target folder path; must not be null
     * @throws NullPointerException if {@code targetFolderPath} is null
     */
    public FilesystemTargetFileCopyAdapter(Path targetFolderPath) {
        this.targetFolderPath = Objects.requireNonNull(targetFolderPath, "targetFolderPath must not be null");
    }

    /**
     * Copies the source document to the target folder under the given resolved filename.
     * <p>
     * The copy is performed via a temporary file ({@code resolvedFilename + ".tmp"}) in
     * the target folder followed by a move/rename to the final name.
     * <p>
     * If any step fails, a best-effort cleanup of the temporary file is attempted
     * before returning the failure result. This includes a temporary file that was only
     * partially written because the copy itself failed.
     *
     * @param sourceLocator    opaque locator identifying the source file; must not be null
     * @param resolvedFilename the final filename in the target folder; must not be null or blank
     * @return {@link TargetFileCopySuccess} on success, or
     *         {@link TargetFileCopyTechnicalFailure} on any failure; the failure carries
     *         {@code true} as second argument only if an existing temporary file was removed
     * @throws NullPointerException if {@code sourceLocator} or {@code resolvedFilename} is null
     */
    @Override
    public TargetFileCopyResult copyToTarget(SourceDocumentLocator sourceLocator, String resolvedFilename) {
        Objects.requireNonNull(sourceLocator, "sourceLocator must not be null");
        Objects.requireNonNull(resolvedFilename, "resolvedFilename must not be null");

        // Declared outside the try block so the catch block can clean up; stays null
        // when path resolution itself fails.
        Path tempTargetPath = null;
        boolean tempCreated = false;
        try {
            // Path construction may throw InvalidPathException (e.g. characters that are
            // illegal on the current platform). It happens inside the try block so that
            // such errors are reported as a technical failure instead of escaping.
            Path sourcePath = Paths.get(sourceLocator.value());
            Path finalTargetPath = targetFolderPath.resolve(resolvedFilename);
            tempTargetPath = targetFolderPath.resolve(resolvedFilename + ".tmp");

            // Step 1: Copy source to temporary file in target folder
            Files.copy(sourcePath, tempTargetPath, StandardCopyOption.REPLACE_EXISTING);
            tempCreated = true;
            logger.debug("Copied source '{}' to temporary file '{}'.",
                    sourceLocator.value(), tempTargetPath.getFileName());

            // Step 2: Atomic move/rename to final target filename
            moveToFinalTarget(tempTargetPath, finalTargetPath);
            logger.debug("Target copy completed: '{}'.", resolvedFilename);
            return new TargetFileCopySuccess();
        } catch (Exception e) {
            String message = "Failed to copy source '" + sourceLocator.value()
                    + "' to target '" + resolvedFilename + "': " + e.getMessage();
            logger.error(message, e);
            boolean cleaned = cleanUpTemporaryFile(tempTargetPath, tempCreated);
            return new TargetFileCopyTechnicalFailure(message, cleaned);
        }
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /**
     * Moves the temporary file to the final target path.
     * Attempts an atomic move first; falls back to a standard move if the filesystem
     * does not support atomic moves.
     *
     * @param tempPath  the temporary file to move
     * @param finalPath the final target path
     * @throws IOException if both the atomic move and the fallback move fail
     */
    private void moveToFinalTarget(Path tempPath, Path finalPath) throws IOException {
        try {
            Files.move(tempPath, finalPath, StandardCopyOption.ATOMIC_MOVE);
        } catch (AtomicMoveNotSupportedException e) {
            logger.debug("Atomic move not supported, falling back to standard move.");
            Files.move(tempPath, finalPath, StandardCopyOption.REPLACE_EXISTING);
        }
    }

    /**
     * Removes the temporary file after a failed copy, if there is one.
     * <p>
     * A failed {@code Files.copy} can leave a partially written file behind even though
     * the copy step never completed, so the file system is consulted when the copy step
     * did not finish.
     *
     * @param tempPath    the temporary file path, or {@code null} if it could not be resolved
     * @param tempCreated {@code true} if the copy step completed and the file is known to exist
     * @return {@code true} if a temporary file was present and has been removed;
     *         {@code false} if there was nothing to remove or removal failed
     */
    private boolean cleanUpTemporaryFile(Path tempPath, boolean tempCreated) {
        if (tempPath == null) {
            return false;
        }
        if (!tempCreated && !Files.exists(tempPath)) {
            return false;
        }
        return tryDeletePath(tempPath);
    }

    /**
     * Best-effort deletion of a path. Returns {@code true} if deletion succeeded
     * or the file did not exist; {@code false} if an exception occurred.
     *
     * @param path the path to delete
     * @return whether the path is gone after the call
     */
    private boolean tryDeletePath(Path path) {
        try {
            Files.deleteIfExists(path);
            return true;
        } catch (IOException e) {
            logger.warn("Best-effort cleanup: could not delete temporary file '{}': {}",
                    path, e.getMessage());
            return false;
        }
    }
}
@@ -0,0 +1,140 @@
package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder;
import de.gecheckt.pdf.umbenenner.application.port.out.ResolvedTargetFilename;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFilenameResolutionResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderTechnicalFailure;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Objects;
/**
 * Filesystem-based implementation of {@link TargetFolderPort}.
 * <p>
 * Resolves unique filenames for the configured target folder by checking for existing
 * files and appending a numeric collision-avoidance suffix when necessary.
 *
 * <h2>Duplicate resolution algorithm</h2>
 * <p>
 * Given a base name such as {@code 2024-01-15 - Rechnung.pdf}, the adapter checks:
 * <ol>
 *   <li>{@code 2024-01-15 - Rechnung.pdf} — if free, return it.</li>
 *   <li>{@code 2024-01-15 - Rechnung(1).pdf} — if free, return it.</li>
 *   <li>{@code 2024-01-15 - Rechnung(2).pdf} — and so on.</li>
 * </ol>
 * The suffix is inserted immediately before {@code .pdf}.
 * The 20-character base-title limit does not apply to the suffix.
 *
 * <h2>Architecture boundary</h2>
 * <p>
 * All NIO operations are confined to this adapter. No {@code Path} or {@code File} types
 * appear in the port interface.
 */
public class FilesystemTargetFolderAdapter implements TargetFolderPort {

    private static final Logger logger = LogManager.getLogger(FilesystemTargetFolderAdapter.class);

    /** Maximum number of duplicate suffixes attempted before giving up. */
    private static final int MAX_SUFFIX_ATTEMPTS = 9999;

    /** File extension every base name is expected to carry. */
    private static final String PDF_EXTENSION = ".pdf";

    private final Path targetFolderPath;

    /**
     * Creates the adapter for the given target folder.
     *
     * @param targetFolderPath the target folder path; must not be null
     * @throws NullPointerException if {@code targetFolderPath} is null
     */
    public FilesystemTargetFolderAdapter(Path targetFolderPath) {
        this.targetFolderPath = Objects.requireNonNull(targetFolderPath, "targetFolderPath must not be null");
    }

    /**
     * Returns the absolute string representation of the target folder path.
     * <p>
     * Used by the application layer as an opaque target-folder locator for persistence.
     *
     * @return absolute path string of the target folder; never null or blank
     */
    @Override
    public String getTargetFolderLocator() {
        return targetFolderPath.toAbsolutePath().toString();
    }

    /**
     * Resolves the first available unique filename in the target folder for the given base name.
     * <p>
     * Checks for {@code baseName} first; if taken, appends {@code (1)}, {@code (2)}, etc.
     * directly before {@code .pdf} until a free name is found. The extension is matched
     * case-insensitively and independently of the JVM default locale. The extension is
     * only inspected when the unsuffixed name is already taken.
     *
     * @param baseName the desired filename including {@code .pdf} extension;
     *                 must not be null or blank
     * @return a {@link ResolvedTargetFilename} with the first available name, or a
     *         {@link TargetFolderTechnicalFailure} if the name cannot be resolved
     *         (unexpected error, missing {@code .pdf} extension on a taken name, or
     *         all suffixes up to the attempt limit are taken)
     * @throws NullPointerException if {@code baseName} is null
     */
    @Override
    public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
        Objects.requireNonNull(baseName, "baseName must not be null");

        try {
            // Try without suffix first
            if (!Files.exists(targetFolderPath.resolve(baseName))) {
                logger.debug("Resolved target filename without suffix: '{}'", baseName);
                return new ResolvedTargetFilename(baseName);
            }

            // Determine split point: everything before the final ".pdf"
            if (!hasPdfExtension(baseName)) {
                return new TargetFolderTechnicalFailure(
                        "Base name does not end with .pdf: '" + baseName + "'");
            }
            String nameWithoutExt = baseName.substring(0, baseName.length() - PDF_EXTENSION.length());

            // Try (1), (2), ...
            for (int i = 1; i <= MAX_SUFFIX_ATTEMPTS; i++) {
                String candidate = nameWithoutExt + "(" + i + ")" + PDF_EXTENSION;
                if (!Files.exists(targetFolderPath.resolve(candidate))) {
                    logger.debug("Resolved target filename with suffix ({}): '{}'", i, candidate);
                    return new ResolvedTargetFilename(candidate);
                }
            }

            return new TargetFolderTechnicalFailure(
                    "Too many duplicate files for base name '" + baseName
                            + "': checked up to suffix (" + MAX_SUFFIX_ATTEMPTS + ")");

        } catch (Exception e) {
            String message = "Failed to check target folder for duplicate resolution: " + e.getMessage();
            logger.error(message, e);
            return new TargetFolderTechnicalFailure(message);
        }
    }

    /**
     * Best-effort deletion of a file in the target folder.
     * <p>
     * Used for rollback after a successful copy when subsequent persistence fails.
     * Apart from the null check on the argument, this method never throws: I/O errors
     * as well as runtime errors raised while resolving or deleting the path (for example
     * an invalid filename) are caught and logged at warn level.
     *
     * @param resolvedFilename the filename (not full path) to delete; must not be null
     * @throws NullPointerException if {@code resolvedFilename} is null
     */
    @Override
    public void tryDeleteTargetFile(String resolvedFilename) {
        Objects.requireNonNull(resolvedFilename, "resolvedFilename must not be null");
        try {
            boolean deleted = Files.deleteIfExists(targetFolderPath.resolve(resolvedFilename));
            if (deleted) {
                logger.debug("Best-effort rollback: deleted target file '{}'.", resolvedFilename);
            } else {
                logger.debug("Best-effort rollback: target file '{}' did not exist.", resolvedFilename);
            }
        } catch (IOException | RuntimeException e) {
            // RuntimeException covers path-resolution and security errors so that a
            // failed rollback can never mask the original persistence failure.
            logger.warn("Best-effort rollback: could not delete target file '{}': {}",
                    resolvedFilename, e.getMessage());
        }
    }

    /**
     * Tells whether the given name ends with {@code .pdf}, ignoring case.
     * Uses a character-wise comparison that does not depend on the default locale.
     *
     * @param name the filename to inspect
     * @return {@code true} if the name ends with the PDF extension
     */
    private static boolean hasPdfExtension(String name) {
        int extensionLength = PDF_EXTENSION.length();
        // A negative offset (name shorter than the extension) makes regionMatches return false.
        return name.regionMatches(true, name.length() - extensionLength, PDF_EXTENSION, 0, extensionLength);
    }
}