diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java new file mode 100644 index 0000000..2033277 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java @@ -0,0 +1,244 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; +import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.Objects; + +/** + * SQLite implementation of {@link PersistenceSchemaInitializationPort}. + *

+ * Creates or verifies the M4 two-level persistence schema in the configured SQLite + * database file. All DDL uses {@code IF NOT EXISTS} semantics, making the operation + * fully idempotent: calling {@link #initializeSchema()} on an already-initialised + * database succeeds without error and without modifying existing data. + * + *

Two-level schema

+ *

The schema consists of exactly two tables: + *

    + *
  1. {@code document_record} — the document master record + * (Dokument-Stammsatz). One row per unique SHA-256 fingerprint.
  2. + *
  3. {@code processing_attempt} — the processing attempt history + * (Versuchshistorie). One row per historised processing attempt, referencing + * the master record via fingerprint.
  4. + *
+ * + *

Initialisation timing

+ *

This adapter must be invoked once at program startup, before the batch + * document processing loop begins. It is wired by the bootstrap module and called + * explicitly through the port. There is no lazy or deferred initialisation. + * + *

Architecture boundary

+ *

All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined + * to this class. No JDBC or SQLite types appear in the port interface or in any + * application/domain type. + * + * @since M4-AP-003 + */ +public class SqliteSchemaInitializationAdapter implements PersistenceSchemaInitializationPort { + + private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class); + + /** + * DDL for the document master record table. + *

+ * Columns (M4 mandatory fields): + *

+ *

+ * Not included (M5+ fields): target path, target file name, + * AI-related fields. + */ + private static final String DDL_CREATE_DOCUMENT_RECORD = """ + CREATE TABLE IF NOT EXISTS document_record ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + last_known_source_locator TEXT NOT NULL, + last_known_source_file_name TEXT NOT NULL, + overall_status TEXT NOT NULL, + content_error_count INTEGER NOT NULL DEFAULT 0, + transient_error_count INTEGER NOT NULL DEFAULT 0, + last_failure_instant TEXT, + last_success_instant TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint) + ) + """; + + /** + * DDL for the processing attempt history table. + *

+ * Columns (M4 mandatory fields): + *

+ *

+ * Skip attempts: Skip statuses ({@code SKIPPED_ALREADY_PROCESSED}, + * {@code SKIPPED_FINAL_FAILURE}) are stored as regular rows with {@code retryable = 0} + * and null failure fields. + *

+ * Not included (M5+ fields): model name, prompt identifier, + * AI raw response, AI reasoning, resolved date, date source, final title, + * final target file name. + */ + private static final String DDL_CREATE_PROCESSING_ATTEMPT = """ + CREATE TABLE IF NOT EXISTS processing_attempt ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + run_id TEXT NOT NULL, + attempt_number INTEGER NOT NULL, + started_at TEXT NOT NULL, + ended_at TEXT NOT NULL, + status TEXT NOT NULL, + failure_class TEXT, + failure_message TEXT, + retryable INTEGER NOT NULL DEFAULT 0, + CONSTRAINT fk_processing_attempt_fingerprint + FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint), + CONSTRAINT uq_processing_attempt_fingerprint_number + UNIQUE (fingerprint, attempt_number) + ) + """; + + /** Index on {@code processing_attempt.fingerprint} for fast per-document lookups. */ + private static final String DDL_IDX_ATTEMPT_FINGERPRINT = + "CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint " + + "ON processing_attempt (fingerprint)"; + + /** Index on {@code processing_attempt.run_id} for fast per-run lookups. */ + private static final String DDL_IDX_ATTEMPT_RUN_ID = + "CREATE INDEX IF NOT EXISTS idx_processing_attempt_run_id " + + "ON processing_attempt (run_id)"; + + /** Index on {@code document_record.overall_status} for fast status-based filtering. */ + private static final String DDL_IDX_RECORD_STATUS = + "CREATE INDEX IF NOT EXISTS idx_document_record_overall_status " + + "ON document_record (overall_status)"; + + private final String jdbcUrl; + + /** + * Constructs the adapter with the JDBC URL of the SQLite database file. + *

+ * The JDBC URL must be in the form {@code jdbc:sqlite:/path/to/file.db}. + * The file and its parent directories need not exist at construction time; + * SQLite creates them when the connection is first opened. + * + * @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank + * @throws NullPointerException if {@code jdbcUrl} is null + * @throws IllegalArgumentException if {@code jdbcUrl} is blank + */ + public SqliteSchemaInitializationAdapter(String jdbcUrl) { + Objects.requireNonNull(jdbcUrl, "jdbcUrl must not be null"); + if (jdbcUrl.isBlank()) { + throw new IllegalArgumentException("jdbcUrl must not be blank"); + } + this.jdbcUrl = jdbcUrl; + } + + /** + * Creates or verifies the M4 persistence schema in the SQLite database. + *

+ * Executes the following DDL statements in order: + *

    + *
  1. Enable foreign key enforcement ({@code PRAGMA foreign_keys = ON})
  2. + *
  3. Create {@code document_record} table (if not exists)
  4. + *
  5. Create {@code processing_attempt} table (if not exists)
  6. + *
  7. Create indexes on {@code processing_attempt.fingerprint}, + * {@code processing_attempt.run_id}, and + * {@code document_record.overall_status}
  8. + *
+ *

+ * All statements use {@code IF NOT EXISTS} semantics. Calling this method on an + * already-initialised database is safe and produces no changes. + *

+ * Timing: Must be called once at program startup, before the + * batch document processing loop begins. + * + * @throws DocumentPersistenceException if the schema cannot be created or verified + * due to a JDBC or SQLite error + */ + @Override + public void initializeSchema() { + logger.info("Initialising M4 SQLite schema at: {}", jdbcUrl); + try (Connection connection = DriverManager.getConnection(jdbcUrl); + Statement statement = connection.createStatement()) { + + // Enable foreign key enforcement (SQLite disables it by default) + statement.execute("PRAGMA foreign_keys = ON"); + + // Level 1: document master record + statement.execute(DDL_CREATE_DOCUMENT_RECORD); + logger.debug("Table 'document_record' created or already present."); + + // Level 2: processing attempt history + statement.execute(DDL_CREATE_PROCESSING_ATTEMPT); + logger.debug("Table 'processing_attempt' created or already present."); + + // Indexes for efficient per-document, per-run, and per-status access + statement.execute(DDL_IDX_ATTEMPT_FINGERPRINT); + statement.execute(DDL_IDX_ATTEMPT_RUN_ID); + statement.execute(DDL_IDX_RECORD_STATUS); + logger.debug("Indexes created or already present."); + + logger.info("M4 SQLite schema initialisation completed successfully."); + + } catch (SQLException e) { + String message = "Failed to initialise M4 SQLite schema at '" + jdbcUrl + "': " + e.getMessage(); + logger.error(message, e); + throw new DocumentPersistenceException(message, e); + } + } + + /** + * Returns the JDBC URL this adapter uses to connect to the SQLite database. + *

+ * Intended for logging and diagnostics only. + * + * @return the JDBC URL; never null or blank + */ + public String getJdbcUrl() { + return jdbcUrl; + } +} diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java new file mode 100644 index 0000000..08a36a9 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java @@ -0,0 +1,37 @@ +/** + * SQLite persistence adapter for the M4 two-level persistence model. + * + *

<h2>Purpose</h2>
+ * <p>This package contains the technical SQLite infrastructure for the M4 persistence
+ * layer. It is the only place in the entire application where JDBC connections, SQL DDL,
+ * and SQLite-specific types are used. No JDBC or SQLite types leak into the
+ * {@code application} or {@code domain} modules.
+ *
+ * <h2>Two-level persistence model</h2>
+ * <p>M4 persistence is structured in exactly two levels:
+ * <ol>
+ * <li>Document master record ({@code document_record} table) —
+ * one row per unique SHA-256 fingerprint; carries the current overall status,
+ * failure counters, and the most recently known source location.</li>
+ * <li>Processing attempt history ({@code processing_attempt} table) —
+ * one row per historised processing attempt; references the master record via
+ * fingerprint; attempt numbers are monotonically increasing per fingerprint.</li>
+ * </ol>
+ *
+ * <h2>Schema initialisation timing</h2>
+ * <p>The {@link de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter}
+ * implements the
+ * {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort}
+ * and must be called once at program startup, before the batch document
+ * processing loop begins. There is no lazy or hidden initialisation during document
+ * processing.
+ *
+ * <h2>Architecture boundary</h2>
+ * <p>

All JDBC connections, SQL statements, and SQLite-specific behaviour are strictly + * confined to this package. The application layer interacts exclusively through the + * port interfaces defined in + * {@code de.gecheckt.pdf.umbenenner.application.port.out}. + * + * @since M4-AP-003 + */ +package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java new file mode 100644 index 0000000..ed5df09 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java @@ -0,0 +1,288 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; + +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Path; +import java.sql.Connection; +import java.sql.DatabaseMetaData; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.HashSet; +import java.util.Set; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link SqliteSchemaInitializationAdapter}. + *

+ * Verifies that the M4 two-level schema is created correctly, that the operation + * is idempotent, and that invalid configuration is rejected. + * + * @since M4-AP-003 + */ +class SqliteSchemaInitializationAdapterTest { + + @TempDir + Path tempDir; + + // ------------------------------------------------------------------------- + // Construction + // ------------------------------------------------------------------------- + + @Test + void constructor_rejectsNullJdbcUrl() { + assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(null)) + .isInstanceOf(NullPointerException.class) + .hasMessageContaining("jdbcUrl"); + } + + @Test + void constructor_rejectsBlankJdbcUrl() { + assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(" ")) + .isInstanceOf(IllegalArgumentException.class) + .hasMessageContaining("jdbcUrl"); + } + + @Test + void getJdbcUrl_returnsConfiguredUrl() { + String url = "jdbc:sqlite:/some/path/test.db"; + SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(url); + assertThat(adapter.getJdbcUrl()).isEqualTo(url); + } + + // ------------------------------------------------------------------------- + // Schema creation – tables present + // ------------------------------------------------------------------------- + + @Test + void initializeSchema_createsBothTables(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "schema_test.db"); + SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl); + + adapter.initializeSchema(); + + Set tables = readTableNames(jdbcUrl); + assertThat(tables).contains("document_record", "processing_attempt"); + } + + @Test + void initializeSchema_documentRecordHasAllMandatoryColumns(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "columns_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + Set columns = readColumnNames(jdbcUrl, "document_record"); + 
assertThat(columns).containsExactlyInAnyOrder( + "id", + "fingerprint", + "last_known_source_locator", + "last_known_source_file_name", + "overall_status", + "content_error_count", + "transient_error_count", + "last_failure_instant", + "last_success_instant", + "created_at", + "updated_at" + ); + } + + @Test + void initializeSchema_processingAttemptHasAllMandatoryColumns(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "attempt_columns_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + Set columns = readColumnNames(jdbcUrl, "processing_attempt"); + assertThat(columns).containsExactlyInAnyOrder( + "id", + "fingerprint", + "run_id", + "attempt_number", + "started_at", + "ended_at", + "status", + "failure_class", + "failure_message", + "retryable" + ); + } + + // ------------------------------------------------------------------------- + // Idempotency + // ------------------------------------------------------------------------- + + @Test + void initializeSchema_isIdempotent_calledTwice(@TempDir Path dir) { + String jdbcUrl = jdbcUrl(dir, "idempotent_test.db"); + SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl); + + // Must not throw on second call + adapter.initializeSchema(); + adapter.initializeSchema(); + } + + // ------------------------------------------------------------------------- + // Unique constraint: fingerprint in document_record + // ------------------------------------------------------------------------- + + @Test + void documentRecord_fingerprintUniqueConstraintIsEnforced(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "unique_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String insertSql = """ + INSERT INTO document_record + (fingerprint, last_known_source_locator, last_known_source_file_name, + overall_status, created_at, updated_at) + VALUES (?, 'locator', 'file.pdf', 'SUCCESS', 
'2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') + """; + String fp = "a".repeat(64); + + try (Connection conn = DriverManager.getConnection(jdbcUrl)) { + try (var ps = conn.prepareStatement(insertSql)) { + ps.setString(1, fp); + ps.executeUpdate(); + } + // Second insert with same fingerprint must fail + try (var ps = conn.prepareStatement(insertSql)) { + ps.setString(1, fp); + org.junit.jupiter.api.Assertions.assertThrows( + SQLException.class, ps::executeUpdate, + "Expected UNIQUE constraint violation on document_record.fingerprint"); + } + } + } + + // ------------------------------------------------------------------------- + // Unique constraint: (fingerprint, attempt_number) in processing_attempt + // ------------------------------------------------------------------------- + + @Test + void processingAttempt_fingerprintAttemptNumberUniqueConstraintIsEnforced(@TempDir Path dir) + throws SQLException { + String jdbcUrl = jdbcUrl(dir, "attempt_unique_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String fp = "b".repeat(64); + + // Insert master record first (FK) + try (Connection conn = DriverManager.getConnection(jdbcUrl)) { + try (var ps = conn.prepareStatement(""" + INSERT INTO document_record + (fingerprint, last_known_source_locator, last_known_source_file_name, + overall_status, created_at, updated_at) + VALUES (?, 'loc', 'f.pdf', 'FAILED_RETRYABLE', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') + """)) { + ps.setString(1, fp); + ps.executeUpdate(); + } + + String attemptSql = """ + INSERT INTO processing_attempt + (fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable) + VALUES (?, 'run-1', 1, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', 'FAILED_RETRYABLE', 1) + """; + + try (var ps = conn.prepareStatement(attemptSql)) { + ps.setString(1, fp); + ps.executeUpdate(); + } + // Duplicate (fingerprint, attempt_number) must fail + try (var ps = conn.prepareStatement(attemptSql)) { + 
ps.setString(1, fp); + org.junit.jupiter.api.Assertions.assertThrows( + SQLException.class, ps::executeUpdate, + "Expected UNIQUE constraint violation on (fingerprint, attempt_number)"); + } + } + } + + // ------------------------------------------------------------------------- + // Skip attempts are storable + // ------------------------------------------------------------------------- + + @Test + void processingAttempt_skipStatusIsStorable(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "skip_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + String fp = "c".repeat(64); + + try (Connection conn = DriverManager.getConnection(jdbcUrl)) { + // Insert master record + try (var ps = conn.prepareStatement(""" + INSERT INTO document_record + (fingerprint, last_known_source_locator, last_known_source_file_name, + overall_status, created_at, updated_at) + VALUES (?, 'loc', 'f.pdf', 'SUCCESS', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') + """)) { + ps.setString(1, fp); + ps.executeUpdate(); + } + + // Insert a SKIPPED_ALREADY_PROCESSED attempt (null failure fields, retryable=0) + try (var ps = conn.prepareStatement(""" + INSERT INTO processing_attempt + (fingerprint, run_id, attempt_number, started_at, ended_at, + status, failure_class, failure_message, retryable) + VALUES (?, 'run-2', 2, '2026-01-02T00:00:00Z', '2026-01-02T00:00:01Z', + 'SKIPPED_ALREADY_PROCESSED', NULL, NULL, 0) + """)) { + ps.setString(1, fp); + int rows = ps.executeUpdate(); + assertThat(rows).isEqualTo(1); + } + } + } + + // ------------------------------------------------------------------------- + // Error handling + // ------------------------------------------------------------------------- + + @Test + void initializeSchema_throwsDocumentPersistenceException_onInvalidUrl() { + // SQLite is lenient with paths; use a truly invalid JDBC URL format + SqliteSchemaInitializationAdapter badAdapter = + new 
SqliteSchemaInitializationAdapter("not-a-jdbc-url-at-all"); + + assertThatThrownBy(badAdapter::initializeSchema) + .isInstanceOf(DocumentPersistenceException.class); + } + + // ------------------------------------------------------------------------- + // Helpers + // ------------------------------------------------------------------------- + + private static String jdbcUrl(Path dir, String filename) { + return "jdbc:sqlite:" + dir.resolve(filename).toAbsolutePath(); + } + + private static Set readTableNames(String jdbcUrl) throws SQLException { + Set tables = new HashSet<>(); + try (Connection conn = DriverManager.getConnection(jdbcUrl)) { + DatabaseMetaData meta = conn.getMetaData(); + try (ResultSet rs = meta.getTables(null, null, "%", new String[]{"TABLE"})) { + while (rs.next()) { + tables.add(rs.getString("TABLE_NAME").toLowerCase()); + } + } + } + return tables; + } + + private static Set readColumnNames(String jdbcUrl, String tableName) throws SQLException { + Set columns = new HashSet<>(); + try (Connection conn = DriverManager.getConnection(jdbcUrl)) { + DatabaseMetaData meta = conn.getMetaData(); + try (ResultSet rs = meta.getColumns(null, null, tableName, "%")) { + while (rs.next()) { + columns.add(rs.getString("COLUMN_NAME").toLowerCase()); + } + } + } + return columns; + } +}