1
0

M4 AP-003 SQLite-Schema und Persistenzbasis einführen

This commit is contained in:
2026-04-02 20:19:54 +02:00
parent cae9c944d7
commit 6a44def89b
3 changed files with 569 additions and 0 deletions

View File

@@ -0,0 +1,244 @@
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.Objects;
/**
* SQLite implementation of {@link PersistenceSchemaInitializationPort}.
* <p>
* Creates or verifies the M4 two-level persistence schema in the configured SQLite
* database file. All DDL uses {@code IF NOT EXISTS} semantics, making the operation
* fully idempotent: calling {@link #initializeSchema()} on an already-initialised
* database succeeds without error and without modifying existing data.
*
* <h2>Two-level schema</h2>
* <p>The schema consists of exactly two tables:
* <ol>
* <li><strong>{@code document_record}</strong> — the document master record
* (Dokument-Stammsatz). One row per unique SHA-256 fingerprint.</li>
* <li><strong>{@code processing_attempt}</strong> — the processing attempt history
* (Versuchshistorie). One row per historised processing attempt, referencing
* the master record via fingerprint.</li>
* </ol>
*
* <h2>Initialisation timing</h2>
* <p>This adapter must be invoked <em>once</em> at program startup, before the batch
* document processing loop begins. It is wired by the bootstrap module and called
* explicitly through the port. There is no lazy or deferred initialisation.
*
* <h2>Architecture boundary</h2>
* <p>All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined
* to this class. No JDBC or SQLite types appear in the port interface or in any
* application/domain type.
*
* @since M4-AP-003
*/
public class SqliteSchemaInitializationAdapter implements PersistenceSchemaInitializationPort {

    private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class);

    /**
     * DDL for the document master record table.
     * <p>
     * <strong>Columns (M4 mandatory fields):</strong>
     * <ul>
     *   <li>{@code id} — internal surrogate primary key (auto-increment).</li>
     *   <li>{@code fingerprint} — SHA-256 hex string; unique natural key; never null.</li>
     *   <li>{@code last_known_source_locator} — opaque locator value (file path string);
     *       never null.</li>
     *   <li>{@code last_known_source_file_name} — human-readable file name for logging;
     *       never null.</li>
     *   <li>{@code overall_status} — current processing status as enum name string;
     *       never null.</li>
     *   <li>{@code content_error_count} — count of deterministic content errors;
     *       default 0; expected to be non-negative.</li>
     *   <li>{@code transient_error_count} — count of transient technical errors;
     *       default 0; expected to be non-negative.</li>
     *   <li>{@code last_failure_instant} — ISO-8601 UTC timestamp of the most recent
     *       failure; nullable.</li>
     *   <li>{@code last_success_instant} — ISO-8601 UTC timestamp of the successful
     *       processing; nullable.</li>
     *   <li>{@code created_at} — ISO-8601 UTC timestamp of record creation; never null.</li>
     *   <li>{@code updated_at} — ISO-8601 UTC timestamp of the most recent update;
     *       never null.</li>
     * </ul>
     * <p>
     * <strong>What the schema enforces:</strong> only {@code NOT NULL}, the defaults, and
     * the uniqueness of {@code fingerprint}. The non-negativity of the counters, the set of
     * valid status names, and the timestamp format are not backed by {@code CHECK}
     * constraints; they are the responsibility of the code that writes the rows.
     * <p>
     * <strong>Not included (M5+ fields):</strong> target path, target file name,
     * AI-related fields.
     */
    private static final String DDL_CREATE_DOCUMENT_RECORD = """
            CREATE TABLE IF NOT EXISTS document_record (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            fingerprint TEXT NOT NULL,
            last_known_source_locator TEXT NOT NULL,
            last_known_source_file_name TEXT NOT NULL,
            overall_status TEXT NOT NULL,
            content_error_count INTEGER NOT NULL DEFAULT 0,
            transient_error_count INTEGER NOT NULL DEFAULT 0,
            last_failure_instant TEXT,
            last_success_instant TEXT,
            created_at TEXT NOT NULL,
            updated_at TEXT NOT NULL,
            CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint)
            )
            """;

    /**
     * DDL for the processing attempt history table.
     * <p>
     * <strong>Columns (M4 mandatory fields):</strong>
     * <ul>
     *   <li>{@code id} — internal surrogate primary key (auto-increment).</li>
     *   <li>{@code fingerprint} — foreign key reference to
     *       {@code document_record.fingerprint}; never null.</li>
     *   <li>{@code run_id} — identifier of the batch run; never null.</li>
     *   <li>{@code attempt_number} — intended to increase monotonically per fingerprint,
     *       starting at 1; never null. The unique constraint on
     *       {@code (fingerprint, attempt_number)} enforces uniqueness per document; the
     *       ordering itself is not enforced by the schema.</li>
     *   <li>{@code started_at} — ISO-8601 UTC timestamp of attempt start; never null.</li>
     *   <li>{@code ended_at} — ISO-8601 UTC timestamp of attempt end; never null.</li>
     *   <li>{@code status} — outcome status as enum name string; never null.</li>
     *   <li>{@code failure_class} — short failure classification; nullable (null for
     *       success and skip attempts).</li>
     *   <li>{@code failure_message} — human-readable failure description; nullable
     *       (null for success and skip attempts).</li>
     *   <li>{@code retryable} — 1 if the failure is retryable in a later run, 0 otherwise;
     *       never null; default 0. Always 0 for success and skip attempts.</li>
     * </ul>
     * <p>
     * <strong>Foreign key enforcement:</strong> SQLite only checks the foreign key on
     * connections that have executed {@code PRAGMA foreign_keys = ON}. Declaring the
     * constraint here does not by itself make other connections enforce it.
     * <p>
     * <strong>Skip attempts:</strong> Skip statuses ({@code SKIPPED_ALREADY_PROCESSED},
     * {@code SKIPPED_FINAL_FAILURE}) are stored as regular rows with {@code retryable = 0}
     * and null failure fields.
     * <p>
     * <strong>Not included (M5+ fields):</strong> model name, prompt identifier,
     * AI raw response, AI reasoning, resolved date, date source, final title,
     * final target file name.
     */
    private static final String DDL_CREATE_PROCESSING_ATTEMPT = """
            CREATE TABLE IF NOT EXISTS processing_attempt (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            fingerprint TEXT NOT NULL,
            run_id TEXT NOT NULL,
            attempt_number INTEGER NOT NULL,
            started_at TEXT NOT NULL,
            ended_at TEXT NOT NULL,
            status TEXT NOT NULL,
            failure_class TEXT,
            failure_message TEXT,
            retryable INTEGER NOT NULL DEFAULT 0,
            CONSTRAINT fk_processing_attempt_fingerprint
            FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint),
            CONSTRAINT uq_processing_attempt_fingerprint_number
            UNIQUE (fingerprint, attempt_number)
            )
            """;

    /**
     * Index on {@code processing_attempt.fingerprint} for fast per-document lookups.
     * <p>
     * NOTE(review): the {@code UNIQUE (fingerprint, attempt_number)} constraint is already
     * backed by an implicit index whose leading column is {@code fingerprint}, so this
     * explicit index may be redundant. Confirm with a query plan before removing it.
     */
    private static final String DDL_IDX_ATTEMPT_FINGERPRINT =
            "CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint "
                    + "ON processing_attempt (fingerprint)";

    /** Index on {@code processing_attempt.run_id} for fast per-run lookups. */
    private static final String DDL_IDX_ATTEMPT_RUN_ID =
            "CREATE INDEX IF NOT EXISTS idx_processing_attempt_run_id "
                    + "ON processing_attempt (run_id)";

    /** Index on {@code document_record.overall_status} for fast status-based filtering. */
    private static final String DDL_IDX_RECORD_STATUS =
            "CREATE INDEX IF NOT EXISTS idx_document_record_overall_status "
                    + "ON document_record (overall_status)";

    /** JDBC URL handed to {@link DriverManager}; validated as non-null and non-blank. */
    private final String jdbcUrl;

    /**
     * Constructs the adapter with the JDBC URL of the SQLite database file.
     * <p>
     * The JDBC URL must be in the form {@code jdbc:sqlite:/path/to/file.db}.
     * The constructor only checks that the value is non-null and non-blank; it does not
     * validate the URL format and does not open a connection.
     * <p>
     * The database file itself need not exist; it is created when the connection is first
     * opened by {@link #initializeSchema()}. The <em>parent directory</em>, however, must
     * already exist: SQLite does not create missing directories, so
     * {@link #initializeSchema()} fails with a {@link DocumentPersistenceException} if the
     * directory is absent. Callers are responsible for creating it beforehand.
     *
     * @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank
     * @throws NullPointerException     if {@code jdbcUrl} is null
     * @throws IllegalArgumentException if {@code jdbcUrl} is blank
     */
    public SqliteSchemaInitializationAdapter(String jdbcUrl) {
        Objects.requireNonNull(jdbcUrl, "jdbcUrl must not be null");
        if (jdbcUrl.isBlank()) {
            throw new IllegalArgumentException("jdbcUrl must not be blank");
        }
        this.jdbcUrl = jdbcUrl;
    }

    /**
     * Creates or verifies the M4 persistence schema in the SQLite database.
     * <p>
     * Opens a dedicated connection, executes the following statements in order, and
     * closes the connection again:
     * <ol>
     *   <li>{@code PRAGMA foreign_keys = ON}</li>
     *   <li>Create {@code document_record} table (if not exists)</li>
     *   <li>Create {@code processing_attempt} table (if not exists)</li>
     *   <li>Create indexes on {@code processing_attempt.fingerprint},
     *       {@code processing_attempt.run_id}, and
     *       {@code document_record.overall_status}</li>
     * </ol>
     * <p>
     * All DDL statements use {@code IF NOT EXISTS} semantics. Calling this method on an
     * already-initialised database is safe and produces no changes. Note that
     * {@code IF NOT EXISTS} only checks object names: an existing table with a different
     * column layout is left untouched and is not reported as an error.
     * <p>
     * <strong>Scope of the foreign key pragma:</strong> in SQLite the
     * {@code foreign_keys} setting belongs to a single connection and is not stored in the
     * database file. The pragma executed here therefore affects only the short-lived
     * connection used by this method. Any other connection that should enforce the
     * {@code processing_attempt → document_record} reference must enable the pragma
     * itself.
     * <p>
     * <strong>Timing:</strong> Must be called once at program startup, before the
     * batch document processing loop begins.
     *
     * @throws DocumentPersistenceException if the schema cannot be created or verified
     *                                      due to a JDBC or SQLite error; the original
     *                                      {@link SQLException} is attached as the cause
     */
    @Override
    public void initializeSchema() {
        logger.info("Initialising M4 SQLite schema at: {}", jdbcUrl);
        try (Connection connection = DriverManager.getConnection(jdbcUrl);
             Statement statement = connection.createStatement()) {

            // SQLite disables foreign key enforcement by default. The setting is
            // per-connection, so this only covers the connection opened above.
            statement.execute("PRAGMA foreign_keys = ON");

            // Level 1: document master record. Must exist before level 2, which
            // references it.
            statement.execute(DDL_CREATE_DOCUMENT_RECORD);
            logger.debug("Table 'document_record' created or already present.");

            // Level 2: processing attempt history
            statement.execute(DDL_CREATE_PROCESSING_ATTEMPT);
            logger.debug("Table 'processing_attempt' created or already present.");

            // Indexes for efficient per-document, per-run, and per-status access
            statement.execute(DDL_IDX_ATTEMPT_FINGERPRINT);
            statement.execute(DDL_IDX_ATTEMPT_RUN_ID);
            statement.execute(DDL_IDX_RECORD_STATUS);
            logger.debug("Indexes created or already present.");

            logger.info("M4 SQLite schema initialisation completed successfully.");
        } catch (SQLException e) {
            // Translate to the port-level exception so that no JDBC type crosses the
            // adapter boundary; the SQLException is kept as the cause for diagnostics.
            String message = "Failed to initialise M4 SQLite schema at '" + jdbcUrl + "': " + e.getMessage();
            logger.error(message, e);
            throw new DocumentPersistenceException(message, e);
        }
    }

    /**
     * Returns the JDBC URL this adapter uses to connect to the SQLite database.
     * <p>
     * Intended for logging and diagnostics only.
     *
     * @return the JDBC URL; never null or blank
     */
    public String getJdbcUrl() {
        return jdbcUrl;
    }
}

View File

@@ -0,0 +1,37 @@
/**
* SQLite persistence adapter for the M4 two-level persistence model.
*
* <h2>Purpose</h2>
* <p>This package contains the technical SQLite infrastructure for the M4 persistence
* layer. It is the only place in the entire application where JDBC connections, SQL DDL,
* and SQLite-specific types are used. No JDBC or SQLite types leak into the
* {@code application} or {@code domain} modules.
*
* <h2>Two-level persistence model</h2>
* <p>M4 persistence is structured in exactly two levels:
* <ol>
* <li><strong>Document master record</strong> ({@code document_record} table) —
* one row per unique SHA-256 fingerprint; carries the current overall status,
* failure counters, and the most recently known source location.</li>
* <li><strong>Processing attempt history</strong> ({@code processing_attempt} table) —
* one row per historised processing attempt; references the master record via
* fingerprint; attempt numbers are monotonically increasing per fingerprint.</li>
* </ol>
*
* <h2>Schema initialisation timing</h2>
* <p>The {@link de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter}
* implements the
* {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort}
* port. Its {@code initializeSchema()} method must be called <em>once</em> at program
* startup, before the batch document processing loop begins. There is no lazy or hidden
* initialisation during document processing.
*
* <h2>Architecture boundary</h2>
* <p>All JDBC connections, SQL statements, and SQLite-specific behaviour are strictly
* confined to this package. The application layer interacts exclusively through the
* port interfaces defined in
* {@code de.gecheckt.pdf.umbenenner.application.port.out}.
*
* @since M4-AP-003
*/
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;

View File

@@ -0,0 +1,288 @@
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.nio.file.Path;
import java.sql.Connection;
import java.sql.DatabaseMetaData;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.HashSet;
import java.util.Set;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
* Unit tests for {@link SqliteSchemaInitializationAdapter}.
* <p>
* Verifies that the M4 two-level schema is created correctly, that the operation
* is idempotent, and that invalid configuration is rejected.
*
* @since M4-AP-003
*/
class SqliteSchemaInitializationAdapterTest {

    // -------------------------------------------------------------------------
    // Construction
    // -------------------------------------------------------------------------

    /** A null URL is rejected eagerly with a message naming the parameter. */
    @Test
    void constructor_rejectsNullJdbcUrl() {
        assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(null))
                .isInstanceOf(NullPointerException.class)
                .hasMessageContaining("jdbcUrl");
    }

    /** A whitespace-only URL is rejected eagerly with a message naming the parameter. */
    @Test
    void constructor_rejectsBlankJdbcUrl() {
        assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(" "))
                .isInstanceOf(IllegalArgumentException.class)
                .hasMessageContaining("jdbcUrl");
    }

    /** The getter returns the constructor argument unchanged; no connection is opened. */
    @Test
    void getJdbcUrl_returnsConfiguredUrl() {
        String url = "jdbc:sqlite:/some/path/test.db";
        SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(url);
        assertThat(adapter.getJdbcUrl()).isEqualTo(url);
    }

    // -------------------------------------------------------------------------
    // Schema creation: tables, columns, and indexes present
    // -------------------------------------------------------------------------

    @Test
    void initializeSchema_createsBothTables(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "schema_test.db");
        SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl);

        adapter.initializeSchema();

        Set<String> tables = readTableNames(jdbcUrl);
        assertThat(tables).contains("document_record", "processing_attempt");
    }

    @Test
    void initializeSchema_documentRecordHasAllMandatoryColumns(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "columns_test.db");
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();

        Set<String> columns = readColumnNames(jdbcUrl, "document_record");

        // Exact match: an unexpected extra column is a schema change worth noticing.
        assertThat(columns).containsExactlyInAnyOrder(
                "id",
                "fingerprint",
                "last_known_source_locator",
                "last_known_source_file_name",
                "overall_status",
                "content_error_count",
                "transient_error_count",
                "last_failure_instant",
                "last_success_instant",
                "created_at",
                "updated_at"
        );
    }

    @Test
    void initializeSchema_processingAttemptHasAllMandatoryColumns(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "attempt_columns_test.db");
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();

        Set<String> columns = readColumnNames(jdbcUrl, "processing_attempt");

        assertThat(columns).containsExactlyInAnyOrder(
                "id",
                "fingerprint",
                "run_id",
                "attempt_number",
                "started_at",
                "ended_at",
                "status",
                "failure_class",
                "failure_message",
                "retryable"
        );
    }

    /** The three explicitly created indexes are registered in {@code sqlite_master}. */
    @Test
    void initializeSchema_createsIndexes(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "index_test.db");
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();

        Set<String> indexes = new HashSet<>();
        try (Connection conn = DriverManager.getConnection(jdbcUrl);
             var statement = conn.createStatement();
             ResultSet rs = statement.executeQuery(
                     "SELECT name FROM sqlite_master WHERE type = 'index'")) {
            while (rs.next()) {
                indexes.add(rs.getString("name"));
            }
        }

        // 'contains' rather than an exact match: SQLite adds implicit indexes for the
        // UNIQUE constraints under generated names.
        assertThat(indexes).contains(
                "idx_processing_attempt_fingerprint",
                "idx_processing_attempt_run_id",
                "idx_document_record_overall_status"
        );
    }

    // -------------------------------------------------------------------------
    // Idempotency
    // -------------------------------------------------------------------------

    /**
     * A second initialisation must neither throw nor touch data written after the first
     * one.
     */
    @Test
    void initializeSchema_isIdempotent_calledTwice(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "idempotent_test.db");
        SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl);

        adapter.initializeSchema();
        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            insertDocumentRecord(conn, "d".repeat(64), "SUCCESS");
        }

        // Must not throw on second call
        adapter.initializeSchema();

        assertThat(readTableNames(jdbcUrl)).contains("document_record", "processing_attempt");
        try (Connection conn = DriverManager.getConnection(jdbcUrl);
             var statement = conn.createStatement();
             ResultSet rs = statement.executeQuery("SELECT COUNT(*) FROM document_record")) {
            assertThat(rs.next()).isTrue();
            assertThat(rs.getInt(1))
                    .as("row inserted before the second initialisation must survive")
                    .isEqualTo(1);
        }
    }

    // -------------------------------------------------------------------------
    // Unique constraint: fingerprint in document_record
    // -------------------------------------------------------------------------

    @Test
    void documentRecord_fingerprintUniqueConstraintIsEnforced(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "unique_test.db");
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
        String fp = "a".repeat(64);

        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            insertDocumentRecord(conn, fp, "SUCCESS");

            // Second insert with same fingerprint must fail, and specifically because of
            // the UNIQUE constraint, not for some unrelated SQL error.
            assertThatThrownBy(() -> insertDocumentRecord(conn, fp, "SUCCESS"))
                    .as("Expected UNIQUE constraint violation on document_record.fingerprint")
                    .isInstanceOf(SQLException.class)
                    .hasMessageContaining("UNIQUE");
        }
    }

    // -------------------------------------------------------------------------
    // Unique constraint: (fingerprint, attempt_number) in processing_attempt
    // -------------------------------------------------------------------------

    @Test
    void processingAttempt_fingerprintAttemptNumberUniqueConstraintIsEnforced(@TempDir Path dir)
            throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "attempt_unique_test.db");
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
        String fp = "b".repeat(64);
        String attemptSql = """
                INSERT INTO processing_attempt
                (fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable)
                VALUES (?, 'run-1', 1, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', 'FAILED_RETRYABLE', 1)
                """;

        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            // Insert the master record first so that the attempt references an existing
            // document.
            insertDocumentRecord(conn, fp, "FAILED_RETRYABLE");

            try (var ps = conn.prepareStatement(attemptSql)) {
                ps.setString(1, fp);
                ps.executeUpdate();
            }

            // Duplicate (fingerprint, attempt_number) must fail
            try (var ps = conn.prepareStatement(attemptSql)) {
                ps.setString(1, fp);
                assertThatThrownBy(ps::executeUpdate)
                        .as("Expected UNIQUE constraint violation on (fingerprint, attempt_number)")
                        .isInstanceOf(SQLException.class)
                        .hasMessageContaining("UNIQUE");
            }
        }
    }

    // -------------------------------------------------------------------------
    // Skip attempts are storable
    // -------------------------------------------------------------------------

    @Test
    void processingAttempt_skipStatusIsStorable(@TempDir Path dir) throws SQLException {
        String jdbcUrl = jdbcUrl(dir, "skip_test.db");
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
        String fp = "c".repeat(64);

        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            insertDocumentRecord(conn, fp, "SUCCESS");

            // Insert a SKIPPED_ALREADY_PROCESSED attempt (null failure fields, retryable=0)
            try (var ps = conn.prepareStatement("""
                    INSERT INTO processing_attempt
                    (fingerprint, run_id, attempt_number, started_at, ended_at,
                    status, failure_class, failure_message, retryable)
                    VALUES (?, 'run-2', 2, '2026-01-02T00:00:00Z', '2026-01-02T00:00:01Z',
                    'SKIPPED_ALREADY_PROCESSED', NULL, NULL, 0)
                    """)) {
                ps.setString(1, fp);
                int rows = ps.executeUpdate();
                assertThat(rows).isEqualTo(1);
            }
        }
    }

    // -------------------------------------------------------------------------
    // Error handling
    // -------------------------------------------------------------------------

    @Test
    void initializeSchema_throwsDocumentPersistenceException_onInvalidUrl() {
        // SQLite is lenient with paths; use a truly invalid JDBC URL format
        SqliteSchemaInitializationAdapter badAdapter =
                new SqliteSchemaInitializationAdapter("not-a-jdbc-url-at-all");

        assertThatThrownBy(badAdapter::initializeSchema)
                .isInstanceOf(DocumentPersistenceException.class);
    }

    /**
     * The database file is created on demand, but a missing parent directory is not:
     * the connection cannot be opened and the failure surfaces as the port exception.
     */
    @Test
    void initializeSchema_throwsDocumentPersistenceException_whenParentDirectoryIsMissing(
            @TempDir Path dir) {
        String jdbcUrl = jdbcUrl(dir.resolve("does-not-exist"), "orphan.db");
        SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl);

        assertThatThrownBy(adapter::initializeSchema)
                .isInstanceOf(DocumentPersistenceException.class);
    }

    // -------------------------------------------------------------------------
    // Helpers
    // -------------------------------------------------------------------------

    /** Builds a {@code jdbc:sqlite:} URL for a file inside the given directory. */
    private static String jdbcUrl(Path dir, String filename) {
        return "jdbc:sqlite:" + dir.resolve(filename).toAbsolutePath();
    }

    /**
     * Inserts a minimal {@code document_record} row, supplying only the columns that have
     * no default. Locator, file name, and timestamps are fixed placeholder values.
     */
    private static void insertDocumentRecord(Connection conn, String fingerprint, String overallStatus)
            throws SQLException {
        String sql = """
                INSERT INTO document_record
                (fingerprint, last_known_source_locator, last_known_source_file_name,
                overall_status, created_at, updated_at)
                VALUES (?, 'loc', 'f.pdf', ?, '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')
                """;
        try (var ps = conn.prepareStatement(sql)) {
            ps.setString(1, fingerprint);
            ps.setString(2, overallStatus);
            ps.executeUpdate();
        }
    }

    /** Reads the lower-cased names of all tables reported by the JDBC metadata. */
    private static Set<String> readTableNames(String jdbcUrl) throws SQLException {
        Set<String> tables = new HashSet<>();
        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            DatabaseMetaData meta = conn.getMetaData();
            try (ResultSet rs = meta.getTables(null, null, "%", new String[]{"TABLE"})) {
                while (rs.next()) {
                    // Locale.ROOT keeps the comparison independent of the JVM's default
                    // locale (e.g. Turkish dotless i).
                    tables.add(rs.getString("TABLE_NAME").toLowerCase(java.util.Locale.ROOT));
                }
            }
        }
        return tables;
    }

    /** Reads the lower-cased column names of the given table from the JDBC metadata. */
    private static Set<String> readColumnNames(String jdbcUrl, String tableName) throws SQLException {
        Set<String> columns = new HashSet<>();
        try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
            DatabaseMetaData meta = conn.getMetaData();
            try (ResultSet rs = meta.getColumns(null, null, tableName, "%")) {
                while (rs.next()) {
                    columns.add(rs.getString("COLUMN_NAME").toLowerCase(java.util.Locale.ROOT));
                }
            }
        }
        return columns;
    }
}