M4 AP-003 SQLite-Schema und Persistenzbasis einführen
This commit is contained in:
@@ -0,0 +1,244 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
|
||||||
|
|
||||||
|
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||||
|
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
|
||||||
|
import org.apache.logging.log4j.LogManager;
|
||||||
|
import org.apache.logging.log4j.Logger;
|
||||||
|
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.sql.Statement;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SQLite implementation of {@link PersistenceSchemaInitializationPort}.
|
||||||
|
* <p>
|
||||||
|
* Creates or verifies the M4 two-level persistence schema in the configured SQLite
|
||||||
|
* database file. All DDL uses {@code IF NOT EXISTS} semantics, making the operation
|
||||||
|
* fully idempotent: calling {@link #initializeSchema()} on an already-initialised
|
||||||
|
* database succeeds without error and without modifying existing data.
|
||||||
|
*
|
||||||
|
* <h2>Two-level schema</h2>
|
||||||
|
* <p>The schema consists of exactly two tables:
|
||||||
|
* <ol>
|
||||||
|
* <li><strong>{@code document_record}</strong> — the document master record
|
||||||
|
* (Dokument-Stammsatz). One row per unique SHA-256 fingerprint.</li>
|
||||||
|
* <li><strong>{@code processing_attempt}</strong> — the processing attempt history
|
||||||
|
* (Versuchshistorie). One row per historised processing attempt, referencing
|
||||||
|
* the master record via fingerprint.</li>
|
||||||
|
* </ol>
|
||||||
|
*
|
||||||
|
* <h2>Initialisation timing</h2>
|
||||||
|
* <p>This adapter must be invoked <em>once</em> at program startup, before the batch
|
||||||
|
* document processing loop begins. It is wired by the bootstrap module and called
|
||||||
|
* explicitly through the port. There is no lazy or deferred initialisation.
|
||||||
|
*
|
||||||
|
* <h2>Architecture boundary</h2>
|
||||||
|
* <p>All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined
|
||||||
|
* to this class. No JDBC or SQLite types appear in the port interface or in any
|
||||||
|
* application/domain type.
|
||||||
|
*
|
||||||
|
* @since M4-AP-003
|
||||||
|
*/
|
||||||
|
public class SqliteSchemaInitializationAdapter implements PersistenceSchemaInitializationPort {
|
||||||
|
|
||||||
|
private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DDL for the document master record table.
|
||||||
|
* <p>
|
||||||
|
* <strong>Columns (M4 mandatory fields):</strong>
|
||||||
|
* <ul>
|
||||||
|
* <li>{@code id} — internal surrogate primary key (auto-increment).</li>
|
||||||
|
* <li>{@code fingerprint} — SHA-256 hex string; unique natural key; never null.</li>
|
||||||
|
* <li>{@code last_known_source_locator} — opaque locator value (file path string);
|
||||||
|
* never null.</li>
|
||||||
|
* <li>{@code last_known_source_file_name} — human-readable file name for logging;
|
||||||
|
* never null.</li>
|
||||||
|
* <li>{@code overall_status} — current processing status as enum name string;
|
||||||
|
* never null.</li>
|
||||||
|
* <li>{@code content_error_count} — count of deterministic content errors;
|
||||||
|
* default 0; never negative.</li>
|
||||||
|
* <li>{@code transient_error_count} — count of transient technical errors;
|
||||||
|
* default 0; never negative.</li>
|
||||||
|
* <li>{@code last_failure_instant} — ISO-8601 UTC timestamp of the most recent
|
||||||
|
* failure; nullable.</li>
|
||||||
|
* <li>{@code last_success_instant} — ISO-8601 UTC timestamp of the successful
|
||||||
|
* processing; nullable.</li>
|
||||||
|
* <li>{@code created_at} — ISO-8601 UTC timestamp of record creation; never null.</li>
|
||||||
|
* <li>{@code updated_at} — ISO-8601 UTC timestamp of the most recent update;
|
||||||
|
* never null.</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* <strong>Not included (M5+ fields):</strong> target path, target file name,
|
||||||
|
* AI-related fields.
|
||||||
|
*/
|
||||||
|
private static final String DDL_CREATE_DOCUMENT_RECORD = """
|
||||||
|
CREATE TABLE IF NOT EXISTS document_record (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
fingerprint TEXT NOT NULL,
|
||||||
|
last_known_source_locator TEXT NOT NULL,
|
||||||
|
last_known_source_file_name TEXT NOT NULL,
|
||||||
|
overall_status TEXT NOT NULL,
|
||||||
|
content_error_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
transient_error_count INTEGER NOT NULL DEFAULT 0,
|
||||||
|
last_failure_instant TEXT,
|
||||||
|
last_success_instant TEXT,
|
||||||
|
created_at TEXT NOT NULL,
|
||||||
|
updated_at TEXT NOT NULL,
|
||||||
|
CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint)
|
||||||
|
)
|
||||||
|
""";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* DDL for the processing attempt history table.
|
||||||
|
* <p>
|
||||||
|
* <strong>Columns (M4 mandatory fields):</strong>
|
||||||
|
* <ul>
|
||||||
|
* <li>{@code id} — internal surrogate primary key (auto-increment).</li>
|
||||||
|
* <li>{@code fingerprint} — foreign key reference to
|
||||||
|
* {@code document_record.fingerprint}; never null.</li>
|
||||||
|
* <li>{@code run_id} — identifier of the batch run; never null.</li>
|
||||||
|
* <li>{@code attempt_number} — monotonically increasing per fingerprint, starting
|
||||||
|
* at 1; never null. The unique constraint on {@code (fingerprint, attempt_number)}
|
||||||
|
* enforces uniqueness per document.</li>
|
||||||
|
* <li>{@code started_at} — ISO-8601 UTC timestamp of attempt start; never null.</li>
|
||||||
|
* <li>{@code ended_at} — ISO-8601 UTC timestamp of attempt end; never null.</li>
|
||||||
|
* <li>{@code status} — outcome status as enum name string; never null.</li>
|
||||||
|
* <li>{@code failure_class} — short failure classification; nullable (null for
|
||||||
|
* success and skip attempts).</li>
|
||||||
|
* <li>{@code failure_message} — human-readable failure description; nullable
|
||||||
|
* (null for success and skip attempts).</li>
|
||||||
|
* <li>{@code retryable} — 1 if the failure is retryable in a later run, 0 otherwise;
|
||||||
|
* never null. Always 0 for success and skip attempts.</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* <strong>Skip attempts:</strong> Skip statuses ({@code SKIPPED_ALREADY_PROCESSED},
|
||||||
|
* {@code SKIPPED_FINAL_FAILURE}) are stored as regular rows with {@code retryable = 0}
|
||||||
|
* and null failure fields.
|
||||||
|
* <p>
|
||||||
|
* <strong>Not included (M5+ fields):</strong> model name, prompt identifier,
|
||||||
|
* AI raw response, AI reasoning, resolved date, date source, final title,
|
||||||
|
* final target file name.
|
||||||
|
*/
|
||||||
|
private static final String DDL_CREATE_PROCESSING_ATTEMPT = """
|
||||||
|
CREATE TABLE IF NOT EXISTS processing_attempt (
|
||||||
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||||
|
fingerprint TEXT NOT NULL,
|
||||||
|
run_id TEXT NOT NULL,
|
||||||
|
attempt_number INTEGER NOT NULL,
|
||||||
|
started_at TEXT NOT NULL,
|
||||||
|
ended_at TEXT NOT NULL,
|
||||||
|
status TEXT NOT NULL,
|
||||||
|
failure_class TEXT,
|
||||||
|
failure_message TEXT,
|
||||||
|
retryable INTEGER NOT NULL DEFAULT 0,
|
||||||
|
CONSTRAINT fk_processing_attempt_fingerprint
|
||||||
|
FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint),
|
||||||
|
CONSTRAINT uq_processing_attempt_fingerprint_number
|
||||||
|
UNIQUE (fingerprint, attempt_number)
|
||||||
|
)
|
||||||
|
""";
|
||||||
|
|
||||||
|
/** Index on {@code processing_attempt.fingerprint} for fast per-document lookups. */
|
||||||
|
private static final String DDL_IDX_ATTEMPT_FINGERPRINT =
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint "
|
||||||
|
+ "ON processing_attempt (fingerprint)";
|
||||||
|
|
||||||
|
/** Index on {@code processing_attempt.run_id} for fast per-run lookups. */
|
||||||
|
private static final String DDL_IDX_ATTEMPT_RUN_ID =
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_processing_attempt_run_id "
|
||||||
|
+ "ON processing_attempt (run_id)";
|
||||||
|
|
||||||
|
/** Index on {@code document_record.overall_status} for fast status-based filtering. */
|
||||||
|
private static final String DDL_IDX_RECORD_STATUS =
|
||||||
|
"CREATE INDEX IF NOT EXISTS idx_document_record_overall_status "
|
||||||
|
+ "ON document_record (overall_status)";
|
||||||
|
|
||||||
|
private final String jdbcUrl;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Constructs the adapter with the JDBC URL of the SQLite database file.
|
||||||
|
* <p>
|
||||||
|
* The JDBC URL must be in the form {@code jdbc:sqlite:/path/to/file.db}.
|
||||||
|
* The file and its parent directories need not exist at construction time;
|
||||||
|
* SQLite creates them when the connection is first opened.
|
||||||
|
*
|
||||||
|
* @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank
|
||||||
|
* @throws NullPointerException if {@code jdbcUrl} is null
|
||||||
|
* @throws IllegalArgumentException if {@code jdbcUrl} is blank
|
||||||
|
*/
|
||||||
|
public SqliteSchemaInitializationAdapter(String jdbcUrl) {
|
||||||
|
Objects.requireNonNull(jdbcUrl, "jdbcUrl must not be null");
|
||||||
|
if (jdbcUrl.isBlank()) {
|
||||||
|
throw new IllegalArgumentException("jdbcUrl must not be blank");
|
||||||
|
}
|
||||||
|
this.jdbcUrl = jdbcUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates or verifies the M4 persistence schema in the SQLite database.
|
||||||
|
* <p>
|
||||||
|
* Executes the following DDL statements in order:
|
||||||
|
* <ol>
|
||||||
|
* <li>Enable foreign key enforcement ({@code PRAGMA foreign_keys = ON})</li>
|
||||||
|
* <li>Create {@code document_record} table (if not exists)</li>
|
||||||
|
* <li>Create {@code processing_attempt} table (if not exists)</li>
|
||||||
|
* <li>Create indexes on {@code processing_attempt.fingerprint},
|
||||||
|
* {@code processing_attempt.run_id}, and
|
||||||
|
* {@code document_record.overall_status}</li>
|
||||||
|
* </ol>
|
||||||
|
* <p>
|
||||||
|
* All statements use {@code IF NOT EXISTS} semantics. Calling this method on an
|
||||||
|
* already-initialised database is safe and produces no changes.
|
||||||
|
* <p>
|
||||||
|
* <strong>Timing:</strong> Must be called once at program startup, before the
|
||||||
|
* batch document processing loop begins.
|
||||||
|
*
|
||||||
|
* @throws DocumentPersistenceException if the schema cannot be created or verified
|
||||||
|
* due to a JDBC or SQLite error
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public void initializeSchema() {
|
||||||
|
logger.info("Initialising M4 SQLite schema at: {}", jdbcUrl);
|
||||||
|
try (Connection connection = DriverManager.getConnection(jdbcUrl);
|
||||||
|
Statement statement = connection.createStatement()) {
|
||||||
|
|
||||||
|
// Enable foreign key enforcement (SQLite disables it by default)
|
||||||
|
statement.execute("PRAGMA foreign_keys = ON");
|
||||||
|
|
||||||
|
// Level 1: document master record
|
||||||
|
statement.execute(DDL_CREATE_DOCUMENT_RECORD);
|
||||||
|
logger.debug("Table 'document_record' created or already present.");
|
||||||
|
|
||||||
|
// Level 2: processing attempt history
|
||||||
|
statement.execute(DDL_CREATE_PROCESSING_ATTEMPT);
|
||||||
|
logger.debug("Table 'processing_attempt' created or already present.");
|
||||||
|
|
||||||
|
// Indexes for efficient per-document, per-run, and per-status access
|
||||||
|
statement.execute(DDL_IDX_ATTEMPT_FINGERPRINT);
|
||||||
|
statement.execute(DDL_IDX_ATTEMPT_RUN_ID);
|
||||||
|
statement.execute(DDL_IDX_RECORD_STATUS);
|
||||||
|
logger.debug("Indexes created or already present.");
|
||||||
|
|
||||||
|
logger.info("M4 SQLite schema initialisation completed successfully.");
|
||||||
|
|
||||||
|
} catch (SQLException e) {
|
||||||
|
String message = "Failed to initialise M4 SQLite schema at '" + jdbcUrl + "': " + e.getMessage();
|
||||||
|
logger.error(message, e);
|
||||||
|
throw new DocumentPersistenceException(message, e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the JDBC URL this adapter uses to connect to the SQLite database.
|
||||||
|
* <p>
|
||||||
|
* Intended for logging and diagnostics only.
|
||||||
|
*
|
||||||
|
* @return the JDBC URL; never null or blank
|
||||||
|
*/
|
||||||
|
public String getJdbcUrl() {
|
||||||
|
return jdbcUrl;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -0,0 +1,37 @@
|
|||||||
|
/**
 * SQLite persistence adapter for the M4 two-level persistence model.
 *
 * <h2>Purpose</h2>
 * <p>This package contains the technical SQLite infrastructure for the M4 persistence
 * layer. It is the only place in the entire application where JDBC connections, SQL DDL,
 * and SQLite-specific types are used. No JDBC or SQLite types leak into the
 * {@code application} or {@code domain} modules.
 *
 * <h2>Two-level persistence model</h2>
 * <p>M4 persistence is structured in exactly two levels:
 * <ol>
 *   <li><strong>Document master record</strong> ({@code document_record} table) —
 *       one row per unique SHA-256 fingerprint; carries the current overall status,
 *       failure counters, and the most recently known source location.</li>
 *   <li><strong>Processing attempt history</strong> ({@code processing_attempt} table) —
 *       one row per historised processing attempt; references the master record via
 *       fingerprint; attempt numbers are monotonically increasing per fingerprint.</li>
 * </ol>
 *
 * <h2>Schema initialisation timing</h2>
 * <p>The {@link de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter}
 * implements the
 * {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort}
 * and must be called <em>once</em> at program startup, before the batch document
 * processing loop begins. It creates the tables and indexes only if they do not exist
 * yet. There is no lazy or hidden initialisation during document processing.
 *
 * <h2>Architecture boundary</h2>
 * <p>All JDBC connections, SQL statements, and SQLite-specific behaviour are strictly
 * confined to this package. The application layer interacts exclusively through the
 * port interfaces defined in
 * {@code de.gecheckt.pdf.umbenenner.application.port.out}.
 *
 * @since M4-AP-003
 */
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
|
||||||
@@ -0,0 +1,288 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
|
||||||
|
|
||||||
|
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.sql.Connection;
|
||||||
|
import java.sql.DatabaseMetaData;
|
||||||
|
import java.sql.DriverManager;
|
||||||
|
import java.sql.ResultSet;
|
||||||
|
import java.sql.SQLException;
|
||||||
|
import java.util.HashSet;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
|
import static org.assertj.core.api.Assertions.assertThat;
|
||||||
|
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for {@link SqliteSchemaInitializationAdapter}.
|
||||||
|
* <p>
|
||||||
|
* Verifies that the M4 two-level schema is created correctly, that the operation
|
||||||
|
* is idempotent, and that invalid configuration is rejected.
|
||||||
|
*
|
||||||
|
* @since M4-AP-003
|
||||||
|
*/
|
||||||
|
class SqliteSchemaInitializationAdapterTest {
|
||||||
|
|
||||||
|
@TempDir
|
||||||
|
Path tempDir;
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Construction
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void constructor_rejectsNullJdbcUrl() {
|
||||||
|
assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(null))
|
||||||
|
.isInstanceOf(NullPointerException.class)
|
||||||
|
.hasMessageContaining("jdbcUrl");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void constructor_rejectsBlankJdbcUrl() {
|
||||||
|
assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(" "))
|
||||||
|
.isInstanceOf(IllegalArgumentException.class)
|
||||||
|
.hasMessageContaining("jdbcUrl");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void getJdbcUrl_returnsConfiguredUrl() {
|
||||||
|
String url = "jdbc:sqlite:/some/path/test.db";
|
||||||
|
SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(url);
|
||||||
|
assertThat(adapter.getJdbcUrl()).isEqualTo(url);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Schema creation – tables present
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void initializeSchema_createsBothTables(@TempDir Path dir) throws SQLException {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "schema_test.db");
|
||||||
|
SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl);
|
||||||
|
|
||||||
|
adapter.initializeSchema();
|
||||||
|
|
||||||
|
Set<String> tables = readTableNames(jdbcUrl);
|
||||||
|
assertThat(tables).contains("document_record", "processing_attempt");
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void initializeSchema_documentRecordHasAllMandatoryColumns(@TempDir Path dir) throws SQLException {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "columns_test.db");
|
||||||
|
new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
|
||||||
|
|
||||||
|
Set<String> columns = readColumnNames(jdbcUrl, "document_record");
|
||||||
|
assertThat(columns).containsExactlyInAnyOrder(
|
||||||
|
"id",
|
||||||
|
"fingerprint",
|
||||||
|
"last_known_source_locator",
|
||||||
|
"last_known_source_file_name",
|
||||||
|
"overall_status",
|
||||||
|
"content_error_count",
|
||||||
|
"transient_error_count",
|
||||||
|
"last_failure_instant",
|
||||||
|
"last_success_instant",
|
||||||
|
"created_at",
|
||||||
|
"updated_at"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void initializeSchema_processingAttemptHasAllMandatoryColumns(@TempDir Path dir) throws SQLException {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "attempt_columns_test.db");
|
||||||
|
new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
|
||||||
|
|
||||||
|
Set<String> columns = readColumnNames(jdbcUrl, "processing_attempt");
|
||||||
|
assertThat(columns).containsExactlyInAnyOrder(
|
||||||
|
"id",
|
||||||
|
"fingerprint",
|
||||||
|
"run_id",
|
||||||
|
"attempt_number",
|
||||||
|
"started_at",
|
||||||
|
"ended_at",
|
||||||
|
"status",
|
||||||
|
"failure_class",
|
||||||
|
"failure_message",
|
||||||
|
"retryable"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Idempotency
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void initializeSchema_isIdempotent_calledTwice(@TempDir Path dir) {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "idempotent_test.db");
|
||||||
|
SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl);
|
||||||
|
|
||||||
|
// Must not throw on second call
|
||||||
|
adapter.initializeSchema();
|
||||||
|
adapter.initializeSchema();
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Unique constraint: fingerprint in document_record
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void documentRecord_fingerprintUniqueConstraintIsEnforced(@TempDir Path dir) throws SQLException {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "unique_test.db");
|
||||||
|
new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
|
||||||
|
|
||||||
|
String insertSql = """
|
||||||
|
INSERT INTO document_record
|
||||||
|
(fingerprint, last_known_source_locator, last_known_source_file_name,
|
||||||
|
overall_status, created_at, updated_at)
|
||||||
|
VALUES (?, 'locator', 'file.pdf', 'SUCCESS', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')
|
||||||
|
""";
|
||||||
|
String fp = "a".repeat(64);
|
||||||
|
|
||||||
|
try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
|
||||||
|
try (var ps = conn.prepareStatement(insertSql)) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
// Second insert with same fingerprint must fail
|
||||||
|
try (var ps = conn.prepareStatement(insertSql)) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
org.junit.jupiter.api.Assertions.assertThrows(
|
||||||
|
SQLException.class, ps::executeUpdate,
|
||||||
|
"Expected UNIQUE constraint violation on document_record.fingerprint");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Unique constraint: (fingerprint, attempt_number) in processing_attempt
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void processingAttempt_fingerprintAttemptNumberUniqueConstraintIsEnforced(@TempDir Path dir)
|
||||||
|
throws SQLException {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "attempt_unique_test.db");
|
||||||
|
new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
|
||||||
|
|
||||||
|
String fp = "b".repeat(64);
|
||||||
|
|
||||||
|
// Insert master record first (FK)
|
||||||
|
try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
|
||||||
|
try (var ps = conn.prepareStatement("""
|
||||||
|
INSERT INTO document_record
|
||||||
|
(fingerprint, last_known_source_locator, last_known_source_file_name,
|
||||||
|
overall_status, created_at, updated_at)
|
||||||
|
VALUES (?, 'loc', 'f.pdf', 'FAILED_RETRYABLE', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')
|
||||||
|
""")) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
|
||||||
|
String attemptSql = """
|
||||||
|
INSERT INTO processing_attempt
|
||||||
|
(fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable)
|
||||||
|
VALUES (?, 'run-1', 1, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', 'FAILED_RETRYABLE', 1)
|
||||||
|
""";
|
||||||
|
|
||||||
|
try (var ps = conn.prepareStatement(attemptSql)) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
// Duplicate (fingerprint, attempt_number) must fail
|
||||||
|
try (var ps = conn.prepareStatement(attemptSql)) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
org.junit.jupiter.api.Assertions.assertThrows(
|
||||||
|
SQLException.class, ps::executeUpdate,
|
||||||
|
"Expected UNIQUE constraint violation on (fingerprint, attempt_number)");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Skip attempts are storable
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void processingAttempt_skipStatusIsStorable(@TempDir Path dir) throws SQLException {
|
||||||
|
String jdbcUrl = jdbcUrl(dir, "skip_test.db");
|
||||||
|
new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
|
||||||
|
|
||||||
|
String fp = "c".repeat(64);
|
||||||
|
|
||||||
|
try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
|
||||||
|
// Insert master record
|
||||||
|
try (var ps = conn.prepareStatement("""
|
||||||
|
INSERT INTO document_record
|
||||||
|
(fingerprint, last_known_source_locator, last_known_source_file_name,
|
||||||
|
overall_status, created_at, updated_at)
|
||||||
|
VALUES (?, 'loc', 'f.pdf', 'SUCCESS', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z')
|
||||||
|
""")) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
ps.executeUpdate();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Insert a SKIPPED_ALREADY_PROCESSED attempt (null failure fields, retryable=0)
|
||||||
|
try (var ps = conn.prepareStatement("""
|
||||||
|
INSERT INTO processing_attempt
|
||||||
|
(fingerprint, run_id, attempt_number, started_at, ended_at,
|
||||||
|
status, failure_class, failure_message, retryable)
|
||||||
|
VALUES (?, 'run-2', 2, '2026-01-02T00:00:00Z', '2026-01-02T00:00:01Z',
|
||||||
|
'SKIPPED_ALREADY_PROCESSED', NULL, NULL, 0)
|
||||||
|
""")) {
|
||||||
|
ps.setString(1, fp);
|
||||||
|
int rows = ps.executeUpdate();
|
||||||
|
assertThat(rows).isEqualTo(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Error handling
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void initializeSchema_throwsDocumentPersistenceException_onInvalidUrl() {
|
||||||
|
// SQLite is lenient with paths; use a truly invalid JDBC URL format
|
||||||
|
SqliteSchemaInitializationAdapter badAdapter =
|
||||||
|
new SqliteSchemaInitializationAdapter("not-a-jdbc-url-at-all");
|
||||||
|
|
||||||
|
assertThatThrownBy(badAdapter::initializeSchema)
|
||||||
|
.isInstanceOf(DocumentPersistenceException.class);
|
||||||
|
}
|
||||||
|
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
// Helpers
|
||||||
|
// -------------------------------------------------------------------------
|
||||||
|
|
||||||
|
private static String jdbcUrl(Path dir, String filename) {
|
||||||
|
return "jdbc:sqlite:" + dir.resolve(filename).toAbsolutePath();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Set<String> readTableNames(String jdbcUrl) throws SQLException {
|
||||||
|
Set<String> tables = new HashSet<>();
|
||||||
|
try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
|
||||||
|
DatabaseMetaData meta = conn.getMetaData();
|
||||||
|
try (ResultSet rs = meta.getTables(null, null, "%", new String[]{"TABLE"})) {
|
||||||
|
while (rs.next()) {
|
||||||
|
tables.add(rs.getString("TABLE_NAME").toLowerCase());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return tables;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static Set<String> readColumnNames(String jdbcUrl, String tableName) throws SQLException {
|
||||||
|
Set<String> columns = new HashSet<>();
|
||||||
|
try (Connection conn = DriverManager.getConnection(jdbcUrl)) {
|
||||||
|
DatabaseMetaData meta = conn.getMetaData();
|
||||||
|
try (ResultSet rs = meta.getColumns(null, null, tableName, "%")) {
|
||||||
|
while (rs.next()) {
|
||||||
|
columns.add(rs.getString("COLUMN_NAME").toLowerCase());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return columns;
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user