M4 AP-003 SQLite-Schema und Persistenzbasis einführen
This commit is contained in:
+244
@@ -0,0 +1,244 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* SQLite implementation of {@link PersistenceSchemaInitializationPort}.
|
||||
* <p>
|
||||
* Creates or verifies the M4 two-level persistence schema in the configured SQLite
|
||||
* database file. All DDL uses {@code IF NOT EXISTS} semantics, making the operation
|
||||
* fully idempotent: calling {@link #initializeSchema()} on an already-initialised
|
||||
* database succeeds without error and without modifying existing data.
|
||||
*
|
||||
* <h2>Two-level schema</h2>
|
||||
* <p>The schema consists of exactly two tables:
|
||||
* <ol>
|
||||
* <li><strong>{@code document_record}</strong> — the document master record
|
||||
* (Dokument-Stammsatz). One row per unique SHA-256 fingerprint.</li>
|
||||
* <li><strong>{@code processing_attempt}</strong> — the processing attempt history
|
||||
* (Versuchshistorie). One row per historised processing attempt, referencing
|
||||
* the master record via fingerprint.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Initialisation timing</h2>
|
||||
* <p>This adapter must be invoked <em>once</em> at program startup, before the batch
|
||||
* document processing loop begins. It is wired by the bootstrap module and called
|
||||
* explicitly through the port. There is no lazy or deferred initialisation.
|
||||
*
|
||||
* <h2>Architecture boundary</h2>
|
||||
* <p>All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined
|
||||
* to this class. No JDBC or SQLite types appear in the port interface or in any
|
||||
* application/domain type.
|
||||
*
|
||||
* @since M4-AP-003
|
||||
*/
|
||||
public class SqliteSchemaInitializationAdapter implements PersistenceSchemaInitializationPort {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class);
|
||||
|
||||
/**
|
||||
* DDL for the document master record table.
|
||||
* <p>
|
||||
* <strong>Columns (M4 mandatory fields):</strong>
|
||||
* <ul>
|
||||
* <li>{@code id} — internal surrogate primary key (auto-increment).</li>
|
||||
* <li>{@code fingerprint} — SHA-256 hex string; unique natural key; never null.</li>
|
||||
* <li>{@code last_known_source_locator} — opaque locator value (file path string);
|
||||
* never null.</li>
|
||||
* <li>{@code last_known_source_file_name} — human-readable file name for logging;
|
||||
* never null.</li>
|
||||
* <li>{@code overall_status} — current processing status as enum name string;
|
||||
* never null.</li>
|
||||
* <li>{@code content_error_count} — count of deterministic content errors;
|
||||
* default 0; never negative.</li>
|
||||
* <li>{@code transient_error_count} — count of transient technical errors;
|
||||
* default 0; never negative.</li>
|
||||
* <li>{@code last_failure_instant} — ISO-8601 UTC timestamp of the most recent
|
||||
* failure; nullable.</li>
|
||||
* <li>{@code last_success_instant} — ISO-8601 UTC timestamp of the successful
|
||||
* processing; nullable.</li>
|
||||
* <li>{@code created_at} — ISO-8601 UTC timestamp of record creation; never null.</li>
|
||||
* <li>{@code updated_at} — ISO-8601 UTC timestamp of the most recent update;
|
||||
* never null.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Not included (M5+ fields):</strong> target path, target file name,
|
||||
* AI-related fields.
|
||||
*/
|
||||
private static final String DDL_CREATE_DOCUMENT_RECORD = """
|
||||
CREATE TABLE IF NOT EXISTS document_record (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
fingerprint TEXT NOT NULL,
|
||||
last_known_source_locator TEXT NOT NULL,
|
||||
last_known_source_file_name TEXT NOT NULL,
|
||||
overall_status TEXT NOT NULL,
|
||||
content_error_count INTEGER NOT NULL DEFAULT 0,
|
||||
transient_error_count INTEGER NOT NULL DEFAULT 0,
|
||||
last_failure_instant TEXT,
|
||||
last_success_instant TEXT,
|
||||
created_at TEXT NOT NULL,
|
||||
updated_at TEXT NOT NULL,
|
||||
CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint)
|
||||
)
|
||||
""";
|
||||
|
||||
/**
|
||||
* DDL for the processing attempt history table.
|
||||
* <p>
|
||||
* <strong>Columns (M4 mandatory fields):</strong>
|
||||
* <ul>
|
||||
* <li>{@code id} — internal surrogate primary key (auto-increment).</li>
|
||||
* <li>{@code fingerprint} — foreign key reference to
|
||||
* {@code document_record.fingerprint}; never null.</li>
|
||||
* <li>{@code run_id} — identifier of the batch run; never null.</li>
|
||||
* <li>{@code attempt_number} — monotonically increasing per fingerprint, starting
|
||||
* at 1; never null. The unique constraint on {@code (fingerprint, attempt_number)}
|
||||
* enforces uniqueness per document.</li>
|
||||
* <li>{@code started_at} — ISO-8601 UTC timestamp of attempt start; never null.</li>
|
||||
* <li>{@code ended_at} — ISO-8601 UTC timestamp of attempt end; never null.</li>
|
||||
* <li>{@code status} — outcome status as enum name string; never null.</li>
|
||||
* <li>{@code failure_class} — short failure classification; nullable (null for
|
||||
* success and skip attempts).</li>
|
||||
* <li>{@code failure_message} — human-readable failure description; nullable
|
||||
* (null for success and skip attempts).</li>
|
||||
* <li>{@code retryable} — 1 if the failure is retryable in a later run, 0 otherwise;
|
||||
* never null. Always 0 for success and skip attempts.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Skip attempts:</strong> Skip statuses ({@code SKIPPED_ALREADY_PROCESSED},
|
||||
* {@code SKIPPED_FINAL_FAILURE}) are stored as regular rows with {@code retryable = 0}
|
||||
* and null failure fields.
|
||||
* <p>
|
||||
* <strong>Not included (M5+ fields):</strong> model name, prompt identifier,
|
||||
* AI raw response, AI reasoning, resolved date, date source, final title,
|
||||
* final target file name.
|
||||
*/
|
||||
private static final String DDL_CREATE_PROCESSING_ATTEMPT = """
|
||||
CREATE TABLE IF NOT EXISTS processing_attempt (
|
||||
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
||||
fingerprint TEXT NOT NULL,
|
||||
run_id TEXT NOT NULL,
|
||||
attempt_number INTEGER NOT NULL,
|
||||
started_at TEXT NOT NULL,
|
||||
ended_at TEXT NOT NULL,
|
||||
status TEXT NOT NULL,
|
||||
failure_class TEXT,
|
||||
failure_message TEXT,
|
||||
retryable INTEGER NOT NULL DEFAULT 0,
|
||||
CONSTRAINT fk_processing_attempt_fingerprint
|
||||
FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint),
|
||||
CONSTRAINT uq_processing_attempt_fingerprint_number
|
||||
UNIQUE (fingerprint, attempt_number)
|
||||
)
|
||||
""";
|
||||
|
||||
/** Index on {@code processing_attempt.fingerprint} for fast per-document lookups. */
|
||||
private static final String DDL_IDX_ATTEMPT_FINGERPRINT =
|
||||
"CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint "
|
||||
+ "ON processing_attempt (fingerprint)";
|
||||
|
||||
/** Index on {@code processing_attempt.run_id} for fast per-run lookups. */
|
||||
private static final String DDL_IDX_ATTEMPT_RUN_ID =
|
||||
"CREATE INDEX IF NOT EXISTS idx_processing_attempt_run_id "
|
||||
+ "ON processing_attempt (run_id)";
|
||||
|
||||
/** Index on {@code document_record.overall_status} for fast status-based filtering. */
|
||||
private static final String DDL_IDX_RECORD_STATUS =
|
||||
"CREATE INDEX IF NOT EXISTS idx_document_record_overall_status "
|
||||
+ "ON document_record (overall_status)";
|
||||
|
||||
private final String jdbcUrl;
|
||||
|
||||
/**
|
||||
* Constructs the adapter with the JDBC URL of the SQLite database file.
|
||||
* <p>
|
||||
* The JDBC URL must be in the form {@code jdbc:sqlite:/path/to/file.db}.
|
||||
* The file and its parent directories need not exist at construction time;
|
||||
* SQLite creates them when the connection is first opened.
|
||||
*
|
||||
* @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank
|
||||
* @throws NullPointerException if {@code jdbcUrl} is null
|
||||
* @throws IllegalArgumentException if {@code jdbcUrl} is blank
|
||||
*/
|
||||
public SqliteSchemaInitializationAdapter(String jdbcUrl) {
|
||||
Objects.requireNonNull(jdbcUrl, "jdbcUrl must not be null");
|
||||
if (jdbcUrl.isBlank()) {
|
||||
throw new IllegalArgumentException("jdbcUrl must not be blank");
|
||||
}
|
||||
this.jdbcUrl = jdbcUrl;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates or verifies the M4 persistence schema in the SQLite database.
|
||||
* <p>
|
||||
* Executes the following DDL statements in order:
|
||||
* <ol>
|
||||
* <li>Enable foreign key enforcement ({@code PRAGMA foreign_keys = ON})</li>
|
||||
* <li>Create {@code document_record} table (if not exists)</li>
|
||||
* <li>Create {@code processing_attempt} table (if not exists)</li>
|
||||
* <li>Create indexes on {@code processing_attempt.fingerprint},
|
||||
* {@code processing_attempt.run_id}, and
|
||||
* {@code document_record.overall_status}</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* All statements use {@code IF NOT EXISTS} semantics. Calling this method on an
|
||||
* already-initialised database is safe and produces no changes.
|
||||
* <p>
|
||||
* <strong>Timing:</strong> Must be called once at program startup, before the
|
||||
* batch document processing loop begins.
|
||||
*
|
||||
* @throws DocumentPersistenceException if the schema cannot be created or verified
|
||||
* due to a JDBC or SQLite error
|
||||
*/
|
||||
@Override
|
||||
public void initializeSchema() {
|
||||
logger.info("Initialising M4 SQLite schema at: {}", jdbcUrl);
|
||||
try (Connection connection = DriverManager.getConnection(jdbcUrl);
|
||||
Statement statement = connection.createStatement()) {
|
||||
|
||||
// Enable foreign key enforcement (SQLite disables it by default)
|
||||
statement.execute("PRAGMA foreign_keys = ON");
|
||||
|
||||
// Level 1: document master record
|
||||
statement.execute(DDL_CREATE_DOCUMENT_RECORD);
|
||||
logger.debug("Table 'document_record' created or already present.");
|
||||
|
||||
// Level 2: processing attempt history
|
||||
statement.execute(DDL_CREATE_PROCESSING_ATTEMPT);
|
||||
logger.debug("Table 'processing_attempt' created or already present.");
|
||||
|
||||
// Indexes for efficient per-document, per-run, and per-status access
|
||||
statement.execute(DDL_IDX_ATTEMPT_FINGERPRINT);
|
||||
statement.execute(DDL_IDX_ATTEMPT_RUN_ID);
|
||||
statement.execute(DDL_IDX_RECORD_STATUS);
|
||||
logger.debug("Indexes created or already present.");
|
||||
|
||||
logger.info("M4 SQLite schema initialisation completed successfully.");
|
||||
|
||||
} catch (SQLException e) {
|
||||
String message = "Failed to initialise M4 SQLite schema at '" + jdbcUrl + "': " + e.getMessage();
|
||||
logger.error(message, e);
|
||||
throw new DocumentPersistenceException(message, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the JDBC URL this adapter uses to connect to the SQLite database.
|
||||
* <p>
|
||||
* Intended for logging and diagnostics only.
|
||||
*
|
||||
* @return the JDBC URL; never null or blank
|
||||
*/
|
||||
public String getJdbcUrl() {
|
||||
return jdbcUrl;
|
||||
}
|
||||
}
|
||||
+37
@@ -0,0 +1,37 @@
|
||||
/**
|
||||
* SQLite persistence adapter for the M4 two-level persistence model.
|
||||
*
|
||||
* <h2>Purpose</h2>
|
||||
* <p>This package contains the technical SQLite infrastructure for the M4 persistence
|
||||
* layer. It is the only place in the entire application where JDBC connections, SQL DDL,
|
||||
* and SQLite-specific types are used. No JDBC or SQLite types leak into the
|
||||
* {@code application} or {@code domain} modules.
|
||||
*
|
||||
* <h2>Two-level persistence model</h2>
|
||||
* <p>M4 persistence is structured in exactly two levels:
|
||||
* <ol>
|
||||
* <li><strong>Document master record</strong> ({@code document_record} table) —
|
||||
* one row per unique SHA-256 fingerprint; carries the current overall status,
|
||||
* failure counters, and the most recently known source location.</li>
|
||||
* <li><strong>Processing attempt history</strong> ({@code processing_attempt} table) —
|
||||
* one row per historised processing attempt; references the master record via
|
||||
* fingerprint; attempt numbers are monotonically increasing per fingerprint.</li>
|
||||
* </ol>
|
||||
*
|
||||
* <h2>Schema initialisation timing</h2>
|
||||
* <p>The {@link de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter}
|
||||
* implements the
|
||||
* {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort}
|
||||
* and must be called <em>once</em> at program startup, before the batch document
|
||||
* processing loop begins. There is no lazy or hidden initialisation during document
|
||||
* processing.
|
||||
*
|
||||
* <h2>Architecture boundary</h2>
|
||||
* <p>All JDBC connections, SQL statements, and SQLite-specific behaviour are strictly
|
||||
* confined to this package. The application layer interacts exclusively through the
|
||||
* port interfaces defined in
|
||||
* {@code de.gecheckt.pdf.umbenenner.application.port.out}.
|
||||
*
|
||||
* @since M4-AP-003
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
|
||||
Reference in New Issue
Block a user