From 732d00c4adbaac912059d2f24c19803e5b6f7a6b Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Thu, 30 Apr 2026 11:44:28 +0200 Subject: [PATCH] Fix #49: Flyway-Integration mit V1-Basisskript und 3-Fall-Strategie Ersetzt die manuelle evolveTableColumns()-Schema-Evolution durch Flyway 10.20.1. Die Initialisierung unterscheidet drei Faelle: leere DB (Flyway-Migration), Bestandsschema ohne Flyway-History (Baseline nach Schema-Pruefung) und Folgestart mit Flyway-History (idempotent). Smoke-Test-Deadlock auf Windows durch paralleles Ausgabe-Draining des Subprozesses behoben. Co-Authored-By: Claude Sonnet 4.6 --- pdf-umbenenner-adapter-out/pom.xml | 4 + .../SqliteSchemaInitializationAdapter.java | 794 ++++++++++++------ .../adapter/out/sqlite/package-info.java | 56 +- .../db/migration/V1__initial_schema.sql | 58 ++ .../SqliteAttemptProviderPersistenceTest.java | 93 +- ...SqliteSchemaInitializationAdapterTest.java | 630 ++++++++------ .../bootstrap/ExecutableJarSmokeTestIT.java | 183 ++-- .../e2e/ProviderIdentifierE2ETest.java | 59 +- pom.xml | 8 +- 9 files changed, 1145 insertions(+), 740 deletions(-) create mode 100644 pdf-umbenenner-adapter-out/src/main/resources/db/migration/V1__initial_schema.sql diff --git a/pdf-umbenenner-adapter-out/pom.xml b/pdf-umbenenner-adapter-out/pom.xml index 3237497..ef51ea4 100644 --- a/pdf-umbenenner-adapter-out/pom.xml +++ b/pdf-umbenenner-adapter-out/pom.xml @@ -31,6 +31,10 @@ org.xerial sqlite-jdbc + + org.flywaydb + flyway-core + org.json json diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java index f6bcb71..266d421 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java +++ 
b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapter.java @@ -1,337 +1,577 @@ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.nio.file.StandardCopyOption; import java.sql.Connection; -import java.sql.DriverManager; +import java.sql.DatabaseMetaData; import java.sql.ResultSet; import java.sql.SQLException; -import java.sql.Statement; +import java.time.Instant; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; import java.util.Objects; +import java.util.Set; + +import javax.sql.DataSource; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.flywaydb.core.Flyway; +import org.sqlite.SQLiteConfig; +import org.sqlite.SQLiteDataSource; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort; /** - * SQLite implementation of {@link PersistenceSchemaInitializationPort}. - *

- * Creates or verifies the two-level persistence schema in the configured SQLite - * database file, and performs a controlled schema evolution from an earlier schema - * version to the current one. + * Flyway-basierte Implementierung von {@link PersistenceSchemaInitializationPort}. * - *

Two-level schema

- *

The schema consists of exactly two tables: - *

    - *
  1. {@code document_record} — the document master record - * (Dokument-Stammsatz). One row per unique SHA-256 fingerprint.
  2. - *
  3. {@code processing_attempt} — the processing attempt history - * (Versuchshistorie). One row per historised processing attempt, referencing - * the master record via fingerprint.
  4. - *
+ *

Erstellt oder verifiziert das Zwei-Ebenen-Persistenzschema in der konfigurierten + * SQLite-Datenbank und führt dabei eine differenzierte Startstrategie durch, + * die drei Fälle unterscheidet: * - *

Schema evolution

- *

- * When upgrading from an earlier schema, this adapter uses idempotent - * {@code ALTER TABLE ... ADD COLUMN} statements for both tables. Columns that already - * exist are silently skipped, making the evolution safe to run on both fresh and existing - * databases. The current evolution adds: - *

    - *
  • AI-traceability columns to {@code processing_attempt}
  • - *
  • Target-copy columns ({@code last_target_path}, {@code last_target_file_name}) to - * {@code document_record}
  • - *
  • Target-copy column ({@code final_target_file_name}) to {@code processing_attempt}
  • - *
  • Provider-identifier column ({@code ai_provider}) to {@code processing_attempt}; - * existing rows receive {@code NULL} as the default, which is the correct value for - * attempts recorded before provider tracking was introduced.
  • - *
+ *

Fall 1 – Leere Datenbank

+ *

Keine fachlichen Tabellen und keine Flyway-History-Tabelle vorhanden + * (bzw. Datei existiert noch nicht). Flyway führt {@code V1__initial_schema.sql} + * vollständig aus und legt das komplette Schema an. * - *

Legacy-state migration

- *

- * Documents in an earlier positive intermediate state ({@code SUCCESS} recorded without - * a validated naming proposal) are idempotently migrated to {@code READY_FOR_AI} so that - * the AI naming pipeline processes them in the next run. Terminal negative states - * ({@code FAILED_RETRYABLE}, {@code FAILED_FINAL}, skip states) are left unchanged. + *

Fall 2 – Bestehende Datenbank ohne Flyway-History

+ *

Fachliche Tabellen sind vorhanden, aber die Flyway-History-Tabelle fehlt. + * Vor der Baseline-Eintragung wird eine vollständige Schema-Prüfung gegen das + * V1-Zielschema durchgeführt. Bei konformem Schema wird ein datiertes Backup der + * SQLite-Datei erstellt, und Flyway trägt nur eine Baseline ein (Skript wird + * nicht ausgeführt). Bei fehlendem Schema-Element bricht der Start mit + * einer klaren Fehlermeldung ab. * - *

Initialisation timing

- *

This adapter must be invoked once at program startup, before the batch - * document processing loop begins. + *

Fall 3 – Folgestart mit Flyway-History

+ *

Flyway-History-Tabelle ist vorhanden. Flyway läuft idempotent und + * führt nur noch fehlende Migrationen aus. * - *

Architecture boundary

- *

All JDBC connections, SQL DDL, and SQLite-specific behaviour are strictly confined - * to this class. No JDBC or SQLite types appear in the port interface or in any - * application/domain type. + *

Fremdschlüssel

+ *

Foreign-Key-Durchsetzung wird über {@code SQLiteConfig.enforceForeignKeys(true)} + * auf DataSource-Ebene aktiviert, sodass jede neue Verbindung automatisch + * {@code PRAGMA foreign_keys = ON} erhält. + * + *

Architekturgrenze

+ *

Alle JDBC-Verbindungen, SQL-DDL und SQLite-spezifisches Verhalten sind + * ausschließlich in dieser Klasse gekapselt. Im Port-Interface und in den + * Domain-/Application-Typen erscheinen keine JDBC- oder SQLite-Typen. */ public class SqliteSchemaInitializationAdapter implements PersistenceSchemaInitializationPort { private static final Logger logger = LogManager.getLogger(SqliteSchemaInitializationAdapter.class); // ------------------------------------------------------------------------- - // DDL — document_record table + // Erwartete Tabellen und Spalten gemäß V1-Zielschema // ------------------------------------------------------------------------- - /** - * DDL for the document master record table. - *

- * Columns: id (PK), fingerprint (unique), last_known_source_locator, - * last_known_source_file_name, overall_status, content_error_count, - * transient_error_count, last_failure_instant, last_success_instant, - * created_at, updated_at. - */ - private static final String DDL_CREATE_DOCUMENT_RECORD = """ - CREATE TABLE IF NOT EXISTS document_record ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - fingerprint TEXT NOT NULL, - last_known_source_locator TEXT NOT NULL, - last_known_source_file_name TEXT NOT NULL, - overall_status TEXT NOT NULL, - content_error_count INTEGER NOT NULL DEFAULT 0, - transient_error_count INTEGER NOT NULL DEFAULT 0, - last_failure_instant TEXT, - last_success_instant TEXT, - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL, - CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint) - ) - """; + /** Alle erwarteten Spalten der Tabelle {@code document_record}. */ + private static final Set EXPECTED_COLUMNS_DOCUMENT_RECORD = Set.of( + "id", "fingerprint", "last_known_source_locator", "last_known_source_file_name", + "overall_status", "content_error_count", "transient_error_count", + "last_failure_instant", "last_success_instant", "created_at", "updated_at", + "last_target_path", "last_target_file_name" + ); + + /** Alle erwarteten Spalten der Tabelle {@code processing_attempt}. */ + private static final Set EXPECTED_COLUMNS_PROCESSING_ATTEMPT = Set.of( + "id", "fingerprint", "run_id", "attempt_number", "started_at", "ended_at", + "status", "failure_class", "failure_message", "retryable", + "model_name", "prompt_identifier", "processed_page_count", "sent_character_count", + "ai_raw_response", "ai_reasoning", "resolved_date", "date_source", + "validated_title", "final_target_file_name", "ai_provider" + ); + + /** Erwartete Indizes. 
*/ + private static final Set EXPECTED_INDEXES = Set.of( + "idx_processing_attempt_fingerprint", + "idx_processing_attempt_run_id", + "idx_document_record_overall_status" + ); + + /** Name der Flyway-History-Tabelle. */ + private static final String FLYWAY_HISTORY_TABLE = "flyway_schema_history"; // ------------------------------------------------------------------------- - // DDL — processing_attempt table (base schema, without AI traceability cols) + // Felder // ------------------------------------------------------------------------- - /** - * DDL for the base processing attempt history table. - *

- * Base columns (present in all schema versions): id, fingerprint, run_id, - * attempt_number, started_at, ended_at, status, failure_class, failure_message, retryable. - *

- * AI traceability columns are added separately via {@code ALTER TABLE} to support - * idempotent evolution from earlier schemas. - */ - private static final String DDL_CREATE_PROCESSING_ATTEMPT = """ - CREATE TABLE IF NOT EXISTS processing_attempt ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - fingerprint TEXT NOT NULL, - run_id TEXT NOT NULL, - attempt_number INTEGER NOT NULL, - started_at TEXT NOT NULL, - ended_at TEXT NOT NULL, - status TEXT NOT NULL, - failure_class TEXT, - failure_message TEXT, - retryable INTEGER NOT NULL DEFAULT 0, - CONSTRAINT fk_processing_attempt_fingerprint - FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint), - CONSTRAINT uq_processing_attempt_fingerprint_number - UNIQUE (fingerprint, attempt_number) - ) - """; - - // ------------------------------------------------------------------------- - // DDL — indexes - // ------------------------------------------------------------------------- - - /** Index on {@code processing_attempt.fingerprint} for fast per-document lookups. */ - private static final String DDL_IDX_ATTEMPT_FINGERPRINT = - "CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint " - + "ON processing_attempt (fingerprint)"; - - /** Index on {@code processing_attempt.run_id} for fast per-run lookups. */ - private static final String DDL_IDX_ATTEMPT_RUN_ID = - "CREATE INDEX IF NOT EXISTS idx_processing_attempt_run_id " - + "ON processing_attempt (run_id)"; - - /** Index on {@code document_record.overall_status} for fast status-based filtering. */ - private static final String DDL_IDX_RECORD_STATUS = - "CREATE INDEX IF NOT EXISTS idx_document_record_overall_status " - + "ON document_record (overall_status)"; - - // ------------------------------------------------------------------------- - // DDL — columns added to processing_attempt via schema evolution - // ------------------------------------------------------------------------- - - /** - * Columns to add idempotently to {@code processing_attempt}. 
- * Each entry is {@code [column_name, column_type]}. - *

- * {@code ai_provider} is nullable; existing rows receive {@code NULL}, which is the - * correct sentinel for attempts recorded before provider tracking was introduced. - */ - private static final String[][] EVOLUTION_ATTEMPT_COLUMNS = { - {"model_name", "TEXT"}, - {"prompt_identifier", "TEXT"}, - {"processed_page_count", "INTEGER"}, - {"sent_character_count", "INTEGER"}, - {"ai_raw_response", "TEXT"}, - {"ai_reasoning", "TEXT"}, - {"resolved_date", "TEXT"}, - {"date_source", "TEXT"}, - {"validated_title", "TEXT"}, - {"final_target_file_name", "TEXT"}, - {"ai_provider", "TEXT"}, - }; - - // ------------------------------------------------------------------------- - // DDL — columns added to document_record via schema evolution - // ------------------------------------------------------------------------- - - /** - * Columns to add idempotently to {@code document_record}. - * Each entry is {@code [column_name, column_type]}. - */ - private static final String[][] EVOLUTION_RECORD_COLUMNS = { - {"last_target_path", "TEXT"}, - {"last_target_file_name", "TEXT"}, - }; - - // ------------------------------------------------------------------------- - // Legacy-state status migration - // ------------------------------------------------------------------------- - - /** - * Migrates earlier positive intermediate states in {@code document_record} that were - * recorded as {@code SUCCESS} without a validated naming proposal to {@code READY_FOR_AI}, - * so the AI naming pipeline processes them in the next run. - *

- * Only rows with {@code overall_status = 'SUCCESS'} that have no corresponding - * {@code processing_attempt} with {@code status = 'PROPOSAL_READY'} are updated. - * This migration is idempotent. - */ - private static final String SQL_MIGRATE_LEGACY_SUCCESS_TO_READY_FOR_AI = """ - UPDATE document_record - SET overall_status = 'READY_FOR_AI', - updated_at = datetime('now') - WHERE overall_status = 'SUCCESS' - AND NOT EXISTS ( - SELECT 1 FROM processing_attempt pa - WHERE pa.fingerprint = document_record.fingerprint - AND pa.status = 'PROPOSAL_READY' - ) - """; - private final String jdbcUrl; /** - * Constructs the adapter with the JDBC URL of the SQLite database file. + * Erstellt den Adapter mit der JDBC-URL der SQLite-Datenbankdatei. * - * @param jdbcUrl the JDBC URL of the SQLite database; must not be null or blank - * @throws NullPointerException if {@code jdbcUrl} is null - * @throws IllegalArgumentException if {@code jdbcUrl} is blank + * @param jdbcUrl die JDBC-URL der SQLite-Datenbank; darf nicht {@code null} oder leer sein + * @throws NullPointerException wenn {@code jdbcUrl} {@code null} ist + * @throws IllegalArgumentException wenn {@code jdbcUrl} leer ist */ public SqliteSchemaInitializationAdapter(String jdbcUrl) { - Objects.requireNonNull(jdbcUrl, "jdbcUrl must not be null"); + Objects.requireNonNull(jdbcUrl, "jdbcUrl darf nicht null sein"); if (jdbcUrl.isBlank()) { - throw new IllegalArgumentException("jdbcUrl must not be blank"); + throw new IllegalArgumentException("jdbcUrl darf nicht leer sein"); } this.jdbcUrl = jdbcUrl; } /** - * Creates or verifies the persistence schema and performs schema evolution and - * status migration. - *

- * Execution order: - *

    - *
  1. Enable foreign key enforcement.
  2. - *
  3. Create {@code document_record} table (if not exists).
  4. - *
  5. Create {@code processing_attempt} table (if not exists).
  6. - *
  7. Create all indexes (if not exist).
  8. - *
  9. Add AI-traceability and provider-identifier columns to {@code processing_attempt} - * (idempotent evolution).
  10. - *
  11. Migrate earlier positive intermediate state to {@code READY_FOR_AI} (idempotent).
  12. - *
- *

- * All steps are safe to run on both fresh and existing databases. + * Erstellt oder verifiziert das Persistenzschema per Flyway. * - * @throws DocumentPersistenceException if any DDL or migration step fails + *

Erkennt anhand des Datenbankzustands automatisch einen der drei Fälle + * (leere DB, bestehende DB ohne Flyway-History, Folgestart mit Flyway-History) + * und wählt die passende Flyway-Konfiguration. + * + * @throws DocumentPersistenceException wenn das Schema nicht erstellt oder verifiziert + * werden kann, oder wenn die Schema-Prüfung bei + * einer bestehenden Datenbank fehlschlägt */ @Override public void initializeSchema() { - logger.info("Initialising SQLite persistence schema at: {}", jdbcUrl); - try (Connection connection = DriverManager.getConnection(jdbcUrl); - Statement statement = connection.createStatement()) { + logger.info("Schema-Initialisierung gestartet für: {}", jdbcUrl); + try { + DataSource dataSource = createDataSource(); + DbState state = determineDbState(dataSource); + logger.info("Erkannter Datenbankzustand: {}", state); - // Enable foreign key enforcement (SQLite disables it by default) - statement.execute("PRAGMA foreign_keys = ON"); - - // Level 1: document master record - statement.execute(DDL_CREATE_DOCUMENT_RECORD); - logger.debug("Table 'document_record' created or already present."); - - // Level 2: processing attempt history (base columns only) - statement.execute(DDL_CREATE_PROCESSING_ATTEMPT); - logger.debug("Table 'processing_attempt' created or already present."); - - // Indexes for efficient per-document, per-run, and per-status access - statement.execute(DDL_IDX_ATTEMPT_FINGERPRINT); - statement.execute(DDL_IDX_ATTEMPT_RUN_ID); - statement.execute(DDL_IDX_RECORD_STATUS); - logger.debug("Indexes created or already present."); - - // Schema evolution: add AI-traceability + target-copy columns (idempotent) - evolveTableColumns(connection, "processing_attempt", EVOLUTION_ATTEMPT_COLUMNS); - evolveTableColumns(connection, "document_record", EVOLUTION_RECORD_COLUMNS); - - // Status migration: earlier positive intermediate state → READY_FOR_AI - int migrated = statement.executeUpdate(SQL_MIGRATE_LEGACY_SUCCESS_TO_READY_FOR_AI); - if 
(migrated > 0) { - logger.info("Status migration: {} document(s) migrated from legacy SUCCESS state to READY_FOR_AI.", - migrated); - } else { - logger.debug("Status migration: no documents required migration."); + switch (state) { + case EMPTY -> runFall1NewDb(dataSource); + case EXISTING_WITHOUT_FLYWAY -> runFall2BaselineExistingDb(dataSource); + case FLYWAY_MANAGED -> runFall3FollowUpStart(dataSource); } - logger.info("SQLite schema initialisation and migration completed successfully."); - - } catch (SQLException e) { - String message = "Failed to initialise SQLite persistence schema at '" + jdbcUrl + "': " + e.getMessage(); - logger.error(message, e); - throw new DocumentPersistenceException(message, e); + logger.info("Schema-Initialisierung erfolgreich abgeschlossen."); + } catch (DocumentPersistenceException e) { + throw e; + } catch (Exception e) { + String msg = "Schema-Initialisierung fehlgeschlagen für '" + jdbcUrl + "': " + e.getMessage(); + logger.error(msg, e); + throw new DocumentPersistenceException(msg, e); } } /** - * Idempotently adds the given columns to the specified table. - *

- * For each column that does not yet exist, an {@code ALTER TABLE ... ADD COLUMN} - * statement is executed. Columns that already exist are silently skipped. + * Gibt die JDBC-URL zurück, die dieser Adapter verwendet. * - * @param connection an open JDBC connection to the database - * @param tableName the name of the table to evolve - * @param columns array of {@code [column_name, column_type]} pairs to add - * @throws SQLException if a column addition fails for a reason other than duplicate column - */ - private void evolveTableColumns(Connection connection, String tableName, String[][] columns) - throws SQLException { - java.util.Set existingColumns = new java.util.HashSet<>(); - try (ResultSet rs = connection.getMetaData().getColumns(null, null, tableName, null)) { - while (rs.next()) { - existingColumns.add(rs.getString("COLUMN_NAME").toLowerCase()); - } - } - - for (String[] col : columns) { - String columnName = col[0]; - String columnType = col[1]; - if (!existingColumns.contains(columnName.toLowerCase())) { - String alterSql = "ALTER TABLE " + tableName + " ADD COLUMN " + columnName + " " + columnType; - try (Statement stmt = connection.createStatement()) { - stmt.execute(alterSql); - } - logger.debug("Schema evolution: added column '{}' to '{}'.", columnName, tableName); - } else { - logger.debug("Schema evolution: column '{}' in '{}' already present, skipped.", - columnName, tableName); - } - } - } - - /** - * Returns the JDBC URL this adapter uses to connect to the SQLite database. - * - * @return the JDBC URL; never null or blank + * @return die JDBC-URL; niemals {@code null} oder leer */ public String getJdbcUrl() { return jdbcUrl; } + + // ------------------------------------------------------------------------- + // Fallbehandlung + // ------------------------------------------------------------------------- + + /** + * Fall 1: Leere Datenbank – Flyway führt V1__initial_schema.sql vollständig aus. 
+ * + * @param dataSource die konfigurierte DataSource + */ + private void runFall1NewDb(DataSource dataSource) { + logger.info("Fall 1: Leere Datenbank – Flyway legt vollständiges Schema an."); + Flyway flyway = buildFlyway(dataSource, false); + flyway.migrate(); + logger.info("Fall 1: Schema vollständig erstellt."); + } + + /** + * Fall 2: Bestehende Datenbank ohne Flyway-History. + * + *

Führt die vollständige Schema-Prüfcheckliste durch. Bei konformem Schema + * wird ein datiertes Backup angelegt und Flyway trägt nur eine Baseline ein. + * Bei fehlendem Schema-Element bricht der Start ab. + * + * @param dataSource die konfigurierte DataSource + * @throws DocumentPersistenceException wenn das Schema nicht konform ist oder das Backup schlägt fehl + */ + private void runFall2BaselineExistingDb(DataSource dataSource) { + logger.info("Fall 2: Bestehende Datenbank ohne Flyway-History – Schema-Prüfung läuft."); + + // Vollständige Schema-Prüfung vor Baseline + try (Connection conn = dataSource.getConnection()) { + verifyExistingSchemaMatches(conn); + } catch (SQLException e) { + String msg = "Datenbankverbindung für Schema-Prüfung fehlgeschlagen: " + e.getMessage(); + logger.error(msg, e); + throw new DocumentPersistenceException(msg, e); + } + logger.info("Fall 2: Schema-Prüfung bestanden."); + + // Backup der SQLite-Datei anlegen + createDatedBackup(); + + // Flyway-Baseline eintragen (V1 wird NICHT ausgeführt) + Flyway flyway = buildFlyway(dataSource, true); + flyway.migrate(); + logger.info("Fall 2: Flyway-Baseline erfolgreich eingetragen."); + } + + /** + * Fall 3: Folgestart – Flyway läuft idempotent und führt nur fehlende Migrationen aus. + * + * @param dataSource die konfigurierte DataSource + */ + private void runFall3FollowUpStart(DataSource dataSource) { + logger.info("Fall 3: Folgestart mit Flyway-History – idempotente Migration."); + Flyway flyway = buildFlyway(dataSource, false); + flyway.migrate(); + logger.info("Fall 3: Migration abgeschlossen (idempotent)."); + } + + /** + * Erzeugt eine standardisiert konfigurierte {@link Flyway}-Instanz. + * + *

Alle drei Fälle nutzen dieselbe Grundkonfiguration: + *

    + *
  • Explizite Migrations-Location {@code classpath:db/migration} – verhindert + * unerwünschtes Klasspfad-Scannen des gesamten JARs.
  • + *
  • Keine Umgebungsvariablen-Konfiguration – verhindert unbeabsichtigte + * Übersteuerung durch Build-System-Variablen.
  • + *
  • Kein Verbindungs-Retry ({@code connectRetries=0}) – Fehler schlagen + * sofort statt nach mehreren Sekunden Wartezeit fehl.
  • + *
+ * + * @param dataSource die zu verwendende DataSource + * @param baselineOnMigrate ob beim Migrate eine Baseline einzutragen ist (nur Fall 2) + * @return eine konfigurierte, betriebsbereite {@link Flyway}-Instanz + */ + private Flyway buildFlyway(DataSource dataSource, boolean baselineOnMigrate) { + var config = Flyway.configure() + .dataSource(dataSource) + .locations("classpath:db/migration") + .connectRetries(0) + .baselineOnMigrate(baselineOnMigrate); + if (baselineOnMigrate) { + config = config + .baselineVersion("1") + .baselineDescription("Bestehende Datenbank baselined"); + } + return config.load(); + } + + // ------------------------------------------------------------------------- + // Datenbankzustand erkennen + // ------------------------------------------------------------------------- + + /** + * Repräsentiert den erkannten Zustand der SQLite-Datenbank beim Start. + */ + enum DbState { + /** Keine fachlichen Tabellen und keine Flyway-History vorhanden. */ + EMPTY, + /** Fachliche Tabellen vorhanden, aber keine Flyway-History-Tabelle. */ + EXISTING_WITHOUT_FLYWAY, + /** Flyway-History-Tabelle vorhanden – Datenbank wird bereits von Flyway verwaltet. */ + FLYWAY_MANAGED + } + + /** + * Ermittelt den aktuellen Zustand der Datenbank. + * + *

"Leer" bedeutet: keine Tabellen vorhanden – nicht nur Dateigröße 0 Byte. + * + * @param dataSource die zu prüfende DataSource + * @return der erkannte {@link DbState} + * @throws DocumentPersistenceException bei Verbindungsfehlern + */ + private DbState determineDbState(DataSource dataSource) { + try (Connection conn = dataSource.getConnection()) { + DatabaseMetaData meta = conn.getMetaData(); + Set tables = readTableNames(meta); + + if (tables.contains(FLYWAY_HISTORY_TABLE)) { + return DbState.FLYWAY_MANAGED; + } + // "Leer" = keine Tabellen vorhanden (unabhängig von Dateigröße) + boolean hasFachlicheTabellen = tables.contains("document_record") + || tables.contains("processing_attempt"); + if (hasFachlicheTabellen) { + return DbState.EXISTING_WITHOUT_FLYWAY; + } + return DbState.EMPTY; + } catch (SQLException e) { + String msg = "Datenbankzustand konnte nicht ermittelt werden: " + e.getMessage(); + logger.error(msg, e); + throw new DocumentPersistenceException(msg, e); + } + } + + // ------------------------------------------------------------------------- + // Schema-Prüfcheckliste (Fall 2) + // ------------------------------------------------------------------------- + + /** + * Vollständige Schema-Prüfung gegen das V1-Zielschema. + * + *

Prüft alle erwarteten Tabellen, Spalten, Constraints und Indizes per + * {@link DatabaseMetaData}. Bei fehlendem Element wird der Start sofort mit + * einer aussagekräftigen Fehlermeldung abgebrochen – kein stilles Heilen. + * + * @param conn offene JDBC-Verbindung zur Datenbank + * @throws DocumentPersistenceException wenn ein Schema-Element fehlt + * @throws SQLException bei technischen Datenbankfehlern + */ + private void verifyExistingSchemaMatches(Connection conn) throws SQLException { + DatabaseMetaData meta = conn.getMetaData(); + List fehler = new ArrayList<>(); + + // Tabellen prüfen + Set tabellen = readTableNames(meta); + if (!tabellen.contains("document_record")) { + fehler.add("Tabelle 'document_record' fehlt"); + } + if (!tabellen.contains("processing_attempt")) { + fehler.add("Tabelle 'processing_attempt' fehlt"); + } + + // Spalten prüfen – nur wenn Tabellen vorhanden + if (tabellen.contains("document_record")) { + pruefeSpaltenvollstaendigkeit(meta, "document_record", + EXPECTED_COLUMNS_DOCUMENT_RECORD, fehler); + } + if (tabellen.contains("processing_attempt")) { + pruefeSpaltenvollstaendigkeit(meta, "processing_attempt", + EXPECTED_COLUMNS_PROCESSING_ATTEMPT, fehler); + } + + // Indizes prüfen + if (tabellen.contains("document_record") && tabellen.contains("processing_attempt")) { + Set vorhandeneIndizes = readIndexNames(meta); + for (String erwartetIndex : EXPECTED_INDEXES) { + if (!vorhandeneIndizes.contains(erwartetIndex)) { + fehler.add("Index '" + erwartetIndex + "' fehlt"); + } + } + } + + // Constraints prüfen (soweit per Metadata prüfbar) + if (tabellen.contains("document_record")) { + pruefeUniqueConstraintAufFingerprint(conn, fehler); + } + if (tabellen.contains("processing_attempt")) { + pruefeForeignKeyAufDocumentRecord(conn, fehler); + } + + if (!fehler.isEmpty()) { + String fehlerliste = String.join("; ", fehler); + String msg = "Schema-Prüfung fehlgeschlagen – folgende Elemente fehlen oder sind nicht konform: " + + fehlerliste; + 
logger.error(msg); + throw new DocumentPersistenceException(msg); + } + } + + /** + * Prüft, ob alle erwarteten Spalten in der angegebenen Tabelle vorhanden sind. + * + * @param meta Datenbankmetadaten + * @param tabellenname Name der zu prüfenden Tabelle + * @param erwarteteSpalten Menge der erwarteten Spaltennamen (Kleinschreibung) + * @param fehler Liste, in die fehlende Elemente eingetragen werden + * @throws SQLException bei technischen Datenbankfehlern + */ + private void pruefeSpaltenvollstaendigkeit(DatabaseMetaData meta, String tabellenname, + Set erwarteteSpalten, List fehler) throws SQLException { + Set vorhandeneSpalten = new HashSet<>(); + try (ResultSet rs = meta.getColumns(null, null, tabellenname, null)) { + while (rs.next()) { + vorhandeneSpalten.add(rs.getString("COLUMN_NAME").toLowerCase()); + } + } + for (String erwartet : erwarteteSpalten) { + if (!vorhandeneSpalten.contains(erwartet)) { + fehler.add("Spalte '" + tabellenname + "." + erwartet + "' fehlt"); + } + } + } + + /** + * Prüft das UNIQUE-Constraint auf {@code document_record.fingerprint} anhand der + * Indexmetadaten. + * + * @param conn offene JDBC-Verbindung + * @param fehler Liste, in die fehlende Elemente eingetragen werden + * @throws SQLException bei technischen Datenbankfehlern + */ + private void pruefeUniqueConstraintAufFingerprint(Connection conn, + List fehler) throws SQLException { + boolean uniqueGefunden = false; + try (ResultSet rs = conn.getMetaData().getIndexInfo(null, null, "document_record", true, false)) { + while (rs.next()) { + String spalte = rs.getString("COLUMN_NAME"); + if ("fingerprint".equalsIgnoreCase(spalte)) { + uniqueGefunden = true; + break; + } + } + } + if (!uniqueGefunden) { + fehler.add("UNIQUE-Constraint auf 'document_record.fingerprint' fehlt"); + } + } + + /** + * Prüft den Foreign Key von {@code processing_attempt.fingerprint} auf + * {@code document_record.fingerprint} anhand der Importschlüssel-Metadaten. 
+ * + * @param conn offene JDBC-Verbindung + * @param fehler Liste, in die fehlende Elemente eingetragen werden + * @throws SQLException bei technischen Datenbankfehlern + */ + private void pruefeForeignKeyAufDocumentRecord(Connection conn, + List fehler) throws SQLException { + boolean fkGefunden = false; + try (ResultSet rs = conn.getMetaData().getImportedKeys(null, null, "processing_attempt")) { + while (rs.next()) { + String pkTabelle = rs.getString("PKTABLE_NAME"); + String fkSpalte = rs.getString("FKCOLUMN_NAME"); + if ("document_record".equalsIgnoreCase(pkTabelle) + && "fingerprint".equalsIgnoreCase(fkSpalte)) { + fkGefunden = true; + break; + } + } + } + if (!fkGefunden) { + fehler.add("Foreign Key von 'processing_attempt.fingerprint' auf 'document_record.fingerprint' fehlt"); + } + } + + // ------------------------------------------------------------------------- + // Backup-Erstellung (Fall 2) + // ------------------------------------------------------------------------- + + /** + * Erstellt eine datierte Kopie der SQLite-Datei als Backup. + * + *

Das Backup-Dateiname-Schema lautet: {@code <dateiname>.<zeitstempel>.bak}, + * z. B. {@code data.db.20260430T120000Z.bak}. + * Bei einer Kollision wird ein Zähler angehängt. + * + * @throws DocumentPersistenceException wenn das Backup nicht angelegt werden kann + */ + private void createDatedBackup() { + Path dbPath = extractDbPath(); + if (dbPath == null) { + logger.warn("Kein lokaler Dateipfad aus JDBC-URL ableitbar – Backup übersprungen: {}", jdbcUrl); + return; + } + if (!Files.exists(dbPath)) { + logger.debug("Datenbankdatei existiert noch nicht – kein Backup nötig."); + return; + } + + String zeitstempel = DateTimeFormatter.ofPattern("yyyyMMdd'T'HHmmss'Z'") + .format(java.time.ZonedDateTime.now(java.time.ZoneOffset.UTC)); + Path backup = dbPath.resolveSibling(dbPath.getFileName() + "." + zeitstempel + ".bak"); + + // Kollisionsauflösung + int zaehler = 1; + while (Files.exists(backup)) { + backup = dbPath.resolveSibling(dbPath.getFileName() + "." + zeitstempel + "." + zaehler + ".bak"); + zaehler++; + } + + try { + Files.copy(dbPath, backup, StandardCopyOption.COPY_ATTRIBUTES); + logger.info("Backup der Datenbankdatei erstellt: {}", backup); + } catch (IOException e) { + String msg = "Backup der Datenbankdatei konnte nicht erstellt werden: " + e.getMessage(); + logger.error(msg, e); + throw new DocumentPersistenceException(msg, e); + } + } + + /** + * Leitet den Dateisystempfad aus der JDBC-URL ab. + * + *

Expects URLs of the form {@code jdbc:sqlite:/path/to/file.db}.
+     * Connection parameters after a {@code ?} are not part of the file name and are
+     * dropped; the in-memory address {@code :memory:} yields no path.
+     *
+     * @return the derived {@link Path}, or {@code null} if no path can be derived
+     */
+    private Path extractDbPath() {
+        // Expected: jdbc:sqlite:/path/to/file or jdbc:sqlite:C:/path/file
+        String prefix = "jdbc:sqlite:";
+        if (!jdbcUrl.startsWith(prefix)) {
+            return null;
+        }
+        String pfad = jdbcUrl.substring(prefix.length());
+        // Everything after '?' is connection parameters, not file name
+        int parameterStart = pfad.indexOf('?');
+        if (parameterStart >= 0) {
+            pfad = pfad.substring(0, parameterStart);
+        }
+        // Blank address or in-memory database: there is no file to back up
+        if (pfad.isBlank() || ":memory:".equals(pfad)) {
+            return null;
+        }
+        try {
+            return Paths.get(pfad);
+        } catch (java.nio.file.InvalidPathException e) {
+            logger.warn("Pfad aus JDBC-URL konnte nicht geparst werden: {}", pfad);
+            return null;
+        }
+    }
+
+    // -------------------------------------------------------------------------
+    // DataSource creation
+    // -------------------------------------------------------------------------
+
+    /**
+     * Creates a {@link SQLiteDataSource} with foreign keys enabled.
+     *
+     *

Enabling them via {@link SQLiteConfig#enforceForeignKeys(boolean)} ensures
+     * that every new connection automatically gets {@code PRAGMA foreign_keys = ON};
+     * a one-off statement after opening a connection would not be sufficient.
+     *
+     * @return a configured {@link DataSource}; never {@code null}
+     */
+    private DataSource createDataSource() {
+        SQLiteConfig config = new SQLiteConfig();
+        config.enforceForeignKeys(true);
+        SQLiteDataSource ds = new SQLiteDataSource(config);
+        ds.setUrl(jdbcUrl);
+        return ds;
+    }
+
+    // -------------------------------------------------------------------------
+    // Helper methods
+    // -------------------------------------------------------------------------
+
+    /**
+     * Reads all table names from the database metadata, converted to lower case.
+     *
+     * @param meta database metadata
+     * @return set of all table names in lower case (locale-independent)
+     * @throws SQLException on technical database errors
+     */
+    private static Set<String> readTableNames(DatabaseMetaData meta) throws SQLException {
+        Set<String> names = new HashSet<>();
+        try (ResultSet rs = meta.getTables(null, null, "%", new String[]{"TABLE"})) {
+            while (rs.next()) {
+                names.add(rs.getString("TABLE_NAME").toLowerCase(java.util.Locale.ROOT)); // not default locale
+            }
+        }
+        return names;
+    }
+
+    /**
+     * Reads all index names from the database metadata for both domain tables.
+     *
+     * @param meta database metadata
+     * @return set of all index names in lower case (locale-independent)
+     * @throws SQLException on technical database errors
+     */
+    private static Set<String> readIndexNames(DatabaseMetaData meta) throws SQLException {
+        Set<String> names = new HashSet<>();
+        for (String tabelle : new String[]{"document_record", "processing_attempt"}) {
+            try (ResultSet rs = meta.getIndexInfo(null, null, tabelle, false, false)) {
+                while (rs.next()) {
+                    String indexName = rs.getString("INDEX_NAME");
+                    if (indexName != null) { // rows without an index name are skipped
+                        names.add(indexName.toLowerCase(java.util.Locale.ROOT)); // not default locale
+                    }
+                }
+            }
+        }
+        return names;
+    }
 }
diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java
index 14c5412..cd997a3 100644
--- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java
+++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/package-info.java
@@ -1,35 +1,43 @@
 /**
- * SQLite persistence adapter for the two-level persistence model.
+ * SQLite-Persistenz-Adapter für das Zwei-Ebenen-Persistenzmodell.
  *
- *

Purpose

- *

This package contains the technical SQLite infrastructure for the persistence - * layer. It is the only place in the entire application where JDBC connections, SQL DDL, - * and SQLite-specific types are used. No JDBC or SQLite types leak into the - * {@code application} or {@code domain} modules. + *

Zweck

+ *

Dieses Paket enthält die technische SQLite-Infrastruktur der Persistenzschicht. + * Es ist die einzige Stelle in der gesamten Anwendung, an der JDBC-Verbindungen, + * SQL-DDL und SQLite-spezifische Typen verwendet werden. Keine JDBC- oder + * SQLite-Typen verlassen dieses Paket in Richtung der {@code application}- + * oder {@code domain}-Module. * - *

Two-level persistence model

- *

Persistence is structured in exactly two levels: + *

Zwei-Ebenen-Persistenzmodell

+ *

Die Persistenz ist in genau zwei Ebenen strukturiert: *

    - *
  1. Document master record ({@code document_record} table) — - * one row per unique SHA-256 fingerprint; carries the current overall status, - * failure counters, and the most recently known source location.
  2. - *
  3. Processing attempt history ({@code processing_attempt} table) — - * one row per historised processing attempt; references the master record via - * fingerprint; attempt numbers are monotonically increasing per fingerprint.
  4. + *
  5. Dokument-Stammsatz ({@code document_record}-Tabelle) – + * eine Zeile pro eindeutigem SHA-256-Fingerprint; trägt den aktuellen + * Gesamtstatus, Fehlerzähler und den zuletzt bekannten Quellort.
  6. + *
  7. Versuchshistorie ({@code processing_attempt}-Tabelle) – + * eine Zeile pro historisiertem Verarbeitungsversuch; referenziert den + * Stammsatz über den Fingerprint; Versuchsnummern sind pro Fingerprint + * monoton steigend.
  8. *
* - *

Schema initialisation timing

- *

The {@link de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter} - * implements the + *

Schema-Initialisierung mit Flyway

+ *

Der {@link de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter} + * implementiert den * {@link de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort} - * and must be called once at program startup, before the batch document - * processing loop begins. There is no lazy or hidden initialisation during document - * processing. + * und muss einmal beim Programmstart aufgerufen werden, bevor die + * Verarbeitungsschleife beginnt. Die Initialisierung unterscheidet drei Fälle: + * leere Datenbank, bestehende Datenbank ohne Flyway-History (Baseline-Eintragung + * nach vollständiger Schema-Prüfung) und Folgestart mit Flyway-History (idempotent). * - *

Architecture boundary

- *

All JDBC connections, SQL statements, and SQLite-specific behaviour are strictly - * confined to this package. The application layer interacts exclusively through the - * port interfaces defined in + *

Fremdschlüssel

+ *

Foreign-Key-Durchsetzung wird über {@code SQLiteConfig.enforceForeignKeys(true)} + * auf DataSource-Ebene aktiviert, sodass jede neue Verbindung automatisch + * {@code PRAGMA foreign_keys = ON} erhält. + * + *

Architekturgrenze

+ *

Alle JDBC-Verbindungen, SQL-Anweisungen und SQLite-spezifisches Verhalten sind + * ausschließlich in diesem Paket gekapselt. Die Application-Schicht interagiert + * ausschließlich über die Port-Interfaces in * {@code de.gecheckt.pdf.umbenenner.application.port.out}. */ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; diff --git a/pdf-umbenenner-adapter-out/src/main/resources/db/migration/V1__initial_schema.sql b/pdf-umbenenner-adapter-out/src/main/resources/db/migration/V1__initial_schema.sql new file mode 100644 index 0000000..7d9b08d --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/resources/db/migration/V1__initial_schema.sql @@ -0,0 +1,58 @@ +-- Vollständiges Basisschema: Dokument-Stammsatz und Versuchshistorie. +-- Dieses Skript wird für neue Datenbanken ausgeführt (Fall 1). +-- Für bestehende Datenbanken mit konformem Schema wird nur eine Flyway-Baseline +-- eingetragen; das Skript wird in diesem Fall NICHT ausgeführt (Fall 2). + +CREATE TABLE document_record ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + last_known_source_locator TEXT NOT NULL, + last_known_source_file_name TEXT NOT NULL, + overall_status TEXT NOT NULL, + content_error_count INTEGER NOT NULL DEFAULT 0, + transient_error_count INTEGER NOT NULL DEFAULT 0, + last_failure_instant TEXT, + last_success_instant TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_target_path TEXT, + last_target_file_name TEXT, + CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint) +); + +CREATE TABLE processing_attempt ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + run_id TEXT NOT NULL, + attempt_number INTEGER NOT NULL, + started_at TEXT NOT NULL, + ended_at TEXT NOT NULL, + status TEXT NOT NULL, + failure_class TEXT, + failure_message TEXT, + retryable INTEGER NOT NULL DEFAULT 0, + model_name TEXT, + prompt_identifier TEXT, + processed_page_count INTEGER, + sent_character_count INTEGER, + ai_raw_response TEXT, + 
ai_reasoning TEXT, + resolved_date TEXT, + date_source TEXT, + validated_title TEXT, + final_target_file_name TEXT, + ai_provider TEXT, + CONSTRAINT fk_processing_attempt_fingerprint + FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint), + CONSTRAINT uq_processing_attempt_fingerprint_number + UNIQUE (fingerprint, attempt_number) +); + +CREATE INDEX idx_processing_attempt_fingerprint + ON processing_attempt (fingerprint); + +CREATE INDEX idx_processing_attempt_run_id + ON processing_attempt (run_id); + +CREATE INDEX idx_document_record_overall_status + ON document_record (overall_status); diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteAttemptProviderPersistenceTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteAttemptProviderPersistenceTest.java index 180cf7f..f6b1beb 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteAttemptProviderPersistenceTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteAttemptProviderPersistenceTest.java @@ -24,11 +24,11 @@ import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import de.gecheckt.pdf.umbenenner.domain.model.RunId; /** - * Tests for the additive {@code ai_provider} column in {@code processing_attempt}. - *

- * Covers schema migration (idempotency, nullable default for existing rows), - * write/read round-trips for both supported provider identifiers, and - * backward compatibility with databases created before provider tracking was introduced. + * Tests für {@code ai_provider} in {@code processing_attempt}. + * + *

Prüft Schreib-/Lese-Roundtrips für beide Provider-Identifikatoren, + * Idempotenz der Initialisierung sowie das Verhalten bei Schemata, + * die nicht dem Zielschema entsprechen (harter Abbruch per Fall-2-Strategie). */ class SqliteAttemptProviderPersistenceTest { @@ -64,25 +64,24 @@ class SqliteAttemptProviderPersistenceTest { } /** - * A database that already has the {@code processing_attempt} table without - * {@code ai_provider} (simulating an existing installation before this column was added) - * must receive the column via the idempotent schema evolution. + * Eine bestehende Datenbank ohne {@code ai_provider}-Spalte in {@code processing_attempt} + * entspricht nicht dem vollständigen Zielschema. Die Initialisierung muss mit einem + * klaren Fehler abbrechen, da kein stilles Heilen stattfindet. */ @Test - void addsProviderColumnOnExistingDbWithoutColumn() throws SQLException { - // Bootstrap schema without the ai_provider column (simulate legacy DB) + void existingDbOhneAiProviderSpalte_brichtAb() throws SQLException { + // Schema ohne ai_provider anlegen createLegacySchema(); assertThat(columnExists("processing_attempt", "ai_provider")) - .as("ai_provider must not be present before evolution") + .as("ai_provider darf im Legacy-Schema noch nicht vorhanden sein") .isFalse(); - // Running initializeSchema must add the column - schemaAdapter.initializeSchema(); - - assertThat(columnExists("processing_attempt", "ai_provider")) - .as("ai_provider column must be added by schema evolution") - .isTrue(); + // Initialisierung muss mit Fehler abbrechen (nicht konformes Schema) + org.junit.jupiter.api.Assertions.assertThrows( + de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException.class, + () -> schemaAdapter.initializeSchema(), + "Erwarte Fehler bei nicht konformem Schema (fehlende ai_provider-Spalte)"); } /** @@ -101,25 +100,28 @@ class SqliteAttemptProviderPersistenceTest { } /** - * Rows that existed before the {@code ai_provider} column was 
added must have - * {@code NULL} as the column value, not a non-null default. + * Neue Versuche die ohne Provider-Information gespeichert werden (z. B. über + * {@code ProcessingAttempt.withoutAiFields}), müssen {@code null} als + * {@code ai_provider} zurückliefern. */ @Test - void existingRowsKeepNullProvider() throws SQLException { - // Create legacy schema and insert a row without ai_provider - createLegacySchema(); - DocumentFingerprint fp = fingerprint("aa"); - insertLegacyDocumentRecord(fp); - insertLegacyAttemptRow(fp, "READY_FOR_AI"); - - // Now evolve the schema + void neuerVersuchOhneProvider_haeltNullProviderNachSchreibenUndLesen() { schemaAdapter.initializeSchema(); + DocumentFingerprint fp = fingerprint("aa"); + insertDocumentRecord(fp); - // Read the existing row — ai_provider must be NULL - List attempts = repository.findAllByFingerprint(fp); - assertThat(attempts).hasSize(1); - assertThat(attempts.get(0).aiProvider()) - .as("Existing rows must have NULL ai_provider after schema evolution") + java.time.Instant now = java.time.Instant.now().truncatedTo(java.time.temporal.ChronoUnit.MICROS); + ProcessingAttempt attemptOhneProvider = ProcessingAttempt.withoutAiFields( + fp, new RunId("run-null"), 1, + now, now.plusSeconds(1), + ProcessingStatus.FAILED_RETRYABLE, + "Err", "msg", true); + repository.save(attemptOhneProvider); + + List gelesen = repository.findAllByFingerprint(fp); + assertThat(gelesen).hasSize(1); + assertThat(gelesen.get(0).aiProvider()) + .as("Versuche ohne Provider müssen null zurückgeben") .isNull(); } @@ -213,29 +215,24 @@ class SqliteAttemptProviderPersistenceTest { } /** - * Reading a database that was created without the {@code ai_provider} column - * (a pre-extension database) must succeed; the new field must be empty/null - * for historical attempts. + * Eine Datenbank mit nicht konformem Schema (fehlende Spalten, fehlende Indizes) + * wird von der Initialisierung mit einem klaren Fehler abgebrochen. 
+ * Es findet kein stilles Heilen statt. */ @Test - void legacyDataReadingDoesNotFail() throws SQLException { - // Set up legacy schema with a row that has no ai_provider column + void nichtKonformesSchema_brichtMitAussagekraeftigemFehlerAb() throws SQLException { + // Legacy-Schema anlegen (fehlt: ai_provider, last_target_path, last_target_file_name, + // Indizes fehlen ebenfalls) createLegacySchema(); DocumentFingerprint fp = fingerprint("ee"); insertLegacyDocumentRecord(fp); insertLegacyAttemptRow(fp, "FAILED_RETRYABLE"); - // Evolve schema — now ai_provider column exists but legacy rows have NULL - schemaAdapter.initializeSchema(); - - // Reading must not throw and must return null for ai_provider - List attempts = repository.findAllByFingerprint(fp); - assertThat(attempts).hasSize(1); - assertThat(attempts.get(0).aiProvider()) - .as("Legacy attempt (from before provider tracking) must have null aiProvider") - .isNull(); - // Other fields must still be readable - assertThat(attempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + // Initialisierung muss abbrechen + org.junit.jupiter.api.Assertions.assertThrows( + de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException.class, + () -> schemaAdapter.initializeSchema(), + "Erwarte Fehler bei nicht konformem Bestands-Schema"); } /** diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java index 7fcf252..8699f71 100644 --- a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteSchemaInitializationAdapterTest.java @@ -3,6 +3,7 @@ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; import 
static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatThrownBy; +import java.nio.file.Files; import java.nio.file.Path; import java.sql.Connection; import java.sql.DatabaseMetaData; @@ -14,38 +15,34 @@ import java.util.Set; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; +import org.sqlite.SQLiteConfig; +import org.sqlite.SQLiteDataSource; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; /** - * Tests for {@link SqliteSchemaInitializationAdapter}. - *

- * Verifies that the two-level schema is created correctly, that schema evolution - * (idempotent addition of AI traceability columns) works, that the idempotent - * status migration of earlier positive intermediate states to {@code READY_FOR_AI} - * is correct, and that invalid configuration is rejected. + * Tests für {@link SqliteSchemaInitializationAdapter}. + * + *

Prüft die differenzierte 3-Fall-Strategie (leere DB, bestehende DB ohne + * Flyway-History, Folgestart), die vollständige Schema-Prüfcheckliste für Fall 2, + * die Foreign-Key-Aktivierung via DataSource sowie den Konstruktor. */ class SqliteSchemaInitializationAdapterTest { - @TempDir - Path tempDir; - // ------------------------------------------------------------------------- - // Construction + // Konstruktor // ------------------------------------------------------------------------- @Test void constructor_rejectsNullJdbcUrl() { assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(null)) - .isInstanceOf(NullPointerException.class) - .hasMessageContaining("jdbcUrl"); + .isInstanceOf(NullPointerException.class); } @Test void constructor_rejectsBlankJdbcUrl() { assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(" ")) - .isInstanceOf(IllegalArgumentException.class) - .hasMessageContaining("jdbcUrl"); + .isInstanceOf(IllegalArgumentException.class); } @Test @@ -56,215 +53,341 @@ class SqliteSchemaInitializationAdapterTest { } // ------------------------------------------------------------------------- - // Schema creation – tables present + // Fall 1: Leere Datenbank – vollständiges Schema anlegen // ------------------------------------------------------------------------- @Test - void initializeSchema_createsBothTables(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, "schema_test.db"); - SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl); + void fall1_leereDb_laegtVollstaendigesSchemaAn(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fall1.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - adapter.initializeSchema(); - - Set tables = readTableNames(jdbcUrl); - assertThat(tables).contains("document_record", "processing_attempt"); + Set tabellen = readTableNames(jdbcUrl); + assertThat(tabellen).contains("document_record", 
"processing_attempt"); } @Test - void initializeSchema_documentRecordHasAllMandatoryColumns(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, "columns_test.db"); + void fall1_leereDb_documentRecordHatAlleErwartetenSpalten(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fall1_columns_dr.db"); new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - Set columns = readColumnNames(jdbcUrl, "document_record"); - assertThat(columns).containsExactlyInAnyOrder( - "id", - "fingerprint", - "last_known_source_locator", - "last_known_source_file_name", - "overall_status", - "content_error_count", - "transient_error_count", - "last_failure_instant", - "last_success_instant", - "created_at", - "updated_at", - "last_target_path", - "last_target_file_name" + Set spalten = readColumnNames(jdbcUrl, "document_record"); + assertThat(spalten).containsExactlyInAnyOrder( + "id", "fingerprint", "last_known_source_locator", "last_known_source_file_name", + "overall_status", "content_error_count", "transient_error_count", + "last_failure_instant", "last_success_instant", "created_at", "updated_at", + "last_target_path", "last_target_file_name" ); } @Test - void initializeSchema_processingAttemptHasAllMandatoryColumns(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, "attempt_columns_test.db"); + void fall1_leereDb_processingAttemptHatAlleErwartetenSpalten(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fall1_columns_pa.db"); new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - Set columns = readColumnNames(jdbcUrl, "processing_attempt"); - assertThat(columns).containsExactlyInAnyOrder( - "id", - "fingerprint", - "run_id", - "attempt_number", - "started_at", - "ended_at", - "status", - "failure_class", - "failure_message", - "retryable", - "model_name", - "prompt_identifier", - "processed_page_count", - "sent_character_count", - "ai_raw_response", - "ai_reasoning", - 
"resolved_date", - "date_source", - "validated_title", - "final_target_file_name", - "ai_provider" + Set spalten = readColumnNames(jdbcUrl, "processing_attempt"); + assertThat(spalten).containsExactlyInAnyOrder( + "id", "fingerprint", "run_id", "attempt_number", "started_at", "ended_at", + "status", "failure_class", "failure_message", "retryable", + "model_name", "prompt_identifier", "processed_page_count", "sent_character_count", + "ai_raw_response", "ai_reasoning", "resolved_date", "date_source", + "validated_title", "final_target_file_name", "ai_provider" ); } - // ------------------------------------------------------------------------- - // Idempotency - // ------------------------------------------------------------------------- - @Test - void initializeSchema_isIdempotent_calledTwice(@TempDir Path dir) { - String jdbcUrl = jdbcUrl(dir, "idempotent_test.db"); - SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl); + void fall1_leereDb_indizesVorhanden(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fall1_indexes.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - // Must not throw on second call - adapter.initializeSchema(); - adapter.initializeSchema(); + Set indizes = readIndexNames(jdbcUrl); + assertThat(indizes).contains( + "idx_processing_attempt_fingerprint", + "idx_processing_attempt_run_id", + "idx_document_record_overall_status" + ); + } + + /** + * "Leer" bedeutet: keine Tabellen vorhanden – NICHT nur Dateigröße 0 Byte. + * Eine leere SQLite-Datei (0 Byte) muss als leere DB erkannt werden. 
+ */ + @Test + void fall1_erkenntLeereDbAuchBeiDateiOhneInhalt(@TempDir Path dir) throws Exception { + // Leere Datei anlegen (0 Byte) + Path dbPath = dir.resolve("empty.db"); + Files.createFile(dbPath); + assertThat(dbPath).exists(); + + String jdbcUrl = jdbcUrl(dir, "empty.db"); + // Muss als Fall 1 behandelt werden und erfolgreich durchlaufen + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + Set tabellen = readTableNames(jdbcUrl); + assertThat(tabellen).contains("document_record", "processing_attempt"); } // ------------------------------------------------------------------------- - // Unique constraint: fingerprint in document_record + // Fall 2: Bestehende DB ohne Flyway-History – Baseline eintragen // ------------------------------------------------------------------------- @Test - void documentRecord_fingerprintUniqueConstraintIsEnforced(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, "unique_test.db"); + void fall2_bestehendeDbOhneHistory_traegtBaseline_einUndLaeuftErfolgreich(@TempDir Path dir) + throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fall2.db"); + // Vollständiges konformes Schema anlegen (wie eine bestehende Produktions-DB) + erstelleKonformesSchema(jdbcUrl); + + // Adapter muss als Fall 2 erkennen und Baseline eintragen new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - String insertSql = """ - INSERT INTO document_record - (fingerprint, last_known_source_locator, last_known_source_file_name, - overall_status, created_at, updated_at) - VALUES (?, 'locator', 'file.pdf', 'SUCCESS', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') - """; + // Flyway-History-Tabelle muss jetzt vorhanden sein + Set tabellen = readTableNames(jdbcUrl); + assertThat(tabellen).contains("flyway_schema_history"); + // Fachliche Daten müssen erhalten bleiben + assertThat(tabellen).contains("document_record", "processing_attempt"); + } + + @Test + void 
fall2_bestehendeDbOhneHistory_erstelltDatiertesBackup(@TempDir Path dir) + throws Exception { + Path dbPath = dir.resolve("fall2_backup.db"); + String jdbcUrl = "jdbc:sqlite:" + dbPath.toAbsolutePath().toString().replace('\\', '/'); + erstelleKonformesSchema(jdbcUrl); + + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + // Backup-Datei muss vorhanden sein + long backupAnzahl = Files.list(dir) + .filter(p -> p.getFileName().toString().startsWith("fall2_backup.db.") + && p.getFileName().toString().endsWith(".bak")) + .count(); + assertThat(backupAnzahl).isEqualTo(1); + } + + @Test + void fall2_bestehendeDbMitFehlendemElement_brichtMitFehlerAb(@TempDir Path dir) { + String jdbcUrl = jdbcUrl(dir, "fall2_broken.db"); + // Schema ohne Spalte ai_provider anlegen (nicht konform) + erstelleSchemaOhneAiProvider(jdbcUrl); + + assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema()) + .isInstanceOf(DocumentPersistenceException.class) + .hasMessageContaining("ai_provider"); + } + + @Test + void fall2_bestehendeDbOhneProcessingAttemptTabelle_brichtAb(@TempDir Path dir) { + String jdbcUrl = jdbcUrl(dir, "fall2_no_attempt.db"); + // Nur document_record anlegen, processing_attempt fehlt + erstelleNurDocumentRecord(jdbcUrl); + + assertThatThrownBy(() -> new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema()) + .isInstanceOf(DocumentPersistenceException.class) + .hasMessageContaining("processing_attempt"); + } + + // ------------------------------------------------------------------------- + // Fall 3: Folgestart mit Flyway-History – idempotent + // ------------------------------------------------------------------------- + + @Test + void fall3_folgestart_laeuftIdempotentOhneException(@TempDir Path dir) { + String jdbcUrl = jdbcUrl(dir, "fall3.db"); + SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl); + + // Erster Aufruf (Fall 1) + adapter.initializeSchema(); + // Zweiter 
Aufruf (Fall 3) – darf nicht werfen + adapter.initializeSchema(); + // Dritter Aufruf (Fall 3) – ebenfalls idempotent + adapter.initializeSchema(); + } + + @Test + void fall3_folgestart_fachlicheDatenBleiben(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fall3_data.db"); + SqliteSchemaInitializationAdapter adapter = new SqliteSchemaInitializationAdapter(jdbcUrl); + adapter.initializeSchema(); + + // Testdatensatz einfügen String fp = "a".repeat(64); + insertiereDocumentRecord(jdbcUrl, fp, "SUCCESS"); - try (Connection conn = DriverManager.getConnection(jdbcUrl)) { - try (var ps = conn.prepareStatement(insertSql)) { - ps.setString(1, fp); - ps.executeUpdate(); - } - // Second insert with same fingerprint must fail - try (var ps = conn.prepareStatement(insertSql)) { - ps.setString(1, fp); - org.junit.jupiter.api.Assertions.assertThrows( - SQLException.class, ps::executeUpdate, - "Expected UNIQUE constraint violation on document_record.fingerprint"); - } + // Folgestart + adapter.initializeSchema(); + + // Daten müssen erhalten bleiben + assertThat(leseStatus(jdbcUrl, fp)).isEqualTo("SUCCESS"); + } + + // ------------------------------------------------------------------------- + // PRAGMA foreign_keys – Foreign-Key-Aktivierung via DataSource + // ------------------------------------------------------------------------- + + @Test + void foreignKeys_sindNachSchemaInitAktiv(@TempDir Path dir) throws Exception { + String jdbcUrl = jdbcUrl(dir, "fk_test.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + // Neue Verbindung über SQLiteConfig aufbauen (wie der Adapter es tut) + org.sqlite.SQLiteConfig config = new org.sqlite.SQLiteConfig(); + config.enforceForeignKeys(true); + org.sqlite.SQLiteDataSource ds = new org.sqlite.SQLiteDataSource(config); + ds.setUrl(jdbcUrl); + + try (Connection conn = ds.getConnection(); + var stmt = conn.createStatement()) { + // PRAGMA foreign_keys muss 1 zurückliefern + ResultSet rs = 
stmt.executeQuery("PRAGMA foreign_keys"); + assertThat(rs.next()).isTrue(); + assertThat(rs.getInt(1)).isEqualTo(1); + } + } + + @Test + void foreignKeys_verletzungWirdDurchgesetzt(@TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "fk_enforced.db"); + new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); + + // Versuch, einen processing_attempt ohne passendem document_record einzufügen + org.sqlite.SQLiteConfig config = new org.sqlite.SQLiteConfig(); + config.enforceForeignKeys(true); + org.sqlite.SQLiteDataSource ds = new org.sqlite.SQLiteDataSource(config); + ds.setUrl(jdbcUrl); + + try (Connection conn = ds.getConnection()) { + assertThatThrownBy(() -> { + try (var ps = conn.prepareStatement(""" + INSERT INTO processing_attempt + (fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable) + VALUES ('nichtvorhanden', 'run-1', 1, '2026-01-01T00:00:00Z', + '2026-01-01T00:01:00Z', 'FAILED_RETRYABLE', 1) + """)) { + ps.executeUpdate(); + } + }).isInstanceOf(SQLException.class); } } // ------------------------------------------------------------------------- - // Unique constraint: (fingerprint, attempt_number) in processing_attempt + // Eindeutigkeits-Constraints // ------------------------------------------------------------------------- @Test - void processingAttempt_fingerprintAttemptNumberUniqueConstraintIsEnforced(@TempDir Path dir) + void documentRecord_fingerprintUniqueConstraintWirdDurchgesetzt(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, "attempt_unique_test.db"); + String jdbcUrl = jdbcUrl(dir, "unique_dr.db"); new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); String fp = "b".repeat(64); + insertiereDocumentRecord(jdbcUrl, fp, "SUCCESS"); - // Insert master record first (FK) - try (Connection conn = DriverManager.getConnection(jdbcUrl)) { - try (var ps = conn.prepareStatement(""" - INSERT INTO document_record - (fingerprint, last_known_source_locator, 
last_known_source_file_name, - overall_status, created_at, updated_at) - VALUES (?, 'loc', 'f.pdf', 'FAILED_RETRYABLE', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') - """)) { - ps.setString(1, fp); - ps.executeUpdate(); - } - - String attemptSql = """ - INSERT INTO processing_attempt - (fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable) - VALUES (?, 'run-1', 1, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', 'FAILED_RETRYABLE', 1) - """; - - try (var ps = conn.prepareStatement(attemptSql)) { - ps.setString(1, fp); - ps.executeUpdate(); - } - // Duplicate (fingerprint, attempt_number) must fail - try (var ps = conn.prepareStatement(attemptSql)) { - ps.setString(1, fp); - org.junit.jupiter.api.Assertions.assertThrows( - SQLException.class, ps::executeUpdate, - "Expected UNIQUE constraint violation on (fingerprint, attempt_number)"); - } - } + // Zweiter Insert mit gleichem Fingerprint muss fehlschlagen + assertThatThrownBy(() -> insertiereDocumentRecord(jdbcUrl, fp, "SUCCESS")) + .isInstanceOf(SQLException.class); } - // ------------------------------------------------------------------------- - // Skip attempts are storable - // ------------------------------------------------------------------------- - @Test - void processingAttempt_skipStatusIsStorable(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, "skip_test.db"); + void processingAttempt_fingerprintUndAttemptNumberUniqueConstraintWirdDurchgesetzt( + @TempDir Path dir) throws SQLException { + String jdbcUrl = jdbcUrl(dir, "unique_pa.db"); new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); String fp = "c".repeat(64); + insertiereDocumentRecord(jdbcUrl, fp, "FAILED_RETRYABLE"); + insertiereProcessingAttempt(jdbcUrl, fp, 1); - try (Connection conn = DriverManager.getConnection(jdbcUrl)) { - // Insert master record - try (var ps = conn.prepareStatement(""" - INSERT INTO document_record - (fingerprint, last_known_source_locator, 
last_known_source_file_name, - overall_status, created_at, updated_at) - VALUES (?, 'loc', 'f.pdf', 'SUCCESS', '2026-01-01T00:00:00Z', '2026-01-01T00:00:00Z') - """)) { - ps.setString(1, fp); - ps.executeUpdate(); - } - - // Insert a SKIPPED_ALREADY_PROCESSED attempt (null failure fields, retryable=0) - try (var ps = conn.prepareStatement(""" - INSERT INTO processing_attempt - (fingerprint, run_id, attempt_number, started_at, ended_at, - status, failure_class, failure_message, retryable) - VALUES (?, 'run-2', 2, '2026-01-02T00:00:00Z', '2026-01-02T00:00:01Z', - 'SKIPPED_ALREADY_PROCESSED', NULL, NULL, 0) - """)) { - ps.setString(1, fp); - int rows = ps.executeUpdate(); - assertThat(rows).isEqualTo(1); - } - } + // Zweiter Insert mit gleicher (fingerprint, attempt_number) muss fehlschlagen + assertThatThrownBy(() -> insertiereProcessingAttempt(jdbcUrl, fp, 1)) + .isInstanceOf(SQLException.class); } // ------------------------------------------------------------------------- - // Schema evolution — AI traceability columns + // Fehlerfall: ungültige URL // ------------------------------------------------------------------------- @Test - void initializeSchema_addsAiTraceabilityColumnsToExistingSchema(@TempDir Path dir) - throws SQLException { - // Simulate a pre-evolution schema: create the base tables without AI columns - String jdbcUrl = jdbcUrl(dir, "evolution_test.db"); + void initializeSchema_wirftDocumentPersistenceException_beiUngueltigerUrl() { + SqliteSchemaInitializationAdapter adapter = + new SqliteSchemaInitializationAdapter("keine-jdbc-url"); + assertThatThrownBy(adapter::initializeSchema) + .isInstanceOf(DocumentPersistenceException.class); + } + + // ------------------------------------------------------------------------- + // Hilfsmethoden – Schema-Erstellung für Tests + // ------------------------------------------------------------------------- + + /** + * Erstellt ein vollständig konformes Schema (entspricht V1-Zielschema) ohne Flyway-History. 
+ */ + private static void erstelleKonformesSchema(String jdbcUrl) { + try (Connection conn = DriverManager.getConnection(jdbcUrl); + var stmt = conn.createStatement()) { + stmt.execute("PRAGMA foreign_keys = ON"); + stmt.execute(""" + CREATE TABLE IF NOT EXISTS document_record ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + last_known_source_locator TEXT NOT NULL, + last_known_source_file_name TEXT NOT NULL, + overall_status TEXT NOT NULL, + content_error_count INTEGER NOT NULL DEFAULT 0, + transient_error_count INTEGER NOT NULL DEFAULT 0, + last_failure_instant TEXT, + last_success_instant TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + last_target_path TEXT, + last_target_file_name TEXT, + CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint) + ) + """); + stmt.execute(""" + CREATE TABLE IF NOT EXISTS processing_attempt ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + run_id TEXT NOT NULL, + attempt_number INTEGER NOT NULL, + started_at TEXT NOT NULL, + ended_at TEXT NOT NULL, + status TEXT NOT NULL, + failure_class TEXT, + failure_message TEXT, + retryable INTEGER NOT NULL DEFAULT 0, + model_name TEXT, + prompt_identifier TEXT, + processed_page_count INTEGER, + sent_character_count INTEGER, + ai_raw_response TEXT, + ai_reasoning TEXT, + resolved_date TEXT, + date_source TEXT, + validated_title TEXT, + final_target_file_name TEXT, + ai_provider TEXT, + CONSTRAINT fk_processing_attempt_fingerprint + FOREIGN KEY (fingerprint) REFERENCES document_record (fingerprint), + CONSTRAINT uq_processing_attempt_fingerprint_number + UNIQUE (fingerprint, attempt_number) + ) + """); + stmt.execute("CREATE INDEX IF NOT EXISTS idx_processing_attempt_fingerprint ON processing_attempt (fingerprint)"); + stmt.execute("CREATE INDEX IF NOT EXISTS idx_processing_attempt_run_id ON processing_attempt (run_id)"); + stmt.execute("CREATE INDEX IF NOT EXISTS idx_document_record_overall_status ON document_record 
(overall_status)"); + } catch (SQLException e) { + throw new RuntimeException("Testvorbereitungsfehler: Schema konnte nicht erstellt werden", e); + } + } + + /** + * Erstellt ein Schema ohne die Spalte {@code ai_provider} in {@code processing_attempt}. + */ + private static void erstelleSchemaOhneAiProvider(String jdbcUrl) { try (Connection conn = DriverManager.getConnection(jdbcUrl); var stmt = conn.createStatement()) { stmt.execute(""" - CREATE TABLE IF NOT EXISTS document_record ( + CREATE TABLE document_record ( id INTEGER PRIMARY KEY AUTOINCREMENT, fingerprint TEXT NOT NULL, last_known_source_locator TEXT NOT NULL, @@ -276,11 +399,14 @@ class SqliteSchemaInitializationAdapterTest { last_success_instant TEXT, created_at TEXT NOT NULL, updated_at TEXT NOT NULL, + last_target_path TEXT, + last_target_file_name TEXT, CONSTRAINT uq_document_record_fingerprint UNIQUE (fingerprint) ) """); + // processing_attempt OHNE ai_provider stmt.execute(""" - CREATE TABLE IF NOT EXISTS processing_attempt ( + CREATE TABLE processing_attempt ( id INTEGER PRIMARY KEY AUTOINCREMENT, fingerprint TEXT NOT NULL, run_id TEXT NOT NULL, @@ -290,112 +416,56 @@ class SqliteSchemaInitializationAdapterTest { status TEXT NOT NULL, failure_class TEXT, failure_message TEXT, - retryable INTEGER NOT NULL DEFAULT 0 + retryable INTEGER NOT NULL DEFAULT 0, + model_name TEXT, + prompt_identifier TEXT, + processed_page_count INTEGER, + sent_character_count INTEGER, + ai_raw_response TEXT, + ai_reasoning TEXT, + resolved_date TEXT, + date_source TEXT, + validated_title TEXT, + final_target_file_name TEXT ) """); + } catch (SQLException e) { + throw new RuntimeException("Testvorbereitungsfehler", e); } + } - // Running initializeSchema on the existing base schema must succeed (evolution) - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - Set columns = readColumnNames(jdbcUrl, "processing_attempt"); - assertThat(columns).contains( - "model_name", "prompt_identifier", 
"processed_page_count", - "sent_character_count", "ai_raw_response", "ai_reasoning", - "resolved_date", "date_source", "validated_title"); + /** + * Erstellt nur die Tabelle {@code document_record} (ohne {@code processing_attempt}). + */ + private static void erstelleNurDocumentRecord(String jdbcUrl) { + try (Connection conn = DriverManager.getConnection(jdbcUrl); + var stmt = conn.createStatement()) { + stmt.execute(""" + CREATE TABLE document_record ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + fingerprint TEXT NOT NULL, + last_known_source_locator TEXT NOT NULL, + last_known_source_file_name TEXT NOT NULL, + overall_status TEXT NOT NULL, + content_error_count INTEGER NOT NULL DEFAULT 0, + transient_error_count INTEGER NOT NULL DEFAULT 0, + last_failure_instant TEXT, + last_success_instant TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ) + """); + } catch (SQLException e) { + throw new RuntimeException("Testvorbereitungsfehler", e); + } } // ------------------------------------------------------------------------- - // Status migration — earlier positive intermediate state → READY_FOR_AI - // ------------------------------------------------------------------------- - - @Test - void initializeSchema_migrates_legacySuccessWithoutProposal_toReadyForAi(@TempDir Path dir) - throws SQLException { - String jdbcUrl = jdbcUrl(dir, "migration_test.db"); - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - // Insert a document with SUCCESS status and no PROPOSAL_READY attempt - String fp = "d".repeat(64); - insertDocumentRecordWithStatus(jdbcUrl, fp, "SUCCESS"); - - // Run schema initialisation again (migration step runs every time) - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - String status = readOverallStatus(jdbcUrl, fp); - assertThat(status).isEqualTo("READY_FOR_AI"); - } - - @Test - void initializeSchema_migration_isIdempotent(@TempDir Path dir) throws SQLException { - String jdbcUrl = jdbcUrl(dir, 
"migration_idempotent_test.db"); - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - String fp = "e".repeat(64); - insertDocumentRecordWithStatus(jdbcUrl, fp, "SUCCESS"); - - // Run migration twice — must not corrupt data or throw - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - String status = readOverallStatus(jdbcUrl, fp); - assertThat(status).isEqualTo("READY_FOR_AI"); - } - - @Test - void initializeSchema_doesNotMigrate_successWithProposalReadyAttempt(@TempDir Path dir) - throws SQLException { - String jdbcUrl = jdbcUrl(dir, "migration_proposal_test.db"); - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - String fp = "f".repeat(64); - // SUCCESS document that already has a PROPOSAL_READY attempt must NOT be migrated - insertDocumentRecordWithStatus(jdbcUrl, fp, "SUCCESS"); - insertAttemptWithStatus(jdbcUrl, fp, "PROPOSAL_READY"); - - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - String status = readOverallStatus(jdbcUrl, fp); - assertThat(status).isEqualTo("SUCCESS"); - } - - @Test - void initializeSchema_doesNotMigrate_terminalFailureStates(@TempDir Path dir) - throws SQLException { - String jdbcUrl = jdbcUrl(dir, "migration_failure_test.db"); - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - String fpRetryable = "1".repeat(64); - String fpFinal = "2".repeat(64); - insertDocumentRecordWithStatus(jdbcUrl, fpRetryable, "FAILED_RETRYABLE"); - insertDocumentRecordWithStatus(jdbcUrl, fpFinal, "FAILED_FINAL"); - - new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema(); - - assertThat(readOverallStatus(jdbcUrl, fpRetryable)).isEqualTo("FAILED_RETRYABLE"); - assertThat(readOverallStatus(jdbcUrl, fpFinal)).isEqualTo("FAILED_FINAL"); - } - - // ------------------------------------------------------------------------- - // Error handling - // 
------------------------------------------------------------------------- - - @Test - void initializeSchema_throwsDocumentPersistenceException_onInvalidUrl() { - // SQLite is lenient with paths; use a truly invalid JDBC URL format - SqliteSchemaInitializationAdapter badAdapter = - new SqliteSchemaInitializationAdapter("not-a-jdbc-url-at-all"); - - assertThatThrownBy(badAdapter::initializeSchema) - .isInstanceOf(DocumentPersistenceException.class); - } - - // ------------------------------------------------------------------------- - // Helpers + // Hilfsmethoden – JDBC // ------------------------------------------------------------------------- private static String jdbcUrl(Path dir, String filename) { - return "jdbc:sqlite:" + dir.resolve(filename).toAbsolutePath(); + return "jdbc:sqlite:" + dir.resolve(filename).toAbsolutePath().toString().replace('\\', '/'); } private static Set readTableNames(String jdbcUrl) throws SQLException { @@ -411,7 +481,8 @@ class SqliteSchemaInitializationAdapterTest { return tables; } - private static Set readColumnNames(String jdbcUrl, String tableName) throws SQLException { + private static Set readColumnNames(String jdbcUrl, String tableName) + throws SQLException { Set columns = new HashSet<>(); try (Connection conn = DriverManager.getConnection(jdbcUrl)) { DatabaseMetaData meta = conn.getMetaData(); @@ -424,7 +495,25 @@ class SqliteSchemaInitializationAdapterTest { return columns; } - private static void insertDocumentRecordWithStatus(String jdbcUrl, String fingerprint, + private static Set readIndexNames(String jdbcUrl) throws SQLException { + Set indexes = new HashSet<>(); + try (Connection conn = DriverManager.getConnection(jdbcUrl)) { + DatabaseMetaData meta = conn.getMetaData(); + for (String table : new String[]{"document_record", "processing_attempt"}) { + try (ResultSet rs = meta.getIndexInfo(null, null, table, false, false)) { + while (rs.next()) { + String name = rs.getString("INDEX_NAME"); + if (name != null) { + 
indexes.add(name.toLowerCase()); + } + } + } + } + } + return indexes; + } + + private static void insertiereDocumentRecord(String jdbcUrl, String fingerprint, String status) throws SQLException { try (Connection conn = DriverManager.getConnection(jdbcUrl); var ps = conn.prepareStatement(""" @@ -439,21 +528,22 @@ class SqliteSchemaInitializationAdapterTest { } } - private static void insertAttemptWithStatus(String jdbcUrl, String fingerprint, - String status) throws SQLException { + private static void insertiereProcessingAttempt(String jdbcUrl, String fingerprint, + int attemptNumber) throws SQLException { try (Connection conn = DriverManager.getConnection(jdbcUrl); var ps = conn.prepareStatement(""" INSERT INTO processing_attempt (fingerprint, run_id, attempt_number, started_at, ended_at, status, retryable) - VALUES (?, 'run-1', 1, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', ?, 0) + VALUES (?, 'run-1', ?, '2026-01-01T00:00:00Z', '2026-01-01T00:01:00Z', + 'FAILED_RETRYABLE', 1) """)) { ps.setString(1, fingerprint); - ps.setString(2, status); + ps.setInt(2, attemptNumber); ps.executeUpdate(); } } - private static String readOverallStatus(String jdbcUrl, String fingerprint) throws SQLException { + private static String leseStatus(String jdbcUrl, String fingerprint) throws SQLException { try (Connection conn = DriverManager.getConnection(jdbcUrl); var ps = conn.prepareStatement( "SELECT overall_status FROM document_record WHERE fingerprint = ?")) { @@ -462,7 +552,7 @@ class SqliteSchemaInitializationAdapterTest { if (rs.next()) { return rs.getString("overall_status"); } - throw new IllegalStateException("No document record found for fingerprint: " + fingerprint); + throw new IllegalStateException("Kein Eintrag für Fingerprint: " + fingerprint); } } } diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/ExecutableJarSmokeTestIT.java 
b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/ExecutableJarSmokeTestIT.java index b9c62c3..e398d1a 100644 --- a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/ExecutableJarSmokeTestIT.java +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/ExecutableJarSmokeTestIT.java @@ -5,12 +5,16 @@ import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; +import java.io.ByteArrayOutputStream; import java.io.File; +import java.io.InputStream; +import java.io.IOException; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; +import java.util.concurrent.TimeUnit; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.io.TempDir; @@ -135,30 +139,21 @@ class ExecutableJarSmokeTestIT { System.out.println("[SMOKE-TEST] Working directory: " + workDir.toAbsolutePath()); System.out.println("[SMOKE-TEST] Command: " + String.join(" ", command)); - Process process = pb.start(); + ProcessResult result = runProcess(pb, PROCESS_TIMEOUT_MS); - // Wait for process completion with timeout - boolean completed = process.waitFor(PROCESS_TIMEOUT_MS, java.util.concurrent.TimeUnit.MILLISECONDS); - assertTrue(completed, "Process should complete within " + PROCESS_TIMEOUT_MS + "ms timeout"); + System.out.println("[SMOKE-TEST] Exit code: " + result.exitCode()); + System.out.println("[SMOKE-TEST] Subprocess stdout/stderr:\n" + result.output()); - int exitCode = process.exitValue(); - - // Capture all output for diagnostic purposes - byte[] outputBytes = process.getInputStream().readAllBytes(); - String outputText = new String(outputBytes); - - System.out.println("[SMOKE-TEST] Exit code: " + exitCode); - System.out.println("[SMOKE-TEST] Subprocess stdout/stderr:\n" + outputText); - - assertEquals(0, 
exitCode, "Successful startup should return exit code 0. Output was: " + outputText); + assertTrue(result.completed(), "Process should complete within " + PROCESS_TIMEOUT_MS + "ms timeout"); + assertEquals(0, result.exitCode(), "Successful startup should return exit code 0. Output was: " + result.output()); // Verify logging output was produced (check console output) assertTrue( - outputText.contains("Starting") || - outputText.contains("Bootstrap") || - outputText.contains("completed") || - outputText.contains("successfully"), - "Output should contain startup/shutdown indicators. Got: " + outputText + result.output().contains("Starting") || + result.output().contains("Bootstrap") || + result.output().contains("completed") || + result.output().contains("successfully"), + "Output should contain startup/shutdown indicators. Got: " + result.output() ); // Verify no unexpected artifacts were created beyond our fixtures @@ -259,31 +254,22 @@ class ExecutableJarSmokeTestIT { System.out.println("[SMOKE-TEST-INVALID] Working directory: " + workDir.toAbsolutePath()); System.out.println("[SMOKE-TEST-INVALID] Command: " + String.join(" ", command)); - Process process = pb.start(); + ProcessResult result = runProcess(pb, PROCESS_TIMEOUT_MS); - // Wait for process completion with timeout - boolean completed = process.waitFor(PROCESS_TIMEOUT_MS, java.util.concurrent.TimeUnit.MILLISECONDS); - assertTrue(completed, "Process should complete within timeout even on failure"); + System.out.println("[SMOKE-TEST-INVALID] Exit code: " + result.exitCode()); + System.out.println("[SMOKE-TEST-INVALID] Subprocess stdout/stderr:\n" + result.output()); - int exitCode = process.exitValue(); - - // Capture all output for diagnostic purposes - byte[] outputBytes = process.getInputStream().readAllBytes(); - String outputText = new String(outputBytes); - - System.out.println("[SMOKE-TEST-INVALID] Exit code: " + exitCode); - System.out.println("[SMOKE-TEST-INVALID] Subprocess stdout/stderr:\n" + 
outputText); - - assertEquals(1, exitCode, "Invalid configuration should return exit code 1. Output was: " + outputText); + assertTrue(result.completed(), "Process should complete within timeout even on failure"); + assertEquals(1, result.exitCode(), "Invalid configuration should return exit code 1. Output was: " + result.output()); // Verify error output indicates configuration failure assertTrue( - outputText.toLowerCase().contains("config") || - outputText.toLowerCase().contains("validation") || - outputText.toLowerCase().contains("invalid") || - outputText.toLowerCase().contains("error") || - outputText.toLowerCase().contains("failed"), - "Output should indicate configuration/validation error. Got: " + outputText + result.output().toLowerCase().contains("config") || + result.output().toLowerCase().contains("validation") || + result.output().toLowerCase().contains("invalid") || + result.output().toLowerCase().contains("error") || + result.output().toLowerCase().contains("failed"), + "Output should indicate configuration/validation error. 
Got: " + result.output() ); } @@ -358,17 +344,14 @@ class ExecutableJarSmokeTestIT { System.out.println("[SMOKE-TEST-EXPLICIT-CONFIG] Command: " + String.join(" ", command)); - Process process = pb.start(); - boolean completed = process.waitFor(PROCESS_TIMEOUT_MS, java.util.concurrent.TimeUnit.MILLISECONDS); - byte[] outputBytes = process.getInputStream().readAllBytes(); - String outputText = new String(outputBytes); + ProcessResult result = runProcess(pb, PROCESS_TIMEOUT_MS); - System.out.println("[SMOKE-TEST-EXPLICIT-CONFIG] Exit code: " + process.exitValue()); - System.out.println("[SMOKE-TEST-EXPLICIT-CONFIG] Output:\n" + outputText); + System.out.println("[SMOKE-TEST-EXPLICIT-CONFIG] Exit code: " + result.exitCode()); + System.out.println("[SMOKE-TEST-EXPLICIT-CONFIG] Output:\n" + result.output()); - assertTrue(completed, "Process should complete within timeout"); - assertEquals(0, process.exitValue(), - "Headless start with explicit valid --config path must exit 0. Output: " + outputText); + assertTrue(result.completed(), "Process should complete within timeout"); + assertEquals(0, result.exitCode(), + "Headless start with explicit valid --config path must exit 0. 
Output: " + result.output()); } // ========================================================================= @@ -403,27 +386,24 @@ class ExecutableJarSmokeTestIT { System.out.println("[SMOKE-TEST-MISSING-CONFIG] Command: " + String.join(" ", command)); - Process process = pb.start(); - boolean completed = process.waitFor(PROCESS_TIMEOUT_MS, java.util.concurrent.TimeUnit.MILLISECONDS); - byte[] outputBytes = process.getInputStream().readAllBytes(); - String outputText = new String(outputBytes); + ProcessResult result = runProcess(pb, PROCESS_TIMEOUT_MS); - System.out.println("[SMOKE-TEST-MISSING-CONFIG] Exit code: " + process.exitValue()); - System.out.println("[SMOKE-TEST-MISSING-CONFIG] Output:\n" + outputText); + System.out.println("[SMOKE-TEST-MISSING-CONFIG] Exit code: " + result.exitCode()); + System.out.println("[SMOKE-TEST-MISSING-CONFIG] Output:\n" + result.output()); - assertTrue(completed, "Process should complete within timeout"); - assertEquals(1, process.exitValue(), - "Headless start with non-existent --config path must exit 1. Output: " + outputText); + assertTrue(result.completed(), "Process should complete within timeout"); + assertEquals(1, result.exitCode(), + "Headless start with non-existent --config path must exit 1. Output: " + result.output()); // Verify that the output contains a diagnostic keyword so operators can trace the cause. // Only stable keywords are checked; exact message text may evolve. assertTrue( - outputText.toLowerCase().contains("not found") - || outputText.toLowerCase().contains("does not exist") - || outputText.toLowerCase().contains("missing") - || outputText.toLowerCase().contains("error") - || outputText.toLowerCase().contains("config"), - "Output must contain a diagnostic keyword for the missing config file. 
Got: " + outputText + result.output().toLowerCase().contains("not found") + || result.output().toLowerCase().contains("does not exist") + || result.output().toLowerCase().contains("missing") + || result.output().toLowerCase().contains("error") + || result.output().toLowerCase().contains("config"), + "Output must contain a diagnostic keyword for the missing config file. Got: " + result.output() ); } @@ -497,30 +477,79 @@ class ExecutableJarSmokeTestIT { System.out.println("[SMOKE-TEST-JAVAFX-FREEDOM] Command: " + String.join(" ", command)); - Process process = pb.start(); - boolean completed = process.waitFor(PROCESS_TIMEOUT_MS, java.util.concurrent.TimeUnit.MILLISECONDS); - byte[] outputBytes = process.getInputStream().readAllBytes(); - String outputText = new String(outputBytes); + ProcessResult result = runProcess(pb, PROCESS_TIMEOUT_MS); - System.out.println("[SMOKE-TEST-JAVAFX-FREEDOM] Exit code: " + process.exitValue()); - System.out.println("[SMOKE-TEST-JAVAFX-FREEDOM] Output:\n" + outputText); + System.out.println("[SMOKE-TEST-JAVAFX-FREEDOM] Exit code: " + result.exitCode()); + System.out.println("[SMOKE-TEST-JAVAFX-FREEDOM] Output:\n" + result.output()); - assertTrue(completed, "Process should complete within timeout"); - assertEquals(0, process.exitValue(), + assertTrue(result.completed(), "Process should complete within timeout"); + assertEquals(0, result.exitCode(), "Headless start must exit 0 for the JavaFX-freedom check to be meaningful. " - + "Output: " + outputText); + + "Output: " + result.output()); // JavaFX initialisation would produce one of these markers in stdout/stderr. // Their absence is the evidence that the headless path is JavaFX-free at runtime. assertFalse( - outputText.contains("Platform.startup") - || outputText.contains("Monocle") - || outputText.contains("com.sun.javafx") - || outputText.contains("javafx.application"), - "Headless output must not contain JavaFX initialisation markers. 
Got:\n" + outputText + result.output().contains("Platform.startup") + || result.output().contains("Monocle") + || result.output().contains("com.sun.javafx") + || result.output().contains("javafx.application"), + "Headless output must not contain JavaFX initialisation markers. Got:\n" + result.output() ); } + // ========================================================================= + // Shared helper: run a process and capture output concurrently + // ========================================================================= + + /** + * Holds the result of a subprocess execution. + * + * @param completed {@code true} if the process exited within the timeout + * @param exitCode the process exit code (meaningful only when {@code completed} is {@code true}) + * @param output all bytes written to stdout/stderr by the subprocess + */ + private record ProcessResult(boolean completed, int exitCode, String output) {} + + /** + * Starts the given {@link ProcessBuilder} and waits for the subprocess to finish, + * draining its combined stdout/stderr concurrently to avoid pipe-buffer deadlocks. + * + *

On Windows, the default OS pipe buffer is only 4 KB. If the subprocess writes + * more than that without the parent reading, the subprocess blocks on its next write + * while the parent blocks in {@code waitFor} — a classic deadlock. This helper prevents + * that by reading the subprocess output in a background thread so the pipe never fills up. + * + * @param pb configured and ready-to-start {@link ProcessBuilder}; must have + * {@code redirectErrorStream(true)} set so that stderr is merged into stdout + * @param timeoutMs maximum milliseconds to wait for the subprocess to finish + * @return a {@link ProcessResult} containing completion status, exit code, and captured output + * @throws Exception if the process cannot be started or the drain thread is interrupted + */ + private ProcessResult runProcess(ProcessBuilder pb, long timeoutMs) throws Exception { + Process process = pb.start(); + + // Drain stdout/stderr in a background thread to prevent Windows pipe-buffer deadlocks. + // The OS pipe buffer is only 4 KB on Windows; if the subprocess writes more than that + // while the parent is blocked in waitFor(), neither side can proceed. + ByteArrayOutputStream buffer = new ByteArrayOutputStream(); + Thread drainThread = new Thread(() -> { + try (InputStream in = process.getInputStream()) { + in.transferTo(buffer); + } catch (IOException ignored) { + // Stream closed by process exit — normal termination path + } + }, "subprocess-output-drain"); + drainThread.setDaemon(true); + drainThread.start(); + + boolean completed = process.waitFor(timeoutMs, TimeUnit.MILLISECONDS); + drainThread.join(5_000); // Allow drain to finish (process has already exited or timed out) + + int exitCode = completed ? 
process.exitValue() : -1; + return new ProcessResult(completed, exitCode, buffer.toString()); + } + // ========================================================================= // Shared helper: locate the shaded JAR // ========================================================================= diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/ProviderIdentifierE2ETest.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/ProviderIdentifierE2ETest.java index dc14de4..c62154a 100644 --- a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/ProviderIdentifierE2ETest.java +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/ProviderIdentifierE2ETest.java @@ -252,64 +252,37 @@ class ProviderIdentifierE2ETest { } // ========================================================================= - // Pflicht-Testfall: legacyDataFromBeforeV11RemainsReadable + // Nicht-konformes Bestands-Schema – Schema-Prüfung schlägt ab // ========================================================================= /** - * Proves backward compatibility with databases created before the {@code ai_provider} - * column was introduced. + * Eine Datenbank, die fachliche Tabellen enthält, aber nicht dem vollständigen + * Zielschema entspricht (fehlende Spalten, fehlende Indizes), darf nicht stillschweigend + * heilen. Die Initialisierung muss mit einem klaren Fehler abbrechen. * - *

What is verified

- *
    - *
  1. A database without the {@code ai_provider} column can be opened and its existing - * rows read without throwing any exception.
  2. - *
  3. The {@code aiProvider} field for pre-extension rows is {@code null} (no synthesised - * default, no error).
  4. - *
  5. Other fields on the pre-extension attempt (status, retryable flag) remain - * correctly readable after schema evolution.
  6. - *
  7. A new batch run on the same database succeeds, proving that the evolved schema - * is fully write-compatible with the legacy data.
  8. - *
+ *

Geprüft wird, dass die Schema-Prüfcheckliste greift: fehlen Spalten wie + * {@code ai_provider}, {@code last_target_path} oder fehlende Indizes, dann bricht + * der Start mit {@link de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException} ab. */ @Test - void legacyDataFromBeforeV11RemainsReadable(@TempDir Path tempDir) throws Exception { - // Build a database without the ai_provider column (simulates pre-extension installation) + void nichtKonformesBestandsSchema_fuehrtZuFehlerBeimStart(@TempDir Path tempDir) throws Exception { + // Datenbank mit unvollständigem Schema anlegen (fehlt: ai_provider, last_target_path, + // last_target_file_name sowie alle drei Indizes) String jdbcUrl = "jdbc:sqlite:" + tempDir.resolve("legacy.db").toAbsolutePath().toString().replace('\\', '/'); createPreExtensionSchema(jdbcUrl); - // Insert a legacy attempt row (no ai_provider column present in schema at this point) + // Datensatz einfügen (Schema ist noch partiell vorhanden) DocumentFingerprint legacyFp = fingerprint("aabbcc"); insertLegacyData(jdbcUrl, legacyFp); - // Initialize the full schema — this must add ai_provider idempotently + // Initialisierung muss mit klarem Fehler abbrechen – kein stilles Heilen de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter schema = new de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter(jdbcUrl); - schema.initializeSchema(); - - // Read back the legacy attempt — must not throw, aiProvider must be null - de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter repo = - new de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter(jdbcUrl); - List attempts = repo.findAllByFingerprint(legacyFp); - - assertThat(attempts).hasSize(1); - assertThat(attempts.get(0).aiProvider()) - .as("Pre-extension attempt must have null aiProvider after schema evolution") - .isNull(); - assertThat(attempts.get(0).status()) - 
.as("Other fields of the pre-extension row must still be readable") - .isEqualTo(ProcessingStatus.FAILED_RETRYABLE); - assertThat(attempts.get(0).retryable()).isTrue(); - - // A new batch run on the same database must succeed (write-compatible evolved schema) - try (E2ETestContext ctx = E2ETestContext.initializeWithProvider( - tempDir.resolve("newrun"), "openai-compatible")) { - ctx.createSearchablePdf("newdoc.pdf", SAMPLE_PDF_TEXT); - BatchRunOutcome outcome = ctx.runBatch(); - assertThat(outcome) - .as("Batch run on evolved database must succeed") - .isEqualTo(BatchRunOutcome.SUCCESS); - } + org.junit.jupiter.api.Assertions.assertThrows( + de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException.class, + schema::initializeSchema, + "Erwarte Fehler bei nicht konformem Bestands-Schema (fehlende Spalten/Indizes)"); } // ------------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 7c82867..5a57c32 100644 --- a/pom.xml +++ b/pom.xml @@ -36,6 +36,7 @@ 3.0.2 3.45.1.0 20240303 + 10.20.1 5.10.2 5.11.0 @@ -77,12 +78,17 @@ ${pdfbox.version} - + org.xerial sqlite-jdbc ${sqlite-jdbc.version} + + org.flywaydb + flyway-core + ${flyway.version} +