1
0

M4 AP-006 Idempotenz- und Persistenzlogik integrieren

This commit is contained in:
2026-04-02 23:36:22 +02:00
parent 8ee4041feb
commit 00c4cf1e5c
7 changed files with 1598 additions and 174 deletions

View File

@@ -5,16 +5,26 @@ import org.apache.logging.log4j.Logger;
import de.gecheckt.pdf.umbenenner.adapter.in.cli.SchedulerBatchCommand;
import de.gecheckt.pdf.umbenenner.adapter.out.configuration.PropertiesConfigurationPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
import de.gecheckt.pdf.umbenenner.application.config.InvalidStartConfigurationException;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.config.StartConfigurationValidator;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceSchemaInitializationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
@@ -29,20 +39,35 @@ import java.util.UUID;
* <p>
* Responsibilities:
* <ol>
* <li>Load and validate the startup configuration</li>
* <li>Resolve the run-lock file path (with default fallback)</li>
* <li>Create and wire all ports and adapters</li>
* <li>Start the CLI adapter and execute the batch use case</li>
* <li>Map the batch outcome to a process exit code</li>
* <li>Load and validate the startup configuration.</li>
* <li>Resolve the run-lock file path (with default fallback).</li>
* <li>Initialise the SQLite schema (M4: before the batch document loop begins).</li>
* <li>Create and wire all ports and adapters, including the M4 persistence ports.</li>
* <li>Start the CLI adapter and execute the batch use case.</li>
* <li>Map the batch outcome to a process exit code.</li>
* </ol>
* <p>
* Exit code semantics:
*
* <h2>Exit code semantics</h2>
* <ul>
* <li>{@code 0}: Batch run executed successfully; individual document failures do not
* change the exit code as long as the run itself completed without a hard infrastructure error.</li>
* <li>{@code 1}: Hard start, bootstrap, or configuration failure that prevented the run
* from beginning, or a critical infrastructure failure during the run.</li>
* change the exit code as long as the run itself completed without a hard
* infrastructure error.</li>
* <li>{@code 1}: Hard start, bootstrap, configuration, or schema-initialisation failure
* that prevented the run from beginning, or a critical infrastructure failure
* during the run.</li>
* </ul>
*
* <h2>M4 wiring</h2>
* <p>
* The production constructor wires the following M4 adapters:
* <ul>
* <li>{@link Sha256FingerprintAdapter} — SHA-256 content fingerprinting.</li>
* <li>{@link SqliteSchemaInitializationAdapter} — schema initialisation at startup.</li>
* <li>{@link SqliteDocumentRecordRepositoryAdapter} — document master record CRUD.</li>
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} — attempt history CRUD.</li>
* </ul>
*
* @since M2 (extended in M4-AP-006)
*/
public class BootstrapRunner {
@@ -83,7 +108,7 @@ public class BootstrapRunner {
* <p>
* Receives the already-loaded and validated {@link StartConfiguration} and run lock port.
* The factory is responsible for creating and wiring any additional outbound ports
* required by the use case (e.g., source document port, PDF extraction port).
* required by the use case (e.g., source document port, PDF extraction port, M4 ports).
*/
@FunctionalInterface
public interface UseCaseFactory {
@@ -101,34 +126,52 @@ public class BootstrapRunner {
/**
* Creates the BootstrapRunner with default factories for production use.
* <p>
* Wires the full processing pipeline:
* Wires the full M4 processing pipeline:
* <ul>
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading</li>
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking</li>
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery</li>
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction</li>
* <li>{@link PropertiesConfigurationPortAdapter} for configuration loading.</li>
* <li>{@link FilesystemRunLockPortAdapter} for exclusive run locking.</li>
* <li>{@link SourceDocumentCandidatesPortAdapter} for PDF candidate discovery.</li>
* <li>{@link PdfTextExtractionPortAdapter} for PDFBox-based text and page count extraction.</li>
* <li>{@link Sha256FingerprintAdapter} for SHA-256 content fingerprinting.</li>
* <li>{@link SqliteDocumentRecordRepositoryAdapter} for document master record CRUD.</li>
* <li>{@link SqliteProcessingAttemptRepositoryAdapter} for attempt history CRUD.</li>
* </ul>
* <p>
* Schema initialisation is performed in {@link #run()} before the use case is created,
* using {@link SqliteSchemaInitializationAdapter}.
*/
public BootstrapRunner() {
this.configPortFactory = PropertiesConfigurationPortAdapter::new;
this.runLockPortFactory = FilesystemRunLockPortAdapter::new;
this.validatorFactory = StartConfigurationValidator::new;
this.useCaseFactory = (config, lock) -> new DefaultBatchRunProcessingUseCase(
config,
lock,
new SourceDocumentCandidatesPortAdapter(config.sourceFolder()),
new PdfTextExtractionPortAdapter());
this.useCaseFactory = (config, lock) -> {
String jdbcUrl = buildJdbcUrl(config);
FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
DocumentRecordRepository documentRecordRepository =
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
ProcessingAttemptRepository processingAttemptRepository =
new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
M4DocumentProcessor m4Processor =
new M4DocumentProcessor(documentRecordRepository, processingAttemptRepository);
return new DefaultBatchRunProcessingUseCase(
config,
lock,
new SourceDocumentCandidatesPortAdapter(config.sourceFolder()),
new PdfTextExtractionPortAdapter(),
fingerprintPort,
m4Processor);
};
this.commandFactory = SchedulerBatchCommand::new;
}
/**
* Creates the BootstrapRunner with custom factories for testing.
*
* @param configPortFactory factory for creating ConfigurationPort instances
* @param configPortFactory factory for creating ConfigurationPort instances
* @param runLockPortFactory factory for creating RunLockPort instances
* @param validatorFactory factory for creating StartConfigurationValidator instances
* @param useCaseFactory factory for creating BatchRunProcessingUseCase instances
* @param commandFactory factory for creating SchedulerBatchCommand instances
* @param validatorFactory factory for creating StartConfigurationValidator instances
* @param useCaseFactory factory for creating BatchRunProcessingUseCase instances
* @param commandFactory factory for creating SchedulerBatchCommand instances
*/
public BootstrapRunner(ConfigurationPortFactory configPortFactory,
RunLockPortFactory runLockPortFactory,
@@ -145,11 +188,17 @@ public class BootstrapRunner {
/**
* Runs the application startup sequence.
* <p>
* AP-003: Manually wires the object graph and invokes the CLI command.
* AP-005: Wires ConfigurationPort adapter and passes it to the use case.
* AP-006: Validates configuration before allowing processing to start.
* M4 additions:
* <ul>
* <li>Derives the SQLite JDBC URL from the configured {@code sqlite.file} path.</li>
* <li>Initialises the M4 SQLite schema via
* {@link PersistenceSchemaInitializationPort#initializeSchema()} before the
* batch document loop begins. A schema initialisation failure aborts the run
* with exit code&nbsp;1.</li>
* </ul>
*
* @return exit code: 0 for success, 1 for invalid configuration or unexpected failure
* @return exit code: 0 for success, 1 for invalid configuration, schema failure,
* or unexpected bootstrap failure
*/
public int run() {
LOG.info("Bootstrap flow started.");
@@ -160,61 +209,105 @@ public class BootstrapRunner {
// Step 2: Load configuration
var config = configPort.loadConfiguration();
// Step 3: Validate configuration (AP-006)
// Step 3: Validate configuration
StartConfigurationValidator validator = validatorFactory.create();
validator.validate(config);
// Step 4: Resolve lock file path apply default if not configured (AP-006)
// Step 4: Resolve lock file path - apply default if not configured
Path lockFilePath = config.runtimeLockFile();
if (lockFilePath == null || lockFilePath.toString().isBlank()) {
lockFilePath = Paths.get("pdf-umbenenner.lock");
LOG.info("runtime.lock.file not configured, using default lock path: {}", lockFilePath.toAbsolutePath());
LOG.info("runtime.lock.file not configured, using default lock path: {}",
lockFilePath.toAbsolutePath());
}
RunLockPort runLockPort = runLockPortFactory.create(lockFilePath);
// Step 5: Create the batch run context
// Generate a unique run ID and initialize the run context
// Step 5 (M4): Initialise the SQLite schema before the batch loop begins.
// A failure here is a hard start error → exit code 1.
initializeSchema(config);
// Step 6: Create the batch run context
RunId runId = new RunId(UUID.randomUUID().toString());
BatchRunContext runContext = new BatchRunContext(runId, Instant.now());
LOG.info("Batch run started. RunId: {}", runId);
// Step 6: Create the use case with the validated config and run lock (application layer).
// Step 7: Create the use case with the validated config and run lock.
// Config is passed directly; the use case does not re-read the properties file.
// Adapters (source document port, PDF extraction port) are wired by the factory.
// Adapters (source document port, PDF extraction port, M4 ports) are wired by the factory.
BatchRunProcessingUseCase useCase = useCaseFactory.create(config, runLockPort);
// Step 7: Create the CLI command adapter with the use case
// Step 8: Create the CLI command adapter with the use case
SchedulerBatchCommand command = commandFactory.create(useCase);
// Step 8: Execute the command with the run context and handle the outcome
// Step 9: Execute the command with the run context and handle the outcome
BatchRunOutcome outcome = command.run(runContext);
// Mark run as completed (AP-003)
// Mark run as completed
runContext.setEndInstant(Instant.now());
if (outcome.isSuccess()) {
LOG.info("Batch run completed successfully. RunId: {}", runContext.runId());
return 0;
} else if (outcome.isLockUnavailable()) {
LOG.warn("Batch run aborted: another instance is already running. RunId: {}", runContext.runId());
LOG.warn("Batch run aborted: another instance is already running. RunId: {}",
runContext.runId());
return 1;
} else {
LOG.error("Batch run failed. RunId: {}", runContext.runId());
return 1;
}
} catch (InvalidStartConfigurationException e) {
// Controlled failure for invalid configuration - log clearly without stack trace
// Controlled failure for invalid configuration - log clearly without stack trace
LOG.error("Configuration validation failed: {}", e.getMessage());
return 1;
} catch (IllegalStateException e) {
// Configuration loading failed due to missing/invalid required properties
// Treat as invalid configuration for controlled failure
LOG.error("Configuration loading failed: {}", e.getMessage());
return 1;
} catch (DocumentPersistenceException e) {
// Schema initialisation failed - hard start error
LOG.error("SQLite schema initialisation failed: {}", e.getMessage(), e);
return 1;
} catch (Exception e) {
LOG.error("Bootstrap failure during startup.", e);
return 1;
}
}
}
/**
 * Initialises the M4 SQLite schema using the configured SQLite file path.
 * <p>
 * Called once from {@link #run()} at startup, before the use case is created and
 * the batch document loop begins. The production
 * {@link SqliteSchemaInitializationAdapter} is instantiated directly here because
 * schema initialisation is a startup concern, not a per-document concern, and is
 * therefore not routed through the {@link UseCaseFactory} abstraction.
 * <p>
 * If the {@code sqlite.file} configuration is {@code null} or blank, schema
 * initialisation is skipped with a warning. This keeps bootstrap flows that inject
 * a custom {@link UseCaseFactory} (e.g. tests) working without a real SQLite file,
 * and applies the same "not configured" rule that {@link #run()} uses for the
 * run-lock file path.
 *
 * @param config the validated startup configuration
 * @throws DocumentPersistenceException if schema initialisation fails
 */
private void initializeSchema(StartConfiguration config) {
    var sqliteFile = config.sqliteFile();
    // A blank path would resolve to the current working directory and be handed to
    // SQLite as a database file; treat it as "not configured", exactly like null.
    if (sqliteFile == null || sqliteFile.toString().isBlank()) {
        LOG.warn("sqlite.file not configured skipping schema initialisation.");
        return;
    }
    String jdbcUrl = buildJdbcUrl(config);
    PersistenceSchemaInitializationPort schemaPort = new SqliteSchemaInitializationAdapter(jdbcUrl);
    schemaPort.initializeSchema();
    LOG.info("M4 SQLite schema initialised at: {}", jdbcUrl);
}
/**
 * Derives the SQLite JDBC URL from the SQLite file path held in the configuration.
 * <p>
 * The configured path is made absolute, rendered as a string, and every backslash
 * is replaced by a forward slash before the {@code jdbc:sqlite:} prefix is prepended.
 * The method performs no null check: {@code config.sqliteFile()} must be non-null,
 * otherwise the call fails with a {@link NullPointerException}.
 *
 * @param config the startup configuration containing the SQLite file path
 * @return the JDBC URL in the form {@code jdbc:sqlite:/path/to/file.db}
 */
static String buildJdbcUrl(StartConfiguration config) {
    String absoluteLocation = config.sqliteFile().toAbsolutePath().toString();
    // Normalise Windows-style separators so the URL always uses '/'.
    String normalisedLocation = absoluteLocation.replace('\\', '/');
    return "jdbc:sqlite:" + normalisedLocation;
}
}