1
0

M3-AP-005: Batchlauf im Use-Case integriert und sauber von Bootstrap

entkoppelt
This commit is contained in:
2026-04-01 20:34:15 +02:00
parent c482b20df9
commit d60d050948
3 changed files with 413 additions and 80 deletions

View File

@@ -3,41 +3,56 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.RunBatchProcessingUseCase;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import java.util.List;
import java.util.Objects;
/**
* M2 implementation of {@link RunBatchProcessingUseCase}.
* M3 batch processing implementation of {@link RunBatchProcessingUseCase}.
* <p>
* This use case orchestrates the batch processing workflow with start protection
* and controlled execution lifecycle, but without actual document processing.
* <p>
* Responsibilities:
* <ul>
* Orchestrates the complete M3 batch processing workflow:
* <ol>
* <li>Acquire exclusive run lock to prevent concurrent instances</li>
* <li>Initialize batch execution with the provided run context</li>
* <li>Release lock only if it was successfully acquired</li>
* <li>Return structured outcome for Bootstrap exit code mapping</li>
* <li>Scan source folder for PDF candidates</li>
* <li>For each candidate: extract text and page count, run M3 pre-checks</li>
* <li>Log the per-document M3 decision; each document then finishes in a controlled way, without an AI (KI) call or target copy</li>
* <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
* </ol>
* <p>
* M3 processing boundary:
* <ul>
* <li>Documents that pass the M3 pre-checks finish in a controlled way and are ready for M4+ (AI, persistence, copy)</li>
* <li>Documents with deterministic content errors (no usable text, page limit exceeded) finish in a controlled way</li>
* <li>Documents with technical extraction errors finish in a controlled way; they do not abort the overall run</li>
* <li>If the source folder itself is inaccessible, the run fails with {@link BatchRunOutcome#FAILURE}</li>
* </ul>
* <p>
* M2 Non-Goals (not implemented):
* M3 Non-Goals (not implemented):
* <ul>
* <li>No source folder scanning</li>
* <li>No PDF filtering or text extraction</li>
* <li>No fingerprinting</li>
* <li>No SQLite persistence</li>
* <li>No AI integration</li>
* <li>No filename generation</li>
* <li>No target file copying</li>
* <li>No business-level retry logic</li>
* <li>No single-document processing</li>
* <li>No fingerprinting or SQLite persistence</li>
* <li>No KI/AI integration or prompt loading</li>
* <li>No filename generation or target file copy</li>
* <li>No cross-run retry logic</li>
* </ul>
*
* @since M2-AP-004
* @since M2-AP-004 (extended in M3-AP-005)
*/
public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
@@ -45,25 +60,35 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
private final StartConfiguration configuration;
private final RunLockPort runLockPort;
private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort;
private final PdfTextExtractionPort pdfTextExtractionPort;
/**
* Creates the M2 batch use case with the already-loaded startup configuration and run lock port.
* Creates the batch use case with the already-loaded startup configuration and all required ports.
* <p>
* The configuration is loaded and validated by Bootstrap before use case creation;
* the use case receives the result directly and does not re-read it.
*
* @param configuration the validated startup configuration
* @param runLockPort for exclusive run locking
* @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder
* @param pdfTextExtractionPort for extracting text and page count from a single PDF
* @throws NullPointerException if any parameter is null
*/
public M2BatchRunProcessingUseCase(StartConfiguration configuration, RunLockPort runLockPort) {
public M2BatchRunProcessingUseCase(
        StartConfiguration configuration,
        RunLockPort runLockPort,
        SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
        PdfTextExtractionPort pdfTextExtractionPort) {
    // Fail fast on a missing collaborator. The constructor's Javadoc declares
    // "@throws NullPointerException if any parameter is null"; without these checks
    // a null argument would be stored silently and only surface later, when the
    // field is first dereferenced during a batch run.
    this.configuration = Objects.requireNonNull(configuration, "configuration must not be null");
    this.runLockPort = Objects.requireNonNull(runLockPort, "runLockPort must not be null");
    this.sourceDocumentCandidatesPort = Objects.requireNonNull(
            sourceDocumentCandidatesPort, "sourceDocumentCandidatesPort must not be null");
    this.pdfTextExtractionPort = Objects.requireNonNull(
            pdfTextExtractionPort, "pdfTextExtractionPort must not be null");
}
@Override
public BatchRunOutcome execute(BatchRunContext context) {
LOG.info("M2 batch processing initiated with RunId: {}", context.runId());
LOG.info("Batch processing initiated. RunId: {}", context.runId());
boolean lockAcquired = false;
try {
@@ -77,18 +102,28 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
return BatchRunOutcome.LOCK_UNAVAILABLE;
}
// Step 2: M2 Batch execution frame (no document processing)
LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder());
LOG.info("Batch execution frame initialized - RunId: {}, Start: {}", context.runId(), context.startInstant());
LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant());
// M2 Non-goal: No source folder scanning, PDF processing, persistence, or filename generation
// This is a controlled no-op batch cycle that validates the entire orchestration path.
// Step 2: Load PDF candidates from source folder
List<SourceDocumentCandidate> candidates;
try {
candidates = sourceDocumentCandidatesPort.loadCandidates();
} catch (SourceDocumentAccessException e) {
LOG.error("Cannot access source folder: {}", e.getMessage(), e);
return BatchRunOutcome.FAILURE;
}
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
LOG.info("Batch execution frame completed successfully");
// Step 3: Process each candidate through the M3 pipeline
for (SourceDocumentCandidate candidate : candidates) {
processCandidate(candidate);
}
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}", candidates.size(), context.runId());
return BatchRunOutcome.SUCCESS;
} catch (Exception e) {
// Unexpected error during batch orchestration
LOG.error("Unexpected error during batch processing", e);
return BatchRunOutcome.FAILURE;
} finally {
@@ -98,11 +133,53 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
if (lockAcquired) {
try {
runLockPort.release();
LOG.debug("Run lock released");
LOG.debug("Run lock released.");
} catch (Exception e) {
LOG.warn("Warning: Failed to release run lock", e);
LOG.warn("Warning: Failed to release run lock.", e);
}
}
}
}
/**
 * Runs one PDF candidate through the M3 stage and logs how it ended.
 * <p>
 * Sequence for a single document:
 * <ol>
 * <li>Ask the {@link PdfTextExtractionPort} for text and page count.</li>
 * <li>If extraction succeeded, hand the result to {@link M3PreCheckEvaluator} and log
 * whether the pre-checks passed or failed (both at INFO level).</li>
 * <li>If extraction reported a content error, log the reason at INFO level.</li>
 * <li>If extraction reported a technical error, log the message at WARN level.</li>
 * </ol>
 * <p>
 * Every extraction result variant handled here only produces a log entry; none of them
 * makes this method throw, so the caller's loop continues with the next candidate.
 * <p>
 * M3 boundary: this method performs no KI call, no persistence, no filename generation
 * and no target file copy, not even for candidates whose pre-checks passed.
 *
 * @param candidate the candidate to process
 */
private void processCandidate(SourceDocumentCandidate candidate) {
    PdfExtractionResult extraction = pdfTextExtractionPort.extractTextAndPageCount(candidate);

    switch (extraction) {
        case PdfExtractionSuccess extracted ->
                logPreCheckDecision(
                        candidate,
                        M3PreCheckEvaluator.evaluate(candidate, extracted, configuration));
        case PdfExtractionContentError unreadable ->
                LOG.info("PDF content not extractable for '{}': {}",
                        candidate.uniqueIdentifier(), unreadable.reason());
        case PdfExtractionTechnicalError broken ->
                LOG.warn("Technical error extracting PDF '{}': {}",
                        candidate.uniqueIdentifier(), broken.errorMessage());
    }
}

/**
 * Logs the outcome of the M3 pre-checks for a successfully extracted candidate.
 *
 * @param candidate the candidate the decision belongs to
 * @param decision  the pre-check decision produced by {@link M3PreCheckEvaluator}
 */
private void logPreCheckDecision(SourceDocumentCandidate candidate, M3ProcessingDecision decision) {
    switch (decision) {
        case M3PreCheckPassed accepted ->
                LOG.info("M3 pre-checks passed for '{}'. Candidate ready for further processing (M4+).",
                        candidate.uniqueIdentifier());
        case M3PreCheckFailed rejected ->
                LOG.info("M3 pre-check failed for '{}': {}",
                        candidate.uniqueIdentifier(), rejected.failureReason());
    }
}
}