M3-AP-005: Batchlauf im Use-Case integriert und sauber von Bootstrap
entkoppelt
This commit is contained in:
@@ -3,41 +3,56 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.RunBatchProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* M2 implementation of {@link RunBatchProcessingUseCase}.
|
||||
* M3 batch processing implementation of {@link RunBatchProcessingUseCase}.
|
||||
* <p>
|
||||
* This use case orchestrates the batch processing workflow with start protection
|
||||
* and controlled execution lifecycle, but without actual document processing.
|
||||
* <p>
|
||||
* Responsibilities:
|
||||
* <ul>
|
||||
* Orchestrates the complete M3 batch processing workflow:
|
||||
* <ol>
|
||||
* <li>Acquire exclusive run lock to prevent concurrent instances</li>
|
||||
* <li>Initialize batch execution with the provided run context</li>
|
||||
* <li>Release lock only if it was successfully acquired</li>
|
||||
* <li>Return structured outcome for Bootstrap exit code mapping</li>
|
||||
* <li>Scan source folder for PDF candidates</li>
|
||||
* <li>For each candidate: extract text and page count, run M3 pre-checks</li>
|
||||
* <li>Log the per-document M3 decision; end each document in a controlled way, without an AI call or target copy</li>
|
||||
* <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* M3 processing boundary:
|
||||
* <ul>
|
||||
* <li>Documents that pass M3 pre-checks end in a controlled way and are ready for M4+ (AI, persistence, copy)</li>
|
||||
* <li>Documents with deterministic content errors (no usable text, page limit exceeded) end controlled</li>
|
||||
* <li>Documents with technical extraction errors end controlled; they do not abort the overall run</li>
|
||||
* <li>If the source folder itself is inaccessible, the run fails with {@link BatchRunOutcome#FAILURE}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* M2 Non-Goals (not implemented):
|
||||
* M3 Non-Goals (not implemented):
|
||||
* <ul>
|
||||
* <li>No source folder scanning</li>
|
||||
* <li>No PDF filtering or text extraction</li>
|
||||
* <li>No fingerprinting</li>
|
||||
* <li>No SQLite persistence</li>
|
||||
* <li>No AI integration</li>
|
||||
* <li>No filename generation</li>
|
||||
* <li>No target file copying</li>
|
||||
* <li>No business-level retry logic</li>
|
||||
* <li>No single-document processing</li>
|
||||
* <li>No fingerprinting or SQLite persistence</li>
|
||||
* <li>No KI/AI integration or prompt loading</li>
|
||||
* <li>No filename generation or target file copy</li>
|
||||
* <li>No cross-run retry logic</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M2-AP-004
|
||||
* @since M2-AP-004 (extended in M3-AP-005)
|
||||
*/
|
||||
public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
|
||||
|
||||
@@ -45,25 +60,35 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
|
||||
|
||||
private final StartConfiguration configuration;
|
||||
private final RunLockPort runLockPort;
|
||||
private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort;
|
||||
private final PdfTextExtractionPort pdfTextExtractionPort;
|
||||
|
||||
/**
 * Creates the batch use case with the already-loaded startup configuration and all required ports.
 * <p>
 * The configuration is loaded and validated by Bootstrap before use case creation;
 * the use case receives the result directly and does not re-read it.
 * <p>
 * Every argument is null-checked eagerly, so a wiring mistake fails at construction time
 * instead of surfacing later in the middle of a batch run.
 *
 * @param configuration the validated startup configuration
 * @param runLockPort for exclusive run locking
 * @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder
 * @param pdfTextExtractionPort for extracting text and page count from a single PDF
 * @throws NullPointerException if any parameter is null
 */
public M2BatchRunProcessingUseCase(
        StartConfiguration configuration,
        RunLockPort runLockPort,
        SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
        PdfTextExtractionPort pdfTextExtractionPort) {
    // Fully qualified on purpose: java.util.Objects is not among the imports visible in this file.
    this.configuration =
            java.util.Objects.requireNonNull(configuration, "configuration must not be null");
    this.runLockPort =
            java.util.Objects.requireNonNull(runLockPort, "runLockPort must not be null");
    this.sourceDocumentCandidatesPort = java.util.Objects.requireNonNull(
            sourceDocumentCandidatesPort, "sourceDocumentCandidatesPort must not be null");
    this.pdfTextExtractionPort = java.util.Objects.requireNonNull(
            pdfTextExtractionPort, "pdfTextExtractionPort must not be null");
}
|
||||
|
||||
@Override
|
||||
public BatchRunOutcome execute(BatchRunContext context) {
|
||||
LOG.info("M2 batch processing initiated with RunId: {}", context.runId());
|
||||
LOG.info("Batch processing initiated. RunId: {}", context.runId());
|
||||
boolean lockAcquired = false;
|
||||
|
||||
try {
|
||||
@@ -77,18 +102,28 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
|
||||
return BatchRunOutcome.LOCK_UNAVAILABLE;
|
||||
}
|
||||
|
||||
// Step 2: M2 Batch execution frame (no document processing)
|
||||
LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder());
|
||||
LOG.info("Batch execution frame initialized - RunId: {}, Start: {}", context.runId(), context.startInstant());
|
||||
LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant());
|
||||
|
||||
// M2 Non-goal: No source folder scanning, PDF processing, persistence, or filename generation
|
||||
// This is a controlled no-op batch cycle that validates the entire orchestration path.
|
||||
// Step 2: Load PDF candidates from source folder
|
||||
List<SourceDocumentCandidate> candidates;
|
||||
try {
|
||||
candidates = sourceDocumentCandidatesPort.loadCandidates();
|
||||
} catch (SourceDocumentAccessException e) {
|
||||
LOG.error("Cannot access source folder: {}", e.getMessage(), e);
|
||||
return BatchRunOutcome.FAILURE;
|
||||
}
|
||||
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
|
||||
|
||||
LOG.info("Batch execution frame completed successfully");
|
||||
// Step 3: Process each candidate through the M3 pipeline
|
||||
for (SourceDocumentCandidate candidate : candidates) {
|
||||
processCandidate(candidate);
|
||||
}
|
||||
|
||||
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}", candidates.size(), context.runId());
|
||||
return BatchRunOutcome.SUCCESS;
|
||||
|
||||
} catch (Exception e) {
|
||||
// Unexpected error during batch orchestration
|
||||
LOG.error("Unexpected error during batch processing", e);
|
||||
return BatchRunOutcome.FAILURE;
|
||||
} finally {
|
||||
@@ -98,11 +133,53 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
|
||||
if (lockAcquired) {
|
||||
try {
|
||||
runLockPort.release();
|
||||
LOG.debug("Run lock released");
|
||||
LOG.debug("Run lock released.");
|
||||
} catch (Exception e) {
|
||||
LOG.warn("Warning: Failed to release run lock", e);
|
||||
LOG.warn("Warning: Failed to release run lock.", e);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Processes a single PDF candidate through the M3 pipeline.
|
||||
* <p>
|
||||
* M3 processing steps per document:
|
||||
* <ol>
|
||||
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
|
||||
* <li>On successful extraction: run M3 pre-checks via {@link M3PreCheckEvaluator}</li>
|
||||
* <li>Log the per-document M3 decision and end controlled</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Per-document errors (extraction failure, pre-check failure) do not abort the overall
|
||||
* batch run. Each candidate ends controlled regardless of its outcome.
|
||||
* <p>
|
||||
* M3 processing boundary: no KI call, no persistence, no filename generation,
|
||||
* no target file copy is initiated here, even for candidates that pass all pre-checks.
|
||||
*
|
||||
* @param candidate the candidate to process
|
||||
*/
|
||||
private void processCandidate(SourceDocumentCandidate candidate) {
|
||||
PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
||||
|
||||
switch (extractionResult) {
|
||||
case PdfExtractionSuccess success -> {
|
||||
M3ProcessingDecision decision = M3PreCheckEvaluator.evaluate(candidate, success, configuration);
|
||||
switch (decision) {
|
||||
case M3PreCheckPassed passed ->
|
||||
LOG.info("M3 pre-checks passed for '{}'. Candidate ready for further processing (M4+).",
|
||||
candidate.uniqueIdentifier());
|
||||
case M3PreCheckFailed failed ->
|
||||
LOG.info("M3 pre-check failed for '{}': {}",
|
||||
candidate.uniqueIdentifier(), failed.failureReason());
|
||||
}
|
||||
}
|
||||
case PdfExtractionContentError contentError ->
|
||||
LOG.info("PDF content not extractable for '{}': {}",
|
||||
candidate.uniqueIdentifier(), contentError.reason());
|
||||
case PdfExtractionTechnicalError technicalError ->
|
||||
LOG.warn("Technical error extracting PDF '{}': {}",
|
||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user