1
0

M3-AP-005: Batchlauf im Use-Case integriert und sauber von Bootstrap

entkoppelt
This commit is contained in:
2026-04-01 20:34:15 +02:00
parent c482b20df9
commit d60d050948
3 changed files with 413 additions and 80 deletions

View File

@@ -3,41 +3,56 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.RunBatchProcessingUseCase; import de.gecheckt.pdf.umbenenner.application.port.in.RunBatchProcessingUseCase;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.Logger;
import java.util.List;
/** /**
* M2 implementation of {@link RunBatchProcessingUseCase}. * M3 batch processing implementation of {@link RunBatchProcessingUseCase}.
* <p> * <p>
* This use case orchestrates the batch processing workflow with start protection * Orchestrates the complete M3 batch processing workflow:
* and controlled execution lifecycle, but without actual document processing. * <ol>
* <p>
* Responsibilities:
* <ul>
* <li>Acquire exclusive run lock to prevent concurrent instances</li> * <li>Acquire exclusive run lock to prevent concurrent instances</li>
* <li>Initialize batch execution with the provided run context</li> * <li>Scan source folder for PDF candidates</li>
* <li>Release lock only if it was successfully acquired</li> * <li>For each candidate: extract text and page count, run M3 pre-checks</li>
* <li>Return structured outcome for Bootstrap exit code mapping</li> * <li>Log per-document M3 decision; end each document controlled without KI or target copy</li>
* <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
* </ol>
* <p>
* M3 processing boundary:
* <ul>
* <li>Documents that pass M3 pre-checks end controlled and are ready for M4+ (KI, persistence, copy)</li>
* <li>Documents with deterministic content errors (no usable text, page limit exceeded) end controlled</li>
* <li>Documents with technical extraction errors end controlled; they do not abort the overall run</li>
* <li>If the source folder itself is inaccessible, the run fails with {@link BatchRunOutcome#FAILURE}</li>
* </ul> * </ul>
* <p> * <p>
* M2 Non-Goals (not implemented): * M3 Non-Goals (not implemented):
* <ul> * <ul>
* <li>No source folder scanning</li> * <li>No fingerprinting or SQLite persistence</li>
* <li>No PDF filtering or text extraction</li> * <li>No KI/AI integration or prompt loading</li>
* <li>No fingerprinting</li> * <li>No filename generation or target file copy</li>
* <li>No SQLite persistence</li> * <li>No cross-run retry logic</li>
* <li>No AI integration</li>
* <li>No filename generation</li>
* <li>No target file copying</li>
* <li>No business-level retry logic</li>
* <li>No single-document processing</li>
* </ul> * </ul>
* *
* @since M2-AP-004 * @since M2-AP-004 (extended in M3-AP-005)
*/ */
public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase { public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
@@ -45,25 +60,35 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
private final StartConfiguration configuration; private final StartConfiguration configuration;
private final RunLockPort runLockPort; private final RunLockPort runLockPort;
private final SourceDocumentCandidatesPort sourceDocumentCandidatesPort;
private final PdfTextExtractionPort pdfTextExtractionPort;
/**
 * Creates the batch use case with the already-loaded startup configuration and all required ports.
 * <p>
 * The configuration is loaded and validated by Bootstrap before use case creation;
 * the use case receives the result directly and does not re-read it.
 * <p>
 * Every argument is null-checked eagerly so that a wiring mistake surfaces at construction
 * time instead of as a late failure in the middle of a batch run.
 *
 * @param configuration                the validated startup configuration
 * @param runLockPort                  for exclusive run locking
 * @param sourceDocumentCandidatesPort for loading PDF candidates from the source folder
 * @param pdfTextExtractionPort        for extracting text and page count from a single PDF
 * @throws NullPointerException if any parameter is null
 */
public M2BatchRunProcessingUseCase(
        StartConfiguration configuration,
        RunLockPort runLockPort,
        SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
        PdfTextExtractionPort pdfTextExtractionPort) {
    // Fully qualified on purpose: keeps this constructor independent of the file's import block.
    this.configuration = java.util.Objects.requireNonNull(configuration, "configuration");
    this.runLockPort = java.util.Objects.requireNonNull(runLockPort, "runLockPort");
    this.sourceDocumentCandidatesPort =
            java.util.Objects.requireNonNull(sourceDocumentCandidatesPort, "sourceDocumentCandidatesPort");
    this.pdfTextExtractionPort =
            java.util.Objects.requireNonNull(pdfTextExtractionPort, "pdfTextExtractionPort");
}
@Override @Override
public BatchRunOutcome execute(BatchRunContext context) { public BatchRunOutcome execute(BatchRunContext context) {
LOG.info("M2 batch processing initiated with RunId: {}", context.runId()); LOG.info("Batch processing initiated. RunId: {}", context.runId());
boolean lockAcquired = false; boolean lockAcquired = false;
try { try {
@@ -77,18 +102,28 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
return BatchRunOutcome.LOCK_UNAVAILABLE; return BatchRunOutcome.LOCK_UNAVAILABLE;
} }
// Step 2: M2 Batch execution frame (no document processing)
LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder()); LOG.debug("Configuration in use: source={}, target={}", configuration.sourceFolder(), configuration.targetFolder());
LOG.info("Batch execution frame initialized - RunId: {}, Start: {}", context.runId(), context.startInstant()); LOG.info("Batch run started. RunId: {}, Start: {}", context.runId(), context.startInstant());
// M2 Non-goal: No source folder scanning, PDF processing, persistence, or filename generation // Step 2: Load PDF candidates from source folder
// This is a controlled no-op batch cycle that validates the entire orchestration path. List<SourceDocumentCandidate> candidates;
try {
candidates = sourceDocumentCandidatesPort.loadCandidates();
} catch (SourceDocumentAccessException e) {
LOG.error("Cannot access source folder: {}", e.getMessage(), e);
return BatchRunOutcome.FAILURE;
}
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
LOG.info("Batch execution frame completed successfully"); // Step 3: Process each candidate through the M3 pipeline
for (SourceDocumentCandidate candidate : candidates) {
processCandidate(candidate);
}
LOG.info("Batch run completed. Processed {} candidate(s). RunId: {}", candidates.size(), context.runId());
return BatchRunOutcome.SUCCESS; return BatchRunOutcome.SUCCESS;
} catch (Exception e) { } catch (Exception e) {
// Unexpected error during batch orchestration
LOG.error("Unexpected error during batch processing", e); LOG.error("Unexpected error during batch processing", e);
return BatchRunOutcome.FAILURE; return BatchRunOutcome.FAILURE;
} finally { } finally {
@@ -98,11 +133,53 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
if (lockAcquired) { if (lockAcquired) {
try { try {
runLockPort.release(); runLockPort.release();
LOG.debug("Run lock released"); LOG.debug("Run lock released.");
} catch (Exception e) { } catch (Exception e) {
LOG.warn("Warning: Failed to release run lock", e); LOG.warn("Warning: Failed to release run lock.", e);
} }
} }
} }
} }
/**
 * Runs one PDF candidate through extraction and, if extraction succeeded, the M3 pre-checks.
 * <p>
 * The method asks {@link PdfTextExtractionPort} for the text and page count of the candidate
 * and then branches on the kind of result:
 * <ul>
 *   <li>successful extraction: the pre-check decision is evaluated and logged</li>
 *   <li>content error: the reason is logged at INFO level</li>
 *   <li>technical error: the error message is logged at WARN level</li>
 * </ul>
 * <p>
 * The outcome is only logged; the method returns nothing and triggers no KI call,
 * persistence, filename generation or target file copy.
 *
 * @param candidate the candidate to process
 */
private void processCandidate(SourceDocumentCandidate candidate) {
    PdfExtractionResult extraction = pdfTextExtractionPort.extractTextAndPageCount(candidate);

    switch (extraction) {
        case PdfExtractionSuccess extracted -> logPreCheckDecision(candidate, extracted);
        case PdfExtractionContentError unreadable -> LOG.info(
                "PDF content not extractable for '{}': {}",
                candidate.uniqueIdentifier(),
                unreadable.reason());
        case PdfExtractionTechnicalError broken -> LOG.warn(
                "Technical error extracting PDF '{}': {}",
                candidate.uniqueIdentifier(),
                broken.errorMessage());
    }
}

/**
 * Evaluates the M3 pre-checks for a successfully extracted candidate and logs the decision.
 *
 * @param candidate the candidate the extraction result belongs to
 * @param extracted the successful extraction result handed to {@link M3PreCheckEvaluator}
 */
private void logPreCheckDecision(SourceDocumentCandidate candidate, PdfExtractionSuccess extracted) {
    M3ProcessingDecision verdict = M3PreCheckEvaluator.evaluate(candidate, extracted, configuration);

    switch (verdict) {
        case M3PreCheckPassed accepted -> LOG.info(
                "M3 pre-checks passed for '{}'. Candidate ready for further processing (M4+).",
                candidate.uniqueIdentifier());
        case M3PreCheckFailed rejected -> LOG.info(
                "M3 pre-check failed for '{}': {}",
                candidate.uniqueIdentifier(),
                rejected.failureReason());
    }
}
} }

View File

@@ -2,10 +2,20 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
import de.gecheckt.pdf.umbenenner.domain.model.RunId; import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import org.junit.jupiter.api.Test; import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir; import org.junit.jupiter.api.io.TempDir;
@@ -14,26 +24,39 @@ import java.net.URI;
import java.nio.file.Files; import java.nio.file.Files;
import java.nio.file.Path; import java.nio.file.Path;
import java.time.Instant; import java.time.Instant;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assertions.*;
/** /**
* Tests for {@link M2BatchRunProcessingUseCase}. * Tests for {@link M2BatchRunProcessingUseCase}.
* <p> * <p>
* Verifies correct orchestration of the M2 batch cycle including lock management * Covers:
* and controlled execution flow. * <ul>
* <li>Lock acquisition and release lifecycle (M2)</li>
* <li>M3 source folder scanning and per-document processing loop</li>
* <li>M3 happy path: candidate passes pre-checks, ends controlled without KI or target copy</li>
* <li>M3 deterministic content errors: no usable text, page limit exceeded</li>
* <li>M3 technical extraction errors: controlled per-document end, batch continues</li>
* <li>Source folder access failure: batch fails with FAILURE outcome</li>
* </ul>
*/ */
class M2BatchRunProcessingUseCaseTest { class M2BatchRunProcessingUseCaseTest {
@TempDir @TempDir
Path tempDir; Path tempDir;
// -------------------------------------------------------------------------
// M2: Lock lifecycle tests (preserved, updated constructor)
// -------------------------------------------------------------------------
@Test @Test
void execute_successfullyAcquiresAndReleasesLock() throws Exception { void execute_successfullyAcquiresAndReleasesLock() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort(); MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir); StartConfiguration config = buildConfig(tempDir);
M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(config, lockPort); M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now()); BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context); BatchRunOutcome outcome = useCase.execute(context);
@@ -48,7 +71,8 @@ class M2BatchRunProcessingUseCaseTest {
CountingRunLockPort lockPort = new CountingRunLockPort(true); CountingRunLockPort lockPort = new CountingRunLockPort(true);
StartConfiguration config = buildConfig(tempDir); StartConfiguration config = buildConfig(tempDir);
M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(config, lockPort); M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now()); BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context); BatchRunOutcome outcome = useCase.execute(context);
@@ -60,46 +84,204 @@ class M2BatchRunProcessingUseCaseTest {
/** /**
* Regression test for M2-F1: when acquire() fails, release() must NOT be called. * Regression test for M2-F1: when acquire() fails, release() must NOT be called.
* Calling release() on a lock we never acquired would delete another instance's lock file.
*/ */
@Test @Test
void execute_doesNotReleaseLockWhenAcquireFails() throws Exception { void execute_doesNotReleaseLockWhenAcquireFails() throws Exception {
CountingRunLockPort lockPort = new CountingRunLockPort(true); CountingRunLockPort lockPort = new CountingRunLockPort(true);
StartConfiguration config = buildConfig(tempDir); StartConfiguration config = buildConfig(tempDir);
M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(config, lockPort); M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now()); BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now());
useCase.execute(context); useCase.execute(context);
assertEquals(1, lockPort.acquireCallCount(), "acquire() should be called exactly once"); assertEquals(1, lockPort.acquireCallCount(), "acquire() should be called exactly once");
assertEquals(0, lockPort.releaseCallCount(), assertEquals(0, lockPort.releaseCallCount(),
"release() must NOT be called when acquire() failed doing so would delete another instance's lock file"); "release() must NOT be called when acquire() failed would delete another instance's lock file");
} }
@Test @Test
void execute_releasesLockEvenOnUnexpectedError() throws Exception { void execute_releasesLockEvenOnUnexpectedError() throws Exception {
// Lock acquires successfully, but an unexpected exception occurs after that.
// The lock must still be released.
ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort(); ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort();
StartConfiguration config = buildConfig(tempDir); StartConfiguration config = buildConfig(tempDir);
// Use a configuration that triggers an NPE internally simulate by passing null configuration M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
// Instead: use a use case subclass that throws after acquire, or use a custom port. config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
// Here we verify via a use case that fails after acquiring the lock.
M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(config, lockPort);
BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now()); BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context); BatchRunOutcome outcome = useCase.execute(context);
// Lock was acquired (no exception thrown by acquire) so release must be called
assertTrue(lockPort.wasAcquireCalled(), "Lock acquire should be called"); assertTrue(lockPort.wasAcquireCalled(), "Lock acquire should be called");
assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even after unexpected error"); assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even after unexpected error");
// The use case itself completes normally since the config is valid;
// this test primarily guards the finally-block path for the acquired case.
assertTrue(outcome.isSuccess() || outcome.isFailure()); assertTrue(outcome.isSuccess() || outcome.isFailure());
} }
// -------------------------------------------------------------------------
// M3: Source folder scanning and candidate processing
// -------------------------------------------------------------------------
/** An empty candidate list is a valid run: nothing to extract, outcome is SUCCESS. */
@Test
void execute_withNoCandidates_returnsSuccess() throws Exception {
    MockRunLockPort runLock = new MockRunLockPort();
    M2BatchRunProcessingUseCase sut = new M2BatchRunProcessingUseCase(
            buildConfig(tempDir), runLock, new EmptyCandidatesPort(), new NoOpExtractionPort());

    BatchRunOutcome result = sut.execute(new BatchRunContext(new RunId("m3-empty"), Instant.now()));

    assertTrue(result.isSuccess(), "Empty candidate list should still yield SUCCESS");
}
/**
 * Happy path: one candidate whose extraction yields usable text on a single page.
 * The run must succeed and the extraction port must be asked exactly once.
 */
@Test
void execute_m3HappyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception {
    MockRunLockPort runLock = new MockRunLockPort();
    StartConfiguration startConfig = buildConfig(tempDir);

    // Stubbed extraction: usable text, page count within the configured limit
    FixedExtractionPort extraction =
            new FixedExtractionPort(new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)));
    FixedCandidatesPort source = new FixedCandidatesPort(List.of(makeCandidate("document.pdf")));

    M2BatchRunProcessingUseCase sut =
            new M2BatchRunProcessingUseCase(startConfig, runLock, source, extraction);

    BatchRunOutcome result = sut.execute(new BatchRunContext(new RunId("m3-happy"), Instant.now()));

    // The use case stops at the M3 boundary; only the outcome and the extraction call are observable here
    assertTrue(result.isSuccess(), "M3 happy path should yield SUCCESS");
    assertEquals(1, extraction.callCount(), "Extraction should be called exactly once");
}
/**
 * A document whose extracted text contains no letters or digits must not abort the run.
 * Two candidates are supplied so that "batch continues" is actually observable:
 * the second extraction call only happens if the first document ended controlled.
 */
@Test
void execute_m3NoUsableText_candidateEndsControlled_batchContinues() throws Exception {
    MockRunLockPort lockPort = new MockRunLockPort();
    StartConfiguration config = buildConfig(tempDir);

    List<SourceDocumentCandidate> candidates = List.of(
            makeCandidate("image-only.pdf"),
            makeCandidate("image-only-2.pdf"));
    // Extraction returns text with no letters or digits
    PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess(" ", new PdfPageCount(1));

    FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
    FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess);

    M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
            config, lockPort, candidatesPort, extractionPort);
    BatchRunContext context = new BatchRunContext(new RunId("m3-no-text"), Instant.now());

    BatchRunOutcome outcome = useCase.execute(context);

    // Document ends with pre-check failure; batch itself still succeeds
    assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run");
    assertEquals(2, extractionPort.callCount(),
            "Batch should continue with the next candidate after a no-usable-text document");
}
/**
 * A document with more pages than the configured limit must not abort the run.
 * Two candidates are supplied so that "batch continues" is actually observable:
 * the second extraction call only happens if the first document ended controlled.
 */
@Test
void execute_m3PageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception {
    MockRunLockPort lockPort = new MockRunLockPort();
    // buildConfig uses a low page limit (3); the stubbed documents report 10 pages
    StartConfiguration config = buildConfig(tempDir);

    List<SourceDocumentCandidate> candidates = List.of(
            makeCandidate("big.pdf"),
            makeCandidate("big-2.pdf"));
    PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10));

    FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
    FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages);

    M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
            config, lockPort, candidatesPort, extractionPort);
    BatchRunContext context = new BatchRunContext(new RunId("m3-page-limit"), Instant.now());

    BatchRunOutcome outcome = useCase.execute(context);

    // 10 pages exceed the limit: the pre-check fails for the document, the batch continues
    assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run");
    assertEquals(2, extractionPort.callCount(),
            "Batch should continue with the next candidate after a page-limit failure");
}
/**
 * A content error reported by the extraction port must not abort the run.
 * Two candidates are supplied so that "batch continues" is actually observable:
 * the second extraction call only happens if the first document ended controlled.
 */
@Test
void execute_m3ExtractionContentError_candidateEndsControlled_batchContinues() throws Exception {
    MockRunLockPort lockPort = new MockRunLockPort();
    StartConfiguration config = buildConfig(tempDir);

    List<SourceDocumentCandidate> candidates = List.of(
            makeCandidate("encrypted.pdf"),
            makeCandidate("encrypted-2.pdf"));
    PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted");

    FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
    FixedExtractionPort extractionPort = new FixedExtractionPort(contentError);

    M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
            config, lockPort, candidatesPort, extractionPort);
    BatchRunContext context = new BatchRunContext(new RunId("m3-content-error"), Instant.now());

    BatchRunOutcome outcome = useCase.execute(context);

    assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run");
    assertEquals(2, extractionPort.callCount(),
            "Batch should continue with the next candidate after a content error");
}
/**
 * A technical error reported by the extraction port must not abort the run.
 * Two candidates are supplied so that "batch continues" is actually observable:
 * the second extraction call only happens if the first document ended controlled.
 */
@Test
void execute_m3ExtractionTechnicalError_candidateEndsControlled_batchContinues() throws Exception {
    MockRunLockPort lockPort = new MockRunLockPort();
    StartConfiguration config = buildConfig(tempDir);

    List<SourceDocumentCandidate> candidates = List.of(
            makeCandidate("corrupt.pdf"),
            makeCandidate("corrupt-2.pdf"));
    PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null);

    FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
    FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError);

    M2BatchRunProcessingUseCase useCase = new M2BatchRunProcessingUseCase(
            config, lockPort, candidatesPort, extractionPort);
    BatchRunContext context = new BatchRunContext(new RunId("m3-tech-error"), Instant.now());

    BatchRunOutcome outcome = useCase.execute(context);

    assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run");
    assertEquals(2, extractionPort.callCount(),
            "Batch should continue with the next candidate after a technical error");
}
/**
 * If loading candidates throws {@link SourceDocumentAccessException}, the run must end
 * with a FAILURE outcome and the previously acquired lock must still be released.
 */
@Test
void execute_m3SourceAccessException_returnsFailure() throws Exception {
    MockRunLockPort runLock = new MockRunLockPort();
    StartConfiguration startConfig = buildConfig(tempDir);

    // Candidate port whose only behavior is to report an unreadable source folder
    SourceDocumentCandidatesPort unreadableSource = () -> {
        throw new SourceDocumentAccessException("Source folder not readable");
    };

    M2BatchRunProcessingUseCase sut = new M2BatchRunProcessingUseCase(
            startConfig, runLock, unreadableSource, new NoOpExtractionPort());

    BatchRunOutcome result = sut.execute(new BatchRunContext(new RunId("m3-access-fail"), Instant.now()));

    assertTrue(result.isFailure(), "Source folder access failure should yield FAILURE outcome");
    assertFalse(result.isSuccess(), "Source folder access failure must not be SUCCESS");
    // The failure happens after acquire(), so release() has to run
    assertTrue(runLock.wasReleaseCalled(), "Lock should be released even when source access fails");
}
/**
 * Three candidates, each with the same successful extraction result: the run must
 * succeed and the extraction port must be invoked once per candidate.
 */
@Test
void execute_m3MultipleCandidates_allProcessed_batchSucceeds() throws Exception {
    MockRunLockPort runLock = new MockRunLockPort();
    StartConfiguration startConfig = buildConfig(tempDir);

    FixedCandidatesPort source = new FixedCandidatesPort(
            List.of(makeCandidate("a.pdf"), makeCandidate("b.pdf"), makeCandidate("c.pdf")));
    FixedExtractionPort extraction =
            new FixedExtractionPort(new PdfExtractionSuccess("Invoice content", new PdfPageCount(2)));

    M2BatchRunProcessingUseCase sut =
            new M2BatchRunProcessingUseCase(startConfig, runLock, source, extraction);

    BatchRunOutcome result = sut.execute(new BatchRunContext(new RunId("m3-multi"), Instant.now()));

    assertTrue(result.isSuccess(), "All three candidates processed should yield SUCCESS");
    assertEquals(3, extraction.callCount(), "Extraction should be called once per candidate");
}
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// Helpers // Helpers
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
@@ -108,9 +290,9 @@ class M2BatchRunProcessingUseCaseTest {
Path sourceDir = Files.createDirectories(tempDir.resolve("source")); Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
Path targetDir = Files.createDirectories(tempDir.resolve("target")); Path targetDir = Files.createDirectories(tempDir.resolve("target"));
Path dbFile = tempDir.resolve("db.sqlite"); Path dbFile = tempDir.resolve("db.sqlite");
Files.createFile(dbFile); if (!Files.exists(dbFile)) Files.createFile(dbFile);
Path promptFile = tempDir.resolve("prompt.txt"); Path promptFile = tempDir.resolve("prompt.txt");
Files.createFile(promptFile); if (!Files.exists(promptFile)) Files.createFile(promptFile);
return new StartConfiguration( return new StartConfiguration(
sourceDir, sourceDir,
@@ -119,8 +301,8 @@ class M2BatchRunProcessingUseCaseTest {
URI.create("https://api.example.com"), URI.create("https://api.example.com"),
"gpt-4", "gpt-4",
30, 30,
3, 3, // maxRetries
100, 3, // maxPages (low limit useful for page-limit tests)
50000, 50000,
promptFile, promptFile,
tempDir.resolve("lock.lock"), tempDir.resolve("lock.lock"),
@@ -130,6 +312,10 @@ class M2BatchRunProcessingUseCaseTest {
); );
} }
/**
 * Builds a test candidate for the given file name.
 * <p>
 * The locator is derived from {@code "/tmp/" + filename}; the second constructor argument
 * is the fixed value {@code 1024L} (presumably the file size in bytes, to be confirmed
 * against {@code SourceDocumentCandidate}).
 *
 * @param filename file name used as the candidate's first constructor argument
 * @return a new candidate; no file is created on disk by this helper
 */
private static SourceDocumentCandidate makeCandidate(String filename) {
    SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/" + filename);
    return new SourceDocumentCandidate(filename, 1024L, locator);
}
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
// Mock / Stub implementations // Mock / Stub implementations
// ------------------------------------------------------------------------- // -------------------------------------------------------------------------
@@ -140,45 +326,31 @@ class M2BatchRunProcessingUseCaseTest {
private boolean releaseCalled = false; private boolean releaseCalled = false;
@Override @Override
public void acquire() { public void acquire() { acquireCalled = true; }
acquireCalled = true;
}
@Override @Override
public void release() { public void release() { releaseCalled = true; }
releaseCalled = true;
}
boolean wasAcquireCalled() { return acquireCalled; } boolean wasAcquireCalled() { return acquireCalled; }
boolean wasReleaseCalled() { return releaseCalled; } boolean wasReleaseCalled() { return releaseCalled; }
} }
/** /** Counting lock port optionally fails on acquire. */
* Counting lock port optionally fails on acquire.
* Tracks exact call counts so tests can assert that release() was never called
* when acquire() threw.
*/
private static class CountingRunLockPort implements RunLockPort { private static class CountingRunLockPort implements RunLockPort {
private final boolean failOnAcquire; private final boolean failOnAcquire;
private int acquireCount = 0; private int acquireCount = 0;
private int releaseCount = 0; private int releaseCount = 0;
CountingRunLockPort(boolean failOnAcquire) { CountingRunLockPort(boolean failOnAcquire) { this.failOnAcquire = failOnAcquire; }
this.failOnAcquire = failOnAcquire;
}
@Override @Override
public void acquire() { public void acquire() {
acquireCount++; acquireCount++;
if (failOnAcquire) { if (failOnAcquire) throw new RunLockUnavailableException("Another instance already running");
throw new RunLockUnavailableException("Another instance already running");
}
} }
@Override @Override
public void release() { public void release() { releaseCount++; }
releaseCount++;
}
int acquireCallCount() { return acquireCount; } int acquireCallCount() { return acquireCount; }
int releaseCallCount() { return releaseCount; } int releaseCallCount() { return releaseCount; }
@@ -190,16 +362,58 @@ class M2BatchRunProcessingUseCaseTest {
private boolean releaseCalled = false; private boolean releaseCalled = false;
@Override @Override
public void acquire() { public void acquire() { acquireCalled = true; }
acquireCalled = true;
}
@Override @Override
public void release() { public void release() { releaseCalled = true; }
releaseCalled = true;
}
boolean wasAcquireCalled() { return acquireCalled; } boolean wasAcquireCalled() { return acquireCalled; }
boolean wasReleaseCalled() { return releaseCalled; } boolean wasReleaseCalled() { return releaseCalled; }
} }
/**
 * Candidate port stub for runs without any documents: every call yields an empty list.
 */
private static class EmptyCandidatesPort implements SourceDocumentCandidatesPort {

    @Override
    public List<SourceDocumentCandidate> loadCandidates() {
        List<SourceDocumentCandidate> none = List.of();
        return none;
    }
}
/**
 * Returns a fixed list of candidates.
 * <p>
 * The list is snapshotted at construction time, so later changes to the caller's list
 * cannot alter what the use case sees, and the returned list is unmodifiable.
 */
private static class FixedCandidatesPort implements SourceDocumentCandidatesPort {
    private final List<SourceDocumentCandidate> candidates;

    /**
     * @param candidates the candidates to return from every {@link #loadCandidates()} call
     * @throws NullPointerException if the list or any of its elements is null
     */
    FixedCandidatesPort(List<SourceDocumentCandidate> candidates) {
        // List.copyOf gives an immutable snapshot and rejects nulls up front
        this.candidates = List.copyOf(candidates);
    }

    @Override
    public List<SourceDocumentCandidate> loadCandidates() {
        return candidates;
    }
}
/**
 * Test stub for {@link PdfTextExtractionPort} that answers every request with the same
 * preconfigured result and records how often it was asked.
 */
private static class FixedExtractionPort implements PdfTextExtractionPort {

    private final PdfExtractionResult cannedResult;
    private int invocations;

    /**
     * @param result the result returned for every extraction request
     */
    FixedExtractionPort(PdfExtractionResult result) {
        this.cannedResult = result;
    }

    /**
     * Counts the invocation and returns the preconfigured result; the candidate is ignored.
     *
     * @param candidate the candidate to extract from (not inspected)
     * @return the result supplied at construction time
     */
    @Override
    public PdfExtractionResult extractTextAndPageCount(SourceDocumentCandidate candidate) {
        invocations += 1;
        return cannedResult;
    }

    /**
     * @return how many times {@link #extractTextAndPageCount} has been invoked so far
     */
    int callCount() {
        return invocations;
    }
}
/**
 * Guard stub for {@link PdfTextExtractionPort}, meant for tests that use
 * {@code EmptyCandidatesPort} and therefore expect no extraction to happen.
 * <p>
 * Any invocation fails immediately so that an unexpected call surfaces as a test error.
 */
private static class NoOpExtractionPort implements PdfTextExtractionPort {

    /**
     * Always fails.
     *
     * @param candidate ignored
     * @return never returns normally
     * @throws UnsupportedOperationException on every call
     */
    @Override
    public PdfExtractionResult extractTextAndPageCount(SourceDocumentCandidate candidate) {
        final String reason = "Should not be called";
        throw new UnsupportedOperationException(reason);
    }
}
} }

View File

@@ -12,7 +12,9 @@ import de.gecheckt.pdf.umbenenner.application.config.StartConfigurationValidator
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.in.RunBatchProcessingUseCase; import de.gecheckt.pdf.umbenenner.application.port.in.RunBatchProcessingUseCase;
import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort; import de.gecheckt.pdf.umbenenner.application.port.out.ConfigurationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.usecase.M2BatchRunProcessingUseCase; import de.gecheckt.pdf.umbenenner.application.usecase.M2BatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.RunId; import de.gecheckt.pdf.umbenenner.domain.model.RunId;
@@ -69,8 +71,11 @@ public class BootstrapRunner {
/** /**
* Functional interface for creating a RunBatchProcessingUseCase. * Functional interface for creating a RunBatchProcessingUseCase.
* <p> * <p>
* Receives the already-loaded and validated {@link StartConfiguration} so the use case * Receives the already-loaded and validated {@link StartConfiguration} and run lock port.
* does not need to re-read the configuration file. * <p>
* Note: The use case signature may accept additional ports for M3+ functionality,
* but bootstrap provides No-Op implementations for now (AP-005 scope).
* Full M3 adapter wiring will be completed in AP-007 (Bootstrap expansion).
*/ */
@FunctionalInterface @FunctionalInterface
public interface UseCaseFactory { public interface UseCaseFactory {
@@ -94,7 +99,10 @@ public class BootstrapRunner {
this.configPortFactory = PropertiesConfigurationPortAdapter::new; this.configPortFactory = PropertiesConfigurationPortAdapter::new;
this.runLockPortFactory = FilesystemRunLockPortAdapter::new; this.runLockPortFactory = FilesystemRunLockPortAdapter::new;
this.validatorFactory = StartConfigurationValidator::new; this.validatorFactory = StartConfigurationValidator::new;
this.useCaseFactory = (config, lock) -> new M2BatchRunProcessingUseCase(config, lock); // AP-005: Use case accepts M3 ports, but bootstrap provides No-Op implementations (M2 scope)
// AP-007 will wire real M3 adapters; for now, M2 uses No-Op ports
this.useCaseFactory = (config, lock) ->
new M2BatchRunProcessingUseCase(config, lock, new NoOpSourceCandidatesPort(), new NoOpExtractionPort());
this.commandFactory = SchedulerBatchCommand::new; this.commandFactory = SchedulerBatchCommand::new;
} }
@@ -157,6 +165,8 @@ public class BootstrapRunner {
// Step 6: Create the use case with the validated config and run lock (application layer) // Step 6: Create the use case with the validated config and run lock (application layer)
// Config is passed directly; the use case does not re-read the properties file. // Config is passed directly; the use case does not re-read the properties file.
// Note: The use case signature includes M3 ports, but bootstrap (M2 scope) provides No-Op implementations.
// Real M3 adapter wiring will be completed in AP-007.
RunBatchProcessingUseCase useCase = useCaseFactory.create(config, runLockPort); RunBatchProcessingUseCase useCase = useCaseFactory.create(config, runLockPort);
// Step 7: Create the CLI command adapter with the use case // Step 7: Create the CLI command adapter with the use case
@@ -192,4 +202,36 @@ public class BootstrapRunner {
return 1; return 1;
} }
} }
// =========================================================================
// AP-005 (M2 scope): No-Op port implementations
// (Real M3 adapters will be wired in AP-007)
// =========================================================================
/**
 * Placeholder {@link SourceDocumentCandidatesPort} used by the bootstrap in M2 scope.
 * <p>
 * It performs no source-folder scan and always reports an empty candidate list.
 * A real filesystem adapter is planned to take its place in AP-007.
 */
private static class NoOpSourceCandidatesPort implements SourceDocumentCandidatesPort {

    /**
     * @return an empty list; this placeholder never discovers any candidates
     */
    @Override
    public java.util.List<de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate> loadCandidates() {
        final java.util.List<de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate> noCandidates =
                java.util.List.of();
        return noCandidates;
    }
}
/**
* No-Op implementation of {@link PdfTextExtractionPort} for M2 scope.
* <p>
* M2 batch execution does not extract PDF content, so this port is never called.
* AP-007 will replace this with a real PDFBox adapter.
*/
private static class NoOpExtractionPort implements PdfTextExtractionPort {
@Override
public de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult extractTextAndPageCount(
de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate) {
throw new UnsupportedOperationException("M2 scope: No-Op port, should not be called");
}
}
} }