1
0

M4 AP-006 Idempotenz- und Persistenzlogik integrieren

This commit is contained in:
2026-04-02 23:36:22 +02:00
parent 8ee4041feb
commit 00c4cf1e5c
7 changed files with 1598 additions and 174 deletions

View File

@@ -0,0 +1,425 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters;
import de.gecheckt.pdf.umbenenner.application.port.out.PersistenceLookupTechnicalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
import org.junit.jupiter.api.BeforeEach;
import org.junit.jupiter.api.Test;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import static org.junit.jupiter.api.Assertions.*;
/**
* Unit tests for {@link M4DocumentProcessor}.
* <p>
* Covers:
* <ul>
* <li>M4 minimal rules: status, counter and retryable flag mapping</li>
* <li>Skip logic for SUCCESS and FAILED_FINAL documents</li>
* <li>New document path (DocumentUnknown)</li>
* <li>Known processable document path (DocumentKnownProcessable)</li>
* <li>Persistence lookup failure: no attempt written</li>
* <li>Persistence write failure: controlled failure, no crash</li>
* <li>Skip events do not change error counters</li>
* </ul>
*/
class M4DocumentProcessorTest {
// Fixed fingerprint value used to build the DocumentFingerprint shared by all tests.
private static final String FINGERPRINT_HEX =
"a".repeat(64); // 64 lowercase hex chars
// Capturing test doubles; re-created in setUp() and handed to the processor under test.
private CapturingDocumentRecordRepository recordRepo;
private CapturingProcessingAttemptRepository attemptRepo;
// Processor under test, wired to the two capturing repositories above.
private M4DocumentProcessor processor;
// Shared fixture values passed to processor.process(...) by the tests.
private SourceDocumentCandidate candidate;
private DocumentFingerprint fingerprint;
private BatchRunContext context;
private Instant attemptStart;
/**
 * Builds a fresh fixture before each test: new capturing repositories, a processor
 * wired to them, and the candidate / fingerprint / run context / attempt start
 * values that the tests pass to {@code process(...)}.
 */
@BeforeEach
void setUp() {
    // New doubles per test so captured calls from one test are never visible in another.
    recordRepo = new CapturingDocumentRecordRepository();
    attemptRepo = new CapturingProcessingAttemptRepository();
    processor = new M4DocumentProcessor(recordRepo, attemptRepo);

    SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/test.pdf");
    candidate = new SourceDocumentCandidate("test.pdf", 1024L, locator);
    fingerprint = new DocumentFingerprint(FINGERPRINT_HEX);

    RunId runId = new RunId("run-001");
    context = new BatchRunContext(runId, Instant.now());
    attemptStart = Instant.now();
}
// -------------------------------------------------------------------------
// New document (DocumentUnknown) path
// -------------------------------------------------------------------------
/**
 * Unknown document + passed pre-check: expects one SUCCESS attempt without failure
 * details and one newly created master record with zeroed counters.
 */
@Test
void process_newDocument_preCheckPassed_persistsSuccessStatus() {
    recordRepo.setLookupResult(new DocumentUnknown());
    PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);

    processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);

    // Exactly one attempt, flagged as a non-retryable success with no failure info.
    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    ProcessingAttempt savedAttempt = attempts.get(0);
    assertEquals(ProcessingStatus.SUCCESS, savedAttempt.status());
    assertFalse(savedAttempt.retryable());
    assertNull(savedAttempt.failureClass());
    assertNull(savedAttempt.failureMessage());

    // Exactly one master record created (not updated) for the new document.
    List<DocumentRecord> created = recordRepo.createdRecords;
    assertEquals(1, created.size());
    DocumentRecord createdRecord = created.get(0);
    assertEquals(ProcessingStatus.SUCCESS, createdRecord.overallStatus());
    assertEquals(0, createdRecord.failureCounters().contentErrorCount());
    assertEquals(0, createdRecord.failureCounters().transientErrorCount());
    assertNotNull(createdRecord.lastSuccessInstant());
    assertNull(createdRecord.lastFailureInstant());
}
/**
 * Unknown document + first pre-check failure: expects a retryable failed attempt and
 * a created record whose content error counter is 1 and transient counter is 0.
 */
@Test
void process_newDocument_firstContentError_persistsFailedRetryable_contentCounterOne() {
    recordRepo.setLookupResult(new DocumentUnknown());
    DocumentProcessingOutcome failedOutcome =
            new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);

    processor.process(candidate, fingerprint, failedOutcome, context, attemptStart);

    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    ProcessingAttempt savedAttempt = attempts.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, savedAttempt.status());
    assertTrue(savedAttempt.retryable());

    List<DocumentRecord> created = recordRepo.createdRecords;
    assertEquals(1, created.size());
    DocumentRecord createdRecord = created.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, createdRecord.overallStatus());
    assertEquals(1, createdRecord.failureCounters().contentErrorCount());
    assertEquals(0, createdRecord.failureCounters().transientErrorCount());
    // Only the failure timestamp is set; the document has never succeeded.
    assertNotNull(createdRecord.lastFailureInstant());
    assertNull(createdRecord.lastSuccessInstant());
}
/**
 * Unknown document + technical error: expects a retryable failed attempt and a created
 * record whose transient error counter is 1 while the content counter stays 0.
 */
@Test
void process_newDocument_technicalError_persistsFailedRetryable_transientCounterOne() {
    recordRepo.setLookupResult(new DocumentUnknown());
    DocumentProcessingOutcome technicalOutcome =
            new TechnicalDocumentError(candidate, "I/O error", null);

    processor.process(candidate, fingerprint, technicalOutcome, context, attemptStart);

    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    ProcessingAttempt savedAttempt = attempts.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, savedAttempt.status());
    assertTrue(savedAttempt.retryable());

    List<DocumentRecord> created = recordRepo.createdRecords;
    assertEquals(1, created.size());
    DocumentRecord createdRecord = created.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, createdRecord.overallStatus());
    assertEquals(0, createdRecord.failureCounters().contentErrorCount());
    assertEquals(1, createdRecord.failureCounters().transientErrorCount());
}
// -------------------------------------------------------------------------
// Known processable document path (DocumentKnownProcessable)
// -------------------------------------------------------------------------
/**
 * Known processable document that already has one content error: a second pre-check
 * failure must yield a non-retryable FAILED_FINAL attempt and an updated record with
 * content error counter 2.
 */
@Test
void process_knownDocument_secondContentError_persistsFailedFinal_contentCounterTwo() {
    // Prior state: one content error on file, still retryable.
    FailureCounters priorCounters = new FailureCounters(1, 0);
    DocumentRecord priorRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, priorCounters);
    recordRepo.setLookupResult(new DocumentKnownProcessable(priorRecord));
    DocumentProcessingOutcome failedOutcome =
            new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED);

    processor.process(candidate, fingerprint, failedOutcome, context, attemptStart);

    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    ProcessingAttempt savedAttempt = attempts.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, savedAttempt.status());
    assertFalse(savedAttempt.retryable());

    List<DocumentRecord> updated = recordRepo.updatedRecords;
    assertEquals(1, updated.size());
    DocumentRecord updatedRecord = updated.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, updatedRecord.overallStatus());
    assertEquals(2, updatedRecord.failureCounters().contentErrorCount());
    assertEquals(0, updatedRecord.failureCounters().transientErrorCount());
}
/**
 * Known processable document with two prior transient errors: another technical error
 * must bump the transient counter to 3, leave the content counter at 0, and keep both
 * the record and the saved attempt retryable.
 */
@Test
void process_knownDocument_technicalError_incrementsTransientCounter_remainsRetryable() {
    DocumentRecord existingRecord = buildRecord(
            ProcessingStatus.FAILED_RETRYABLE,
            new FailureCounters(0, 2));
    recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
    DocumentProcessingOutcome m3Outcome = new TechnicalDocumentError(
            candidate, "Timeout", null);

    processor.process(candidate, fingerprint, m3Outcome, context, attemptStart);

    assertEquals(1, recordRepo.updatedRecords.size());
    DocumentRecord record = recordRepo.updatedRecords.get(0);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, record.overallStatus());
    assertEquals(0, record.failureCounters().contentErrorCount());
    assertEquals(3, record.failureCounters().transientErrorCount());

    // Guard the index access: a missing attempt should fail as an assertion,
    // not as an IndexOutOfBoundsException.
    assertEquals(1, attemptRepo.savedAttempts.size());
    assertTrue(attemptRepo.savedAttempts.get(0).retryable());
}
/**
 * Known processable document that now passes the pre-check: the updated record must
 * be SUCCESS, keep its existing failure counters, and carry a success timestamp.
 */
@Test
void process_knownDocument_preCheckPassed_persistsSuccess() {
    FailureCounters priorCounters = new FailureCounters(0, 1);
    DocumentRecord priorRecord = buildRecord(ProcessingStatus.FAILED_RETRYABLE, priorCounters);
    recordRepo.setLookupResult(new DocumentKnownProcessable(priorRecord));
    PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);

    processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);

    List<DocumentRecord> updated = recordRepo.updatedRecords;
    assertEquals(1, updated.size());
    DocumentRecord updatedRecord = updated.get(0);
    assertEquals(ProcessingStatus.SUCCESS, updatedRecord.overallStatus());
    // A success must leave the historical failure counters untouched.
    assertEquals(0, updatedRecord.failureCounters().contentErrorCount());
    assertEquals(1, updatedRecord.failureCounters().transientErrorCount());
    assertNotNull(updatedRecord.lastSuccessInstant());
}
// -------------------------------------------------------------------------
// Skip paths
// -------------------------------------------------------------------------
/**
 * Document already in terminal SUCCESS: expects a SKIPPED_ALREADY_PROCESSED attempt
 * without failure details, and one record update that keeps status and counters.
 */
@Test
void process_terminalSuccess_persistsSkipAttemptWithSkippedAlreadyProcessed() {
    DocumentRecord priorRecord = buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero());
    recordRepo.setLookupResult(new DocumentTerminalSuccess(priorRecord));
    PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);

    processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);

    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    ProcessingAttempt skipAttempt = attempts.get(0);
    assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, skipAttempt.status());
    assertFalse(skipAttempt.retryable());
    assertNull(skipAttempt.failureClass());
    assertNull(skipAttempt.failureMessage());

    // The master record is written via update; status and counters must be as before.
    List<DocumentRecord> updated = recordRepo.updatedRecords;
    assertEquals(1, updated.size());
    DocumentRecord updatedRecord = updated.get(0);
    assertEquals(ProcessingStatus.SUCCESS, updatedRecord.overallStatus());
    assertEquals(0, updatedRecord.failureCounters().contentErrorCount());
    assertEquals(0, updatedRecord.failureCounters().transientErrorCount());
}
/**
 * Document already in terminal FAILED_FINAL: expects a non-retryable
 * SKIPPED_FINAL_FAILURE attempt and one record update that keeps status and counters.
 */
@Test
void process_terminalFinalFailure_persistsSkipAttemptWithSkippedFinalFailure() {
    FailureCounters priorCounters = new FailureCounters(2, 0);
    DocumentRecord priorRecord = buildRecord(ProcessingStatus.FAILED_FINAL, priorCounters);
    recordRepo.setLookupResult(new DocumentTerminalFinalFailure(priorRecord));
    DocumentProcessingOutcome failedOutcome =
            new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);

    processor.process(candidate, fingerprint, failedOutcome, context, attemptStart);

    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    ProcessingAttempt skipAttempt = attempts.get(0);
    assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, skipAttempt.status());
    assertFalse(skipAttempt.retryable());

    // The master record is written via update; status and counters must be as before.
    List<DocumentRecord> updated = recordRepo.updatedRecords;
    assertEquals(1, updated.size());
    DocumentRecord updatedRecord = updated.get(0);
    assertEquals(ProcessingStatus.FAILED_FINAL, updatedRecord.overallStatus());
    assertEquals(2, updatedRecord.failureCounters().contentErrorCount());
    assertEquals(0, updatedRecord.failureCounters().transientErrorCount());
}
/**
 * A skip on a terminal-success document must write the record back with exactly the
 * failure counters it had before, even when those counters are non-zero.
 */
@Test
void process_skipEvent_doesNotChangeErrorCounters() {
    FailureCounters originalCounters = new FailureCounters(1, 3);
    DocumentRecord existingRecord = buildRecord(ProcessingStatus.SUCCESS, originalCounters);
    recordRepo.setLookupResult(new DocumentTerminalSuccess(existingRecord));

    processor.process(candidate, fingerprint,
            new PreCheckPassed(candidate, new PdfExtractionSuccess("t", new PdfPageCount(1))),
            context, attemptStart);

    // Guard the index access (as the other skip tests do) so a missing update fails
    // as a readable assertion rather than an IndexOutOfBoundsException.
    assertEquals(1, recordRepo.updatedRecords.size(),
            "Skip must update the master record exactly once");
    DocumentRecord updated = recordRepo.updatedRecords.get(0);
    assertEquals(originalCounters.contentErrorCount(), updated.failureCounters().contentErrorCount(),
            "Skip must not change content error counter");
    assertEquals(originalCounters.transientErrorCount(), updated.failureCounters().transientErrorCount(),
            "Skip must not change transient error counter");
}
// -------------------------------------------------------------------------
// Persistence lookup failure
// -------------------------------------------------------------------------
/**
 * When the record lookup reports a technical failure, processing must return normally
 * and must not write an attempt or create/update any master record.
 */
@Test
void process_persistenceLookupFailure_noAttemptWritten_noException() {
    recordRepo.setLookupResult(new PersistenceLookupTechnicalFailure("DB unavailable", null));
    PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);

    // The lookup failure must be absorbed by the processor, not propagated.
    assertDoesNotThrow(
            () -> processor.process(candidate, fingerprint, passedOutcome, context, attemptStart));

    // Nothing may have been persisted.
    assertEquals(0, attemptRepo.savedAttempts.size(),
            "No attempt must be written when lookup fails");
    assertEquals(0, recordRepo.createdRecords.size());
    assertEquals(0, recordRepo.updatedRecords.size());
}
// -------------------------------------------------------------------------
// Persistence write failure: controlled, no crash
// -------------------------------------------------------------------------
/**
 * When saving the attempt throws a {@code DocumentPersistenceException}, the processor
 * must not let the exception escape from {@code process(...)}.
 */
@Test
void process_persistenceWriteFailure_doesNotThrow_batchContinues() {
    recordRepo.setLookupResult(new DocumentUnknown());
    // Arm the attempt repository double so that save(...) throws.
    attemptRepo.failOnSave = true;
    PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);

    // The write failure must be handled inside the processor.
    assertDoesNotThrow(
            () -> processor.process(candidate, fingerprint, passedOutcome, context, attemptStart));
}
// -------------------------------------------------------------------------
// Attempt number monotonicity
// -------------------------------------------------------------------------
/**
 * The attempt number on the saved attempt must be the value returned by the attempt
 * repository's {@code loadNextAttemptNumber}, not one computed by the processor.
 */
@Test
void process_attemptNumberIsAssignedFromRepository() {
    recordRepo.setLookupResult(new DocumentUnknown());
    // The double will report 3 as the next number, as if two attempts already existed.
    attemptRepo.nextAttemptNumber = 3;
    PdfExtractionSuccess extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    DocumentProcessingOutcome passedOutcome = new PreCheckPassed(candidate, extraction);

    processor.process(candidate, fingerprint, passedOutcome, context, attemptStart);

    List<ProcessingAttempt> attempts = attemptRepo.savedAttempts;
    assertEquals(1, attempts.size());
    assertEquals(3, attempts.get(0).attemptNumber(),
            "Attempt number must be taken from the repository");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
/**
 * Builds a pre-existing {@link DocumentRecord} for the shared test fingerprint with the
 * given status and counters.
 * <p>
 * Of the two conditional timestamps, the first is set only for non-SUCCESS statuses and
 * the second only for SUCCESS. NOTE(review): presumably these are the last-failure and
 * last-success instants, in that order; confirm against the DocumentRecord constructor.
 *
 * @param status   overall status the record should report
 * @param counters failure counters the record should carry
 * @return a record whose remaining timestamps are all set to the current instant
 */
private DocumentRecord buildRecord(ProcessingStatus status, FailureCounters counters) {
    Instant now = Instant.now();
    boolean succeeded = status == ProcessingStatus.SUCCESS;
    Instant setUnlessSuccess = succeeded ? null : now;
    Instant setOnlyOnSuccess = succeeded ? now : null;
    SourceDocumentLocator locator = new SourceDocumentLocator("/tmp/test.pdf");
    return new DocumentRecord(
            fingerprint, locator, "test.pdf", status, counters,
            setUnlessSuccess, setOnlyOnSuccess, now, now);
}
// -------------------------------------------------------------------------
// Capturing test doubles
// -------------------------------------------------------------------------
/**
 * Test double for {@link DocumentRecordRepository}: returns a preconfigured lookup
 * result for every fingerprint and records each created / updated record in order.
 */
private static class CapturingDocumentRecordRepository implements DocumentRecordRepository {

    /** Records passed to {@link #create}, in call order. */
    final List<DocumentRecord> createdRecords = new ArrayList<>();

    /** Records passed to {@link #update}, in call order. */
    final List<DocumentRecord> updatedRecords = new ArrayList<>();

    /** Result handed back by every lookup; defaults to "unknown document". */
    private DocumentRecordLookupResult cannedLookupResult = new DocumentUnknown();

    /** Replaces the result that subsequent lookups will return. */
    void setLookupResult(DocumentRecordLookupResult newResult) {
        cannedLookupResult = newResult;
    }

    /** Ignores the fingerprint and returns the configured result. */
    @Override
    public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
        return cannedLookupResult;
    }

    @Override
    public void create(DocumentRecord record) {
        createdRecords.add(record);
    }

    @Override
    public void update(DocumentRecord record) {
        updatedRecords.add(record);
    }
}
/**
 * Test double for {@link ProcessingAttemptRepository}: records saved attempts, returns a
 * configurable next attempt number, and can be armed to fail on save.
 */
private static class CapturingProcessingAttemptRepository implements ProcessingAttemptRepository {

    /** Attempts successfully passed to {@link #save}, in call order. */
    final List<ProcessingAttempt> savedAttempts = new ArrayList<>();

    /** Value returned by {@link #loadNextAttemptNumber} for any fingerprint. */
    int nextAttemptNumber = 1;

    /** When true, {@link #save} throws instead of recording the attempt. */
    boolean failOnSave = false;

    /** Ignores the fingerprint and returns the configured number. */
    @Override
    public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
        return nextAttemptNumber;
    }

    @Override
    public void save(ProcessingAttempt attempt) {
        if (!failOnSave) {
            savedAttempts.add(attempt);
            return;
        }
        throw new DocumentPersistenceException("Simulated save failure");
    }

    /** Returns an immutable snapshot of all saved attempts, regardless of fingerprint. */
    @Override
    public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
        return List.copyOf(savedAttempts);
    }
}
}

View File

@@ -2,12 +2,24 @@ package de.gecheckt.pdf.umbenenner.application.usecase;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.M4DocumentProcessor;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
@@ -37,10 +49,11 @@ import static org.junit.jupiter.api.Assertions.*;
* <ul>
* <li>Lock acquisition and release lifecycle</li>
* <li>Source folder scanning and per-document processing loop</li>
* <li>Happy path: candidate passes pre-checks, ends controlled without KI or target copy</li>
* <li>Happy path: candidate passes pre-checks, M4 persistence is invoked</li>
* <li>Deterministic content errors: no usable text, page limit exceeded</li>
* <li>Technical extraction errors: controlled per-document end, batch continues</li>
* <li>Source folder access failure: batch fails with FAILURE outcome</li>
* <li>M4 idempotency: fingerprint failure → not historised</li>
* </ul>
*/
class BatchRunProcessingUseCaseTest {
@@ -57,8 +70,9 @@ class BatchRunProcessingUseCaseTest {
MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir);
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-1"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -73,8 +87,9 @@ class BatchRunProcessingUseCaseTest {
CountingRunLockPort lockPort = new CountingRunLockPort(true);
StartConfiguration config = buildConfig(tempDir);
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-2"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -92,8 +107,9 @@ class BatchRunProcessingUseCaseTest {
CountingRunLockPort lockPort = new CountingRunLockPort(true);
StartConfiguration config = buildConfig(tempDir);
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-f1"), Instant.now());
useCase.execute(context);
@@ -108,8 +124,9 @@ class BatchRunProcessingUseCaseTest {
ErrorAfterAcquireLockPort lockPort = new ErrorAfterAcquireLockPort();
StartConfiguration config = buildConfig(tempDir);
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("test-run-3"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -128,8 +145,9 @@ class BatchRunProcessingUseCaseTest {
MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir);
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort());
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, new EmptyCandidatesPort(), new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("empty"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
@@ -138,25 +156,26 @@ class BatchRunProcessingUseCaseTest {
}
@Test
void execute_happyPath_candidatePassesPreChecks_endsControlledWithoutKiOrCopy() throws Exception {
void execute_happyPath_candidatePassesPreChecks_m4PersistenceInvoked() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort();
StartConfiguration config = buildConfig(tempDir);
// Candidate with usable text within page limit
SourceDocumentCandidate candidate = makeCandidate("document.pdf");
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice text", new PdfPageCount(1));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("happy"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
// Batch run succeeds; document ended controlled at boundary (no KI, no copy)
assertTrue(outcome.isSuccess(), "Happy path should yield SUCCESS");
assertEquals(1, extractionPort.callCount(), "Extraction should be called exactly once");
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called exactly once");
}
@Test
@@ -165,40 +184,42 @@ class BatchRunProcessingUseCaseTest {
StartConfiguration config = buildConfig(tempDir);
SourceDocumentCandidate candidate = makeCandidate("image-only.pdf");
// Extraction returns text with no letters or digits
PdfExtractionSuccess emptySuccess = new PdfExtractionSuccess(" ", new PdfPageCount(1));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(emptySuccess);
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("no-text"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
// Document ends with pre-check failure; batch itself still succeeds
assertTrue(outcome.isSuccess(), "No-usable-text pre-check failure should not abort the batch run");
assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for content errors");
}
@Test
void execute_pageLimitExceeded_candidateEndsControlled_batchContinues() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort();
// Config has maxPages=3; document has 10 pages
StartConfiguration config = buildConfig(tempDir);
SourceDocumentCandidate candidate = makeCandidate("big.pdf");
PdfExtractionSuccess manyPages = new PdfExtractionSuccess("Some text", new PdfPageCount(10));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(manyPages);
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("page-limit"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
// maxPages in buildConfig is 3; 10 pages exceeds the limit, so the pre-check fails and the batch continues
assertTrue(outcome.isSuccess(), "Page limit exceeded should not abort the batch run");
assertEquals(1, m4Processor.processCallCount(), "M4 processor should still be called for page limit errors");
}
@Test
@@ -210,14 +231,17 @@ class BatchRunProcessingUseCaseTest {
PdfExtractionContentError contentError = new PdfExtractionContentError("PDF is encrypted");
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(contentError);
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("content-error"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(), "Extraction content error should not abort the batch run");
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for content errors");
}
@Test
@@ -229,14 +253,17 @@ class BatchRunProcessingUseCaseTest {
PdfExtractionTechnicalError technicalError = new PdfExtractionTechnicalError("I/O error reading file", null);
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(candidate));
FixedExtractionPort extractionPort = new FixedExtractionPort(technicalError);
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("tech-error"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(), "Technical extraction error should not abort the batch run");
assertEquals(1, m4Processor.processCallCount(), "M4 processor should be called for technical errors");
}
@Test
@@ -248,18 +275,71 @@ class BatchRunProcessingUseCaseTest {
throw new SourceDocumentAccessException("Source folder not readable");
};
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, failingPort, new NoOpExtractionPort());
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, failingPort, new NoOpExtractionPort(),
new AlwaysSuccessFingerprintPort(), new NoOpM4DocumentProcessor());
BatchRunContext context = new BatchRunContext(new RunId("access-fail"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isFailure(), "Source folder access failure should yield FAILURE outcome");
assertFalse(outcome.isSuccess(), "Source folder access failure must not be SUCCESS");
// Lock must still be released
assertTrue(lockPort.wasReleaseCalled(), "Lock should be released even when source access fails");
}
// -------------------------------------------------------------------------
// M4-specific: fingerprint failure → not historised
// -------------------------------------------------------------------------
/**
 * When the fingerprint port returns a technical error for the only candidate, the batch
 * run must still report success and the M4 processor must never be invoked.
 */
@Test
void execute_fingerprintFailure_candidateNotHistorised_batchContinues() throws Exception {
    MockRunLockPort runLock = new MockRunLockPort();
    StartConfiguration startConfig = buildConfig(tempDir);
    SourceDocumentCandidate unreadable = makeCandidate("unreadable.pdf");
    FixedCandidatesPort singleCandidatePort = new FixedCandidatesPort(List.of(unreadable));
    TrackingM4DocumentProcessor trackingProcessor = new TrackingM4DocumentProcessor();
    // Every fingerprint computation fails, so nothing may reach the M4 processor.
    FingerprintPort failingFingerprintPort =
            ignored -> new FingerprintTechnicalError("Cannot read file", null);

    DefaultBatchRunProcessingUseCase useCase = buildUseCase(
            startConfig, runLock, singleCandidatePort, new NoOpExtractionPort(),
            failingFingerprintPort, trackingProcessor);
    BatchRunOutcome outcome =
            useCase.execute(new BatchRunContext(new RunId("fp-fail"), Instant.now()));

    assertTrue(outcome.isSuccess(), "Fingerprint failure should not abort the batch run");
    assertEquals(0, trackingProcessor.processCallCount(),
            "M4 processor must NOT be called when fingerprint computation fails (pre-fingerprint failure)");
}
/**
 * When the fingerprint port returns a technical error for the only candidate, the PDF
 * extraction port must not be called for that candidate.
 */
@Test
void execute_fingerprintFailure_extractionNotCalled() throws Exception {
    MockRunLockPort runLock = new MockRunLockPort();
    StartConfiguration startConfig = buildConfig(tempDir);
    SourceDocumentCandidate unreadable = makeCandidate("unreadable.pdf");
    FixedCandidatesPort singleCandidatePort = new FixedCandidatesPort(List.of(unreadable));
    // The extraction port would succeed if asked; the test checks it is never asked.
    PdfExtractionSuccess unusedExtraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
    FixedExtractionPort countingExtractionPort = new FixedExtractionPort(unusedExtraction);
    FingerprintPort failingFingerprintPort =
            ignored -> new FingerprintTechnicalError("Cannot read file", null);

    DefaultBatchRunProcessingUseCase useCase = buildUseCase(
            startConfig, runLock, singleCandidatePort, countingExtractionPort,
            failingFingerprintPort, new NoOpM4DocumentProcessor());
    useCase.execute(new BatchRunContext(new RunId("fp-fail-no-extract"), Instant.now()));

    assertEquals(0, countingExtractionPort.callCount(),
            "PDF extraction must NOT be called when fingerprint computation fails");
}
/**
* Mixed-batch test: one document per outcome type in a single run.
* Proves that no individual outcome aborts the overall batch.
@@ -267,18 +347,18 @@ class BatchRunProcessingUseCaseTest {
@Test
void execute_mixedBatch_allOutcomeTypes_batchOverallSucceeds() throws Exception {
MockRunLockPort lockPort = new MockRunLockPort();
// maxPages=3 in buildConfig; pageLimitCandidate has 10 pages → exceeds limit
StartConfiguration config = buildConfig(tempDir);
SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf");
SourceDocumentCandidate noTextCandidate = makeCandidate("notext.pdf");
SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf");
SourceDocumentCandidate goodCandidate = makeCandidate("good.pdf");
SourceDocumentCandidate noTextCandidate = makeCandidate("notext.pdf");
SourceDocumentCandidate pageLimitCandidate = makeCandidate("toobig.pdf");
SourceDocumentCandidate technicalErrorCandidate = makeCandidate("broken.pdf");
SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf");
SourceDocumentCandidate contentErrorCandidate = makeCandidate("encrypted.pdf");
SourceDocumentCandidate fpFailCandidate = makeCandidate("unreadable.pdf");
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(List.of(
goodCandidate, noTextCandidate, pageLimitCandidate,
technicalErrorCandidate, contentErrorCandidate));
technicalErrorCandidate, contentErrorCandidate, fpFailCandidate));
MappedExtractionPort extractionPort = new MappedExtractionPort()
.with(goodCandidate, new PdfExtractionSuccess("Invoice text", new PdfPageCount(1)))
@@ -287,16 +367,31 @@ class BatchRunProcessingUseCaseTest {
.with(technicalErrorCandidate, new PdfExtractionTechnicalError("I/O error", null))
.with(contentErrorCandidate, new PdfExtractionContentError("PDF is encrypted"));
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
// fpFailCandidate gets a fingerprint failure; others get a valid fingerprint
FingerprintPort mappedFingerprintPort = candidate -> {
if (candidate.uniqueIdentifier().equals("unreadable.pdf")) {
return new FingerprintTechnicalError("Cannot read", null);
}
return new FingerprintSuccess(makeFingerprint(candidate.uniqueIdentifier()));
};
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
mappedFingerprintPort, m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("mixed"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(),
"Mixed batch with all outcome types must yield batch SUCCESS");
assertTrue(outcome.isSuccess(), "Mixed batch with all outcome types must yield batch SUCCESS");
// 5 candidates with successful fingerprint → M4 processor called 5 times
// 1 candidate with fingerprint failure → M4 processor NOT called
assertEquals(5, m4Processor.processCallCount(),
"M4 processor must be called for each candidate with a successful fingerprint");
// Extraction called for 5 candidates (not for fpFailCandidate)
assertEquals(5, extractionPort.callCount(),
"Extraction must be attempted for each of the 5 candidates");
"Extraction must be attempted for each of the 5 candidates with a valid fingerprint");
}
@Test
@@ -312,21 +407,35 @@ class BatchRunProcessingUseCaseTest {
PdfExtractionSuccess success = new PdfExtractionSuccess("Invoice content", new PdfPageCount(2));
FixedCandidatesPort candidatesPort = new FixedCandidatesPort(candidates);
FixedExtractionPort extractionPort = new FixedExtractionPort(success);
TrackingM4DocumentProcessor m4Processor = new TrackingM4DocumentProcessor();
DefaultBatchRunProcessingUseCase useCase = new DefaultBatchRunProcessingUseCase(
config, lockPort, candidatesPort, extractionPort);
DefaultBatchRunProcessingUseCase useCase = buildUseCase(
config, lockPort, candidatesPort, extractionPort,
new AlwaysSuccessFingerprintPort(), m4Processor);
BatchRunContext context = new BatchRunContext(new RunId("multi"), Instant.now());
BatchRunOutcome outcome = useCase.execute(context);
assertTrue(outcome.isSuccess(), "All three candidates processed should yield SUCCESS");
assertEquals(3, extractionPort.callCount(), "Extraction should be called once per candidate");
assertEquals(3, m4Processor.processCallCount(), "M4 processor should be called once per candidate");
}
// -------------------------------------------------------------------------
// Helpers
// -------------------------------------------------------------------------
/**
 * Assembles the use case under test from the supplied collaborators, so individual tests
 * only spell out the ports they care about.
 *
 * @param config          start configuration handed to the use case
 * @param lockPort        run-lock port
 * @param candidatesPort  source of document candidates
 * @param extractionPort  PDF text extraction port
 * @param fingerprintPort fingerprint computation port
 * @param m4Processor     document processor invoked by the use case
 * @return a new use case instance wired with exactly these collaborators
 */
private static DefaultBatchRunProcessingUseCase buildUseCase(
        StartConfiguration config,
        RunLockPort lockPort,
        SourceDocumentCandidatesPort candidatesPort,
        PdfTextExtractionPort extractionPort,
        FingerprintPort fingerprintPort,
        M4DocumentProcessor m4Processor) {
    DefaultBatchRunProcessingUseCase wired = new DefaultBatchRunProcessingUseCase(
            config, lockPort, candidatesPort, extractionPort, fingerprintPort, m4Processor);
    return wired;
}
private static StartConfiguration buildConfig(Path tempDir) throws Exception {
Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
Path targetDir = Files.createDirectories(tempDir.resolve("target"));
@@ -357,6 +466,13 @@ class BatchRunProcessingUseCaseTest {
return new SourceDocumentCandidate(filename, 1024L, new SourceDocumentLocator("/tmp/" + filename));
}
/**
 * Builds a deterministic fake fingerprint for tests: the seed's {@code hashCode()} rendered
 * as lowercase hex and left-padded with zeros to 64 characters.
 *
 * @param seed text whose hash code determines the fingerprint value
 * @return a fingerprint wrapping the 64-character hex string
 */
private static DocumentFingerprint makeFingerprint(String seed) {
    // Math.abs leaves Integer.MIN_VALUE negative; %x then prints its two's-complement form,
    // which is still plain hex digits.
    int absHash = Math.abs(seed.hashCode());
    String zeroPaddedHex = String.format("%064x", absHash);
    // An int never needs more than 8 hex digits, so this cut is only a length guard.
    String sixtyFourChars = zeroPaddedHex.substring(0, 64);
    return new DocumentFingerprint(sixtyFourChars);
}
// -------------------------------------------------------------------------
// Mock / Stub implementations
// -------------------------------------------------------------------------
@@ -480,4 +596,88 @@ class BatchRunProcessingUseCaseTest {
int callCount() { return calls; }
}
}
/**
 * Fingerprint stub that never fails: each candidate receives a deterministic fingerprint
 * derived from the hash code of its unique identifier.
 */
private static class AlwaysSuccessFingerprintPort implements FingerprintPort {

    /**
     * @param candidate the document candidate to fingerprint
     * @return a {@link FingerprintSuccess} carrying a 64-character zero-padded hex fingerprint
     */
    @Override
    public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
        int idHash = Math.abs(candidate.uniqueIdentifier().hashCode());
        String zeroPaddedHex = String.format("%064x", idHash);
        DocumentFingerprint fingerprint = new DocumentFingerprint(zeroPaddedHex.substring(0, 64));
        return new FingerprintSuccess(fingerprint);
    }
}
/**
 * M4DocumentProcessor test double that adds no behaviour of its own; it only wires the
 * superclass to the no-op repositories defined in this test class (for tests that only
 * care about lock/batch lifecycle, not M4 persistence).
 */
private static class NoOpM4DocumentProcessor extends M4DocumentProcessor {
// Supplies no-op repository stubs to the superclass constructor.
NoOpM4DocumentProcessor() {
super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
}
}
/**
 * Test double that records how often {@code process()} is invoked while still running the
 * inherited processing logic against no-op repositories.
 */
private static class TrackingM4DocumentProcessor extends M4DocumentProcessor {

    /** Number of {@code process()} invocations observed so far. */
    private int invocations;

    TrackingM4DocumentProcessor() {
        super(new NoOpDocumentRecordRepository(), new NoOpProcessingAttemptRepository());
    }

    /**
     * Counts the invocation and then hands all arguments unchanged to the superclass.
     */
    @Override
    public void process(
            de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate candidate,
            de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint fingerprint,
            de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome m3Outcome,
            de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext context,
            java.time.Instant attemptStart) {
        // Count first so the call is recorded even if the delegated logic throws.
        invocations += 1;
        super.process(candidate, fingerprint, m3Outcome, context, attemptStart);
    }

    /** @return how many times {@code process()} has been called on this instance */
    int processCallCount() {
        return invocations;
    }
}
/**
 * DocumentRecordRepository stub for test M4DocumentProcessor instances: lookups always
 * report an unknown document and writes are discarded.
 */
private static class NoOpDocumentRecordRepository implements DocumentRecordRepository {

    /** Always answers {@link DocumentUnknown}, so the processor takes its "new document" path. */
    @Override
    public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) {
        DocumentRecordLookupResult unknown = new DocumentUnknown();
        return unknown;
    }

    /** Discards the record; nothing is stored. */
    @Override
    public void create(DocumentRecord record) {
        // intentionally empty
    }

    /** Discards the record; nothing is stored. */
    @Override
    public void update(DocumentRecord record) {
        // intentionally empty
    }
}
/**
 * ProcessingAttemptRepository stub for test M4DocumentProcessor instances: it stores
 * nothing and reports no attempt history.
 */
private static class NoOpProcessingAttemptRepository implements ProcessingAttemptRepository {

    /** Attempt number reported for every fingerprint. */
    private static final int FIRST_ATTEMPT_NUMBER = 1;

    /** Always answers 1, regardless of the fingerprint. */
    @Override
    public int loadNextAttemptNumber(DocumentFingerprint fingerprint) {
        return FIRST_ATTEMPT_NUMBER;
    }

    /** Discards the attempt; nothing is stored. */
    @Override
    public void save(ProcessingAttempt attempt) {
        // intentionally empty
    }

    /** Always answers an empty list. */
    @Override
    public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) {
        List<ProcessingAttempt> noAttempts = List.of();
        return noAttempts;
    }
}
}