diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java index ad42434..b69899c 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java @@ -1401,6 +1401,100 @@ class DocumentProcessingCoordinatorTest { } } + // ------------------------------------------------------------------------- + // Logging correlation content tests (fingerprint and classification in messages) + // ------------------------------------------------------------------------- + + @Test + void process_terminalSuccess_skipLog_containsFingerprintForCorrelation() { + // Proves that the skip log for a SUCCESS document contains the document fingerprint, + // satisfying the post-fingerprint log correlation rule. + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + recordRepo.setLookupResult(new DocumentTerminalSuccess( + buildRecord(ProcessingStatus.SUCCESS, FailureCounters.zero()))); + + coordinatorWithCapturing.process(candidate, fingerprint, + new PreCheckPassed(candidate, new PdfExtractionSuccess("text", new PdfPageCount(1))), + context, attemptStart); + + assertTrue(capturingLogger.anyInfoContains(FINGERPRINT_HEX), + "Skip log for SUCCESS must contain the document fingerprint for correlation. 
" + + "Captured info messages: " + capturingLogger.infoMessages); + } + + @Test + void process_terminalFinalFailure_skipLog_containsFingerprintForCorrelation() { + // Proves that the skip log for a FAILED_FINAL document contains the document fingerprint, + // satisfying the post-fingerprint log correlation rule. + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + recordRepo.setLookupResult(new DocumentTerminalFinalFailure( + buildRecord(ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0)))); + + coordinatorWithCapturing.process(candidate, fingerprint, + new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT), + context, attemptStart); + + assertTrue(capturingLogger.anyInfoContains(FINGERPRINT_HEX), + "Skip log for FAILED_FINAL must contain the document fingerprint for correlation. " + + "Captured info messages: " + capturingLogger.infoMessages); + } + + @Test + void process_firstContentError_retryDecisionLog_containsFingerprintAndFailedRetryable() { + // Proves that the retry decision log for a first deterministic content error contains + // both the document fingerprint and the FAILED_RETRYABLE classification. 
+ MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + recordRepo.setLookupResult(new DocumentUnknown()); + + coordinatorWithCapturing.process(candidate, fingerprint, + new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT), + context, attemptStart); + + assertTrue(capturingLogger.anyInfoContains(FINGERPRINT_HEX), + "Retry decision log must contain the document fingerprint. " + + "Captured info messages: " + capturingLogger.infoMessages); + assertTrue(capturingLogger.anyInfoContains("FAILED_RETRYABLE"), + "Retry decision log must contain the FAILED_RETRYABLE classification. " + + "Captured info messages: " + capturingLogger.infoMessages); + } + + @Test + void process_secondContentError_retryDecisionLog_containsFingerprintAndFailedFinal() { + // Proves that the retry decision log for the finalising deterministic content error + // contains both the document fingerprint and the FAILED_FINAL classification. 
+ MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = + new DocumentProcessingCoordinator(recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + // Existing record already has one content error — second content error finalises + recordRepo.setLookupResult(new DocumentKnownProcessable( + buildRecord(ProcessingStatus.FAILED_RETRYABLE, new FailureCounters(1, 0)))); + + coordinatorWithCapturing.process(candidate, fingerprint, + new PreCheckFailed(candidate, PreCheckFailureReason.PAGE_LIMIT_EXCEEDED), + context, attemptStart); + + assertTrue(capturingLogger.anyInfoContains(FINGERPRINT_HEX), + "Finalising retry decision log must contain the document fingerprint. " + + "Captured info messages: " + capturingLogger.infoMessages); + assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"), + "Finalising retry decision log must contain the FAILED_FINAL classification. " + + "Captured info messages: " + capturingLogger.infoMessages); + } + /** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */ private static class CapturingProcessingLogger implements ProcessingLogger { int infoCallCount = 0; @@ -1434,4 +1528,58 @@ class DocumentProcessingCoordinatorTest { } } + + /** + * Captures formatted log messages for content verification. + * Resolves {@code {}} placeholders in SLF4J/Log4j2 style for later inspection. + */ + private static class MessageCapturingProcessingLogger implements ProcessingLogger { + final List<String> infoMessages = new ArrayList<>(); + final List<String> warnMessages = new ArrayList<>(); + final List<String> errorMessages = new ArrayList<>(); + + private static String format(String message, Object... 
args) { + if (args == null || args.length == 0) return message; + StringBuilder sb = new StringBuilder(); + int argIndex = 0; + int start = 0; + int pos; + while ((pos = message.indexOf("{}", start)) != -1 && argIndex < args.length) { + sb.append(message, start, pos); + sb.append(args[argIndex++]); + start = pos + 2; + } + sb.append(message, start, message.length()); + return sb.toString(); + } + + @Override + public void info(String message, Object... args) { + infoMessages.add(format(message, args)); + } + + @Override + public void debug(String message, Object... args) { + // Not captured for content — debug messages are not decision-level events + } + + @Override + public void debugSensitiveAiContent(String message, Object... args) { + // Not captured — tested separately + } + + @Override + public void warn(String message, Object... args) { + warnMessages.add(format(message, args)); + } + + @Override + public void error(String message, Object... args) { + errorMessages.add(format(message, args)); + } + + boolean anyInfoContains(String text) { + return infoMessages.stream().anyMatch(m -> m.contains(text)); + } + } } \ No newline at end of file diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java index 1e65278..b3051f7 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/usecase/BatchRunProcessingUseCaseTest.java @@ -6,10 +6,14 @@ import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure; import 
de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentPersistenceException; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordLookupResult; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess; import de.gecheckt.pdf.umbenenner.application.port.out.DocumentUnknown; +import de.gecheckt.pdf.umbenenner.application.port.out.FailureCounters; import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult; import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; @@ -42,6 +46,7 @@ import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError; import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; +import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import de.gecheckt.pdf.umbenenner.domain.model.RunId; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; @@ -751,6 +756,171 @@ class BatchRunProcessingUseCaseTest { "logProcessingOutcome muss bei TechnicalDocumentError warn() aufrufen"); } + // ------------------------------------------------------------------------- + // Batch-level M7 integration tests (real coordinator + capturing repos) + // These prove that skip and finalization semantics work in the actual batch run, + // not just at the coordinator unit-test level. 
+ // ------------------------------------------------------------------------- + + /** + * Full batch integration: a candidate whose fingerprint maps to a SUCCESS record + * must be historised as SKIPPED_ALREADY_PROCESSED without triggering the pipeline. + */ + @Test + void execute_successDocument_batchRunHistorisesSkippedAlreadyProcessed() throws Exception { + MockRunLockPort lockPort = new MockRunLockPort(); + RuntimeConfiguration config = buildConfig(tempDir); + + SourceDocumentCandidate candidate = makeCandidate("already-done.pdf"); + DocumentFingerprint fingerprint = makeFingerprint(candidate.uniqueIdentifier()); + + // Capturing repos to inspect what was persisted + BatchCapturingDocumentRecordRepository recordRepo = new BatchCapturingDocumentRecordRepository(); + BatchCapturingProcessingAttemptRepository attemptRepo = new BatchCapturingProcessingAttemptRepository(); + BatchCapturingUnitOfWorkPort unitOfWork = new BatchCapturingUnitOfWorkPort(recordRepo, attemptRepo); + + // Repo returns SUCCESS for this fingerprint + DocumentRecord successRecord = new DocumentRecord( + fingerprint, new SourceDocumentLocator("/tmp/already-done.pdf"), "already-done.pdf", + ProcessingStatus.SUCCESS, FailureCounters.zero(), + null, java.time.Instant.now(), java.time.Instant.now(), java.time.Instant.now(), + "/target", "2026-01-15 - Rechnung.pdf"); + recordRepo.setLookupResult(new DocumentTerminalSuccess(successRecord)); + + DocumentProcessingCoordinator realCoordinator = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWork, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3); + + // Fingerprint port returns the pre-defined fingerprint for this candidate + FingerprintPort fixedFingerprintPort = c -> new FingerprintSuccess(fingerprint); + + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new FixedCandidatesPort(List.of(candidate)), new NoOpExtractionPort(), + fixedFingerprintPort, 
realCoordinator); + BatchRunContext context = new BatchRunContext(new RunId("skip-success-run"), Instant.now()); + + useCase.execute(context); + + // Exactly one skip attempt must be recorded + assertEquals(1, attemptRepo.savedAttempts.size(), + "Exactly one skip attempt must be historised for a SUCCESS document"); + assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, attemptRepo.savedAttempts.get(0).status(), + "Skip attempt status must be SKIPPED_ALREADY_PROCESSED"); + assertFalse(attemptRepo.savedAttempts.get(0).retryable(), + "Skip attempt must not be retryable"); + } + + /** + * Full batch integration: a candidate whose fingerprint maps to a FAILED_FINAL record + * must be historised as SKIPPED_FINAL_FAILURE without triggering the pipeline. + */ + @Test + void execute_finallyFailedDocument_batchRunHistorisesSkippedFinalFailure() throws Exception { + MockRunLockPort lockPort = new MockRunLockPort(); + RuntimeConfiguration config = buildConfig(tempDir); + + SourceDocumentCandidate candidate = makeCandidate("permanent-failure.pdf"); + DocumentFingerprint fingerprint = makeFingerprint(candidate.uniqueIdentifier()); + + BatchCapturingDocumentRecordRepository recordRepo = new BatchCapturingDocumentRecordRepository(); + BatchCapturingProcessingAttemptRepository attemptRepo = new BatchCapturingProcessingAttemptRepository(); + BatchCapturingUnitOfWorkPort unitOfWork = new BatchCapturingUnitOfWorkPort(recordRepo, attemptRepo); + + // Repo returns FAILED_FINAL for this fingerprint + DocumentRecord failedFinalRecord = new DocumentRecord( + fingerprint, new SourceDocumentLocator("/tmp/permanent-failure.pdf"), "permanent-failure.pdf", + ProcessingStatus.FAILED_FINAL, new FailureCounters(2, 0), + java.time.Instant.now(), null, java.time.Instant.now(), java.time.Instant.now(), + null, null); + recordRepo.setLookupResult(new DocumentTerminalFinalFailure(failedFinalRecord)); + + DocumentProcessingCoordinator realCoordinator = new DocumentProcessingCoordinator( + 
recordRepo, attemptRepo, unitOfWork, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3); + + FingerprintPort fixedFingerprintPort = c -> new FingerprintSuccess(fingerprint); + + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, new FixedCandidatesPort(List.of(candidate)), new NoOpExtractionPort(), + fixedFingerprintPort, realCoordinator); + BatchRunContext context = new BatchRunContext(new RunId("skip-final-failure-run"), Instant.now()); + + useCase.execute(context); + + // Exactly one skip attempt must be recorded + assertEquals(1, attemptRepo.savedAttempts.size(), + "Exactly one skip attempt must be historised for a FAILED_FINAL document"); + assertEquals(ProcessingStatus.SKIPPED_FINAL_FAILURE, attemptRepo.savedAttempts.get(0).status(), + "Skip attempt status must be SKIPPED_FINAL_FAILURE"); + assertFalse(attemptRepo.savedAttempts.get(0).retryable(), + "Skip attempt must not be retryable"); + } + + /** + * Full batch integration: a batch with one terminal (SUCCESS) and one processable document + * handles each independently — the terminal document is skipped, the processable one is processed. + * Document errors on one candidate must not block processing of the other. 
+ */ + @Test + void execute_mixedTerminalAndProcessable_eachHandledIndependentlyInBatch() throws Exception { + MockRunLockPort lockPort = new MockRunLockPort(); + RuntimeConfiguration config = buildConfig(tempDir); + + SourceDocumentCandidate terminalCandidate = makeCandidate("terminal.pdf"); + SourceDocumentCandidate processableCandidate = makeCandidate("processable.pdf"); + + DocumentFingerprint terminalFp = makeFingerprint(terminalCandidate.uniqueIdentifier()); + DocumentFingerprint processableFp = makeFingerprint(processableCandidate.uniqueIdentifier()); + + BatchCapturingDocumentRecordRepository recordRepo = new BatchCapturingDocumentRecordRepository(); + BatchCapturingProcessingAttemptRepository attemptRepo = new BatchCapturingProcessingAttemptRepository(); + BatchCapturingUnitOfWorkPort unitOfWork = new BatchCapturingUnitOfWorkPort(recordRepo, attemptRepo); + + // Terminal candidate: SUCCESS in repo + DocumentRecord successRecord = new DocumentRecord( + terminalFp, new SourceDocumentLocator("/tmp/terminal.pdf"), "terminal.pdf", + ProcessingStatus.SUCCESS, FailureCounters.zero(), + null, java.time.Instant.now(), java.time.Instant.now(), java.time.Instant.now(), + "/target", "2026-01-15 - Rechnung.pdf"); + + // Per-fingerprint lookup: terminal gets SUCCESS, processable gets Unknown + recordRepo.setLookupByFingerprint(terminalFp, new DocumentTerminalSuccess(successRecord)); + recordRepo.setLookupByFingerprint(processableFp, new DocumentUnknown()); + + DocumentProcessingCoordinator realCoordinator = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWork, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), 3); + + FingerprintPort perCandidateFingerprintPort = candidate -> { + if (candidate.uniqueIdentifier().equals("terminal.pdf")) return new FingerprintSuccess(terminalFp); + return new FingerprintSuccess(processableFp); + }; + + FixedCandidatesPort candidatesPort = new FixedCandidatesPort( + 
List.of(terminalCandidate, processableCandidate)); + FixedExtractionPort extractionPort = new FixedExtractionPort( + new PdfExtractionSuccess("Invoice text", new PdfPageCount(1))); + + DefaultBatchRunProcessingUseCase useCase = buildUseCase( + config, lockPort, candidatesPort, extractionPort, + perCandidateFingerprintPort, realCoordinator); + BatchRunContext context = new BatchRunContext(new RunId("mixed-batch-run"), Instant.now()); + + useCase.execute(context); + + // Two attempts must be recorded (one skip + one pipeline attempt) + assertEquals(2, attemptRepo.savedAttempts.size(), + "Two attempts must be recorded: one skip and one pipeline attempt"); + + // First attempt: skip for the terminal candidate + assertEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, attemptRepo.savedAttempts.get(0).status(), + "First attempt must be SKIPPED_ALREADY_PROCESSED for the terminal candidate"); + + // Second attempt: pipeline result for the processable candidate (AI fails → FAILED_RETRYABLE transient) + assertNotEquals(ProcessingStatus.SKIPPED_ALREADY_PROCESSED, attemptRepo.savedAttempts.get(1).status(), + "Second attempt must not be a skip — it must be the pipeline result for the processable candidate"); + } + // ------------------------------------------------------------------------- // Log correlation tests // ------------------------------------------------------------------------- @@ -1236,6 +1406,108 @@ class BatchRunProcessingUseCaseTest { } } + /** + * DocumentRecordRepository for batch integration tests. + * Supports per-fingerprint lookup results and records written records. 
+ */ + private static class BatchCapturingDocumentRecordRepository implements DocumentRecordRepository { + private final Map<String, DocumentRecordLookupResult> lookupByFp = new java.util.HashMap<>(); + private DocumentRecordLookupResult defaultResult = new DocumentUnknown(); + final List<DocumentRecord> createdRecords = new ArrayList<>(); + final List<DocumentRecord> updatedRecords = new ArrayList<>(); + + void setLookupResult(DocumentRecordLookupResult result) { + this.defaultResult = result; + } + + void setLookupByFingerprint(DocumentFingerprint fp, DocumentRecordLookupResult result) { + lookupByFp.put(fp.sha256Hex(), result); + } + + @Override + public DocumentRecordLookupResult findByFingerprint(DocumentFingerprint fingerprint) { + return lookupByFp.getOrDefault(fingerprint.sha256Hex(), defaultResult); + } + + @Override + public void create(DocumentRecord record) { + createdRecords.add(record); + } + + @Override + public void update(DocumentRecord record) { + updatedRecords.add(record); + } + } + + /** + * ProcessingAttemptRepository for batch integration tests. Records all saved attempts. 
+ */ + private static class BatchCapturingProcessingAttemptRepository implements ProcessingAttemptRepository { + final List<ProcessingAttempt> savedAttempts = new ArrayList<>(); + + @Override + public int loadNextAttemptNumber(DocumentFingerprint fingerprint) { + return savedAttempts.size() + 1; + } + + @Override + public void save(ProcessingAttempt attempt) { + savedAttempts.add(attempt); + } + + @Override + public List<ProcessingAttempt> findAllByFingerprint(DocumentFingerprint fingerprint) { + return savedAttempts.stream() + .filter(a -> a.fingerprint().sha256Hex().equals(fingerprint.sha256Hex())) + .toList(); + } + + @Override + public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) { + return savedAttempts.stream() + .filter(a -> a.fingerprint().sha256Hex().equals(fingerprint.sha256Hex()) + && a.status() == ProcessingStatus.PROPOSAL_READY) + .reduce((first, second) -> second) + .orElse(null); + } + } + + /** + * UnitOfWorkPort for batch integration tests. Executes operations directly + * against the capturing repos. 
+ */ + private static class BatchCapturingUnitOfWorkPort implements UnitOfWorkPort { + private final BatchCapturingDocumentRecordRepository recordRepo; + private final BatchCapturingProcessingAttemptRepository attemptRepo; + + BatchCapturingUnitOfWorkPort(BatchCapturingDocumentRecordRepository recordRepo, + BatchCapturingProcessingAttemptRepository attemptRepo) { + this.recordRepo = recordRepo; + this.attemptRepo = attemptRepo; + } + + @Override + public void executeInTransaction(Consumer<TransactionOperations> operations) { + operations.accept(new TransactionOperations() { + @Override + public void saveProcessingAttempt(ProcessingAttempt attempt) { + attemptRepo.save(attempt); + } + + @Override + public void createDocumentRecord(DocumentRecord record) { + recordRepo.create(record); + } + + @Override + public void updateDocumentRecord(DocumentRecord record) { + recordRepo.update(record); + } + }); + } + } + /** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */ private static class CapturingProcessingLogger implements ProcessingLogger { int infoCallCount = 0;