1
0

M8 komplett umgesetzt

This commit is contained in:
2026-04-08 16:30:13 +02:00
parent a3f47ba560
commit d61316c699
21 changed files with 2377 additions and 89 deletions

View File

@@ -62,6 +62,11 @@
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>

View File

@@ -310,10 +310,10 @@ public class BootstrapRunner {
LOG.error("Configuration validation failed: {}", e.getMessage());
return 1;
} catch (DocumentPersistenceException e) {
LOG.error("Persistence operation failed: {}", e.getMessage(), e);
LOG.error("Schema initialization failed: {}", e.getMessage(), e);
return 1;
} catch (Exception e) {
LOG.error("Bootstrap failure during startup.", e);
LOG.error("Unexpected startup failure.", e);
return 1;
}
}
@@ -391,7 +391,7 @@ public class BootstrapRunner {
*/
private BatchRunContext createRunContext() {
RunId runId = new RunId(UUID.randomUUID().toString());
LOG.info("Batch run started. RunId: {}", runId);
LOG.info("Preparing batch run. RunId: {}", runId);
return new BatchRunContext(runId, Instant.now());
}

View File

@@ -0,0 +1,698 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import static org.assertj.core.api.Assertions.assertThat;
/**
* Deterministic end-to-end tests for the complete batch processing pipeline.
* <p>
* Each test method is independent and uses its own {@link E2ETestContext} backed by a
* JUnit {@code @TempDir}. All infrastructure adapters (SQLite, filesystem, PDF extraction,
* fingerprinting) are real production implementations. Only the AI invocation port is
* replaced by a configurable {@link StubAiInvocationPort} to avoid real HTTP calls.
*
* <h2>End-to-end invariants verified</h2>
* <ul>
* <li><strong>Happy-path to {@code SUCCESS}</strong>: two-run flow via {@code PROPOSAL_READY}
* intermediate state to a final {@code SUCCESS} with a target file on disk.</li>
* <li><strong>Deterministic content error</strong>: blank PDFs (no extractable text) reach
* {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after the
* second run, exercising the one-retry rule for deterministic content errors.</li>
* <li><strong>Transient technical error</strong>: AI stub failures produce
* {@code FAILED_RETRYABLE} (transient counter incremented) without a target file.</li>
* <li><strong>Transient error exhaustion</strong>: repeated AI stub failures across
* {@code maxRetriesTransient} runs increment the transient counter each time until
* the limit is reached and the document is finalized to {@code FAILED_FINAL}.</li>
* <li><strong>Skip after {@code SUCCESS}</strong>: a document whose status is
* {@code SUCCESS} generates exactly one {@code SKIPPED_ALREADY_PROCESSED} attempt
* in the next run; the overall status and target file remain unchanged.</li>
* <li><strong>Skip after {@code FAILED_FINAL}</strong>: a document whose status is
* {@code FAILED_FINAL} generates exactly one {@code SKIPPED_FINAL_FAILURE} attempt
* in the next run; the overall status and failure counters remain unchanged.</li>
* <li><strong>{@code PROPOSAL_READY} with later finalization</strong>: a document in
* {@code PROPOSAL_READY} state is finalized without an AI call in the next run,
* confirming the leading-proposal-attempt rule.</li>
* <li><strong>Target copy error with immediate within-run retry (success)</strong>: when the
* first copy attempt fails but the immediate within-run retry succeeds, the document is
* recorded as {@code SUCCESS} and no transient error counter is incremented.</li>
* <li><strong>Target copy error with immediate within-run retry (failure)</strong>: when both
* the initial and immediate-retry copy attempts fail, the document is recorded as
* {@code FAILED_RETRYABLE} with an incremented transient counter.</li>
* <li><strong>Duplicate target filename suffix</strong>: when two distinct documents produce
* the same base target filename in the same batch run, the second receives a {@code (1)}
* suffix to avoid overwriting the first.</li>
* <li><strong>Mixed batch outcome</strong>: a batch run that contains both successfully
* processed documents and documents with content errors completes with
* {@link BatchRunOutcome#SUCCESS}, confirming that document-level failures do not
* abort the batch or change the overall exit-code-relevant outcome.</li>
* </ul>
*
* <h2>Document text used in tests</h2>
* <p>
* Searchable PDFs embed enough text to pass the minimum-text pre-check. The AI stub
* returns a title of {@code "Stromabrechnung"} and date {@code "2024-01-15"} by default,
* producing a target filename of {@code "2024-01-15 - Stromabrechnung.pdf"}.
*/
class BatchRunEndToEndTest {
private static final String SAMPLE_PDF_TEXT =
"Stromabrechnung Kundenname Musterstadt Datum 15.01.2024 Betrag 123,45 EUR";
// =========================================================================
// Scenario 1: Happy-path to SUCCESS
// =========================================================================
/**
 * Exercises the complete two-run happy path:
 * <ol>
 *   <li>Run 1: the AI stub returns a valid proposal, leaving the document in
 *       {@code PROPOSAL_READY} with no target file written yet.</li>
 *   <li>Run 2: the persisted proposal is finalized without a second AI call,
 *       the target file is copied, and the document reaches {@code SUCCESS}.</li>
 * </ol>
 * Confirms the leading-proposal-attempt rule and the two-phase finalization.
 */
@Test
void happyPath_twoRuns_reachesSuccess(@TempDir Path tempDir) throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("rechnung.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("rechnung.pdf"));

        // --- Run 1: the AI stub produces a naming proposal ---
        BatchRunOutcome firstOutcome = ctx.runBatch();
        assertThat(firstOutcome).isEqualTo(BatchRunOutcome.SUCCESS);
        assertThat(ctx.aiStub.invocationCount()).isEqualTo(1);

        DocumentRecord afterFirstRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterFirstRun.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.listTargetFiles()).isEmpty();

        List<ProcessingAttempt> firstAttempts = ctx.findAttempts(fingerprint);
        assertThat(firstAttempts).hasSize(1);
        assertThat(firstAttempts.get(0).status()).isEqualTo(ProcessingStatus.PROPOSAL_READY);

        // --- Run 2: finalization must reuse the stored proposal, not the AI ---
        ctx.aiStub.resetInvocationCount();
        BatchRunOutcome secondOutcome = ctx.runBatch();
        assertThat(secondOutcome).isEqualTo(BatchRunOutcome.SUCCESS);
        assertThat(ctx.aiStub.invocationCount())
                .as("AI must not be called again when PROPOSAL_READY exists")
                .isEqualTo(0);

        DocumentRecord afterSecondRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterSecondRun.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
        assertThat(afterSecondRun.lastSuccessInstant()).isNotNull();
        assertThat(afterSecondRun.lastTargetFileName()).isNotNull();

        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles).hasSize(1);
        assertThat(targetFiles.get(0)).endsWith(".pdf");
        assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();

        List<ProcessingAttempt> secondAttempts = ctx.findAttempts(fingerprint);
        assertThat(secondAttempts).hasSize(2);
        assertThat(secondAttempts.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
    }
}
// =========================================================================
// Scenario 2: Deterministic content error → FAILED_RETRYABLE → FAILED_FINAL
// =========================================================================
/**
 * Exercises the one-retry rule for deterministic content errors:
 * <ol>
 *   <li>Run 1: a blank PDF fails the minimum-text pre-check →
 *       {@code FAILED_RETRYABLE} with content error counter 1.</li>
 *   <li>Run 2: the same pre-check fails again → {@code FAILED_FINAL} with
 *       content error counter 2.</li>
 * </ol>
 * The content pre-check prevents any AI invocation in both runs.
 */
@Test
void deterministicContentError_twoRuns_reachesFailedFinal(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createBlankPdf("blank.pdf");
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("blank.pdf"));

        // --- Run 1: first content error, still retryable ---
        ctx.runBatch();
        assertThat(ctx.aiStub.invocationCount())
                .as("AI must not be called for a blank PDF")
                .isEqualTo(0);
        DocumentRecord afterFirstRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterFirstRun.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(afterFirstRun.failureCounters().contentErrorCount()).isEqualTo(1);
        assertThat(afterFirstRun.failureCounters().transientErrorCount()).isEqualTo(0);
        List<ProcessingAttempt> firstAttempts = ctx.findAttempts(fingerprint);
        assertThat(firstAttempts).hasSize(1);
        assertThat(firstAttempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(firstAttempts.get(0).retryable()).isTrue();

        // --- Run 2: second content error finalises the document ---
        ctx.runBatch();
        DocumentRecord afterSecondRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterSecondRun.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(afterSecondRun.failureCounters().contentErrorCount()).isEqualTo(2);
        List<ProcessingAttempt> secondAttempts = ctx.findAttempts(fingerprint);
        assertThat(secondAttempts).hasSize(2);
        assertThat(secondAttempts.get(1).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(secondAttempts.get(1).retryable()).isFalse();

        // A failed document must never produce a target file.
        assertThat(ctx.listTargetFiles()).isEmpty();
    }
}
// =========================================================================
// Scenario 3: Transient technical error → FAILED_RETRYABLE
// =========================================================================
/**
 * Exercises the transient failure path: a technically failing AI stub yields
 * {@code FAILED_RETRYABLE}, increments the transient error counter (not the
 * content error counter), and never writes a target file. The document stays
 * retryable until the transient limit is exhausted in later runs.
 */
@Test
void transientAiFailure_producesFailedRetryable(@TempDir Path tempDir) throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));
        ctx.aiStub.configureTechnicalFailure();

        ctx.runBatch();

        assertThat(ctx.aiStub.invocationCount())
                .as("AI must have been invoked (and failed) once")
                .isEqualTo(1);
        DocumentRecord record = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(record.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(record.failureCounters().transientErrorCount()).isEqualTo(1);
        assertThat(record.failureCounters().contentErrorCount()).isEqualTo(0);
        List<ProcessingAttempt> attempts = ctx.findAttempts(fingerprint);
        assertThat(attempts).hasSize(1);
        assertThat(attempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(attempts.get(0).retryable()).isTrue();
        assertThat(ctx.listTargetFiles()).isEmpty();
    }
}
// =========================================================================
// Scenario 4: Skip after SUCCESS
// =========================================================================
/**
 * Exercises the skip-after-success invariant: once a document has reached
 * {@code SUCCESS} (two runs), an additional run appends exactly one
 * {@code SKIPPED_ALREADY_PROCESSED} attempt and leaves the overall status,
 * the stored target filename, and the target folder contents untouched.
 */
@Test
void skipAfterSuccess_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));

        // Drive the document to SUCCESS with the regular two-run flow.
        ctx.runBatch(); // → PROPOSAL_READY
        ctx.runBatch(); // → SUCCESS
        DocumentRecord successRecord = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(successRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
        String targetFileBefore = successRecord.lastTargetFileName();

        // --- Run 3: must only record a skip attempt ---
        ctx.aiStub.resetInvocationCount();
        BatchRunOutcome thirdOutcome = ctx.runBatch();
        assertThat(thirdOutcome).isEqualTo(BatchRunOutcome.SUCCESS);
        assertThat(ctx.aiStub.invocationCount())
                .as("AI must not be called for an already-successful document")
                .isEqualTo(0);

        DocumentRecord afterThirdRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterThirdRun.overallStatus())
                .as("Overall status must remain SUCCESS after a skip")
                .isEqualTo(ProcessingStatus.SUCCESS);
        assertThat(afterThirdRun.lastTargetFileName())
                .as("Target filename must not change after a skip")
                .isEqualTo(targetFileBefore);

        List<ProcessingAttempt> attempts = ctx.findAttempts(fingerprint);
        assertThat(attempts).hasSize(3);
        assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
        assertThat(attempts.get(2).retryable()).isFalse();

        // The skip must not add, remove, or rename any target file.
        assertThat(ctx.listTargetFiles()).hasSize(1);
    }
}
// =========================================================================
// Scenario 5: Skip after FAILED_FINAL
// =========================================================================
/**
 * Exercises the skip-after-final-failure invariant: once a document has reached
 * {@code FAILED_FINAL} (two blank-PDF runs), an additional run appends exactly
 * one {@code SKIPPED_FINAL_FAILURE} attempt and leaves both the overall status
 * and the failure counters untouched.
 */
@Test
void skipAfterFailedFinal_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createBlankPdf("blank.pdf");
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("blank.pdf"));

        // Drive the document to FAILED_FINAL with two blank-PDF runs.
        ctx.runBatch(); // → FAILED_RETRYABLE
        ctx.runBatch(); // → FAILED_FINAL
        DocumentRecord finalRecord = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(finalRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        int contentErrorsBefore = finalRecord.failureCounters().contentErrorCount();

        // --- Run 3: must only record a skip attempt ---
        ctx.runBatch();
        DocumentRecord afterThirdRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterThirdRun.overallStatus())
                .as("Overall status must remain FAILED_FINAL after a skip")
                .isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(afterThirdRun.failureCounters().contentErrorCount())
                .as("Failure counters must not change after a skip")
                .isEqualTo(contentErrorsBefore);

        List<ProcessingAttempt> attempts = ctx.findAttempts(fingerprint);
        assertThat(attempts).hasSize(3);
        assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_FINAL_FAILURE);
        assertThat(attempts.get(2).retryable()).isFalse();
    }
}
// =========================================================================
// Scenario 6: Existing PROPOSAL_READY with later finalization
// =========================================================================
/**
 * Isolates the leading-proposal-attempt rule:
 * <ol>
 *   <li>Run 1: a valid proposal is produced → {@code PROPOSAL_READY}.</li>
 *   <li>Run 2: the AI stub is armed to fail technically; the coordinator must
 *       nevertheless finalize the document to {@code SUCCESS} from the persisted
 *       proposal, proving the AI is never re-invoked for that state.</li>
 * </ol>
 */
@Test
void proposalReadyFinalization_noAiCallInSecondRun(@TempDir Path tempDir) throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));

        // --- Run 1: establish PROPOSAL_READY ---
        ctx.runBatch();
        DocumentRecord afterFirstRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterFirstRun.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.listTargetFiles()).isEmpty();

        // --- Run 2: an AI call would now fail — so there must be none ---
        ctx.aiStub.configureTechnicalFailure();
        ctx.aiStub.resetInvocationCount();
        ctx.runBatch();
        assertThat(ctx.aiStub.invocationCount())
                .as("AI must not be invoked during PROPOSAL_READY finalization")
                .isEqualTo(0);

        DocumentRecord afterSecondRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterSecondRun.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles).hasSize(1);
        assertThat(targetFiles.get(0)).endsWith(".pdf");
    }
}
// =========================================================================
// Scenario 7: Target copy error with immediate within-run retry
// =========================================================================
/**
 * Exercises the success path of the immediate within-run copy retry:
 * <ol>
 *   <li>Run 1: the AI produces {@code PROPOSAL_READY}.</li>
 *   <li>Run 2: the {@link TargetFileCopyPort} override fails on the first call
 *       and delegates to the real filesystem adapter on the second. The
 *       coordinator must retry immediately within the same run and record
 *       {@code SUCCESS}.</li>
 * </ol>
 * A successful immediate retry must not count as a cross-run transient error.
 */
@Test
void targetCopyError_immediateRetrySucceeds_recordsSuccess(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));

        // --- Run 1: produce PROPOSAL_READY ---
        ctx.runBatch();
        DocumentRecord afterFirstRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterFirstRun.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);

        // --- Run 2: fail the first copy attempt, delegate on the retry ---
        TargetFileCopyPort delegate =
                new de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter(
                        ctx.targetFolder());
        AtomicInteger copyInvocations = new AtomicInteger(0);
        TargetFileCopyPort failOnceThenDelegate = (locator, resolvedFilename) -> {
            if (copyInvocations.incrementAndGet() == 1) {
                // First attempt: simulate a transient write failure
                return new TargetFileCopyTechnicalFailure(
                        "Simulated write failure on first attempt", true);
            }
            // Second attempt (immediate within-run retry): delegate to real adapter
            return delegate.copyToTarget(locator, resolvedFilename);
        };
        ctx.setTargetFileCopyPortOverride(failOnceThenDelegate);
        ctx.runBatch();

        assertThat(copyInvocations.get())
                .as("Copy port must have been called twice (initial + retry)")
                .isEqualTo(2);
        DocumentRecord afterSecondRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterSecondRun.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
        assertThat(afterSecondRun.failureCounters().transientErrorCount())
                .as("Immediate within-run retry must not increment the transient error counter")
                .isEqualTo(0);

        List<ProcessingAttempt> attempts = ctx.findAttempts(fingerprint);
        assertThat(attempts).hasSize(2);
        assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);

        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles).hasSize(1);
        assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
    }
}
// =========================================================================
// Scenario 8: Transient error exhaustion → FAILED_FINAL
// =========================================================================
/**
 * Exercises the full transient-error exhaustion path across three runs with a
 * permanently failing AI stub:
 * <ol>
 *   <li>Run 1: transient counter 0 → 1 (below limit) → {@code FAILED_RETRYABLE}.</li>
 *   <li>Run 2: transient counter 1 → 2 (below limit) → {@code FAILED_RETRYABLE}.</li>
 *   <li>Run 3: transient counter 2 → 3 (= limit) → {@code FAILED_FINAL};
 *       no target file is ever written.</li>
 * </ol>
 * Pins the {@code maxRetriesTransient} boundary: the run that raises the counter
 * to the configured limit is the run that finalises the document.
 */
@Test
void transientErrors_multipleRuns_exhaustsLimit_reachesFailedFinal(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));
        ctx.aiStub.configureTechnicalFailure();

        // --- Run 1: counter 0 → 1, below limit → FAILED_RETRYABLE ---
        ctx.runBatch();
        DocumentRecord afterFirstRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterFirstRun.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(afterFirstRun.failureCounters().transientErrorCount()).isEqualTo(1);
        assertThat(afterFirstRun.failureCounters().contentErrorCount()).isEqualTo(0);
        List<ProcessingAttempt> firstAttempts = ctx.findAttempts(fingerprint);
        assertThat(firstAttempts).hasSize(1);
        assertThat(firstAttempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(firstAttempts.get(0).retryable()).isTrue();

        // --- Run 2: counter 1 → 2, below limit → FAILED_RETRYABLE ---
        ctx.runBatch();
        DocumentRecord afterSecondRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterSecondRun.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(afterSecondRun.failureCounters().transientErrorCount()).isEqualTo(2);
        List<ProcessingAttempt> secondAttempts = ctx.findAttempts(fingerprint);
        assertThat(secondAttempts).hasSize(2);
        assertThat(secondAttempts.get(1).retryable()).isTrue();

        // --- Run 3: counter 2 → 3 = limit → FAILED_FINAL ---
        ctx.runBatch();
        DocumentRecord afterThirdRun = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(afterThirdRun.overallStatus())
                .as("Transient counter reaching the configured limit must finalise the document")
                .isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(afterThirdRun.failureCounters().transientErrorCount())
                .as("Transient counter must equal maxRetriesTransient after exhaustion")
                .isEqualTo(E2ETestContext.MAX_RETRIES_TRANSIENT);
        assertThat(afterThirdRun.failureCounters().contentErrorCount()).isEqualTo(0);
        List<ProcessingAttempt> thirdAttempts = ctx.findAttempts(fingerprint);
        assertThat(thirdAttempts).hasSize(3);
        assertThat(thirdAttempts.get(2).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(thirdAttempts.get(2).retryable()).isFalse();
        assertThat(ctx.listTargetFiles()).isEmpty();
    }
}
// =========================================================================
// Scenario 9: Target copy error both attempts fail → FAILED_RETRYABLE
// =========================================================================
/**
 * Exercises the failure path of the immediate within-run copy retry:
 * <ol>
 *   <li>Run 1: a valid proposal is produced → {@code PROPOSAL_READY}.</li>
 *   <li>Run 2: the {@link TargetFileCopyPort} override fails on every call, so
 *       both the initial attempt and the single immediate retry fail. The result
 *       is classified as a transient technical error: {@code FAILED_RETRYABLE}
 *       with the transient counter incremented exactly once.</li>
 * </ol>
 * Confirms the within-run retry does not mask a persistent copy failure.
 */
@Test
void targetCopyError_bothAttemptsFail_reachesFailedRetryable(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        DocumentFingerprint fingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("doc.pdf"));

        // --- Run 1: establish PROPOSAL_READY ---
        ctx.runBatch();
        assertThat(ctx.findDocumentRecord(fingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);

        // --- Run 2: every copy attempt (initial + retry) fails ---
        ctx.setTargetFileCopyPortOverride(
                (locator, resolvedFilename) ->
                        new TargetFileCopyTechnicalFailure(
                                "Simulated persistent write failure", true));
        ctx.runBatch();

        DocumentRecord record = ctx.findDocumentRecord(fingerprint).orElseThrow();
        assertThat(record.overallStatus())
                .as("Both copy attempts failing must produce FAILED_RETRYABLE "
                        + "(transient error, limit not yet reached)")
                .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(record.failureCounters().transientErrorCount())
                .as("The double copy failure must increment the transient counter exactly once")
                .isEqualTo(1);

        List<ProcessingAttempt> attempts = ctx.findAttempts(fingerprint);
        assertThat(attempts).hasSize(2);
        assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(attempts.get(1).retryable()).isTrue();
        assertThat(ctx.listTargetFiles()).isEmpty();
    }
}
// =========================================================================
// Scenario 10: Two documents with identical target name → duplicate suffix
// =========================================================================
/**
 * Exercises the duplicate target filename suffix rule end-to-end: two distinct
 * source PDFs resolve to the same base target name
 * ({@code "2024-01-15 - Stromabrechnung.pdf"}) in the same finalization run.
 * <ol>
 *   <li>Run 1: both PDFs receive the same stubbed AI response →
 *       both reach {@code PROPOSAL_READY}.</li>
 *   <li>Run 2: both are finalized; the first written claims the base name,
 *       the second must receive the {@code (1)} suffix.</li>
 * </ol>
 * Both documents end in {@code SUCCESS} with exactly two target files on disk.
 */
@Test
void twoDifferentDocuments_sameProposedName_secondGetsDuplicateSuffix(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        // Two distinct PDFs; the AI stub returns the same title and date for both
        ctx.createSearchablePdf("rechnung1.pdf", SAMPLE_PDF_TEXT);
        ctx.createSearchablePdf("rechnung2.pdf",
                "Stromabrechnung Zweiter Kunde Musterstadt Datum 15.01.2024 Betrag 99,00 EUR");
        DocumentFingerprint firstFingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("rechnung1.pdf"));
        DocumentFingerprint secondFingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("rechnung2.pdf"));

        // --- Run 1: both documents reach PROPOSAL_READY ---
        ctx.runBatch();
        assertThat(ctx.findDocumentRecord(firstFingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.findDocumentRecord(secondFingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.listTargetFiles()).isEmpty();

        // --- Run 2: both finalized; the second must receive the (1) suffix ---
        ctx.runBatch();
        assertThat(ctx.findDocumentRecord(firstFingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.SUCCESS);
        assertThat(ctx.findDocumentRecord(secondFingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.SUCCESS);

        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles)
                .as("Both distinct documents must produce separate target files")
                .hasSize(2);
        assertThat(targetFiles)
                .as("Base name must exist for the first document written")
                .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung.pdf"));
        assertThat(targetFiles)
                .as("Duplicate suffix (1) must be appended for the second document written")
                .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung(1).pdf"));
    }
}
// =========================================================================
// Scenario 11: Mixed batch document failures do not affect batch outcome
// =========================================================================
/**
 * Exercises the exit-code contract: document-level failures never fail the batch.
 * <ol>
 *   <li>Run 1: a searchable PDF reaches {@code PROPOSAL_READY} while a blank PDF
 *       reaches {@code FAILED_RETRYABLE}; the batch still returns
 *       {@link BatchRunOutcome#SUCCESS}.</li>
 *   <li>Run 2: the searchable PDF is finalized to {@code SUCCESS} and the blank
 *       PDF to {@code FAILED_FINAL}; the batch again returns
 *       {@link BatchRunOutcome#SUCCESS}.</li>
 * </ol>
 * Only hard bootstrap or infrastructure failures may produce a non-zero exit code.
 */
@Test
void mixedBatch_oneSuccess_oneContentError_batchOutcomeIsSuccess(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("good.pdf", SAMPLE_PDF_TEXT);
        ctx.createBlankPdf("blank.pdf");
        DocumentFingerprint goodFingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("good.pdf"));
        DocumentFingerprint blankFingerprint =
                ctx.computeFingerprint(ctx.sourceFolder().resolve("blank.pdf"));

        // --- Run 1: one proposal, one content error ---
        BatchRunOutcome firstOutcome = ctx.runBatch();
        assertThat(firstOutcome)
                .as("Batch must complete with SUCCESS even when individual documents fail")
                .isEqualTo(BatchRunOutcome.SUCCESS);
        assertThat(ctx.findDocumentRecord(goodFingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.findDocumentRecord(blankFingerprint).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(ctx.findDocumentRecord(blankFingerprint).orElseThrow()
                .failureCounters().contentErrorCount()).isEqualTo(1);

        // --- Run 2: one finalization to SUCCESS, one to FAILED_FINAL ---
        BatchRunOutcome secondOutcome = ctx.runBatch();
        assertThat(secondOutcome)
                .as("Batch must complete with SUCCESS even when a document is finalised "
                        + "to FAILED_FINAL")
                .isEqualTo(BatchRunOutcome.SUCCESS);
        DocumentRecord goodRecord = ctx.findDocumentRecord(goodFingerprint).orElseThrow();
        assertThat(goodRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
        DocumentRecord blankRecord = ctx.findDocumentRecord(blankFingerprint).orElseThrow();
        assertThat(blankRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(blankRecord.failureCounters().contentErrorCount()).isEqualTo(2);

        // Exactly one target file may exist: the one from the successful document.
        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles).hasSize(1);
        assertThat(targetFiles.get(0)).endsWith(".pdf");
    }
}
}

View File

@@ -0,0 +1,406 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.prompt.FilesystemPromptPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
import de.gecheckt.pdf.umbenenner.application.service.AiNamingService;
import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.bootstrap.adapter.Log4jProcessingLogger;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
/**
* Full adapter wiring context for deterministic end-to-end tests of the batch processing pipeline.
* <p>
* Provides real infrastructure adapters for all subsystems (SQLite persistence, filesystem
* source/target folders, PDF text extraction, SHA-256 fingerprinting, run locking) and a
* configurable stub ({@link StubAiInvocationPort}) for the AI invocation port.
* This ensures that end-to-end tests cover the complete production code path without
* performing real HTTP calls to an AI service.
*
* <h2>Invariants verified by this context</h2>
* <ul>
* <li>Happy-path to {@code SUCCESS}: two-run flow where Run 1 produces {@code PROPOSAL_READY}
* and Run 2 copies the file and records {@code SUCCESS}.</li>
* <li>Deterministic content error: blank PDFs (no extractable text) produce
* {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after a
* second run.</li>
* <li>Transient technical error: AI stub failures produce {@code FAILED_RETRYABLE} for each
* run until the transient error limit is reached, at which point the document is
* finalized to {@code FAILED_FINAL}.</li>
* <li>Skip after {@code SUCCESS}: a document in {@code SUCCESS} state generates a
* {@code SKIPPED_ALREADY_PROCESSED} attempt in subsequent runs.</li>
* <li>Skip after {@code FAILED_FINAL}: a document in {@code FAILED_FINAL} state generates a
* {@code SKIPPED_FINAL_FAILURE} attempt in subsequent runs.</li>
* <li>{@code PROPOSAL_READY} with later finalization: a document in {@code PROPOSAL_READY}
* state is finalized without an AI call in the next run.</li>
* <li>Target copy error with immediate retry: when the first copy attempt fails but the
* immediate within-run retry succeeds, the document is recorded as {@code SUCCESS}.</li>
* </ul>
*
* <h2>Usage pattern</h2>
* <pre>{@code
* @TempDir Path tempDir;
*
* @Test
* void example() throws Exception {
* try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
* ctx.createSearchablePdf("doc.pdf", "Rechnung 2024-01-15 ...");
* BatchRunOutcome run1 = ctx.runBatch();
* // assertions...
* }
* }
* }</pre>
*
* <h2>Thread safety</h2>
* <p>
* Not thread-safe. Each test method should use its own context instance.
*/
public final class E2ETestContext implements AutoCloseable {

    /** Page-count limit; exceeding it counts as a deterministic content error. */
    static final int MAX_PAGES = 50;

    /** Upper bound on the number of extracted text characters handed to the AI port. */
    static final int MAX_TEXT_CHARS = 10_000;

    /**
     * Transient-retry budget before a document is finalized to {@code FAILED_FINAL}.
     * Set to 3 so that multi-run transient-failure tests are not finalized immediately.
     */
    static final int MAX_RETRIES_TRANSIENT = 3;

    /** Model name carried in attempt history (no real inference occurs). */
    static final String AI_MODEL = "e2e-stub-model";

    private final Path sourceFolder;
    private final Path targetFolder;
    private final Path lockFile;
    private final Path promptFile;
    private final String jdbcUrl;
    private final SqliteDocumentRecordRepositoryAdapter recordRepository;
    private final SqliteProcessingAttemptRepositoryAdapter attemptRepository;

    /**
     * Configurable AI stub shared by every batch run of this context. Tests may call
     * {@link StubAiInvocationPort#configureSuccess},
     * {@link StubAiInvocationPort#configureTechnicalFailure}, or
     * {@link StubAiInvocationPort#reset()} between runs.
     */
    public final StubAiInvocationPort aiStub;

    /**
     * Optional replacement for the {@link TargetFileCopyPort}. A {@code null} value selects
     * the real {@link FilesystemTargetFileCopyAdapter}; a non-null value (installed via
     * {@link #setTargetFileCopyPortOverride}) lets tests simulate copy failures.
     */
    private TargetFileCopyPort copyPortOverride;

    private E2ETestContext(
            Path sourceFolder,
            Path targetFolder,
            Path lockFile,
            Path promptFile,
            String jdbcUrl,
            SqliteDocumentRecordRepositoryAdapter recordRepository,
            SqliteProcessingAttemptRepositoryAdapter attemptRepository,
            StubAiInvocationPort aiStub) {
        this.sourceFolder = sourceFolder;
        this.targetFolder = targetFolder;
        this.lockFile = lockFile;
        this.promptFile = promptFile;
        this.jdbcUrl = jdbcUrl;
        this.recordRepository = recordRepository;
        this.attemptRepository = attemptRepository;
        this.aiStub = aiStub;
    }

    /**
     * Builds a fully wired end-to-end test context rooted in {@code tempDir}.
     * <p>
     * Creates the {@code source/} and {@code target/} subdirectories plus a minimal prompt
     * file, initializes the SQLite schema, and constructs the persistence adapters.
     *
     * @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
     * @return a ready-to-use context; the caller is responsible for closing it
     * @throws Exception if schema initialization or directory/file creation fails
     */
    public static E2ETestContext initialize(Path tempDir) throws Exception {
        Path source = Files.createDirectories(tempDir.resolve("source"));
        Path target = Files.createDirectories(tempDir.resolve("target"));
        Path lock = tempDir.resolve("run.lock");
        Path prompt = tempDir.resolve("prompt.txt");
        Files.writeString(prompt,
                "Analysiere das folgende Dokument und liefere Datum, Titel und Begruendung als JSON-Objekt.");
        // Forward slashes keep the JDBC URL valid on Windows paths as well.
        String jdbcUrl = "jdbc:sqlite:"
                + tempDir.resolve("test.db").toAbsolutePath().toString().replace('\\', '/');
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
        return new E2ETestContext(
                source, target, lock, prompt, jdbcUrl,
                new SqliteDocumentRecordRepositoryAdapter(jdbcUrl),
                new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl),
                new StubAiInvocationPort());
    }

    // =========================================================================
    // Test fixture creation
    // =========================================================================

    /**
     * Writes a single-page searchable PDF with the given text into the source folder.
     * The file is ready to be picked up by the next batch run.
     *
     * @param filename the PDF filename (e.g. {@code "rechnung.pdf"})
     * @param textContent text to embed; should be at least a few words to pass pre-checks
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createSearchablePdf(String filename, String textContent) throws IOException {
        Path pdf = sourceFolder.resolve(filename);
        MinimalPdfFactory.createSearchablePdf(pdf, textContent);
        return pdf;
    }

    /**
     * Writes a single-page blank PDF (no extractable text) into the source folder.
     * Processing such a file triggers the "no usable text" deterministic content error,
     * so the AI call is skipped.
     *
     * @param filename the PDF filename (e.g. {@code "blank.pdf"})
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createBlankPdf(String filename) throws IOException {
        Path pdf = sourceFolder.resolve(filename);
        MinimalPdfFactory.createBlankPdf(pdf);
        return pdf;
    }

    // =========================================================================
    // Batch execution
    // =========================================================================

    /**
     * Executes one complete batch run using the current stub configuration.
     * <p>
     * Every call creates a fresh {@link BatchRunContext} with a new {@link RunId},
     * mirroring production where each Task Scheduler invocation is a distinct run.
     *
     * @return the outcome of the batch run
     */
    public BatchRunOutcome runBatch() {
        BatchRunContext runContext = new BatchRunContext(
                new RunId(UUID.randomUUID().toString()), Instant.now());
        return buildUseCase().execute(runContext);
    }

    // =========================================================================
    // State inspection helpers
    // =========================================================================

    /**
     * Looks up the document master record for the given fingerprint.
     *
     * @param fingerprint the document fingerprint to query
     * @return the master record if one exists, {@link Optional#empty()} if unknown or
     *         if a persistence lookup error occurred
     */
    public Optional<DocumentRecord> findDocumentRecord(DocumentFingerprint fingerprint) {
        var lookup = recordRepository.findByFingerprint(fingerprint);
        if (lookup instanceof DocumentTerminalSuccess success) {
            return Optional.of(success.record());
        }
        if (lookup instanceof DocumentTerminalFinalFailure failure) {
            return Optional.of(failure.record());
        }
        if (lookup instanceof DocumentKnownProcessable processable) {
            return Optional.of(processable.record());
        }
        return Optional.empty();
    }

    /**
     * Returns all processing attempts recorded for the given fingerprint in insertion order.
     *
     * @param fingerprint the document fingerprint to query
     * @return all recorded attempts; empty list if none exist
     */
    public List<ProcessingAttempt> findAttempts(DocumentFingerprint fingerprint) {
        return attemptRepository.findAllByFingerprint(fingerprint);
    }

    /**
     * Computes the SHA-256 fingerprint for the given file using the production adapter.
     * Useful for correlating a test PDF with its database record after a batch run.
     *
     * @param file the absolute path of the file to fingerprint
     * @return the fingerprint
     * @throws IllegalStateException if fingerprint computation fails
     */
    public DocumentFingerprint computeFingerprint(Path file) {
        // Minimal candidate mirroring how the production source adapter builds one.
        SourceDocumentCandidate candidate = new SourceDocumentCandidate(
                file.getFileName().toString(),
                0L,
                new SourceDocumentLocator(file.toAbsolutePath().toString()));
        if (new Sha256FingerprintAdapter().computeFingerprint(candidate)
                instanceof FingerprintSuccess success) {
            return success.fingerprint();
        }
        throw new IllegalStateException(
                "Fingerprint computation failed for test fixture: " + file);
    }

    /**
     * Lists the filenames of all files currently in the target folder, sorted.
     *
     * @return list of filenames; empty if target folder is empty
     * @throws IOException if the target folder cannot be read
     */
    public List<String> listTargetFiles() throws IOException {
        try (var entries = Files.list(targetFolder)) {
            return entries
                    .map(Path::getFileName)
                    .map(Path::toString)
                    .sorted()
                    .toList();
        }
    }

    /**
     * Returns the source folder path used by this context.
     */
    public Path sourceFolder() {
        return sourceFolder;
    }

    /**
     * Returns the target folder path used by this context.
     */
    public Path targetFolder() {
        return targetFolder;
    }

    /**
     * Overrides the {@link TargetFileCopyPort} used in subsequent batch runs.
     * Pass {@code null} to revert to the real {@link FilesystemTargetFileCopyAdapter}.
     *
     * @param override the port implementation to use, or {@code null} for the real adapter
     */
    public void setTargetFileCopyPortOverride(TargetFileCopyPort override) {
        this.copyPortOverride = override;
    }

    @Override
    public void close() {
        // Nothing to release: @TempDir deletes all files automatically.
    }

    // =========================================================================
    // Private wiring
    // =========================================================================

    /**
     * Wires a complete {@link DefaultBatchRunProcessingUseCase} for one batch run.
     * <p>
     * Adapters are instantiated fresh per run so no mutable state (locks, connections)
     * leaks between runs; only the AI stub and the optional copy-port override are
     * shared across runs of the same test.
     */
    private DefaultBatchRunProcessingUseCase buildUseCase() {
        RuntimeConfiguration config = new RuntimeConfiguration(
                MAX_PAGES, MAX_RETRIES_TRANSIENT, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);

        UnitOfWorkPort unitOfWork = new SqliteUnitOfWorkAdapter(jdbcUrl);
        TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(targetFolder);
        TargetFileCopyPort copyPort = (copyPortOverride != null)
                ? copyPortOverride
                : new FilesystemTargetFileCopyAdapter(targetFolder);

        DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
                recordRepository,
                attemptRepository,
                unitOfWork,
                targetFolderPort,
                copyPort,
                new Log4jProcessingLogger(DocumentProcessingCoordinator.class),
                MAX_RETRIES_TRANSIENT);

        ClockPort clock = new SystemClockAdapter();
        PromptPort promptPort = new FilesystemPromptPortAdapter(promptFile);
        AiNamingService namingService = new AiNamingService(
                aiStub, promptPort, new AiResponseValidator(clock), AI_MODEL, MAX_TEXT_CHARS);

        return new DefaultBatchRunProcessingUseCase(
                config,
                new FilesystemRunLockPortAdapter(lockFile),
                new SourceDocumentCandidatesPortAdapter(sourceFolder),
                new PdfTextExtractionPortAdapter(),
                new Sha256FingerprintAdapter(),
                coordinator,
                namingService,
                new Log4jProcessingLogger(DefaultBatchRunProcessingUseCase.class));
    }
}

View File

@@ -0,0 +1,72 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import java.io.IOException;
import java.nio.file.Path;
/**
* Factory for creating minimal PDF test fixtures used in end-to-end tests.
* <p>
* Provides two variants:
* <ul>
* <li>Searchable PDFs with embedded text content — used for happy-path, transient-error,
* and target-copy-failure scenarios where the pre-check must pass.</li>
* <li>Blank PDFs with no extractable text — used for deterministic content-error
* scenarios where the pre-check must fail.</li>
* </ul>
* <p>
* Uses Apache PDFBox to create real, structurally valid PDF files so that the
* production {@code PdfTextExtractionPortAdapter} processes them correctly.
*/
final class MinimalPdfFactory {

    /** Font size in points used for all embedded text. */
    private static final float FONT_SIZE = 12;

    /** Vertical distance between consecutive text lines, in points. */
    private static final float LINE_LEADING = 14.5f;

    private MinimalPdfFactory() {
        // Static utility class — not instantiable
    }

    /**
     * Creates a single-page searchable PDF with the given text content at the output path.
     * <p>
     * The resulting file passes the production pre-checks for minimum text length and
     * page count, enabling the AI naming pipeline to run.
     * <p>
     * Multi-line input is supported: the text is split on any line terminator and rendered
     * line by line, because PDFBox's {@code showText} throws
     * {@code IllegalArgumentException} for control characters such as {@code '\n'}.
     *
     * @param outputPath the path where the PDF will be written; parent directory must exist
     * @param textContent the text to embed in the PDF; should be non-empty for happy-path tests
     * @throws IOException if the file cannot be written
     */
    static void createSearchablePdf(Path outputPath, String textContent) throws IOException {
        try (PDDocument doc = new PDDocument()) {
            PDPage page = new PDPage();
            doc.addPage(page);
            try (PDPageContentStream stream = new PDPageContentStream(doc, page)) {
                stream.beginText();
                stream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), FONT_SIZE);
                // Leading controls the vertical offset applied by newLine() below.
                stream.setLeading(LINE_LEADING);
                stream.newLineAtOffset(50, 700);
                // \R matches any Unicode line terminator; -1 keeps trailing empty lines.
                String[] lines = textContent.split("\\R", -1);
                for (int i = 0; i < lines.length; i++) {
                    if (i > 0) {
                        stream.newLine();
                    }
                    stream.showText(lines[i]);
                }
                stream.endText();
            }
            doc.save(outputPath.toFile());
        }
    }

    /**
     * Creates a single-page blank PDF with no text content at the output path.
     * <p>
     * The resulting file triggers the "no usable text" pre-check failure
     * (deterministic content error), which does not invoke the AI service.
     *
     * @param outputPath the path where the PDF will be written; parent directory must exist
     * @throws IOException if the file cannot be written
     */
    static void createBlankPdf(Path outputPath) throws IOException {
        try (PDDocument doc = new PDDocument()) {
            doc.addPage(new PDPage());
            doc.save(outputPath.toFile());
        }
    }
}

View File

@@ -0,0 +1,109 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Configurable test double for {@link AiInvocationPort}.
* <p>
* Replaces the real HTTP-based AI adapter in end-to-end tests so that the processing
* pipeline can be exercised without real network calls. Supports two response modes:
* <ul>
* <li><strong>Success mode</strong> (default): returns a structurally valid JSON response
* containing configurable {@code title} and {@code date} fields. This produces a
* {@code PROPOSAL_READY} outcome when the response passes validation.</li>
* <li><strong>Technical failure mode</strong>: returns an {@link AiInvocationTechnicalFailure},
* simulating network errors or service unavailability. This produces a
* {@code FAILED_RETRYABLE} (transient) outcome.</li>
* </ul>
* <p>
* The stub tracks the total number of invocations so that tests can verify whether
* the AI pipeline was called at all (e.g. confirming that a {@code PROPOSAL_READY}
* finalization skips the AI call).
*/
final class StubAiInvocationPort implements AiInvocationPort {

    private final AtomicInteger invocationCount = new AtomicInteger(0);
    private volatile boolean returnTechnicalFailure = false;
    private volatile String title = "Stromabrechnung";
    private volatile String date = "2024-01-15";
    private volatile String reasoning = "Testdokument fuer End-to-End-Tests.";

    /**
     * Configures the stub to return a valid naming proposal with the given title and date.
     *
     * @param title the document title (must pass validation: max 20 chars, no special chars)
     * @param date the document date in {@code YYYY-MM-DD} format, or {@code null} to omit
     */
    void configureSuccess(String title, String date) {
        this.title = title;
        this.date = date;
        this.returnTechnicalFailure = false;
    }

    /**
     * Configures the stub to return a transient technical failure on every invocation.
     * The failure reason is {@code STUB_FAILURE}.
     */
    void configureTechnicalFailure() {
        this.returnTechnicalFailure = true;
    }

    /**
     * Resets the stub to its default success configuration with title "Stromabrechnung"
     * and date "2024-01-15", and clears the invocation counter.
     */
    void reset() {
        this.title = "Stromabrechnung";
        this.date = "2024-01-15";
        this.reasoning = "Testdokument fuer End-to-End-Tests.";
        this.returnTechnicalFailure = false;
        invocationCount.set(0);
    }

    /**
     * Returns the total number of times {@link #invoke} was called since construction
     * or the most recent {@link #reset()} / {@link #resetInvocationCount()}.
     */
    int invocationCount() {
        return invocationCount.get();
    }

    /**
     * Resets the invocation counter to zero without changing response configuration.
     */
    void resetInvocationCount() {
        invocationCount.set(0);
    }

    /**
     * Returns either a success response or a technical failure, depending on current
     * configuration. Increments the invocation counter on every call. Configured field
     * values are JSON-escaped so that titles or reasoning containing quotes, backslashes,
     * or control characters still produce structurally valid JSON.
     */
    @Override
    public AiInvocationResult invoke(AiRequestRepresentation request) {
        invocationCount.incrementAndGet();
        if (returnTechnicalFailure) {
            return new AiInvocationTechnicalFailure(
                    request,
                    "STUB_FAILURE",
                    "Test stub: configured to return technical failure");
        }
        String dateField = (date != null) ? "\"date\": \"" + escapeJson(date) + "\", " : "";
        String rawJson = "{"
                + dateField
                + "\"title\": \"" + escapeJson(title) + "\", "
                + "\"reasoning\": \"" + escapeJson(reasoning) + "\""
                + "}";
        return new AiInvocationSuccess(request, new AiRawResponse(rawJson));
    }

    /**
     * Escapes a string for embedding inside a JSON string literal per RFC 8259:
     * quote, backslash, and control characters are escaped; all others pass through.
     */
    private static String escapeJson(String value) {
        StringBuilder sb = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"' -> sb.append("\\\"");
                case '\\' -> sb.append("\\\\");
                case '\n' -> sb.append("\\n");
                case '\r' -> sb.append("\\r");
                case '\t' -> sb.append("\\t");
                default -> {
                    if (c < 0x20) {
                        sb.append(String.format("\\u%04x", (int) c));
                    } else {
                        sb.append(c);
                    }
                }
            }
        }
        return sb.toString();
    }
}