M8 komplett umgesetzt
This commit is contained in:
@@ -62,6 +62,11 @@
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.assertj</groupId>
|
||||
<artifactId>assertj-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
@@ -310,10 +310,10 @@ public class BootstrapRunner {
|
||||
LOG.error("Configuration validation failed: {}", e.getMessage());
|
||||
return 1;
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Persistence operation failed: {}", e.getMessage(), e);
|
||||
LOG.error("Schema initialization failed: {}", e.getMessage(), e);
|
||||
return 1;
|
||||
} catch (Exception e) {
|
||||
LOG.error("Bootstrap failure during startup.", e);
|
||||
LOG.error("Unexpected startup failure.", e);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -391,7 +391,7 @@ public class BootstrapRunner {
|
||||
*/
|
||||
private BatchRunContext createRunContext() {
|
||||
RunId runId = new RunId(UUID.randomUUID().toString());
|
||||
LOG.info("Batch run started. RunId: {}", runId);
|
||||
LOG.info("Preparing batch run. RunId: {}", runId);
|
||||
return new BatchRunContext(runId, Instant.now());
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,698 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Deterministic end-to-end tests for the complete batch processing pipeline.
|
||||
* <p>
|
||||
* Each test method is independent and uses its own {@link E2ETestContext} backed by a
|
||||
* JUnit {@code @TempDir}. All infrastructure adapters (SQLite, filesystem, PDF extraction,
|
||||
* fingerprinting) are real production implementations. Only the AI invocation port is
|
||||
* replaced by a configurable {@link StubAiInvocationPort} to avoid real HTTP calls.
|
||||
*
|
||||
* <h2>End-to-end invariants verified</h2>
|
||||
* <ul>
|
||||
* <li><strong>Happy-path to {@code SUCCESS}</strong>: two-run flow via {@code PROPOSAL_READY}
|
||||
* intermediate state to a final {@code SUCCESS} with a target file on disk.</li>
|
||||
* <li><strong>Deterministic content error</strong>: blank PDFs (no extractable text) reach
|
||||
* {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after the
|
||||
* second run, exercising the one-retry rule for deterministic content errors.</li>
|
||||
* <li><strong>Transient technical error</strong>: AI stub failures produce
|
||||
* {@code FAILED_RETRYABLE} (transient counter incremented) without a target file.</li>
|
||||
* <li><strong>Transient error exhaustion</strong>: repeated AI stub failures across
|
||||
* {@code maxRetriesTransient} runs increment the transient counter each time until
|
||||
* the limit is reached and the document is finalized to {@code FAILED_FINAL}.</li>
|
||||
* <li><strong>Skip after {@code SUCCESS}</strong>: a document whose status is
|
||||
* {@code SUCCESS} generates exactly one {@code SKIPPED_ALREADY_PROCESSED} attempt
|
||||
* in the next run; the overall status and target file remain unchanged.</li>
|
||||
* <li><strong>Skip after {@code FAILED_FINAL}</strong>: a document whose status is
|
||||
* {@code FAILED_FINAL} generates exactly one {@code SKIPPED_FINAL_FAILURE} attempt
|
||||
* in the next run; the overall status and failure counters remain unchanged.</li>
|
||||
* <li><strong>{@code PROPOSAL_READY} with later finalization</strong>: a document in
|
||||
* {@code PROPOSAL_READY} state is finalized without an AI call in the next run,
|
||||
* confirming the leading-proposal-attempt rule.</li>
|
||||
* <li><strong>Target copy error with immediate within-run retry (success)</strong>: when the
|
||||
* first copy attempt fails but the immediate within-run retry succeeds, the document is
|
||||
* recorded as {@code SUCCESS} and no transient error counter is incremented.</li>
|
||||
* <li><strong>Target copy error with immediate within-run retry (failure)</strong>: when both
|
||||
* the initial and immediate-retry copy attempts fail, the document is recorded as
|
||||
* {@code FAILED_RETRYABLE} with an incremented transient counter.</li>
|
||||
* <li><strong>Duplicate target filename suffix</strong>: when two distinct documents produce
|
||||
* the same base target filename in the same batch run, the second receives a {@code (1)}
|
||||
* suffix to avoid overwriting the first.</li>
|
||||
* <li><strong>Mixed batch outcome</strong>: a batch run that contains both successfully
|
||||
* processed documents and documents with content errors completes with
|
||||
* {@link BatchRunOutcome#SUCCESS}, confirming that document-level failures do not
|
||||
* abort the batch or change the overall exit-code-relevant outcome.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Document text used in tests</h2>
|
||||
* <p>
|
||||
* Searchable PDFs embed enough text to pass the minimum-text pre-check. The AI stub
|
||||
* returns a title of {@code "Stromabrechnung"} and date {@code "2024-01-15"} by default,
|
||||
* producing a target filename of {@code "2024-01-15 - Stromabrechnung.pdf"}.
|
||||
*/
|
||||
class BatchRunEndToEndTest {
|
||||
|
||||
private static final String SAMPLE_PDF_TEXT =
|
||||
"Stromabrechnung Kundenname Musterstadt Datum 15.01.2024 Betrag 123,45 EUR";
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 1: Happy-path to SUCCESS
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the complete two-run happy-path:
|
||||
* <ol>
|
||||
* <li>Run 1: AI stub returns valid proposal → document status becomes
|
||||
* {@code PROPOSAL_READY}; no target file yet.</li>
|
||||
* <li>Run 2: AI is NOT called again; target file is copied; document status
|
||||
* becomes {@code SUCCESS}.</li>
|
||||
* </ol>
|
||||
* This confirms the leading-proposal-attempt rule and the two-phase finalization.
|
||||
*/
|
||||
@Test
|
||||
void happyPath_twoRuns_reachesSuccess(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("rechnung.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("rechnung.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: AI produces a naming proposal ---
|
||||
BatchRunOutcome run1 = ctx.runBatch();
|
||||
|
||||
assertThat(run1).isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.aiStub.invocationCount()).isEqualTo(1);
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
|
||||
List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
|
||||
assertThat(attempts1).hasSize(1);
|
||||
assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
|
||||
// --- Run 2: Finalization without AI call ---
|
||||
ctx.aiStub.resetInvocationCount();
|
||||
BatchRunOutcome run2 = ctx.runBatch();
|
||||
|
||||
assertThat(run2).isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be called again when PROPOSAL_READY exists")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(record2.lastSuccessInstant()).isNotNull();
|
||||
assertThat(record2.lastTargetFileName()).isNotNull();
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(targetFiles.get(0)).endsWith(".pdf");
|
||||
assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
|
||||
|
||||
List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
|
||||
assertThat(attempts2).hasSize(2);
|
||||
assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 2: Deterministic content error → FAILED_RETRYABLE → FAILED_FINAL
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the one-retry rule for deterministic content errors:
|
||||
* <ol>
|
||||
* <li>Run 1: blank PDF → pre-check fails (no extractable text) →
|
||||
* {@code FAILED_RETRYABLE}, content error counter = 1.</li>
|
||||
* <li>Run 2: same outcome again → {@code FAILED_FINAL}, content error counter = 2.</li>
|
||||
* </ol>
|
||||
* No AI call is made in either run because the content pre-check prevents it.
|
||||
*/
|
||||
@Test
|
||||
void deterministicContentError_twoRuns_reachesFailedFinal(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createBlankPdf("blank.pdf");
|
||||
Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1 ---
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be called for a blank PDF")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(1);
|
||||
assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
|
||||
assertThat(attempts1).hasSize(1);
|
||||
assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(attempts1.get(0).retryable()).isTrue();
|
||||
|
||||
// --- Run 2 ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(record2.failureCounters().contentErrorCount()).isEqualTo(2);
|
||||
|
||||
List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
|
||||
assertThat(attempts2).hasSize(2);
|
||||
assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(attempts2.get(1).retryable()).isFalse();
|
||||
|
||||
// No target file should exist
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 3: Transient technical error → FAILED_RETRYABLE
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies that a transient AI failure produces {@code FAILED_RETRYABLE} with an
|
||||
* incremented transient error counter, and that no target file is written.
|
||||
* The document remains retryable in subsequent runs until the transient limit is reached.
|
||||
*/
|
||||
@Test
|
||||
void transientAiFailure_producesFailedRetryable(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
ctx.aiStub.configureTechnicalFailure();
|
||||
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must have been invoked (and failed) once")
|
||||
.isEqualTo(1);
|
||||
|
||||
DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record.failureCounters().transientErrorCount()).isEqualTo(1);
|
||||
assertThat(record.failureCounters().contentErrorCount()).isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(1);
|
||||
assertThat(attempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(attempts.get(0).retryable()).isTrue();
|
||||
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 4: Skip after SUCCESS
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the skip-after-success invariant:
|
||||
* after a document reaches {@code SUCCESS} (via two runs), a third run records a
|
||||
* {@code SKIPPED_ALREADY_PROCESSED} attempt without changing the overall status,
|
||||
* failure counters, or the target file.
|
||||
*/
|
||||
@Test
|
||||
void skipAfterSuccess_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// Reach SUCCESS via two runs
|
||||
ctx.runBatch(); // → PROPOSAL_READY
|
||||
ctx.runBatch(); // → SUCCESS
|
||||
|
||||
DocumentRecord successRecord = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(successRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
String targetFileBefore = successRecord.lastTargetFileName();
|
||||
|
||||
// --- Run 3: should produce skip ---
|
||||
ctx.aiStub.resetInvocationCount();
|
||||
BatchRunOutcome run3 = ctx.runBatch();
|
||||
|
||||
assertThat(run3).isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be called for an already-successful document")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record3.overallStatus())
|
||||
.as("Overall status must remain SUCCESS after a skip")
|
||||
.isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(record3.lastTargetFileName())
|
||||
.as("Target filename must not change after a skip")
|
||||
.isEqualTo(targetFileBefore);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(3);
|
||||
assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
|
||||
assertThat(attempts.get(2).retryable()).isFalse();
|
||||
|
||||
// Target file count must remain exactly one
|
||||
assertThat(ctx.listTargetFiles()).hasSize(1);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 5: Skip after FAILED_FINAL
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the skip-after-final-failure invariant:
|
||||
* after a document reaches {@code FAILED_FINAL} (via two blank-PDF runs), a third run
|
||||
* records a {@code SKIPPED_FINAL_FAILURE} attempt without changing the overall status
|
||||
* or failure counters.
|
||||
*/
|
||||
@Test
|
||||
void skipAfterFailedFinal_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createBlankPdf("blank.pdf");
|
||||
Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// Reach FAILED_FINAL via two blank-PDF runs
|
||||
ctx.runBatch(); // → FAILED_RETRYABLE
|
||||
ctx.runBatch(); // → FAILED_FINAL
|
||||
|
||||
DocumentRecord finalRecord = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(finalRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
int contentErrorsBefore = finalRecord.failureCounters().contentErrorCount();
|
||||
|
||||
// --- Run 3: should produce skip ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record3.overallStatus())
|
||||
.as("Overall status must remain FAILED_FINAL after a skip")
|
||||
.isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(record3.failureCounters().contentErrorCount())
|
||||
.as("Failure counters must not change after a skip")
|
||||
.isEqualTo(contentErrorsBefore);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(3);
|
||||
assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_FINAL_FAILURE);
|
||||
assertThat(attempts.get(2).retryable()).isFalse();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 6: Existing PROPOSAL_READY with later finalization
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the leading-proposal-attempt rule in isolation:
|
||||
* <ol>
|
||||
* <li>Run 1: AI produces a naming proposal → document status is {@code PROPOSAL_READY}.</li>
|
||||
* <li>Run 2: AI stub is reset to technical failure; the coordinator must still finalize
|
||||
* the document to {@code SUCCESS} using the persisted proposal — without calling the AI.</li>
|
||||
* </ol>
|
||||
* This confirms that the second run never re-invokes the AI when a valid
|
||||
* {@code PROPOSAL_READY} attempt already exists.
|
||||
*/
|
||||
@Test
|
||||
void proposalReadyFinalization_noAiCallInSecondRun(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: establish PROPOSAL_READY ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
|
||||
// --- Run 2: AI stub would fail if called, but must not be called ---
|
||||
ctx.aiStub.configureTechnicalFailure();
|
||||
ctx.aiStub.resetInvocationCount();
|
||||
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be invoked during PROPOSAL_READY finalization")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(targetFiles.get(0)).endsWith(".pdf");
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 7: Target copy error with immediate within-run retry
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the immediate within-run retry for target copy failures:
|
||||
* <ol>
|
||||
* <li>Run 1: AI produces {@code PROPOSAL_READY}.</li>
|
||||
* <li>Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails on
|
||||
* the first invocation but delegates to the real adapter on the second.
|
||||
* The coordinator must detect the first failure, retry immediately within the
|
||||
* same run, and record {@code SUCCESS} — without incrementing the transient
|
||||
* error counter.</li>
|
||||
* </ol>
|
||||
* The immediate retry does not count as a cross-run transient error.
|
||||
*/
|
||||
@Test
|
||||
void targetCopyError_immediateRetrySucceeds_recordsSuccess(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: produce PROPOSAL_READY ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
|
||||
// --- Run 2: first copy attempt fails, retry succeeds ---
|
||||
TargetFileCopyPort realAdapter =
|
||||
new de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter(
|
||||
ctx.targetFolder());
|
||||
AtomicInteger copyCallCount = new AtomicInteger(0);
|
||||
|
||||
TargetFileCopyPort stubWithRetry = (locator, resolvedFilename) -> {
|
||||
int call = copyCallCount.incrementAndGet();
|
||||
if (call == 1) {
|
||||
// First attempt: simulate a transient write failure
|
||||
return new TargetFileCopyTechnicalFailure(
|
||||
"Simulated write failure on first attempt", true);
|
||||
}
|
||||
// Second attempt (immediate within-run retry): delegate to real adapter
|
||||
return realAdapter.copyToTarget(locator, resolvedFilename);
|
||||
};
|
||||
|
||||
ctx.setTargetFileCopyPortOverride(stubWithRetry);
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(copyCallCount.get())
|
||||
.as("Copy port must have been called twice (initial + retry)")
|
||||
.isEqualTo(2);
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(record2.failureCounters().transientErrorCount())
|
||||
.as("Immediate within-run retry must not increment the transient error counter")
|
||||
.isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(2);
|
||||
assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 8: Transient error exhaustion → FAILED_FINAL
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the complete transient error exhaustion path over multiple runs:
|
||||
* <ol>
|
||||
* <li>Run 1: AI stub fails technically → {@code FAILED_RETRYABLE},
|
||||
* transient counter = 1 (below limit 3).</li>
|
||||
* <li>Run 2: AI stub fails again → {@code FAILED_RETRYABLE},
|
||||
* transient counter = 2 (below limit 3).</li>
|
||||
* <li>Run 3: AI stub fails again → transient counter reaches the limit (3 = 3) →
|
||||
* {@code FAILED_FINAL}; no target file is ever written.</li>
|
||||
* </ol>
|
||||
* This confirms the {@code maxRetriesTransient} boundary: the run that pushes the
|
||||
* counter to the configured limit is the run that finalises the document.
|
||||
*/
|
||||
@Test
|
||||
void transientErrors_multipleRuns_exhaustsLimit_reachesFailedFinal(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
ctx.aiStub.configureTechnicalFailure();
|
||||
|
||||
// --- Run 1: counter 0 → 1, below limit → FAILED_RETRYABLE ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(1);
|
||||
assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
|
||||
assertThat(attempts1).hasSize(1);
|
||||
assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(attempts1.get(0).retryable()).isTrue();
|
||||
|
||||
// --- Run 2: counter 1 → 2, below limit → FAILED_RETRYABLE ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record2.failureCounters().transientErrorCount()).isEqualTo(2);
|
||||
|
||||
List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
|
||||
assertThat(attempts2).hasSize(2);
|
||||
assertThat(attempts2.get(1).retryable()).isTrue();
|
||||
|
||||
// --- Run 3: counter 2 → 3 = limit → FAILED_FINAL ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record3.overallStatus())
|
||||
.as("Transient counter reaching the configured limit must finalise the document")
|
||||
.isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(record3.failureCounters().transientErrorCount())
|
||||
.as("Transient counter must equal maxRetriesTransient after exhaustion")
|
||||
.isEqualTo(E2ETestContext.MAX_RETRIES_TRANSIENT);
|
||||
assertThat(record3.failureCounters().contentErrorCount()).isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts3 = ctx.findAttempts(fp);
|
||||
assertThat(attempts3).hasSize(3);
|
||||
assertThat(attempts3.get(2).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(attempts3.get(2).retryable()).isFalse();
|
||||
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 9: Target copy error – both attempts fail → FAILED_RETRYABLE
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the failure path of the immediate within-run retry mechanism:
|
||||
* <ol>
|
||||
* <li>Run 1: AI stub returns a valid proposal → {@code PROPOSAL_READY}.</li>
|
||||
* <li>Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails
|
||||
* on every call. The coordinator issues the initial copy attempt (failure),
|
||||
* grants exactly one immediate retry (also failure), then classifies the
|
||||
* result as a transient technical error and records {@code FAILED_RETRYABLE}
|
||||
* with an incremented transient counter.</li>
|
||||
* </ol>
|
||||
* This confirms that the within-run retry does not suppress the error when both
|
||||
* attempts fail, and that the transient counter is incremented exactly once.
|
||||
*/
|
||||
@Test
|
||||
void targetCopyError_bothAttemptsFail_reachesFailedRetryable(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: establish PROPOSAL_READY ---
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.findDocumentRecord(fp).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
|
||||
// --- Run 2: both copy attempts fail ---
|
||||
ctx.setTargetFileCopyPortOverride(
|
||||
(locator, resolvedFilename) ->
|
||||
new TargetFileCopyTechnicalFailure(
|
||||
"Simulated persistent write failure", true));
|
||||
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record.overallStatus())
|
||||
.as("Both copy attempts failing must produce FAILED_RETRYABLE "
|
||||
+ "(transient error, limit not yet reached)")
|
||||
.isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record.failureCounters().transientErrorCount())
|
||||
.as("The double copy failure must increment the transient counter exactly once")
|
||||
.isEqualTo(1);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(2);
|
||||
assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(attempts.get(1).retryable()).isTrue();
|
||||
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 10: Two documents with identical target name → duplicate suffix
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the duplicate target filename suffix rule at end-to-end level:
|
||||
* when two distinct source documents both resolve to the same base target name
|
||||
* ({@code "2024-01-15 - Stromabrechnung.pdf"}) in the same finalization run, the
|
||||
* second document written to the target folder must receive a {@code (1)} suffix.
|
||||
* <ol>
|
||||
* <li>Run 1: both PDFs are processed by the AI stub (same configured response) →
|
||||
* both reach {@code PROPOSAL_READY}.</li>
|
||||
* <li>Run 2: both are finalized in sequence; the first written claims the base name,
|
||||
* the second receives {@code "2024-01-15 - Stromabrechnung(1).pdf"}.</li>
|
||||
* </ol>
|
||||
* Both documents reach {@code SUCCESS} and the target folder contains exactly two files.
|
||||
*/
|
||||
@Test
|
||||
void twoDifferentDocuments_sameProposedName_secondGetsDuplicateSuffix(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
// Two distinct PDFs; the AI stub returns the same title and date for both
|
||||
ctx.createSearchablePdf("rechnung1.pdf", SAMPLE_PDF_TEXT);
|
||||
ctx.createSearchablePdf("rechnung2.pdf",
|
||||
"Stromabrechnung Zweiter Kunde Musterstadt Datum 15.01.2024 Betrag 99,00 EUR");
|
||||
|
||||
Path pdf1 = ctx.sourceFolder().resolve("rechnung1.pdf");
|
||||
Path pdf2 = ctx.sourceFolder().resolve("rechnung2.pdf");
|
||||
DocumentFingerprint fp1 = ctx.computeFingerprint(pdf1);
|
||||
DocumentFingerprint fp2 = ctx.computeFingerprint(pdf2);
|
||||
|
||||
// --- Run 1: AI stub processes both PDFs → PROPOSAL_READY ---
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
|
||||
// --- Run 2: both finalized; the second must receive the (1) suffix ---
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.SUCCESS);
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles)
|
||||
.as("Both distinct documents must produce separate target files")
|
||||
.hasSize(2);
|
||||
assertThat(targetFiles)
|
||||
.as("Base name must exist for the first document written")
|
||||
.anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung.pdf"));
|
||||
assertThat(targetFiles)
|
||||
.as("Duplicate suffix (1) must be appended for the second document written")
|
||||
.anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung(1).pdf"));
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 11: Mixed batch – document failures do not affect batch outcome
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies that document-level failures do not cause a batch-level failure:
|
||||
* <ol>
|
||||
* <li>Run 1: a searchable PDF reaches {@code PROPOSAL_READY}; a blank PDF
|
||||
* (no extractable text) reaches {@code FAILED_RETRYABLE}.
|
||||
* {@link BatchRunOutcome#SUCCESS} is returned.</li>
|
||||
* <li>Run 2: the searchable PDF is finalized to {@code SUCCESS};
|
||||
* the blank PDF reaches its second content error and is finalized to
|
||||
* {@code FAILED_FINAL}. {@link BatchRunOutcome#SUCCESS} is returned.</li>
|
||||
* </ol>
|
||||
* This confirms the exit-code contract: only hard bootstrap or infrastructure
|
||||
* failures produce a non-zero exit code; document-level errors do not.
|
||||
*/
|
||||
@Test
|
||||
void mixedBatch_oneSuccess_oneContentError_batchOutcomeIsSuccess(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("good.pdf", SAMPLE_PDF_TEXT);
|
||||
ctx.createBlankPdf("blank.pdf");
|
||||
|
||||
Path goodPdf = ctx.sourceFolder().resolve("good.pdf");
|
||||
Path blankPdf = ctx.sourceFolder().resolve("blank.pdf");
|
||||
DocumentFingerprint fpGood = ctx.computeFingerprint(goodPdf);
|
||||
DocumentFingerprint fpBlank = ctx.computeFingerprint(blankPdf);
|
||||
|
||||
// --- Run 1 ---
|
||||
BatchRunOutcome run1 = ctx.runBatch();
|
||||
|
||||
assertThat(run1)
|
||||
.as("Batch must complete with SUCCESS even when individual documents fail")
|
||||
.isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.findDocumentRecord(fpGood).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow().overallStatus())
|
||||
.isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow()
|
||||
.failureCounters().contentErrorCount()).isEqualTo(1);
|
||||
|
||||
// --- Run 2 ---
|
||||
BatchRunOutcome run2 = ctx.runBatch();
|
||||
|
||||
assertThat(run2)
|
||||
.as("Batch must complete with SUCCESS even when a document is finalised "
|
||||
+ "to FAILED_FINAL")
|
||||
.isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
|
||||
DocumentRecord goodRecord = ctx.findDocumentRecord(fpGood).orElseThrow();
|
||||
assertThat(goodRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
|
||||
DocumentRecord blankRecord = ctx.findDocumentRecord(fpBlank).orElseThrow();
|
||||
assertThat(blankRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(blankRecord.failureCounters().contentErrorCount()).isEqualTo(2);
|
||||
|
||||
// Exactly one target file from the successfully processed document
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(targetFiles.get(0)).endsWith(".pdf");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,406 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.prompt.FilesystemPromptPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.AiNamingService;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
|
||||
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.bootstrap.adapter.Log4jProcessingLogger;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
 * Full adapter wiring context for deterministic end-to-end tests of the batch processing pipeline.
 * <p>
 * Provides real infrastructure adapters for all subsystems (SQLite persistence, filesystem
 * source/target folders, PDF text extraction, SHA-256 fingerprinting, run locking) and a
 * configurable stub ({@link StubAiInvocationPort}) for the AI invocation port.
 * This ensures that end-to-end tests cover the complete production code path without
 * performing real HTTP calls to an AI service.
 *
 * <h2>Invariants verified by this context</h2>
 * <ul>
 *   <li>Happy-path to {@code SUCCESS}: two-run flow where Run 1 produces {@code PROPOSAL_READY}
 *       and Run 2 copies the file and records {@code SUCCESS}.</li>
 *   <li>Deterministic content error: blank PDFs (no extractable text) produce
 *       {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after a
 *       second run.</li>
 *   <li>Transient technical error: AI stub failures produce {@code FAILED_RETRYABLE} for each
 *       run until the transient error limit is reached, at which point the document is
 *       finalized to {@code FAILED_FINAL}.</li>
 *   <li>Skip after {@code SUCCESS}: a document in {@code SUCCESS} state generates a
 *       {@code SKIPPED_ALREADY_PROCESSED} attempt in subsequent runs.</li>
 *   <li>Skip after {@code FAILED_FINAL}: a document in {@code FAILED_FINAL} state generates a
 *       {@code SKIPPED_FINAL_FAILURE} attempt in subsequent runs.</li>
 *   <li>{@code PROPOSAL_READY} with later finalization: a document in {@code PROPOSAL_READY}
 *       state is finalized without an AI call in the next run.</li>
 *   <li>Target copy error with immediate retry: when the first copy attempt fails but the
 *       immediate within-run retry succeeds, the document is recorded as {@code SUCCESS}.</li>
 * </ul>
 *
 * <h2>Usage pattern</h2>
 * <pre>{@code
 * @TempDir Path tempDir;
 *
 * @Test
 * void example() throws Exception {
 *     try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
 *         ctx.createSearchablePdf("doc.pdf", "Rechnung 2024-01-15 ...");
 *         BatchRunOutcome run1 = ctx.runBatch();
 *         // assertions...
 *     }
 * }
 * }</pre>
 *
 * <h2>Thread safety</h2>
 * <p>
 * Not thread-safe. Each test method should use its own context instance.
 */
public final class E2ETestContext implements AutoCloseable {

    /** Maximum pages before triggering a deterministic content error. */
    static final int MAX_PAGES = 50;

    /** Maximum text characters sent to the AI service. */
    static final int MAX_TEXT_CHARS = 10_000;

    /**
     * Maximum transient retries before a document is finalized to {@code FAILED_FINAL}.
     * Set to 3 to allow multi-run transient-failure tests without immediate finalization.
     */
    static final int MAX_RETRIES_TRANSIENT = 3;

    /** Model name carried in attempt history (no real inference occurs). */
    static final String AI_MODEL = "e2e-stub-model";

    // Filesystem layout created by initialize(); all paths live under the test's @TempDir.
    private final Path sourceFolder;
    private final Path targetFolder;
    private final Path lockFile;
    private final Path promptFile;
    private final String jdbcUrl;

    // Repositories are shared across runs so document state persists between runBatch() calls.
    private final SqliteDocumentRecordRepositoryAdapter documentRepo;
    private final SqliteProcessingAttemptRepositoryAdapter attemptRepo;

    /**
     * Configurable AI stub. Tests may call {@link StubAiInvocationPort#configureSuccess},
     * {@link StubAiInvocationPort#configureTechnicalFailure}, or
     * {@link StubAiInvocationPort#reset()} between batch runs.
     */
    public final StubAiInvocationPort aiStub;

    /**
     * Optional override for the {@link TargetFileCopyPort}.
     * {@code null} means the real {@link FilesystemTargetFileCopyAdapter} is used.
     * Set via {@link #setTargetFileCopyPortOverride} to inject a failure-simulating stub.
     */
    private TargetFileCopyPort targetFileCopyPortOverride;

    private E2ETestContext(
            Path sourceFolder,
            Path targetFolder,
            Path lockFile,
            Path promptFile,
            String jdbcUrl,
            SqliteDocumentRecordRepositoryAdapter documentRepo,
            SqliteProcessingAttemptRepositoryAdapter attemptRepo,
            StubAiInvocationPort aiStub) {
        this.sourceFolder = sourceFolder;
        this.targetFolder = targetFolder;
        this.lockFile = lockFile;
        this.promptFile = promptFile;
        this.jdbcUrl = jdbcUrl;
        this.documentRepo = documentRepo;
        this.attemptRepo = attemptRepo;
        this.aiStub = aiStub;
    }

    /**
     * Initializes a fully wired end-to-end test context rooted in {@code tempDir}.
     * <p>
     * Creates the {@code source/}, {@code target/} subdirectories and a minimal prompt
     * file, initializes the SQLite schema, and wires all adapters.
     *
     * @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
     * @return a ready-to-use context; caller is responsible for closing it
     * @throws Exception if schema initialization or directory/file creation fails
     */
    public static E2ETestContext initialize(Path tempDir) throws Exception {
        Path sourceFolder = Files.createDirectories(tempDir.resolve("source"));
        Path targetFolder = Files.createDirectories(tempDir.resolve("target"));
        Path lockFile = tempDir.resolve("run.lock");
        Path promptFile = tempDir.resolve("prompt.txt");

        Files.writeString(promptFile,
                "Analysiere das folgende Dokument und liefere Datum, Titel und Begruendung als JSON-Objekt.");

        // Forward slashes keep the JDBC URL well-formed for Windows-style absolute paths.
        String jdbcUrl = "jdbc:sqlite:" + tempDir.resolve("test.db").toAbsolutePath().toString().replace('\\', '/');

        SqliteSchemaInitializationAdapter schema = new SqliteSchemaInitializationAdapter(jdbcUrl);
        schema.initializeSchema();

        SqliteDocumentRecordRepositoryAdapter documentRepo =
                new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
        SqliteProcessingAttemptRepositoryAdapter attemptRepo =
                new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);

        return new E2ETestContext(
                sourceFolder, targetFolder, lockFile, promptFile,
                jdbcUrl, documentRepo, attemptRepo, new StubAiInvocationPort());
    }

    // =========================================================================
    // Test fixture creation
    // =========================================================================

    /**
     * Creates a single-page searchable PDF in the source folder with the given text.
     * <p>
     * The file is ready for the batch run as soon as this method returns.
     *
     * @param filename the PDF filename (e.g. {@code "rechnung.pdf"})
     * @param textContent text to embed; should be at least a few words to pass pre-checks
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createSearchablePdf(String filename, String textContent) throws IOException {
        Path pdfPath = sourceFolder.resolve(filename);
        MinimalPdfFactory.createSearchablePdf(pdfPath, textContent);
        return pdfPath;
    }

    /**
     * Creates a single-page blank PDF (no extractable text) in the source folder.
     * <p>
     * Processing this file triggers the "no usable text" deterministic content error,
     * which skips the AI call.
     *
     * @param filename the PDF filename (e.g. {@code "blank.pdf"})
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createBlankPdf(String filename) throws IOException {
        Path pdfPath = sourceFolder.resolve(filename);
        MinimalPdfFactory.createBlankPdf(pdfPath);
        return pdfPath;
    }

    // =========================================================================
    // Batch execution
    // =========================================================================

    /**
     * Executes one complete batch run using the current stub configuration.
     * <p>
     * A fresh {@link BatchRunContext} with a new {@link RunId} is created for each call,
     * matching the production behavior where every Task Scheduler invocation is a
     * distinct run.
     *
     * @return the outcome of the batch run
     */
    public BatchRunOutcome runBatch() {
        DefaultBatchRunProcessingUseCase useCase = buildUseCase();
        BatchRunContext context = new BatchRunContext(
                new RunId(UUID.randomUUID().toString()), Instant.now());
        return useCase.execute(context);
    }

    // =========================================================================
    // State inspection helpers
    // =========================================================================

    /**
     * Looks up the document master record for the given fingerprint.
     *
     * @param fingerprint the document fingerprint to query
     * @return the master record if one exists, {@link Optional#empty()} if unknown or
     *         if a persistence lookup error occurred
     */
    public Optional<DocumentRecord> findDocumentRecord(DocumentFingerprint fingerprint) {
        // NOTE(review): the default arm also swallows lookup-error results — intentional
        // here, since tests treat "error" and "unknown" identically.
        return switch (documentRepo.findByFingerprint(fingerprint)) {
            case DocumentTerminalSuccess s -> Optional.of(s.record());
            case DocumentTerminalFinalFailure f -> Optional.of(f.record());
            case DocumentKnownProcessable p -> Optional.of(p.record());
            default -> Optional.empty();
        };
    }

    /**
     * Returns all processing attempts for the given fingerprint in insertion order.
     *
     * @param fingerprint the document fingerprint to query
     * @return all recorded attempts; empty list if none exist
     */
    public List<ProcessingAttempt> findAttempts(DocumentFingerprint fingerprint) {
        return attemptRepo.findAllByFingerprint(fingerprint);
    }

    /**
     * Computes the SHA-256 fingerprint for the given file using the production adapter.
     * <p>
     * Useful for correlating a test PDF with its database record after a batch run.
     *
     * @param file the absolute path of the file to fingerprint
     * @return the fingerprint
     * @throws IllegalStateException if fingerprint computation fails
     */
    public DocumentFingerprint computeFingerprint(Path file) {
        Sha256FingerprintAdapter adapter = new Sha256FingerprintAdapter();
        // Construct a minimal candidate that mirrors how the production source adapter creates one
        // NOTE(review): size is passed as 0L; presumably the adapter hashes the file content
        // itself rather than this field — confirm against Sha256FingerprintAdapter.
        SourceDocumentCandidate candidate = new SourceDocumentCandidate(
                file.getFileName().toString(),
                0L,
                new SourceDocumentLocator(file.toAbsolutePath().toString()));
        return switch (adapter.computeFingerprint(candidate)) {
            case FingerprintSuccess s -> s.fingerprint();
            default -> throw new IllegalStateException(
                    "Fingerprint computation failed for test fixture: " + file);
        };
    }

    /**
     * Lists the filenames of all files currently in the target folder.
     *
     * @return list of filenames; empty if target folder is empty
     * @throws IOException if the target folder cannot be read
     */
    public List<String> listTargetFiles() throws IOException {
        // Files.list must be closed; sorted() makes assertions order-independent.
        try (var stream = Files.list(targetFolder)) {
            return stream.map(p -> p.getFileName().toString()).sorted().toList();
        }
    }

    /**
     * Returns the source folder path used by this context.
     */
    public Path sourceFolder() {
        return sourceFolder;
    }

    /**
     * Returns the target folder path used by this context.
     */
    public Path targetFolder() {
        return targetFolder;
    }

    /**
     * Overrides the {@link TargetFileCopyPort} used in subsequent batch runs.
     * Pass {@code null} to revert to the real {@link FilesystemTargetFileCopyAdapter}.
     *
     * @param override the port implementation to use, or {@code null} for the real adapter
     */
    public void setTargetFileCopyPortOverride(TargetFileCopyPort override) {
        this.targetFileCopyPortOverride = override;
    }

    @Override
    public void close() {
        // No explicit cleanup needed: @TempDir removes all files automatically
    }

    // =========================================================================
    // Private wiring
    // =========================================================================

    /**
     * Constructs a fully wired {@link DefaultBatchRunProcessingUseCase} for a single batch run.
     * <p>
     * All adapters are instantiated fresh per run to avoid shared mutable state between
     * runs (e.g. locks, connection states). The AI stub and optional copy-port override
     * are re-used across runs within the same test.
     */
    private DefaultBatchRunProcessingUseCase buildUseCase() {
        RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(
                MAX_PAGES, MAX_RETRIES_TRANSIENT, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);

        FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();

        DocumentRecordRepository documentRecordRepository = documentRepo;
        ProcessingAttemptRepository processingAttemptRepository = attemptRepo;
        UnitOfWorkPort unitOfWorkPort = new SqliteUnitOfWorkAdapter(jdbcUrl);

        ProcessingLogger coordinatorLogger = new Log4jProcessingLogger(
                DocumentProcessingCoordinator.class);
        TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(targetFolder);
        // The override (if set) replaces the real copy adapter to simulate copy failures.
        TargetFileCopyPort targetFileCopyPort = (targetFileCopyPortOverride != null)
                ? targetFileCopyPortOverride
                : new FilesystemTargetFileCopyAdapter(targetFolder);

        DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
                documentRecordRepository,
                processingAttemptRepository,
                unitOfWorkPort,
                targetFolderPort,
                targetFileCopyPort,
                coordinatorLogger,
                MAX_RETRIES_TRANSIENT);

        PromptPort promptPort = new FilesystemPromptPortAdapter(promptFile);
        ClockPort clockPort = new SystemClockAdapter();
        AiResponseValidator aiResponseValidator = new AiResponseValidator(clockPort);
        AiNamingService aiNamingService = new AiNamingService(
                aiStub, promptPort, aiResponseValidator, AI_MODEL, MAX_TEXT_CHARS);

        ProcessingLogger useCaseLogger = new Log4jProcessingLogger(
                DefaultBatchRunProcessingUseCase.class);

        RunLockPort runLockPort = new FilesystemRunLockPortAdapter(lockFile);
        SourceDocumentCandidatesPort candidatesPort =
                new SourceDocumentCandidatesPortAdapter(sourceFolder);
        PdfTextExtractionPort extractionPort = new PdfTextExtractionPortAdapter();

        return new DefaultBatchRunProcessingUseCase(
                runtimeConfig,
                runLockPort,
                candidatesPort,
                extractionPort,
                fingerprintPort,
                coordinator,
                aiNamingService,
                useCaseLogger);
    }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* Factory for creating minimal PDF test fixtures used in end-to-end tests.
|
||||
* <p>
|
||||
* Provides two variants:
|
||||
* <ul>
|
||||
* <li>Searchable PDFs with embedded text content — used for happy-path, transient-error,
|
||||
* and target-copy-failure scenarios where the pre-check must pass.</li>
|
||||
* <li>Blank PDFs with no extractable text — used for deterministic content-error
|
||||
* scenarios where the pre-check must fail.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Uses Apache PDFBox to create real, structurally valid PDF files so that the
|
||||
* production {@code PdfTextExtractionPortAdapter} processes them correctly.
|
||||
*/
|
||||
final class MinimalPdfFactory {
|
||||
|
||||
private MinimalPdfFactory() {
|
||||
// Static utility class — not instantiable
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a single-page searchable PDF with the given text content at the output path.
|
||||
* <p>
|
||||
* The resulting file passes the production pre-checks for minimum text length and
|
||||
* page count, enabling the AI naming pipeline to run.
|
||||
*
|
||||
* @param outputPath the path where the PDF will be written; parent directory must exist
|
||||
* @param textContent the text to embed in the PDF; should be non-empty for happy-path tests
|
||||
* @throws IOException if the file cannot be written
|
||||
*/
|
||||
static void createSearchablePdf(Path outputPath, String textContent) throws IOException {
|
||||
try (PDDocument doc = new PDDocument()) {
|
||||
PDPage page = new PDPage();
|
||||
doc.addPage(page);
|
||||
try (PDPageContentStream stream = new PDPageContentStream(doc, page)) {
|
||||
stream.beginText();
|
||||
stream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
|
||||
stream.newLineAtOffset(50, 700);
|
||||
stream.showText(textContent);
|
||||
stream.endText();
|
||||
}
|
||||
doc.save(outputPath.toFile());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a single-page blank PDF with no text content at the output path.
|
||||
* <p>
|
||||
* The resulting file triggers the "no usable text" pre-check failure
|
||||
* (deterministic content error), which does not invoke the AI service.
|
||||
*
|
||||
* @param outputPath the path where the PDF will be written; parent directory must exist
|
||||
* @throws IOException if the file cannot be written
|
||||
*/
|
||||
static void createBlankPdf(Path outputPath) throws IOException {
|
||||
try (PDDocument doc = new PDDocument()) {
|
||||
doc.addPage(new PDPage());
|
||||
doc.save(outputPath.toFile());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* Configurable test double for {@link AiInvocationPort}.
|
||||
* <p>
|
||||
* Replaces the real HTTP-based AI adapter in end-to-end tests so that the processing
|
||||
* pipeline can be exercised without real network calls. Supports two response modes:
|
||||
* <ul>
|
||||
* <li><strong>Success mode</strong> (default): returns a structurally valid JSON response
|
||||
* containing configurable {@code title} and {@code date} fields. This produces a
|
||||
* {@code PROPOSAL_READY} outcome when the response passes validation.</li>
|
||||
* <li><strong>Technical failure mode</strong>: returns an {@link AiInvocationTechnicalFailure},
|
||||
* simulating network errors or service unavailability. This produces a
|
||||
* {@code FAILED_RETRYABLE} (transient) outcome.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The stub tracks the total number of invocations so that tests can verify whether
|
||||
* the AI pipeline was called at all (e.g. confirming that a {@code PROPOSAL_READY}
|
||||
* finalization skips the AI call).
|
||||
*/
|
||||
final class StubAiInvocationPort implements AiInvocationPort {
|
||||
|
||||
private final AtomicInteger invocationCount = new AtomicInteger(0);
|
||||
|
||||
private volatile boolean returnTechnicalFailure = false;
|
||||
private volatile String title = "Stromabrechnung";
|
||||
private volatile String date = "2024-01-15";
|
||||
private volatile String reasoning = "Testdokument fuer End-to-End-Tests.";
|
||||
|
||||
/**
|
||||
* Configures the stub to return a valid naming proposal with the given title and date.
|
||||
*
|
||||
* @param title the document title (must pass validation: max 20 chars, no special chars)
|
||||
* @param date the document date in {@code YYYY-MM-DD} format, or {@code null} to omit
|
||||
*/
|
||||
void configureSuccess(String title, String date) {
|
||||
this.title = title;
|
||||
this.date = date;
|
||||
this.returnTechnicalFailure = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configures the stub to return a transient technical failure on every invocation.
|
||||
* The failure reason is {@code STUB_FAILURE}.
|
||||
*/
|
||||
void configureTechnicalFailure() {
|
||||
this.returnTechnicalFailure = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the stub to its default success configuration with title "Stromabrechnung"
|
||||
* and date "2024-01-15", and clears the invocation counter.
|
||||
*/
|
||||
void reset() {
|
||||
this.title = "Stromabrechnung";
|
||||
this.date = "2024-01-15";
|
||||
this.reasoning = "Testdokument fuer End-to-End-Tests.";
|
||||
this.returnTechnicalFailure = false;
|
||||
invocationCount.set(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of times {@link #invoke} was called since construction
|
||||
* or the last {@link #reset()}.
|
||||
*/
|
||||
int invocationCount() {
|
||||
return invocationCount.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the invocation counter to zero without changing response configuration.
|
||||
*/
|
||||
void resetInvocationCount() {
|
||||
invocationCount.set(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns either a success response or a technical failure, depending on current configuration.
|
||||
* Increments the invocation counter on every call.
|
||||
*/
|
||||
@Override
|
||||
public AiInvocationResult invoke(AiRequestRepresentation request) {
|
||||
invocationCount.incrementAndGet();
|
||||
|
||||
if (returnTechnicalFailure) {
|
||||
return new AiInvocationTechnicalFailure(
|
||||
request,
|
||||
"STUB_FAILURE",
|
||||
"Test stub: configured to return technical failure");
|
||||
}
|
||||
|
||||
String dateField = (date != null) ? "\"date\": \"" + date + "\", " : "";
|
||||
String rawJson = "{"
|
||||
+ dateField
|
||||
+ "\"title\": \"" + title + "\", "
|
||||
+ "\"reasoning\": \"" + reasoning + "\""
|
||||
+ "}";
|
||||
return new AiInvocationSuccess(request, new AiRawResponse(rawJson));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user