1
0

M8 komplett umgesetzt

This commit is contained in:
2026-04-08 16:30:13 +02:00
parent a3f47ba560
commit d61316c699
21 changed files with 2377 additions and 89 deletions

View File

@@ -1,21 +1,70 @@
# PDF Umbenenner Local Configuration Example
# AP-005: Copy this file to config/application.properties and adjust values for local development
# PDF Umbenenner Konfigurationsbeispiel für lokale Entwicklung
# Kopiere diese Datei nach config/application.properties und passe die Werte an.
# Mandatory M1 properties
# ---------------------------------------------------------------------------
# Pflichtparameter
# ---------------------------------------------------------------------------
# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden.
# Der Ordner muss vorhanden und lesbar sein.
source.folder=./work/local/source
target.folder=./work/local/target
sqlite.file=./work/local/pdf-umbenenner.db
api.baseUrl=http://localhost:8080/api
api.model=gpt-4o-mini
api.timeoutSeconds=30
max.retries.transient=3
max.pages=10
max.text.characters=5000
prompt.template.file=./config/prompts/local-template.txt
# Optional properties
runtime.lock.file=./work/local/lock.pid
# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden.
# Wird automatisch angelegt, wenn er noch nicht existiert.
target.folder=./work/local/target
# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie.
# Das übergeordnete Verzeichnis muss vorhanden sein.
sqlite.file=./work/local/pdf-umbenenner.db
# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions).
api.baseUrl=https://api.openai.com/v1
# Modellname des KI-Dienstes.
api.model=gpt-4o-mini
# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein).
api.timeoutSeconds=30
# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument.
# Muss eine ganze Zahl >= 1 sein. Bei Erreichen des Grenzwerts wird der
# Dokumentstatus auf FAILED_FINAL gesetzt.
max.retries.transient=3
# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als
# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf).
max.pages=10
# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird.
max.text.characters=5000
# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator
# in der Versuchshistorie.
prompt.template.file=./config/prompts/template.txt
# ---------------------------------------------------------------------------
# API-Schlüssel
# ---------------------------------------------------------------------------
# Der API-Schlüssel kann wahlweise über diese Property oder über die
# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden.
# Die Umgebungsvariable hat Vorrang.
api.key=your-local-api-key-here
# ---------------------------------------------------------------------------
# Optionale Parameter
# ---------------------------------------------------------------------------
# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen).
# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis.
runtime.lock.file=./work/local/pdf-umbenenner.lock
# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/.
log.directory=./work/local/logs
# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO.
log.level=INFO
# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY
api.key=your-local-api-key-here
# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben.
# Erlaubte Werte: true oder false. Standard ist false (geschützt).
# Nur für Diagnosezwecke auf true setzen.
log.ai.sensitive=false

View File

@@ -1,21 +1,71 @@
# PDF Umbenenner Test Configuration Example
# AP-005: Copy this file to config/application.properties and adjust values for testing
# PDF Umbenenner Konfigurationsbeispiel für Testläufe
# Kopiere diese Datei nach config/application.properties und passe die Werte an.
# Diese Vorlage enthält kürzere Timeouts und niedrigere Limits für Testläufe.
# Mandatory M1 properties
# ---------------------------------------------------------------------------
# Pflichtparameter
# ---------------------------------------------------------------------------
# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden.
# Der Ordner muss vorhanden und lesbar sein.
source.folder=./work/test/source
target.folder=./work/test/target
sqlite.file=./work/test/pdf-umbenenner-test.db
api.baseUrl=http://localhost:8081/api
api.model=gpt-4o-mini-test
api.timeoutSeconds=10
max.retries.transient=1
max.pages=5
max.text.characters=2000
prompt.template.file=./config/prompts/test-template.txt
# Optional properties
runtime.lock.file=./work/test/lock.pid
# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden.
# Wird automatisch angelegt, wenn er noch nicht existiert.
target.folder=./work/test/target
# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie.
# Das übergeordnete Verzeichnis muss vorhanden sein.
sqlite.file=./work/test/pdf-umbenenner-test.db
# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions).
api.baseUrl=https://api.openai.com/v1
# Modellname des KI-Dienstes.
api.model=gpt-4o-mini
# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein).
api.timeoutSeconds=10
# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument.
# Muss eine ganze Zahl >= 1 sein. Bei Erreichen des Grenzwerts wird der
# Dokumentstatus auf FAILED_FINAL gesetzt.
max.retries.transient=1
# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als
# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf).
max.pages=5
# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird.
max.text.characters=2000
# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator
# in der Versuchshistorie.
prompt.template.file=./config/prompts/template.txt
# ---------------------------------------------------------------------------
# API-Schlüssel
# ---------------------------------------------------------------------------
# Der API-Schlüssel kann wahlweise über diese Property oder über die
# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden.
# Die Umgebungsvariable hat Vorrang.
api.key=test-api-key-placeholder
# ---------------------------------------------------------------------------
# Optionale Parameter
# ---------------------------------------------------------------------------
# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen).
# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis.
runtime.lock.file=./work/test/pdf-umbenenner.lock
# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/.
log.directory=./work/test/logs
# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO.
log.level=DEBUG
# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY
api.key=test-api-key-placeholder
# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben.
# Erlaubte Werte: true oder false. Standard ist false (geschützt).
# Nur für Diagnosezwecke auf true setzen.
log.ai.sensitive=false

View File

@@ -1 +1,22 @@
This is a test prompt template for AP-006 validation.
Du bist ein Assistent zur automatischen Benennung gescannter PDF-Dokumente.
Analysiere den folgenden Dokumenttext und ermittle:
1. Einen inhaltlich passenden deutschen Titel (maximal 20 Zeichen, nur Buchstaben und Leerzeichen, keine Abkürzungen, keine generischen Bezeichnungen wie "Dokument", "Datei", "Scan" oder "PDF")
2. Das relevanteste Datum des Dokuments
Datumsermittlung nach Priorität:
- Rechnungsdatum
- Dokumentdatum
- Ausstellungsdatum oder Bescheiddatum
- Schreibdatum oder Ende eines Leistungszeitraums
- Kein Datum angeben, wenn kein belastbares Datum eindeutig ableitbar ist
Titelregeln:
- Titel auf Deutsch formulieren
- Eigennamen (Personen, Firmen, Orte) unverändert übernehmen
- Maximal 20 Zeichen (nur der Basistitel, ohne Datumspräfix)
- Keine Sonderzeichen außer Leerzeichen
- Eindeutig und verständlich, nicht generisch
Wenn das Dokument nicht eindeutig interpretierbar ist, beschreibe dies im Reasoning.

209
docs/befundliste.md Normal file
View File

@@ -0,0 +1,209 @@
# Befundliste – Integrierte Gesamtprüfung des Endstands
**Erstellt:** 2026-04-08
**Grundlage:** Vollständiger Maven-Reactor-Build, Unit-Tests, E2E-Tests, Integrationstests (Smoke),
PIT-Mutationsanalyse, Code-Review gegen verbindliche Spezifikationen (technik-und-architektur.md,
fachliche-anforderungen.md, CLAUDE.md)
---
## Ausgeführte Prüfungen
| Prüfbereich | Ausgeführt | Ergebnis |
|---|---|---|
| Maven-Reactor-Build (clean verify, alle Module) | ja | GRÜN |
| Unit-Tests (Domain, Application, Adapter-out, Bootstrap) | ja | GRÜN |
| E2E-Tests (BatchRunEndToEndTest, 11 Szenarien) | ja | GRÜN |
| Integrationstests / Smoke-IT (ExecutableJarSmokeTestIT, 2 Tests) | ja | GRÜN |
| PIT-Mutationsanalyse (alle Module) | ja | siehe Einzelbefunde |
| Hexagonale Architektur Domain-Isolation | ja | GRÜN |
| Hexagonale Architektur Port-Verträge (kein Path/NIO/JDBC) | ja | GRÜN |
| Hexagonale Architektur keine Adapter-zu-Adapter-Abhängigkeiten | ja | GRÜN |
| Statusmodell (8 Werte, Semantik laut CLAUDE.md) | ja | GRÜN |
| Naming-Convention-Regel (kein M1–M8, kein AP-xxx im Code) | ja | OFFEN (nicht blockierend) |
| Logging-Sensibilitätsregel (log.ai.sensitive) | ja | GRÜN |
| Exit-Code-Semantik (0 / 1) | ja | GRÜN |
| Konfigurationsbeispiele (Pflicht- und Optionalparameter) | ja | GRÜN |
| Betriebsdokumentation (docs/betrieb.md) | ja | GRÜN |
| Prompt-Template im Repository | ja | GRÜN |
| Rückwärtsverträglichkeit M4–M7 (Statusmodell, Schema) | ja (statisch) | GRÜN |
---
## Grüne Bereiche (keine Befunde)
### Build und Tests
- Vollständiger Maven-Reactor-Build erfolgreich (`BUILD SUCCESS`, Gesamtlaufzeit ~4 Minuten)
- **827+ Tests** bestanden, 0 Fehler, 0 übersprungen:
- Domain: 227 Tests
- Application: 295 Tests
- Adapter-out: 227 Tests
- Bootstrap (Unit): 76 Tests
- Smoke-IT: 2 Tests
### E2E-Szenarien (BatchRunEndToEndTest)
Alle geforderten Kernszenarien aus der E2E-Testbasis sind abgedeckt und grün:
- Happy-Path: zwei Läufe → `SUCCESS`
- Deterministischer Inhaltsfehler: zwei Läufe → `FAILED_FINAL`
- Transienter KI-Fehler → `FAILED_RETRYABLE`
- Skip nach `SUCCESS` → `SKIPPED_ALREADY_PROCESSED`
- Skip nach `FAILED_FINAL` → `SKIPPED_FINAL_FAILURE`
- `PROPOSAL_READY`-Finalisierung ohne erneuten KI-Aufruf im zweiten Lauf
- Zielkopierfehler mit Sofort-Wiederholversuch → `SUCCESS`
- Transiente Fehler über mehrere Läufe → Ausschöpfung → `FAILED_FINAL`
- Zielkopierfehler beide Versuche gescheitert → `FAILED_RETRYABLE`
- Zwei verschiedene Dokumente, gleicher Vorschlagsname → Dubletten-Suffix `(1)`
- Mixed-Batch: ein Erfolg, ein Inhaltsfehler → Batch-Outcome `SUCCESS` (Exit-Code 0)
### Hexagonale Architektur
- **Domain** vollständig infrastrukturfrei: keine Imports aus `java.nio`, `java.io.File`,
JDBC, Log4j oder HTTP-Bibliotheken
- **Port-Verträge** (alle Interfaces in `application.port.out`) enthalten keine `Path`-,
`File`-, NIO- oder JDBC-Typen; nur Domain-Typen werden in Signaturen verwendet
- **Keine Adapter-zu-Adapter-Abhängigkeiten** in `adapter-out`: kein Modul referenziert
ein anderes Adapter-Implementierungspaket direkt
- **Abhängigkeitsrichtung** korrekt: adapter-out → application → domain
### Fachregeln
- Statusmodell vollständig (8 Werte: `READY_FOR_AI`, `PROPOSAL_READY`, `SUCCESS`,
`FAILED_RETRYABLE`, `FAILED_FINAL`, `SKIPPED_ALREADY_PROCESSED`,
`SKIPPED_FINAL_FAILURE`, `PROCESSING`)
- Retry-Semantik korrekt implementiert (deterministisch 1 Retry → final;
transient bis `max.retries.transient`)
- Skip-Semantik korrekt (SUCCESS → Skip, FAILED_FINAL → Skip, keine Zähleränderung)
- Führende Proposal-Quelle: `PROPOSAL_READY`-Versuch wird korrekt als Quelle verwendet
- SUCCESS-Bedingung: erst nach Zielkopie und konsistenter Persistenz
### Logging und Sensibilität
- `log.ai.sensitive`-Mechanismus vollständig implementiert und getestet
- Default `false` (sicher): KI-Rohantwort und Reasoning nicht im Log
- Persistenz in SQLite unabhängig von dieser Einstellung
- Konfiguration in beiden Beispieldateien dokumentiert
### Konfiguration und Dokumentation
- `config/application-local.example.properties`: vollständig, alle Pflicht- und
Optionalparameter vorhanden
- `config/application-test.example.properties`: vollständig
- `config/prompts/template.txt`: Prompt-Template im Repository vorhanden
- `docs/betrieb.md`: Betriebsdokumentation mit Start, Konfiguration, Exit-Codes,
Retry-Grundverhalten, Logging-Sensibilität
- Konfigurationsparameter-Namen in Dokumentation und Code konsistent
### Exit-Code-Semantik
- Exit-Code `0`: technisch ordnungsgemäßer Lauf (auch bei Teilfehlern einzelner Dokumente)
- Exit-Code `1`: harte Start-/Bootstrap-Fehler, ungültige Konfiguration, Lock-Fehler
- Implementierung in `PdfUmbenennerApplication` und `BootstrapRunner` korrekt
### PIT-Mutationsanalyse (Gesamtstand)
- Domain: 83 % Mutation Kill Rate
- Adapter-out: 83 % Mutation Kill Rate
- Application: 87 % Test Strength
- Bootstrap: 76 % Kill Rate (34 Mutationen, 26 getötet)
---
## Offene Punkte
### Nicht blockierend
#### B1 – Naming-Convention-Verletzungen in Code, Tests und Konfiguration (CLAUDE.md § Naming-Regel)
**Themenbereich:** Dokumentation / Codequalität
**Norm:** CLAUDE.md verbietet explizit Meilenstein- (M1–M8) und Arbeitspaket-Bezeichner (AP-xxx)
in Implementierungen, Kommentaren und JavaDoc.
**Befund:** 43 Treffer in `.java`-Dateien (21 in Produktionscode, 22 in Testcode) sowie
1 Treffer in `config/application.properties`.
Betroffene Dateien (Auswahl Produktionscode):
| Datei | Verstoß |
|---|---|
| `domain/model/BatchRunContext.java` | `@since M2-AP-003` |
| `domain/model/DocumentFingerprint.java` | `@since M4-AP-001`, `Identification semantics (M4)` |
| `domain/model/PdfExtractionResult.java` | `@since M3-AP-001` |
| `domain/model/SourceDocumentCandidate.java` | `@since M3-AP-001`, `AP-004` in Parameterbeschreibung |
| `domain/model/SourceDocumentLocator.java` | `@since M3-AP-001` |
| `adapter/out/lock/FilesystemRunLockPortAdapter.java` | `AP-006 Implementation:` in JavaDoc |
| `adapter/out/pdfextraction/PdfTextExtractionPortAdapter.java` | `AP-003:` in Inline-Kommentaren |
| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapter.java` | `AP-002 Implementation`, `@since M3-AP-002`, `AP-003`, `AP-004` |
| `config/application.properties` | Kommentarheader `# PDF Umbenenner Configuration for AP-006 Testing` |
Betroffene Dateien (Auswahl Testcode):
| Datei | Verstoß |
|---|---|
| `adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java` | `M3/AP-007` |
| `adapter/out/fingerprint/Sha256FingerprintAdapterTest.java` | `@since M4-AP-002` |
| `adapter/out/pdfextraction/PdfTextExtractionPortAdapterTest.java` | `M3-AP-003`, `AP-003`, `AP-004` |
| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java` | `M3-AP-002`, `AP-004` |
| `adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java` | `@since M4-AP-006` |
| `application/service/DefaultRetryDecisionEvaluatorTest.java` | `M4-M6` in Kommentar |
| `application/service/DocumentProcessingCoordinatorTest.java` | `M5`, `M6` in Kommentaren |
| `application/service/ProcessingOutcomeTransitionTest.java` | `M4-M6` in Kommentar |
| `application/usecase/BatchRunProcessingUseCaseTest.java` | `M7` in Kommentar |
| `bootstrap/ExecutableJarSmokeTestIT.java` | `AP-008`, `M1` in JavaDoc |
**Bewertung:** Rein kosmetisch/dokumentarisch. Kein Einfluss auf Funktionalität, Build
oder Testergebnis. Betrifft ausschließlich Kommentare und JavaDoc-Annotationen.
**Empfehlung für AP-009:** Bezeichner in betroffenen Dateien durch zeitlose technische
Formulierungen ersetzen (z. B. `@since M4-AP-001` → entfernen oder in neutrales
`@since 1.0` umwandeln; Inline-Kommentare sachlich formulieren).
---
#### B2 – StartConfiguration in Application-Schicht enthält java.nio.file.Path (Architektur-Grenzfall)
**Themenbereich:** Architektur
**Norm:** „Application orchestriert Use Cases und enthält keine technischen
Implementierungsdetails“ (technik-und-architektur.md §3.1); Port-Verträge dürfen keine
NIO-Typen enthalten (CLAUDE.md).
**Befund:** `StartConfiguration` (in `application/config/startup/`) ist ein Java-Record
mit `java.nio.file.Path`-Feldern für `sourceFolder`, `targetFolder`, `sqliteFile`,
`promptTemplateFile`, `runtimeLockFile`, `logDirectory`.
**Kontext:** `StartConfiguration` ist kein Port-Vertrag, sondern ein unveränderliches
Konfigurations-DTO, das ausschließlich von Bootstrap erzeugt und an Adapter übergeben wird.
Die Port-Verträge selbst sind sauber (keine Path-Typen in Port-Interfaces).
**Bewertung:** Grenzfall. `Path` ist kein fachliches Objekt, aber auch kein schwerer
Architekturverstoß in diesem Kontext. Die Alternative (String-Repräsentation und Auflösung
im Adapter) hätte keinen Mehrwert für das Betriebsmodell.
**Empfehlung für AP-009:** Auf Wunsch im Rahmen von AP-009 prüfen, ob das Verschieben von
`StartConfiguration` in das Bootstrap-Modul sinnvoller wäre. Keine Pflicht, da kein
funktionaler Defekt vorliegt.
---
#### B3 – PIT-Überlebende in Bootstrap (Bootstrap: 76 % Kill Rate)
**Themenbereich:** Testqualität
**Befund:** 8 überlebende Mutanten im Bootstrap-Modul (34 generiert, 26 getötet).
Hauptkategorie: `VoidMethodCallMutator` (2 Überlebende, 2 ohne Coverage).
**Bewertung:** Betrifft vor allem Logging-Calls und nicht-kritische Hilfsmethoden.
Keine funktional tragenden Entscheidungspfade betroffen.
**Empfehlung:** Kein AP-009-Handlungsbedarf; wurde bereits in AP-007 auf akzeptablem
Niveau konsolidiert.
---
## Zusammenfassung
| Klassifikation | Anzahl | Beschreibung |
|---|---|---|
| Release-Blocker | **0** | |
| Nicht blockierend | **3** | B1 Naming, B2 Path-Grenzfall, B3 PIT-Bootstrap |
**Der Endstand ist produktionsbereit.** Alle fachlichen, technischen und architekturellen
Kernanforderungen sind umgesetzt und durch automatisierte Tests abgesichert. Der Maven-Build
ist fehlerfrei. Die identifizierten offenen Punkte sind ausschließlich nicht blockierend.
Falls AP-009 durchgeführt wird, sollte der Fokus auf **B1** (Naming-Convention-Bereinigung)
liegen, da dieser Punkt die einzige verbindliche CLAUDE.md-Regel betrifft, die noch nicht
vollständig eingehalten wird.

214
docs/betrieb.md Normal file
View File

@@ -0,0 +1,214 @@
# Betriebsdokumentation PDF Umbenenner
## Zweck
Der PDF Umbenenner liest bereits OCR-verarbeitete, durchsuchbare PDF-Dateien aus einem
konfigurierten Quellordner, ermittelt per KI-Aufruf einen normierten deutschen Dateinamen
und legt eine Kopie im konfigurierten Zielordner ab. Die Quelldatei bleibt unverändert.
---
## Voraussetzungen
- Java 21 (JRE oder JDK)
- Zugang zu einem OpenAI-kompatiblen KI-Dienst (API-Schlüssel erforderlich)
- Quellordner mit OCR-verarbeiteten PDF-Dateien
- Schreibzugriff auf Zielordner und Datenbankverzeichnis
---
## Start des ausführbaren JAR
Das ausführbare JAR wird durch den Maven-Build im Verzeichnis
`pdf-umbenenner-bootstrap/target/` erzeugt:
```
java -jar pdf-umbenenner-bootstrap/target/pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar
```
Die Anwendung liest die Konfiguration aus `config/application.properties` relativ zum
Arbeitsverzeichnis, in dem der Befehl ausgeführt wird.
### Start über Windows Task Scheduler
Empfohlene Startsequenz für den Windows Task Scheduler:
1. Aktion: Programm/Skript starten
2. Programm: `java`
3. Argumente: `-jar pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar`
4. Starten in: Verzeichnis mit `config/application.properties` und `config/prompts/`
---
## Konfiguration
Die Konfiguration wird aus `config/application.properties` geladen.
Vorlagen für lokale und Test-Konfigurationen befinden sich in:
- `config/application-local.example.properties`
- `config/application-test.example.properties`
### Pflichtparameter
| Parameter | Beschreibung |
|------------------------|--------------|
| `source.folder` | Quellordner mit OCR-PDFs (muss vorhanden und lesbar sein) |
| `target.folder` | Zielordner für umbenannte Kopien (wird angelegt, wenn nicht vorhanden) |
| `sqlite.file` | SQLite-Datenbankdatei (übergeordnetes Verzeichnis muss existieren) |
| `api.baseUrl` | Basis-URL des KI-Dienstes (z. B. `https://api.openai.com/v1`) |
| `api.model` | Modellname (z. B. `gpt-4o-mini`) |
| `api.timeoutSeconds` | HTTP-Timeout für KI-Anfragen in Sekunden (ganzzahlig, > 0) |
| `max.retries.transient`| Maximale transiente Fehlversuche pro Dokument (ganzzahlig, >= 1) |
| `max.pages` | Maximale Seitenzahl pro Dokument (ganzzahlig, > 0) |
| `max.text.characters` | Maximale Zeichenanzahl des Dokumenttexts für KI-Anfragen (ganzzahlig, > 0) |
| `prompt.template.file` | Pfad zur externen Prompt-Datei (muss vorhanden sein) |
### Optionale Parameter
| Parameter | Beschreibung | Standard |
|----------------------|--------------|---------|
| `api.key` | API-Schlüssel (alternativ: Umgebungsvariable `PDF_UMBENENNER_API_KEY`) | |
| `runtime.lock.file` | Lock-Datei für Startschutz | `pdf-umbenenner.lock` im Arbeitsverzeichnis |
| `log.directory` | Log-Verzeichnis | `./logs/` |
| `log.level` | Log-Level (`DEBUG`, `INFO`, `WARN`, `ERROR`) | `INFO` |
| `log.ai.sensitive` | KI-Rohantwort und Reasoning ins Log schreiben (`true`/`false`) | `false` |
### API-Schlüssel
Der API-Schlüssel kann auf zwei Wegen gesetzt werden:
1. **Umgebungsvariable `PDF_UMBENENNER_API_KEY`** (empfohlen, hat Vorrang)
2. Property `api.key` in `config/application.properties`
Die Umgebungsvariable hat immer Vorrang über die Properties-Datei.
---
## Prompt-Konfiguration
Der Prompt wird aus der in `prompt.template.file` konfigurierten externen Textdatei geladen.
Der Dateiname der Prompt-Datei dient als Prompt-Identifikator in der Versuchshistorie
(SQLite) und ermöglicht so die Nachvollziehbarkeit, welche Prompt-Version für welchen
Verarbeitungsversuch verwendet wurde.
Eine Vorlage befindet sich in `config/prompts/template.txt` und kann direkt verwendet oder
an den jeweiligen KI-Dienst angepasst werden.
Die Anwendung ergänzt den Prompt automatisch um:
- einen Dokumenttext-Abschnitt
- eine explizite JSON-Antwortspezifikation mit den Feldern `title`, `reasoning` und `date`
Der Prompt in `template.txt` muss deshalb **keine** JSON-Formatanweisung enthalten –
nur den inhaltlichen Auftrag an die KI.
---
## Zielformat
Jede erfolgreich verarbeitete PDF-Datei wird im Zielordner unter folgendem Namen abgelegt:
```
YYYY-MM-DD - Titel.pdf
```
Bei Namenskollisionen wird ein laufendes Suffix angehängt:
```
YYYY-MM-DD - Titel(1).pdf
YYYY-MM-DD - Titel(2).pdf
```
Das Suffix zählt nicht zu den 20 Zeichen des Basistitels.
---
## Retry- und Skip-Verhalten
### Dokumentstatus
| Status | Bedeutung |
|---------------------------|-----------|
| `SUCCESS` | Erfolgreich verarbeitet und kopiert |
| `FAILED_RETRYABLE` | Fehlgeschlagen, erneuter Versuch in späterem Lauf möglich |
| `FAILED_FINAL` | Terminal fehlgeschlagen, wird nicht erneut verarbeitet |
| `SKIPPED_ALREADY_PROCESSED` | Übersprungen – Dokument bereits erfolgreich verarbeitet |
| `SKIPPED_FINAL_FAILURE` | Übersprungen – Dokument terminal fehlgeschlagen |
### Retry-Regeln
**Deterministische Inhaltsfehler** (z. B. kein extrahierbarer Text, Seitenlimit überschritten,
unbrauchbarer KI-Titel):
- Erster Fehler → `FAILED_RETRYABLE` (ein Wiederholversuch in späterem Lauf erlaubt)
- Zweiter Fehler → `FAILED_FINAL` (kein weiterer Versuch)
**Transiente technische Fehler** (z. B. KI nicht erreichbar, HTTP-Timeout):
- Wiederholbar bis zum Grenzwert `max.retries.transient`
- Bei Erreichen des Grenzwerts → `FAILED_FINAL`
**Technischer Sofort-Wiederholversuch:**
Bei einem Schreibfehler der Zielkopie wird innerhalb desselben Laufs exakt ein
Sofort-Wiederholversuch unternommen. Dieser zählt nicht zum laufübergreifenden
Fehlerzähler.
---
## Logging
Logs werden in das konfigurierte `log.directory` geschrieben (Standard: `./logs/`).
Log-Rotation erfolgt täglich und bei Erreichen von 10 MB je Datei.
### Sensible KI-Inhalte
Standardmäßig werden die vollständige KI-Rohantwort und das KI-Reasoning **nicht** ins Log
geschrieben, sondern ausschließlich in der SQLite-Datenbank gespeichert.
Die Ausgabe kann für Diagnosezwecke mit `log.ai.sensitive=true` freigeschaltet werden.
Erlaubte Werte: `true` oder `false`. Jeder andere Wert ist ungültig und verhindert den Start.
---
## Exit-Codes
| Code | Bedeutung |
|------|-----------|
| `0` | Lauf technisch ordnungsgemäß ausgeführt (auch bei dokumentbezogenen Teilfehlern) |
| `1` | Harter Start- oder Bootstrap-Fehler (ungültige Konfiguration, Lock nicht erwerbbar, Schema-Initialisierungsfehler) |
Dokumentbezogene Fehler einzelner PDF-Dateien führen **nicht** zu Exit-Code `1`.
---
## Startschutz (Parallelinstanzschutz)
Die Anwendung verwendet eine exklusive Lock-Datei, um parallele Instanzen zu verhindern.
Wenn bereits eine Instanz läuft, beendet sich die neue Instanz sofort mit Exit-Code `1`.
Der Pfad der Lock-Datei ist über `runtime.lock.file` konfigurierbar.
Ohne Konfiguration wird `pdf-umbenenner.lock` im Arbeitsverzeichnis verwendet.
---
## SQLite-Datenbank
Die SQLite-Datei enthält:
- **Dokument-Stammsätze**: Gesamtstatus, Fehlerzähler, letzter Zieldateiname, Zeitstempel
- **Versuchshistorie**: Jeder Verarbeitungsversuch mit Modell, Prompt-Identifikator,
KI-Rohantwort, Reasoning, Datum, Titel und Fehlerstatus
Die Datenbank ist die führende Wahrheitsquelle für Bearbeitungsstatus und Nachvollziehbarkeit.
Sie muss nicht manuell verwaltet werden – das Schema wird beim Start automatisch initialisiert.
---
## Systemgrenzen
- Nur OCR-verarbeitete, durchsuchbare PDF-Dateien werden verarbeitet
- Keine eingebaute OCR-Funktion
- Kein Web-UI, keine REST-API, keine interaktive Bedienung
- Kein interner Scheduler – der Start erfolgt extern (z. B. Windows Task Scheduler)
- Quelldateien werden nie überschrieben, verschoben oder gelöscht
- Die Identifikation erfolgt über SHA-256-Fingerprint des Dateiinhalts, nicht über Dateinamen

View File

@@ -0,0 +1,18 @@
/**
* Outbound adapter for system time access.
* <p>
* Components:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter}
* — Production implementation of {@link de.gecheckt.pdf.umbenenner.application.port.out.ClockPort}
* that delegates to the JVM system clock ({@code Instant.now()}).</li>
* </ul>
* <p>
* The {@link de.gecheckt.pdf.umbenenner.application.port.out.ClockPort} abstraction ensures that
* all application-layer and domain-layer code obtains the current instant through the port,
* enabling deterministic time injection in tests without coupling to wall-clock time.
* <p>
* No date/time logic or formatting is performed in this package; that responsibility
* belongs to the application layer.
*/
package de.gecheckt.pdf.umbenenner.adapter.out.clock;

View File

@@ -247,6 +247,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
* @return the most recent {@code PROPOSAL_READY} attempt, or {@code null}
* @throws DocumentPersistenceException if the query fails
*/
@Override
public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) {
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
@@ -259,7 +260,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
final_target_file_name
FROM processing_attempt
WHERE fingerprint = ?
AND status = 'PROPOSAL_READY'
AND status = ?
ORDER BY attempt_number DESC
LIMIT 1
""";
@@ -270,6 +271,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON);
statement.setString(1, fingerprint.sha256Hex());
statement.setString(2, ProcessingStatus.PROPOSAL_READY.name());
try (ResultSet rs = statement.executeQuery()) {
if (rs.next()) {

View File

@@ -1,5 +1,7 @@
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Proxy;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
@@ -93,53 +95,70 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort {
}
}
/**
* Wraps a shared transaction connection so that {@code close()} becomes a no-op.
* <p>
* Repository adapters manage their own connection lifecycle via try-with-resources,
* which would close the shared transaction connection prematurely if not wrapped.
* All other {@link Connection} methods are delegated unchanged to the underlying connection.
*
* @param underlying the real shared connection; must not be null
* @return a proxy connection that ignores {@code close()} calls
*/
private static Connection nonClosingWrapper(Connection underlying) {
return (Connection) Proxy.newProxyInstance(
Connection.class.getClassLoader(),
new Class<?>[] { Connection.class },
(proxy, method, args) -> {
if ("close".equals(method.getName())) {
return null;
}
try {
return method.invoke(underlying, args);
} catch (InvocationTargetException e) {
throw e.getCause();
}
});
}
private class TransactionOperationsImpl implements TransactionOperations {
private final Connection connection;
TransactionOperationsImpl(Connection connection) {
this.connection = connection;
}
@Override
public void saveProcessingAttempt(ProcessingAttempt attempt) {
// Repository methods declare DocumentPersistenceException as the only thrown exception.
// Any other exception (NullPointerException, etc.) will propagate to the outer try-catch
// and be caught there.
SqliteProcessingAttemptRepositoryAdapter repo =
new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl) {
@Override
protected Connection getConnection() throws SQLException {
return connection;
return nonClosingWrapper(connection);
}
};
repo.save(attempt);
}
@Override
public void createDocumentRecord(DocumentRecord record) {
// Repository methods declare DocumentPersistenceException as the only thrown exception.
// Any other exception (NullPointerException, etc.) will propagate to the outer try-catch
// and be caught there.
SqliteDocumentRecordRepositoryAdapter repo =
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) {
@Override
protected Connection getConnection() throws SQLException {
return connection;
return nonClosingWrapper(connection);
}
};
repo.create(record);
}
@Override
public void updateDocumentRecord(DocumentRecord record) {
// Repository methods declare DocumentPersistenceException as the only thrown exception.
// Any other exception (NullPointerException, etc.) will propagate to the outer try-catch
// and be caught there.
SqliteDocumentRecordRepositoryAdapter repo =
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) {
@Override
protected Connection getConnection() throws SQLException {
return connection;
return nonClosingWrapper(connection);
}
};
repo.update(record);

View File

@@ -0,0 +1,24 @@
/**
* Outbound adapter for writing the target file copy.
* <p>
* Components:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter}
* — Filesystem-based implementation of
* {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort}.</li>
* </ul>
* <p>
* The adapter uses a two-step write pattern: the source is first copied to a temporary
* file ({@code resolvedFilename + ".tmp"}) in the target folder, then renamed/moved to
* the final filename. An atomic move is attempted first; a standard move is used as a
* fallback when the filesystem does not support atomic cross-directory moves.
* <p>
* <strong>Source integrity:</strong> The source file is never modified, moved, or deleted.
* Only a copy is created in the target folder.
* <p>
* <strong>Architecture boundary:</strong> All NIO ({@code Path}, {@code Files}) operations
* are strictly confined to this package. The port interface
* {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort} contains no
* filesystem types, preserving the hexagonal architecture boundary.
*/
package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy;

View File

@@ -0,0 +1,26 @@
/**
 * Outbound adapter for target folder management and unique filename resolution.
 * <p>
 * Components:
 * <ul>
 *   <li>{@link de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter}
 *       — Filesystem-based implementation of
 *       {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort}.</li>
 * </ul>
 * <p>
 * <strong>Duplicate resolution:</strong> given a base name such as
 * {@code 2024-01-15 - Rechnung.pdf}, the adapter checks whether the file already exists in
 * the target folder and appends a numeric suffix ({@code (1)}, {@code (2)}, …) directly
 * before {@code .pdf} until a free name is found. The 20-character base-title limit does
 * not apply to the suffix.
 * <p>
 * <strong>Rollback support:</strong> the adapter provides a best-effort deletion method
 * used by the application layer to remove a successfully written target copy when
 * subsequent persistence fails, preventing orphaned target files.
 * <p>
 * <strong>Architecture boundary:</strong> all NIO ({@code Path}, {@code Files}) operations
 * are strictly confined to this package. The port interface
 * {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort} contains no
 * filesystem types, preserving the hexagonal architecture boundary.
 */
package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder;

View File

@@ -85,30 +85,6 @@ public class AiRequestComposer {
Objects.requireNonNull(promptContent, "promptContent must not be null");
Objects.requireNonNull(documentText, "documentText must not be null");
// The complete request text is composed in a fixed, deterministic order:
// 1. Prompt content (instruction)
// 2. Newline separator
// 3. Prompt identifier marker (for traceability)
// 4. Newline separator
// 5. Document text section marker
// 6. Newline separator
// 7. Document text content
// 8. Newline separator
// 9. Response format specification (JSON-only with required fields)
//
// This order is fixed so that another implementation knows exactly where
// each part is positioned and what to expect.
StringBuilder requestBuilder = new StringBuilder();
requestBuilder.append(promptContent);
requestBuilder.append("\n");
requestBuilder.append("--- Prompt-ID: ").append(promptIdentifier.identifier()).append(" ---");
requestBuilder.append("\n");
requestBuilder.append("--- Document Text ---");
requestBuilder.append("\n");
requestBuilder.append(documentText);
requestBuilder.append("\n");
appendJsonResponseFormat(requestBuilder);
// Record the exact character count of the document text that was included.
// This is the length of the document text (not the complete request).
int sentCharacterCount = documentText.length();

View File

@@ -393,10 +393,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
private void logProcessingOutcome(SourceDocumentCandidate candidate, DocumentProcessingOutcome outcome) {
switch (outcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
logger.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
logger.info("Pre-checks failed for '{}': {} (deterministic content error).",
candidate.uniqueIdentifier(), failed.failureReasonDescription());
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
logger.warn("Processing FAILED for '{}': {} (Technical error retryable).",
logger.warn("Processing failed for '{}': {} (transient technical error retryable).",
candidate.uniqueIdentifier(), technicalError.errorMessage());
case de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady ready ->
logger.info("AI naming proposal ready for '{}': title='{}', date={}.",
@@ -404,10 +404,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
ready.proposal().validatedTitle(),
ready.proposal().resolvedDate());
case de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure aiTechnical ->
logger.warn("AI technical failure for '{}': {} (Transient retryable).",
logger.warn("AI invocation failed for '{}': {} (transient technical error retryable).",
candidate.uniqueIdentifier(), aiTechnical.errorMessage());
case de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure aiFunctional ->
logger.info("AI functional failure for '{}': {} (Deterministic content error).",
logger.info("AI naming failed for '{}': {} (deterministic content error).",
candidate.uniqueIdentifier(), aiFunctional.errorMessage());
default -> { /* other outcomes are handled elsewhere */ }
}

View File

@@ -314,4 +314,13 @@ class AiNamingServiceTest {
.isInstanceOf(IllegalArgumentException.class)
.hasMessageContaining("maxTextCharacters must be >= 1");
}
@Test
void constructor_maxTextCharactersOne_doesNotThrow() {
    // Boundary test: maxTextCharacters=1 is the smallest value the constructor accepts.
    // A changed-conditional-boundary mutation that turns the guard's '< 1' into '<= 1'
    // would make this constructor call throw (cf. the sibling test asserting
    // "maxTextCharacters must be >= 1"), so this test detects that mutation.
    new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, 1);
    // No exception expected; reaching the end of the method is the entire assertion.
}
}

View File

@@ -829,8 +829,9 @@ class DocumentProcessingCoordinatorTest {
// No PROPOSAL_READY attempt pre-populated
// persistTransientError returns true when the error record was persisted successfully
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -851,8 +852,9 @@ class DocumentProcessingCoordinatorTest {
null, DateSource.AI_PROVIDED, "Rechnung", null);
attemptRepo.savedAttempts.add(badProposal);
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -871,8 +873,10 @@ class DocumentProcessingCoordinatorTest {
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
boolean result = coordinatorWithFailingFolder.processDeferredOutcome(
candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -891,8 +895,10 @@ class DocumentProcessingCoordinatorTest {
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
boolean result = coordinatorWithFailingCopy.processDeferredOutcome(
candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -914,8 +920,9 @@ class DocumentProcessingCoordinatorTest {
"A".repeat(21), null);
attemptRepo.savedAttempts.add(badProposal);
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -939,8 +946,9 @@ class DocumentProcessingCoordinatorTest {
"Rechnung-2026", null);
attemptRepo.savedAttempts.add(badProposal);
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -1008,9 +1016,10 @@ class DocumentProcessingCoordinatorTest {
new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
coordinatorWithCountingCopy.processDeferredOutcome(
boolean result = coordinatorWithCountingCopy.processDeferredOutcome(
candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -1037,9 +1046,10 @@ class DocumentProcessingCoordinatorTest {
recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), failingCopy, new NoOpProcessingLogger(), 1);
coordinatorWith1Retry.processDeferredOutcome(
boolean result = coordinatorWith1Retry.processDeferredOutcome(
candidate, fingerprint, context, attemptStart, c -> null);
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_FINAL)
.findFirst()
@@ -1055,6 +1065,58 @@ class DocumentProcessingCoordinatorTest {
"Transient error counter must be 1 after the first cross-run transient error");
}
@Test
void processDeferredOutcome_proposalReady_copyFailure_retryDecisionLog_containsFailedRetryable() {
    // A copy failure handled as retryable in persistTransientError must emit a
    // retry-decision log line carrying the branch-specific markers "FAILED_RETRYABLE"
    // and "will retry in later run". Asserting on both texts separates this branch from
    // the FAILED_FINAL branch and kills the negated-conditional mutation on the
    // retryable-flag check.
    MessageCapturingProcessingLogger logSpy = new MessageCapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(logSpy.anyInfoContains("FAILED_RETRYABLE"),
            "Retry decision log for a retryable transient copy error must contain FAILED_RETRYABLE. "
                    + "Captured info messages: " + logSpy.infoMessages);
    assertTrue(logSpy.anyInfoContains("will retry in later run"),
            "Retry decision log for a retryable transient error must contain 'will retry in later run'. "
                    + "Captured info messages: " + logSpy.infoMessages);
}
@Test
void processDeferredOutcome_proposalReady_copyFailure_maxRetriesTransient1_retryDecisionLog_containsFailedFinal() {
    // With maxRetriesTransient=1 a copy failure finalises the document inside
    // persistTransientError. The retry-decision log must then carry the
    // branch-specific markers "FAILED_FINAL" and "transient error limit reached",
    // which distinguishes this branch from the FAILED_RETRYABLE one.
    MessageCapturingProcessingLogger logSpy = new MessageCapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    // Limit of 1 transient error: the first failure is immediately final.
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), logSpy,
            1);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(logSpy.anyInfoContains("FAILED_FINAL"),
            "Retry decision log for a finalising transient copy error must contain FAILED_FINAL. "
                    + "Captured info messages: " + logSpy.infoMessages);
    assertTrue(logSpy.anyInfoContains("transient error limit reached"),
            "Retry decision log for a finalising transient error must contain 'transient error limit reached'. "
                    + "Captured info messages: " + logSpy.infoMessages);
}
@Test
void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() {
// Ensures that during the immediate retry path no pipeline (AI) execution happens
@@ -1375,6 +1437,26 @@ class DocumentProcessingCoordinatorTest {
}
}
/**
 * Test double for {@code TargetFolderPort} that counts invocations of
 * {@link #tryDeleteTargetFile(String)}.
 * <p>
 * Used to detect "removed call to tryDeleteTargetFile" mutations in the
 * best-effort rollback path of the coordinator.
 */
private static class CapturingTargetFolderPort implements TargetFolderPort {
    // Number of rollback deletions requested by the code under test; read by assertions.
    int tryDeleteCallCount = 0;

    @Override
    public String getTargetFolderLocator() {
        // Fixed dummy locator; presumably never accessed on disk by these tests — TODO confirm.
        return "/tmp/target";
    }

    @Override
    public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
        // Pass-through: reports the base name itself as the free target filename.
        return new ResolvedTargetFilename(baseName);
    }

    @Override
    public void tryDeleteTargetFile(String resolvedFilename) {
        tryDeleteCallCount++;
    }
}
private static class NoOpTargetFolderPort implements TargetFolderPort {
@Override
public String getTargetFolderLocator() {
@@ -1493,6 +1575,162 @@ class DocumentProcessingCoordinatorTest {
assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"),
"Finalising retry decision log must contain the FAILED_FINAL classification. "
+ "Captured info messages: " + capturingLogger.infoMessages);
assertTrue(capturingLogger.anyInfoContains("permanently failed"),
"Finalising retry decision log must contain 'permanently failed' to distinguish "
+ "the FAILED_FINAL branch from the generic status log. "
+ "Captured info messages: " + capturingLogger.infoMessages);
}
// -------------------------------------------------------------------------
// Finalization path logging: error, warn, and info calls in key paths
// -------------------------------------------------------------------------
@Test
void processDeferredOutcome_proposalReady_missingProposalAttempt_logsError() {
    // finalizeProposalReady must log an error when the attempt history holds no
    // PROPOSAL_READY attempt (the proposalAttempt == null branch). No attempt is
    // pre-loaded into attemptRepo on purpose.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(logSpy.errorCallCount > 0,
            "An error must be logged when the PROPOSAL_READY attempt is missing from history");
}
@Test
void processDeferredOutcome_proposalReady_inconsistentProposalState_logsError() {
    // A PROPOSAL_READY attempt with an inconsistent proposal state (null resolved date)
    // must make finalizeProposalReady log an error instead of proceeding.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    // Attempt with a null date field — the inconsistency under test.
    ProcessingAttempt inconsistentProposal = new ProcessingAttempt(
            fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
            ProcessingStatus.PROPOSAL_READY, null, null, false,
            "model", "prompt", 1, 100, "{}", "reason",
            null, DateSource.AI_PROVIDED, "Rechnung", null);
    attemptRepo.savedAttempts.add(inconsistentProposal);
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(logSpy.errorCallCount > 0,
            "An error must be logged when the proposal state is inconsistent");
}
@Test
void processDeferredOutcome_proposalReady_duplicateResolutionFailure_logsError() {
    // A failing target folder port makes duplicate resolution fail;
    // finalizeProposalReady must log an error for that path.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(logSpy.errorCallCount > 0,
            "An error must be logged when duplicate resolution fails");
}
@Test
void processDeferredOutcome_proposalReady_resolvedFilename_logsInfo() {
    // On successful duplicate resolution the resolved target filename must be logged
    // at INFO level. The pipeline callback throws on purpose: a PROPOSAL_READY
    // document is finalized directly and must never re-enter the AI pipeline.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(
            candidate, fingerprint, context, attemptStart,
            c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
    assertTrue(logSpy.infoCallCount > 0,
            "Resolved target filename must be logged at INFO level");
}
@Test
void processDeferredOutcome_proposalReady_firstCopyFails_logsWarn() {
    // When only the first copy attempt fails, the coordinator performs an immediate
    // within-run retry and must log a WARN for the initial failure.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    CountingTargetFileCopyPort failFirstOnly = new CountingTargetFileCopyPort(1);
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), failFirstOnly, logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(
            candidate, fingerprint, context, attemptStart,
            c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
    assertTrue(logSpy.warnCallCount > 0,
            "A WARN must be logged when the first copy attempt fails and an immediate retry is triggered");
}
@Test
void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_logsError() {
    // When the initial copy and the immediate retry both fail, finalizeProposalReady
    // must log an error.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    CountingTargetFileCopyPort failTwice = new CountingTargetFileCopyPort(2);
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), failTwice, logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(logSpy.errorCallCount > 0,
            "An error must be logged when both copy attempts fail");
}
@Test
void processDeferredOutcome_proposalReady_immediateRetrySucceeds_logsInfo() {
    // First copy fails, the immediate within-run retry succeeds — a success INFO
    // must be logged. The pipeline callback throws on purpose: PROPOSAL_READY
    // documents are finalized without re-running the AI pipeline.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    CountingTargetFileCopyPort failFirstOnly = new CountingTargetFileCopyPort(1);
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), failFirstOnly, logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(
            candidate, fingerprint, context, attemptStart,
            c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
    assertTrue(logSpy.infoCallCount > 0,
            "An INFO must be logged when the immediate within-run retry succeeds");
}
/** Counts logger calls per level to detect VoidMethodCallMutator mutations. */
@@ -1581,5 +1819,91 @@ class DocumentProcessingCoordinatorTest {
boolean anyInfoContains(String text) {
return infoMessages.stream().anyMatch(m -> m.contains(text));
}
// True if any captured error message contains the given text fragment.
boolean anyErrorContains(String text) {
    for (String message : errorMessages) {
        if (message.contains(text)) {
            return true;
        }
    }
    return false;
}
}
// -------------------------------------------------------------------------
// AI sensitive content logging in finalization path
// -------------------------------------------------------------------------
@Test
void processDeferredOutcome_proposalReady_aiContentNotNull_callsDebugSensitiveAiContent() {
    // buildValidProposalAttempt() carries non-null aiRawResponse ("{}") and
    // aiReasoning ("reason"). The null guards in finalizeProposalReady must route
    // both values into debugSensitiveAiContent; a negated guard would suppress the
    // calls for non-null values, which this call-count assertion detects.
    CapturingProcessingLogger logSpy = new CapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(
            candidate, fingerprint, context, attemptStart,
            c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
    assertTrue(logSpy.debugSensitiveAiContentCallCount >= 2,
            "debugSensitiveAiContent must be called for aiRawResponse and aiReasoning "
                    + "when both are non-null. Actual call count: "
                    + logSpy.debugSensitiveAiContentCallCount);
}
// -------------------------------------------------------------------------
// Best-effort rollback path: tryDeleteTargetFile and secondary persistence
// -------------------------------------------------------------------------
@Test
void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_callsTryDeleteTargetFile() {
    // If persistence fails after the target copy succeeded, the best-effort rollback
    // must delete the orphaned target file via tryDeleteTargetFile. Counting the
    // calls kills the 'removed call to tryDeleteTargetFile' mutation.
    CapturingTargetFolderPort folderSpy = new CapturingTargetFolderPort();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    unitOfWorkPort.failOnExecute = true;
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            folderSpy, new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    assertTrue(folderSpy.tryDeleteCallCount > 0,
            "tryDeleteTargetFile must be called at least once for best-effort rollback "
                    + "when persistence fails after a successful copy");
}
@Test
void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_logsSecondaryFailure() {
    // failOnExecute makes both the primary persistence and the secondary attempt in
    // persistTransientErrorAfterPersistenceFailure fail; the secondary failure must
    // be logged as an error. This kills the 'removed call to
    // persistTransientErrorAfterPersistenceFailure' mutation.
    MessageCapturingProcessingLogger logSpy = new MessageCapturingProcessingLogger();
    recordRepo.setLookupResult(new DocumentKnownProcessable(
            buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero())));
    attemptRepo.savedAttempts.add(buildValidProposalAttempt());
    unitOfWorkPort.failOnExecute = true;
    DocumentProcessingCoordinator sut = new DocumentProcessingCoordinator(
            recordRepo, attemptRepo, unitOfWorkPort,
            new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), logSpy,
            DEFAULT_MAX_RETRIES_TRANSIENT);
    sut.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
    boolean secondaryFailureLogged = logSpy.anyErrorContains("Secondary persistence failure")
            || logSpy.anyErrorContains("secondary");
    assertTrue(secondaryFailureLogged,
            "An error must be logged for the secondary persistence failure. "
                    + "Captured error messages: " + logSpy.errorMessages);
}
}

View File

@@ -7,8 +7,11 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
@@ -314,4 +317,58 @@ class ProcessingOutcomeTransitionTest {
assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus());
assertEquals(2, result.counters().transientErrorCount());
}
// -------------------------------------------------------------------------
// PreCheckPassed routed through transition (edge case: no AI step taken)
// -------------------------------------------------------------------------
@Test
void forNewDocument_preCheckPassed_limitOne_immediatelyFinal() {
    // The transition routes a PreCheckPassed without an AI outcome as a transient
    // error; with limit=1 the very first occurrence must finalise to FAILED_FINAL.
    PreCheckPassed routedOutcome = new PreCheckPassed(
            candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
    ProcessingOutcomeTransition.ProcessingOutcome transition =
            ProcessingOutcomeTransition.forNewDocument(routedOutcome, LIMIT_1);
    assertEquals(ProcessingStatus.FAILED_FINAL, transition.overallStatus(),
            "With limit=1 a PreCheckPassed-routed transient error must immediately finalise");
    assertFalse(transition.retryable());
    assertEquals(1, transition.counters().transientErrorCount());
    assertEquals(0, transition.counters().contentErrorCount());
}
@Test
void forNewDocument_preCheckPassed_limitTwo_firstErrorRetryable() {
    // With limit=2 the first PreCheckPassed-routed transient error stays below the
    // limit and must therefore be classified as retryable.
    PreCheckPassed routedOutcome = new PreCheckPassed(
            candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
    ProcessingOutcomeTransition.ProcessingOutcome transition =
            ProcessingOutcomeTransition.forNewDocument(routedOutcome, LIMIT_2);
    assertEquals(ProcessingStatus.FAILED_RETRYABLE, transition.overallStatus(),
            "With limit=2 the first PreCheckPassed-routed transient error must be retryable");
    assertTrue(transition.retryable());
    assertEquals(1, transition.counters().transientErrorCount());
    assertEquals(0, transition.counters().contentErrorCount());
}
@Test
void forKnownDocument_preCheckPassed_limitTwo_secondErrorFinal() {
    // Starting from an existing transient error count of 1 with limit=2, the next
    // PreCheckPassed-routed error increments the counter to the limit and must
    // finalise the document.
    FailureCounters priorCounters = new FailureCounters(0, 1);
    PreCheckPassed routedOutcome = new PreCheckPassed(
            candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
    ProcessingOutcomeTransition.ProcessingOutcome transition =
            ProcessingOutcomeTransition.forKnownDocument(routedOutcome, priorCounters, LIMIT_2);
    assertEquals(ProcessingStatus.FAILED_FINAL, transition.overallStatus(),
            "PreCheckPassed-routed error at transient limit must finalise to FAILED_FINAL");
    assertFalse(transition.retryable());
    assertEquals(2, transition.counters().transientErrorCount());
}
}

View File

@@ -62,6 +62,11 @@
<artifactId>mockito-junit-jupiter</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
<build>

View File

@@ -310,10 +310,10 @@ public class BootstrapRunner {
LOG.error("Configuration validation failed: {}", e.getMessage());
return 1;
} catch (DocumentPersistenceException e) {
LOG.error("Persistence operation failed: {}", e.getMessage(), e);
LOG.error("Schema initialization failed: {}", e.getMessage(), e);
return 1;
} catch (Exception e) {
LOG.error("Bootstrap failure during startup.", e);
LOG.error("Unexpected startup failure.", e);
return 1;
}
}
@@ -391,7 +391,7 @@ public class BootstrapRunner {
*/
private BatchRunContext createRunContext() {
RunId runId = new RunId(UUID.randomUUID().toString());
LOG.info("Batch run started. RunId: {}", runId);
LOG.info("Preparing batch run. RunId: {}", runId);
return new BatchRunContext(runId, Instant.now());
}

View File

@@ -0,0 +1,698 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicInteger;
import static org.assertj.core.api.Assertions.assertThat;
/**
* Deterministic end-to-end tests for the complete batch processing pipeline.
* <p>
* Each test method is independent and uses its own {@link E2ETestContext} backed by a
* JUnit {@code @TempDir}. All infrastructure adapters (SQLite, filesystem, PDF extraction,
* fingerprinting) are real production implementations. Only the AI invocation port is
* replaced by a configurable {@link StubAiInvocationPort} to avoid real HTTP calls.
*
* <h2>End-to-end invariants verified</h2>
* <ul>
* <li><strong>Happy-path to {@code SUCCESS}</strong>: two-run flow via {@code PROPOSAL_READY}
* intermediate state to a final {@code SUCCESS} with a target file on disk.</li>
* <li><strong>Deterministic content error</strong>: blank PDFs (no extractable text) reach
* {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after the
* second run, exercising the one-retry rule for deterministic content errors.</li>
* <li><strong>Transient technical error</strong>: AI stub failures produce
* {@code FAILED_RETRYABLE} (transient counter incremented) without a target file.</li>
* <li><strong>Transient error exhaustion</strong>: repeated AI stub failures across
* {@code maxRetriesTransient} runs increment the transient counter each time until
* the limit is reached and the document is finalized to {@code FAILED_FINAL}.</li>
* <li><strong>Skip after {@code SUCCESS}</strong>: a document whose status is
* {@code SUCCESS} generates exactly one {@code SKIPPED_ALREADY_PROCESSED} attempt
* in the next run; the overall status and target file remain unchanged.</li>
* <li><strong>Skip after {@code FAILED_FINAL}</strong>: a document whose status is
* {@code FAILED_FINAL} generates exactly one {@code SKIPPED_FINAL_FAILURE} attempt
* in the next run; the overall status and failure counters remain unchanged.</li>
* <li><strong>{@code PROPOSAL_READY} with later finalization</strong>: a document in
* {@code PROPOSAL_READY} state is finalized without an AI call in the next run,
* confirming the leading-proposal-attempt rule.</li>
* <li><strong>Target copy error with immediate within-run retry (success)</strong>: when the
* first copy attempt fails but the immediate within-run retry succeeds, the document is
* recorded as {@code SUCCESS} and no transient error counter is incremented.</li>
* <li><strong>Target copy error with immediate within-run retry (failure)</strong>: when both
* the initial and immediate-retry copy attempts fail, the document is recorded as
* {@code FAILED_RETRYABLE} with an incremented transient counter.</li>
* <li><strong>Duplicate target filename suffix</strong>: when two distinct documents produce
* the same base target filename in the same batch run, the second receives a {@code (1)}
* suffix to avoid overwriting the first.</li>
* <li><strong>Mixed batch outcome</strong>: a batch run that contains both successfully
* processed documents and documents with content errors completes with
* {@link BatchRunOutcome#SUCCESS}, confirming that document-level failures do not
* abort the batch or change the overall exit-code-relevant outcome.</li>
* </ul>
*
* <h2>Document text used in tests</h2>
* <p>
* Searchable PDFs embed enough text to pass the minimum-text pre-check. The AI stub
* returns a title of {@code "Stromabrechnung"} and date {@code "2024-01-15"} by default,
* producing a target filename of {@code "2024-01-15 - Stromabrechnung.pdf"}.
*/
class BatchRunEndToEndTest {

    /**
     * Body text embedded in searchable test PDFs; per the class contract it is long
     * enough to pass the minimum-text pre-check, so the AI stub is actually invoked.
     */
    private static final String SAMPLE_PDF_TEXT =
            "Stromabrechnung Kundenname Musterstadt Datum 15.01.2024 Betrag 123,45 EUR";

    // =========================================================================
    // Scenario 1: Happy-path to SUCCESS
    // =========================================================================

    /**
     * Verifies the complete two-run happy-path:
     * <ol>
     *   <li>Run 1: AI stub returns valid proposal → document status becomes
     *       {@code PROPOSAL_READY}; no target file yet.</li>
     *   <li>Run 2: AI is NOT called again; target file is copied; document status
     *       becomes {@code SUCCESS}.</li>
     * </ol>
     * This confirms the leading-proposal-attempt rule and the two-phase finalization.
     */
    @Test
    void happyPath_twoRuns_reachesSuccess(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("rechnung.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("rechnung.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // --- Run 1: AI produces a naming proposal ---
            BatchRunOutcome run1 = ctx.runBatch();
            assertThat(run1).isEqualTo(BatchRunOutcome.SUCCESS);
            assertThat(ctx.aiStub.invocationCount()).isEqualTo(1);
            DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
            // No file may appear in the target folder before the finalization run.
            assertThat(ctx.listTargetFiles()).isEmpty();
            List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
            assertThat(attempts1).hasSize(1);
            assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.PROPOSAL_READY);

            // --- Run 2: Finalization without AI call ---
            ctx.aiStub.resetInvocationCount();
            BatchRunOutcome run2 = ctx.runBatch();
            assertThat(run2).isEqualTo(BatchRunOutcome.SUCCESS);
            assertThat(ctx.aiStub.invocationCount())
                    .as("AI must not be called again when PROPOSAL_READY exists")
                    .isEqualTo(0);
            DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
            assertThat(record2.lastSuccessInstant()).isNotNull();
            assertThat(record2.lastTargetFileName()).isNotNull();
            List<String> targetFiles = ctx.listTargetFiles();
            assertThat(targetFiles).hasSize(1);
            assertThat(targetFiles.get(0)).endsWith(".pdf");
            assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
            List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
            assertThat(attempts2).hasSize(2);
            assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
        }
    }

    // =========================================================================
    // Scenario 2: Deterministic content error → FAILED_RETRYABLE → FAILED_FINAL
    // =========================================================================

    /**
     * Verifies the one-retry rule for deterministic content errors:
     * <ol>
     *   <li>Run 1: blank PDF → pre-check fails (no extractable text) →
     *       {@code FAILED_RETRYABLE}, content error counter = 1.</li>
     *   <li>Run 2: same outcome again → {@code FAILED_FINAL}, content error counter = 2.</li>
     * </ol>
     * No AI call is made in either run because the content pre-check prevents it.
     */
    @Test
    void deterministicContentError_twoRuns_reachesFailedFinal(@TempDir Path tempDir)
            throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createBlankPdf("blank.pdf");
            Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // --- Run 1 ---
            ctx.runBatch();
            assertThat(ctx.aiStub.invocationCount())
                    .as("AI must not be called for a blank PDF")
                    .isEqualTo(0);
            DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(1);
            assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(0);
            List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
            assertThat(attempts1).hasSize(1);
            assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(attempts1.get(0).retryable()).isTrue();

            // --- Run 2 ---
            ctx.runBatch();
            DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
            assertThat(record2.failureCounters().contentErrorCount()).isEqualTo(2);
            List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
            assertThat(attempts2).hasSize(2);
            assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
            assertThat(attempts2.get(1).retryable()).isFalse();
            // No target file should exist
            assertThat(ctx.listTargetFiles()).isEmpty();
        }
    }

    // =========================================================================
    // Scenario 3: Transient technical error → FAILED_RETRYABLE
    // =========================================================================

    /**
     * Verifies that a transient AI failure produces {@code FAILED_RETRYABLE} with an
     * incremented transient error counter, and that no target file is written.
     * The document remains retryable in subsequent runs until the transient limit is reached.
     */
    @Test
    void transientAiFailure_producesFailedRetryable(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
            ctx.aiStub.configureTechnicalFailure();

            ctx.runBatch();

            assertThat(ctx.aiStub.invocationCount())
                    .as("AI must have been invoked (and failed) once")
                    .isEqualTo(1);
            DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(record.failureCounters().transientErrorCount()).isEqualTo(1);
            assertThat(record.failureCounters().contentErrorCount()).isEqualTo(0);
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(1);
            assertThat(attempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(attempts.get(0).retryable()).isTrue();
            assertThat(ctx.listTargetFiles()).isEmpty();
        }
    }

    // =========================================================================
    // Scenario 4: Skip after SUCCESS
    // =========================================================================

    /**
     * Verifies the skip-after-success invariant:
     * after a document reaches {@code SUCCESS} (via two runs), a third run records a
     * {@code SKIPPED_ALREADY_PROCESSED} attempt without changing the overall status,
     * failure counters, or the target file.
     */
    @Test
    void skipAfterSuccess_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // Reach SUCCESS via two runs
            ctx.runBatch(); // → PROPOSAL_READY
            ctx.runBatch(); // → SUCCESS
            DocumentRecord successRecord = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(successRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
            String targetFileBefore = successRecord.lastTargetFileName();

            // --- Run 3: should produce skip ---
            ctx.aiStub.resetInvocationCount();
            BatchRunOutcome run3 = ctx.runBatch();
            assertThat(run3).isEqualTo(BatchRunOutcome.SUCCESS);
            assertThat(ctx.aiStub.invocationCount())
                    .as("AI must not be called for an already-successful document")
                    .isEqualTo(0);
            DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record3.overallStatus())
                    .as("Overall status must remain SUCCESS after a skip")
                    .isEqualTo(ProcessingStatus.SUCCESS);
            assertThat(record3.lastTargetFileName())
                    .as("Target filename must not change after a skip")
                    .isEqualTo(targetFileBefore);
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(3);
            assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
            assertThat(attempts.get(2).retryable()).isFalse();
            // Target file count must remain exactly one
            assertThat(ctx.listTargetFiles()).hasSize(1);
        }
    }

    // =========================================================================
    // Scenario 5: Skip after FAILED_FINAL
    // =========================================================================

    /**
     * Verifies the skip-after-final-failure invariant:
     * after a document reaches {@code FAILED_FINAL} (via two blank-PDF runs), a third run
     * records a {@code SKIPPED_FINAL_FAILURE} attempt without changing the overall status
     * or failure counters.
     */
    @Test
    void skipAfterFailedFinal_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createBlankPdf("blank.pdf");
            Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // Reach FAILED_FINAL via two blank-PDF runs
            ctx.runBatch(); // → FAILED_RETRYABLE
            ctx.runBatch(); // → FAILED_FINAL
            DocumentRecord finalRecord = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(finalRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
            int contentErrorsBefore = finalRecord.failureCounters().contentErrorCount();

            // --- Run 3: should produce skip ---
            ctx.runBatch();
            DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record3.overallStatus())
                    .as("Overall status must remain FAILED_FINAL after a skip")
                    .isEqualTo(ProcessingStatus.FAILED_FINAL);
            assertThat(record3.failureCounters().contentErrorCount())
                    .as("Failure counters must not change after a skip")
                    .isEqualTo(contentErrorsBefore);
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(3);
            assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_FINAL_FAILURE);
            assertThat(attempts.get(2).retryable()).isFalse();
        }
    }

    // =========================================================================
    // Scenario 6: Existing PROPOSAL_READY with later finalization
    // =========================================================================

    /**
     * Verifies the leading-proposal-attempt rule in isolation:
     * <ol>
     *   <li>Run 1: AI produces a naming proposal → document status is {@code PROPOSAL_READY}.</li>
     *   <li>Run 2: AI stub is reset to technical failure; the coordinator must still finalize
     *       the document to {@code SUCCESS} using the persisted proposal — without calling the AI.</li>
     * </ol>
     * This confirms that the second run never re-invokes the AI when a valid
     * {@code PROPOSAL_READY} attempt already exists.
     */
    @Test
    void proposalReadyFinalization_noAiCallInSecondRun(@TempDir Path tempDir) throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // --- Run 1: establish PROPOSAL_READY ---
            ctx.runBatch();
            DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
            assertThat(ctx.listTargetFiles()).isEmpty();

            // --- Run 2: AI stub would fail if called, but must not be called ---
            ctx.aiStub.configureTechnicalFailure();
            ctx.aiStub.resetInvocationCount();
            ctx.runBatch();
            assertThat(ctx.aiStub.invocationCount())
                    .as("AI must not be invoked during PROPOSAL_READY finalization")
                    .isEqualTo(0);
            DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
            List<String> targetFiles = ctx.listTargetFiles();
            assertThat(targetFiles).hasSize(1);
            assertThat(targetFiles.get(0)).endsWith(".pdf");
        }
    }

    // =========================================================================
    // Scenario 7: Target copy error with immediate within-run retry
    // =========================================================================

    /**
     * Verifies the immediate within-run retry for target copy failures:
     * <ol>
     *   <li>Run 1: AI produces {@code PROPOSAL_READY}.</li>
     *   <li>Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails on
     *       the first invocation but delegates to the real adapter on the second.
     *       The coordinator must detect the first failure, retry immediately within the
     *       same run, and record {@code SUCCESS} — without incrementing the transient
     *       error counter.</li>
     * </ol>
     * The immediate retry does not count as a cross-run transient error.
     */
    @Test
    void targetCopyError_immediateRetrySucceeds_recordsSuccess(@TempDir Path tempDir)
            throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // --- Run 1: produce PROPOSAL_READY ---
            ctx.runBatch();
            DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);

            // --- Run 2: first copy attempt fails, retry succeeds ---
            // Real adapter referenced by its fully qualified name; the class is not imported here.
            TargetFileCopyPort realAdapter =
                    new de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter(
                            ctx.targetFolder());
            AtomicInteger copyCallCount = new AtomicInteger(0);
            TargetFileCopyPort stubWithRetry = (locator, resolvedFilename) -> {
                int call = copyCallCount.incrementAndGet();
                if (call == 1) {
                    // First attempt: simulate a transient write failure
                    return new TargetFileCopyTechnicalFailure(
                            "Simulated write failure on first attempt", true);
                }
                // Second attempt (immediate within-run retry): delegate to real adapter
                return realAdapter.copyToTarget(locator, resolvedFilename);
            };
            ctx.setTargetFileCopyPortOverride(stubWithRetry);
            ctx.runBatch();

            assertThat(copyCallCount.get())
                    .as("Copy port must have been called twice (initial + retry)")
                    .isEqualTo(2);
            DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
            assertThat(record2.failureCounters().transientErrorCount())
                    .as("Immediate within-run retry must not increment the transient error counter")
                    .isEqualTo(0);
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(2);
            assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
            List<String> targetFiles = ctx.listTargetFiles();
            assertThat(targetFiles).hasSize(1);
            assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
        }
    }

    // =========================================================================
    // Scenario 8: Transient error exhaustion → FAILED_FINAL
    // =========================================================================

    /**
     * Verifies the complete transient error exhaustion path over multiple runs:
     * <ol>
     *   <li>Run 1: AI stub fails technically → {@code FAILED_RETRYABLE},
     *       transient counter = 1 (below limit 3).</li>
     *   <li>Run 2: AI stub fails again → {@code FAILED_RETRYABLE},
     *       transient counter = 2 (below limit 3).</li>
     *   <li>Run 3: AI stub fails again → transient counter reaches the limit (3 = 3) →
     *       {@code FAILED_FINAL}; no target file is ever written.</li>
     * </ol>
     * This confirms the {@code maxRetriesTransient} boundary: the run that pushes the
     * counter to the configured limit is the run that finalises the document.
     */
    @Test
    void transientErrors_multipleRuns_exhaustsLimit_reachesFailedFinal(@TempDir Path tempDir)
            throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
            ctx.aiStub.configureTechnicalFailure();

            // --- Run 1: counter 0 → 1, below limit → FAILED_RETRYABLE ---
            ctx.runBatch();
            DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(1);
            assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(0);
            List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
            assertThat(attempts1).hasSize(1);
            assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(attempts1.get(0).retryable()).isTrue();

            // --- Run 2: counter 1 → 2, below limit → FAILED_RETRYABLE ---
            ctx.runBatch();
            DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(record2.failureCounters().transientErrorCount()).isEqualTo(2);
            List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
            assertThat(attempts2).hasSize(2);
            assertThat(attempts2.get(1).retryable()).isTrue();

            // --- Run 3: counter 2 → 3 = limit → FAILED_FINAL ---
            ctx.runBatch();
            DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record3.overallStatus())
                    .as("Transient counter reaching the configured limit must finalise the document")
                    .isEqualTo(ProcessingStatus.FAILED_FINAL);
            assertThat(record3.failureCounters().transientErrorCount())
                    .as("Transient counter must equal maxRetriesTransient after exhaustion")
                    .isEqualTo(E2ETestContext.MAX_RETRIES_TRANSIENT);
            assertThat(record3.failureCounters().contentErrorCount()).isEqualTo(0);
            List<ProcessingAttempt> attempts3 = ctx.findAttempts(fp);
            assertThat(attempts3).hasSize(3);
            assertThat(attempts3.get(2).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
            assertThat(attempts3.get(2).retryable()).isFalse();
            assertThat(ctx.listTargetFiles()).isEmpty();
        }
    }

    // =========================================================================
    // Scenario 9: Target copy error both attempts fail → FAILED_RETRYABLE
    // =========================================================================

    /**
     * Verifies the failure path of the immediate within-run retry mechanism:
     * <ol>
     *   <li>Run 1: AI stub returns a valid proposal → {@code PROPOSAL_READY}.</li>
     *   <li>Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails
     *       on every call. The coordinator issues the initial copy attempt (failure),
     *       grants exactly one immediate retry (also failure), then classifies the
     *       result as a transient technical error and records {@code FAILED_RETRYABLE}
     *       with an incremented transient counter.</li>
     * </ol>
     * This confirms that the within-run retry does not suppress the error when both
     * attempts fail, and that the transient counter is incremented exactly once.
     */
    @Test
    void targetCopyError_bothAttemptsFail_reachesFailedRetryable(@TempDir Path tempDir)
            throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
            Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
            DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

            // --- Run 1: establish PROPOSAL_READY ---
            ctx.runBatch();
            assertThat(ctx.findDocumentRecord(fp).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.PROPOSAL_READY);

            // --- Run 2: both copy attempts fail ---
            ctx.setTargetFileCopyPortOverride(
                    (locator, resolvedFilename) ->
                            new TargetFileCopyTechnicalFailure(
                                    "Simulated persistent write failure", true));
            ctx.runBatch();

            DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
            assertThat(record.overallStatus())
                    .as("Both copy attempts failing must produce FAILED_RETRYABLE "
                            + "(transient error, limit not yet reached)")
                    .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(record.failureCounters().transientErrorCount())
                    .as("The double copy failure must increment the transient counter exactly once")
                    .isEqualTo(1);
            List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
            assertThat(attempts).hasSize(2);
            assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(attempts.get(1).retryable()).isTrue();
            assertThat(ctx.listTargetFiles()).isEmpty();
        }
    }

    // =========================================================================
    // Scenario 10: Two documents with identical target name → duplicate suffix
    // =========================================================================

    /**
     * Verifies the duplicate target filename suffix rule at end-to-end level:
     * when two distinct source documents both resolve to the same base target name
     * ({@code "2024-01-15 - Stromabrechnung.pdf"}) in the same finalization run, the
     * second document written to the target folder must receive a {@code (1)} suffix.
     * <ol>
     *   <li>Run 1: both PDFs are processed by the AI stub (same configured response) →
     *       both reach {@code PROPOSAL_READY}.</li>
     *   <li>Run 2: both are finalized in sequence; the first written claims the base name,
     *       the second receives {@code "2024-01-15 - Stromabrechnung(1).pdf"}.</li>
     * </ol>
     * Both documents reach {@code SUCCESS} and the target folder contains exactly two files.
     */
    @Test
    void twoDifferentDocuments_sameProposedName_secondGetsDuplicateSuffix(@TempDir Path tempDir)
            throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            // Two distinct PDFs; the AI stub returns the same title and date for both
            ctx.createSearchablePdf("rechnung1.pdf", SAMPLE_PDF_TEXT);
            ctx.createSearchablePdf("rechnung2.pdf",
                    "Stromabrechnung Zweiter Kunde Musterstadt Datum 15.01.2024 Betrag 99,00 EUR");
            Path pdf1 = ctx.sourceFolder().resolve("rechnung1.pdf");
            Path pdf2 = ctx.sourceFolder().resolve("rechnung2.pdf");
            DocumentFingerprint fp1 = ctx.computeFingerprint(pdf1);
            DocumentFingerprint fp2 = ctx.computeFingerprint(pdf2);

            // --- Run 1: AI stub processes both PDFs → PROPOSAL_READY ---
            ctx.runBatch();
            assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.PROPOSAL_READY);
            assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.PROPOSAL_READY);
            assertThat(ctx.listTargetFiles()).isEmpty();

            // --- Run 2: both finalized; the second must receive the (1) suffix ---
            ctx.runBatch();
            assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.SUCCESS);
            assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.SUCCESS);
            List<String> targetFiles = ctx.listTargetFiles();
            assertThat(targetFiles)
                    .as("Both distinct documents must produce separate target files")
                    .hasSize(2);
            assertThat(targetFiles)
                    .as("Base name must exist for the first document written")
                    .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung.pdf"));
            assertThat(targetFiles)
                    .as("Duplicate suffix (1) must be appended for the second document written")
                    .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung(1).pdf"));
        }
    }

    // =========================================================================
    // Scenario 11: Mixed batch document failures do not affect batch outcome
    // =========================================================================

    /**
     * Verifies that document-level failures do not cause a batch-level failure:
     * <ol>
     *   <li>Run 1: a searchable PDF reaches {@code PROPOSAL_READY}; a blank PDF
     *       (no extractable text) reaches {@code FAILED_RETRYABLE}.
     *       {@link BatchRunOutcome#SUCCESS} is returned.</li>
     *   <li>Run 2: the searchable PDF is finalized to {@code SUCCESS};
     *       the blank PDF reaches its second content error and is finalized to
     *       {@code FAILED_FINAL}. {@link BatchRunOutcome#SUCCESS} is returned.</li>
     * </ol>
     * This confirms the exit-code contract: only hard bootstrap or infrastructure
     * failures produce a non-zero exit code; document-level errors do not.
     */
    @Test
    void mixedBatch_oneSuccess_oneContentError_batchOutcomeIsSuccess(@TempDir Path tempDir)
            throws Exception {
        try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
            ctx.createSearchablePdf("good.pdf", SAMPLE_PDF_TEXT);
            ctx.createBlankPdf("blank.pdf");
            Path goodPdf = ctx.sourceFolder().resolve("good.pdf");
            Path blankPdf = ctx.sourceFolder().resolve("blank.pdf");
            DocumentFingerprint fpGood = ctx.computeFingerprint(goodPdf);
            DocumentFingerprint fpBlank = ctx.computeFingerprint(blankPdf);

            // --- Run 1 ---
            BatchRunOutcome run1 = ctx.runBatch();
            assertThat(run1)
                    .as("Batch must complete with SUCCESS even when individual documents fail")
                    .isEqualTo(BatchRunOutcome.SUCCESS);
            assertThat(ctx.findDocumentRecord(fpGood).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.PROPOSAL_READY);
            assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow().overallStatus())
                    .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
            assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow()
                    .failureCounters().contentErrorCount()).isEqualTo(1);

            // --- Run 2 ---
            BatchRunOutcome run2 = ctx.runBatch();
            assertThat(run2)
                    .as("Batch must complete with SUCCESS even when a document is finalised "
                            + "to FAILED_FINAL")
                    .isEqualTo(BatchRunOutcome.SUCCESS);
            DocumentRecord goodRecord = ctx.findDocumentRecord(fpGood).orElseThrow();
            assertThat(goodRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
            DocumentRecord blankRecord = ctx.findDocumentRecord(fpBlank).orElseThrow();
            assertThat(blankRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
            assertThat(blankRecord.failureCounters().contentErrorCount()).isEqualTo(2);
            // Exactly one target file from the successfully processed document
            List<String> targetFiles = ctx.listTargetFiles();
            assertThat(targetFiles).hasSize(1);
            assertThat(targetFiles.get(0)).endsWith(".pdf");
        }
    }
}

View File

@@ -0,0 +1,406 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.prompt.FilesystemPromptPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter;
import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter;
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
import de.gecheckt.pdf.umbenenner.application.service.AiNamingService;
import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
import de.gecheckt.pdf.umbenenner.bootstrap.adapter.Log4jProcessingLogger;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Instant;
import java.util.List;
import java.util.Optional;
import java.util.UUID;
/**
 * Fully wired test context for deterministic end-to-end runs of the batch processing pipeline.
 * <p>
 * All subsystems are backed by real production adapters (SQLite persistence, filesystem
 * source/target folders, PDF text extraction, SHA-256 fingerprinting, run locking); only the
 * AI invocation port is replaced by a configurable stub ({@link StubAiInvocationPort}), so
 * the complete production code path is exercised without any real HTTP traffic.
 *
 * <h2>Invariants verified by this context</h2>
 * <ul>
 *   <li>Happy-path to {@code SUCCESS}: Run 1 yields {@code PROPOSAL_READY}, Run 2 copies the
 *       file and records {@code SUCCESS}.</li>
 *   <li>Deterministic content error: a blank PDF (no extractable text) yields
 *       {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after the
 *       second.</li>
 *   <li>Transient technical error: stubbed AI failures yield {@code FAILED_RETRYABLE} per run
 *       until the transient limit is hit, after which the document becomes
 *       {@code FAILED_FINAL}.</li>
 *   <li>Skip after {@code SUCCESS}: subsequent runs record a
 *       {@code SKIPPED_ALREADY_PROCESSED} attempt.</li>
 *   <li>Skip after {@code FAILED_FINAL}: subsequent runs record a
 *       {@code SKIPPED_FINAL_FAILURE} attempt.</li>
 *   <li>{@code PROPOSAL_READY} finalization: the next run finalizes the document without
 *       another AI call.</li>
 *   <li>Copy error with immediate retry: a first copy failure followed by a successful
 *       within-run retry still ends in {@code SUCCESS}.</li>
 * </ul>
 *
 * <h2>Usage pattern</h2>
 * <pre>{@code
 * @TempDir Path tempDir;
 *
 * @Test
 * void example() throws Exception {
 *     try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
 *         ctx.createSearchablePdf("doc.pdf", "Rechnung 2024-01-15 ...");
 *         BatchRunOutcome run1 = ctx.runBatch();
 *         // assertions...
 *     }
 * }
 * }</pre>
 *
 * <h2>Thread safety</h2>
 * <p>
 * Not thread-safe. Every test method must create its own instance.
 */
public final class E2ETestContext implements AutoCloseable {
    /** Page-count limit beyond which a deterministic content error is raised. */
    static final int MAX_PAGES = 50;
    /** Upper bound on the number of text characters forwarded to the AI service. */
    static final int MAX_TEXT_CHARS = 10_000;
    /**
     * Transient-retry ceiling before a document is finalized to {@code FAILED_FINAL}.
     * Chosen as 3 so multi-run transient-failure tests are possible without an
     * immediate finalization.
     */
    static final int MAX_RETRIES_TRANSIENT = 3;
    /** Model name recorded in the attempt history (no real inference happens). */
    static final String AI_MODEL = "e2e-stub-model";
    private final Path sourceDir;
    private final Path targetDir;
    private final Path lockFilePath;
    private final Path promptFilePath;
    private final String databaseUrl;
    private final SqliteDocumentRecordRepositoryAdapter documentRecords;
    private final SqliteProcessingAttemptRepositoryAdapter attemptHistory;
    /**
     * Configurable AI stub. Between batch runs, tests may call
     * {@link StubAiInvocationPort#configureSuccess},
     * {@link StubAiInvocationPort#configureTechnicalFailure}, or
     * {@link StubAiInvocationPort#reset()}.
     */
    public final StubAiInvocationPort aiStub;
    /**
     * Optional replacement for the {@link TargetFileCopyPort}. When {@code null} the real
     * {@link FilesystemTargetFileCopyAdapter} is wired. Set through
     * {@link #setTargetFileCopyPortOverride} to inject a failure-simulating stub.
     */
    private TargetFileCopyPort copyPortOverride;
    private E2ETestContext(
            Path sourceDir,
            Path targetDir,
            Path lockFilePath,
            Path promptFilePath,
            String databaseUrl,
            SqliteDocumentRecordRepositoryAdapter documentRecords,
            SqliteProcessingAttemptRepositoryAdapter attemptHistory,
            StubAiInvocationPort aiStub) {
        this.sourceDir = sourceDir;
        this.targetDir = targetDir;
        this.lockFilePath = lockFilePath;
        this.promptFilePath = promptFilePath;
        this.databaseUrl = databaseUrl;
        this.documentRecords = documentRecords;
        this.attemptHistory = attemptHistory;
        this.aiStub = aiStub;
    }
    /**
     * Builds a completely wired end-to-end context rooted at {@code tempDir}.
     * <p>
     * Creates the {@code source/} and {@code target/} subdirectories plus a minimal prompt
     * file, initializes the SQLite schema, and instantiates the persistence adapters.
     *
     * @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
     * @return a ready-to-use context; the caller is responsible for closing it
     * @throws Exception if schema initialization or directory/file creation fails
     */
    public static E2ETestContext initialize(Path tempDir) throws Exception {
        var source = Files.createDirectories(tempDir.resolve("source"));
        var target = Files.createDirectories(tempDir.resolve("target"));
        var lock = tempDir.resolve("run.lock");
        var prompt = tempDir.resolve("prompt.txt");
        Files.writeString(prompt,
            "Analysiere das folgende Dokument und liefere Datum, Titel und Begruendung als JSON-Objekt.");
        // Normalize backslashes so the JDBC URL is valid on Windows as well.
        var jdbcUrl = "jdbc:sqlite:"
            + tempDir.resolve("test.db").toAbsolutePath().toString().replace('\\', '/');
        new SqliteSchemaInitializationAdapter(jdbcUrl).initializeSchema();
        var docRepo = new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
        var attemptRepo = new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
        return new E2ETestContext(
            source, target, lock, prompt, jdbcUrl, docRepo, attemptRepo,
            new StubAiInvocationPort());
    }
    // =========================================================================
    // Test fixture creation
    // =========================================================================
    /**
     * Writes a single-page searchable PDF containing {@code textContent} into the source folder.
     * <p>
     * The file is immediately eligible for the next batch run.
     *
     * @param filename    the PDF filename (e.g. {@code "rechnung.pdf"})
     * @param textContent text to embed; should hold at least a few words to pass pre-checks
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createSearchablePdf(String filename, String textContent) throws IOException {
        var target = sourceDir.resolve(filename);
        MinimalPdfFactory.createSearchablePdf(target, textContent);
        return target;
    }
    /**
     * Writes a single-page blank PDF (no extractable text) into the source folder.
     * <p>
     * Processing it triggers the "no usable text" deterministic content error,
     * so the AI call is skipped.
     *
     * @param filename the PDF filename (e.g. {@code "blank.pdf"})
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createBlankPdf(String filename) throws IOException {
        var target = sourceDir.resolve(filename);
        MinimalPdfFactory.createBlankPdf(target);
        return target;
    }
    // =========================================================================
    // Batch execution
    // =========================================================================
    /**
     * Runs one complete batch using the current stub configuration.
     * <p>
     * Every call creates a fresh {@link BatchRunContext} with a new {@link RunId},
     * mirroring production where each Task Scheduler invocation is a distinct run.
     *
     * @return the outcome of the batch run
     */
    public BatchRunOutcome runBatch() {
        var pipeline = buildUseCase();
        var runContext = new BatchRunContext(
            new RunId(UUID.randomUUID().toString()), Instant.now());
        return pipeline.execute(runContext);
    }
    // =========================================================================
    // State inspection helpers
    // =========================================================================
    /**
     * Looks up the document master record for the given fingerprint.
     *
     * @param fingerprint the document fingerprint to query
     * @return the master record if one exists, {@link Optional#empty()} if unknown or
     *         if a persistence lookup error occurred
     */
    public Optional<DocumentRecord> findDocumentRecord(DocumentFingerprint fingerprint) {
        return switch (documentRecords.findByFingerprint(fingerprint)) {
            case DocumentKnownProcessable known -> Optional.of(known.record());
            case DocumentTerminalSuccess success -> Optional.of(success.record());
            case DocumentTerminalFinalFailure failure -> Optional.of(failure.record());
            default -> Optional.empty();
        };
    }
    /**
     * Returns every processing attempt recorded for the given fingerprint, in insertion order.
     *
     * @param fingerprint the document fingerprint to query
     * @return all recorded attempts; empty list if none exist
     */
    public List<ProcessingAttempt> findAttempts(DocumentFingerprint fingerprint) {
        return attemptHistory.findAllByFingerprint(fingerprint);
    }
    /**
     * Computes the SHA-256 fingerprint of {@code file} via the production adapter.
     * <p>
     * Useful for correlating a test PDF with its database record after a batch run.
     *
     * @param file the absolute path of the file to fingerprint
     * @return the fingerprint
     * @throws IllegalStateException if fingerprint computation fails
     */
    public DocumentFingerprint computeFingerprint(Path file) {
        var adapter = new Sha256FingerprintAdapter();
        // Minimal candidate, shaped exactly like the production source adapter would build it.
        var probe = new SourceDocumentCandidate(
            file.getFileName().toString(),
            0L,
            new SourceDocumentLocator(file.toAbsolutePath().toString()));
        if (adapter.computeFingerprint(probe) instanceof FingerprintSuccess success) {
            return success.fingerprint();
        }
        throw new IllegalStateException(
            "Fingerprint computation failed for test fixture: " + file);
    }
    /**
     * Lists the filenames of all files currently present in the target folder.
     *
     * @return sorted list of filenames; empty if the target folder is empty
     * @throws IOException if the target folder cannot be read
     */
    public List<String> listTargetFiles() throws IOException {
        try (var entries = Files.list(targetDir)) {
            return entries
                .map(Path::getFileName)
                .map(Path::toString)
                .sorted()
                .toList();
        }
    }
    /**
     * Returns the source folder path used by this context.
     */
    public Path sourceFolder() {
        return sourceDir;
    }
    /**
     * Returns the target folder path used by this context.
     */
    public Path targetFolder() {
        return targetDir;
    }
    /**
     * Overrides the {@link TargetFileCopyPort} wired into subsequent batch runs.
     * Pass {@code null} to revert to the real {@link FilesystemTargetFileCopyAdapter}.
     *
     * @param override the port implementation to use, or {@code null} for the real adapter
     */
    public void setTargetFileCopyPortOverride(TargetFileCopyPort override) {
        this.copyPortOverride = override;
    }
    @Override
    public void close() {
        // Nothing to release: @TempDir cleans up every created file automatically.
    }
    // =========================================================================
    // Private wiring
    // =========================================================================
    /**
     * Assembles a fully wired {@link DefaultBatchRunProcessingUseCase} for one batch run.
     * <p>
     * Every adapter is created fresh per run so no mutable state (locks, connections)
     * leaks across runs. Only the AI stub and the optional copy-port override survive
     * between runs of the same test.
     */
    private DefaultBatchRunProcessingUseCase buildUseCase() {
        var runtimeConfig = new RuntimeConfiguration(
            MAX_PAGES, MAX_RETRIES_TRANSIENT, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
        DocumentRecordRepository recordRepository = documentRecords;
        ProcessingAttemptRepository attemptRepository = attemptHistory;
        UnitOfWorkPort unitOfWork = new SqliteUnitOfWorkAdapter(databaseUrl);
        FingerprintPort fingerprinting = new Sha256FingerprintAdapter();
        TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(targetDir);
        TargetFileCopyPort copyPort = (copyPortOverride != null)
            ? copyPortOverride
            : new FilesystemTargetFileCopyAdapter(targetDir);
        ProcessingLogger coordinatorLog = new Log4jProcessingLogger(
            DocumentProcessingCoordinator.class);
        var coordinator = new DocumentProcessingCoordinator(
            recordRepository,
            attemptRepository,
            unitOfWork,
            targetFolderPort,
            copyPort,
            coordinatorLog,
            MAX_RETRIES_TRANSIENT);
        ClockPort clock = new SystemClockAdapter();
        PromptPort promptPort = new FilesystemPromptPortAdapter(promptFilePath);
        var responseValidator = new AiResponseValidator(clock);
        var namingService = new AiNamingService(
            aiStub, promptPort, responseValidator, AI_MODEL, MAX_TEXT_CHARS);
        RunLockPort runLock = new FilesystemRunLockPortAdapter(lockFilePath);
        SourceDocumentCandidatesPort candidates =
            new SourceDocumentCandidatesPortAdapter(sourceDir);
        PdfTextExtractionPort textExtraction = new PdfTextExtractionPortAdapter();
        ProcessingLogger useCaseLog = new Log4jProcessingLogger(
            DefaultBatchRunProcessingUseCase.class);
        return new DefaultBatchRunProcessingUseCase(
            runtimeConfig,
            runLock,
            candidates,
            textExtraction,
            fingerprinting,
            coordinator,
            namingService,
            useCaseLog);
    }
}

View File

@@ -0,0 +1,72 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import java.io.IOException;
import java.nio.file.Path;
/**
 * Factory producing minimal PDF fixtures for the end-to-end tests.
 * <p>
 * Two flavors are supported:
 * <ul>
 *   <li>Searchable PDFs containing embedded text — for happy-path, transient-error,
 *       and target-copy-failure scenarios where the pre-check must succeed.</li>
 *   <li>Blank PDFs without any extractable text — for deterministic content-error
 *       scenarios where the pre-check must fail.</li>
 * </ul>
 * <p>
 * Apache PDFBox is used so the fixtures are real, structurally valid PDF files that
 * the production {@code PdfTextExtractionPortAdapter} handles exactly like user input.
 */
final class MinimalPdfFactory {
    private MinimalPdfFactory() {
        // Static utility class — not instantiable
    }
    /**
     * Writes a single-page searchable PDF carrying {@code textContent} to {@code outputPath}.
     * <p>
     * The generated file satisfies the production pre-checks on minimum text length and
     * page count, so the AI naming pipeline can run against it.
     *
     * @param outputPath  destination path; the parent directory must already exist
     * @param textContent text embedded in the PDF; should be non-empty for happy-path tests
     * @throws IOException if the file cannot be written
     */
    static void createSearchablePdf(Path outputPath, String textContent) throws IOException {
        try (PDDocument document = new PDDocument()) {
            PDPage firstPage = new PDPage();
            document.addPage(firstPage);
            PDType1Font helvetica = new PDType1Font(Standard14Fonts.FontName.HELVETICA);
            try (PDPageContentStream content = new PDPageContentStream(document, firstPage)) {
                content.beginText();
                content.setFont(helvetica, 12);
                // Position near the top-left of a default letter-size page.
                content.newLineAtOffset(50, 700);
                content.showText(textContent);
                content.endText();
            }
            document.save(outputPath.toFile());
        }
    }
    /**
     * Writes a single-page blank PDF (no text at all) to {@code outputPath}.
     * <p>
     * Processing this file hits the "no usable text" pre-check failure
     * (deterministic content error), so the AI service is never invoked.
     *
     * @param outputPath destination path; the parent directory must already exist
     * @throws IOException if the file cannot be written
     */
    static void createBlankPdf(Path outputPath) throws IOException {
        try (PDDocument document = new PDDocument()) {
            PDPage emptyPage = new PDPage();
            document.addPage(emptyPage);
            document.save(outputPath.toFile());
        }
    }
}

View File

@@ -0,0 +1,109 @@
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
import java.util.concurrent.atomic.AtomicInteger;
/**
* Configurable test double for {@link AiInvocationPort}.
* <p>
* Replaces the real HTTP-based AI adapter in end-to-end tests so that the processing
* pipeline can be exercised without real network calls. Supports two response modes:
* <ul>
* <li><strong>Success mode</strong> (default): returns a structurally valid JSON response
* containing configurable {@code title} and {@code date} fields. This produces a
* {@code PROPOSAL_READY} outcome when the response passes validation.</li>
* <li><strong>Technical failure mode</strong>: returns an {@link AiInvocationTechnicalFailure},
* simulating network errors or service unavailability. This produces a
* {@code FAILED_RETRYABLE} (transient) outcome.</li>
* </ul>
* <p>
* The stub tracks the total number of invocations so that tests can verify whether
* the AI pipeline was called at all (e.g. confirming that a {@code PROPOSAL_READY}
* finalization skips the AI call).
*/
final class StubAiInvocationPort implements AiInvocationPort {
private final AtomicInteger invocationCount = new AtomicInteger(0);
private volatile boolean returnTechnicalFailure = false;
private volatile String title = "Stromabrechnung";
private volatile String date = "2024-01-15";
private volatile String reasoning = "Testdokument fuer End-to-End-Tests.";
/**
* Configures the stub to return a valid naming proposal with the given title and date.
*
* @param title the document title (must pass validation: max 20 chars, no special chars)
* @param date the document date in {@code YYYY-MM-DD} format, or {@code null} to omit
*/
void configureSuccess(String title, String date) {
this.title = title;
this.date = date;
this.returnTechnicalFailure = false;
}
/**
* Configures the stub to return a transient technical failure on every invocation.
* The failure reason is {@code STUB_FAILURE}.
*/
void configureTechnicalFailure() {
this.returnTechnicalFailure = true;
}
/**
* Resets the stub to its default success configuration with title "Stromabrechnung"
* and date "2024-01-15", and clears the invocation counter.
*/
void reset() {
this.title = "Stromabrechnung";
this.date = "2024-01-15";
this.reasoning = "Testdokument fuer End-to-End-Tests.";
this.returnTechnicalFailure = false;
invocationCount.set(0);
}
/**
* Returns the total number of times {@link #invoke} was called since construction
* or the last {@link #reset()}.
*/
int invocationCount() {
return invocationCount.get();
}
/**
* Resets the invocation counter to zero without changing response configuration.
*/
void resetInvocationCount() {
invocationCount.set(0);
}
/**
* Returns either a success response or a technical failure, depending on current configuration.
* Increments the invocation counter on every call.
*/
@Override
public AiInvocationResult invoke(AiRequestRepresentation request) {
invocationCount.incrementAndGet();
if (returnTechnicalFailure) {
return new AiInvocationTechnicalFailure(
request,
"STUB_FAILURE",
"Test stub: configured to return technical failure");
}
String dateField = (date != null) ? "\"date\": \"" + date + "\", " : "";
String rawJson = "{"
+ dateField
+ "\"title\": \"" + title + "\", "
+ "\"reasoning\": \"" + reasoning + "\""
+ "}";
return new AiInvocationSuccess(request, new AiRawResponse(rawJson));
}
}