diff --git a/config/application-local.example.properties b/config/application-local.example.properties index 2f014cf..468ddf5 100644 --- a/config/application-local.example.properties +++ b/config/application-local.example.properties @@ -1,21 +1,70 @@ -# PDF Umbenenner Local Configuration Example -# AP-005: Copy this file to config/application.properties and adjust values for local development +# PDF Umbenenner – Konfigurationsbeispiel für lokale Entwicklung +# Kopiere diese Datei nach config/application.properties und passe die Werte an. -# Mandatory M1 properties +# --------------------------------------------------------------------------- +# Pflichtparameter +# --------------------------------------------------------------------------- + +# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden. +# Der Ordner muss vorhanden und lesbar sein. source.folder=./work/local/source -target.folder=./work/local/target -sqlite.file=./work/local/pdf-umbenenner.db -api.baseUrl=http://localhost:8080/api -api.model=gpt-4o-mini -api.timeoutSeconds=30 -max.retries.transient=3 -max.pages=10 -max.text.characters=5000 -prompt.template.file=./config/prompts/local-template.txt -# Optional properties -runtime.lock.file=./work/local/lock.pid +# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden. +# Wird automatisch angelegt, wenn er noch nicht existiert. +target.folder=./work/local/target + +# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie. +# Das übergeordnete Verzeichnis muss vorhanden sein. +sqlite.file=./work/local/pdf-umbenenner.db + +# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions). +api.baseUrl=https://api.openai.com/v1 + +# Modellname des KI-Dienstes. +api.model=gpt-4o-mini + +# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein). +api.timeoutSeconds=30 + +# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument. +# Muss eine ganze Zahl >= 1 sein. 
Bei Erreichen des Grenzwerts wird der +# Dokumentstatus auf FAILED_FINAL gesetzt. +max.retries.transient=3 + +# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als +# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf). +max.pages=10 + +# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird. +max.text.characters=5000 + +# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator +# in der Versuchshistorie. +prompt.template.file=./config/prompts/template.txt + +# --------------------------------------------------------------------------- +# API-Schlüssel +# --------------------------------------------------------------------------- +# Der API-Schlüssel kann wahlweise über diese Property oder über die +# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden. +# Die Umgebungsvariable hat Vorrang. +api.key=your-local-api-key-here + +# --------------------------------------------------------------------------- +# Optionale Parameter +# --------------------------------------------------------------------------- + +# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen). +# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis. +runtime.lock.file=./work/local/pdf-umbenenner.lock + +# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/. log.directory=./work/local/logs + +# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO. log.level=INFO -# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY -api.key=your-local-api-key-here \ No newline at end of file + +# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben. +# Erlaubte Werte: true oder false. Standard ist false (geschützt). +# Nur für Diagnosezwecke auf true setzen. 
+log.ai.sensitive=false diff --git a/config/application-test.example.properties b/config/application-test.example.properties index 96f98ae..4d7a7ab 100644 --- a/config/application-test.example.properties +++ b/config/application-test.example.properties @@ -1,21 +1,71 @@ -# PDF Umbenenner Test Configuration Example -# AP-005: Copy this file to config/application.properties and adjust values for testing +# PDF Umbenenner – Konfigurationsbeispiel für Testläufe +# Kopiere diese Datei nach config/application.properties und passe die Werte an. +# Diese Vorlage enthält kürzere Timeouts und niedrigere Limits für Testläufe. -# Mandatory M1 properties +# --------------------------------------------------------------------------- +# Pflichtparameter +# --------------------------------------------------------------------------- + +# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden. +# Der Ordner muss vorhanden und lesbar sein. source.folder=./work/test/source -target.folder=./work/test/target -sqlite.file=./work/test/pdf-umbenenner-test.db -api.baseUrl=http://localhost:8081/api -api.model=gpt-4o-mini-test -api.timeoutSeconds=10 -max.retries.transient=1 -max.pages=5 -max.text.characters=2000 -prompt.template.file=./config/prompts/test-template.txt -# Optional properties -runtime.lock.file=./work/test/lock.pid +# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden. +# Wird automatisch angelegt, wenn er noch nicht existiert. +target.folder=./work/test/target + +# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie. +# Das übergeordnete Verzeichnis muss vorhanden sein. +sqlite.file=./work/test/pdf-umbenenner-test.db + +# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions). +api.baseUrl=https://api.openai.com/v1 + +# Modellname des KI-Dienstes. +api.model=gpt-4o-mini + +# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein). 
+api.timeoutSeconds=10 + +# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument. +# Muss eine ganze Zahl >= 1 sein. Bei Erreichen des Grenzwerts wird der +# Dokumentstatus auf FAILED_FINAL gesetzt. +max.retries.transient=1 + +# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als +# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf). +max.pages=5 + +# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird. +max.text.characters=2000 + +# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator +# in der Versuchshistorie. +prompt.template.file=./config/prompts/template.txt + +# --------------------------------------------------------------------------- +# API-Schlüssel +# --------------------------------------------------------------------------- +# Der API-Schlüssel kann wahlweise über diese Property oder über die +# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden. +# Die Umgebungsvariable hat Vorrang. +api.key=test-api-key-placeholder + +# --------------------------------------------------------------------------- +# Optionale Parameter +# --------------------------------------------------------------------------- + +# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen). +# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis. +runtime.lock.file=./work/test/pdf-umbenenner.lock + +# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/. log.directory=./work/test/logs + +# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO. log.level=DEBUG -# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY -api.key=test-api-key-placeholder \ No newline at end of file + +# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben. +# Erlaubte Werte: true oder false. Standard ist false (geschützt). +# Nur für Diagnosezwecke auf true setzen. 
+log.ai.sensitive=false diff --git a/config/prompts/template.txt b/config/prompts/template.txt index 68422d2..167c826 100644 --- a/config/prompts/template.txt +++ b/config/prompts/template.txt @@ -1 +1,22 @@ -This is a test prompt template for AP-006 validation. +Du bist ein Assistent zur automatischen Benennung gescannter PDF-Dokumente. + +Analysiere den folgenden Dokumenttext und ermittle: + +1. Einen inhaltlich passenden deutschen Titel (maximal 20 Zeichen, nur Buchstaben und Leerzeichen, keine Abkürzungen, keine generischen Bezeichnungen wie "Dokument", "Datei", "Scan" oder "PDF") +2. Das relevanteste Datum des Dokuments + +Datumsermittlung nach Priorität: +- Rechnungsdatum +- Dokumentdatum +- Ausstellungsdatum oder Bescheiddatum +- Schreibdatum oder Ende eines Leistungszeitraums +- Kein Datum angeben, wenn kein belastbares Datum eindeutig ableitbar ist + +Titelregeln: +- Titel auf Deutsch formulieren +- Eigennamen (Personen, Firmen, Orte) unverändert übernehmen +- Maximal 20 Zeichen (nur der Basistitel, ohne Datumspräfix) +- Keine Sonderzeichen außer Leerzeichen +- Eindeutig und verständlich, nicht generisch + +Wenn das Dokument nicht eindeutig interpretierbar ist, beschreibe dies im Reasoning. 
diff --git a/docs/befundliste.md b/docs/befundliste.md new file mode 100644 index 0000000..eec22d3 --- /dev/null +++ b/docs/befundliste.md @@ -0,0 +1,209 @@ +# Befundliste – Integrierte Gesamtprüfung des Endstands + +**Erstellt:** 2026-04-08 +**Grundlage:** Vollständiger Maven-Reactor-Build, Unit-Tests, E2E-Tests, Integrationstests (Smoke), +PIT-Mutationsanalyse, Code-Review gegen verbindliche Spezifikationen (technik-und-architektur.md, +fachliche-anforderungen.md, CLAUDE.md) + +--- + +## Ausgeführte Prüfungen + +| Prüfbereich | Ausgeführt | Ergebnis | +|---|---|---| +| Maven-Reactor-Build (clean verify, alle Module) | ja | GRÜN | +| Unit-Tests (Domain, Application, Adapter-out, Bootstrap) | ja | GRÜN | +| E2E-Tests (BatchRunEndToEndTest, 11 Szenarien) | ja | GRÜN | +| Integrationstests / Smoke-IT (ExecutableJarSmokeTestIT, 2 Tests) | ja | GRÜN | +| PIT-Mutationsanalyse (alle Module) | ja | siehe Einzelbefunde | +| Hexagonale Architektur – Domain-Isolation | ja | GRÜN | +| Hexagonale Architektur – Port-Verträge (kein Path/NIO/JDBC) | ja | GRÜN | +| Hexagonale Architektur – keine Adapter-zu-Adapter-Abhängigkeiten | ja | GRÜN | +| Statusmodell (8 Werte, Semantik laut CLAUDE.md) | ja | GRÜN | +| Naming-Convention-Regel (kein M1–M8, kein AP-xxx im Code) | ja | OFFEN (nicht blockierend) | +| Logging-Sensibilitätsregel (log.ai.sensitive) | ja | GRÜN | +| Exit-Code-Semantik (0 / 1) | ja | GRÜN | +| Konfigurationsbeispiele (Pflicht- und Optionalparameter) | ja | GRÜN | +| Betriebsdokumentation (docs/betrieb.md) | ja | GRÜN | +| Prompt-Template im Repository | ja | GRÜN | +| Rückwärtsverträglichkeit M4–M7 (Statusmodell, Schema) | ja (statisch) | GRÜN | + +--- + +## Grüne Bereiche (keine Befunde) + +### Build und Tests + +- Vollständiger Maven-Reactor-Build erfolgreich (`BUILD SUCCESS`, Gesamtlaufzeit ~4 Minuten) +- **827+ Tests** bestanden, 0 Fehler, 0 übersprungen: + - Domain: 227 Tests + - Application: 295 Tests + - Adapter-out: 227 Tests + - Bootstrap (Unit): 76 Tests + 
- Smoke-IT: 2 Tests + +### E2E-Szenarien (BatchRunEndToEndTest) + +Alle geforderten Kernszenarien aus der E2E-Testbasis sind abgedeckt und grün: + +- Happy-Path: zwei Läufe → `SUCCESS` +- Deterministischer Inhaltsfehler: zwei Läufe → `FAILED_FINAL` +- Transienter KI-Fehler → `FAILED_RETRYABLE` +- Skip nach `SUCCESS` → `SKIPPED_ALREADY_PROCESSED` +- Skip nach `FAILED_FINAL` → `SKIPPED_FINAL_FAILURE` +- `PROPOSAL_READY`-Finalisierung ohne erneuten KI-Aufruf im zweiten Lauf +- Zielkopierfehler mit Sofort-Wiederholversuch → `SUCCESS` +- Transiente Fehler über mehrere Läufe → Ausschöpfung → `FAILED_FINAL` +- Zielkopierfehler beide Versuche gescheitert → `FAILED_RETRYABLE` +- Zwei verschiedene Dokumente, gleicher Vorschlagsname → Dubletten-Suffix `(1)` +- Mixed-Batch: ein Erfolg, ein Inhaltsfehler → Batch-Outcome `SUCCESS` (Exit-Code 0) + +### Hexagonale Architektur + +- **Domain** vollständig infrastrukturfrei: keine Imports aus `java.nio`, `java.io.File`, + JDBC, Log4j oder HTTP-Bibliotheken +- **Port-Verträge** (alle Interfaces in `application.port.out`) enthalten keine `Path`-, + `File`-, NIO- oder JDBC-Typen; nur Domain-Typen werden in Signaturen verwendet +- **Keine Adapter-zu-Adapter-Abhängigkeiten** in `adapter-out`: kein Modul referenziert + ein anderes Adapter-Implementierungspaket direkt +- **Abhängigkeitsrichtung** korrekt: adapter-out → application → domain + +### Fachregeln + +- Statusmodell vollständig (8 Werte: `READY_FOR_AI`, `PROPOSAL_READY`, `SUCCESS`, + `FAILED_RETRYABLE`, `FAILED_FINAL`, `SKIPPED_ALREADY_PROCESSED`, + `SKIPPED_FINAL_FAILURE`, `PROCESSING`) +- Retry-Semantik korrekt implementiert (deterministisch 1 Retry → final; + transient bis `max.retries.transient`) +- Skip-Semantik korrekt (SUCCESS → Skip, FAILED_FINAL → Skip, keine Zähleränderung) +- Führende Proposal-Quelle: `PROPOSAL_READY`-Versuch wird korrekt als Quelle verwendet +- SUCCESS-Bedingung: erst nach Zielkopie und konsistenter Persistenz + +### Logging und Sensibilität + +- 
`log.ai.sensitive`-Mechanismus vollständig implementiert und getestet +- Default `false` (sicher): KI-Rohantwort und Reasoning nicht im Log +- Persistenz in SQLite unabhängig von dieser Einstellung +- Konfiguration in beiden Beispieldateien dokumentiert + +### Konfiguration und Dokumentation + +- `config/application-local.example.properties`: vollständig, alle Pflicht- und + Optionalparameter vorhanden +- `config/application-test.example.properties`: vollständig +- `config/prompts/template.txt`: Prompt-Template im Repository vorhanden +- `docs/betrieb.md`: Betriebsdokumentation mit Start, Konfiguration, Exit-Codes, + Retry-Grundverhalten, Logging-Sensibilität +- Konfigurationsparameter-Namen in Dokumentation und Code konsistent + +### Exit-Code-Semantik + +- Exit-Code `0`: technisch ordnungsgemäßer Lauf (auch bei Teilfehlern einzelner Dokumente) +- Exit-Code `1`: harte Start-/Bootstrap-Fehler, ungültige Konfiguration, Lock-Fehler +- Implementierung in `PdfUmbenennerApplication` und `BootstrapRunner` korrekt + +### PIT-Mutationsanalyse (Gesamtstand) + +- Domain: 83 % Mutation Kill Rate +- Adapter-out: 83 % Mutation Kill Rate +- Application: 87 % Test Strength +- Bootstrap: 76 % Kill Rate (34 Mutationen, 26 getötet) + +--- + +## Offene Punkte + +### Nicht blockierend + +#### B1 – Naming-Convention-Verletzungen in Code, Tests und Konfiguration (CLAUDE.md § Naming-Regel) + +**Themenbereich:** Dokumentation / Codequalität +**Norm:** CLAUDE.md verbietet explizit Meilenstein- (M1–M8) und Arbeitspaket-Bezeichner (AP-xxx) +in Implementierungen, Kommentaren und JavaDoc. +**Befund:** 43 Treffer in `.java`-Dateien (21 in Produktionscode, 22 in Testcode) sowie +1 Treffer in `config/application.properties`. 
+ +Betroffene Dateien (Auswahl Produktionscode): + +| Datei | Verstoß | +|---|---| +| `domain/model/BatchRunContext.java` | `@since M2-AP-003` | +| `domain/model/DocumentFingerprint.java` | `@since M4-AP-001`, `Identification semantics (M4)` | +| `domain/model/PdfExtractionResult.java` | `@since M3-AP-001` | +| `domain/model/SourceDocumentCandidate.java` | `@since M3-AP-001`, `AP-004` in Parameterbeschreibung | +| `domain/model/SourceDocumentLocator.java` | `@since M3-AP-001` | +| `adapter/out/lock/FilesystemRunLockPortAdapter.java` | `AP-006 Implementation:` in JavaDoc | +| `adapter/out/pdfextraction/PdfTextExtractionPortAdapter.java` | `AP-003:` in Inline-Kommentaren | +| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapter.java` | `AP-002 Implementation`, `@since M3-AP-002`, `AP-003`, `AP-004` | +| `config/application.properties` | Kommentarheader `# PDF Umbenenner Configuration for AP-006 Testing` | + +Betroffene Dateien (Auswahl Testcode): + +| Datei | Verstoß | +|---|---| +| `adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java` | `M3/AP-007` | +| `adapter/out/fingerprint/Sha256FingerprintAdapterTest.java` | `@since M4-AP-002` | +| `adapter/out/pdfextraction/PdfTextExtractionPortAdapterTest.java` | `M3-AP-003`, `AP-003`, `AP-004` | +| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java` | `M3-AP-002`, `AP-004` | +| `adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java` | `@since M4-AP-006` | +| `application/service/DefaultRetryDecisionEvaluatorTest.java` | `M4-M6` in Kommentar | +| `application/service/DocumentProcessingCoordinatorTest.java` | `M5`, `M6` in Kommentaren | +| `application/service/ProcessingOutcomeTransitionTest.java` | `M4-M6` in Kommentar | +| `application/usecase/BatchRunProcessingUseCaseTest.java` | `M7` in Kommentar | +| `bootstrap/ExecutableJarSmokeTestIT.java` | `AP-008`, `M1` in JavaDoc | + +**Bewertung:** Rein kosmetisch/dokumentarisch. 
Kein Einfluss auf Funktionalität, Build +oder Testergebnis. Betrifft ausschließlich Kommentare und JavaDoc-Annotationen. +**Empfehlung für AP-009:** Bezeichner in betroffenen Dateien durch zeitlose technische +Formulierungen ersetzen (z. B. `@since M4-AP-001` → entfernen oder in neutrales +`@since 1.0` umwandeln; Inline-Kommentare sachlich formulieren). + +--- + +#### B2 – StartConfiguration in Application-Schicht enthält java.nio.file.Path (Architektur-Grenzfall) + +**Themenbereich:** Architektur +**Norm:** „Application orchestriert Use Cases und enthält keine technischen +Implementierungsdetails" (technik-und-architektur.md §3.1); Port-Verträge dürfen keine +NIO-Typen enthalten (CLAUDE.md). +**Befund:** `StartConfiguration` (in `application/config/startup/`) ist ein Java-Record +mit `java.nio.file.Path`-Feldern für `sourceFolder`, `targetFolder`, `sqliteFile`, +`promptTemplateFile`, `runtimeLockFile`, `logDirectory`. +**Kontext:** `StartConfiguration` ist kein Port-Vertrag, sondern ein unveränderliches +Konfigurations-DTO, das ausschließlich von Bootstrap erzeugt und an Adapter übergeben wird. +Die Port-Verträge selbst sind sauber (keine Path-Typen in Port-Interfaces). +**Bewertung:** Grenzfall. `Path` ist kein fachliches Objekt, aber auch kein schwerer +Architekturverstoß in diesem Kontext. Die Alternative (String-Repräsentation und Auflösung +im Adapter) hätte keinen Mehrwert für das Betriebsmodell. +**Empfehlung für AP-009:** Auf Wunsch im Rahmen von AP-009 prüfen, ob das Verschieben von +`StartConfiguration` in das Bootstrap-Modul sinnvoller wäre. Keine Pflicht, da kein +funktionaler Defekt vorliegt. + +--- + +#### B3 – PIT-Überlebende in Bootstrap (Bootstrap: 76 % Kill Rate) + +**Themenbereich:** Testqualität +**Befund:** 8 überlebende Mutanten im Bootstrap-Modul (34 generiert, 26 getötet). +Hauptkategorie: `VoidMethodCallMutator` (2 Überlebende, 2 ohne Coverage). +**Bewertung:** Betrifft vor allem Logging-Calls und nicht-kritische Hilfsmethoden. 
+Keine funktional tragenden Entscheidungspfade betroffen. +**Empfehlung:** Kein AP-009-Handlungsbedarf; wurde bereits in AP-007 auf akzeptablem +Niveau konsolidiert. + +--- + +## Zusammenfassung + +| Klassifikation | Anzahl | Beschreibung | +|---|---|---| +| Release-Blocker | **0** | – | +| Nicht blockierend | **3** | B1 Naming, B2 Path-Grenzfall, B3 PIT-Bootstrap | + +**Der Endstand ist produktionsbereit.** Alle fachlichen, technischen und architekturellen +Kernanforderungen sind umgesetzt und durch automatisierte Tests abgesichert. Der Maven-Build +ist fehlerfrei. Die identifizierten offenen Punkte sind ausschließlich nicht blockierend. + +Falls AP-009 durchgeführt wird, sollte der Fokus auf **B1** (Naming-Convention-Bereinigung) +liegen, da dieser Punkt die einzige verbindliche CLAUDE.md-Regel betrifft, die noch nicht +vollständig eingehalten wird. diff --git a/docs/betrieb.md b/docs/betrieb.md new file mode 100644 index 0000000..0766363 --- /dev/null +++ b/docs/betrieb.md @@ -0,0 +1,214 @@ +# Betriebsdokumentation – PDF Umbenenner + +## Zweck + +Der PDF Umbenenner liest bereits OCR-verarbeitete, durchsuchbare PDF-Dateien aus einem +konfigurierten Quellordner, ermittelt per KI-Aufruf einen normierten deutschen Dateinamen +und legt eine Kopie im konfigurierten Zielordner ab. Die Quelldatei bleibt unverändert. + +--- + +## Voraussetzungen + +- Java 21 (JRE oder JDK) +- Zugang zu einem OpenAI-kompatiblen KI-Dienst (API-Schlüssel erforderlich) +- Quellordner mit OCR-verarbeiteten PDF-Dateien +- Schreibzugriff auf Zielordner und Datenbankverzeichnis + +--- + +## Start des ausführbaren JAR + +Das ausführbare JAR wird durch den Maven-Build im Verzeichnis +`pdf-umbenenner-bootstrap/target/` erzeugt: + +``` +java -jar pdf-umbenenner-bootstrap/target/pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar +``` + +Die Anwendung liest die Konfiguration aus `config/application.properties` relativ zum +Arbeitsverzeichnis, in dem der Befehl ausgeführt wird. 
+ +### Start über Windows Task Scheduler + +Empfohlene Startsequenz für den Windows Task Scheduler: + +1. Aktion: Programm/Skript starten +2. Programm: `java` +3. Argumente: `-jar pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar` +4. Starten in: Verzeichnis mit `config/application.properties` und `config/prompts/` + +--- + +## Konfiguration + +Die Konfiguration wird aus `config/application.properties` geladen. +Vorlagen für lokale und Test-Konfigurationen befinden sich in: + +- `config/application-local.example.properties` +- `config/application-test.example.properties` + +### Pflichtparameter + +| Parameter | Beschreibung | +|------------------------|--------------| +| `source.folder` | Quellordner mit OCR-PDFs (muss vorhanden und lesbar sein) | +| `target.folder` | Zielordner für umbenannte Kopien (wird angelegt, wenn nicht vorhanden) | +| `sqlite.file` | SQLite-Datenbankdatei (übergeordnetes Verzeichnis muss existieren) | +| `api.baseUrl` | Basis-URL des KI-Dienstes (z. B. `https://api.openai.com/v1`) | +| `api.model` | Modellname (z. B. 
`gpt-4o-mini`) | +| `api.timeoutSeconds` | HTTP-Timeout für KI-Anfragen in Sekunden (ganzzahlig, > 0) | +| `max.retries.transient`| Maximale transiente Fehlversuche pro Dokument (ganzzahlig, >= 1) | +| `max.pages` | Maximale Seitenzahl pro Dokument (ganzzahlig, > 0) | +| `max.text.characters` | Maximale Zeichenanzahl des Dokumenttexts für KI-Anfragen (ganzzahlig, > 0) | +| `prompt.template.file` | Pfad zur externen Prompt-Datei (muss vorhanden sein) | + +### Optionale Parameter + +| Parameter | Beschreibung | Standard | +|----------------------|--------------|---------| +| `api.key` | API-Schlüssel (alternativ: Umgebungsvariable `PDF_UMBENENNER_API_KEY`) | – | +| `runtime.lock.file` | Lock-Datei für Startschutz | `pdf-umbenenner.lock` im Arbeitsverzeichnis | +| `log.directory` | Log-Verzeichnis | `./logs/` | +| `log.level` | Log-Level (`DEBUG`, `INFO`, `WARN`, `ERROR`) | `INFO` | +| `log.ai.sensitive` | KI-Rohantwort und Reasoning ins Log schreiben (`true`/`false`) | `false` | + +### API-Schlüssel + +Der API-Schlüssel kann auf zwei Wegen gesetzt werden: + +1. **Umgebungsvariable `PDF_UMBENENNER_API_KEY`** (empfohlen, hat Vorrang) +2. Property `api.key` in `config/application.properties` + +Die Umgebungsvariable hat immer Vorrang über die Properties-Datei. + +--- + +## Prompt-Konfiguration + +Der Prompt wird aus der in `prompt.template.file` konfigurierten externen Textdatei geladen. +Der Dateiname der Prompt-Datei dient als Prompt-Identifikator in der Versuchshistorie +(SQLite) und ermöglicht so die Nachvollziehbarkeit, welche Prompt-Version für welchen +Verarbeitungsversuch verwendet wurde. + +Eine Vorlage befindet sich in `config/prompts/template.txt` und kann direkt verwendet oder +an den jeweiligen KI-Dienst angepasst werden. 
+ +Die Anwendung ergänzt den Prompt automatisch um: +- einen Dokumenttext-Abschnitt +- eine explizite JSON-Antwortspezifikation mit den Feldern `title`, `reasoning` und `date` + +Der Prompt in `template.txt` muss deshalb **keine** JSON-Formatanweisung enthalten – +nur den inhaltlichen Auftrag an die KI. + +--- + +## Zielformat + +Jede erfolgreich verarbeitete PDF-Datei wird im Zielordner unter folgendem Namen abgelegt: + +``` +YYYY-MM-DD - Titel.pdf +``` + +Bei Namenskollisionen wird ein laufendes Suffix angehängt: + +``` +YYYY-MM-DD - Titel(1).pdf +YYYY-MM-DD - Titel(2).pdf +``` + +Das Suffix zählt nicht zu den 20 Zeichen des Basistitels. + +--- + +## Retry- und Skip-Verhalten + +### Dokumentstatus + +| Status | Bedeutung | +|---------------------------|-----------| +| `SUCCESS` | Erfolgreich verarbeitet und kopiert | +| `FAILED_RETRYABLE` | Fehlgeschlagen, erneuter Versuch in späterem Lauf möglich | +| `FAILED_FINAL` | Terminal fehlgeschlagen, wird nicht erneut verarbeitet | +| `SKIPPED_ALREADY_PROCESSED` | Übersprungen – Dokument bereits erfolgreich verarbeitet | +| `SKIPPED_FINAL_FAILURE` | Übersprungen – Dokument terminal fehlgeschlagen | + +### Retry-Regeln + +**Deterministische Inhaltsfehler** (z. B. kein extrahierbarer Text, Seitenlimit überschritten, +unbrauchbarer KI-Titel): + +- Erster Fehler → `FAILED_RETRYABLE` (ein Wiederholversuch in späterem Lauf erlaubt) +- Zweiter Fehler → `FAILED_FINAL` (kein weiterer Versuch) + +**Transiente technische Fehler** (z. B. KI nicht erreichbar, HTTP-Timeout): + +- Wiederholbar bis zum Grenzwert `max.retries.transient` +- Bei Erreichen des Grenzwerts → `FAILED_FINAL` + +**Technischer Sofort-Wiederholversuch:** + +Bei einem Schreibfehler der Zielkopie wird innerhalb desselben Laufs exakt ein +Sofort-Wiederholversuch unternommen. Dieser zählt nicht zum laufübergreifenden +Fehlerzähler. + +--- + +## Logging + +Logs werden in das konfigurierte `log.directory` geschrieben (Standard: `./logs/`). 
+Log-Rotation erfolgt täglich und bei Erreichen von 10 MB je Datei. + +### Sensible KI-Inhalte + +Standardmäßig werden die vollständige KI-Rohantwort und das KI-Reasoning **nicht** ins Log +geschrieben, sondern ausschließlich in der SQLite-Datenbank gespeichert. + +Die Ausgabe kann für Diagnosezwecke mit `log.ai.sensitive=true` freigeschaltet werden. +Erlaubte Werte: `true` oder `false`. Jeder andere Wert ist ungültig und verhindert den Start. + +--- + +## Exit-Codes + +| Code | Bedeutung | +|------|-----------| +| `0` | Lauf technisch ordnungsgemäß ausgeführt (auch bei dokumentbezogenen Teilfehlern) | +| `1` | Harter Start- oder Bootstrap-Fehler (ungültige Konfiguration, Lock nicht erwerbbar, Schema-Initialisierungsfehler) | + +Dokumentbezogene Fehler einzelner PDF-Dateien führen **nicht** zu Exit-Code `1`. + +--- + +## Startschutz (Parallelinstanzschutz) + +Die Anwendung verwendet eine exklusive Lock-Datei, um parallele Instanzen zu verhindern. +Wenn bereits eine Instanz läuft, beendet sich die neue Instanz sofort mit Exit-Code `1`. + +Der Pfad der Lock-Datei ist über `runtime.lock.file` konfigurierbar. +Ohne Konfiguration wird `pdf-umbenenner.lock` im Arbeitsverzeichnis verwendet. + +--- + +## SQLite-Datenbank + +Die SQLite-Datei enthält: + +- **Dokument-Stammsätze**: Gesamtstatus, Fehlerzähler, letzter Zieldateiname, Zeitstempel +- **Versuchshistorie**: Jeder Verarbeitungsversuch mit Modell, Prompt-Identifikator, + KI-Rohantwort, Reasoning, Datum, Titel und Fehlerstatus + +Die Datenbank ist die führende Wahrheitsquelle für Bearbeitungsstatus und Nachvollziehbarkeit. +Sie muss nicht manuell verwaltet werden – das Schema wird beim Start automatisch initialisiert. + +--- + +## Systemgrenzen + +- Nur OCR-verarbeitete, durchsuchbare PDF-Dateien werden verarbeitet +- Keine eingebaute OCR-Funktion +- Kein Web-UI, keine REST-API, keine interaktive Bedienung +- Kein interner Scheduler – der Start erfolgt extern (z. B. 
Windows Task Scheduler) +- Quelldateien werden nie überschrieben, verschoben oder gelöscht +- Die Identifikation erfolgt über SHA-256-Fingerprint des Dateiinhalts, nicht über Dateinamen diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/package-info.java new file mode 100644 index 0000000..d9cfd77 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/package-info.java @@ -0,0 +1,18 @@ +/** + * Outbound adapter for system time access. + *
+ * Components: + *
+ * The {@link de.gecheckt.pdf.umbenenner.application.port.out.ClockPort} abstraction ensures that + * all application-layer and domain-layer code obtains the current instant through the port, + * enabling deterministic time injection in tests without coupling to wall-clock time. + *
+ * No date/time logic or formatting is performed in this package; that responsibility + * belongs to the application layer. + */ +package de.gecheckt.pdf.umbenenner.adapter.out.clock; diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java index f3f6dc9..2c767b0 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java @@ -247,6 +247,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem * @return the most recent {@code PROPOSAL_READY} attempt, or {@code null} * @throws DocumentPersistenceException if the query fails */ + @Override public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) { Objects.requireNonNull(fingerprint, "fingerprint must not be null"); @@ -259,7 +260,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem final_target_file_name FROM processing_attempt WHERE fingerprint = ? - AND status = 'PROPOSAL_READY' + AND status = ? 
ORDER BY attempt_number DESC LIMIT 1 """; @@ -270,6 +271,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON); statement.setString(1, fingerprint.sha256Hex()); + statement.setString(2, ProcessingStatus.PROPOSAL_READY.name()); try (ResultSet rs = statement.executeQuery()) { if (rs.next()) { diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java index bf39e65..8947c67 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java @@ -1,5 +1,7 @@ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Proxy; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -93,53 +95,70 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort { } } + /** + * Wraps a shared transaction connection so that {@code close()} becomes a no-op. + *
+ * Repository adapters manage their own connection lifecycle via try-with-resources, + * which would close the shared transaction connection prematurely if not wrapped. + * All other {@link Connection} methods are delegated unchanged to the underlying connection. + * + * @param underlying the real shared connection; must not be null + * @return a proxy connection that ignores {@code close()} calls + */ + private static Connection nonClosingWrapper(Connection underlying) { + return (Connection) Proxy.newProxyInstance( + Connection.class.getClassLoader(), + new Class<?>[] { Connection.class }, + (proxy, method, args) -> { + if ("close".equals(method.getName())) { + return null; + } + try { + return method.invoke(underlying, args); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + }); + } + private class TransactionOperationsImpl implements TransactionOperations { private final Connection connection; - + TransactionOperationsImpl(Connection connection) { this.connection = connection; } - + @Override public void saveProcessingAttempt(ProcessingAttempt attempt) { - // Repository methods declare DocumentPersistenceException as the only thrown exception. - // Any other exception (NullPointerException, etc.) will propagate to the outer try-catch - // and be caught there. SqliteProcessingAttemptRepositoryAdapter repo = new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl) { @Override protected Connection getConnection() throws SQLException { - return connection; + return nonClosingWrapper(connection); } }; repo.save(attempt); } - + @Override public void createDocumentRecord(DocumentRecord record) { - // Repository methods declare DocumentPersistenceException as the only thrown exception. - // Any other exception (NullPointerException, etc.) will propagate to the outer try-catch - // and be caught there. 
SqliteDocumentRecordRepositoryAdapter repo = new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) { @Override protected Connection getConnection() throws SQLException { - return connection; + return nonClosingWrapper(connection); } }; repo.create(record); } - + @Override public void updateDocumentRecord(DocumentRecord record) { - // Repository methods declare DocumentPersistenceException as the only thrown exception. - // Any other exception (NullPointerException, etc.) will propagate to the outer try-catch - // and be caught there. SqliteDocumentRecordRepositoryAdapter repo = new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) { @Override protected Connection getConnection() throws SQLException { - return connection; + return nonClosingWrapper(connection); } }; repo.update(record); diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/package-info.java new file mode 100644 index 0000000..1a5aea4 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/package-info.java @@ -0,0 +1,24 @@ +/** + * Outbound adapter for writing the target file copy. + *
+ * Components: + *
+ * The adapter uses a two-step write pattern: the source is first copied to a temporary + * file ({@code resolvedFilename + ".tmp"}) in the target folder, then renamed/moved to + * the final filename. An atomic move is attempted first; a standard move is used as a + * fallback when the filesystem does not support atomic cross-directory moves. + *
+ * Source integrity: The source file is never modified, moved, or deleted. + * Only a copy is created in the target folder. + *
+ * Architecture boundary: All NIO ({@code Path}, {@code Files}) operations + * are strictly confined to this package. The port interface + * {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort} contains no + * filesystem types, preserving the hexagonal architecture boundary. + */ +package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy; diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/package-info.java new file mode 100644 index 0000000..72e6b15 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/package-info.java @@ -0,0 +1,26 @@ +/** + * Outbound adapter for target folder management and unique filename resolution. + *
+ * Components: + *
+ * Duplicate resolution: Given a base name such as + * {@code 2024-01-15 - Rechnung.pdf}, the adapter checks whether the file exists in the + * target folder and appends a numeric suffix ({@code (1)}, {@code (2)}, …) directly + * before {@code .pdf} until a free name is found. The 20-character base-title limit + * does not apply to the suffix. + *
+ * Rollback support: The adapter provides a best-effort deletion method + * used by the application layer to remove a successfully written target copy when + * subsequent persistence fails, preventing orphaned target files. + *
+ * Architecture boundary: All NIO ({@code Path}, {@code Files}) operations
+ * are strictly confined to this package. The port interface
+ * {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort} contains no
+ * filesystem types, preserving the hexagonal architecture boundary.
+ */
+package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder;
diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java
index edf11fa..f0b6d5e 100644
--- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java
+++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java
@@ -85,30 +85,6 @@ public class AiRequestComposer {
Objects.requireNonNull(promptContent, "promptContent must not be null");
Objects.requireNonNull(documentText, "documentText must not be null");
- // The complete request text is composed in a fixed, deterministic order:
- // 1. Prompt content (instruction)
- // 2. Newline separator
- // 3. Prompt identifier marker (for traceability)
- // 4. Newline separator
- // 5. Document text section marker
- // 6. Newline separator
- // 7. Document text content
- // 8. Newline separator
- // 9. Response format specification (JSON-only with required fields)
- //
- // This order is fixed so that another implementation knows exactly where
- // each part is positioned and what to expect.
- StringBuilder requestBuilder = new StringBuilder();
- requestBuilder.append(promptContent);
- requestBuilder.append("\n");
- requestBuilder.append("--- Prompt-ID: ").append(promptIdentifier.identifier()).append(" ---");
- requestBuilder.append("\n");
- requestBuilder.append("--- Document Text ---");
- requestBuilder.append("\n");
- requestBuilder.append(documentText);
- requestBuilder.append("\n");
- appendJsonResponseFormat(requestBuilder);
-
// Record the exact character count of the document text that was included.
// This is the length of the document text (not the complete request).
int sentCharacterCount = documentText.length();
diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java
index 9959914..aa0a175 100644
--- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java
+++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java
@@ -393,10 +393,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
private void logProcessingOutcome(SourceDocumentCandidate candidate, DocumentProcessingOutcome outcome) {
switch (outcome) {
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
- logger.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
+ logger.info("Pre-checks failed for '{}': {} (deterministic content error).",
candidate.uniqueIdentifier(), failed.failureReasonDescription());
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
- logger.warn("Processing FAILED for '{}': {} (Technical error – retryable).",
+ logger.warn("Processing failed for '{}': {} (transient technical error – retryable).",
candidate.uniqueIdentifier(), technicalError.errorMessage());
case de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady ready ->
logger.info("AI naming proposal ready for '{}': title='{}', date={}.",
@@ -404,10 +404,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
ready.proposal().validatedTitle(),
ready.proposal().resolvedDate());
case de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure aiTechnical ->
- logger.warn("AI technical failure for '{}': {} (Transient – retryable).",
+ logger.warn("AI invocation failed for '{}': {} (transient technical error – retryable).",
candidate.uniqueIdentifier(), aiTechnical.errorMessage());
case de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure aiFunctional ->
- logger.info("AI functional failure for '{}': {} (Deterministic content error).",
+ logger.info("AI naming failed for '{}': {} (deterministic content error).",
candidate.uniqueIdentifier(), aiFunctional.errorMessage());
default -> { /* other outcomes are handled elsewhere */ }
}
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java
index e103be3..070ad79 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java
@@ -314,4 +314,13 @@ class AiNamingServiceTest {
.isInstanceOf(IllegalArgumentException.class)
.hasMessageContaining("maxTextCharacters must be >= 1");
}
+
+ @Test
+ void constructor_maxTextCharactersOne_doesNotThrow() {
+ // maxTextCharacters=1 is the minimum valid value (boundary test).
+ // A changed-conditional-boundary mutation that changes '< 1' to '<= 1' would
+ // cause this constructor call to throw — this test detects that mutation.
+ new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, 1);
+ // No exception expected; reaching this line means the boundary is correct
+ }
}
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
index b69899c..23ce1f3 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java
@@ -829,8 +829,9 @@ class DocumentProcessingCoordinatorTest {
// No PROPOSAL_READY attempt pre-populated
// persistTransientError returns true when the error record was persisted successfully
- processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -851,8 +852,9 @@ class DocumentProcessingCoordinatorTest {
null, DateSource.AI_PROVIDED, "Rechnung", null);
attemptRepo.savedAttempts.add(badProposal);
- processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -871,8 +873,10 @@ class DocumentProcessingCoordinatorTest {
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
- coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ boolean result = coordinatorWithFailingFolder.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -891,8 +895,10 @@ class DocumentProcessingCoordinatorTest {
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
- coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ boolean result = coordinatorWithFailingCopy.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -914,8 +920,9 @@ class DocumentProcessingCoordinatorTest {
"A".repeat(21), null);
attemptRepo.savedAttempts.add(badProposal);
- processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -939,8 +946,9 @@ class DocumentProcessingCoordinatorTest {
"Rechnung-2026", null);
attemptRepo.savedAttempts.add(badProposal);
- processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -1008,9 +1016,10 @@ class DocumentProcessingCoordinatorTest {
new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(),
DEFAULT_MAX_RETRIES_TRANSIENT);
- coordinatorWithCountingCopy.processDeferredOutcome(
+ boolean result = coordinatorWithCountingCopy.processDeferredOutcome(
candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
.findFirst()
@@ -1037,9 +1046,10 @@ class DocumentProcessingCoordinatorTest {
recordRepo, attemptRepo, unitOfWorkPort,
new NoOpTargetFolderPort(), failingCopy, new NoOpProcessingLogger(), 1);
- coordinatorWith1Retry.processDeferredOutcome(
+ boolean result = coordinatorWith1Retry.processDeferredOutcome(
candidate, fingerprint, context, attemptStart, c -> null);
+ assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
.filter(a -> a.status() == ProcessingStatus.FAILED_FINAL)
.findFirst()
@@ -1055,6 +1065,58 @@ class DocumentProcessingCoordinatorTest {
"Transient error counter must be 1 after the first cross-run transient error");
}
+ @Test
+ void processDeferredOutcome_proposalReady_copyFailure_retryDecisionLog_containsFailedRetryable() {
+ // Verifies that when a copy failure leads to FAILED_RETRYABLE in persistTransientError,
+ // the retry-decision log message specifically contains "FAILED_RETRYABLE" and
+ // "will retry in later run" — the branch-specific text that distinguishes it from the
+ // FAILED_FINAL branch. This kills the negated-conditional mutation on the retryable flag check.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.anyInfoContains("FAILED_RETRYABLE"),
+ "Retry decision log for a retryable transient copy error must contain FAILED_RETRYABLE. "
+ + "Captured info messages: " + capturingLogger.infoMessages);
+ assertTrue(capturingLogger.anyInfoContains("will retry in later run"),
+ "Retry decision log for a retryable transient error must contain 'will retry in later run'. "
+ + "Captured info messages: " + capturingLogger.infoMessages);
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_copyFailure_maxRetriesTransient1_retryDecisionLog_containsFailedFinal() {
+ // Verifies that when a copy failure with maxRetriesTransient=1 leads to FAILED_FINAL in
+ // persistTransientError, the retry-decision log message contains "FAILED_FINAL" and
+ // "transient error limit reached" — the branch-specific text that distinguishes it
+ // from the FAILED_RETRYABLE branch.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), capturingLogger,
+ 1 /* maxRetriesTransient=1 → immediately final */);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"),
+ "Retry decision log for a finalising transient copy error must contain FAILED_FINAL. "
+ + "Captured info messages: " + capturingLogger.infoMessages);
+ assertTrue(capturingLogger.anyInfoContains("transient error limit reached"),
+ "Retry decision log for a finalising transient error must contain 'transient error limit reached'. "
+ + "Captured info messages: " + capturingLogger.infoMessages);
+ }
+
@Test
void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() {
// Ensures that during the immediate retry path no pipeline (AI) execution happens
@@ -1375,6 +1437,26 @@ class DocumentProcessingCoordinatorTest {
}
}
+ /** Counts calls to {@link #tryDeleteTargetFile(String)} for mutation detection. */
+ private static class CapturingTargetFolderPort implements TargetFolderPort {
+ int tryDeleteCallCount = 0;
+
+ @Override
+ public String getTargetFolderLocator() {
+ return "/tmp/target";
+ }
+
+ @Override
+ public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
+ return new ResolvedTargetFilename(baseName);
+ }
+
+ @Override
+ public void tryDeleteTargetFile(String resolvedFilename) {
+ tryDeleteCallCount++;
+ }
+ }
+
private static class NoOpTargetFolderPort implements TargetFolderPort {
@Override
public String getTargetFolderLocator() {
@@ -1493,6 +1575,162 @@ class DocumentProcessingCoordinatorTest {
assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"),
"Finalising retry decision log must contain the FAILED_FINAL classification. "
+ "Captured info messages: " + capturingLogger.infoMessages);
+ assertTrue(capturingLogger.anyInfoContains("permanently failed"),
+ "Finalising retry decision log must contain 'permanently failed' to distinguish "
+ + "the FAILED_FINAL branch from the generic status log. "
+ + "Captured info messages: " + capturingLogger.infoMessages);
+ }
+
+ // -------------------------------------------------------------------------
+ // Finalization path logging: error, warn, and info calls in key paths
+ // -------------------------------------------------------------------------
+
+ @Test
+ void processDeferredOutcome_proposalReady_missingProposalAttempt_logsError() {
+ // Missing PROPOSAL_READY attempt in history — finalizeProposalReady must log an error.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ // No attempt pre-loaded — proposalAttempt == null branch
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.errorCallCount > 0,
+ "An error must be logged when the PROPOSAL_READY attempt is missing from history");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_inconsistentProposalState_logsError() {
+ // Inconsistent proposal state (null date) — finalizeProposalReady must log an error.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ ProcessingAttempt badProposal = new ProcessingAttempt(
+ fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
+ ProcessingStatus.PROPOSAL_READY, null, null, false,
+ "model", "prompt", 1, 100, "{}", "reason",
+ null, DateSource.AI_PROVIDED, "Rechnung", null);
+ attemptRepo.savedAttempts.add(badProposal);
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.errorCallCount > 0,
+ "An error must be logged when the proposal state is inconsistent");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_duplicateResolutionFailure_logsError() {
+ // Duplicate resolution failure — finalizeProposalReady must log an error.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.errorCallCount > 0,
+ "An error must be logged when duplicate resolution fails");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_resolvedFilename_logsInfo() {
+ // Successful duplicate resolution — resolved filename must be logged at INFO.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart,
+ c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
+
+ assertTrue(capturingLogger.infoCallCount > 0,
+ "Resolved target filename must be logged at INFO level");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_firstCopyFails_logsWarn() {
+ // First copy attempt fails → immediate retry: a WARN must be logged for the first failure.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ CountingTargetFileCopyPort onlyFirstFails = new CountingTargetFileCopyPort(1);
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), onlyFirstFails, capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart,
+ c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
+
+ assertTrue(capturingLogger.warnCallCount > 0,
+ "A WARN must be logged when the first copy attempt fails and an immediate retry is triggered");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_logsError() {
+ // Both copy attempts fail — finalizeProposalReady must log an error.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ CountingTargetFileCopyPort bothFail = new CountingTargetFileCopyPort(2);
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), bothFail, capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.errorCallCount > 0,
+ "An error must be logged when both copy attempts fail");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_immediateRetrySucceeds_logsInfo() {
+ // First copy fails, immediate retry succeeds — a success INFO must be logged.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ CountingTargetFileCopyPort onlyFirstFails = new CountingTargetFileCopyPort(1);
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), onlyFirstFails, capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart,
+ c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
+
+ assertTrue(capturingLogger.infoCallCount > 0,
+ "An INFO must be logged when the immediate within-run retry succeeds");
}
/** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */
@@ -1581,5 +1819,91 @@ class DocumentProcessingCoordinatorTest {
boolean anyInfoContains(String text) {
return infoMessages.stream().anyMatch(m -> m.contains(text));
}
+
+ boolean anyErrorContains(String text) {
+ return errorMessages.stream().anyMatch(m -> m.contains(text));
+ }
+ }
+
+ // -------------------------------------------------------------------------
+ // AI sensitive content logging in finalization path
+ // -------------------------------------------------------------------------
+
+ @Test
+ void processDeferredOutcome_proposalReady_aiContentNotNull_callsDebugSensitiveAiContent() {
+ // buildValidProposalAttempt() has non-null aiRawResponse and aiReasoning.
+ // The conditional guards at lines 398 and 402 of finalizeProposalReady must
+ // trigger the debugSensitiveAiContent call when the values are present.
+ // If negated, the calls would be suppressed for non-null values — detectable here.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt()); // aiRawResponse="{}", aiReasoning="reason"
+
+ CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(
+ candidate, fingerprint, context, attemptStart,
+ c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
+
+ assertTrue(capturingLogger.debugSensitiveAiContentCallCount >= 2,
+ "debugSensitiveAiContent must be called for aiRawResponse and aiReasoning "
+ + "when both are non-null. Actual call count: "
+ + capturingLogger.debugSensitiveAiContentCallCount);
+ }
+
+ // -------------------------------------------------------------------------
+ // Best-effort rollback path: tryDeleteTargetFile and secondary persistence
+ // -------------------------------------------------------------------------
+
+ @Test
+ void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_callsTryDeleteTargetFile() {
+ // When persistence fails after a successful copy, the best-effort rollback
+ // must call tryDeleteTargetFile to clean up the orphaned target file.
+ // This test kills the 'removed call to tryDeleteTargetFile' mutation.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+ unitOfWorkPort.failOnExecute = true;
+
+ CapturingTargetFolderPort capturingFolderPort = new CapturingTargetFolderPort();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ capturingFolderPort, new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingFolderPort.tryDeleteCallCount > 0,
+ "tryDeleteTargetFile must be called at least once for best-effort rollback "
+ + "when persistence fails after a successful copy");
+ }
+
+ @Test
+ void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_logsSecondaryFailure() {
+ // When persistence fails after a successful copy and the secondary persistence
+ // attempt in persistTransientErrorAfterPersistenceFailure also fails,
+ // an error must be logged for the secondary failure.
+ // This kills the 'removed call to persistTransientErrorAfterPersistenceFailure' mutation.
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
+ recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
+ attemptRepo.savedAttempts.add(buildValidProposalAttempt());
+ unitOfWorkPort.failOnExecute = true; // both primary and secondary persistence fail
+
+ MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
+ DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
+ recordRepo, attemptRepo, unitOfWorkPort,
+ new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
+ DEFAULT_MAX_RETRIES_TRANSIENT);
+
+ coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
+
+ assertTrue(capturingLogger.anyErrorContains("Secondary persistence failure")
+ || capturingLogger.anyErrorContains("secondary"),
+ "An error must be logged for the secondary persistence failure. "
+ + "Captured error messages: " + capturingLogger.errorMessages);
}
}
\ No newline at end of file
diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
index 2126e11..472d6b5 100644
--- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
+++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java
@@ -7,8 +7,11 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
+import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
+import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
+import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
@@ -314,4 +317,58 @@ class ProcessingOutcomeTransitionTest {
assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus());
assertEquals(2, result.counters().transientErrorCount());
}
+
+ // -------------------------------------------------------------------------
+ // PreCheckPassed routed through transition (edge case: no AI step taken)
+ // -------------------------------------------------------------------------
+
+ @Test
+ void forNewDocument_preCheckPassed_limitOne_immediatelyFinal() {
+ // The transition routes a PreCheckPassed carrying no AI outcome into the
+ // transient-error path; at limit=1 that very first error is already final.
+ PreCheckPassed passedWithoutAiOutcome = new PreCheckPassed(
+ candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
+
+ ProcessingOutcomeTransition.ProcessingOutcome transitioned =
+ ProcessingOutcomeTransition.forNewDocument(passedWithoutAiOutcome, LIMIT_1);
+
+ assertEquals(ProcessingStatus.FAILED_FINAL, transitioned.overallStatus(),
+ "With limit=1 a PreCheckPassed-routed transient error must immediately finalise");
+ assertFalse(transitioned.retryable());
+ assertEquals(1, transitioned.counters().transientErrorCount());
+ assertEquals(0, transitioned.counters().contentErrorCount());
+ }
+
+ @Test
+ void forNewDocument_preCheckPassed_limitTwo_firstErrorRetryable() {
+ // Below the limit (first error, limit=2) a PreCheckPassed-routed transient
+ // error must leave the document retryable.
+ PreCheckPassed passedWithoutAiOutcome = new PreCheckPassed(
+ candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
+
+ ProcessingOutcomeTransition.ProcessingOutcome transitioned =
+ ProcessingOutcomeTransition.forNewDocument(passedWithoutAiOutcome, LIMIT_2);
+
+ assertEquals(ProcessingStatus.FAILED_RETRYABLE, transitioned.overallStatus(),
+ "With limit=2 the first PreCheckPassed-routed transient error must be retryable");
+ assertTrue(transitioned.retryable());
+ assertEquals(1, transitioned.counters().transientErrorCount());
+ assertEquals(0, transitioned.counters().contentErrorCount());
+ }
+
+ @Test
+ void forKnownDocument_preCheckPassed_limitTwo_secondErrorFinal() {
+ // A known document already at one transient error: the next
+ // PreCheckPassed-routed error raises the counter to the limit (2) and finalises.
+ PreCheckPassed passedWithoutAiOutcome = new PreCheckPassed(
+ candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
+ FailureCounters countersBefore = new FailureCounters(0, 1);
+
+ ProcessingOutcomeTransition.ProcessingOutcome transitioned =
+ ProcessingOutcomeTransition.forKnownDocument(passedWithoutAiOutcome, countersBefore, LIMIT_2);
+
+ assertEquals(ProcessingStatus.FAILED_FINAL, transitioned.overallStatus(),
+ "PreCheckPassed-routed error at transient limit must finalise to FAILED_FINAL");
+ assertFalse(transitioned.retryable());
+ assertEquals(2, transitioned.counters().transientErrorCount());
+ }
}
diff --git a/pdf-umbenenner-bootstrap/pom.xml b/pdf-umbenenner-bootstrap/pom.xml
index db4a68e..81f1bc3 100644
--- a/pdf-umbenenner-bootstrap/pom.xml
+++ b/pdf-umbenenner-bootstrap/pom.xml
@@ -62,6 +62,11 @@
+ * Each test method is independent and uses its own {@link E2ETestContext} backed by a
+ * JUnit {@code @TempDir}. All infrastructure adapters (SQLite, filesystem, PDF extraction,
+ * fingerprinting) are real production implementations. Only the AI invocation port is
+ * replaced by a configurable {@link StubAiInvocationPort} to avoid real HTTP calls.
+ *
+ *
+ * Searchable PDFs embed enough text to pass the minimum-text pre-check. The AI stub
+ * returns a title of {@code "Stromabrechnung"} and date {@code "2024-01-15"} by default,
+ * producing a target filename of {@code "2024-01-15 - Stromabrechnung.pdf"}.
+ */
+class BatchRunEndToEndTest {
+
+ private static final String SAMPLE_PDF_TEXT =
+ "Stromabrechnung Kundenname Musterstadt Datum 15.01.2024 Betrag 123,45 EUR";
+
+ // =========================================================================
+ // Scenario 1: Happy-path to SUCCESS
+ // =========================================================================
+
+ /**
+ * Verifies the complete two-run happy-path:
+ *
+ * Provides real infrastructure adapters for all subsystems (SQLite persistence, filesystem
+ * source/target folders, PDF text extraction, SHA-256 fingerprinting, run locking) and a
+ * configurable stub ({@link StubAiInvocationPort}) for the AI invocation port.
+ * This ensures that end-to-end tests cover the complete production code path without
+ * performing real HTTP calls to an AI service.
+ *
+ *
+ * Not thread-safe. Each test method should use its own context instance.
+ */
+public final class E2ETestContext implements AutoCloseable {
+
+ /** Maximum pages before triggering a deterministic content error. */
+ static final int MAX_PAGES = 50;
+
+ /** Maximum text characters sent to the AI service. */
+ static final int MAX_TEXT_CHARS = 10_000;
+
+ /**
+ * Maximum transient retries before a document is finalized to {@code FAILED_FINAL}.
+ * Set to 3 to allow multi-run transient-failure tests without immediate finalization.
+ */
+ static final int MAX_RETRIES_TRANSIENT = 3;
+
+ /** Model name carried in attempt history (no real inference occurs). */
+ static final String AI_MODEL = "e2e-stub-model";
+
+ private final Path sourceFolder;
+ private final Path targetFolder;
+ private final Path lockFile;
+ private final Path promptFile;
+ private final String jdbcUrl;
+
+ private final SqliteDocumentRecordRepositoryAdapter documentRepo;
+ private final SqliteProcessingAttemptRepositoryAdapter attemptRepo;
+
+ /**
+ * Configurable AI stub. Tests may call {@link StubAiInvocationPort#configureSuccess},
+ * {@link StubAiInvocationPort#configureTechnicalFailure}, or
+ * {@link StubAiInvocationPort#reset()} between batch runs.
+ */
+ public final StubAiInvocationPort aiStub;
+
+ /**
+ * Optional override for the {@link TargetFileCopyPort}.
+ * {@code null} means the real {@link FilesystemTargetFileCopyAdapter} is used.
+ * Set via {@link #setTargetFileCopyPortOverride} to inject a failure-simulating stub.
+ */
+ private TargetFileCopyPort targetFileCopyPortOverride;
+
+ // Private: all collaborators are wired by initialize(Path); this constructor
+ // only stores the pre-built references.
+ private E2ETestContext(
+ Path sourceFolder,
+ Path targetFolder,
+ Path lockFile,
+ Path promptFile,
+ String jdbcUrl,
+ SqliteDocumentRecordRepositoryAdapter documentRepo,
+ SqliteProcessingAttemptRepositoryAdapter attemptRepo,
+ StubAiInvocationPort aiStub) {
+ this.sourceFolder = sourceFolder;
+ this.targetFolder = targetFolder;
+ this.lockFile = lockFile;
+ this.promptFile = promptFile;
+ this.jdbcUrl = jdbcUrl;
+ this.documentRepo = documentRepo;
+ this.attemptRepo = attemptRepo;
+ this.aiStub = aiStub;
+ }
+
+ /**
+ * Initializes a fully wired end-to-end test context rooted in {@code tempDir}.
+ *
+ * Creates the {@code source/}, {@code target/} subdirectories and a minimal prompt
+ * file, initializes the SQLite schema, and wires all adapters.
+ *
+ * @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
+ * @return a ready-to-use context; caller is responsible for closing it
+ * @throws Exception if schema initialization or directory/file creation fails
+ */
+ public static E2ETestContext initialize(Path tempDir) throws Exception {
+ Path sourceFolder = Files.createDirectories(tempDir.resolve("source"));
+ Path targetFolder = Files.createDirectories(tempDir.resolve("target"));
+ Path lockFile = tempDir.resolve("run.lock");
+ Path promptFile = tempDir.resolve("prompt.txt");
+
+ Files.writeString(promptFile,
+ "Analysiere das folgende Dokument und liefere Datum, Titel und Begruendung als JSON-Objekt.");
+
+ // Normalise Windows path separators so the JDBC URL is valid on all platforms.
+ String jdbcUrl = "jdbc:sqlite:" + tempDir.resolve("test.db").toAbsolutePath().toString().replace('\\', '/');
+
+ // Schema must exist before any repository adapter touches the database.
+ SqliteSchemaInitializationAdapter schema = new SqliteSchemaInitializationAdapter(jdbcUrl);
+ schema.initializeSchema();
+
+ SqliteDocumentRecordRepositoryAdapter documentRepo =
+ new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
+ SqliteProcessingAttemptRepositoryAdapter attemptRepo =
+ new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);
+
+ return new E2ETestContext(
+ sourceFolder, targetFolder, lockFile, promptFile,
+ jdbcUrl, documentRepo, attemptRepo, new StubAiInvocationPort());
+ }
+
+ // =========================================================================
+ // Test fixture creation
+ // =========================================================================
+
+ /**
+ * Creates a single-page searchable PDF in the source folder with the given text.
+ *
+ * The file is ready for the batch run as soon as this method returns.
+ *
+ * @param filename the PDF filename (e.g. {@code "rechnung.pdf"})
+ * @param textContent text to embed; should be at least a few words to pass pre-checks
+ * @return the absolute path of the created file
+ * @throws IOException if the file cannot be written
+ */
+ public Path createSearchablePdf(String filename, String textContent) throws IOException {
+ Path createdPdf = sourceFolder.resolve(filename);
+ MinimalPdfFactory.createSearchablePdf(createdPdf, textContent);
+ return createdPdf;
+ }
+
+ /**
+ * Creates a single-page blank PDF (no extractable text) in the source folder.
+ *
+ * Processing this file triggers the "no usable text" deterministic content error,
+ * which skips the AI call.
+ *
+ * @param filename the PDF filename (e.g. {@code "blank.pdf"})
+ * @return the absolute path of the created file
+ * @throws IOException if the file cannot be written
+ */
+ public Path createBlankPdf(String filename) throws IOException {
+ Path createdPdf = sourceFolder.resolve(filename);
+ MinimalPdfFactory.createBlankPdf(createdPdf);
+ return createdPdf;
+ }
+
+ // =========================================================================
+ // Batch execution
+ // =========================================================================
+
+ /**
+ * Executes one complete batch run using the current stub configuration.
+ *
+ * A fresh {@link BatchRunContext} with a new {@link RunId} is created for each call,
+ * matching the production behavior where every Task Scheduler invocation is a
+ * distinct run.
+ *
+ * @return the outcome of the batch run
+ */
+ public BatchRunOutcome runBatch() {
+ BatchRunContext freshRunContext = new BatchRunContext(
+ new RunId(UUID.randomUUID().toString()), Instant.now());
+ return buildUseCase().execute(freshRunContext);
+ }
+
+ // =========================================================================
+ // State inspection helpers
+ // =========================================================================
+
+ /**
+ * Looks up the document master record for the given fingerprint.
+ *
+ * @param fingerprint the document fingerprint to query
+ * @return the master record if one exists, {@link Optional#empty()} if unknown or
+ * if a persistence lookup error occurred
+ */
+ public Optional
+ * Useful for correlating a test PDF with its database record after a batch run.
+ *
+ * @param file the absolute path of the file to fingerprint
+ * @return the fingerprint
+ * @throws IllegalStateException if fingerprint computation fails
+ */
+ public DocumentFingerprint computeFingerprint(Path file) {
+ Sha256FingerprintAdapter adapter = new Sha256FingerprintAdapter();
+ // Construct a minimal candidate that mirrors how the production source adapter creates one
+ // NOTE(review): file size is passed as 0L — assumes the fingerprint hashes the file
+ // content only; confirm against Sha256FingerprintAdapter if size ever enters the hash.
+ SourceDocumentCandidate candidate = new SourceDocumentCandidate(
+ file.getFileName().toString(),
+ 0L,
+ new SourceDocumentLocator(file.toAbsolutePath().toString()));
+ return switch (adapter.computeFingerprint(candidate)) {
+ case FingerprintSuccess s -> s.fingerprint();
+ // Any non-success result is a broken test fixture, not a scenario under test.
+ default -> throw new IllegalStateException(
+ "Fingerprint computation failed for test fixture: " + file);
+ };
+ }
+
+ /**
+ * Lists the filenames of all files currently in the target folder.
+ *
+ * @return list of filenames; empty if target folder is empty
+ * @throws IOException if the target folder cannot be read
+ */
+ public List
+ * All adapters are instantiated fresh per run to avoid shared mutable state between
+ * runs (e.g. locks, connection states). The AI stub and optional copy-port override
+ * are re-used across runs within the same test.
+ */
+ private DefaultBatchRunProcessingUseCase buildUseCase() {
+ RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(
+ MAX_PAGES, MAX_RETRIES_TRANSIENT, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);
+
+ FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();
+
+ // Persistence: the SQLite repositories are shared across runs (same database),
+ // while a fresh unit of work is created per run.
+ DocumentRecordRepository documentRecordRepository = documentRepo;
+ ProcessingAttemptRepository processingAttemptRepository = attemptRepo;
+ UnitOfWorkPort unitOfWorkPort = new SqliteUnitOfWorkAdapter(jdbcUrl);
+
+ ProcessingLogger coordinatorLogger = new Log4jProcessingLogger(
+ DocumentProcessingCoordinator.class);
+ // Target-side adapters are real, unless a test injected a failure-simulating
+ // copy-port override via setTargetFileCopyPortOverride.
+ TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(targetFolder);
+ TargetFileCopyPort targetFileCopyPort = (targetFileCopyPortOverride != null)
+ ? targetFileCopyPortOverride
+ : new FilesystemTargetFileCopyAdapter(targetFolder);
+
+ DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
+ documentRecordRepository,
+ processingAttemptRepository,
+ unitOfWorkPort,
+ targetFolderPort,
+ targetFileCopyPort,
+ coordinatorLogger,
+ MAX_RETRIES_TRANSIENT);
+
+ // AI pipeline: the real naming service and validator sit in front of the stub port,
+ // so request building and response validation follow the production code path.
+ PromptPort promptPort = new FilesystemPromptPortAdapter(promptFile);
+ ClockPort clockPort = new SystemClockAdapter();
+ AiResponseValidator aiResponseValidator = new AiResponseValidator(clockPort);
+ AiNamingService aiNamingService = new AiNamingService(
+ aiStub, promptPort, aiResponseValidator, AI_MODEL, MAX_TEXT_CHARS);
+
+ ProcessingLogger useCaseLogger = new Log4jProcessingLogger(
+ DefaultBatchRunProcessingUseCase.class);
+
+ // Source discovery, PDF extraction and run locking use real adapters rooted
+ // in the per-test temp directory.
+ RunLockPort runLockPort = new FilesystemRunLockPortAdapter(lockFile);
+ SourceDocumentCandidatesPort candidatesPort =
+ new SourceDocumentCandidatesPortAdapter(sourceFolder);
+ PdfTextExtractionPort extractionPort = new PdfTextExtractionPortAdapter();
+
+ return new DefaultBatchRunProcessingUseCase(
+ runtimeConfig,
+ runLockPort,
+ candidatesPort,
+ extractionPort,
+ fingerprintPort,
+ coordinator,
+ aiNamingService,
+ useCaseLogger);
+ }
+}
diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/MinimalPdfFactory.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/MinimalPdfFactory.java
new file mode 100644
index 0000000..a4ba61c
--- /dev/null
+++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/MinimalPdfFactory.java
@@ -0,0 +1,72 @@
+package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.font.PDType1Font;
+import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
+
+import java.io.IOException;
+import java.nio.file.Path;
+
+/**
+ * Factory for creating minimal PDF test fixtures used in end-to-end tests.
+ *
+ * <p>Provides two variants:
+ * <ul>
+ * <li>{@link #createSearchablePdf} — one page with embedded, extractable text</li>
+ * <li>{@link #createBlankPdf} — one empty page without any extractable text</li>
+ * </ul>
+ *
+ * <p>Uses Apache PDFBox to create real, structurally valid PDF files so that the
+ * production {@code PdfTextExtractionPortAdapter} processes them correctly.
+ */
+final class MinimalPdfFactory {
+
+ private MinimalPdfFactory() {
+ // Static utility class — not instantiable
+ }
+
+ /**
+ * Creates a single-page searchable PDF with the given text content at the output path.
+ *
+ * <p>The resulting file passes the production pre-checks for minimum text length and
+ * page count, enabling the AI naming pipeline to run.
+ *
+ * @param outputPath the path where the PDF will be written; parent directory must exist
+ * @param textContent the text to embed in the PDF; should be non-empty for happy-path tests
+ * @throws IOException if the file cannot be written
+ */
+ static void createSearchablePdf(Path outputPath, String textContent) throws IOException {
+ try (PDDocument doc = new PDDocument()) {
+ PDPage page = new PDPage();
+ doc.addPage(page);
+ try (PDPageContentStream stream = new PDPageContentStream(doc, page)) {
+ // A standard-14 font avoids embedding font files, keeping fixtures tiny.
+ stream.beginText();
+ stream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
+ stream.newLineAtOffset(50, 700);
+ stream.showText(textContent);
+ stream.endText();
+ }
+ doc.save(outputPath.toFile());
+ }
+ }
+
+ /**
+ * Creates a single-page blank PDF with no text content at the output path.
+ *
+ * <p>The resulting file triggers the "no usable text" pre-check failure
+ * (deterministic content error), which does not invoke the AI service.
+ *
+ * @param outputPath the path where the PDF will be written; parent directory must exist
+ * @throws IOException if the file cannot be written
+ */
+ static void createBlankPdf(Path outputPath) throws IOException {
+ try (PDDocument doc = new PDDocument()) {
+ doc.addPage(new PDPage());
+ doc.save(outputPath.toFile());
+ }
+ }
+}
diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/StubAiInvocationPort.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/StubAiInvocationPort.java
new file mode 100644
index 0000000..0c98e59
--- /dev/null
+++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/StubAiInvocationPort.java
@@ -0,0 +1,109 @@
+package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
+
+import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
+import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
+import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
+import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
+import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
+import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
+
+import java.util.concurrent.atomic.AtomicInteger;
+
+/**
+ * Configurable test double for {@link AiInvocationPort}.
+ *
+ * <p>Replaces the real HTTP-based AI adapter in end-to-end tests so that the processing
+ * pipeline can be exercised without real network calls. Supports two response modes:
+ * <ul>
+ * <li>success (default) — returns a raw JSON naming proposal built from the configured
+ * title, date and reasoning</li>
+ * <li>technical failure — returns a transient failure with reason {@code STUB_FAILURE}</li>
+ * </ul>
+ *
+ * <p>The stub tracks the total number of invocations so that tests can verify whether
+ * the AI pipeline was called at all (e.g. confirming that a {@code PROPOSAL_READY}
+ * finalization skips the AI call).
+ */
+final class StubAiInvocationPort implements AiInvocationPort {
+
+ private final AtomicInteger invocationCount = new AtomicInteger(0);
+
+ // volatile: tests may reconfigure between runs while adapters hold a reference
+ private volatile boolean returnTechnicalFailure = false;
+ private volatile String title = "Stromabrechnung";
+ private volatile String date = "2024-01-15";
+ private volatile String reasoning = "Testdokument fuer End-to-End-Tests.";
+
+ /**
+ * Configures the stub to return a valid naming proposal with the given title and date.
+ *
+ * @param title the document title (must pass validation: max 20 chars, no special chars)
+ * @param date the document date in {@code YYYY-MM-DD} format, or {@code null} to omit
+ */
+ void configureSuccess(String title, String date) {
+ this.title = title;
+ this.date = date;
+ this.returnTechnicalFailure = false;
+ }
+
+ /**
+ * Configures the stub to return a transient technical failure on every invocation.
+ * The failure reason is {@code STUB_FAILURE}.
+ */
+ void configureTechnicalFailure() {
+ this.returnTechnicalFailure = true;
+ }
+
+ /**
+ * Resets the stub to its default success configuration with title "Stromabrechnung"
+ * and date "2024-01-15", and clears the invocation counter.
+ */
+ void reset() {
+ this.title = "Stromabrechnung";
+ this.date = "2024-01-15";
+ this.reasoning = "Testdokument fuer End-to-End-Tests.";
+ this.returnTechnicalFailure = false;
+ invocationCount.set(0);
+ }
+
+ /**
+ * Returns the total number of times {@link #invoke} was called since construction
+ * or the last {@link #reset()}.
+ */
+ int invocationCount() {
+ return invocationCount.get();
+ }
+
+ /**
+ * Resets the invocation counter to zero without changing response configuration.
+ */
+ void resetInvocationCount() {
+ invocationCount.set(0);
+ }
+
+ /**
+ * Returns either a success response or a technical failure, depending on current configuration.
+ * Increments the invocation counter on every call.
+ */
+ @Override
+ public AiInvocationResult invoke(AiRequestRepresentation request) {
+ invocationCount.incrementAndGet();
+
+ if (returnTechnicalFailure) {
+ return new AiInvocationTechnicalFailure(
+ request,
+ "STUB_FAILURE",
+ "Test stub: configured to return technical failure");
+ }
+
+ // Fix: escape configured values so a title/date/reasoning containing '"' or '\'
+ // still yields structurally valid JSON (raw concatenation previously broke it).
+ // Defaults contain no special characters, so existing tests see identical output.
+ String dateField = (date != null) ? "\"date\": \"" + escapeJson(date) + "\", " : "";
+ String rawJson = "{"
+ + dateField
+ + "\"title\": \"" + escapeJson(title) + "\", "
+ + "\"reasoning\": \"" + escapeJson(reasoning) + "\""
+ + "}";
+ return new AiInvocationSuccess(request, new AiRawResponse(rawJson));
+ }
+
+ /** Minimal JSON string escaping: backslash first, then double quote (RFC 8259). */
+ private static String escapeJson(String value) {
+ return value.replace("\\", "\\\\").replace("\"", "\\\"");
+ }
+}
End-to-end invariants verified
+ *
+ *
+ *
+ * Document text used in tests
+ *
+ *
+ * This confirms the leading-proposal-attempt rule and the two-phase finalization.
+ */
+ @Test
+ void happyPath_twoRuns_reachesSuccess(@TempDir Path tempDir) throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createSearchablePdf("rechnung.pdf", SAMPLE_PDF_TEXT);
+ Path pdfPath = ctx.sourceFolder().resolve("rechnung.pdf");
+ DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
+
+ // --- Run 1: AI produces a naming proposal ---
+ BatchRunOutcome run1 = ctx.runBatch();
+
+ assertThat(run1).isEqualTo(BatchRunOutcome.SUCCESS);
+ assertThat(ctx.aiStub.invocationCount()).isEqualTo(1);
+
+ DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
+ assertThat(ctx.listTargetFiles()).isEmpty();
+
+ List
+ *
+ * No AI call is made in either run because the content pre-check prevents it.
+ */
+ @Test
+ void deterministicContentError_twoRuns_reachesFailedFinal(@TempDir Path tempDir)
+ throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createBlankPdf("blank.pdf");
+ Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
+ DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
+
+ // --- Run 1 ---
+ ctx.runBatch();
+
+ assertThat(ctx.aiStub.invocationCount())
+ .as("AI must not be called for a blank PDF")
+ .isEqualTo(0);
+
+ DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
+ assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(1);
+ assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(0);
+
+ List
+ *
+ * This confirms that the second run never re-invokes the AI when a valid
+ * {@code PROPOSAL_READY} attempt already exists.
+ */
+ @Test
+ void proposalReadyFinalization_noAiCallInSecondRun(@TempDir Path tempDir) throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
+ Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
+ DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
+
+ // --- Run 1: establish PROPOSAL_READY ---
+ ctx.runBatch();
+
+ DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
+ assertThat(ctx.listTargetFiles()).isEmpty();
+
+ // --- Run 2: AI stub would fail if called, but must not be called ---
+ ctx.aiStub.configureTechnicalFailure();
+ ctx.aiStub.resetInvocationCount();
+
+ ctx.runBatch();
+
+ assertThat(ctx.aiStub.invocationCount())
+ .as("AI must not be invoked during PROPOSAL_READY finalization")
+ .isEqualTo(0);
+
+ DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
+
+ List
+ *
+ * The immediate retry does not count as a cross-run transient error.
+ */
+ @Test
+ void targetCopyError_immediateRetrySucceeds_recordsSuccess(@TempDir Path tempDir)
+ throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
+ Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
+ DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
+
+ // --- Run 1: produce PROPOSAL_READY ---
+ ctx.runBatch();
+
+ DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
+
+ // --- Run 2: first copy attempt fails, retry succeeds ---
+ TargetFileCopyPort realAdapter =
+ new de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter(
+ ctx.targetFolder());
+ AtomicInteger copyCallCount = new AtomicInteger(0);
+
+ TargetFileCopyPort stubWithRetry = (locator, resolvedFilename) -> {
+ int call = copyCallCount.incrementAndGet();
+ if (call == 1) {
+ // First attempt: simulate a transient write failure
+ return new TargetFileCopyTechnicalFailure(
+ "Simulated write failure on first attempt", true);
+ }
+ // Second attempt (immediate within-run retry): delegate to real adapter
+ return realAdapter.copyToTarget(locator, resolvedFilename);
+ };
+
+ ctx.setTargetFileCopyPortOverride(stubWithRetry);
+ ctx.runBatch();
+
+ assertThat(copyCallCount.get())
+ .as("Copy port must have been called twice (initial + retry)")
+ .isEqualTo(2);
+
+ DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
+ assertThat(record2.failureCounters().transientErrorCount())
+ .as("Immediate within-run retry must not increment the transient error counter")
+ .isEqualTo(0);
+
+ List
+ *
+ * This confirms the {@code maxRetriesTransient} boundary: the run that pushes the
+ * counter to the configured limit is the run that finalises the document.
+ */
+ @Test
+ void transientErrors_multipleRuns_exhaustsLimit_reachesFailedFinal(@TempDir Path tempDir)
+ throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
+ Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
+ DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
+
+ ctx.aiStub.configureTechnicalFailure();
+
+ // --- Run 1: counter 0 → 1, below limit → FAILED_RETRYABLE ---
+ ctx.runBatch();
+
+ DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
+ assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(1);
+ assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(0);
+
+ List
+ *
+ * This confirms that the within-run retry does not suppress the error when both
+ * attempts fail, and that the transient counter is incremented exactly once.
+ */
+ @Test
+ void targetCopyError_bothAttemptsFail_reachesFailedRetryable(@TempDir Path tempDir)
+ throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
+ Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
+ DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
+
+ // --- Run 1: establish PROPOSAL_READY ---
+ ctx.runBatch();
+
+ assertThat(ctx.findDocumentRecord(fp).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.PROPOSAL_READY);
+
+ // --- Run 2: both copy attempts fail ---
+ ctx.setTargetFileCopyPortOverride(
+ (locator, resolvedFilename) ->
+ new TargetFileCopyTechnicalFailure(
+ "Simulated persistent write failure", true));
+
+ ctx.runBatch();
+
+ DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
+ assertThat(record.overallStatus())
+ .as("Both copy attempts failing must produce FAILED_RETRYABLE "
+ + "(transient error, limit not yet reached)")
+ .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
+ assertThat(record.failureCounters().transientErrorCount())
+ .as("The double copy failure must increment the transient counter exactly once")
+ .isEqualTo(1);
+
+ List
+ *
+ * Both documents reach {@code SUCCESS} and the target folder contains exactly two files.
+ */
+ @Test
+ void twoDifferentDocuments_sameProposedName_secondGetsDuplicateSuffix(@TempDir Path tempDir)
+ throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ // Two distinct PDFs; the AI stub returns the same title and date for both
+ ctx.createSearchablePdf("rechnung1.pdf", SAMPLE_PDF_TEXT);
+ ctx.createSearchablePdf("rechnung2.pdf",
+ "Stromabrechnung Zweiter Kunde Musterstadt Datum 15.01.2024 Betrag 99,00 EUR");
+
+ Path pdf1 = ctx.sourceFolder().resolve("rechnung1.pdf");
+ Path pdf2 = ctx.sourceFolder().resolve("rechnung2.pdf");
+ DocumentFingerprint fp1 = ctx.computeFingerprint(pdf1);
+ DocumentFingerprint fp2 = ctx.computeFingerprint(pdf2);
+
+ // --- Run 1: AI stub processes both PDFs → PROPOSAL_READY ---
+ ctx.runBatch();
+
+ assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.PROPOSAL_READY);
+ assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.PROPOSAL_READY);
+ assertThat(ctx.listTargetFiles()).isEmpty();
+
+ // --- Run 2: both finalized; the second must receive the (1) suffix ---
+ ctx.runBatch();
+
+ assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.SUCCESS);
+ assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.SUCCESS);
+
+ List
+ *
+ * This confirms the exit-code contract: only hard bootstrap or infrastructure
+ * failures produce a non-zero exit code; document-level errors do not.
+ */
+ @Test
+ void mixedBatch_oneSuccess_oneContentError_batchOutcomeIsSuccess(@TempDir Path tempDir)
+ throws Exception {
+ try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ ctx.createSearchablePdf("good.pdf", SAMPLE_PDF_TEXT);
+ ctx.createBlankPdf("blank.pdf");
+
+ Path goodPdf = ctx.sourceFolder().resolve("good.pdf");
+ Path blankPdf = ctx.sourceFolder().resolve("blank.pdf");
+ DocumentFingerprint fpGood = ctx.computeFingerprint(goodPdf);
+ DocumentFingerprint fpBlank = ctx.computeFingerprint(blankPdf);
+
+ // --- Run 1 ---
+ BatchRunOutcome run1 = ctx.runBatch();
+
+ assertThat(run1)
+ .as("Batch must complete with SUCCESS even when individual documents fail")
+ .isEqualTo(BatchRunOutcome.SUCCESS);
+ assertThat(ctx.findDocumentRecord(fpGood).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.PROPOSAL_READY);
+ assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow().overallStatus())
+ .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
+ assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow()
+ .failureCounters().contentErrorCount()).isEqualTo(1);
+
+ // --- Run 2 ---
+ BatchRunOutcome run2 = ctx.runBatch();
+
+ assertThat(run2)
+ .as("Batch must complete with SUCCESS even when a document is finalised "
+ + "to FAILED_FINAL")
+ .isEqualTo(BatchRunOutcome.SUCCESS);
+
+ DocumentRecord goodRecord = ctx.findDocumentRecord(fpGood).orElseThrow();
+ assertThat(goodRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
+
+ DocumentRecord blankRecord = ctx.findDocumentRecord(fpBlank).orElseThrow();
+ assertThat(blankRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
+ assertThat(blankRecord.failureCounters().contentErrorCount()).isEqualTo(2);
+
+ // Exactly one target file from the successfully processed document
+ ListInvariants verified by this context
+ *
+ *
+ *
+ * Usage pattern
+ * {@code
+ * @TempDir Path tempDir;
+ *
+ * @Test
+ * void example() throws Exception {
+ * try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ * ctx.createSearchablePdf("doc.pdf", "Rechnung 2024-01-15 ...");
+ * BatchRunOutcome run1 = ctx.runBatch();
+ * // assertions...
+ * }
+ * }
+ * }
+ *
+ * Thread safety
+ *
+ *
+ *
+ *
+ *