diff --git a/config/application-local.example.properties b/config/application-local.example.properties index 2f014cf..468ddf5 100644 --- a/config/application-local.example.properties +++ b/config/application-local.example.properties @@ -1,21 +1,70 @@ -# PDF Umbenenner Local Configuration Example -# AP-005: Copy this file to config/application.properties and adjust values for local development +# PDF Umbenenner – Konfigurationsbeispiel für lokale Entwicklung +# Kopiere diese Datei nach config/application.properties und passe die Werte an. -# Mandatory M1 properties +# --------------------------------------------------------------------------- +# Pflichtparameter +# --------------------------------------------------------------------------- + +# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden. +# Der Ordner muss vorhanden und lesbar sein. source.folder=./work/local/source -target.folder=./work/local/target -sqlite.file=./work/local/pdf-umbenenner.db -api.baseUrl=http://localhost:8080/api -api.model=gpt-4o-mini -api.timeoutSeconds=30 -max.retries.transient=3 -max.pages=10 -max.text.characters=5000 -prompt.template.file=./config/prompts/local-template.txt -# Optional properties -runtime.lock.file=./work/local/lock.pid +# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden. +# Wird automatisch angelegt, wenn er noch nicht existiert. +target.folder=./work/local/target + +# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie. +# Das übergeordnete Verzeichnis muss vorhanden sein. +sqlite.file=./work/local/pdf-umbenenner.db + +# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions). +api.baseUrl=https://api.openai.com/v1 + +# Modellname des KI-Dienstes. +api.model=gpt-4o-mini + +# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein). +api.timeoutSeconds=30 + +# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument. +# Muss eine ganze Zahl >= 1 sein. 
Bei Erreichen des Grenzwerts wird der +# Dokumentstatus auf FAILED_FINAL gesetzt. +max.retries.transient=3 + +# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als +# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf). +max.pages=10 + +# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird. +max.text.characters=5000 + +# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator +# in der Versuchshistorie. +prompt.template.file=./config/prompts/template.txt + +# --------------------------------------------------------------------------- +# API-Schlüssel +# --------------------------------------------------------------------------- +# Der API-Schlüssel kann wahlweise über diese Property oder über die +# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden. +# Die Umgebungsvariable hat Vorrang. +api.key=your-local-api-key-here + +# --------------------------------------------------------------------------- +# Optionale Parameter +# --------------------------------------------------------------------------- + +# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen). +# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis. +runtime.lock.file=./work/local/pdf-umbenenner.lock + +# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/. log.directory=./work/local/logs + +# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO. log.level=INFO -# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY -api.key=your-local-api-key-here \ No newline at end of file + +# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben. +# Erlaubte Werte: true oder false. Standard ist false (geschützt). +# Nur für Diagnosezwecke auf true setzen. 
+log.ai.sensitive=false diff --git a/config/application-test.example.properties b/config/application-test.example.properties index 96f98ae..4d7a7ab 100644 --- a/config/application-test.example.properties +++ b/config/application-test.example.properties @@ -1,21 +1,71 @@ -# PDF Umbenenner Test Configuration Example -# AP-005: Copy this file to config/application.properties and adjust values for testing +# PDF Umbenenner – Konfigurationsbeispiel für Testläufe +# Kopiere diese Datei nach config/application.properties und passe die Werte an. +# Diese Vorlage enthält kürzere Timeouts und niedrigere Limits für Testläufe. -# Mandatory M1 properties +# --------------------------------------------------------------------------- +# Pflichtparameter +# --------------------------------------------------------------------------- + +# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden. +# Der Ordner muss vorhanden und lesbar sein. source.folder=./work/test/source -target.folder=./work/test/target -sqlite.file=./work/test/pdf-umbenenner-test.db -api.baseUrl=http://localhost:8081/api -api.model=gpt-4o-mini-test -api.timeoutSeconds=10 -max.retries.transient=1 -max.pages=5 -max.text.characters=2000 -prompt.template.file=./config/prompts/test-template.txt -# Optional properties -runtime.lock.file=./work/test/lock.pid +# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden. +# Wird automatisch angelegt, wenn er noch nicht existiert. +target.folder=./work/test/target + +# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie. +# Das übergeordnete Verzeichnis muss vorhanden sein. +sqlite.file=./work/test/pdf-umbenenner-test.db + +# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions). +api.baseUrl=https://api.openai.com/v1 + +# Modellname des KI-Dienstes. +api.model=gpt-4o-mini + +# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein). 
+api.timeoutSeconds=10 + +# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument. +# Muss eine ganze Zahl >= 1 sein. Bei Erreichen des Grenzwerts wird der +# Dokumentstatus auf FAILED_FINAL gesetzt. +max.retries.transient=1 + +# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als +# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf). +max.pages=5 + +# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird. +max.text.characters=2000 + +# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator +# in der Versuchshistorie. +prompt.template.file=./config/prompts/template.txt + +# --------------------------------------------------------------------------- +# API-Schlüssel +# --------------------------------------------------------------------------- +# Der API-Schlüssel kann wahlweise über diese Property oder über die +# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden. +# Die Umgebungsvariable hat Vorrang. +api.key=test-api-key-placeholder + +# --------------------------------------------------------------------------- +# Optionale Parameter +# --------------------------------------------------------------------------- + +# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen). +# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis. +runtime.lock.file=./work/test/pdf-umbenenner.lock + +# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/. log.directory=./work/test/logs + +# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO. log.level=DEBUG -# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY -api.key=test-api-key-placeholder \ No newline at end of file + +# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben. +# Erlaubte Werte: true oder false. Standard ist false (geschützt). +# Nur für Diagnosezwecke auf true setzen. 
+log.ai.sensitive=false diff --git a/config/prompts/template.txt b/config/prompts/template.txt index 68422d2..167c826 100644 --- a/config/prompts/template.txt +++ b/config/prompts/template.txt @@ -1 +1,22 @@ -This is a test prompt template for AP-006 validation. +Du bist ein Assistent zur automatischen Benennung gescannter PDF-Dokumente. + +Analysiere den folgenden Dokumenttext und ermittle: + +1. Einen inhaltlich passenden deutschen Titel (maximal 20 Zeichen, nur Buchstaben und Leerzeichen, keine Abkürzungen, keine generischen Bezeichnungen wie "Dokument", "Datei", "Scan" oder "PDF") +2. Das relevanteste Datum des Dokuments + +Datumsermittlung nach Priorität: +- Rechnungsdatum +- Dokumentdatum +- Ausstellungsdatum oder Bescheiddatum +- Schreibdatum oder Ende eines Leistungszeitraums +- Kein Datum angeben, wenn kein belastbares Datum eindeutig ableitbar ist + +Titelregeln: +- Titel auf Deutsch formulieren +- Eigennamen (Personen, Firmen, Orte) unverändert übernehmen +- Maximal 20 Zeichen (nur der Basistitel, ohne Datumspräfix) +- Keine Sonderzeichen außer Leerzeichen +- Eindeutig und verständlich, nicht generisch + +Wenn das Dokument nicht eindeutig interpretierbar ist, beschreibe dies im Reasoning. 
diff --git a/docs/befundliste.md b/docs/befundliste.md new file mode 100644 index 0000000..eec22d3 --- /dev/null +++ b/docs/befundliste.md @@ -0,0 +1,209 @@ +# Befundliste – Integrierte Gesamtprüfung des Endstands + +**Erstellt:** 2026-04-08 +**Grundlage:** Vollständiger Maven-Reactor-Build, Unit-Tests, E2E-Tests, Integrationstests (Smoke), +PIT-Mutationsanalyse, Code-Review gegen verbindliche Spezifikationen (technik-und-architektur.md, +fachliche-anforderungen.md, CLAUDE.md) + +--- + +## Ausgeführte Prüfungen + +| Prüfbereich | Ausgeführt | Ergebnis | +|---|---|---| +| Maven-Reactor-Build (clean verify, alle Module) | ja | GRÜN | +| Unit-Tests (Domain, Application, Adapter-out, Bootstrap) | ja | GRÜN | +| E2E-Tests (BatchRunEndToEndTest, 11 Szenarien) | ja | GRÜN | +| Integrationstests / Smoke-IT (ExecutableJarSmokeTestIT, 2 Tests) | ja | GRÜN | +| PIT-Mutationsanalyse (alle Module) | ja | siehe Einzelbefunde | +| Hexagonale Architektur – Domain-Isolation | ja | GRÜN | +| Hexagonale Architektur – Port-Verträge (kein Path/NIO/JDBC) | ja | GRÜN | +| Hexagonale Architektur – keine Adapter-zu-Adapter-Abhängigkeiten | ja | GRÜN | +| Statusmodell (8 Werte, Semantik laut CLAUDE.md) | ja | GRÜN | +| Naming-Convention-Regel (kein M1–M8, kein AP-xxx im Code) | ja | OFFEN (nicht blockierend) | +| Logging-Sensibilitätsregel (log.ai.sensitive) | ja | GRÜN | +| Exit-Code-Semantik (0 / 1) | ja | GRÜN | +| Konfigurationsbeispiele (Pflicht- und Optionalparameter) | ja | GRÜN | +| Betriebsdokumentation (docs/betrieb.md) | ja | GRÜN | +| Prompt-Template im Repository | ja | GRÜN | +| Rückwärtsverträglichkeit M4–M7 (Statusmodell, Schema) | ja (statisch) | GRÜN | + +--- + +## Grüne Bereiche (keine Befunde) + +### Build und Tests + +- Vollständiger Maven-Reactor-Build erfolgreich (`BUILD SUCCESS`, Gesamtlaufzeit ~4 Minuten) +- **827+ Tests** bestanden, 0 Fehler, 0 übersprungen: + - Domain: 227 Tests + - Application: 295 Tests + - Adapter-out: 227 Tests + - Bootstrap (Unit): 76 Tests + 
- Smoke-IT: 2 Tests + +### E2E-Szenarien (BatchRunEndToEndTest) + +Alle geforderten Kernszenarien aus der E2E-Testbasis sind abgedeckt und grün: + +- Happy-Path: zwei Läufe → `SUCCESS` +- Deterministischer Inhaltsfehler: zwei Läufe → `FAILED_FINAL` +- Transienter KI-Fehler → `FAILED_RETRYABLE` +- Skip nach `SUCCESS` → `SKIPPED_ALREADY_PROCESSED` +- Skip nach `FAILED_FINAL` → `SKIPPED_FINAL_FAILURE` +- `PROPOSAL_READY`-Finalisierung ohne erneuten KI-Aufruf im zweiten Lauf +- Zielkopierfehler mit Sofort-Wiederholversuch → `SUCCESS` +- Transiente Fehler über mehrere Läufe → Ausschöpfung → `FAILED_FINAL` +- Zielkopierfehler beide Versuche gescheitert → `FAILED_RETRYABLE` +- Zwei verschiedene Dokumente, gleicher Vorschlagsname → Dubletten-Suffix `(1)` +- Mixed-Batch: ein Erfolg, ein Inhaltsfehler → Batch-Outcome `SUCCESS` (Exit-Code 0) + +### Hexagonale Architektur + +- **Domain** vollständig infrastrukturfrei: keine Imports aus `java.nio`, `java.io.File`, + JDBC, Log4j oder HTTP-Bibliotheken +- **Port-Verträge** (alle Interfaces in `application.port.out`) enthalten keine `Path`-, + `File`-, NIO- oder JDBC-Typen; nur Domain-Typen werden in Signaturen verwendet +- **Keine Adapter-zu-Adapter-Abhängigkeiten** in `adapter-out`: kein Modul referenziert + ein anderes Adapter-Implementierungspaket direkt +- **Abhängigkeitsrichtung** korrekt: adapter-out → application → domain + +### Fachregeln + +- Statusmodell vollständig (8 Werte: `READY_FOR_AI`, `PROPOSAL_READY`, `SUCCESS`, + `FAILED_RETRYABLE`, `FAILED_FINAL`, `SKIPPED_ALREADY_PROCESSED`, + `SKIPPED_FINAL_FAILURE`, `PROCESSING`) +- Retry-Semantik korrekt implementiert (deterministisch 1 Retry → final; + transient bis `max.retries.transient`) +- Skip-Semantik korrekt (SUCCESS → Skip, FAILED_FINAL → Skip, keine Zähleränderung) +- Führende Proposal-Quelle: `PROPOSAL_READY`-Versuch wird korrekt als Quelle verwendet +- SUCCESS-Bedingung: erst nach Zielkopie und konsistenter Persistenz + +### Logging und Sensibilität + +- 
`log.ai.sensitive`-Mechanismus vollständig implementiert und getestet +- Default `false` (sicher): KI-Rohantwort und Reasoning nicht im Log +- Persistenz in SQLite unabhängig von dieser Einstellung +- Konfiguration in beiden Beispieldateien dokumentiert + +### Konfiguration und Dokumentation + +- `config/application-local.example.properties`: vollständig, alle Pflicht- und + Optionalparameter vorhanden +- `config/application-test.example.properties`: vollständig +- `config/prompts/template.txt`: Prompt-Template im Repository vorhanden +- `docs/betrieb.md`: Betriebsdokumentation mit Start, Konfiguration, Exit-Codes, + Retry-Grundverhalten, Logging-Sensibilität +- Konfigurationsparameter-Namen in Dokumentation und Code konsistent + +### Exit-Code-Semantik + +- Exit-Code `0`: technisch ordnungsgemäßer Lauf (auch bei Teilfehlern einzelner Dokumente) +- Exit-Code `1`: harte Start-/Bootstrap-Fehler, ungültige Konfiguration, Lock-Fehler +- Implementierung in `PdfUmbenennerApplication` und `BootstrapRunner` korrekt + +### PIT-Mutationsanalyse (Gesamtstand) + +- Domain: 83 % Mutation Kill Rate +- Adapter-out: 83 % Mutation Kill Rate +- Application: 87 % Test Strength +- Bootstrap: 76 % Kill Rate (34 Mutationen, 26 getötet) + +--- + +## Offene Punkte + +### Nicht blockierend + +#### B1 – Naming-Convention-Verletzungen in Code, Tests und Konfiguration (CLAUDE.md § Naming-Regel) + +**Themenbereich:** Dokumentation / Codequalität +**Norm:** CLAUDE.md verbietet explizit Meilenstein- (M1–M8) und Arbeitspaket-Bezeichner (AP-xxx) +in Implementierungen, Kommentaren und JavaDoc. +**Befund:** 43 Treffer in `.java`-Dateien (21 in Produktionscode, 22 in Testcode) sowie +1 Treffer in `config/application.properties`. 
+ +Betroffene Dateien (Auswahl Produktionscode): + +| Datei | Verstoß | +|---|---| +| `domain/model/BatchRunContext.java` | `@since M2-AP-003` | +| `domain/model/DocumentFingerprint.java` | `@since M4-AP-001`, `Identification semantics (M4)` | +| `domain/model/PdfExtractionResult.java` | `@since M3-AP-001` | +| `domain/model/SourceDocumentCandidate.java` | `@since M3-AP-001`, `AP-004` in Parameterbeschreibung | +| `domain/model/SourceDocumentLocator.java` | `@since M3-AP-001` | +| `adapter/out/lock/FilesystemRunLockPortAdapter.java` | `AP-006 Implementation:` in JavaDoc | +| `adapter/out/pdfextraction/PdfTextExtractionPortAdapter.java` | `AP-003:` in Inline-Kommentaren | +| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapter.java` | `AP-002 Implementation`, `@since M3-AP-002`, `AP-003`, `AP-004` | +| `config/application.properties` | Kommentarheader `# PDF Umbenenner Configuration for AP-006 Testing` | + +Betroffene Dateien (Auswahl Testcode): + +| Datei | Verstoß | +|---|---| +| `adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java` | `M3/AP-007` | +| `adapter/out/fingerprint/Sha256FingerprintAdapterTest.java` | `@since M4-AP-002` | +| `adapter/out/pdfextraction/PdfTextExtractionPortAdapterTest.java` | `M3-AP-003`, `AP-003`, `AP-004` | +| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java` | `M3-AP-002`, `AP-004` | +| `adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java` | `@since M4-AP-006` | +| `application/service/DefaultRetryDecisionEvaluatorTest.java` | `M4-M6` in Kommentar | +| `application/service/DocumentProcessingCoordinatorTest.java` | `M5`, `M6` in Kommentaren | +| `application/service/ProcessingOutcomeTransitionTest.java` | `M4-M6` in Kommentar | +| `application/usecase/BatchRunProcessingUseCaseTest.java` | `M7` in Kommentar | +| `bootstrap/ExecutableJarSmokeTestIT.java` | `AP-008`, `M1` in JavaDoc | + +**Bewertung:** Rein kosmetisch/dokumentarisch. 
Kein Einfluss auf Funktionalität, Build +oder Testergebnis. Betrifft ausschließlich Kommentare und JavaDoc-Annotationen. +**Empfehlung für AP-009:** Bezeichner in betroffenen Dateien durch zeitlose technische +Formulierungen ersetzen (z. B. `@since M4-AP-001` → entfernen oder in neutrales +`@since 1.0` umwandeln; Inline-Kommentare sachlich formulieren). + +--- + +#### B2 – StartConfiguration in Application-Schicht enthält java.nio.file.Path (Architektur-Grenzfall) + +**Themenbereich:** Architektur +**Norm:** „Application orchestriert Use Cases und enthält keine technischen +Implementierungsdetails" (technik-und-architektur.md §3.1); Port-Verträge dürfen keine +NIO-Typen enthalten (CLAUDE.md). +**Befund:** `StartConfiguration` (in `application/config/startup/`) ist ein Java-Record +mit `java.nio.file.Path`-Feldern für `sourceFolder`, `targetFolder`, `sqliteFile`, +`promptTemplateFile`, `runtimeLockFile`, `logDirectory`. +**Kontext:** `StartConfiguration` ist kein Port-Vertrag, sondern ein unveränderliches +Konfigurations-DTO, das ausschließlich von Bootstrap erzeugt und an Adapter übergeben wird. +Die Port-Verträge selbst sind sauber (keine Path-Typen in Port-Interfaces). +**Bewertung:** Grenzfall. `Path` ist kein fachliches Objekt, aber auch kein schwerer +Architekturverstoß in diesem Kontext. Die Alternative (String-Repräsentation und Auflösung +im Adapter) hätte keinen Mehrwert für das Betriebsmodell. +**Empfehlung für AP-009:** Auf Wunsch im Rahmen von AP-009 prüfen, ob das Verschieben von +`StartConfiguration` in das Bootstrap-Modul sinnvoller wäre. Keine Pflicht, da kein +funktionaler Defekt vorliegt. + +--- + +#### B3 – PIT-Überlebende in Bootstrap (Bootstrap: 76 % Kill Rate) + +**Themenbereich:** Testqualität +**Befund:** 8 überlebende Mutanten im Bootstrap-Modul (34 generiert, 26 getötet). +Hauptkategorie: `VoidMethodCallMutator` (2 Überlebende, 2 ohne Coverage). +**Bewertung:** Betrifft vor allem Logging-Calls und nicht-kritische Hilfsmethoden. 
+Keine funktional tragenden Entscheidungspfade betroffen. +**Empfehlung:** Kein AP-009-Handlungsbedarf; wurde bereits in AP-007 auf akzeptablem +Niveau konsolidiert. + +--- + +## Zusammenfassung + +| Klassifikation | Anzahl | Beschreibung | +|---|---|---| +| Release-Blocker | **0** | – | +| Nicht blockierend | **3** | B1 Naming, B2 Path-Grenzfall, B3 PIT-Bootstrap | + +**Der Endstand ist produktionsbereit.** Alle fachlichen, technischen und architekturellen +Kernanforderungen sind umgesetzt und durch automatisierte Tests abgesichert. Der Maven-Build +ist fehlerfrei. Die identifizierten offenen Punkte sind ausschließlich nicht blockierend. + +Falls AP-009 durchgeführt wird, sollte der Fokus auf **B1** (Naming-Convention-Bereinigung) +liegen, da dieser Punkt die einzige verbindliche CLAUDE.md-Regel betrifft, die noch nicht +vollständig eingehalten wird. diff --git a/docs/betrieb.md b/docs/betrieb.md new file mode 100644 index 0000000..0766363 --- /dev/null +++ b/docs/betrieb.md @@ -0,0 +1,214 @@ +# Betriebsdokumentation – PDF Umbenenner + +## Zweck + +Der PDF Umbenenner liest bereits OCR-verarbeitete, durchsuchbare PDF-Dateien aus einem +konfigurierten Quellordner, ermittelt per KI-Aufruf einen normierten deutschen Dateinamen +und legt eine Kopie im konfigurierten Zielordner ab. Die Quelldatei bleibt unverändert. + +--- + +## Voraussetzungen + +- Java 21 (JRE oder JDK) +- Zugang zu einem OpenAI-kompatiblen KI-Dienst (API-Schlüssel erforderlich) +- Quellordner mit OCR-verarbeiteten PDF-Dateien +- Schreibzugriff auf Zielordner und Datenbankverzeichnis + +--- + +## Start des ausführbaren JAR + +Das ausführbare JAR wird durch den Maven-Build im Verzeichnis +`pdf-umbenenner-bootstrap/target/` erzeugt: + +``` +java -jar pdf-umbenenner-bootstrap/target/pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar +``` + +Die Anwendung liest die Konfiguration aus `config/application.properties` relativ zum +Arbeitsverzeichnis, in dem der Befehl ausgeführt wird. 
+ +### Start über Windows Task Scheduler + +Empfohlene Startsequenz für den Windows Task Scheduler: + +1. Aktion: Programm/Skript starten +2. Programm: `java` +3. Argumente: `-jar pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar` +4. Starten in: Verzeichnis mit `config/application.properties` und `config/prompts/` + +--- + +## Konfiguration + +Die Konfiguration wird aus `config/application.properties` geladen. +Vorlagen für lokale und Test-Konfigurationen befinden sich in: + +- `config/application-local.example.properties` +- `config/application-test.example.properties` + +### Pflichtparameter + +| Parameter | Beschreibung | +|------------------------|--------------| +| `source.folder` | Quellordner mit OCR-PDFs (muss vorhanden und lesbar sein) | +| `target.folder` | Zielordner für umbenannte Kopien (wird angelegt, wenn nicht vorhanden) | +| `sqlite.file` | SQLite-Datenbankdatei (übergeordnetes Verzeichnis muss existieren) | +| `api.baseUrl` | Basis-URL des KI-Dienstes (z. B. `https://api.openai.com/v1`) | +| `api.model` | Modellname (z. B. 
`gpt-4o-mini`) | +| `api.timeoutSeconds` | HTTP-Timeout für KI-Anfragen in Sekunden (ganzzahlig, > 0) | +| `max.retries.transient`| Maximale transiente Fehlversuche pro Dokument (ganzzahlig, >= 1) | +| `max.pages` | Maximale Seitenzahl pro Dokument (ganzzahlig, > 0) | +| `max.text.characters` | Maximale Zeichenanzahl des Dokumenttexts für KI-Anfragen (ganzzahlig, > 0) | +| `prompt.template.file` | Pfad zur externen Prompt-Datei (muss vorhanden sein) | + +### Optionale Parameter + +| Parameter | Beschreibung | Standard | +|----------------------|--------------|---------| +| `api.key` | API-Schlüssel (alternativ: Umgebungsvariable `PDF_UMBENENNER_API_KEY`) | – | +| `runtime.lock.file` | Lock-Datei für Startschutz | `pdf-umbenenner.lock` im Arbeitsverzeichnis | +| `log.directory` | Log-Verzeichnis | `./logs/` | +| `log.level` | Log-Level (`DEBUG`, `INFO`, `WARN`, `ERROR`) | `INFO` | +| `log.ai.sensitive` | KI-Rohantwort und Reasoning ins Log schreiben (`true`/`false`) | `false` | + +### API-Schlüssel + +Der API-Schlüssel kann auf zwei Wegen gesetzt werden: + +1. **Umgebungsvariable `PDF_UMBENENNER_API_KEY`** (empfohlen, hat Vorrang) +2. Property `api.key` in `config/application.properties` + +Die Umgebungsvariable hat immer Vorrang über die Properties-Datei. + +--- + +## Prompt-Konfiguration + +Der Prompt wird aus der in `prompt.template.file` konfigurierten externen Textdatei geladen. +Der Dateiname der Prompt-Datei dient als Prompt-Identifikator in der Versuchshistorie +(SQLite) und ermöglicht so die Nachvollziehbarkeit, welche Prompt-Version für welchen +Verarbeitungsversuch verwendet wurde. + +Eine Vorlage befindet sich in `config/prompts/template.txt` und kann direkt verwendet oder +an den jeweiligen KI-Dienst angepasst werden. 
+ +Die Anwendung ergänzt den Prompt automatisch um: +- einen Dokumenttext-Abschnitt +- eine explizite JSON-Antwortspezifikation mit den Feldern `title`, `reasoning` und `date` + +Der Prompt in `template.txt` muss deshalb **keine** JSON-Formatanweisung enthalten – +nur den inhaltlichen Auftrag an die KI. + +--- + +## Zielformat + +Jede erfolgreich verarbeitete PDF-Datei wird im Zielordner unter folgendem Namen abgelegt: + +``` +YYYY-MM-DD - Titel.pdf +``` + +Bei Namenskollisionen wird ein laufendes Suffix angehängt: + +``` +YYYY-MM-DD - Titel(1).pdf +YYYY-MM-DD - Titel(2).pdf +``` + +Das Suffix zählt nicht zu den 20 Zeichen des Basistitels. + +--- + +## Retry- und Skip-Verhalten + +### Dokumentstatus + +| Status | Bedeutung | +|---------------------------|-----------| +| `SUCCESS` | Erfolgreich verarbeitet und kopiert | +| `FAILED_RETRYABLE` | Fehlgeschlagen, erneuter Versuch in späterem Lauf möglich | +| `FAILED_FINAL` | Terminal fehlgeschlagen, wird nicht erneut verarbeitet | +| `SKIPPED_ALREADY_PROCESSED` | Übersprungen – Dokument bereits erfolgreich verarbeitet | +| `SKIPPED_FINAL_FAILURE` | Übersprungen – Dokument terminal fehlgeschlagen | + +### Retry-Regeln + +**Deterministische Inhaltsfehler** (z. B. kein extrahierbarer Text, Seitenlimit überschritten, +unbrauchbarer KI-Titel): + +- Erster Fehler → `FAILED_RETRYABLE` (ein Wiederholversuch in späterem Lauf erlaubt) +- Zweiter Fehler → `FAILED_FINAL` (kein weiterer Versuch) + +**Transiente technische Fehler** (z. B. KI nicht erreichbar, HTTP-Timeout): + +- Wiederholbar bis zum Grenzwert `max.retries.transient` +- Bei Erreichen des Grenzwerts → `FAILED_FINAL` + +**Technischer Sofort-Wiederholversuch:** + +Bei einem Schreibfehler der Zielkopie wird innerhalb desselben Laufs exakt ein +Sofort-Wiederholversuch unternommen. Dieser zählt nicht zum laufübergreifenden +Fehlerzähler. + +--- + +## Logging + +Logs werden in das konfigurierte `log.directory` geschrieben (Standard: `./logs/`). 
+Log-Rotation erfolgt täglich und bei Erreichen von 10 MB je Datei. + +### Sensible KI-Inhalte + +Standardmäßig werden die vollständige KI-Rohantwort und das KI-Reasoning **nicht** ins Log +geschrieben, sondern ausschließlich in der SQLite-Datenbank gespeichert. + +Die Ausgabe kann für Diagnosezwecke mit `log.ai.sensitive=true` freigeschaltet werden. +Erlaubte Werte: `true` oder `false`. Jeder andere Wert ist ungültig und verhindert den Start. + +--- + +## Exit-Codes + +| Code | Bedeutung | +|------|-----------| +| `0` | Lauf technisch ordnungsgemäß ausgeführt (auch bei dokumentbezogenen Teilfehlern) | +| `1` | Harter Start- oder Bootstrap-Fehler (ungültige Konfiguration, Lock nicht erwerbbar, Schema-Initialisierungsfehler) | + +Dokumentbezogene Fehler einzelner PDF-Dateien führen **nicht** zu Exit-Code `1`. + +--- + +## Startschutz (Parallelinstanzschutz) + +Die Anwendung verwendet eine exklusive Lock-Datei, um parallele Instanzen zu verhindern. +Wenn bereits eine Instanz läuft, beendet sich die neue Instanz sofort mit Exit-Code `1`. + +Der Pfad der Lock-Datei ist über `runtime.lock.file` konfigurierbar. +Ohne Konfiguration wird `pdf-umbenenner.lock` im Arbeitsverzeichnis verwendet. + +--- + +## SQLite-Datenbank + +Die SQLite-Datei enthält: + +- **Dokument-Stammsätze**: Gesamtstatus, Fehlerzähler, letzter Zieldateiname, Zeitstempel +- **Versuchshistorie**: Jeder Verarbeitungsversuch mit Modell, Prompt-Identifikator, + KI-Rohantwort, Reasoning, Datum, Titel und Fehlerstatus + +Die Datenbank ist die führende Wahrheitsquelle für Bearbeitungsstatus und Nachvollziehbarkeit. +Sie muss nicht manuell verwaltet werden – das Schema wird beim Start automatisch initialisiert. + +--- + +## Systemgrenzen + +- Nur OCR-verarbeitete, durchsuchbare PDF-Dateien werden verarbeitet +- Keine eingebaute OCR-Funktion +- Kein Web-UI, keine REST-API, keine interaktive Bedienung +- Kein interner Scheduler – der Start erfolgt extern (z. B. 
Windows Task Scheduler) +- Quelldateien werden nie überschrieben, verschoben oder gelöscht +- Die Identifikation erfolgt über SHA-256-Fingerprint des Dateiinhalts, nicht über Dateinamen diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/package-info.java new file mode 100644 index 0000000..d9cfd77 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/clock/package-info.java @@ -0,0 +1,18 @@ +/** + * Outbound adapter for system time access. + *
<p>
+ * Components: + *

+ *

+ * The {@link de.gecheckt.pdf.umbenenner.application.port.out.ClockPort} abstraction ensures that + * all application-layer and domain-layer code obtains the current instant through the port, + * enabling deterministic time injection in tests without coupling to wall-clock time. + *
<p>
+ * No date/time logic or formatting is performed in this package; that responsibility + * belongs to the application layer. + */ +package de.gecheckt.pdf.umbenenner.adapter.out.clock; diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java index f3f6dc9..2c767b0 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteProcessingAttemptRepositoryAdapter.java @@ -247,6 +247,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem * @return the most recent {@code PROPOSAL_READY} attempt, or {@code null} * @throws DocumentPersistenceException if the query fails */ + @Override public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) { Objects.requireNonNull(fingerprint, "fingerprint must not be null"); @@ -259,7 +260,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem final_target_file_name FROM processing_attempt WHERE fingerprint = ? - AND status = 'PROPOSAL_READY' + AND status = ? 
ORDER BY attempt_number DESC LIMIT 1 """; @@ -270,6 +271,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON); statement.setString(1, fingerprint.sha256Hex()); + statement.setString(2, ProcessingStatus.PROPOSAL_READY.name()); try (ResultSet rs = statement.executeQuery()) { if (rs.next()) { diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java index bf39e65..8947c67 100644 --- a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/sqlite/SqliteUnitOfWorkAdapter.java @@ -1,5 +1,7 @@ package de.gecheckt.pdf.umbenenner.adapter.out.sqlite; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Proxy; import java.sql.Connection; import java.sql.DriverManager; import java.sql.SQLException; @@ -93,53 +95,70 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort { } } + /** + * Wraps a shared transaction connection so that {@code close()} becomes a no-op. + *
<p>
+ * Repository adapters manage their own connection lifecycle via try-with-resources, + * which would close the shared transaction connection prematurely if not wrapped. + * All other {@link Connection} methods are delegated unchanged to the underlying connection. + * + * @param underlying the real shared connection; must not be null + * @return a proxy connection that ignores {@code close()} calls + */ + private static Connection nonClosingWrapper(Connection underlying) { + return (Connection) Proxy.newProxyInstance( + Connection.class.getClassLoader(), + new Class[] { Connection.class }, + (proxy, method, args) -> { + if ("close".equals(method.getName())) { + return null; + } + try { + return method.invoke(underlying, args); + } catch (InvocationTargetException e) { + throw e.getCause(); + } + }); + } + private class TransactionOperationsImpl implements TransactionOperations { private final Connection connection; - + TransactionOperationsImpl(Connection connection) { this.connection = connection; } - + @Override public void saveProcessingAttempt(ProcessingAttempt attempt) { - // Repository methods declare DocumentPersistenceException as the only thrown exception. - // Any other exception (NullPointerException, etc.) will propagate to the outer try-catch - // and be caught there. SqliteProcessingAttemptRepositoryAdapter repo = new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl) { @Override protected Connection getConnection() throws SQLException { - return connection; + return nonClosingWrapper(connection); } }; repo.save(attempt); } - + @Override public void createDocumentRecord(DocumentRecord record) { - // Repository methods declare DocumentPersistenceException as the only thrown exception. - // Any other exception (NullPointerException, etc.) will propagate to the outer try-catch - // and be caught there. 
SqliteDocumentRecordRepositoryAdapter repo = new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) { @Override protected Connection getConnection() throws SQLException { - return connection; + return nonClosingWrapper(connection); } }; repo.create(record); } - + @Override public void updateDocumentRecord(DocumentRecord record) { - // Repository methods declare DocumentPersistenceException as the only thrown exception. - // Any other exception (NullPointerException, etc.) will propagate to the outer try-catch - // and be caught there. SqliteDocumentRecordRepositoryAdapter repo = new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) { @Override protected Connection getConnection() throws SQLException { - return connection; + return nonClosingWrapper(connection); } }; repo.update(record); diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/package-info.java new file mode 100644 index 0000000..1a5aea4 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetcopy/package-info.java @@ -0,0 +1,24 @@ +/** + * Outbound adapter for writing the target file copy. + *

+ * Components: + *

+ *

+ * The adapter uses a two-step write pattern: the source is first copied to a temporary + * file ({@code resolvedFilename + ".tmp"}) in the target folder, then renamed/moved to + * the final filename. An atomic move is attempted first; a standard move is used as a + * fallback when the filesystem does not support atomic cross-directory moves. + *

+ * Source integrity: The source file is never modified, moved, or deleted. + * Only a copy is created in the target folder. + *

+ * Architecture boundary: All NIO ({@code Path}, {@code Files}) operations + * are strictly confined to this package. The port interface + * {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort} contains no + * filesystem types, preserving the hexagonal architecture boundary. + */ +package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy; diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/package-info.java new file mode 100644 index 0000000..72e6b15 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/targetfolder/package-info.java @@ -0,0 +1,26 @@ +/** + * Outbound adapter for target folder management and unique filename resolution. + *

+ * Components: + *

+ *

+ * Duplicate resolution: Given a base name such as + * {@code 2024-01-15 - Rechnung.pdf}, the adapter checks whether the file exists in the + * target folder and appends a numeric suffix ({@code (1)}, {@code (2)}, …) directly + * before {@code .pdf} until a free name is found. The 20-character base-title limit + * does not apply to the suffix. + *

+ * Rollback support: The adapter provides a best-effort deletion method + * used by the application layer to remove a successfully written target copy when + * subsequent persistence fails, preventing orphaned target files. + *

+ * Architecture boundary: All NIO ({@code Path}, {@code Files}) operations + * are strictly confined to this package. The port interface + * {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort} contains no + * filesystem types, preserving the hexagonal architecture boundary. + */ +package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder; diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java index edf11fa..f0b6d5e 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/AiRequestComposer.java @@ -85,30 +85,6 @@ public class AiRequestComposer { Objects.requireNonNull(promptContent, "promptContent must not be null"); Objects.requireNonNull(documentText, "documentText must not be null"); - // The complete request text is composed in a fixed, deterministic order: - // 1. Prompt content (instruction) - // 2. Newline separator - // 3. Prompt identifier marker (for traceability) - // 4. Newline separator - // 5. Document text section marker - // 6. Newline separator - // 7. Document text content - // 8. Newline separator - // 9. Response format specification (JSON-only with required fields) - // - // This order is fixed so that another implementation knows exactly where - // each part is positioned and what to expect. 
- StringBuilder requestBuilder = new StringBuilder(); - requestBuilder.append(promptContent); - requestBuilder.append("\n"); - requestBuilder.append("--- Prompt-ID: ").append(promptIdentifier.identifier()).append(" ---"); - requestBuilder.append("\n"); - requestBuilder.append("--- Document Text ---"); - requestBuilder.append("\n"); - requestBuilder.append(documentText); - requestBuilder.append("\n"); - appendJsonResponseFormat(requestBuilder); - // Record the exact character count of the document text that was included. // This is the length of the document text (not the complete request). int sentCharacterCount = documentText.length(); diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java index 9959914..aa0a175 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/usecase/DefaultBatchRunProcessingUseCase.java @@ -393,10 +393,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa private void logProcessingOutcome(SourceDocumentCandidate candidate, DocumentProcessingOutcome outcome) { switch (outcome) { case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed -> - logger.info("Pre-checks FAILED for '{}': {} (Deterministic content error).", + logger.info("Pre-checks failed for '{}': {} (deterministic content error).", candidate.uniqueIdentifier(), failed.failureReasonDescription()); case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError -> - logger.warn("Processing FAILED for '{}': {} (Technical error – retryable).", + logger.warn("Processing failed for '{}': {} (transient technical error – retryable).", 
candidate.uniqueIdentifier(), technicalError.errorMessage()); case de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady ready -> logger.info("AI naming proposal ready for '{}': title='{}', date={}.", @@ -404,10 +404,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa ready.proposal().validatedTitle(), ready.proposal().resolvedDate()); case de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure aiTechnical -> - logger.warn("AI technical failure for '{}': {} (Transient – retryable).", + logger.warn("AI invocation failed for '{}': {} (transient technical error – retryable).", candidate.uniqueIdentifier(), aiTechnical.errorMessage()); case de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure aiFunctional -> - logger.info("AI functional failure for '{}': {} (Deterministic content error).", + logger.info("AI naming failed for '{}': {} (deterministic content error).", candidate.uniqueIdentifier(), aiFunctional.errorMessage()); default -> { /* other outcomes are handled elsewhere */ } } diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java index e103be3..070ad79 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/AiNamingServiceTest.java @@ -314,4 +314,13 @@ class AiNamingServiceTest { .isInstanceOf(IllegalArgumentException.class) .hasMessageContaining("maxTextCharacters must be >= 1"); } + + @Test + void constructor_maxTextCharactersOne_doesNotThrow() { + // maxTextCharacters=1 is the minimum valid value (boundary test). 
+ // A changed-conditional-boundary mutation that changes '< 1' to '<= 1' would + // cause this constructor call to throw — this test detects that mutation. + new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, 1); + // No exception expected; reaching this line means the boundary is correct + } } diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java index b69899c..23ce1f3 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingCoordinatorTest.java @@ -829,8 +829,9 @@ class DocumentProcessingCoordinatorTest { // No PROPOSAL_READY attempt pre-populated // persistTransientError returns true when the error record was persisted successfully - processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -851,8 +852,9 @@ class DocumentProcessingCoordinatorTest { null, DateSource.AI_PROVIDED, "Rechnung", null); attemptRepo.savedAttempts.add(badProposal); - processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted 
successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -871,8 +873,10 @@ class DocumentProcessingCoordinatorTest { new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), DEFAULT_MAX_RETRIES_TRANSIENT); - coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + boolean result = coordinatorWithFailingFolder.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -891,8 +895,10 @@ class DocumentProcessingCoordinatorTest { new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(), DEFAULT_MAX_RETRIES_TRANSIENT); - coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + boolean result = coordinatorWithFailingCopy.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -914,8 +920,9 @@ class DocumentProcessingCoordinatorTest { "A".repeat(21), null); attemptRepo.savedAttempts.add(badProposal); - processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); 
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -939,8 +946,9 @@ class DocumentProcessingCoordinatorTest { "Rechnung-2026", null); attemptRepo.savedAttempts.add(badProposal); - processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -1008,9 +1016,10 @@ class DocumentProcessingCoordinatorTest { new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(), DEFAULT_MAX_RETRIES_TRANSIENT); - coordinatorWithCountingCopy.processDeferredOutcome( + boolean result = coordinatorWithCountingCopy.processDeferredOutcome( candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE) .findFirst() @@ -1037,9 +1046,10 @@ class DocumentProcessingCoordinatorTest { recordRepo, attemptRepo, unitOfWorkPort, new NoOpTargetFolderPort(), failingCopy, new NoOpProcessingLogger(), 1); - coordinatorWith1Retry.processDeferredOutcome( + boolean result = coordinatorWith1Retry.processDeferredOutcome( candidate, fingerprint, context, attemptStart, c -> null); + assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully"); ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream() .filter(a -> a.status() == ProcessingStatus.FAILED_FINAL) .findFirst() @@ -1055,6 +1065,58 @@ class 
DocumentProcessingCoordinatorTest { "Transient error counter must be 1 after the first cross-run transient error"); } + @Test + void processDeferredOutcome_proposalReady_copyFailure_retryDecisionLog_containsFailedRetryable() { + // Verifies that when a copy failure leads to FAILED_RETRYABLE in persistTransientError, + // the retry-decision log message specifically contains "FAILED_RETRYABLE" and + // "will retry in later run" — the branch-specific text that distinguishes it from the + // FAILED_FINAL branch. This kills the negated-conditional mutation on the retryable flag check. + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.anyInfoContains("FAILED_RETRYABLE"), + "Retry decision log for a retryable transient copy error must contain FAILED_RETRYABLE. " + + "Captured info messages: " + capturingLogger.infoMessages); + assertTrue(capturingLogger.anyInfoContains("will retry in later run"), + "Retry decision log for a retryable transient error must contain 'will retry in later run'. 
" + + "Captured info messages: " + capturingLogger.infoMessages); + } + + @Test + void processDeferredOutcome_proposalReady_copyFailure_maxRetriesTransient1_retryDecisionLog_containsFailedFinal() { + // Verifies that when a copy failure with maxRetriesTransient=1 leads to FAILED_FINAL in + // persistTransientError, the retry-decision log message contains "FAILED_FINAL" and + // "transient error limit reached" — the branch-specific text that distinguishes it + // from the FAILED_RETRYABLE branch. + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), capturingLogger, + 1 /* maxRetriesTransient=1 → immediately final */); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"), + "Retry decision log for a finalising transient copy error must contain FAILED_FINAL. " + + "Captured info messages: " + capturingLogger.infoMessages); + assertTrue(capturingLogger.anyInfoContains("transient error limit reached"), + "Retry decision log for a finalising transient error must contain 'transient error limit reached'. 
" + + "Captured info messages: " + capturingLogger.infoMessages); + } + @Test void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() { // Ensures that during the immediate retry path no pipeline (AI) execution happens @@ -1375,6 +1437,26 @@ class DocumentProcessingCoordinatorTest { } } + /** Counts calls to {@link #tryDeleteTargetFile(String)} for mutation detection. */ + private static class CapturingTargetFolderPort implements TargetFolderPort { + int tryDeleteCallCount = 0; + + @Override + public String getTargetFolderLocator() { + return "/tmp/target"; + } + + @Override + public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) { + return new ResolvedTargetFilename(baseName); + } + + @Override + public void tryDeleteTargetFile(String resolvedFilename) { + tryDeleteCallCount++; + } + } + private static class NoOpTargetFolderPort implements TargetFolderPort { @Override public String getTargetFolderLocator() { @@ -1493,6 +1575,162 @@ class DocumentProcessingCoordinatorTest { assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"), "Finalising retry decision log must contain the FAILED_FINAL classification. " + "Captured info messages: " + capturingLogger.infoMessages); + assertTrue(capturingLogger.anyInfoContains("permanently failed"), + "Finalising retry decision log must contain 'permanently failed' to distinguish " + + "the FAILED_FINAL branch from the generic status log. " + + "Captured info messages: " + capturingLogger.infoMessages); + } + + // ------------------------------------------------------------------------- + // Finalization path logging: error, warn, and info calls in key paths + // ------------------------------------------------------------------------- + + @Test + void processDeferredOutcome_proposalReady_missingProposalAttempt_logsError() { + // Missing PROPOSAL_READY attempt in history — finalizeProposalReady must log an error. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + // No attempt pre-loaded — proposalAttempt == null branch + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.errorCallCount > 0, + "An error must be logged when the PROPOSAL_READY attempt is missing from history"); + } + + @Test + void processDeferredOutcome_proposalReady_inconsistentProposalState_logsError() { + // Inconsistent proposal state (null date) — finalizeProposalReady must log an error. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + ProcessingAttempt badProposal = new ProcessingAttempt( + fingerprint, context.runId(), 1, Instant.now(), Instant.now(), + ProcessingStatus.PROPOSAL_READY, null, null, false, + "model", "prompt", 1, 100, "{}", "reason", + null, DateSource.AI_PROVIDED, "Rechnung", null); + attemptRepo.savedAttempts.add(badProposal); + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.errorCallCount > 0, + "An error must be logged when the proposal state is inconsistent"); + } + + @Test + void processDeferredOutcome_proposalReady_duplicateResolutionFailure_logsError() { + // Duplicate resolution failure — finalizeProposalReady must log an error. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.errorCallCount > 0, + "An error must be logged when duplicate resolution fails"); + } + + @Test + void processDeferredOutcome_proposalReady_resolvedFilename_logsInfo() { + // Successful duplicate resolution — resolved filename must be logged at INFO. + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, + c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); }); + + assertTrue(capturingLogger.infoCallCount > 0, + "Resolved target filename must be logged at INFO level"); + } + + @Test + void processDeferredOutcome_proposalReady_firstCopyFails_logsWarn() { + // First copy attempt fails → immediate retry: a WARN must be logged for the 
first failure. + DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + CountingTargetFileCopyPort onlyFirstFails = new CountingTargetFileCopyPort(1); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), onlyFirstFails, capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, + c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); }); + + assertTrue(capturingLogger.warnCallCount > 0, + "A WARN must be logged when the first copy attempt fails and an immediate retry is triggered"); + } + + @Test + void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_logsError() { + // Both copy attempts fail — finalizeProposalReady must log an error. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + CountingTargetFileCopyPort bothFail = new CountingTargetFileCopyPort(2); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), bothFail, capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.errorCallCount > 0, + "An error must be logged when both copy attempts fail"); + } + + @Test + void processDeferredOutcome_proposalReady_immediateRetrySucceeds_logsInfo() { + // First copy fails, immediate retry succeeds — a success INFO must be logged. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + CountingTargetFileCopyPort onlyFirstFails = new CountingTargetFileCopyPort(1); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), onlyFirstFails, capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, + c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); }); + + assertTrue(capturingLogger.infoCallCount > 0, + "An INFO must be logged when the immediate within-run retry succeeds"); } /** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */ @@ -1581,5 +1819,91 @@ class DocumentProcessingCoordinatorTest { boolean anyInfoContains(String text) { return infoMessages.stream().anyMatch(m -> m.contains(text)); } + + boolean anyErrorContains(String text) { + return errorMessages.stream().anyMatch(m -> m.contains(text)); + } + } + + // ------------------------------------------------------------------------- + // AI sensitive content logging in finalization path + // ------------------------------------------------------------------------- + + @Test + void processDeferredOutcome_proposalReady_aiContentNotNull_callsDebugSensitiveAiContent() { + // buildValidProposalAttempt() has non-null aiRawResponse and aiReasoning. + // The conditional guards at lines 398 and 402 of finalizeProposalReady must + // trigger the debugSensitiveAiContent call when the values are present. + // If negated, the calls would be suppressed for non-null values — detectable here. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); // aiRawResponse="{}", aiReasoning="reason" + + CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome( + candidate, fingerprint, context, attemptStart, + c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); }); + + assertTrue(capturingLogger.debugSensitiveAiContentCallCount >= 2, + "debugSensitiveAiContent must be called for aiRawResponse and aiReasoning " + + "when both are non-null. Actual call count: " + + capturingLogger.debugSensitiveAiContentCallCount); + } + + // ------------------------------------------------------------------------- + // Best-effort rollback path: tryDeleteTargetFile and secondary persistence + // ------------------------------------------------------------------------- + + @Test + void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_callsTryDeleteTargetFile() { + // When persistence fails after a successful copy, the best-effort rollback + // must call tryDeleteTargetFile to clean up the orphaned target file. + // This test kills the 'removed call to tryDeleteTargetFile' mutation. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + unitOfWorkPort.failOnExecute = true; + + CapturingTargetFolderPort capturingFolderPort = new CapturingTargetFolderPort(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + capturingFolderPort, new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(), + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingFolderPort.tryDeleteCallCount > 0, + "tryDeleteTargetFile must be called at least once for best-effort rollback " + + "when persistence fails after a successful copy"); + } + + @Test + void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_logsSecondaryFailure() { + // When persistence fails after a successful copy and the secondary persistence + // attempt in persistTransientErrorAfterPersistenceFailure also fails, + // an error must be logged for the secondary failure. + // This kills the 'removed call to persistTransientErrorAfterPersistenceFailure' mutation. 
+ DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero()); + recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord)); + attemptRepo.savedAttempts.add(buildValidProposalAttempt()); + unitOfWorkPort.failOnExecute = true; // both primary and secondary persistence fail + + MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger(); + DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator( + recordRepo, attemptRepo, unitOfWorkPort, + new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger, + DEFAULT_MAX_RETRIES_TRANSIENT); + + coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null); + + assertTrue(capturingLogger.anyErrorContains("Secondary persistence failure") + || capturingLogger.anyErrorContains("secondary"), + "An error must be logged for the secondary persistence failure. " + + "Captured error messages: " + capturingLogger.errorMessages); } } \ No newline at end of file diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java index 2126e11..472d6b5 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/ProcessingOutcomeTransitionTest.java @@ -7,8 +7,11 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure; import de.gecheckt.pdf.umbenenner.domain.model.DateSource; import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal; import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import 
de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed; import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; @@ -314,4 +317,58 @@ class ProcessingOutcomeTransitionTest { assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus()); assertEquals(2, result.counters().transientErrorCount()); } + + // ------------------------------------------------------------------------- + // PreCheckPassed routed through transition (edge case: no AI step taken) + // ------------------------------------------------------------------------- + + @Test + void forNewDocument_preCheckPassed_limitOne_immediatelyFinal() { + // PreCheckPassed without an AI outcome is treated as a transient error by the transition. + // With limit=1 the first such error must immediately finalise to FAILED_FINAL. + PreCheckPassed outcome = new PreCheckPassed( + candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1))); + + ProcessingOutcomeTransition.ProcessingOutcome result = + ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1); + + assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(), + "With limit=1 a PreCheckPassed-routed transient error must immediately finalise"); + assertFalse(result.retryable()); + assertEquals(1, result.counters().transientErrorCount()); + assertEquals(0, result.counters().contentErrorCount()); + } + + @Test + void forNewDocument_preCheckPassed_limitTwo_firstErrorRetryable() { + // With limit=2 the first PreCheckPassed-routed transient error is retryable. 
+ PreCheckPassed outcome = new PreCheckPassed( + candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1))); + + ProcessingOutcomeTransition.ProcessingOutcome result = + ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_2); + + assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus(), + "With limit=2 the first PreCheckPassed-routed transient error must be retryable"); + assertTrue(result.retryable()); + assertEquals(1, result.counters().transientErrorCount()); + assertEquals(0, result.counters().contentErrorCount()); + } + + @Test + void forKnownDocument_preCheckPassed_limitTwo_secondErrorFinal() { + // With limit=2 and an existing transient error count of 1, + // the next PreCheckPassed-routed error increments to 2 = limit → FAILED_FINAL. + PreCheckPassed outcome = new PreCheckPassed( + candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1))); + FailureCounters existing = new FailureCounters(0, 1); + + ProcessingOutcomeTransition.ProcessingOutcome result = + ProcessingOutcomeTransition.forKnownDocument(outcome, existing, LIMIT_2); + + assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(), + "PreCheckPassed-routed error at transient limit must finalise to FAILED_FINAL"); + assertFalse(result.retryable()); + assertEquals(2, result.counters().transientErrorCount()); + } } diff --git a/pdf-umbenenner-bootstrap/pom.xml b/pdf-umbenenner-bootstrap/pom.xml index db4a68e..81f1bc3 100644 --- a/pdf-umbenenner-bootstrap/pom.xml +++ b/pdf-umbenenner-bootstrap/pom.xml @@ -62,6 +62,11 @@ mockito-junit-jupiter test + + org.assertj + assertj-core + test + diff --git a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java index b1e6a94..431b161 100644 --- a/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java +++ 
b/pdf-umbenenner-bootstrap/src/main/java/de/gecheckt/pdf/umbenenner/bootstrap/BootstrapRunner.java @@ -310,10 +310,10 @@ public class BootstrapRunner { LOG.error("Configuration validation failed: {}", e.getMessage()); return 1; } catch (DocumentPersistenceException e) { - LOG.error("Persistence operation failed: {}", e.getMessage(), e); + LOG.error("Schema initialization failed: {}", e.getMessage(), e); return 1; } catch (Exception e) { - LOG.error("Bootstrap failure during startup.", e); + LOG.error("Unexpected startup failure.", e); return 1; } } @@ -391,7 +391,7 @@ public class BootstrapRunner { */ private BatchRunContext createRunContext() { RunId runId = new RunId(UUID.randomUUID().toString()); - LOG.info("Batch run started. RunId: {}", runId); + LOG.info("Preparing batch run. RunId: {}", runId); return new BatchRunContext(runId, Instant.now()); } diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/BatchRunEndToEndTest.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/BatchRunEndToEndTest.java new file mode 100644 index 0000000..c4f4f3e --- /dev/null +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/BatchRunEndToEndTest.java @@ -0,0 +1,698 @@ +package de.gecheckt.pdf.umbenenner.bootstrap.e2e; + +import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import 
de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Deterministic end-to-end tests for the complete batch processing pipeline. + *

+ * Each test method is independent and uses its own {@link E2ETestContext} backed by a + * JUnit {@code @TempDir}. All infrastructure adapters (SQLite, filesystem, PDF extraction, + * fingerprinting) are real production implementations. Only the AI invocation port is + * replaced by a configurable {@link StubAiInvocationPort} to avoid real HTTP calls. + * + *

<b>End-to-end invariants verified</b>

+ * + * + *

<b>Document text used in tests</b>

+ *

+ * Searchable PDFs embed enough text to pass the minimum-text pre-check. The AI stub + * returns a title of {@code "Stromabrechnung"} and date {@code "2024-01-15"} by default, + * producing a target filename of {@code "2024-01-15 - Stromabrechnung.pdf"}. + */ +class BatchRunEndToEndTest { + + private static final String SAMPLE_PDF_TEXT = + "Stromabrechnung Kundenname Musterstadt Datum 15.01.2024 Betrag 123,45 EUR"; + + // ========================================================================= + // Scenario 1: Happy-path to SUCCESS + // ========================================================================= + + /** + * Verifies the complete two-run happy-path: + *

    + *
  1. Run 1: AI stub returns valid proposal → document status becomes + * {@code PROPOSAL_READY}; no target file yet.
  2. + *
  3. Run 2: AI is NOT called again; target file is copied; document status + * becomes {@code SUCCESS}.
  4. + *
+ * This confirms the leading-proposal-attempt rule and the two-phase finalization. + */ + @Test + void happyPath_twoRuns_reachesSuccess(@TempDir Path tempDir) throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("rechnung.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("rechnung.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // --- Run 1: AI produces a naming proposal --- + BatchRunOutcome run1 = ctx.runBatch(); + + assertThat(run1).isEqualTo(BatchRunOutcome.SUCCESS); + assertThat(ctx.aiStub.invocationCount()).isEqualTo(1); + + DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(ctx.listTargetFiles()).isEmpty(); + + List attempts1 = ctx.findAttempts(fp); + assertThat(attempts1).hasSize(1); + assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + + // --- Run 2: Finalization without AI call --- + ctx.aiStub.resetInvocationCount(); + BatchRunOutcome run2 = ctx.runBatch(); + + assertThat(run2).isEqualTo(BatchRunOutcome.SUCCESS); + assertThat(ctx.aiStub.invocationCount()) + .as("AI must not be called again when PROPOSAL_READY exists") + .isEqualTo(0); + + DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS); + assertThat(record2.lastSuccessInstant()).isNotNull(); + assertThat(record2.lastTargetFileName()).isNotNull(); + + List targetFiles = ctx.listTargetFiles(); + assertThat(targetFiles).hasSize(1); + assertThat(targetFiles.get(0)).endsWith(".pdf"); + assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue(); + + List attempts2 = ctx.findAttempts(fp); + assertThat(attempts2).hasSize(2); + assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS); + } + } + + // 
========================================================================= + // Scenario 2: Deterministic content error → FAILED_RETRYABLE → FAILED_FINAL + // ========================================================================= + + /** + * Verifies the one-retry rule for deterministic content errors: + *
    + *
  1. Run 1: blank PDF → pre-check fails (no extractable text) → + * {@code FAILED_RETRYABLE}, content error counter = 1.
  2. + *
  3. Run 2: same outcome again → {@code FAILED_FINAL}, content error counter = 2.
  4. + *
+ * No AI call is made in either run because the content pre-check prevents it. + */ + @Test + void deterministicContentError_twoRuns_reachesFailedFinal(@TempDir Path tempDir) + throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createBlankPdf("blank.pdf"); + Path pdfPath = ctx.sourceFolder().resolve("blank.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // --- Run 1 --- + ctx.runBatch(); + + assertThat(ctx.aiStub.invocationCount()) + .as("AI must not be called for a blank PDF") + .isEqualTo(0); + + DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(1); + assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(0); + + List attempts1 = ctx.findAttempts(fp); + assertThat(attempts1).hasSize(1); + assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(attempts1.get(0).retryable()).isTrue(); + + // --- Run 2 --- + ctx.runBatch(); + + DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL); + assertThat(record2.failureCounters().contentErrorCount()).isEqualTo(2); + + List attempts2 = ctx.findAttempts(fp); + assertThat(attempts2).hasSize(2); + assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.FAILED_FINAL); + assertThat(attempts2.get(1).retryable()).isFalse(); + + // No target file should exist + assertThat(ctx.listTargetFiles()).isEmpty(); + } + } + + // ========================================================================= + // Scenario 3: Transient technical error → FAILED_RETRYABLE + // ========================================================================= + + /** + * Verifies that a transient AI failure produces {@code FAILED_RETRYABLE} with an + * incremented transient 
error counter, and that no target file is written. + * The document remains retryable in subsequent runs until the transient limit is reached. + */ + @Test + void transientAiFailure_producesFailedRetryable(@TempDir Path tempDir) throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("doc.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + ctx.aiStub.configureTechnicalFailure(); + + ctx.runBatch(); + + assertThat(ctx.aiStub.invocationCount()) + .as("AI must have been invoked (and failed) once") + .isEqualTo(1); + + DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(record.failureCounters().transientErrorCount()).isEqualTo(1); + assertThat(record.failureCounters().contentErrorCount()).isEqualTo(0); + + List attempts = ctx.findAttempts(fp); + assertThat(attempts).hasSize(1); + assertThat(attempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(attempts.get(0).retryable()).isTrue(); + + assertThat(ctx.listTargetFiles()).isEmpty(); + } + } + + // ========================================================================= + // Scenario 4: Skip after SUCCESS + // ========================================================================= + + /** + * Verifies the skip-after-success invariant: + * after a document reaches {@code SUCCESS} (via two runs), a third run records a + * {@code SKIPPED_ALREADY_PROCESSED} attempt without changing the overall status, + * failure counters, or the target file. 
+ */ + @Test + void skipAfterSuccess_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("doc.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // Reach SUCCESS via two runs + ctx.runBatch(); // → PROPOSAL_READY + ctx.runBatch(); // → SUCCESS + + DocumentRecord successRecord = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(successRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS); + String targetFileBefore = successRecord.lastTargetFileName(); + + // --- Run 3: should produce skip --- + ctx.aiStub.resetInvocationCount(); + BatchRunOutcome run3 = ctx.runBatch(); + + assertThat(run3).isEqualTo(BatchRunOutcome.SUCCESS); + assertThat(ctx.aiStub.invocationCount()) + .as("AI must not be called for an already-successful document") + .isEqualTo(0); + + DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record3.overallStatus()) + .as("Overall status must remain SUCCESS after a skip") + .isEqualTo(ProcessingStatus.SUCCESS); + assertThat(record3.lastTargetFileName()) + .as("Target filename must not change after a skip") + .isEqualTo(targetFileBefore); + + List attempts = ctx.findAttempts(fp); + assertThat(attempts).hasSize(3); + assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_ALREADY_PROCESSED); + assertThat(attempts.get(2).retryable()).isFalse(); + + // Target file count must remain exactly one + assertThat(ctx.listTargetFiles()).hasSize(1); + } + } + + // ========================================================================= + // Scenario 5: Skip after FAILED_FINAL + // ========================================================================= + + /** + * Verifies the skip-after-final-failure invariant: + * after a document reaches {@code FAILED_FINAL} (via two blank-PDF runs), a third run + * records a {@code 
SKIPPED_FINAL_FAILURE} attempt without changing the overall status + * or failure counters. + */ + @Test + void skipAfterFailedFinal_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createBlankPdf("blank.pdf"); + Path pdfPath = ctx.sourceFolder().resolve("blank.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // Reach FAILED_FINAL via two blank-PDF runs + ctx.runBatch(); // → FAILED_RETRYABLE + ctx.runBatch(); // → FAILED_FINAL + + DocumentRecord finalRecord = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(finalRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL); + int contentErrorsBefore = finalRecord.failureCounters().contentErrorCount(); + + // --- Run 3: should produce skip --- + ctx.runBatch(); + + DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record3.overallStatus()) + .as("Overall status must remain FAILED_FINAL after a skip") + .isEqualTo(ProcessingStatus.FAILED_FINAL); + assertThat(record3.failureCounters().contentErrorCount()) + .as("Failure counters must not change after a skip") + .isEqualTo(contentErrorsBefore); + + List attempts = ctx.findAttempts(fp); + assertThat(attempts).hasSize(3); + assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_FINAL_FAILURE); + assertThat(attempts.get(2).retryable()).isFalse(); + } + } + + // ========================================================================= + // Scenario 6: Existing PROPOSAL_READY with later finalization + // ========================================================================= + + /** + * Verifies the leading-proposal-attempt rule in isolation: + *
    + *
  1. Run 1: AI produces a naming proposal → document status is {@code PROPOSAL_READY}.
  2. + *
  3. Run 2: AI stub is reset to technical failure; the coordinator must still finalize + * the document to {@code SUCCESS} using the persisted proposal — without calling the AI.
  4. + *
+ * This confirms that the second run never re-invokes the AI when a valid + * {@code PROPOSAL_READY} attempt already exists. + */ + @Test + void proposalReadyFinalization_noAiCallInSecondRun(@TempDir Path tempDir) throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("doc.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // --- Run 1: establish PROPOSAL_READY --- + ctx.runBatch(); + + DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(ctx.listTargetFiles()).isEmpty(); + + // --- Run 2: AI stub would fail if called, but must not be called --- + ctx.aiStub.configureTechnicalFailure(); + ctx.aiStub.resetInvocationCount(); + + ctx.runBatch(); + + assertThat(ctx.aiStub.invocationCount()) + .as("AI must not be invoked during PROPOSAL_READY finalization") + .isEqualTo(0); + + DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS); + + List targetFiles = ctx.listTargetFiles(); + assertThat(targetFiles).hasSize(1); + assertThat(targetFiles.get(0)).endsWith(".pdf"); + } + } + + // ========================================================================= + // Scenario 7: Target copy error with immediate within-run retry + // ========================================================================= + + /** + * Verifies the immediate within-run retry for target copy failures: + *
    + *
  1. Run 1: AI produces {@code PROPOSAL_READY}.
  2. + *
  3. Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails on + * the first invocation but delegates to the real adapter on the second. + * The coordinator must detect the first failure, retry immediately within the + * same run, and record {@code SUCCESS} — without incrementing the transient + * error counter.
  4. + *
+ * The immediate retry does not count as a cross-run transient error. + */ + @Test + void targetCopyError_immediateRetrySucceeds_recordsSuccess(@TempDir Path tempDir) + throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("doc.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // --- Run 1: produce PROPOSAL_READY --- + ctx.runBatch(); + + DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY); + + // --- Run 2: first copy attempt fails, retry succeeds --- + TargetFileCopyPort realAdapter = + new de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter( + ctx.targetFolder()); + AtomicInteger copyCallCount = new AtomicInteger(0); + + TargetFileCopyPort stubWithRetry = (locator, resolvedFilename) -> { + int call = copyCallCount.incrementAndGet(); + if (call == 1) { + // First attempt: simulate a transient write failure + return new TargetFileCopyTechnicalFailure( + "Simulated write failure on first attempt", true); + } + // Second attempt (immediate within-run retry): delegate to real adapter + return realAdapter.copyToTarget(locator, resolvedFilename); + }; + + ctx.setTargetFileCopyPortOverride(stubWithRetry); + ctx.runBatch(); + + assertThat(copyCallCount.get()) + .as("Copy port must have been called twice (initial + retry)") + .isEqualTo(2); + + DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS); + assertThat(record2.failureCounters().transientErrorCount()) + .as("Immediate within-run retry must not increment the transient error counter") + .isEqualTo(0); + + List attempts = ctx.findAttempts(fp); + assertThat(attempts).hasSize(2); + assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS); + + 
List targetFiles = ctx.listTargetFiles(); + assertThat(targetFiles).hasSize(1); + assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue(); + } + } + + // ========================================================================= + // Scenario 8: Transient error exhaustion → FAILED_FINAL + // ========================================================================= + + /** + * Verifies the complete transient error exhaustion path over multiple runs: + *
    + *
  1. Run 1: AI stub fails technically → {@code FAILED_RETRYABLE}, + * transient counter = 1 (below limit 3).
  2. + *
  3. Run 2: AI stub fails again → {@code FAILED_RETRYABLE}, + * transient counter = 2 (below limit 3).
  4. + *
  5. Run 3: AI stub fails again → transient counter reaches the limit (3 = 3) → + * {@code FAILED_FINAL}; no target file is ever written.
  6. + *
+ * This confirms the {@code maxRetriesTransient} boundary: the run that pushes the + * counter to the configured limit is the run that finalises the document. + */ + @Test + void transientErrors_multipleRuns_exhaustsLimit_reachesFailedFinal(@TempDir Path tempDir) + throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("doc.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + ctx.aiStub.configureTechnicalFailure(); + + // --- Run 1: counter 0 → 1, below limit → FAILED_RETRYABLE --- + ctx.runBatch(); + + DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(1); + assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(0); + + List attempts1 = ctx.findAttempts(fp); + assertThat(attempts1).hasSize(1); + assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(attempts1.get(0).retryable()).isTrue(); + + // --- Run 2: counter 1 → 2, below limit → FAILED_RETRYABLE --- + ctx.runBatch(); + + DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(record2.failureCounters().transientErrorCount()).isEqualTo(2); + + List attempts2 = ctx.findAttempts(fp); + assertThat(attempts2).hasSize(2); + assertThat(attempts2.get(1).retryable()).isTrue(); + + // --- Run 3: counter 2 → 3 = limit → FAILED_FINAL --- + ctx.runBatch(); + + DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record3.overallStatus()) + .as("Transient counter reaching the configured limit must finalise the document") + .isEqualTo(ProcessingStatus.FAILED_FINAL); + 
assertThat(record3.failureCounters().transientErrorCount()) + .as("Transient counter must equal maxRetriesTransient after exhaustion") + .isEqualTo(E2ETestContext.MAX_RETRIES_TRANSIENT); + assertThat(record3.failureCounters().contentErrorCount()).isEqualTo(0); + + List attempts3 = ctx.findAttempts(fp); + assertThat(attempts3).hasSize(3); + assertThat(attempts3.get(2).status()).isEqualTo(ProcessingStatus.FAILED_FINAL); + assertThat(attempts3.get(2).retryable()).isFalse(); + + assertThat(ctx.listTargetFiles()).isEmpty(); + } + } + + // ========================================================================= + // Scenario 9: Target copy error – both attempts fail → FAILED_RETRYABLE + // ========================================================================= + + /** + * Verifies the failure path of the immediate within-run retry mechanism: + *
    + *
  1. Run 1: AI stub returns a valid proposal → {@code PROPOSAL_READY}.
  2. + *
  3. Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails + * on every call. The coordinator issues the initial copy attempt (failure), + * grants exactly one immediate retry (also failure), then classifies the + * result as a transient technical error and records {@code FAILED_RETRYABLE} + * with an incremented transient counter.
  4. + *
+ * This confirms that the within-run retry does not suppress the error when both + * attempts fail, and that the transient counter is incremented exactly once. + */ + @Test + void targetCopyError_bothAttemptsFail_reachesFailedRetryable(@TempDir Path tempDir) + throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT); + Path pdfPath = ctx.sourceFolder().resolve("doc.pdf"); + DocumentFingerprint fp = ctx.computeFingerprint(pdfPath); + + // --- Run 1: establish PROPOSAL_READY --- + ctx.runBatch(); + + assertThat(ctx.findDocumentRecord(fp).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.PROPOSAL_READY); + + // --- Run 2: both copy attempts fail --- + ctx.setTargetFileCopyPortOverride( + (locator, resolvedFilename) -> + new TargetFileCopyTechnicalFailure( + "Simulated persistent write failure", true)); + + ctx.runBatch(); + + DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow(); + assertThat(record.overallStatus()) + .as("Both copy attempts failing must produce FAILED_RETRYABLE " + + "(transient error, limit not yet reached)") + .isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(record.failureCounters().transientErrorCount()) + .as("The double copy failure must increment the transient counter exactly once") + .isEqualTo(1); + + List attempts = ctx.findAttempts(fp); + assertThat(attempts).hasSize(2); + assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(attempts.get(1).retryable()).isTrue(); + + assertThat(ctx.listTargetFiles()).isEmpty(); + } + } + + // ========================================================================= + // Scenario 10: Two documents with identical target name → duplicate suffix + // ========================================================================= + + /** + * Verifies the duplicate target filename suffix rule at end-to-end level: + * when two distinct source documents both 
resolve to the same base target name + * ({@code "2024-01-15 - Stromabrechnung.pdf"}) in the same finalization run, the + * second document written to the target folder must receive a {@code (1)} suffix. + *
    + *
  1. Run 1: both PDFs are processed by the AI stub (same configured response) → + * both reach {@code PROPOSAL_READY}.
  2. + *
  3. Run 2: both are finalized in sequence; the first written claims the base name, + * the second receives {@code "2024-01-15 - Stromabrechnung(1).pdf"}.
  4. + *
+ * Both documents reach {@code SUCCESS} and the target folder contains exactly two files. + */ + @Test + void twoDifferentDocuments_sameProposedName_secondGetsDuplicateSuffix(@TempDir Path tempDir) + throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + // Two distinct PDFs; the AI stub returns the same title and date for both + ctx.createSearchablePdf("rechnung1.pdf", SAMPLE_PDF_TEXT); + ctx.createSearchablePdf("rechnung2.pdf", + "Stromabrechnung Zweiter Kunde Musterstadt Datum 15.01.2024 Betrag 99,00 EUR"); + + Path pdf1 = ctx.sourceFolder().resolve("rechnung1.pdf"); + Path pdf2 = ctx.sourceFolder().resolve("rechnung2.pdf"); + DocumentFingerprint fp1 = ctx.computeFingerprint(pdf1); + DocumentFingerprint fp2 = ctx.computeFingerprint(pdf2); + + // --- Run 1: AI stub processes both PDFs → PROPOSAL_READY --- + ctx.runBatch(); + + assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(ctx.listTargetFiles()).isEmpty(); + + // --- Run 2: both finalized; the second must receive the (1) suffix --- + ctx.runBatch(); + + assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.SUCCESS); + assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.SUCCESS); + + List targetFiles = ctx.listTargetFiles(); + assertThat(targetFiles) + .as("Both distinct documents must produce separate target files") + .hasSize(2); + assertThat(targetFiles) + .as("Base name must exist for the first document written") + .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung.pdf")); + assertThat(targetFiles) + .as("Duplicate suffix (1) must be appended for the second document written") + .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung(1).pdf")); + } + } + + // 
========================================================================= + // Scenario 11: Mixed batch – document failures do not affect batch outcome + // ========================================================================= + + /** + * Verifies that document-level failures do not cause a batch-level failure: + *
    + *
  1. Run 1: a searchable PDF reaches {@code PROPOSAL_READY}; a blank PDF + * (no extractable text) reaches {@code FAILED_RETRYABLE}. + * {@link BatchRunOutcome#SUCCESS} is returned.
  2. + *
  3. Run 2: the searchable PDF is finalized to {@code SUCCESS}; + * the blank PDF reaches its second content error and is finalized to + * {@code FAILED_FINAL}. {@link BatchRunOutcome#SUCCESS} is returned.
  4. + *
+ * This confirms the exit-code contract: only hard bootstrap or infrastructure + * failures produce a non-zero exit code; document-level errors do not. + */ + @Test + void mixedBatch_oneSuccess_oneContentError_batchOutcomeIsSuccess(@TempDir Path tempDir) + throws Exception { + try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) { + ctx.createSearchablePdf("good.pdf", SAMPLE_PDF_TEXT); + ctx.createBlankPdf("blank.pdf"); + + Path goodPdf = ctx.sourceFolder().resolve("good.pdf"); + Path blankPdf = ctx.sourceFolder().resolve("blank.pdf"); + DocumentFingerprint fpGood = ctx.computeFingerprint(goodPdf); + DocumentFingerprint fpBlank = ctx.computeFingerprint(blankPdf); + + // --- Run 1 --- + BatchRunOutcome run1 = ctx.runBatch(); + + assertThat(run1) + .as("Batch must complete with SUCCESS even when individual documents fail") + .isEqualTo(BatchRunOutcome.SUCCESS); + assertThat(ctx.findDocumentRecord(fpGood).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.PROPOSAL_READY); + assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow().overallStatus()) + .isEqualTo(ProcessingStatus.FAILED_RETRYABLE); + assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow() + .failureCounters().contentErrorCount()).isEqualTo(1); + + // --- Run 2 --- + BatchRunOutcome run2 = ctx.runBatch(); + + assertThat(run2) + .as("Batch must complete with SUCCESS even when a document is finalised " + + "to FAILED_FINAL") + .isEqualTo(BatchRunOutcome.SUCCESS); + + DocumentRecord goodRecord = ctx.findDocumentRecord(fpGood).orElseThrow(); + assertThat(goodRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS); + + DocumentRecord blankRecord = ctx.findDocumentRecord(fpBlank).orElseThrow(); + assertThat(blankRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL); + assertThat(blankRecord.failureCounters().contentErrorCount()).isEqualTo(2); + + // Exactly one target file from the successfully processed document + List targetFiles = ctx.listTargetFiles(); + 
assertThat(targetFiles).hasSize(1); + assertThat(targetFiles.get(0)).endsWith(".pdf"); + } + } +} diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/E2ETestContext.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/E2ETestContext.java new file mode 100644 index 0000000..09b17d6 --- /dev/null +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/E2ETestContext.java @@ -0,0 +1,406 @@ +package de.gecheckt.pdf.umbenenner.bootstrap.e2e; + +import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.prompt.FilesystemPromptPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter; +import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter; +import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration; +import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome; +import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity; +import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable; +import 
de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure; +import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository; +import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger; +import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort; +import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort; +import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort; +import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort; +import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort; +import de.gecheckt.pdf.umbenenner.application.service.AiNamingService; +import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator; +import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator; +import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase; +import de.gecheckt.pdf.umbenenner.bootstrap.adapter.Log4jProcessingLogger; +import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.RunId; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import java.io.IOException; +import 
java.nio.file.Files; +import java.nio.file.Path; +import java.time.Instant; +import java.util.List; +import java.util.Optional; +import java.util.UUID; + +/** + * Full adapter wiring context for deterministic end-to-end tests of the batch processing pipeline. + *

+ * Provides real infrastructure adapters for all subsystems (SQLite persistence, filesystem + * source/target folders, PDF text extraction, SHA-256 fingerprinting, run locking) and a + * configurable stub ({@link StubAiInvocationPort}) for the AI invocation port. + * This ensures that end-to-end tests cover the complete production code path without + * performing real HTTP calls to an AI service. + * + *

Invariants verified by this context

+ *
    + *
  • Happy-path to {@code SUCCESS}: two-run flow where Run 1 produces {@code PROPOSAL_READY} + * and Run 2 copies the file and records {@code SUCCESS}.
  • + *
  • Deterministic content error: blank PDFs (no extractable text) produce + * {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after a + * second run.
  • + *
  • Transient technical error: AI stub failures produce {@code FAILED_RETRYABLE} for each + * run until the transient error limit is reached, at which point the document is + * finalized to {@code FAILED_FINAL}.
  • + *
  • Skip after {@code SUCCESS}: a document in {@code SUCCESS} state generates a + * {@code SKIPPED_ALREADY_PROCESSED} attempt in subsequent runs.
  • + *
  • Skip after {@code FAILED_FINAL}: a document in {@code FAILED_FINAL} state generates a + * {@code SKIPPED_FINAL_FAILURE} attempt in subsequent runs.
  • + *
  • {@code PROPOSAL_READY} with later finalization: a document in {@code PROPOSAL_READY} + * state is finalized without an AI call in the next run.
  • + *
  • Target copy error with immediate retry: when the first copy attempt fails but the + * immediate within-run retry succeeds, the document is recorded as {@code SUCCESS}.
  • + *
+ * + *

Usage pattern

+ *
{@code
+ * @TempDir Path tempDir;
+ *
+ * @Test
+ * void example() throws Exception {
+ *     try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
+ *         ctx.createSearchablePdf("doc.pdf", "Rechnung 2024-01-15 ...");
+ *         BatchRunOutcome run1 = ctx.runBatch();
+ *         // assertions...
+ *     }
+ * }
+ * }
+ * + *

Thread safety

+ *

+ * Not thread-safe. Each test method should use its own context instance. + */ +public final class E2ETestContext implements AutoCloseable { + + /** Maximum pages before triggering a deterministic content error. */ + static final int MAX_PAGES = 50; + + /** Maximum text characters sent to the AI service. */ + static final int MAX_TEXT_CHARS = 10_000; + + /** + * Maximum transient retries before a document is finalized to {@code FAILED_FINAL}. + * Set to 3 to allow multi-run transient-failure tests without immediate finalization. + */ + static final int MAX_RETRIES_TRANSIENT = 3; + + /** Model name carried in attempt history (no real inference occurs). */ + static final String AI_MODEL = "e2e-stub-model"; + + private final Path sourceFolder; + private final Path targetFolder; + private final Path lockFile; + private final Path promptFile; + private final String jdbcUrl; + + private final SqliteDocumentRecordRepositoryAdapter documentRepo; + private final SqliteProcessingAttemptRepositoryAdapter attemptRepo; + + /** + * Configurable AI stub. Tests may call {@link StubAiInvocationPort#configureSuccess}, + * {@link StubAiInvocationPort#configureTechnicalFailure}, or + * {@link StubAiInvocationPort#reset()} between batch runs. + */ + public final StubAiInvocationPort aiStub; + + /** + * Optional override for the {@link TargetFileCopyPort}. + * {@code null} means the real {@link FilesystemTargetFileCopyAdapter} is used. + * Set via {@link #setTargetFileCopyPortOverride} to inject a failure-simulating stub. 
+ */ + private TargetFileCopyPort targetFileCopyPortOverride; + + private E2ETestContext( + Path sourceFolder, + Path targetFolder, + Path lockFile, + Path promptFile, + String jdbcUrl, + SqliteDocumentRecordRepositoryAdapter documentRepo, + SqliteProcessingAttemptRepositoryAdapter attemptRepo, + StubAiInvocationPort aiStub) { + this.sourceFolder = sourceFolder; + this.targetFolder = targetFolder; + this.lockFile = lockFile; + this.promptFile = promptFile; + this.jdbcUrl = jdbcUrl; + this.documentRepo = documentRepo; + this.attemptRepo = attemptRepo; + this.aiStub = aiStub; + } + + /** + * Initializes a fully wired end-to-end test context rooted in {@code tempDir}. + *

+ * Creates the {@code source/}, {@code target/} subdirectories and a minimal prompt + * file, initializes the SQLite schema, and wires all adapters. + * + * @param tempDir the JUnit {@code @TempDir} or any writable temporary directory + * @return a ready-to-use context; caller is responsible for closing it + * @throws Exception if schema initialization or directory/file creation fails + */ + public static E2ETestContext initialize(Path tempDir) throws Exception { + Path sourceFolder = Files.createDirectories(tempDir.resolve("source")); + Path targetFolder = Files.createDirectories(tempDir.resolve("target")); + Path lockFile = tempDir.resolve("run.lock"); + Path promptFile = tempDir.resolve("prompt.txt"); + + Files.writeString(promptFile, + "Analysiere das folgende Dokument und liefere Datum, Titel und Begruendung als JSON-Objekt."); + + String jdbcUrl = "jdbc:sqlite:" + tempDir.resolve("test.db").toAbsolutePath().toString().replace('\\', '/'); + + SqliteSchemaInitializationAdapter schema = new SqliteSchemaInitializationAdapter(jdbcUrl); + schema.initializeSchema(); + + SqliteDocumentRecordRepositoryAdapter documentRepo = + new SqliteDocumentRecordRepositoryAdapter(jdbcUrl); + SqliteProcessingAttemptRepositoryAdapter attemptRepo = + new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl); + + return new E2ETestContext( + sourceFolder, targetFolder, lockFile, promptFile, + jdbcUrl, documentRepo, attemptRepo, new StubAiInvocationPort()); + } + + // ========================================================================= + // Test fixture creation + // ========================================================================= + + /** + * Creates a single-page searchable PDF in the source folder with the given text. + *

+ * The file is ready for the batch run as soon as this method returns. + * + * @param filename the PDF filename (e.g. {@code "rechnung.pdf"}) + * @param textContent text to embed; should be at least a few words to pass pre-checks + * @return the absolute path of the created file + * @throws IOException if the file cannot be written + */ + public Path createSearchablePdf(String filename, String textContent) throws IOException { + Path pdfPath = sourceFolder.resolve(filename); + MinimalPdfFactory.createSearchablePdf(pdfPath, textContent); + return pdfPath; + } + + /** + * Creates a single-page blank PDF (no extractable text) in the source folder. + *

+ * Processing this file triggers the "no usable text" deterministic content error, + * which skips the AI call. + * + * @param filename the PDF filename (e.g. {@code "blank.pdf"}) + * @return the absolute path of the created file + * @throws IOException if the file cannot be written + */ + public Path createBlankPdf(String filename) throws IOException { + Path pdfPath = sourceFolder.resolve(filename); + MinimalPdfFactory.createBlankPdf(pdfPath); + return pdfPath; + } + + // ========================================================================= + // Batch execution + // ========================================================================= + + /** + * Executes one complete batch run using the current stub configuration. + *

+ * A fresh {@link BatchRunContext} with a new {@link RunId} is created for each call, + * matching the production behavior where every Task Scheduler invocation is a + * distinct run. + * + * @return the outcome of the batch run + */ + public BatchRunOutcome runBatch() { + DefaultBatchRunProcessingUseCase useCase = buildUseCase(); + BatchRunContext context = new BatchRunContext( + new RunId(UUID.randomUUID().toString()), Instant.now()); + return useCase.execute(context); + } + + // ========================================================================= + // State inspection helpers + // ========================================================================= + + /** + * Looks up the document master record for the given fingerprint. + * + * @param fingerprint the document fingerprint to query + * @return the master record if one exists, {@link Optional#empty()} if unknown or + * if a persistence lookup error occurred + */ + public Optional findDocumentRecord(DocumentFingerprint fingerprint) { + return switch (documentRepo.findByFingerprint(fingerprint)) { + case DocumentTerminalSuccess s -> Optional.of(s.record()); + case DocumentTerminalFinalFailure f -> Optional.of(f.record()); + case DocumentKnownProcessable p -> Optional.of(p.record()); + default -> Optional.empty(); + }; + } + + /** + * Returns all processing attempts for the given fingerprint in insertion order. + * + * @param fingerprint the document fingerprint to query + * @return all recorded attempts; empty list if none exist + */ + public List findAttempts(DocumentFingerprint fingerprint) { + return attemptRepo.findAllByFingerprint(fingerprint); + } + + /** + * Computes the SHA-256 fingerprint for the given file using the production adapter. + *

+ * Useful for correlating a test PDF with its database record after a batch run. + * + * @param file the absolute path of the file to fingerprint + * @return the fingerprint + * @throws IllegalStateException if fingerprint computation fails + */ + public DocumentFingerprint computeFingerprint(Path file) { + Sha256FingerprintAdapter adapter = new Sha256FingerprintAdapter(); + // Construct a minimal candidate that mirrors how the production source adapter creates one + SourceDocumentCandidate candidate = new SourceDocumentCandidate( + file.getFileName().toString(), + 0L, + new SourceDocumentLocator(file.toAbsolutePath().toString())); + return switch (adapter.computeFingerprint(candidate)) { + case FingerprintSuccess s -> s.fingerprint(); + default -> throw new IllegalStateException( + "Fingerprint computation failed for test fixture: " + file); + }; + } + + /** + * Lists the filenames of all files currently in the target folder. + * + * @return list of filenames; empty if target folder is empty + * @throws IOException if the target folder cannot be read + */ + public List listTargetFiles() throws IOException { + try (var stream = Files.list(targetFolder)) { + return stream.map(p -> p.getFileName().toString()).sorted().toList(); + } + } + + /** + * Returns the source folder path used by this context. + */ + public Path sourceFolder() { + return sourceFolder; + } + + /** + * Returns the target folder path used by this context. + */ + public Path targetFolder() { + return targetFolder; + } + + /** + * Overrides the {@link TargetFileCopyPort} used in subsequent batch runs. + * Pass {@code null} to revert to the real {@link FilesystemTargetFileCopyAdapter}. 
+ * + * @param override the port implementation to use, or {@code null} for the real adapter + */ + public void setTargetFileCopyPortOverride(TargetFileCopyPort override) { + this.targetFileCopyPortOverride = override; + } + + @Override + public void close() { + // No explicit cleanup needed: @TempDir removes all files automatically + } + + // ========================================================================= + // Private wiring + // ========================================================================= + + /** + * Constructs a fully wired {@link DefaultBatchRunProcessingUseCase} for a single batch run. + *

+ * All adapters are instantiated fresh per run to avoid shared mutable state between + * runs (e.g. locks, connection states). The AI stub and optional copy-port override + * are re-used across runs within the same test. + */ + private DefaultBatchRunProcessingUseCase buildUseCase() { + RuntimeConfiguration runtimeConfig = new RuntimeConfiguration( + MAX_PAGES, MAX_RETRIES_TRANSIENT, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT); + + FingerprintPort fingerprintPort = new Sha256FingerprintAdapter(); + + DocumentRecordRepository documentRecordRepository = documentRepo; + ProcessingAttemptRepository processingAttemptRepository = attemptRepo; + UnitOfWorkPort unitOfWorkPort = new SqliteUnitOfWorkAdapter(jdbcUrl); + + ProcessingLogger coordinatorLogger = new Log4jProcessingLogger( + DocumentProcessingCoordinator.class); + TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(targetFolder); + TargetFileCopyPort targetFileCopyPort = (targetFileCopyPortOverride != null) + ? targetFileCopyPortOverride + : new FilesystemTargetFileCopyAdapter(targetFolder); + + DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator( + documentRecordRepository, + processingAttemptRepository, + unitOfWorkPort, + targetFolderPort, + targetFileCopyPort, + coordinatorLogger, + MAX_RETRIES_TRANSIENT); + + PromptPort promptPort = new FilesystemPromptPortAdapter(promptFile); + ClockPort clockPort = new SystemClockAdapter(); + AiResponseValidator aiResponseValidator = new AiResponseValidator(clockPort); + AiNamingService aiNamingService = new AiNamingService( + aiStub, promptPort, aiResponseValidator, AI_MODEL, MAX_TEXT_CHARS); + + ProcessingLogger useCaseLogger = new Log4jProcessingLogger( + DefaultBatchRunProcessingUseCase.class); + + RunLockPort runLockPort = new FilesystemRunLockPortAdapter(lockFile); + SourceDocumentCandidatesPort candidatesPort = + new SourceDocumentCandidatesPortAdapter(sourceFolder); + PdfTextExtractionPort extractionPort = new 
PdfTextExtractionPortAdapter(); + + return new DefaultBatchRunProcessingUseCase( + runtimeConfig, + runLockPort, + candidatesPort, + extractionPort, + fingerprintPort, + coordinator, + aiNamingService, + useCaseLogger); + } +} diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/MinimalPdfFactory.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/MinimalPdfFactory.java new file mode 100644 index 0000000..a4ba61c --- /dev/null +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/MinimalPdfFactory.java @@ -0,0 +1,72 @@ +package de.gecheckt.pdf.umbenenner.bootstrap.e2e; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.font.PDType1Font; +import org.apache.pdfbox.pdmodel.font.Standard14Fonts; + +import java.io.IOException; +import java.nio.file.Path; + +/** + * Factory for creating minimal PDF test fixtures used in end-to-end tests. + *

+ * Provides two variants: + *

    + *
  • Searchable PDFs with embedded text content — used for happy-path, transient-error, + * and target-copy-failure scenarios where the pre-check must pass.
  • + *
  • Blank PDFs with no extractable text — used for deterministic content-error + * scenarios where the pre-check must fail.
  • + *
+ *

+ * Uses Apache PDFBox to create real, structurally valid PDF files so that the + * production {@code PdfTextExtractionPortAdapter} processes them correctly. + */ +final class MinimalPdfFactory { + + private MinimalPdfFactory() { + // Static utility class — not instantiable + } + + /** + * Creates a single-page searchable PDF with the given text content at the output path. + *

+ * The resulting file passes the production pre-checks for minimum text length and + * page count, enabling the AI naming pipeline to run. + * + * @param outputPath the path where the PDF will be written; parent directory must exist + * @param textContent the text to embed in the PDF; should be non-empty for happy-path tests + * @throws IOException if the file cannot be written + */ + static void createSearchablePdf(Path outputPath, String textContent) throws IOException { + try (PDDocument doc = new PDDocument()) { + PDPage page = new PDPage(); + doc.addPage(page); + try (PDPageContentStream stream = new PDPageContentStream(doc, page)) { + stream.beginText(); + stream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12); + stream.newLineAtOffset(50, 700); + stream.showText(textContent); + stream.endText(); + } + doc.save(outputPath.toFile()); + } + } + + /** + * Creates a single-page blank PDF with no text content at the output path. + *

+ * The resulting file triggers the "no usable text" pre-check failure + * (deterministic content error), which does not invoke the AI service. + * + * @param outputPath the path where the PDF will be written; parent directory must exist + * @throws IOException if the file cannot be written + */ + static void createBlankPdf(Path outputPath) throws IOException { + try (PDDocument doc = new PDDocument()) { + doc.addPage(new PDPage()); + doc.save(outputPath.toFile()); + } + } +} diff --git a/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/StubAiInvocationPort.java b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/StubAiInvocationPort.java new file mode 100644 index 0000000..0c98e59 --- /dev/null +++ b/pdf-umbenenner-bootstrap/src/test/java/de/gecheckt/pdf/umbenenner/bootstrap/e2e/StubAiInvocationPort.java @@ -0,0 +1,109 @@ +package de.gecheckt.pdf.umbenenner.bootstrap.e2e; + +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure; +import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse; +import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation; + +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Configurable test double for {@link AiInvocationPort}. + *

+ * Replaces the real HTTP-based AI adapter in end-to-end tests so that the processing + * pipeline can be exercised without real network calls. Supports two response modes: + *

    + *
  • Success mode (default): returns a structurally valid JSON response + * containing configurable {@code title} and {@code date} fields. This produces a + * {@code PROPOSAL_READY} outcome when the response passes validation.
  • + *
  • Technical failure mode: returns an {@link AiInvocationTechnicalFailure}, + * simulating network errors or service unavailability. This produces a + * {@code FAILED_RETRYABLE} (transient) outcome.
  • + *
+ *

+ * The stub tracks the total number of invocations so that tests can verify whether + * the AI pipeline was called at all (e.g. confirming that a {@code PROPOSAL_READY} + * finalization skips the AI call). + */ +final class StubAiInvocationPort implements AiInvocationPort { + + private final AtomicInteger invocationCount = new AtomicInteger(0); + + private volatile boolean returnTechnicalFailure = false; + private volatile String title = "Stromabrechnung"; + private volatile String date = "2024-01-15"; + private volatile String reasoning = "Testdokument fuer End-to-End-Tests."; + + /** + * Configures the stub to return a valid naming proposal with the given title and date. + * + * @param title the document title (must pass validation: max 20 chars, no special chars) + * @param date the document date in {@code YYYY-MM-DD} format, or {@code null} to omit + */ + void configureSuccess(String title, String date) { + this.title = title; + this.date = date; + this.returnTechnicalFailure = false; + } + + /** + * Configures the stub to return a transient technical failure on every invocation. + * The failure reason is {@code STUB_FAILURE}. + */ + void configureTechnicalFailure() { + this.returnTechnicalFailure = true; + } + + /** + * Resets the stub to its default success configuration with title "Stromabrechnung" + * and date "2024-01-15", and clears the invocation counter. + */ + void reset() { + this.title = "Stromabrechnung"; + this.date = "2024-01-15"; + this.reasoning = "Testdokument fuer End-to-End-Tests."; + this.returnTechnicalFailure = false; + invocationCount.set(0); + } + + /** + * Returns the total number of times {@link #invoke} was called since construction + * or the last {@link #reset()}. + */ + int invocationCount() { + return invocationCount.get(); + } + + /** + * Resets the invocation counter to zero without changing response configuration. 
+ */ + void resetInvocationCount() { + invocationCount.set(0); + } + + /** + * Returns either a success response or a technical failure, depending on current configuration. + * Increments the invocation counter on every call. + */ + @Override + public AiInvocationResult invoke(AiRequestRepresentation request) { + invocationCount.incrementAndGet(); + + if (returnTechnicalFailure) { + return new AiInvocationTechnicalFailure( + request, + "STUB_FAILURE", + "Test stub: configured to return technical failure"); + } + + String dateField = (date != null) ? "\"date\": \"" + date + "\", " : ""; + String rawJson = "{" + + dateField + + "\"title\": \"" + title + "\", " + + "\"reasoning\": \"" + reasoning + "\"" + + "}"; + return new AiInvocationSuccess(request, new AiRawResponse(rawJson)); + } +}