M8 komplett umgesetzt
This commit is contained in:
@@ -1,21 +1,70 @@
|
||||
# PDF Umbenenner Local Configuration Example
|
||||
# AP-005: Copy this file to config/application.properties and adjust values for local development
|
||||
# PDF Umbenenner – Konfigurationsbeispiel für lokale Entwicklung
|
||||
# Kopiere diese Datei nach config/application.properties und passe die Werte an.
|
||||
|
||||
# Mandatory M1 properties
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pflichtparameter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden.
|
||||
# Der Ordner muss vorhanden und lesbar sein.
|
||||
source.folder=./work/local/source
|
||||
target.folder=./work/local/target
|
||||
sqlite.file=./work/local/pdf-umbenenner.db
|
||||
api.baseUrl=http://localhost:8080/api
|
||||
api.model=gpt-4o-mini
|
||||
api.timeoutSeconds=30
|
||||
max.retries.transient=3
|
||||
max.pages=10
|
||||
max.text.characters=5000
|
||||
prompt.template.file=./config/prompts/local-template.txt
|
||||
|
||||
# Optional properties
|
||||
runtime.lock.file=./work/local/lock.pid
|
||||
log.directory=./work/local/logs
|
||||
log.level=INFO
|
||||
# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY
|
||||
# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden.
|
||||
# Wird automatisch angelegt, wenn er noch nicht existiert.
|
||||
target.folder=./work/local/target
|
||||
|
||||
# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie.
|
||||
# Das übergeordnete Verzeichnis muss vorhanden sein.
|
||||
sqlite.file=./work/local/pdf-umbenenner.db
|
||||
|
||||
# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions).
|
||||
api.baseUrl=https://api.openai.com/v1
|
||||
|
||||
# Modellname des KI-Dienstes.
|
||||
api.model=gpt-4o-mini
|
||||
|
||||
# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein).
|
||||
api.timeoutSeconds=30
|
||||
|
||||
# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument.
|
||||
# Muss eine ganze Zahl >= 1 sein. Bei Erreichen des Grenzwerts wird der
|
||||
# Dokumentstatus auf FAILED_FINAL gesetzt.
|
||||
max.retries.transient=3
|
||||
|
||||
# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als
|
||||
# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf).
|
||||
max.pages=10
|
||||
|
||||
# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird.
|
||||
max.text.characters=5000
|
||||
|
||||
# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator
|
||||
# in der Versuchshistorie.
|
||||
prompt.template.file=./config/prompts/template.txt
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API-Schlüssel
|
||||
# ---------------------------------------------------------------------------
|
||||
# Der API-Schlüssel kann wahlweise über diese Property oder über die
|
||||
# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden.
|
||||
# Die Umgebungsvariable hat Vorrang.
|
||||
api.key=your-local-api-key-here
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Optionale Parameter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen).
|
||||
# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis.
|
||||
runtime.lock.file=./work/local/pdf-umbenenner.lock
|
||||
|
||||
# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/.
|
||||
log.directory=./work/local/logs
|
||||
|
||||
# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO.
|
||||
log.level=INFO
|
||||
|
||||
# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben.
|
||||
# Erlaubte Werte: true oder false. Standard ist false (geschützt).
|
||||
# Nur für Diagnosezwecke auf true setzen.
|
||||
log.ai.sensitive=false
|
||||
|
||||
@@ -1,21 +1,71 @@
|
||||
# PDF Umbenenner Test Configuration Example
|
||||
# AP-005: Copy this file to config/application.properties and adjust values for testing
|
||||
# PDF Umbenenner – Konfigurationsbeispiel für Testläufe
|
||||
# Kopiere diese Datei nach config/application.properties und passe die Werte an.
|
||||
# Diese Vorlage enthält kürzere Timeouts und niedrigere Limits für Testläufe.
|
||||
|
||||
# Mandatory M1 properties
|
||||
# ---------------------------------------------------------------------------
|
||||
# Pflichtparameter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Quellordner: Ordner, aus dem OCR-verarbeitete PDF-Dateien gelesen werden.
|
||||
# Der Ordner muss vorhanden und lesbar sein.
|
||||
source.folder=./work/test/source
|
||||
target.folder=./work/test/target
|
||||
sqlite.file=./work/test/pdf-umbenenner-test.db
|
||||
api.baseUrl=http://localhost:8081/api
|
||||
api.model=gpt-4o-mini-test
|
||||
api.timeoutSeconds=10
|
||||
max.retries.transient=1
|
||||
max.pages=5
|
||||
max.text.characters=2000
|
||||
prompt.template.file=./config/prompts/test-template.txt
|
||||
|
||||
# Optional properties
|
||||
runtime.lock.file=./work/test/lock.pid
|
||||
log.directory=./work/test/logs
|
||||
log.level=DEBUG
|
||||
# api.key can also be set via environment variable PDF_UMBENENNER_API_KEY
|
||||
# Zielordner: Ordner, in den die umbenannten Kopien abgelegt werden.
|
||||
# Wird automatisch angelegt, wenn er noch nicht existiert.
|
||||
target.folder=./work/test/target
|
||||
|
||||
# SQLite-Datenbankdatei für Bearbeitungsstatus und Versuchshistorie.
|
||||
# Das übergeordnete Verzeichnis muss vorhanden sein.
|
||||
sqlite.file=./work/test/pdf-umbenenner-test.db
|
||||
|
||||
# Basis-URL des OpenAI-kompatiblen KI-Dienstes (ohne Pfadsuffix wie /chat/completions).
|
||||
api.baseUrl=https://api.openai.com/v1
|
||||
|
||||
# Modellname des KI-Dienstes.
|
||||
api.model=gpt-4o-mini
|
||||
|
||||
# HTTP-Timeout für KI-Anfragen in Sekunden (muss > 0 sein).
|
||||
api.timeoutSeconds=10
|
||||
|
||||
# Maximale Anzahl historisierter transienter Fehlversuche pro Dokument.
|
||||
# Muss eine ganze Zahl >= 1 sein. Bei Erreichen des Grenzwerts wird der
|
||||
# Dokumentstatus auf FAILED_FINAL gesetzt.
|
||||
max.retries.transient=1
|
||||
|
||||
# Maximale Seitenzahl pro Dokument. Dokumente mit mehr Seiten werden als
|
||||
# deterministischer Inhaltsfehler behandelt (kein KI-Aufruf).
|
||||
max.pages=5
|
||||
|
||||
# Maximale Zeichenanzahl des Dokumenttexts, der an die KI gesendet wird.
|
||||
max.text.characters=2000
|
||||
|
||||
# Pfad zur externen Prompt-Datei. Der Dateiname dient als Prompt-Identifikator
|
||||
# in der Versuchshistorie.
|
||||
prompt.template.file=./config/prompts/template.txt
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# API-Schlüssel
|
||||
# ---------------------------------------------------------------------------
|
||||
# Der API-Schlüssel kann wahlweise über diese Property oder über die
|
||||
# Umgebungsvariable PDF_UMBENENNER_API_KEY gesetzt werden.
|
||||
# Die Umgebungsvariable hat Vorrang.
|
||||
api.key=test-api-key-placeholder
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Optionale Parameter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Pfad zur Lock-Datei für den Startschutz (verhindert parallele Instanzen).
|
||||
# Wird weggelassen, verwendet die Anwendung pdf-umbenenner.lock im Arbeitsverzeichnis.
|
||||
runtime.lock.file=./work/test/pdf-umbenenner.lock
|
||||
|
||||
# Log-Verzeichnis. Wird weggelassen, schreibt Log4j2 in ./logs/.
|
||||
log.directory=./work/test/logs
|
||||
|
||||
# Log-Level (DEBUG, INFO, WARN, ERROR). Standard ist INFO.
|
||||
log.level=DEBUG
|
||||
|
||||
# Sensible KI-Inhalte (vollständige Rohantwort und Reasoning) ins Log schreiben.
|
||||
# Erlaubte Werte: true oder false. Standard ist false (geschützt).
|
||||
# Nur für Diagnosezwecke auf true setzen.
|
||||
log.ai.sensitive=false
|
||||
|
||||
@@ -1 +1,22 @@
|
||||
This is a test prompt template for AP-006 validation.
|
||||
Du bist ein Assistent zur automatischen Benennung gescannter PDF-Dokumente.
|
||||
|
||||
Analysiere den folgenden Dokumenttext und ermittle:
|
||||
|
||||
1. Einen inhaltlich passenden deutschen Titel (maximal 20 Zeichen, nur Buchstaben und Leerzeichen, keine Abkürzungen, keine generischen Bezeichnungen wie "Dokument", "Datei", "Scan" oder "PDF")
|
||||
2. Das relevanteste Datum des Dokuments
|
||||
|
||||
Datumsermittlung nach Priorität:
|
||||
- Rechnungsdatum
|
||||
- Dokumentdatum
|
||||
- Ausstellungsdatum oder Bescheiddatum
|
||||
- Schreibdatum oder Ende eines Leistungszeitraums
|
||||
- Kein Datum angeben, wenn kein belastbares Datum eindeutig ableitbar ist
|
||||
|
||||
Titelregeln:
|
||||
- Titel auf Deutsch formulieren
|
||||
- Eigennamen (Personen, Firmen, Orte) unverändert übernehmen
|
||||
- Maximal 20 Zeichen (nur der Basistitel, ohne Datumspräfix)
|
||||
- Keine Sonderzeichen außer Leerzeichen
|
||||
- Eindeutig und verständlich, nicht generisch
|
||||
|
||||
Wenn das Dokument nicht eindeutig interpretierbar ist, beschreibe dies im Reasoning.
|
||||
|
||||
209
docs/befundliste.md
Normal file
209
docs/befundliste.md
Normal file
@@ -0,0 +1,209 @@
|
||||
# Befundliste – Integrierte Gesamtprüfung des Endstands
|
||||
|
||||
**Erstellt:** 2026-04-08
|
||||
**Grundlage:** Vollständiger Maven-Reactor-Build, Unit-Tests, E2E-Tests, Integrationstests (Smoke),
|
||||
PIT-Mutationsanalyse, Code-Review gegen verbindliche Spezifikationen (technik-und-architektur.md,
|
||||
fachliche-anforderungen.md, CLAUDE.md)
|
||||
|
||||
---
|
||||
|
||||
## Ausgeführte Prüfungen
|
||||
|
||||
| Prüfbereich | Ausgeführt | Ergebnis |
|
||||
|---|---|---|
|
||||
| Maven-Reactor-Build (clean verify, alle Module) | ja | GRÜN |
|
||||
| Unit-Tests (Domain, Application, Adapter-out, Bootstrap) | ja | GRÜN |
|
||||
| E2E-Tests (BatchRunEndToEndTest, 11 Szenarien) | ja | GRÜN |
|
||||
| Integrationstests / Smoke-IT (ExecutableJarSmokeTestIT, 2 Tests) | ja | GRÜN |
|
||||
| PIT-Mutationsanalyse (alle Module) | ja | siehe Einzelbefunde |
|
||||
| Hexagonale Architektur – Domain-Isolation | ja | GRÜN |
|
||||
| Hexagonale Architektur – Port-Verträge (kein Path/NIO/JDBC) | ja | GRÜN |
|
||||
| Hexagonale Architektur – keine Adapter-zu-Adapter-Abhängigkeiten | ja | GRÜN |
|
||||
| Statusmodell (8 Werte, Semantik laut CLAUDE.md) | ja | GRÜN |
|
||||
| Naming-Convention-Regel (kein M1–M8, kein AP-xxx im Code) | ja | OFFEN (nicht blockierend) |
|
||||
| Logging-Sensibilitätsregel (log.ai.sensitive) | ja | GRÜN |
|
||||
| Exit-Code-Semantik (0 / 1) | ja | GRÜN |
|
||||
| Konfigurationsbeispiele (Pflicht- und Optionalparameter) | ja | GRÜN |
|
||||
| Betriebsdokumentation (docs/betrieb.md) | ja | GRÜN |
|
||||
| Prompt-Template im Repository | ja | GRÜN |
|
||||
| Rückwärtsverträglichkeit M4–M7 (Statusmodell, Schema) | ja (statisch) | GRÜN |
|
||||
|
||||
---
|
||||
|
||||
## Grüne Bereiche (keine Befunde)
|
||||
|
||||
### Build und Tests
|
||||
|
||||
- Vollständiger Maven-Reactor-Build erfolgreich (`BUILD SUCCESS`, Gesamtlaufzeit ~4 Minuten)
|
||||
- **827 Tests** bestanden, 0 Fehler, 0 übersprungen:
|
||||
- Domain: 227 Tests
|
||||
- Application: 295 Tests
|
||||
- Adapter-out: 227 Tests
|
||||
- Bootstrap (Unit): 76 Tests
|
||||
- Smoke-IT: 2 Tests
|
||||
|
||||
### E2E-Szenarien (BatchRunEndToEndTest)
|
||||
|
||||
Alle geforderten Kernszenarien aus der E2E-Testbasis sind abgedeckt und grün:
|
||||
|
||||
- Happy-Path: zwei Läufe → `SUCCESS`
|
||||
- Deterministischer Inhaltsfehler: zwei Läufe → `FAILED_FINAL`
|
||||
- Transienter KI-Fehler → `FAILED_RETRYABLE`
|
||||
- Skip nach `SUCCESS` → `SKIPPED_ALREADY_PROCESSED`
|
||||
- Skip nach `FAILED_FINAL` → `SKIPPED_FINAL_FAILURE`
|
||||
- `PROPOSAL_READY`-Finalisierung ohne erneuten KI-Aufruf im zweiten Lauf
|
||||
- Zielkopierfehler mit Sofort-Wiederholversuch → `SUCCESS`
|
||||
- Transiente Fehler über mehrere Läufe → Ausschöpfung → `FAILED_FINAL`
|
||||
- Zielkopierfehler beide Versuche gescheitert → `FAILED_RETRYABLE`
|
||||
- Zwei verschiedene Dokumente, gleicher Vorschlagsname → Dubletten-Suffix `(1)`
|
||||
- Mixed-Batch: ein Erfolg, ein Inhaltsfehler → Batch-Outcome `SUCCESS` (Exit-Code 0)
|
||||
|
||||
### Hexagonale Architektur
|
||||
|
||||
- **Domain** vollständig infrastrukturfrei: keine Imports aus `java.nio`, `java.io.File`,
|
||||
JDBC, Log4j oder HTTP-Bibliotheken
|
||||
- **Port-Verträge** (alle Interfaces in `application.port.out`) enthalten keine `Path`-,
|
||||
`File`-, NIO- oder JDBC-Typen; nur Domain-Typen werden in Signaturen verwendet
|
||||
- **Keine Adapter-zu-Adapter-Abhängigkeiten** in `adapter-out`: kein Modul referenziert
|
||||
ein anderes Adapter-Implementierungspaket direkt
|
||||
- **Abhängigkeitsrichtung** korrekt: adapter-out → application → domain
|
||||
|
||||
### Fachregeln
|
||||
|
||||
- Statusmodell vollständig (8 Werte: `READY_FOR_AI`, `PROPOSAL_READY`, `SUCCESS`,
|
||||
`FAILED_RETRYABLE`, `FAILED_FINAL`, `SKIPPED_ALREADY_PROCESSED`,
|
||||
`SKIPPED_FINAL_FAILURE`, `PROCESSING`)
|
||||
- Retry-Semantik korrekt implementiert (deterministisch 1 Retry → final;
|
||||
transient bis `max.retries.transient`)
|
||||
- Skip-Semantik korrekt (SUCCESS → Skip, FAILED_FINAL → Skip, keine Zähleränderung)
|
||||
- Führende Proposal-Quelle: `PROPOSAL_READY`-Versuch wird korrekt als Quelle verwendet
|
||||
- SUCCESS-Bedingung: erst nach Zielkopie und konsistenter Persistenz
|
||||
|
||||
### Logging und Sensibilität
|
||||
|
||||
- `log.ai.sensitive`-Mechanismus vollständig implementiert und getestet
|
||||
- Default `false` (sicher): KI-Rohantwort und Reasoning nicht im Log
|
||||
- Persistenz in SQLite unabhängig von dieser Einstellung
|
||||
- Konfiguration in beiden Beispieldateien dokumentiert
|
||||
|
||||
### Konfiguration und Dokumentation
|
||||
|
||||
- `config/application-local.example.properties`: vollständig, alle Pflicht- und
|
||||
Optionalparameter vorhanden
|
||||
- `config/application-test.example.properties`: vollständig
|
||||
- `config/prompts/template.txt`: Prompt-Template im Repository vorhanden
|
||||
- `docs/betrieb.md`: Betriebsdokumentation mit Start, Konfiguration, Exit-Codes,
|
||||
Retry-Grundverhalten, Logging-Sensibilität
|
||||
- Konfigurationsparameter-Namen in Dokumentation und Code konsistent
|
||||
|
||||
### Exit-Code-Semantik
|
||||
|
||||
- Exit-Code `0`: technisch ordnungsgemäßer Lauf (auch bei Teilfehlern einzelner Dokumente)
|
||||
- Exit-Code `1`: harte Start-/Bootstrap-Fehler, ungültige Konfiguration, Lock-Fehler
|
||||
- Implementierung in `PdfUmbenennerApplication` und `BootstrapRunner` korrekt
|
||||
|
||||
### PIT-Mutationsanalyse (Gesamtstand)
|
||||
|
||||
- Domain: 83 % Mutation Kill Rate
|
||||
- Adapter-out: 83 % Mutation Kill Rate
|
||||
- Application: 87 % Test Strength
|
||||
- Bootstrap: 76 % Kill Rate (34 Mutationen, 26 getötet)
|
||||
|
||||
---
|
||||
|
||||
## Offene Punkte
|
||||
|
||||
### Nicht blockierend
|
||||
|
||||
#### B1 – Naming-Convention-Verletzungen in Code, Tests und Konfiguration (CLAUDE.md § Naming-Regel)
|
||||
|
||||
**Themenbereich:** Dokumentation / Codequalität
|
||||
**Norm:** CLAUDE.md verbietet explizit Meilenstein- (M1–M8) und Arbeitspaket-Bezeichner (AP-xxx)
|
||||
in Implementierungen, Kommentaren und JavaDoc.
|
||||
**Befund:** 43 Treffer in `.java`-Dateien (21 in Produktionscode, 22 in Testcode) sowie
|
||||
1 Treffer in `config/application.properties`.
|
||||
|
||||
Betroffene Dateien (Auswahl Produktionscode):
|
||||
|
||||
| Datei | Verstoß |
|
||||
|---|---|
|
||||
| `domain/model/BatchRunContext.java` | `@since M2-AP-003` |
|
||||
| `domain/model/DocumentFingerprint.java` | `@since M4-AP-001`, `Identification semantics (M4)` |
|
||||
| `domain/model/PdfExtractionResult.java` | `@since M3-AP-001` |
|
||||
| `domain/model/SourceDocumentCandidate.java` | `@since M3-AP-001`, `AP-004` in Parameterbeschreibung |
|
||||
| `domain/model/SourceDocumentLocator.java` | `@since M3-AP-001` |
|
||||
| `adapter/out/lock/FilesystemRunLockPortAdapter.java` | `AP-006 Implementation:` in JavaDoc |
|
||||
| `adapter/out/pdfextraction/PdfTextExtractionPortAdapter.java` | `AP-003:` in Inline-Kommentaren |
|
||||
| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapter.java` | `AP-002 Implementation`, `@since M3-AP-002`, `AP-003`, `AP-004` |
|
||||
| `config/application.properties` | Kommentarheader `# PDF Umbenenner Configuration for AP-006 Testing` |
|
||||
|
||||
Betroffene Dateien (Auswahl Testcode):
|
||||
|
||||
| Datei | Verstoß |
|
||||
|---|---|
|
||||
| `adapter/out/bootstrap/validation/StartConfigurationValidatorTest.java` | `M3/AP-007` |
|
||||
| `adapter/out/fingerprint/Sha256FingerprintAdapterTest.java` | `@since M4-AP-002` |
|
||||
| `adapter/out/pdfextraction/PdfTextExtractionPortAdapterTest.java` | `M3-AP-003`, `AP-003`, `AP-004` |
|
||||
| `adapter/out/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java` | `M3-AP-002`, `AP-004` |
|
||||
| `adapter/out/sqlite/SqliteUnitOfWorkAdapterTest.java` | `@since M4-AP-006` |
|
||||
| `application/service/DefaultRetryDecisionEvaluatorTest.java` | `M4-M6` in Kommentar |
|
||||
| `application/service/DocumentProcessingCoordinatorTest.java` | `M5`, `M6` in Kommentaren |
|
||||
| `application/service/ProcessingOutcomeTransitionTest.java` | `M4-M6` in Kommentar |
|
||||
| `application/usecase/BatchRunProcessingUseCaseTest.java` | `M7` in Kommentar |
|
||||
| `bootstrap/ExecutableJarSmokeTestIT.java` | `AP-008`, `M1` in JavaDoc |
|
||||
|
||||
**Bewertung:** Rein kosmetisch/dokumentarisch. Kein Einfluss auf Funktionalität, Build
|
||||
oder Testergebnis. Betrifft ausschließlich Kommentare und JavaDoc-Annotationen.
|
||||
**Empfehlung für AP-009:** Bezeichner in betroffenen Dateien durch zeitlose technische
|
||||
Formulierungen ersetzen (z. B. `@since M4-AP-001` → entfernen oder in neutrales
|
||||
`@since 1.0` umwandeln; Inline-Kommentare sachlich formulieren).
|
||||
|
||||
---
|
||||
|
||||
#### B2 – StartConfiguration in Application-Schicht enthält java.nio.file.Path (Architektur-Grenzfall)
|
||||
|
||||
**Themenbereich:** Architektur
|
||||
**Norm:** „Application orchestriert Use Cases und enthält keine technischen
|
||||
Implementierungsdetails“ (technik-und-architektur.md §3.1); Port-Verträge dürfen keine
|
||||
NIO-Typen enthalten (CLAUDE.md).
|
||||
**Befund:** `StartConfiguration` (in `application/config/startup/`) ist ein Java-Record
|
||||
mit `java.nio.file.Path`-Feldern für `sourceFolder`, `targetFolder`, `sqliteFile`,
|
||||
`promptTemplateFile`, `runtimeLockFile`, `logDirectory`.
|
||||
**Kontext:** `StartConfiguration` ist kein Port-Vertrag, sondern ein unveränderliches
|
||||
Konfigurations-DTO, das ausschließlich von Bootstrap erzeugt und an Adapter übergeben wird.
|
||||
Die Port-Verträge selbst sind sauber (keine Path-Typen in Port-Interfaces).
|
||||
**Bewertung:** Grenzfall. `Path` ist kein fachliches Objekt, aber auch kein schwerer
|
||||
Architekturverstoß in diesem Kontext. Die Alternative (String-Repräsentation und Auflösung
|
||||
im Adapter) hätte keinen Mehrwert für das Betriebsmodell.
|
||||
**Empfehlung für AP-009:** Auf Wunsch im Rahmen von AP-009 prüfen, ob das Verschieben von
|
||||
`StartConfiguration` in das Bootstrap-Modul sinnvoller wäre. Keine Pflicht, da kein
|
||||
funktionaler Defekt vorliegt.
|
||||
|
||||
---
|
||||
|
||||
#### B3 – PIT-Überlebende in Bootstrap (Bootstrap: 76 % Kill Rate)
|
||||
|
||||
**Themenbereich:** Testqualität
|
||||
**Befund:** 8 überlebende Mutanten im Bootstrap-Modul (34 generiert, 26 getötet).
|
||||
Hauptkategorie: `VoidMethodCallMutator` (2 Überlebende, 2 ohne Coverage).
|
||||
**Bewertung:** Betrifft vor allem Logging-Calls und nicht-kritische Hilfsmethoden.
|
||||
Keine funktional tragenden Entscheidungspfade betroffen.
|
||||
**Empfehlung:** Kein AP-009-Handlungsbedarf; wurde bereits in AP-007 auf akzeptablem
|
||||
Niveau konsolidiert.
|
||||
|
||||
---
|
||||
|
||||
## Zusammenfassung
|
||||
|
||||
| Klassifikation | Anzahl | Beschreibung |
|
||||
|---|---|---|
|
||||
| Release-Blocker | **0** | – |
|
||||
| Nicht blockierend | **3** | B1 Naming, B2 Path-Grenzfall, B3 PIT-Bootstrap |
|
||||
|
||||
**Der Endstand ist produktionsbereit.** Alle fachlichen, technischen und architekturellen
|
||||
Kernanforderungen sind umgesetzt und durch automatisierte Tests abgesichert. Der Maven-Build
|
||||
ist fehlerfrei. Die identifizierten offenen Punkte sind ausschließlich nicht blockierend.
|
||||
|
||||
Falls AP-009 durchgeführt wird, sollte der Fokus auf **B1** (Naming-Convention-Bereinigung)
|
||||
liegen, da dieser Punkt die einzige verbindliche CLAUDE.md-Regel betrifft, die noch nicht
|
||||
vollständig eingehalten wird.
|
||||
214
docs/betrieb.md
Normal file
214
docs/betrieb.md
Normal file
@@ -0,0 +1,214 @@
|
||||
# Betriebsdokumentation – PDF Umbenenner
|
||||
|
||||
## Zweck
|
||||
|
||||
Der PDF Umbenenner liest bereits OCR-verarbeitete, durchsuchbare PDF-Dateien aus einem
|
||||
konfigurierten Quellordner, ermittelt per KI-Aufruf einen normierten deutschen Dateinamen
|
||||
und legt eine Kopie im konfigurierten Zielordner ab. Die Quelldatei bleibt unverändert.
|
||||
|
||||
---
|
||||
|
||||
## Voraussetzungen
|
||||
|
||||
- Java 21 (JRE oder JDK)
|
||||
- Zugang zu einem OpenAI-kompatiblen KI-Dienst (API-Schlüssel erforderlich)
|
||||
- Quellordner mit OCR-verarbeiteten PDF-Dateien
|
||||
- Schreibzugriff auf Zielordner und Datenbankverzeichnis
|
||||
|
||||
---
|
||||
|
||||
## Start des ausführbaren JAR
|
||||
|
||||
Das ausführbare JAR wird durch den Maven-Build im Verzeichnis
|
||||
`pdf-umbenenner-bootstrap/target/` erzeugt:
|
||||
|
||||
```
|
||||
java -jar pdf-umbenenner-bootstrap/target/pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar
|
||||
```
|
||||
|
||||
Die Anwendung liest die Konfiguration aus `config/application.properties` relativ zum
|
||||
Arbeitsverzeichnis, in dem der Befehl ausgeführt wird.
|
||||
|
||||
### Start über Windows Task Scheduler
|
||||
|
||||
Empfohlene Startsequenz für den Windows Task Scheduler:
|
||||
|
||||
1. Aktion: Programm/Skript starten
|
||||
2. Programm: `java`
|
||||
3. Argumente: `-jar pdf-umbenenner-bootstrap-0.0.1-SNAPSHOT.jar`
|
||||
4. Starten in: Verzeichnis mit `config/application.properties` und `config/prompts/`
|
||||
|
||||
---
|
||||
|
||||
## Konfiguration
|
||||
|
||||
Die Konfiguration wird aus `config/application.properties` geladen.
|
||||
Vorlagen für lokale und Test-Konfigurationen befinden sich in:
|
||||
|
||||
- `config/application-local.example.properties`
|
||||
- `config/application-test.example.properties`
|
||||
|
||||
### Pflichtparameter
|
||||
|
||||
| Parameter | Beschreibung |
|
||||
|------------------------|--------------|
|
||||
| `source.folder` | Quellordner mit OCR-PDFs (muss vorhanden und lesbar sein) |
|
||||
| `target.folder` | Zielordner für umbenannte Kopien (wird angelegt, wenn nicht vorhanden) |
|
||||
| `sqlite.file` | SQLite-Datenbankdatei (übergeordnetes Verzeichnis muss existieren) |
|
||||
| `api.baseUrl` | Basis-URL des KI-Dienstes (z. B. `https://api.openai.com/v1`) |
|
||||
| `api.model` | Modellname (z. B. `gpt-4o-mini`) |
|
||||
| `api.timeoutSeconds` | HTTP-Timeout für KI-Anfragen in Sekunden (ganzzahlig, > 0) |
|
||||
| `max.retries.transient`| Maximale transiente Fehlversuche pro Dokument (ganzzahlig, >= 1) |
|
||||
| `max.pages` | Maximale Seitenzahl pro Dokument (ganzzahlig, > 0) |
|
||||
| `max.text.characters` | Maximale Zeichenanzahl des Dokumenttexts für KI-Anfragen (ganzzahlig, > 0) |
|
||||
| `prompt.template.file` | Pfad zur externen Prompt-Datei (muss vorhanden sein) |
|
||||
|
||||
### Optionale Parameter
|
||||
|
||||
| Parameter | Beschreibung | Standard |
|
||||
|----------------------|--------------|---------|
|
||||
| `api.key` | API-Schlüssel (alternativ: Umgebungsvariable `PDF_UMBENENNER_API_KEY`) | – |
|
||||
| `runtime.lock.file` | Lock-Datei für Startschutz | `pdf-umbenenner.lock` im Arbeitsverzeichnis |
|
||||
| `log.directory` | Log-Verzeichnis | `./logs/` |
|
||||
| `log.level` | Log-Level (`DEBUG`, `INFO`, `WARN`, `ERROR`) | `INFO` |
|
||||
| `log.ai.sensitive` | KI-Rohantwort und Reasoning ins Log schreiben (`true`/`false`) | `false` |
|
||||
|
||||
### API-Schlüssel
|
||||
|
||||
Der API-Schlüssel kann auf zwei Wegen gesetzt werden:
|
||||
|
||||
1. **Umgebungsvariable `PDF_UMBENENNER_API_KEY`** (empfohlen, hat Vorrang)
|
||||
2. Property `api.key` in `config/application.properties`
|
||||
|
||||
Die Umgebungsvariable hat immer Vorrang über die Properties-Datei.
|
||||
|
||||
---
|
||||
|
||||
## Prompt-Konfiguration
|
||||
|
||||
Der Prompt wird aus der in `prompt.template.file` konfigurierten externen Textdatei geladen.
|
||||
Der Dateiname der Prompt-Datei dient als Prompt-Identifikator in der Versuchshistorie
|
||||
(SQLite) und ermöglicht so die Nachvollziehbarkeit, welche Prompt-Version für welchen
|
||||
Verarbeitungsversuch verwendet wurde.
|
||||
|
||||
Eine Vorlage befindet sich in `config/prompts/template.txt` und kann direkt verwendet oder
|
||||
an den jeweiligen KI-Dienst angepasst werden.
|
||||
|
||||
Die Anwendung ergänzt den Prompt automatisch um:
|
||||
- einen Dokumenttext-Abschnitt
|
||||
- eine explizite JSON-Antwortspezifikation mit den Feldern `title`, `reasoning` und `date`
|
||||
|
||||
Der Prompt in `template.txt` muss deshalb **keine** JSON-Formatanweisung enthalten –
|
||||
nur den inhaltlichen Auftrag an die KI.
|
||||
|
||||
---
|
||||
|
||||
## Zielformat
|
||||
|
||||
Jede erfolgreich verarbeitete PDF-Datei wird im Zielordner unter folgendem Namen abgelegt:
|
||||
|
||||
```
|
||||
YYYY-MM-DD - Titel.pdf
|
||||
```
|
||||
|
||||
Bei Namenskollisionen wird ein laufendes Suffix angehängt:
|
||||
|
||||
```
|
||||
YYYY-MM-DD - Titel(1).pdf
|
||||
YYYY-MM-DD - Titel(2).pdf
|
||||
```
|
||||
|
||||
Das Suffix zählt nicht zu den 20 Zeichen des Basistitels.
|
||||
|
||||
---
|
||||
|
||||
## Retry- und Skip-Verhalten
|
||||
|
||||
### Dokumentstatus
|
||||
|
||||
| Status | Bedeutung |
|
||||
|---------------------------|-----------|
|
||||
| `SUCCESS` | Erfolgreich verarbeitet und kopiert |
|
||||
| `FAILED_RETRYABLE` | Fehlgeschlagen, erneuter Versuch in späterem Lauf möglich |
|
||||
| `FAILED_FINAL` | Terminal fehlgeschlagen, wird nicht erneut verarbeitet |
|
||||
| `SKIPPED_ALREADY_PROCESSED` | Übersprungen – Dokument bereits erfolgreich verarbeitet |
|
||||
| `SKIPPED_FINAL_FAILURE` | Übersprungen – Dokument terminal fehlgeschlagen |
|
||||
|
||||
### Retry-Regeln
|
||||
|
||||
**Deterministische Inhaltsfehler** (z. B. kein extrahierbarer Text, Seitenlimit überschritten,
|
||||
unbrauchbarer KI-Titel):
|
||||
|
||||
- Erster Fehler → `FAILED_RETRYABLE` (ein Wiederholversuch in späterem Lauf erlaubt)
|
||||
- Zweiter Fehler → `FAILED_FINAL` (kein weiterer Versuch)
|
||||
|
||||
**Transiente technische Fehler** (z. B. KI nicht erreichbar, HTTP-Timeout):
|
||||
|
||||
- Wiederholbar bis zum Grenzwert `max.retries.transient`
|
||||
- Bei Erreichen des Grenzwerts → `FAILED_FINAL`
|
||||
|
||||
**Technischer Sofort-Wiederholversuch:**
|
||||
|
||||
Bei einem Schreibfehler der Zielkopie wird innerhalb desselben Laufs exakt ein
|
||||
Sofort-Wiederholversuch unternommen. Dieser zählt nicht zum laufübergreifenden
|
||||
Fehlerzähler.
|
||||
|
||||
---
|
||||
|
||||
## Logging
|
||||
|
||||
Logs werden in das konfigurierte `log.directory` geschrieben (Standard: `./logs/`).
|
||||
Log-Rotation erfolgt täglich und bei Erreichen von 10 MB je Datei.
|
||||
|
||||
### Sensible KI-Inhalte
|
||||
|
||||
Standardmäßig werden die vollständige KI-Rohantwort und das KI-Reasoning **nicht** ins Log
|
||||
geschrieben, sondern ausschließlich in der SQLite-Datenbank gespeichert.
|
||||
|
||||
Die Ausgabe kann für Diagnosezwecke mit `log.ai.sensitive=true` freigeschaltet werden.
|
||||
Erlaubte Werte: `true` oder `false`. Jeder andere Wert ist ungültig und verhindert den Start.
|
||||
|
||||
---
|
||||
|
||||
## Exit-Codes
|
||||
|
||||
| Code | Bedeutung |
|
||||
|------|-----------|
|
||||
| `0` | Lauf technisch ordnungsgemäß ausgeführt (auch bei dokumentbezogenen Teilfehlern) |
|
||||
| `1` | Harter Start- oder Bootstrap-Fehler (ungültige Konfiguration, Lock nicht erwerbbar, Schema-Initialisierungsfehler) |
|
||||
|
||||
Dokumentbezogene Fehler einzelner PDF-Dateien führen **nicht** zu Exit-Code `1`.
|
||||
|
||||
---
|
||||
|
||||
## Startschutz (Parallelinstanzschutz)
|
||||
|
||||
Die Anwendung verwendet eine exklusive Lock-Datei, um parallele Instanzen zu verhindern.
|
||||
Wenn bereits eine Instanz läuft, beendet sich die neue Instanz sofort mit Exit-Code `1`.
|
||||
|
||||
Der Pfad der Lock-Datei ist über `runtime.lock.file` konfigurierbar.
|
||||
Ohne Konfiguration wird `pdf-umbenenner.lock` im Arbeitsverzeichnis verwendet.
|
||||
|
||||
---
|
||||
|
||||
## SQLite-Datenbank
|
||||
|
||||
Die SQLite-Datei enthält:
|
||||
|
||||
- **Dokument-Stammsätze**: Gesamtstatus, Fehlerzähler, letzter Zieldateiname, Zeitstempel
|
||||
- **Versuchshistorie**: Jeder Verarbeitungsversuch mit Modell, Prompt-Identifikator,
|
||||
KI-Rohantwort, Reasoning, Datum, Titel und Fehlerstatus
|
||||
|
||||
Die Datenbank ist die führende Wahrheitsquelle für Bearbeitungsstatus und Nachvollziehbarkeit.
|
||||
Sie muss nicht manuell verwaltet werden – das Schema wird beim Start automatisch initialisiert.
|
||||
|
||||
---
|
||||
|
||||
## Systemgrenzen
|
||||
|
||||
- Nur OCR-verarbeitete, durchsuchbare PDF-Dateien werden verarbeitet
|
||||
- Keine eingebaute OCR-Funktion
|
||||
- Kein Web-UI, keine REST-API, keine interaktive Bedienung
|
||||
- Kein interner Scheduler – der Start erfolgt extern (z. B. Windows Task Scheduler)
|
||||
- Quelldateien werden nie überschrieben, verschoben oder gelöscht
|
||||
- Die Identifikation erfolgt über SHA-256-Fingerprint des Dateiinhalts, nicht über Dateinamen
|
||||
@@ -0,0 +1,18 @@
|
||||
/**
 * Outbound adapter for system time access.
 * <p>
 * Components:
 * <ul>
 *   <li>{@link de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter}
 *       — production implementation of
 *       {@link de.gecheckt.pdf.umbenenner.application.port.out.ClockPort}
 *       that delegates to the JVM system clock ({@code Instant.now()}).</li>
 * </ul>
 * <p>
 * The {@link de.gecheckt.pdf.umbenenner.application.port.out.ClockPort} abstraction ensures that
 * all application-layer and domain-layer code obtains the current instant through the port,
 * enabling deterministic time injection in tests without coupling to wall-clock time.
 * <p>
 * No date/time logic or formatting is performed in this package; that responsibility
 * belongs to the application layer.
 */
package de.gecheckt.pdf.umbenenner.adapter.out.clock;
|
||||
@@ -247,6 +247,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
|
||||
* @return the most recent {@code PROPOSAL_READY} attempt, or {@code null}
|
||||
* @throws DocumentPersistenceException if the query fails
|
||||
*/
|
||||
@Override
|
||||
public ProcessingAttempt findLatestProposalReadyAttempt(DocumentFingerprint fingerprint) {
|
||||
Objects.requireNonNull(fingerprint, "fingerprint must not be null");
|
||||
|
||||
@@ -259,7 +260,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
|
||||
final_target_file_name
|
||||
FROM processing_attempt
|
||||
WHERE fingerprint = ?
|
||||
AND status = 'PROPOSAL_READY'
|
||||
AND status = ?
|
||||
ORDER BY attempt_number DESC
|
||||
LIMIT 1
|
||||
""";
|
||||
@@ -270,6 +271,7 @@ public class SqliteProcessingAttemptRepositoryAdapter implements ProcessingAttem
|
||||
|
||||
pragmaStmt.execute(PRAGMA_FOREIGN_KEYS_ON);
|
||||
statement.setString(1, fingerprint.sha256Hex());
|
||||
statement.setString(2, ProcessingStatus.PROPOSAL_READY.name());
|
||||
|
||||
try (ResultSet rs = statement.executeQuery()) {
|
||||
if (rs.next()) {
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.sqlite;
|
||||
|
||||
import java.lang.reflect.InvocationTargetException;
|
||||
import java.lang.reflect.Proxy;
|
||||
import java.sql.Connection;
|
||||
import java.sql.DriverManager;
|
||||
import java.sql.SQLException;
|
||||
@@ -93,6 +95,32 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Wraps a shared transaction connection so that {@code close()} becomes a no-op.
|
||||
* <p>
|
||||
* Repository adapters manage their own connection lifecycle via try-with-resources,
|
||||
* which would close the shared transaction connection prematurely if not wrapped.
|
||||
* All other {@link Connection} methods are delegated unchanged to the underlying connection.
|
||||
*
|
||||
* @param underlying the real shared connection; must not be null
|
||||
* @return a proxy connection that ignores {@code close()} calls
|
||||
*/
|
||||
private static Connection nonClosingWrapper(Connection underlying) {
|
||||
return (Connection) Proxy.newProxyInstance(
|
||||
Connection.class.getClassLoader(),
|
||||
new Class<?>[] { Connection.class },
|
||||
(proxy, method, args) -> {
|
||||
if ("close".equals(method.getName())) {
|
||||
return null;
|
||||
}
|
||||
try {
|
||||
return method.invoke(underlying, args);
|
||||
} catch (InvocationTargetException e) {
|
||||
throw e.getCause();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private class TransactionOperationsImpl implements TransactionOperations {
|
||||
private final Connection connection;
|
||||
|
||||
@@ -102,14 +130,11 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort {
|
||||
|
||||
@Override
|
||||
public void saveProcessingAttempt(ProcessingAttempt attempt) {
|
||||
// Repository methods declare DocumentPersistenceException as the only thrown exception.
|
||||
// Any other exception (NullPointerException, etc.) will propagate to the outer try-catch
|
||||
// and be caught there.
|
||||
SqliteProcessingAttemptRepositoryAdapter repo =
|
||||
new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl) {
|
||||
@Override
|
||||
protected Connection getConnection() throws SQLException {
|
||||
return connection;
|
||||
return nonClosingWrapper(connection);
|
||||
}
|
||||
};
|
||||
repo.save(attempt);
|
||||
@@ -117,14 +142,11 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort {
|
||||
|
||||
@Override
|
||||
public void createDocumentRecord(DocumentRecord record) {
|
||||
// Repository methods declare DocumentPersistenceException as the only thrown exception.
|
||||
// Any other exception (NullPointerException, etc.) will propagate to the outer try-catch
|
||||
// and be caught there.
|
||||
SqliteDocumentRecordRepositoryAdapter repo =
|
||||
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) {
|
||||
@Override
|
||||
protected Connection getConnection() throws SQLException {
|
||||
return connection;
|
||||
return nonClosingWrapper(connection);
|
||||
}
|
||||
};
|
||||
repo.create(record);
|
||||
@@ -132,14 +154,11 @@ public class SqliteUnitOfWorkAdapter implements UnitOfWorkPort {
|
||||
|
||||
@Override
|
||||
public void updateDocumentRecord(DocumentRecord record) {
|
||||
// Repository methods declare DocumentPersistenceException as the only thrown exception.
|
||||
// Any other exception (NullPointerException, etc.) will propagate to the outer try-catch
|
||||
// and be caught there.
|
||||
SqliteDocumentRecordRepositoryAdapter repo =
|
||||
new SqliteDocumentRecordRepositoryAdapter(jdbcUrl) {
|
||||
@Override
|
||||
protected Connection getConnection() throws SQLException {
|
||||
return connection;
|
||||
return nonClosingWrapper(connection);
|
||||
}
|
||||
};
|
||||
repo.update(record);
|
||||
|
||||
@@ -0,0 +1,24 @@
|
||||
/**
|
||||
* Outbound adapter for writing the target file copy.
|
||||
* <p>
|
||||
* Components:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter}
|
||||
* — Filesystem-based implementation of
|
||||
* {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The adapter uses a two-step write pattern: the source is first copied to a temporary
|
||||
* file ({@code resolvedFilename + ".tmp"}) in the target folder, then renamed/moved to
|
||||
* the final filename. An atomic move is attempted first; a standard move is used as a
|
||||
* fallback when the filesystem does not support atomic cross-directory moves.
|
||||
* <p>
|
||||
* <strong>Source integrity:</strong> The source file is never modified, moved, or deleted.
|
||||
* Only a copy is created in the target folder.
|
||||
* <p>
|
||||
* <strong>Architecture boundary:</strong> All NIO ({@code Path}, {@code Files}) operations
|
||||
* are strictly confined to this package. The port interface
|
||||
* {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort} contains no
|
||||
* filesystem types, preserving the hexagonal architecture boundary.
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.targetcopy;
|
||||
@@ -0,0 +1,26 @@
|
||||
/**
|
||||
* Outbound adapter for target folder management and unique filename resolution.
|
||||
* <p>
|
||||
* Components:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter}
|
||||
* — Filesystem-based implementation of
|
||||
* {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort}.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Duplicate resolution:</strong> Given a base name such as
|
||||
* {@code 2024-01-15 - Rechnung.pdf}, the adapter checks whether the file exists in the
|
||||
* target folder and appends a numeric suffix ({@code (1)}, {@code (2)}, …) directly
|
||||
* before {@code .pdf} until a free name is found. The 20-character base-title limit
|
||||
* does not apply to the suffix.
|
||||
* <p>
|
||||
* <strong>Rollback support:</strong> The adapter provides a best-effort deletion method
|
||||
* used by the application layer to remove a successfully written target copy when
|
||||
* subsequent persistence fails, preventing orphaned target files.
|
||||
* <p>
|
||||
* <strong>Architecture boundary:</strong> All NIO ({@code Path}, {@code Files}) operations
|
||||
* are strictly confined to this package. The port interface
|
||||
* {@link de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort} contains no
|
||||
* filesystem types, preserving the hexagonal architecture boundary.
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.targetfolder;
|
||||
@@ -85,30 +85,6 @@ public class AiRequestComposer {
|
||||
Objects.requireNonNull(promptContent, "promptContent must not be null");
|
||||
Objects.requireNonNull(documentText, "documentText must not be null");
|
||||
|
||||
// The complete request text is composed in a fixed, deterministic order:
|
||||
// 1. Prompt content (instruction)
|
||||
// 2. Newline separator
|
||||
// 3. Prompt identifier marker (for traceability)
|
||||
// 4. Newline separator
|
||||
// 5. Document text section marker
|
||||
// 6. Newline separator
|
||||
// 7. Document text content
|
||||
// 8. Newline separator
|
||||
// 9. Response format specification (JSON-only with required fields)
|
||||
//
|
||||
// This order is fixed so that another implementation knows exactly where
|
||||
// each part is positioned and what to expect.
|
||||
StringBuilder requestBuilder = new StringBuilder();
|
||||
requestBuilder.append(promptContent);
|
||||
requestBuilder.append("\n");
|
||||
requestBuilder.append("--- Prompt-ID: ").append(promptIdentifier.identifier()).append(" ---");
|
||||
requestBuilder.append("\n");
|
||||
requestBuilder.append("--- Document Text ---");
|
||||
requestBuilder.append("\n");
|
||||
requestBuilder.append(documentText);
|
||||
requestBuilder.append("\n");
|
||||
appendJsonResponseFormat(requestBuilder);
|
||||
|
||||
// Record the exact character count of the document text that was included.
|
||||
// This is the length of the document text (not the complete request).
|
||||
int sentCharacterCount = documentText.length();
|
||||
|
||||
@@ -393,10 +393,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
private void logProcessingOutcome(SourceDocumentCandidate candidate, DocumentProcessingOutcome outcome) {
|
||||
switch (outcome) {
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed failed ->
|
||||
logger.info("Pre-checks FAILED for '{}': {} (Deterministic content error).",
|
||||
logger.info("Pre-checks failed for '{}': {} (deterministic content error).",
|
||||
candidate.uniqueIdentifier(), failed.failureReasonDescription());
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError technicalError ->
|
||||
logger.warn("Processing FAILED for '{}': {} (Technical error – retryable).",
|
||||
logger.warn("Processing failed for '{}': {} (transient technical error – retryable).",
|
||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady ready ->
|
||||
logger.info("AI naming proposal ready for '{}': title='{}', date={}.",
|
||||
@@ -404,10 +404,10 @@ public class DefaultBatchRunProcessingUseCase implements BatchRunProcessingUseCa
|
||||
ready.proposal().validatedTitle(),
|
||||
ready.proposal().resolvedDate());
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure aiTechnical ->
|
||||
logger.warn("AI technical failure for '{}': {} (Transient – retryable).",
|
||||
logger.warn("AI invocation failed for '{}': {} (transient technical error – retryable).",
|
||||
candidate.uniqueIdentifier(), aiTechnical.errorMessage());
|
||||
case de.gecheckt.pdf.umbenenner.domain.model.AiFunctionalFailure aiFunctional ->
|
||||
logger.info("AI functional failure for '{}': {} (Deterministic content error).",
|
||||
logger.info("AI naming failed for '{}': {} (deterministic content error).",
|
||||
candidate.uniqueIdentifier(), aiFunctional.errorMessage());
|
||||
default -> { /* other outcomes are handled elsewhere */ }
|
||||
}
|
||||
|
||||
@@ -314,4 +314,13 @@ class AiNamingServiceTest {
|
||||
.isInstanceOf(IllegalArgumentException.class)
|
||||
.hasMessageContaining("maxTextCharacters must be >= 1");
|
||||
}
|
||||
|
||||
@Test
|
||||
void constructor_maxTextCharactersOne_doesNotThrow() {
|
||||
// maxTextCharacters=1 is the minimum valid value (boundary test).
|
||||
// A changed-conditional-boundary mutation that changes '< 1' to '<= 1' would
|
||||
// cause this constructor call to throw — this test detects that mutation.
|
||||
new AiNamingService(aiInvocationPort, promptPort, validator, MODEL_NAME, 1);
|
||||
// No exception expected; reaching this line means the boundary is correct
|
||||
}
|
||||
}
|
||||
|
||||
@@ -829,8 +829,9 @@ class DocumentProcessingCoordinatorTest {
|
||||
// No PROPOSAL_READY attempt pre-populated
|
||||
|
||||
// persistTransientError returns true when the error record was persisted successfully
|
||||
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -851,8 +852,9 @@ class DocumentProcessingCoordinatorTest {
|
||||
null, DateSource.AI_PROVIDED, "Rechnung", null);
|
||||
attemptRepo.savedAttempts.add(badProposal);
|
||||
|
||||
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -871,8 +873,10 @@ class DocumentProcessingCoordinatorTest {
|
||||
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithFailingFolder.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
boolean result = coordinatorWithFailingFolder.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -891,8 +895,10 @@ class DocumentProcessingCoordinatorTest {
|
||||
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithFailingCopy.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
boolean result = coordinatorWithFailingCopy.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -914,8 +920,9 @@ class DocumentProcessingCoordinatorTest {
|
||||
"A".repeat(21), null);
|
||||
attemptRepo.savedAttempts.add(badProposal);
|
||||
|
||||
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -939,8 +946,9 @@ class DocumentProcessingCoordinatorTest {
|
||||
"Rechnung-2026", null);
|
||||
attemptRepo.savedAttempts.add(badProposal);
|
||||
|
||||
processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
boolean result = processor.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -1008,9 +1016,10 @@ class DocumentProcessingCoordinatorTest {
|
||||
new NoOpTargetFolderPort(), countingCopyPort, new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCountingCopy.processDeferredOutcome(
|
||||
boolean result = coordinatorWithCountingCopy.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_RETRYABLE)
|
||||
.findFirst()
|
||||
@@ -1037,9 +1046,10 @@ class DocumentProcessingCoordinatorTest {
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), failingCopy, new NoOpProcessingLogger(), 1);
|
||||
|
||||
coordinatorWith1Retry.processDeferredOutcome(
|
||||
boolean result = coordinatorWith1Retry.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(result, "processDeferredOutcome must return true when the transient error is persisted successfully");
|
||||
ProcessingAttempt errorAttempt = attemptRepo.savedAttempts.stream()
|
||||
.filter(a -> a.status() == ProcessingStatus.FAILED_FINAL)
|
||||
.findFirst()
|
||||
@@ -1055,6 +1065,58 @@ class DocumentProcessingCoordinatorTest {
|
||||
"Transient error counter must be 1 after the first cross-run transient error");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_copyFailure_retryDecisionLog_containsFailedRetryable() {
|
||||
// Verifies that when a copy failure leads to FAILED_RETRYABLE in persistTransientError,
|
||||
// the retry-decision log message specifically contains "FAILED_RETRYABLE" and
|
||||
// "will retry in later run" — the branch-specific text that distinguishes it from the
|
||||
// FAILED_FINAL branch. This kills the negated-conditional mutation on the retryable flag check.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.anyInfoContains("FAILED_RETRYABLE"),
|
||||
"Retry decision log for a retryable transient copy error must contain FAILED_RETRYABLE. "
|
||||
+ "Captured info messages: " + capturingLogger.infoMessages);
|
||||
assertTrue(capturingLogger.anyInfoContains("will retry in later run"),
|
||||
"Retry decision log for a retryable transient error must contain 'will retry in later run'. "
|
||||
+ "Captured info messages: " + capturingLogger.infoMessages);
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_copyFailure_maxRetriesTransient1_retryDecisionLog_containsFailedFinal() {
|
||||
// Verifies that when a copy failure with maxRetriesTransient=1 leads to FAILED_FINAL in
|
||||
// persistTransientError, the retry-decision log message contains "FAILED_FINAL" and
|
||||
// "transient error limit reached" — the branch-specific text that distinguishes it
|
||||
// from the FAILED_RETRYABLE branch.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new FailingTargetFileCopyPort(), capturingLogger,
|
||||
1 /* maxRetriesTransient=1 → immediately final */);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"),
|
||||
"Retry decision log for a finalising transient copy error must contain FAILED_FINAL. "
|
||||
+ "Captured info messages: " + capturingLogger.infoMessages);
|
||||
assertTrue(capturingLogger.anyInfoContains("transient error limit reached"),
|
||||
"Retry decision log for a finalising transient error must contain 'transient error limit reached'. "
|
||||
+ "Captured info messages: " + capturingLogger.infoMessages);
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_immediateRetryDoesNotTriggerAiOrNewProposal() {
|
||||
// Ensures that during the immediate retry path no pipeline (AI) execution happens
|
||||
@@ -1375,6 +1437,26 @@ class DocumentProcessingCoordinatorTest {
|
||||
}
|
||||
}
|
||||
|
||||
/** Counts calls to {@link #tryDeleteTargetFile(String)} for mutation detection. */
|
||||
private static class CapturingTargetFolderPort implements TargetFolderPort {
|
||||
int tryDeleteCallCount = 0;
|
||||
|
||||
@Override
|
||||
public String getTargetFolderLocator() {
|
||||
return "/tmp/target";
|
||||
}
|
||||
|
||||
@Override
|
||||
public TargetFilenameResolutionResult resolveUniqueFilename(String baseName) {
|
||||
return new ResolvedTargetFilename(baseName);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void tryDeleteTargetFile(String resolvedFilename) {
|
||||
tryDeleteCallCount++;
|
||||
}
|
||||
}
|
||||
|
||||
private static class NoOpTargetFolderPort implements TargetFolderPort {
|
||||
@Override
|
||||
public String getTargetFolderLocator() {
|
||||
@@ -1493,6 +1575,162 @@ class DocumentProcessingCoordinatorTest {
|
||||
assertTrue(capturingLogger.anyInfoContains("FAILED_FINAL"),
|
||||
"Finalising retry decision log must contain the FAILED_FINAL classification. "
|
||||
+ "Captured info messages: " + capturingLogger.infoMessages);
|
||||
assertTrue(capturingLogger.anyInfoContains("permanently failed"),
|
||||
"Finalising retry decision log must contain 'permanently failed' to distinguish "
|
||||
+ "the FAILED_FINAL branch from the generic status log. "
|
||||
+ "Captured info messages: " + capturingLogger.infoMessages);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Finalization path logging: error, warn, and info calls in key paths
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_missingProposalAttempt_logsError() {
|
||||
// Missing PROPOSAL_READY attempt in history — finalizeProposalReady must log an error.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
// No attempt pre-loaded — proposalAttempt == null branch
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.errorCallCount > 0,
|
||||
"An error must be logged when the PROPOSAL_READY attempt is missing from history");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_inconsistentProposalState_logsError() {
|
||||
// Inconsistent proposal state (null date) — finalizeProposalReady must log an error.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
ProcessingAttempt badProposal = new ProcessingAttempt(
|
||||
fingerprint, context.runId(), 1, Instant.now(), Instant.now(),
|
||||
ProcessingStatus.PROPOSAL_READY, null, null, false,
|
||||
"model", "prompt", 1, 100, "{}", "reason",
|
||||
null, DateSource.AI_PROVIDED, "Rechnung", null);
|
||||
attemptRepo.savedAttempts.add(badProposal);
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.errorCallCount > 0,
|
||||
"An error must be logged when the proposal state is inconsistent");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_duplicateResolutionFailure_logsError() {
|
||||
// Duplicate resolution failure — finalizeProposalReady must log an error.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new FailingTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.errorCallCount > 0,
|
||||
"An error must be logged when duplicate resolution fails");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_resolvedFilename_logsInfo() {
|
||||
// Successful duplicate resolution — resolved filename must be logged at INFO.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart,
|
||||
c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
|
||||
|
||||
assertTrue(capturingLogger.infoCallCount > 0,
|
||||
"Resolved target filename must be logged at INFO level");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_firstCopyFails_logsWarn() {
|
||||
// First copy attempt fails → immediate retry: a WARN must be logged for the first failure.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
CountingTargetFileCopyPort onlyFirstFails = new CountingTargetFileCopyPort(1);
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), onlyFirstFails, capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart,
|
||||
c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
|
||||
|
||||
assertTrue(capturingLogger.warnCallCount > 0,
|
||||
"A WARN must be logged when the first copy attempt fails and an immediate retry is triggered");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_bothCopyAttemptsFail_logsError() {
|
||||
// Both copy attempts fail — finalizeProposalReady must log an error.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
CountingTargetFileCopyPort bothFail = new CountingTargetFileCopyPort(2);
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), bothFail, capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.errorCallCount > 0,
|
||||
"An error must be logged when both copy attempts fail");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_immediateRetrySucceeds_logsInfo() {
|
||||
// First copy fails, immediate retry succeeds — a success INFO must be logged.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
CountingTargetFileCopyPort onlyFirstFails = new CountingTargetFileCopyPort(1);
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), onlyFirstFails, capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart,
|
||||
c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
|
||||
|
||||
assertTrue(capturingLogger.infoCallCount > 0,
|
||||
"An INFO must be logged when the immediate within-run retry succeeds");
|
||||
}
|
||||
|
||||
/** Zählt Logger-Aufrufe je Level, um VoidMethodCallMutator-Mutationen zu erkennen. */
|
||||
@@ -1581,5 +1819,91 @@ class DocumentProcessingCoordinatorTest {
|
||||
boolean anyInfoContains(String text) {
|
||||
return infoMessages.stream().anyMatch(m -> m.contains(text));
|
||||
}
|
||||
|
||||
boolean anyErrorContains(String text) {
|
||||
return errorMessages.stream().anyMatch(m -> m.contains(text));
|
||||
}
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// AI sensitive content logging in finalization path
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_aiContentNotNull_callsDebugSensitiveAiContent() {
|
||||
// buildValidProposalAttempt() has non-null aiRawResponse and aiReasoning.
|
||||
// The conditional guards at lines 398 and 402 of finalizeProposalReady must
|
||||
// trigger the debugSensitiveAiContent call when the values are present.
|
||||
// If negated, the calls would be suppressed for non-null values — detectable here.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt()); // aiRawResponse="{}", aiReasoning="reason"
|
||||
|
||||
CapturingProcessingLogger capturingLogger = new CapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(
|
||||
candidate, fingerprint, context, attemptStart,
|
||||
c -> { throw new AssertionError("Pipeline must not run for PROPOSAL_READY"); });
|
||||
|
||||
assertTrue(capturingLogger.debugSensitiveAiContentCallCount >= 2,
|
||||
"debugSensitiveAiContent must be called for aiRawResponse and aiReasoning "
|
||||
+ "when both are non-null. Actual call count: "
|
||||
+ capturingLogger.debugSensitiveAiContentCallCount);
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// Best-effort rollback path: tryDeleteTargetFile and secondary persistence
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_callsTryDeleteTargetFile() {
|
||||
// When persistence fails after a successful copy, the best-effort rollback
|
||||
// must call tryDeleteTargetFile to clean up the orphaned target file.
|
||||
// This test kills the 'removed call to tryDeleteTargetFile' mutation.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
unitOfWorkPort.failOnExecute = true;
|
||||
|
||||
CapturingTargetFolderPort capturingFolderPort = new CapturingTargetFolderPort();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
capturingFolderPort, new NoOpTargetFileCopyPort(), new NoOpProcessingLogger(),
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingFolderPort.tryDeleteCallCount > 0,
|
||||
"tryDeleteTargetFile must be called at least once for best-effort rollback "
|
||||
+ "when persistence fails after a successful copy");
|
||||
}
|
||||
|
||||
@Test
|
||||
void processDeferredOutcome_proposalReady_persistenceFailureAfterCopy_logsSecondaryFailure() {
|
||||
// When persistence fails after a successful copy and the secondary persistence
|
||||
// attempt in persistTransientErrorAfterPersistenceFailure also fails,
|
||||
// an error must be logged for the secondary failure.
|
||||
// This kills the 'removed call to persistTransientErrorAfterPersistenceFailure' mutation.
|
||||
DocumentRecord existingRecord = buildRecord(ProcessingStatus.PROPOSAL_READY, FailureCounters.zero());
|
||||
recordRepo.setLookupResult(new DocumentKnownProcessable(existingRecord));
|
||||
attemptRepo.savedAttempts.add(buildValidProposalAttempt());
|
||||
unitOfWorkPort.failOnExecute = true; // both primary and secondary persistence fail
|
||||
|
||||
MessageCapturingProcessingLogger capturingLogger = new MessageCapturingProcessingLogger();
|
||||
DocumentProcessingCoordinator coordinatorWithCapturing = new DocumentProcessingCoordinator(
|
||||
recordRepo, attemptRepo, unitOfWorkPort,
|
||||
new NoOpTargetFolderPort(), new NoOpTargetFileCopyPort(), capturingLogger,
|
||||
DEFAULT_MAX_RETRIES_TRANSIENT);
|
||||
|
||||
coordinatorWithCapturing.processDeferredOutcome(candidate, fingerprint, context, attemptStart, c -> null);
|
||||
|
||||
assertTrue(capturingLogger.anyErrorContains("Secondary persistence failure")
|
||||
|| capturingLogger.anyErrorContains("secondary"),
|
||||
"An error must be logged for the secondary persistence failure. "
|
||||
+ "Captured error messages: " + capturingLogger.errorMessages);
|
||||
}
|
||||
}
|
||||
@@ -7,8 +7,11 @@ import de.gecheckt.pdf.umbenenner.domain.model.AiTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DateSource;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposal;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.NamingProposalReady;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
@@ -314,4 +317,58 @@ class ProcessingOutcomeTransitionTest {
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus());
|
||||
assertEquals(2, result.counters().transientErrorCount());
|
||||
}
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
// PreCheckPassed routed through transition (edge case: no AI step taken)
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
@Test
|
||||
void forNewDocument_preCheckPassed_limitOne_immediatelyFinal() {
|
||||
// PreCheckPassed without an AI outcome is treated as a transient error by the transition.
|
||||
// With limit=1 the first such error must immediately finalise to FAILED_FINAL.
|
||||
PreCheckPassed outcome = new PreCheckPassed(
|
||||
candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
ProcessingOutcomeTransition.ProcessingOutcome result =
|
||||
ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_1);
|
||||
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(),
|
||||
"With limit=1 a PreCheckPassed-routed transient error must immediately finalise");
|
||||
assertFalse(result.retryable());
|
||||
assertEquals(1, result.counters().transientErrorCount());
|
||||
assertEquals(0, result.counters().contentErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void forNewDocument_preCheckPassed_limitTwo_firstErrorRetryable() {
|
||||
// With limit=2 the first PreCheckPassed-routed transient error is retryable.
|
||||
PreCheckPassed outcome = new PreCheckPassed(
|
||||
candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
|
||||
ProcessingOutcomeTransition.ProcessingOutcome result =
|
||||
ProcessingOutcomeTransition.forNewDocument(outcome, LIMIT_2);
|
||||
|
||||
assertEquals(ProcessingStatus.FAILED_RETRYABLE, result.overallStatus(),
|
||||
"With limit=2 the first PreCheckPassed-routed transient error must be retryable");
|
||||
assertTrue(result.retryable());
|
||||
assertEquals(1, result.counters().transientErrorCount());
|
||||
assertEquals(0, result.counters().contentErrorCount());
|
||||
}
|
||||
|
||||
@Test
|
||||
void forKnownDocument_preCheckPassed_limitTwo_secondErrorFinal() {
|
||||
// With limit=2 and an existing transient error count of 1,
|
||||
// the next PreCheckPassed-routed error increments to 2 = limit → FAILED_FINAL.
|
||||
PreCheckPassed outcome = new PreCheckPassed(
|
||||
candidate(), new PdfExtractionSuccess("text", new PdfPageCount(1)));
|
||||
FailureCounters existing = new FailureCounters(0, 1);
|
||||
|
||||
ProcessingOutcomeTransition.ProcessingOutcome result =
|
||||
ProcessingOutcomeTransition.forKnownDocument(outcome, existing, LIMIT_2);
|
||||
|
||||
assertEquals(ProcessingStatus.FAILED_FINAL, result.overallStatus(),
|
||||
"PreCheckPassed-routed error at transient limit must finalise to FAILED_FINAL");
|
||||
assertFalse(result.retryable());
|
||||
assertEquals(2, result.counters().transientErrorCount());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,6 +62,11 @@
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.assertj</groupId>
|
||||
<artifactId>assertj-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
<build>
|
||||
|
||||
@@ -310,10 +310,10 @@ public class BootstrapRunner {
|
||||
LOG.error("Configuration validation failed: {}", e.getMessage());
|
||||
return 1;
|
||||
} catch (DocumentPersistenceException e) {
|
||||
LOG.error("Persistence operation failed: {}", e.getMessage(), e);
|
||||
LOG.error("Schema initialization failed: {}", e.getMessage(), e);
|
||||
return 1;
|
||||
} catch (Exception e) {
|
||||
LOG.error("Bootstrap failure during startup.", e);
|
||||
LOG.error("Unexpected startup failure.", e);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -391,7 +391,7 @@ public class BootstrapRunner {
|
||||
*/
|
||||
private BatchRunContext createRunContext() {
|
||||
RunId runId = new RunId(UUID.randomUUID().toString());
|
||||
LOG.info("Batch run started. RunId: {}", runId);
|
||||
LOG.info("Preparing batch run. RunId: {}", runId);
|
||||
return new BatchRunContext(runId, Instant.now());
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,698 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopySuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Deterministic end-to-end tests for the complete batch processing pipeline.
|
||||
* <p>
|
||||
* Each test method is independent and uses its own {@link E2ETestContext} backed by a
|
||||
* JUnit {@code @TempDir}. All infrastructure adapters (SQLite, filesystem, PDF extraction,
|
||||
* fingerprinting) are real production implementations. Only the AI invocation port is
|
||||
* replaced by a configurable {@link StubAiInvocationPort} to avoid real HTTP calls.
|
||||
*
|
||||
* <h2>End-to-end invariants verified</h2>
|
||||
* <ul>
|
||||
* <li><strong>Happy-path to {@code SUCCESS}</strong>: two-run flow via {@code PROPOSAL_READY}
|
||||
* intermediate state to a final {@code SUCCESS} with a target file on disk.</li>
|
||||
* <li><strong>Deterministic content error</strong>: blank PDFs (no extractable text) reach
|
||||
* {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after the
|
||||
* second run, exercising the one-retry rule for deterministic content errors.</li>
|
||||
* <li><strong>Transient technical error</strong>: AI stub failures produce
|
||||
* {@code FAILED_RETRYABLE} (transient counter incremented) without a target file.</li>
|
||||
* <li><strong>Transient error exhaustion</strong>: repeated AI stub failures across
|
||||
* {@code maxRetriesTransient} runs increment the transient counter each time until
|
||||
* the limit is reached and the document is finalized to {@code FAILED_FINAL}.</li>
|
||||
* <li><strong>Skip after {@code SUCCESS}</strong>: a document whose status is
|
||||
* {@code SUCCESS} generates exactly one {@code SKIPPED_ALREADY_PROCESSED} attempt
|
||||
* in the next run; the overall status and target file remain unchanged.</li>
|
||||
* <li><strong>Skip after {@code FAILED_FINAL}</strong>: a document whose status is
|
||||
* {@code FAILED_FINAL} generates exactly one {@code SKIPPED_FINAL_FAILURE} attempt
|
||||
* in the next run; the overall status and failure counters remain unchanged.</li>
|
||||
* <li><strong>{@code PROPOSAL_READY} with later finalization</strong>: a document in
|
||||
* {@code PROPOSAL_READY} state is finalized without an AI call in the next run,
|
||||
* confirming the leading-proposal-attempt rule.</li>
|
||||
* <li><strong>Target copy error with immediate within-run retry (success)</strong>: when the
|
||||
* first copy attempt fails but the immediate within-run retry succeeds, the document is
|
||||
* recorded as {@code SUCCESS} and no transient error counter is incremented.</li>
|
||||
* <li><strong>Target copy error with immediate within-run retry (failure)</strong>: when both
|
||||
* the initial and immediate-retry copy attempts fail, the document is recorded as
|
||||
* {@code FAILED_RETRYABLE} with an incremented transient counter.</li>
|
||||
* <li><strong>Duplicate target filename suffix</strong>: when two distinct documents produce
|
||||
* the same base target filename in the same batch run, the second receives a {@code (1)}
|
||||
* suffix to avoid overwriting the first.</li>
|
||||
* <li><strong>Mixed batch outcome</strong>: a batch run that contains both successfully
|
||||
* processed documents and documents with content errors completes with
|
||||
* {@link BatchRunOutcome#SUCCESS}, confirming that document-level failures do not
|
||||
* abort the batch or change the overall exit-code-relevant outcome.</li>
|
||||
* </ul>
|
||||
*
|
||||
* <h2>Document text used in tests</h2>
|
||||
* <p>
|
||||
* Searchable PDFs embed enough text to pass the minimum-text pre-check. The AI stub
|
||||
* returns a title of {@code "Stromabrechnung"} and date {@code "2024-01-15"} by default,
|
||||
* producing a target filename of {@code "2024-01-15 - Stromabrechnung.pdf"}.
|
||||
*/
|
||||
class BatchRunEndToEndTest {
|
||||
|
||||
private static final String SAMPLE_PDF_TEXT =
|
||||
"Stromabrechnung Kundenname Musterstadt Datum 15.01.2024 Betrag 123,45 EUR";
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 1: Happy-path to SUCCESS
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the complete two-run happy-path:
|
||||
* <ol>
|
||||
* <li>Run 1: AI stub returns valid proposal → document status becomes
|
||||
* {@code PROPOSAL_READY}; no target file yet.</li>
|
||||
* <li>Run 2: AI is NOT called again; target file is copied; document status
|
||||
* becomes {@code SUCCESS}.</li>
|
||||
* </ol>
|
||||
* This confirms the leading-proposal-attempt rule and the two-phase finalization.
|
||||
*/
|
||||
@Test
|
||||
void happyPath_twoRuns_reachesSuccess(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("rechnung.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("rechnung.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: AI produces a naming proposal ---
|
||||
BatchRunOutcome run1 = ctx.runBatch();
|
||||
|
||||
assertThat(run1).isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.aiStub.invocationCount()).isEqualTo(1);
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
|
||||
List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
|
||||
assertThat(attempts1).hasSize(1);
|
||||
assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
|
||||
// --- Run 2: Finalization without AI call ---
|
||||
ctx.aiStub.resetInvocationCount();
|
||||
BatchRunOutcome run2 = ctx.runBatch();
|
||||
|
||||
assertThat(run2).isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be called again when PROPOSAL_READY exists")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(record2.lastSuccessInstant()).isNotNull();
|
||||
assertThat(record2.lastTargetFileName()).isNotNull();
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(targetFiles.get(0)).endsWith(".pdf");
|
||||
assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
|
||||
|
||||
List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
|
||||
assertThat(attempts2).hasSize(2);
|
||||
assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 2: Deterministic content error → FAILED_RETRYABLE → FAILED_FINAL
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the one-retry rule for deterministic content errors:
|
||||
* <ol>
|
||||
* <li>Run 1: blank PDF → pre-check fails (no extractable text) →
|
||||
* {@code FAILED_RETRYABLE}, content error counter = 1.</li>
|
||||
* <li>Run 2: same outcome again → {@code FAILED_FINAL}, content error counter = 2.</li>
|
||||
* </ol>
|
||||
* No AI call is made in either run because the content pre-check prevents it.
|
||||
*/
|
||||
@Test
|
||||
void deterministicContentError_twoRuns_reachesFailedFinal(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createBlankPdf("blank.pdf");
|
||||
Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1 ---
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be called for a blank PDF")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(1);
|
||||
assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
|
||||
assertThat(attempts1).hasSize(1);
|
||||
assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(attempts1.get(0).retryable()).isTrue();
|
||||
|
||||
// --- Run 2 ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(record2.failureCounters().contentErrorCount()).isEqualTo(2);
|
||||
|
||||
List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
|
||||
assertThat(attempts2).hasSize(2);
|
||||
assertThat(attempts2.get(1).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(attempts2.get(1).retryable()).isFalse();
|
||||
|
||||
// No target file should exist
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 3: Transient technical error → FAILED_RETRYABLE
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies that a transient AI failure produces {@code FAILED_RETRYABLE} with an
|
||||
* incremented transient error counter, and that no target file is written.
|
||||
* The document remains retryable in subsequent runs until the transient limit is reached.
|
||||
*/
|
||||
@Test
|
||||
void transientAiFailure_producesFailedRetryable(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
ctx.aiStub.configureTechnicalFailure();
|
||||
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must have been invoked (and failed) once")
|
||||
.isEqualTo(1);
|
||||
|
||||
DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(record.failureCounters().transientErrorCount()).isEqualTo(1);
|
||||
assertThat(record.failureCounters().contentErrorCount()).isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(1);
|
||||
assertThat(attempts.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
|
||||
assertThat(attempts.get(0).retryable()).isTrue();
|
||||
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 4: Skip after SUCCESS
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the skip-after-success invariant:
|
||||
* after a document reaches {@code SUCCESS} (via two runs), a third run records a
|
||||
* {@code SKIPPED_ALREADY_PROCESSED} attempt without changing the overall status,
|
||||
* failure counters, or the target file.
|
||||
*/
|
||||
@Test
|
||||
void skipAfterSuccess_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// Reach SUCCESS via two runs
|
||||
ctx.runBatch(); // → PROPOSAL_READY
|
||||
ctx.runBatch(); // → SUCCESS
|
||||
|
||||
DocumentRecord successRecord = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(successRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
String targetFileBefore = successRecord.lastTargetFileName();
|
||||
|
||||
// --- Run 3: should produce skip ---
|
||||
ctx.aiStub.resetInvocationCount();
|
||||
BatchRunOutcome run3 = ctx.runBatch();
|
||||
|
||||
assertThat(run3).isEqualTo(BatchRunOutcome.SUCCESS);
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be called for an already-successful document")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record3.overallStatus())
|
||||
.as("Overall status must remain SUCCESS after a skip")
|
||||
.isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(record3.lastTargetFileName())
|
||||
.as("Target filename must not change after a skip")
|
||||
.isEqualTo(targetFileBefore);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(3);
|
||||
assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_ALREADY_PROCESSED);
|
||||
assertThat(attempts.get(2).retryable()).isFalse();
|
||||
|
||||
// Target file count must remain exactly one
|
||||
assertThat(ctx.listTargetFiles()).hasSize(1);
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 5: Skip after FAILED_FINAL
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the skip-after-final-failure invariant:
|
||||
* after a document reaches {@code FAILED_FINAL} (via two blank-PDF runs), a third run
|
||||
* records a {@code SKIPPED_FINAL_FAILURE} attempt without changing the overall status
|
||||
* or failure counters.
|
||||
*/
|
||||
@Test
|
||||
void skipAfterFailedFinal_thirdRun_recordsSkip(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createBlankPdf("blank.pdf");
|
||||
Path pdfPath = ctx.sourceFolder().resolve("blank.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// Reach FAILED_FINAL via two blank-PDF runs
|
||||
ctx.runBatch(); // → FAILED_RETRYABLE
|
||||
ctx.runBatch(); // → FAILED_FINAL
|
||||
|
||||
DocumentRecord finalRecord = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(finalRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
int contentErrorsBefore = finalRecord.failureCounters().contentErrorCount();
|
||||
|
||||
// --- Run 3: should produce skip ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record3.overallStatus())
|
||||
.as("Overall status must remain FAILED_FINAL after a skip")
|
||||
.isEqualTo(ProcessingStatus.FAILED_FINAL);
|
||||
assertThat(record3.failureCounters().contentErrorCount())
|
||||
.as("Failure counters must not change after a skip")
|
||||
.isEqualTo(contentErrorsBefore);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(3);
|
||||
assertThat(attempts.get(2).status()).isEqualTo(ProcessingStatus.SKIPPED_FINAL_FAILURE);
|
||||
assertThat(attempts.get(2).retryable()).isFalse();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 6: Existing PROPOSAL_READY with later finalization
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the leading-proposal-attempt rule in isolation:
|
||||
* <ol>
|
||||
* <li>Run 1: AI produces a naming proposal → document status is {@code PROPOSAL_READY}.</li>
|
||||
* <li>Run 2: AI stub is reset to technical failure; the coordinator must still finalize
|
||||
* the document to {@code SUCCESS} using the persisted proposal — without calling the AI.</li>
|
||||
* </ol>
|
||||
* This confirms that the second run never re-invokes the AI when a valid
|
||||
* {@code PROPOSAL_READY} attempt already exists.
|
||||
*/
|
||||
@Test
|
||||
void proposalReadyFinalization_noAiCallInSecondRun(@TempDir Path tempDir) throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: establish PROPOSAL_READY ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
assertThat(ctx.listTargetFiles()).isEmpty();
|
||||
|
||||
// --- Run 2: AI stub would fail if called, but must not be called ---
|
||||
ctx.aiStub.configureTechnicalFailure();
|
||||
ctx.aiStub.resetInvocationCount();
|
||||
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(ctx.aiStub.invocationCount())
|
||||
.as("AI must not be invoked during PROPOSAL_READY finalization")
|
||||
.isEqualTo(0);
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(targetFiles.get(0)).endsWith(".pdf");
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 7: Target copy error with immediate within-run retry
|
||||
// =========================================================================
|
||||
|
||||
/**
|
||||
* Verifies the immediate within-run retry for target copy failures:
|
||||
* <ol>
|
||||
* <li>Run 1: AI produces {@code PROPOSAL_READY}.</li>
|
||||
* <li>Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails on
|
||||
* the first invocation but delegates to the real adapter on the second.
|
||||
* The coordinator must detect the first failure, retry immediately within the
|
||||
* same run, and record {@code SUCCESS} — without incrementing the transient
|
||||
* error counter.</li>
|
||||
* </ol>
|
||||
* The immediate retry does not count as a cross-run transient error.
|
||||
*/
|
||||
@Test
|
||||
void targetCopyError_immediateRetrySucceeds_recordsSuccess(@TempDir Path tempDir)
|
||||
throws Exception {
|
||||
try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
|
||||
ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
|
||||
Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
|
||||
DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);
|
||||
|
||||
// --- Run 1: produce PROPOSAL_READY ---
|
||||
ctx.runBatch();
|
||||
|
||||
DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.PROPOSAL_READY);
|
||||
|
||||
// --- Run 2: first copy attempt fails, retry succeeds ---
|
||||
TargetFileCopyPort realAdapter =
|
||||
new de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter(
|
||||
ctx.targetFolder());
|
||||
AtomicInteger copyCallCount = new AtomicInteger(0);
|
||||
|
||||
TargetFileCopyPort stubWithRetry = (locator, resolvedFilename) -> {
|
||||
int call = copyCallCount.incrementAndGet();
|
||||
if (call == 1) {
|
||||
// First attempt: simulate a transient write failure
|
||||
return new TargetFileCopyTechnicalFailure(
|
||||
"Simulated write failure on first attempt", true);
|
||||
}
|
||||
// Second attempt (immediate within-run retry): delegate to real adapter
|
||||
return realAdapter.copyToTarget(locator, resolvedFilename);
|
||||
};
|
||||
|
||||
ctx.setTargetFileCopyPortOverride(stubWithRetry);
|
||||
ctx.runBatch();
|
||||
|
||||
assertThat(copyCallCount.get())
|
||||
.as("Copy port must have been called twice (initial + retry)")
|
||||
.isEqualTo(2);
|
||||
|
||||
DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
|
||||
assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
assertThat(record2.failureCounters().transientErrorCount())
|
||||
.as("Immediate within-run retry must not increment the transient error counter")
|
||||
.isEqualTo(0);
|
||||
|
||||
List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
|
||||
assertThat(attempts).hasSize(2);
|
||||
assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.SUCCESS);
|
||||
|
||||
List<String> targetFiles = ctx.listTargetFiles();
|
||||
assertThat(targetFiles).hasSize(1);
|
||||
assertThat(Files.exists(ctx.targetFolder().resolve(targetFiles.get(0)))).isTrue();
|
||||
}
|
||||
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 8: Transient error exhaustion → FAILED_FINAL
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Verifies the complete transient error exhaustion path over multiple runs:
 * <ol>
 *   <li>Run 1: AI stub fails technically → {@code FAILED_RETRYABLE},
 *       transient counter = 1 (below limit 3).</li>
 *   <li>Run 2: AI stub fails again → {@code FAILED_RETRYABLE},
 *       transient counter = 2 (below limit 3).</li>
 *   <li>Run 3: AI stub fails again → transient counter reaches the limit (3 = 3) →
 *       {@code FAILED_FINAL}; no target file is ever written.</li>
 * </ol>
 * This confirms the {@code maxRetriesTransient} boundary: the run that pushes the
 * counter to the configured limit is the run that finalises the document.
 */
@Test
void transientErrors_multipleRuns_exhaustsLimit_reachesFailedFinal(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
        DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

        // The stub fails on every AI call for the entire test; no reset between runs.
        ctx.aiStub.configureTechnicalFailure();

        // --- Run 1: counter 0 → 1, below limit → FAILED_RETRYABLE ---
        ctx.runBatch();

        DocumentRecord record1 = ctx.findDocumentRecord(fp).orElseThrow();
        assertThat(record1.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(record1.failureCounters().transientErrorCount()).isEqualTo(1);
        assertThat(record1.failureCounters().contentErrorCount()).isEqualTo(0);

        List<ProcessingAttempt> attempts1 = ctx.findAttempts(fp);
        assertThat(attempts1).hasSize(1);
        assertThat(attempts1.get(0).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(attempts1.get(0).retryable()).isTrue();

        // --- Run 2: counter 1 → 2, below limit → FAILED_RETRYABLE ---
        ctx.runBatch();

        DocumentRecord record2 = ctx.findDocumentRecord(fp).orElseThrow();
        assertThat(record2.overallStatus()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(record2.failureCounters().transientErrorCount()).isEqualTo(2);

        List<ProcessingAttempt> attempts2 = ctx.findAttempts(fp);
        assertThat(attempts2).hasSize(2);
        assertThat(attempts2.get(1).retryable()).isTrue();

        // --- Run 3: counter 2 → 3 = limit → FAILED_FINAL ---
        ctx.runBatch();

        DocumentRecord record3 = ctx.findDocumentRecord(fp).orElseThrow();
        assertThat(record3.overallStatus())
                .as("Transient counter reaching the configured limit must finalise the document")
                .isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(record3.failureCounters().transientErrorCount())
                .as("Transient counter must equal maxRetriesTransient after exhaustion")
                .isEqualTo(E2ETestContext.MAX_RETRIES_TRANSIENT);
        assertThat(record3.failureCounters().contentErrorCount()).isEqualTo(0);

        List<ProcessingAttempt> attempts3 = ctx.findAttempts(fp);
        assertThat(attempts3).hasSize(3);
        assertThat(attempts3.get(2).status()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(attempts3.get(2).retryable()).isFalse();

        // A document that never produced a proposal must never reach the target folder.
        assertThat(ctx.listTargetFiles()).isEmpty();
    }
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 9: Target copy error – both attempts fail → FAILED_RETRYABLE
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Verifies the failure path of the immediate within-run retry mechanism:
 * <ol>
 *   <li>Run 1: AI stub returns a valid proposal → {@code PROPOSAL_READY}.</li>
 *   <li>Run 2: The {@link TargetFileCopyPort} is overridden with a stub that fails
 *       on every call. The coordinator issues the initial copy attempt (failure),
 *       grants exactly one immediate retry (also failure), then classifies the
 *       result as a transient technical error and records {@code FAILED_RETRYABLE}
 *       with an incremented transient counter.</li>
 * </ol>
 * This confirms that the within-run retry does not suppress the error when both
 * attempts fail, and that the transient counter is incremented exactly once.
 */
@Test
void targetCopyError_bothAttemptsFail_reachesFailedRetryable(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("doc.pdf", SAMPLE_PDF_TEXT);
        Path pdfPath = ctx.sourceFolder().resolve("doc.pdf");
        DocumentFingerprint fp = ctx.computeFingerprint(pdfPath);

        // --- Run 1: establish PROPOSAL_READY ---
        ctx.runBatch();

        assertThat(ctx.findDocumentRecord(fp).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);

        // --- Run 2: both copy attempts fail ---
        // The override fails unconditionally, so the initial attempt AND the
        // single immediate retry both return a technical failure.
        ctx.setTargetFileCopyPortOverride(
                (locator, resolvedFilename) ->
                        new TargetFileCopyTechnicalFailure(
                                "Simulated persistent write failure", true));

        ctx.runBatch();

        DocumentRecord record = ctx.findDocumentRecord(fp).orElseThrow();
        assertThat(record.overallStatus())
                .as("Both copy attempts failing must produce FAILED_RETRYABLE "
                        + "(transient error, limit not yet reached)")
                .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(record.failureCounters().transientErrorCount())
                .as("The double copy failure must increment the transient counter exactly once")
                .isEqualTo(1);

        List<ProcessingAttempt> attempts = ctx.findAttempts(fp);
        assertThat(attempts).hasSize(2);
        assertThat(attempts.get(1).status()).isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(attempts.get(1).retryable()).isTrue();

        // Nothing may be written to the target folder when every copy attempt fails.
        assertThat(ctx.listTargetFiles()).isEmpty();
    }
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 10: Two documents with identical target name → duplicate suffix
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Verifies the duplicate target filename suffix rule at end-to-end level:
 * when two distinct source documents both resolve to the same base target name
 * ({@code "2024-01-15 - Stromabrechnung.pdf"}) in the same finalization run, the
 * second document written to the target folder must receive a {@code (1)} suffix.
 * <ol>
 *   <li>Run 1: both PDFs are processed by the AI stub (same configured response) →
 *       both reach {@code PROPOSAL_READY}.</li>
 *   <li>Run 2: both are finalized in sequence; the first written claims the base name,
 *       the second receives {@code "2024-01-15 - Stromabrechnung(1).pdf"}.</li>
 * </ol>
 * Both documents reach {@code SUCCESS} and the target folder contains exactly two files.
 */
@Test
void twoDifferentDocuments_sameProposedName_secondGetsDuplicateSuffix(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        // Two distinct PDFs; the AI stub returns the same title and date for both
        ctx.createSearchablePdf("rechnung1.pdf", SAMPLE_PDF_TEXT);
        ctx.createSearchablePdf("rechnung2.pdf",
                "Stromabrechnung Zweiter Kunde Musterstadt Datum 15.01.2024 Betrag 99,00 EUR");

        Path pdf1 = ctx.sourceFolder().resolve("rechnung1.pdf");
        Path pdf2 = ctx.sourceFolder().resolve("rechnung2.pdf");
        DocumentFingerprint fp1 = ctx.computeFingerprint(pdf1);
        DocumentFingerprint fp2 = ctx.computeFingerprint(pdf2);

        // --- Run 1: AI stub processes both PDFs → PROPOSAL_READY ---
        ctx.runBatch();

        assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.listTargetFiles()).isEmpty();

        // --- Run 2: both finalized; the second must receive the (1) suffix ---
        ctx.runBatch();

        assertThat(ctx.findDocumentRecord(fp1).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.SUCCESS);
        assertThat(ctx.findDocumentRecord(fp2).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.SUCCESS);

        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles)
                .as("Both distinct documents must produce separate target files")
                .hasSize(2);
        assertThat(targetFiles)
                .as("Base name must exist for the first document written")
                .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung.pdf"));
        assertThat(targetFiles)
                .as("Duplicate suffix (1) must be appended for the second document written")
                .anyMatch(f -> f.equals("2024-01-15 - Stromabrechnung(1).pdf"));
    }
}
|
||||
|
||||
// =========================================================================
|
||||
// Scenario 11: Mixed batch – document failures do not affect batch outcome
|
||||
// =========================================================================
|
||||
|
||||
/**
 * Verifies that document-level failures do not cause a batch-level failure:
 * <ol>
 *   <li>Run 1: a searchable PDF reaches {@code PROPOSAL_READY}; a blank PDF
 *       (no extractable text) reaches {@code FAILED_RETRYABLE}.
 *       {@link BatchRunOutcome#SUCCESS} is returned.</li>
 *   <li>Run 2: the searchable PDF is finalized to {@code SUCCESS};
 *       the blank PDF reaches its second content error and is finalized to
 *       {@code FAILED_FINAL}. {@link BatchRunOutcome#SUCCESS} is returned.</li>
 * </ol>
 * This confirms the exit-code contract: only hard bootstrap or infrastructure
 * failures produce a non-zero exit code; document-level errors do not.
 */
@Test
void mixedBatch_oneSuccess_oneContentError_batchOutcomeIsSuccess(@TempDir Path tempDir)
        throws Exception {
    try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
        ctx.createSearchablePdf("good.pdf", SAMPLE_PDF_TEXT);
        ctx.createBlankPdf("blank.pdf");

        Path goodPdf = ctx.sourceFolder().resolve("good.pdf");
        Path blankPdf = ctx.sourceFolder().resolve("blank.pdf");
        DocumentFingerprint fpGood = ctx.computeFingerprint(goodPdf);
        DocumentFingerprint fpBlank = ctx.computeFingerprint(blankPdf);

        // --- Run 1 ---
        BatchRunOutcome run1 = ctx.runBatch();

        assertThat(run1)
                .as("Batch must complete with SUCCESS even when individual documents fail")
                .isEqualTo(BatchRunOutcome.SUCCESS);
        assertThat(ctx.findDocumentRecord(fpGood).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.PROPOSAL_READY);
        assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow().overallStatus())
                .isEqualTo(ProcessingStatus.FAILED_RETRYABLE);
        assertThat(ctx.findDocumentRecord(fpBlank).orElseThrow()
                .failureCounters().contentErrorCount()).isEqualTo(1);

        // --- Run 2 ---
        BatchRunOutcome run2 = ctx.runBatch();

        assertThat(run2)
                .as("Batch must complete with SUCCESS even when a document is finalised "
                        + "to FAILED_FINAL")
                .isEqualTo(BatchRunOutcome.SUCCESS);

        DocumentRecord goodRecord = ctx.findDocumentRecord(fpGood).orElseThrow();
        assertThat(goodRecord.overallStatus()).isEqualTo(ProcessingStatus.SUCCESS);

        DocumentRecord blankRecord = ctx.findDocumentRecord(fpBlank).orElseThrow();
        assertThat(blankRecord.overallStatus()).isEqualTo(ProcessingStatus.FAILED_FINAL);
        assertThat(blankRecord.failureCounters().contentErrorCount()).isEqualTo(2);

        // Exactly one target file from the successfully processed document
        List<String> targetFiles = ctx.listTargetFiles();
        assertThat(targetFiles).hasSize(1);
        assertThat(targetFiles.get(0)).endsWith(".pdf");
    }
}
|
||||
}
|
||||
@@ -0,0 +1,406 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.clock.SystemClockAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.fingerprint.Sha256FingerprintAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.lock.FilesystemRunLockPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.pdfextraction.PdfTextExtractionPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.prompt.FilesystemPromptPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sourcedocument.SourceDocumentCandidatesPortAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteDocumentRecordRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteProcessingAttemptRepositoryAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteSchemaInitializationAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.sqlite.SqliteUnitOfWorkAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.targetcopy.FilesystemTargetFileCopyAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.adapter.out.targetfolder.FilesystemTargetFolderAdapter;
|
||||
import de.gecheckt.pdf.umbenenner.application.config.RuntimeConfiguration;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.in.BatchRunOutcome;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiContentSensitivity;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ClockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentKnownProcessable;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecord;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentRecordRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalFinalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.DocumentTerminalSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttempt;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingAttemptRepository;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.ProcessingLogger;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PromptPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFileCopyPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.TargetFolderPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.UnitOfWorkPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.AiNamingService;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.AiResponseValidator;
|
||||
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingCoordinator;
|
||||
import de.gecheckt.pdf.umbenenner.application.usecase.DefaultBatchRunProcessingUseCase;
|
||||
import de.gecheckt.pdf.umbenenner.bootstrap.adapter.Log4jProcessingLogger;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.RunId;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.time.Instant;
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
import java.util.UUID;
|
||||
|
||||
/**
 * Full adapter wiring context for deterministic end-to-end tests of the batch processing pipeline.
 * <p>
 * Provides real infrastructure adapters for all subsystems (SQLite persistence, filesystem
 * source/target folders, PDF text extraction, SHA-256 fingerprinting, run locking) and a
 * configurable stub ({@link StubAiInvocationPort}) for the AI invocation port.
 * This ensures that end-to-end tests cover the complete production code path without
 * performing real HTTP calls to an AI service.
 *
 * <h2>Invariants verified by this context</h2>
 * <ul>
 *   <li>Happy-path to {@code SUCCESS}: two-run flow where Run 1 produces {@code PROPOSAL_READY}
 *       and Run 2 copies the file and records {@code SUCCESS}.</li>
 *   <li>Deterministic content error: blank PDFs (no extractable text) produce
 *       {@code FAILED_RETRYABLE} after the first run and {@code FAILED_FINAL} after a
 *       second run.</li>
 *   <li>Transient technical error: AI stub failures produce {@code FAILED_RETRYABLE} for each
 *       run until the transient error limit is reached, at which point the document is
 *       finalized to {@code FAILED_FINAL}.</li>
 *   <li>Skip after {@code SUCCESS}: a document in {@code SUCCESS} state generates a
 *       {@code SKIPPED_ALREADY_PROCESSED} attempt in subsequent runs.</li>
 *   <li>Skip after {@code FAILED_FINAL}: a document in {@code FAILED_FINAL} state generates a
 *       {@code SKIPPED_FINAL_FAILURE} attempt in subsequent runs.</li>
 *   <li>{@code PROPOSAL_READY} with later finalization: a document in {@code PROPOSAL_READY}
 *       state is finalized without an AI call in the next run.</li>
 *   <li>Target copy error with immediate retry: when the first copy attempt fails but the
 *       immediate within-run retry succeeds, the document is recorded as {@code SUCCESS}.</li>
 * </ul>
 *
 * <h2>Usage pattern</h2>
 * <pre>{@code
 * @TempDir Path tempDir;
 *
 * @Test
 * void example() throws Exception {
 *     try (E2ETestContext ctx = E2ETestContext.initialize(tempDir)) {
 *         ctx.createSearchablePdf("doc.pdf", "Rechnung 2024-01-15 ...");
 *         BatchRunOutcome run1 = ctx.runBatch();
 *         // assertions...
 *     }
 * }
 * }</pre>
 *
 * <h2>Thread safety</h2>
 * <p>
 * Not thread-safe. Each test method should use its own context instance.
 */
public final class E2ETestContext implements AutoCloseable {

    /** Maximum pages before triggering a deterministic content error. */
    static final int MAX_PAGES = 50;

    /** Maximum text characters sent to the AI service. */
    static final int MAX_TEXT_CHARS = 10_000;

    /**
     * Maximum transient retries before a document is finalized to {@code FAILED_FINAL}.
     * Set to 3 to allow multi-run transient-failure tests without immediate finalization.
     */
    static final int MAX_RETRIES_TRANSIENT = 3;

    /** Model name carried in attempt history (no real inference occurs). */
    static final String AI_MODEL = "e2e-stub-model";

    private final Path sourceFolder;
    private final Path targetFolder;
    private final Path lockFile;
    private final Path promptFile;
    private final String jdbcUrl;

    // Repositories are created once per context so all runs share the same database state.
    private final SqliteDocumentRecordRepositoryAdapter documentRepo;
    private final SqliteProcessingAttemptRepositoryAdapter attemptRepo;

    /**
     * Configurable AI stub. Tests may call {@link StubAiInvocationPort#configureSuccess},
     * {@link StubAiInvocationPort#configureTechnicalFailure}, or
     * {@link StubAiInvocationPort#reset()} between batch runs.
     */
    public final StubAiInvocationPort aiStub;

    /**
     * Optional override for the {@link TargetFileCopyPort}.
     * {@code null} means the real {@link FilesystemTargetFileCopyAdapter} is used.
     * Set via {@link #setTargetFileCopyPortOverride} to inject a failure-simulating stub.
     */
    private TargetFileCopyPort targetFileCopyPortOverride;

    private E2ETestContext(
            Path sourceFolder,
            Path targetFolder,
            Path lockFile,
            Path promptFile,
            String jdbcUrl,
            SqliteDocumentRecordRepositoryAdapter documentRepo,
            SqliteProcessingAttemptRepositoryAdapter attemptRepo,
            StubAiInvocationPort aiStub) {
        this.sourceFolder = sourceFolder;
        this.targetFolder = targetFolder;
        this.lockFile = lockFile;
        this.promptFile = promptFile;
        this.jdbcUrl = jdbcUrl;
        this.documentRepo = documentRepo;
        this.attemptRepo = attemptRepo;
        this.aiStub = aiStub;
    }

    /**
     * Initializes a fully wired end-to-end test context rooted in {@code tempDir}.
     * <p>
     * Creates the {@code source/}, {@code target/} subdirectories and a minimal prompt
     * file, initializes the SQLite schema, and wires all adapters.
     *
     * @param tempDir the JUnit {@code @TempDir} or any writable temporary directory
     * @return a ready-to-use context; caller is responsible for closing it
     * @throws Exception if schema initialization or directory/file creation fails
     */
    public static E2ETestContext initialize(Path tempDir) throws Exception {
        Path sourceFolder = Files.createDirectories(tempDir.resolve("source"));
        Path targetFolder = Files.createDirectories(tempDir.resolve("target"));
        Path lockFile = tempDir.resolve("run.lock");
        Path promptFile = tempDir.resolve("prompt.txt");

        Files.writeString(promptFile,
                "Analysiere das folgende Dokument und liefere Datum, Titel und Begruendung als JSON-Objekt.");

        // Forward slashes keep the SQLite JDBC URL well-formed on Windows paths too.
        String jdbcUrl = "jdbc:sqlite:" + tempDir.resolve("test.db").toAbsolutePath().toString().replace('\\', '/');

        // Schema must exist before the repository adapters issue their first queries.
        SqliteSchemaInitializationAdapter schema = new SqliteSchemaInitializationAdapter(jdbcUrl);
        schema.initializeSchema();

        SqliteDocumentRecordRepositoryAdapter documentRepo =
                new SqliteDocumentRecordRepositoryAdapter(jdbcUrl);
        SqliteProcessingAttemptRepositoryAdapter attemptRepo =
                new SqliteProcessingAttemptRepositoryAdapter(jdbcUrl);

        return new E2ETestContext(
                sourceFolder, targetFolder, lockFile, promptFile,
                jdbcUrl, documentRepo, attemptRepo, new StubAiInvocationPort());
    }

    // =========================================================================
    // Test fixture creation
    // =========================================================================

    /**
     * Creates a single-page searchable PDF in the source folder with the given text.
     * <p>
     * The file is ready for the batch run as soon as this method returns.
     *
     * @param filename the PDF filename (e.g. {@code "rechnung.pdf"})
     * @param textContent text to embed; should be at least a few words to pass pre-checks
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createSearchablePdf(String filename, String textContent) throws IOException {
        Path pdfPath = sourceFolder.resolve(filename);
        MinimalPdfFactory.createSearchablePdf(pdfPath, textContent);
        return pdfPath;
    }

    /**
     * Creates a single-page blank PDF (no extractable text) in the source folder.
     * <p>
     * Processing this file triggers the "no usable text" deterministic content error,
     * which skips the AI call.
     *
     * @param filename the PDF filename (e.g. {@code "blank.pdf"})
     * @return the absolute path of the created file
     * @throws IOException if the file cannot be written
     */
    public Path createBlankPdf(String filename) throws IOException {
        Path pdfPath = sourceFolder.resolve(filename);
        MinimalPdfFactory.createBlankPdf(pdfPath);
        return pdfPath;
    }

    // =========================================================================
    // Batch execution
    // =========================================================================

    /**
     * Executes one complete batch run using the current stub configuration.
     * <p>
     * A fresh {@link BatchRunContext} with a new {@link RunId} is created for each call,
     * matching the production behavior where every Task Scheduler invocation is a
     * distinct run.
     *
     * @return the outcome of the batch run
     */
    public BatchRunOutcome runBatch() {
        DocumentProcessingCoordinator coordinator;
        DefaultBatchRunProcessingUseCase useCase = buildUseCase();
        BatchRunContext context = new BatchRunContext(
                new RunId(UUID.randomUUID().toString()), Instant.now());
        return useCase.execute(context);
    }

    // =========================================================================
    // State inspection helpers
    // =========================================================================

    /**
     * Looks up the document master record for the given fingerprint.
     *
     * @param fingerprint the document fingerprint to query
     * @return the master record if one exists, {@link Optional#empty()} if unknown or
     *         if a persistence lookup error occurred
     */
    public Optional<DocumentRecord> findDocumentRecord(DocumentFingerprint fingerprint) {
        return switch (documentRepo.findByFingerprint(fingerprint)) {
            case DocumentTerminalSuccess s -> Optional.of(s.record());
            case DocumentTerminalFinalFailure f -> Optional.of(f.record());
            case DocumentKnownProcessable p -> Optional.of(p.record());
            // Unknown documents and lookup failures are both reported as empty.
            default -> Optional.empty();
        };
    }

    /**
     * Returns all processing attempts for the given fingerprint in insertion order.
     *
     * @param fingerprint the document fingerprint to query
     * @return all recorded attempts; empty list if none exist
     */
    public List<ProcessingAttempt> findAttempts(DocumentFingerprint fingerprint) {
        return attemptRepo.findAllByFingerprint(fingerprint);
    }

    /**
     * Computes the SHA-256 fingerprint for the given file using the production adapter.
     * <p>
     * Useful for correlating a test PDF with its database record after a batch run.
     *
     * @param file the absolute path of the file to fingerprint
     * @return the fingerprint
     * @throws IllegalStateException if fingerprint computation fails
     */
    public DocumentFingerprint computeFingerprint(Path file) {
        Sha256FingerprintAdapter adapter = new Sha256FingerprintAdapter();
        // Construct a minimal candidate that mirrors how the production source adapter creates one.
        // NOTE(review): size is passed as 0L — presumably the adapter hashes the file content only;
        // confirm the candidate size does not feed into the fingerprint.
        SourceDocumentCandidate candidate = new SourceDocumentCandidate(
                file.getFileName().toString(),
                0L,
                new SourceDocumentLocator(file.toAbsolutePath().toString()));
        return switch (adapter.computeFingerprint(candidate)) {
            case FingerprintSuccess s -> s.fingerprint();
            default -> throw new IllegalStateException(
                    "Fingerprint computation failed for test fixture: " + file);
        };
    }

    /**
     * Lists the filenames of all files currently in the target folder.
     *
     * @return list of filenames; empty if target folder is empty
     * @throws IOException if the target folder cannot be read
     */
    public List<String> listTargetFiles() throws IOException {
        try (var stream = Files.list(targetFolder)) {
            return stream.map(p -> p.getFileName().toString()).sorted().toList();
        }
    }

    /**
     * Returns the source folder path used by this context.
     */
    public Path sourceFolder() {
        return sourceFolder;
    }

    /**
     * Returns the target folder path used by this context.
     */
    public Path targetFolder() {
        return targetFolder;
    }

    /**
     * Overrides the {@link TargetFileCopyPort} used in subsequent batch runs.
     * Pass {@code null} to revert to the real {@link FilesystemTargetFileCopyAdapter}.
     *
     * @param override the port implementation to use, or {@code null} for the real adapter
     */
    public void setTargetFileCopyPortOverride(TargetFileCopyPort override) {
        this.targetFileCopyPortOverride = override;
    }

    @Override
    public void close() {
        // No explicit cleanup needed: @TempDir removes all files automatically
    }

    // =========================================================================
    // Private wiring
    // =========================================================================

    /**
     * Constructs a fully wired {@link DefaultBatchRunProcessingUseCase} for a single batch run.
     * <p>
     * All adapters are instantiated fresh per run to avoid shared mutable state between
     * runs (e.g. locks, connection states). The AI stub and optional copy-port override
     * are re-used across runs within the same test.
     */
    private DefaultBatchRunProcessingUseCase buildUseCase() {
        RuntimeConfiguration runtimeConfig = new RuntimeConfiguration(
                MAX_PAGES, MAX_RETRIES_TRANSIENT, AiContentSensitivity.PROTECT_SENSITIVE_CONTENT);

        FingerprintPort fingerprintPort = new Sha256FingerprintAdapter();

        DocumentRecordRepository documentRecordRepository = documentRepo;
        ProcessingAttemptRepository processingAttemptRepository = attemptRepo;
        UnitOfWorkPort unitOfWorkPort = new SqliteUnitOfWorkAdapter(jdbcUrl);

        ProcessingLogger coordinatorLogger = new Log4jProcessingLogger(
                DocumentProcessingCoordinator.class);
        TargetFolderPort targetFolderPort = new FilesystemTargetFolderAdapter(targetFolder);
        // The override (when set) replaces the real filesystem adapter for failure-injection tests.
        TargetFileCopyPort targetFileCopyPort = (targetFileCopyPortOverride != null)
                ? targetFileCopyPortOverride
                : new FilesystemTargetFileCopyAdapter(targetFolder);

        DocumentProcessingCoordinator coordinator = new DocumentProcessingCoordinator(
                documentRecordRepository,
                processingAttemptRepository,
                unitOfWorkPort,
                targetFolderPort,
                targetFileCopyPort,
                coordinatorLogger,
                MAX_RETRIES_TRANSIENT);

        PromptPort promptPort = new FilesystemPromptPortAdapter(promptFile);
        ClockPort clockPort = new SystemClockAdapter();
        AiResponseValidator aiResponseValidator = new AiResponseValidator(clockPort);
        AiNamingService aiNamingService = new AiNamingService(
                aiStub, promptPort, aiResponseValidator, AI_MODEL, MAX_TEXT_CHARS);

        ProcessingLogger useCaseLogger = new Log4jProcessingLogger(
                DefaultBatchRunProcessingUseCase.class);

        RunLockPort runLockPort = new FilesystemRunLockPortAdapter(lockFile);
        SourceDocumentCandidatesPort candidatesPort =
                new SourceDocumentCandidatesPortAdapter(sourceFolder);
        PdfTextExtractionPort extractionPort = new PdfTextExtractionPortAdapter();

        return new DefaultBatchRunProcessingUseCase(
                runtimeConfig,
                runLockPort,
                candidatesPort,
                extractionPort,
                fingerprintPort,
                coordinator,
                aiNamingService,
                useCaseLogger);
    }
}
|
||||
@@ -0,0 +1,72 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.apache.pdfbox.pdmodel.PDPageContentStream;
|
||||
import org.apache.pdfbox.pdmodel.font.PDType1Font;
|
||||
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Path;
|
||||
|
||||
/**
|
||||
* Factory for creating minimal PDF test fixtures used in end-to-end tests.
|
||||
* <p>
|
||||
* Provides two variants:
|
||||
* <ul>
|
||||
* <li>Searchable PDFs with embedded text content — used for happy-path, transient-error,
|
||||
* and target-copy-failure scenarios where the pre-check must pass.</li>
|
||||
* <li>Blank PDFs with no extractable text — used for deterministic content-error
|
||||
* scenarios where the pre-check must fail.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Uses Apache PDFBox to create real, structurally valid PDF files so that the
|
||||
* production {@code PdfTextExtractionPortAdapter} processes them correctly.
|
||||
*/
|
||||
final class MinimalPdfFactory {
|
||||
|
||||
private MinimalPdfFactory() {
|
||||
// Static utility class — not instantiable
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a single-page searchable PDF with the given text content at the output path.
|
||||
* <p>
|
||||
* The resulting file passes the production pre-checks for minimum text length and
|
||||
* page count, enabling the AI naming pipeline to run.
|
||||
*
|
||||
* @param outputPath the path where the PDF will be written; parent directory must exist
|
||||
* @param textContent the text to embed in the PDF; should be non-empty for happy-path tests
|
||||
* @throws IOException if the file cannot be written
|
||||
*/
|
||||
static void createSearchablePdf(Path outputPath, String textContent) throws IOException {
|
||||
try (PDDocument doc = new PDDocument()) {
|
||||
PDPage page = new PDPage();
|
||||
doc.addPage(page);
|
||||
try (PDPageContentStream stream = new PDPageContentStream(doc, page)) {
|
||||
stream.beginText();
|
||||
stream.setFont(new PDType1Font(Standard14Fonts.FontName.HELVETICA), 12);
|
||||
stream.newLineAtOffset(50, 700);
|
||||
stream.showText(textContent);
|
||||
stream.endText();
|
||||
}
|
||||
doc.save(outputPath.toFile());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a single-page blank PDF with no text content at the output path.
|
||||
* <p>
|
||||
* The resulting file triggers the "no usable text" pre-check failure
|
||||
* (deterministic content error), which does not invoke the AI service.
|
||||
*
|
||||
* @param outputPath the path where the PDF will be written; parent directory must exist
|
||||
* @throws IOException if the file cannot be written
|
||||
*/
|
||||
static void createBlankPdf(Path outputPath) throws IOException {
|
||||
try (PDDocument doc = new PDDocument()) {
|
||||
doc.addPage(new PDPage());
|
||||
doc.save(outputPath.toFile());
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,109 @@
|
||||
package de.gecheckt.pdf.umbenenner.bootstrap.e2e;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.AiInvocationTechnicalFailure;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRawResponse;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.AiRequestRepresentation;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
/**
|
||||
* Configurable test double for {@link AiInvocationPort}.
|
||||
* <p>
|
||||
* Replaces the real HTTP-based AI adapter in end-to-end tests so that the processing
|
||||
* pipeline can be exercised without real network calls. Supports two response modes:
|
||||
* <ul>
|
||||
* <li><strong>Success mode</strong> (default): returns a structurally valid JSON response
|
||||
* containing configurable {@code title} and {@code date} fields. This produces a
|
||||
* {@code PROPOSAL_READY} outcome when the response passes validation.</li>
|
||||
* <li><strong>Technical failure mode</strong>: returns an {@link AiInvocationTechnicalFailure},
|
||||
* simulating network errors or service unavailability. This produces a
|
||||
* {@code FAILED_RETRYABLE} (transient) outcome.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* The stub tracks the total number of invocations so that tests can verify whether
|
||||
* the AI pipeline was called at all (e.g. confirming that a {@code PROPOSAL_READY}
|
||||
* finalization skips the AI call).
|
||||
*/
|
||||
final class StubAiInvocationPort implements AiInvocationPort {
|
||||
|
||||
private final AtomicInteger invocationCount = new AtomicInteger(0);
|
||||
|
||||
private volatile boolean returnTechnicalFailure = false;
|
||||
private volatile String title = "Stromabrechnung";
|
||||
private volatile String date = "2024-01-15";
|
||||
private volatile String reasoning = "Testdokument fuer End-to-End-Tests.";
|
||||
|
||||
/**
|
||||
* Configures the stub to return a valid naming proposal with the given title and date.
|
||||
*
|
||||
* @param title the document title (must pass validation: max 20 chars, no special chars)
|
||||
* @param date the document date in {@code YYYY-MM-DD} format, or {@code null} to omit
|
||||
*/
|
||||
void configureSuccess(String title, String date) {
|
||||
this.title = title;
|
||||
this.date = date;
|
||||
this.returnTechnicalFailure = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Configures the stub to return a transient technical failure on every invocation.
|
||||
* The failure reason is {@code STUB_FAILURE}.
|
||||
*/
|
||||
void configureTechnicalFailure() {
|
||||
this.returnTechnicalFailure = true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the stub to its default success configuration with title "Stromabrechnung"
|
||||
* and date "2024-01-15", and clears the invocation counter.
|
||||
*/
|
||||
void reset() {
|
||||
this.title = "Stromabrechnung";
|
||||
this.date = "2024-01-15";
|
||||
this.reasoning = "Testdokument fuer End-to-End-Tests.";
|
||||
this.returnTechnicalFailure = false;
|
||||
invocationCount.set(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of times {@link #invoke} was called since construction
|
||||
* or the last {@link #reset()}.
|
||||
*/
|
||||
int invocationCount() {
|
||||
return invocationCount.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the invocation counter to zero without changing response configuration.
|
||||
*/
|
||||
void resetInvocationCount() {
|
||||
invocationCount.set(0);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns either a success response or a technical failure, depending on current configuration.
|
||||
* Increments the invocation counter on every call.
|
||||
*/
|
||||
@Override
|
||||
public AiInvocationResult invoke(AiRequestRepresentation request) {
|
||||
invocationCount.incrementAndGet();
|
||||
|
||||
if (returnTechnicalFailure) {
|
||||
return new AiInvocationTechnicalFailure(
|
||||
request,
|
||||
"STUB_FAILURE",
|
||||
"Test stub: configured to return technical failure");
|
||||
}
|
||||
|
||||
String dateField = (date != null) ? "\"date\": \"" + date + "\", " : "";
|
||||
String rawJson = "{"
|
||||
+ dateField
|
||||
+ "\"title\": \"" + title + "\", "
|
||||
+ "\"reasoning\": \"" + reasoning + "\""
|
||||
+ "}";
|
||||
return new AiInvocationSuccess(request, new AiRawResponse(rawJson));
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user