From 747f22438da5076cc62879c6d776a60fd59a9656 Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Thu, 2 Apr 2026 15:10:47 +0200 Subject: [PATCH] Inhaltsfehler bei PDF-Extraktion korrekt klassifiziert --- .../service/DocumentProcessingService.java | 13 ++++++++----- .../service/DocumentProcessingServiceTest.java | 10 +++++----- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java index 7144b0a..25fe918 100644 --- a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingService.java @@ -2,6 +2,7 @@ package de.gecheckt.pdf.umbenenner.application.service; import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome; +import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed; import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError; import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult; @@ -16,7 +17,8 @@ import java.util.Objects; *

* Converts technical extraction results into processing outcomes through this pipeline: *

    - *
  1. If extraction fails (content or technical): {@link TechnicalDocumentError}
  2. + *
  3. If extraction fails (technical): {@link TechnicalDocumentError}
  4. + *
  5. If extraction fails (content): {@link PreCheckFailed}
  6. *
  7. If extraction succeeds: Evaluate pre-checks via {@link PreCheckEvaluator}
  8. *
*

@@ -24,7 +26,7 @@ import java.util.Objects; * all document processing outcomes: *

*

@@ -38,7 +40,8 @@ public class DocumentProcessingService { * Pipeline: *

    *
  1. Extract text and page count from the PDF candidate
  2. - *
  3. If extraction fails (technical or content): classify as technical document error
  4. + *
  5. If extraction fails (technical): classify as technical document error
  6. + *
  7. If extraction fails (content): classify as pre-check failed (deterministic content error)
  8. *
  9. If extraction succeeds: evaluate pre-checks
  10. *
* @@ -63,8 +66,8 @@ public class DocumentProcessingService { PreCheckEvaluator.evaluate(candidate, success, configuration); case PdfExtractionContentError contentError -> - // PDF content not extractable: classify as technical document error - new TechnicalDocumentError(candidate, "PDF content not extractable: " + contentError.reason(), null); + // PDF content not extractable: classify as pre-check failed (deterministic content error) + new PreCheckFailed(candidate, "PDF content not extractable: " + contentError.reason()); case PdfExtractionTechnicalError technicalError -> // Technical failure during extraction: potentially retryable diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java index 8fc1bc4..eeda255 100644 --- a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/DocumentProcessingServiceTest.java @@ -119,18 +119,18 @@ class DocumentProcessingServiceTest { @Test void testProcessDocument_WithContentError() { - // Arrange: PDF content not extractable (classified as technical document error) + // Arrange: PDF content not extractable (classified as pre-check failed) var contentError = new PdfExtractionContentError("PDF is corrupted"); // Act DocumentProcessingOutcome outcome = DocumentProcessingService.processDocument( candidate, contentError, configuration); - // Assert: Should produce TechnicalDocumentError - assertInstanceOf(TechnicalDocumentError.class, outcome); - TechnicalDocumentError result = (TechnicalDocumentError) outcome; + // Assert: Should produce PreCheckFailed + assertInstanceOf(PreCheckFailed.class, outcome); + PreCheckFailed result = (PreCheckFailed) outcome; assertEquals(candidate, result.candidate()); - assertTrue(result.errorMessage().contains("PDF is corrupted")); + assertTrue(result.failureReason().contains("PDF is corrupted")); } @Test