M3-AP-006: Fehlerklassifikation vereinfacht und Logging auf korrekte
Ergebnisfälle ausgerichtet
This commit is contained in:
@@ -12,7 +12,12 @@
|
|||||||
"Bash(mvn -pl pdf-umbenenner-domain clean compile)",
|
"Bash(mvn -pl pdf-umbenenner-domain clean compile)",
|
||||||
"Bash(mvn help:describe -Dplugin=org.apache.pdfbox:pdfbox -Ddetail=false)",
|
"Bash(mvn help:describe -Dplugin=org.apache.pdfbox:pdfbox -Ddetail=false)",
|
||||||
"Bash(cd /d D:/Dev/Projects/pdf-umbenenner-parent)",
|
"Bash(cd /d D:/Dev/Projects/pdf-umbenenner-parent)",
|
||||||
"Bash(mvn -v)"
|
"Bash(mvn -v)",
|
||||||
|
"Bash(grep -E \"\\\\.java$\")",
|
||||||
|
"Bash(grep \"\\\\.java$\")",
|
||||||
|
"Bash(mvn -q clean compile -DskipTests)",
|
||||||
|
"Bash(mvn -q test)",
|
||||||
|
"Bash(mvn -q clean test)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,80 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.application.service;
|
||||||
|
|
||||||
|
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Orchestrates M3 document processing pipeline: extraction → pre-checks → outcome classification.
|
||||||
|
* <p>
|
||||||
|
* Converts technical extraction results into M3 processing outcomes through this pipeline:
|
||||||
|
* <ol>
|
||||||
|
* <li>If extraction fails (content or technical): {@link M3TechnicalDocumentError}</li>
|
||||||
|
* <li>If extraction succeeds: Evaluate M3 pre-checks via {@link M3PreCheckEvaluator}</li>
|
||||||
|
* </ol>
|
||||||
|
* <p>
|
||||||
|
* This service produces {@link M3DocumentProcessingOutcome}, a sealed interface that covers
|
||||||
|
* all four M3 document processing outcomes:
|
||||||
|
* <ul>
|
||||||
|
* <li>Pre-check passed (document ready for M4+)</li>
|
||||||
|
* <li>Pre-check failed (deterministic content error: no usable text, page limit exceeded)</li>
|
||||||
|
* <li>Technical document error (I/O, access, PDF parsing, etc.)</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* This service is stateless and thread-safe.
|
||||||
|
*
|
||||||
|
* @since M3-AP-006
|
||||||
|
*/
|
||||||
|
public class M3DocumentProcessingService {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Processes a document candidate through the complete M3 pipeline.
|
||||||
|
* <p>
|
||||||
|
* Pipeline:
|
||||||
|
* <ol>
|
||||||
|
* <li>Extract text and page count from the PDF candidate</li>
|
||||||
|
* <li>If extraction fails (technical or content): classify as technical document error</li>
|
||||||
|
* <li>If extraction succeeds: evaluate M3 pre-checks</li>
|
||||||
|
* </ol>
|
||||||
|
*
|
||||||
|
* @param candidate the document candidate to process
|
||||||
|
* @param extractionResult the result from PDF extraction (from {@link de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort})
|
||||||
|
* @param configuration the startup configuration (used for pre-check validation)
|
||||||
|
* @return the complete M3 processing outcome (one of four possibilities, all implementing {@link M3DocumentProcessingOutcome})
|
||||||
|
* @throws NullPointerException if any parameter is null
|
||||||
|
*/
|
||||||
|
public static M3DocumentProcessingOutcome processDocument(
|
||||||
|
SourceDocumentCandidate candidate,
|
||||||
|
PdfExtractionResult extractionResult,
|
||||||
|
StartConfiguration configuration) {
|
||||||
|
|
||||||
|
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||||
|
Objects.requireNonNull(extractionResult, "extractionResult must not be null");
|
||||||
|
Objects.requireNonNull(configuration, "configuration must not be null");
|
||||||
|
|
||||||
|
return switch (extractionResult) {
|
||||||
|
case PdfExtractionSuccess success ->
|
||||||
|
// Extraction succeeded: evaluate M3 pre-checks
|
||||||
|
M3PreCheckEvaluator.evaluate(candidate, success, configuration);
|
||||||
|
|
||||||
|
case PdfExtractionContentError contentError ->
|
||||||
|
// PDF content not extractable: classify as technical document error
|
||||||
|
new M3TechnicalDocumentError(candidate, "PDF content not extractable: " + contentError.reason(), null);
|
||||||
|
|
||||||
|
case PdfExtractionTechnicalError technicalError ->
|
||||||
|
// Technical failure during extraction: potentially retryable
|
||||||
|
new M3TechnicalDocumentError(candidate, technicalError.errorMessage(), technicalError.cause());
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
private M3DocumentProcessingService() {
|
||||||
|
// Static utility class – no instances
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
package de.gecheckt.pdf.umbenenner.application.service;
|
package de.gecheckt.pdf.umbenenner.application.service;
|
||||||
|
|
||||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
|
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||||
|
|
||||||
@@ -40,14 +40,17 @@ public class M3PreCheckEvaluator {
|
|||||||
* <p>
|
* <p>
|
||||||
* Returns {@link M3PreCheckPassed} if both checks pass, or {@link M3PreCheckFailed}
|
* Returns {@link M3PreCheckPassed} if both checks pass, or {@link M3PreCheckFailed}
|
||||||
* with a specific reason if any check fails.
|
* with a specific reason if any check fails.
|
||||||
|
* <p>
|
||||||
|
* Note: Returns {@link M3DocumentProcessingOutcome} to integrate cleanly with the complete
|
||||||
|
* M3 document processing pipeline.
|
||||||
*
|
*
|
||||||
* @param candidate the source document metadata
|
* @param candidate the source document metadata
|
||||||
* @param extraction the successfully extracted PDF content
|
* @param extraction the successfully extracted PDF content
|
||||||
* @param configuration the startup configuration (used for maxPages limit)
|
* @param configuration the startup configuration (used for maxPages limit)
|
||||||
* @return the pre-check decision: passed or failed with reason
|
* @return the pre-check outcome: passed or failed with reason (both implement {@link M3DocumentProcessingOutcome})
|
||||||
* @throws NullPointerException if any parameter is null
|
* @throws NullPointerException if any parameter is null
|
||||||
*/
|
*/
|
||||||
public static M3ProcessingDecision evaluate(
|
public static M3DocumentProcessingOutcome evaluate(
|
||||||
SourceDocumentCandidate candidate,
|
SourceDocumentCandidate candidate,
|
||||||
PdfExtractionSuccess extraction,
|
PdfExtractionSuccess extraction,
|
||||||
StartConfiguration configuration) {
|
StartConfiguration configuration) {
|
||||||
|
|||||||
@@ -13,8 +13,18 @@
|
|||||||
* Current services:
|
* Current services:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator} — M3 pre-check evaluation (M3-AP-004)</li>
|
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator} — M3 pre-check evaluation (M3-AP-004)</li>
|
||||||
|
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService} — complete M3 document processing pipeline orchestration (M3-AP-006)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
|
* M3 Document Processing Pipeline (M3-AP-006):
|
||||||
|
* The {@link de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService} coordinates
|
||||||
|
* the complete M3 processing workflow:
|
||||||
|
* <ol>
|
||||||
|
* <li>Convert technical PDF extraction results to M3 processing outcomes</li>
|
||||||
|
* <li>Route successful extractions through M3 pre-check validation</li>
|
||||||
|
* <li>Classify extraction and pre-check failures with appropriate error types</li>
|
||||||
|
* </ol>
|
||||||
|
*
|
||||||
* @since M3-AP-004
|
* @since M3-AP-004
|
||||||
*/
|
*/
|
||||||
package de.gecheckt.pdf.umbenenner.application.service;
|
package de.gecheckt.pdf.umbenenner.application.service;
|
||||||
|
|||||||
@@ -8,11 +8,12 @@ import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
|
|||||||
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
|
||||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||||
import de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator;
|
import de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||||
@@ -142,16 +143,17 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
|
|||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Processes a single PDF candidate through the M3 pipeline.
|
* Processes a single PDF candidate through the complete M3 pipeline.
|
||||||
* <p>
|
* <p>
|
||||||
* M3 processing steps per document:
|
* M3 processing steps per document:
|
||||||
* <ol>
|
* <ol>
|
||||||
|
* <li>Log candidate recognition</li>
|
||||||
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
|
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
|
||||||
* <li>On successful extraction: run M3 pre-checks via {@link M3PreCheckEvaluator}</li>
|
* <li>Process extraction result through M3 pre-checks via {@link M3DocumentProcessingService}</li>
|
||||||
* <li>Log the per-document M3 decision and end controlled</li>
|
* <li>Log extraction outcome and final M3 decision</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
* <p>
|
* <p>
|
||||||
* Per-document errors (extraction failure, pre-check failure) do not abort the overall
|
* Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall
|
||||||
* batch run. Each candidate ends controlled regardless of its outcome.
|
* batch run. Each candidate ends controlled regardless of its outcome.
|
||||||
* <p>
|
* <p>
|
||||||
* M3 processing boundary: no KI call, no persistence, no filename generation,
|
* M3 processing boundary: no KI call, no persistence, no filename generation,
|
||||||
@@ -160,25 +162,36 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
|
|||||||
* @param candidate the candidate to process
|
* @param candidate the candidate to process
|
||||||
*/
|
*/
|
||||||
private void processCandidate(SourceDocumentCandidate candidate) {
|
private void processCandidate(SourceDocumentCandidate candidate) {
|
||||||
|
LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier());
|
||||||
|
|
||||||
PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
|
||||||
|
|
||||||
|
// Log extraction outcome
|
||||||
switch (extractionResult) {
|
switch (extractionResult) {
|
||||||
case PdfExtractionSuccess success -> {
|
case PdfExtractionSuccess success ->
|
||||||
M3ProcessingDecision decision = M3PreCheckEvaluator.evaluate(candidate, success, configuration);
|
LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.",
|
||||||
switch (decision) {
|
candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length());
|
||||||
case M3PreCheckPassed passed ->
|
|
||||||
LOG.info("M3 pre-checks passed for '{}'. Candidate ready for further processing (M4+).",
|
|
||||||
candidate.uniqueIdentifier());
|
|
||||||
case M3PreCheckFailed failed ->
|
|
||||||
LOG.info("M3 pre-check failed for '{}': {}",
|
|
||||||
candidate.uniqueIdentifier(), failed.failureReason());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
case PdfExtractionContentError contentError ->
|
case PdfExtractionContentError contentError ->
|
||||||
LOG.info("PDF content not extractable for '{}': {}",
|
LOG.debug("PDF content extraction failed for '{}' (content problem): {}",
|
||||||
candidate.uniqueIdentifier(), contentError.reason());
|
candidate.uniqueIdentifier(), contentError.reason());
|
||||||
case PdfExtractionTechnicalError technicalError ->
|
case PdfExtractionTechnicalError technicalError ->
|
||||||
LOG.warn("Technical error extracting PDF '{}': {}",
|
LOG.debug("PDF extraction technical error for '{}': {}",
|
||||||
|
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process through complete M3 pipeline
|
||||||
|
var m3Outcome = M3DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
|
||||||
|
|
||||||
|
// Log M3 processing outcome
|
||||||
|
switch (m3Outcome) {
|
||||||
|
case M3PreCheckPassed passed ->
|
||||||
|
LOG.info("M3 pre-checks PASSED for '{}'. Candidate ready for further processing (M4+).",
|
||||||
|
candidate.uniqueIdentifier());
|
||||||
|
case M3PreCheckFailed failed ->
|
||||||
|
LOG.info("M3 pre-checks FAILED for '{}': {} (Deterministic content error – may retry in later run).",
|
||||||
|
candidate.uniqueIdentifier(), failed.failureReason());
|
||||||
|
case M3TechnicalDocumentError technicalError ->
|
||||||
|
LOG.warn("M3 processing FAILED for '{}': {} (Technical error – may retry in later run).",
|
||||||
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
candidate.uniqueIdentifier(), technicalError.errorMessage());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,195 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.application.service;
|
||||||
|
|
||||||
|
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for {@link M3DocumentProcessingService}.
|
||||||
|
* <p>
|
||||||
|
* Verifies that all four M3 document processing outcomes are correctly classified.
|
||||||
|
*/
|
||||||
|
class M3DocumentProcessingServiceTest {
|
||||||
|
|
||||||
|
@TempDir
|
||||||
|
Path tempDir;
|
||||||
|
|
||||||
|
private SourceDocumentCandidate candidate;
|
||||||
|
private StartConfiguration configuration;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
void setUp() throws Exception {
|
||||||
|
// Create test PDF file
|
||||||
|
Path pdfFile = tempDir.resolve("document.pdf");
|
||||||
|
Files.createFile(pdfFile);
|
||||||
|
SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString());
|
||||||
|
candidate = new SourceDocumentCandidate("document.pdf", 2048L, locator);
|
||||||
|
|
||||||
|
// Create directories and files for configuration
|
||||||
|
Path sourceDir = Files.createDirectories(tempDir.resolve("source"));
|
||||||
|
Path targetDir = Files.createDirectories(tempDir.resolve("target"));
|
||||||
|
Path dbFile = tempDir.resolve("db.sqlite");
|
||||||
|
Files.createFile(dbFile);
|
||||||
|
Path promptFile = tempDir.resolve("prompt.txt");
|
||||||
|
Files.createFile(promptFile);
|
||||||
|
|
||||||
|
configuration = new StartConfiguration(
|
||||||
|
sourceDir,
|
||||||
|
targetDir,
|
||||||
|
dbFile,
|
||||||
|
URI.create("http://localhost:8000"),
|
||||||
|
"gpt-4",
|
||||||
|
30,
|
||||||
|
3,
|
||||||
|
10,
|
||||||
|
5000,
|
||||||
|
promptFile,
|
||||||
|
tempDir.resolve("lock"),
|
||||||
|
tempDir.resolve("logs"),
|
||||||
|
"INFO",
|
||||||
|
"test-key"
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithSuccessfulExtraction_AndPassedPreChecks() {
|
||||||
|
// Arrange: Successful extraction with valid text
|
||||||
|
var extraction = new PdfExtractionSuccess("This is valid PDF text", new PdfPageCount(5));
|
||||||
|
|
||||||
|
// Act
|
||||||
|
M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument(
|
||||||
|
candidate, extraction, configuration);
|
||||||
|
|
||||||
|
// Assert: Should produce M3PreCheckPassed
|
||||||
|
assertInstanceOf(M3PreCheckPassed.class, outcome);
|
||||||
|
M3PreCheckPassed passed = (M3PreCheckPassed) outcome;
|
||||||
|
assertEquals(candidate, passed.candidate());
|
||||||
|
assertEquals(extraction, passed.extraction());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithSuccessfulExtraction_AndFailedPreCheck_NoUsableText() {
|
||||||
|
// Arrange: Successful extraction but with only whitespace
|
||||||
|
var extraction = new PdfExtractionSuccess(" \n \t ", new PdfPageCount(1));
|
||||||
|
|
||||||
|
// Act
|
||||||
|
M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument(
|
||||||
|
candidate, extraction, configuration);
|
||||||
|
|
||||||
|
// Assert: Should produce M3PreCheckFailed with appropriate reason
|
||||||
|
assertInstanceOf(M3PreCheckFailed.class, outcome);
|
||||||
|
M3PreCheckFailed failed = (M3PreCheckFailed) outcome;
|
||||||
|
assertEquals(candidate, failed.candidate());
|
||||||
|
assertTrue(failed.failureReason().toLowerCase().contains("usable"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithSuccessfulExtraction_AndFailedPreCheck_PageLimitExceeded() {
|
||||||
|
// Arrange: Successful extraction but exceeds page limit
|
||||||
|
var extraction = new PdfExtractionSuccess("Valid text content", new PdfPageCount(50));
|
||||||
|
|
||||||
|
// Act
|
||||||
|
M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument(
|
||||||
|
candidate, extraction, configuration);
|
||||||
|
|
||||||
|
// Assert: Should produce M3PreCheckFailed with page limit reason
|
||||||
|
assertInstanceOf(M3PreCheckFailed.class, outcome);
|
||||||
|
M3PreCheckFailed failed = (M3PreCheckFailed) outcome;
|
||||||
|
assertEquals(candidate, failed.candidate());
|
||||||
|
assertTrue(failed.failureReason().toLowerCase().contains("page"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithContentError() {
|
||||||
|
// Arrange: PDF content not extractable (classified as technical document error in M3)
|
||||||
|
var contentError = new PdfExtractionContentError("PDF is corrupted");
|
||||||
|
|
||||||
|
// Act
|
||||||
|
M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument(
|
||||||
|
candidate, contentError, configuration);
|
||||||
|
|
||||||
|
// Assert: Should produce M3TechnicalDocumentError
|
||||||
|
assertInstanceOf(M3TechnicalDocumentError.class, outcome);
|
||||||
|
M3TechnicalDocumentError result = (M3TechnicalDocumentError) outcome;
|
||||||
|
assertEquals(candidate, result.candidate());
|
||||||
|
assertTrue(result.errorMessage().contains("PDF is corrupted"));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithTechnicalError() {
|
||||||
|
// Arrange: Technical error during extraction
|
||||||
|
var technicalError = new PdfExtractionTechnicalError("I/O error reading file",
|
||||||
|
new RuntimeException("File not found"));
|
||||||
|
|
||||||
|
// Act
|
||||||
|
M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument(
|
||||||
|
candidate, technicalError, configuration);
|
||||||
|
|
||||||
|
// Assert: Should produce M3TechnicalDocumentError
|
||||||
|
assertInstanceOf(M3TechnicalDocumentError.class, outcome);
|
||||||
|
M3TechnicalDocumentError result = (M3TechnicalDocumentError) outcome;
|
||||||
|
assertEquals(candidate, result.candidate());
|
||||||
|
assertEquals("I/O error reading file", result.errorMessage());
|
||||||
|
assertNotNull(result.cause());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithTechnicalError_WithoutCause() {
|
||||||
|
// Arrange: Technical error without underlying exception
|
||||||
|
var technicalError = new PdfExtractionTechnicalError("Unknown error", null);
|
||||||
|
|
||||||
|
// Act
|
||||||
|
M3DocumentProcessingOutcome outcome = M3DocumentProcessingService.processDocument(
|
||||||
|
candidate, technicalError, configuration);
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
assertInstanceOf(M3TechnicalDocumentError.class, outcome);
|
||||||
|
M3TechnicalDocumentError result = (M3TechnicalDocumentError) outcome;
|
||||||
|
assertNull(result.cause());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithNullCandidate_ThrowsException() {
|
||||||
|
// Arrange
|
||||||
|
var extraction = new PdfExtractionSuccess("Text", new PdfPageCount(1));
|
||||||
|
|
||||||
|
// Act & Assert
|
||||||
|
assertThrows(NullPointerException.class,
|
||||||
|
() -> M3DocumentProcessingService.processDocument(null, extraction, configuration));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithNullExtractionResult_ThrowsException() {
|
||||||
|
// Act & Assert
|
||||||
|
assertThrows(NullPointerException.class,
|
||||||
|
() -> M3DocumentProcessingService.processDocument(candidate, null, configuration));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testProcessDocument_WithNullConfiguration_ThrowsException() {
|
||||||
|
// Arrange
|
||||||
|
var extraction = new PdfExtractionSuccess("Text", new PdfPageCount(1));
|
||||||
|
|
||||||
|
// Act & Assert
|
||||||
|
assertThrows(NullPointerException.class,
|
||||||
|
() -> M3DocumentProcessingService.processDocument(candidate, extraction, null));
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -1,10 +1,10 @@
|
|||||||
package de.gecheckt.pdf.umbenenner.application.service;
|
package de.gecheckt.pdf.umbenenner.application.service;
|
||||||
|
|
||||||
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
|
||||||
|
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
|
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
||||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||||
@@ -35,7 +35,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Some meaningful text", new PdfPageCount(5));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Some meaningful text", new PdfPageCount(5));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass when text is usable and page count is valid");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass when text is usable and page count is valid");
|
||||||
M3PreCheckPassed passed = (M3PreCheckPassed) result;
|
M3PreCheckPassed passed = (M3PreCheckPassed) result;
|
||||||
@@ -49,7 +49,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckFailed, "Should fail with empty text");
|
assertTrue(result instanceof M3PreCheckFailed, "Should fail with empty text");
|
||||||
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
||||||
@@ -62,7 +62,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess(" \n\t \r\n ", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess(" \n\t \r\n ", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckFailed, "Should fail with whitespace-only text");
|
assertTrue(result instanceof M3PreCheckFailed, "Should fail with whitespace-only text");
|
||||||
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
||||||
@@ -75,7 +75,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#$%^&*()_+-=[]{}|;:',.<>?/", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#$%^&*()_+-=[]{}|;:',.<>?/", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckFailed, "Should fail with special characters only");
|
assertTrue(result instanceof M3PreCheckFailed, "Should fail with special characters only");
|
||||||
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
||||||
@@ -88,7 +88,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("a", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("a", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass with single letter");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass with single letter");
|
||||||
}
|
}
|
||||||
@@ -99,7 +99,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("5", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("5", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass with single digit");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass with single digit");
|
||||||
}
|
}
|
||||||
@@ -110,7 +110,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#a$%^&*", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#a$%^&*", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass when letters/digits are present among special chars");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass when letters/digits are present among special chars");
|
||||||
}
|
}
|
||||||
@@ -121,7 +121,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess(" meaningful text ", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess(" meaningful text ", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass when text has meaningful content despite whitespace");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass when text has meaningful content despite whitespace");
|
||||||
}
|
}
|
||||||
@@ -132,7 +132,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(5));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(5));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass when page count equals limit (not exceeded)");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass when page count equals limit (not exceeded)");
|
||||||
}
|
}
|
||||||
@@ -143,7 +143,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(6));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(6));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckFailed, "Should fail when page count exceeds limit");
|
assertTrue(result instanceof M3PreCheckFailed, "Should fail when page count exceeds limit");
|
||||||
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
||||||
@@ -156,7 +156,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Excellent meaningful text with lots of content", new PdfPageCount(100));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Excellent meaningful text with lots of content", new PdfPageCount(100));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckFailed, "Should fail with page limit exceeded even if text is good");
|
assertTrue(result instanceof M3PreCheckFailed, "Should fail with page limit exceeded even if text is good");
|
||||||
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
M3PreCheckFailed failed = (M3PreCheckFailed) result;
|
||||||
@@ -171,7 +171,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(10));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(10));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckFailed, "Should fail when both checks fail");
|
assertTrue(result instanceof M3PreCheckFailed, "Should fail when both checks fail");
|
||||||
// The specific order of checks doesn't matter for M3; just verify one reason is returned
|
// The specific order of checks doesn't matter for M3; just verify one reason is returned
|
||||||
@@ -216,7 +216,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Äußerst äöüß Großes", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Äußerst äöüß Großes", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass with German umlauts (ÄÖÜß)");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass with German umlauts (ÄÖÜß)");
|
||||||
}
|
}
|
||||||
@@ -227,7 +227,7 @@ class M3PreCheckEvaluatorTest {
|
|||||||
SourceDocumentCandidate candidate = buildCandidate();
|
SourceDocumentCandidate candidate = buildCandidate();
|
||||||
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Αβγδ 中文 καλημέρα", new PdfPageCount(1));
|
PdfExtractionSuccess extraction = new PdfExtractionSuccess("Αβγδ 中文 καλημέρα", new PdfPageCount(1));
|
||||||
|
|
||||||
M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
M3DocumentProcessingOutcome result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
|
||||||
|
|
||||||
assertTrue(result instanceof M3PreCheckPassed, "Should pass with Greek, Chinese, and other Unicode letters");
|
assertTrue(result instanceof M3PreCheckPassed, "Should pass with Greek, Chinese, and other Unicode letters");
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -0,0 +1,33 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sealed interface representing the complete outcome of M3 document processing.
|
||||||
|
* <p>
|
||||||
|
* This interface models all three possible M3 document outcomes:
|
||||||
|
* <ul>
|
||||||
|
* <li>{@link M3PreCheckPassed}: Document passed all M3 pre-checks</li>
|
||||||
|
* <li>{@link M3PreCheckFailed}: Document failed a pre-check (deterministic content error: no usable text or page limit exceeded)</li>
|
||||||
|
* <li>{@link M3TechnicalDocumentError}: Technical failure during candidate access or PDF extraction (I/O, access, parsing, etc.)</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Design principles:
|
||||||
|
* <ul>
|
||||||
|
* <li>Exhaustive: All M3 document processing outcomes are covered (exactly three cases)</li>
|
||||||
|
* <li>Document-centric: Each outcome carries the source candidate for correlation and traceability</li>
|
||||||
|
* <li>No exceptions: Results are encoded in the type system</li>
|
||||||
|
* <li>Clear distinction: Deterministic content errors (M3PreCheckFailed) vs. technical failures (M3TechnicalDocumentError)</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* Error classification:
|
||||||
|
* <ul>
|
||||||
|
* <li>M3PreCheckPassed: Extraction succeeded and all pre-checks passed (ready for M4+)</li>
|
||||||
|
* <li>M3PreCheckFailed: Extraction succeeded but deterministic content check failed (no usable text, page limit exceeded)</li>
|
||||||
|
* <li>M3TechnicalDocumentError: Extraction failed due to technical issue (I/O, file access, PDF parsing, etc.)</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* @since M3-AP-006
|
||||||
|
*/
|
||||||
|
public sealed interface M3DocumentProcessingOutcome
|
||||||
|
permits M3PreCheckPassed, M3PreCheckFailed, M3TechnicalDocumentError {
|
||||||
|
// Marker interface; concrete implementations define structure
|
||||||
|
}
|
||||||
@@ -27,7 +27,7 @@ import java.util.Objects;
|
|||||||
public record M3PreCheckFailed(
|
public record M3PreCheckFailed(
|
||||||
SourceDocumentCandidate candidate,
|
SourceDocumentCandidate candidate,
|
||||||
String failureReason
|
String failureReason
|
||||||
) implements M3ProcessingDecision {
|
) implements M3ProcessingDecision, M3DocumentProcessingOutcome {
|
||||||
/**
|
/**
|
||||||
* Constructor with validation.
|
* Constructor with validation.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ import java.util.Objects;
|
|||||||
public record M3PreCheckPassed(
|
public record M3PreCheckPassed(
|
||||||
SourceDocumentCandidate candidate,
|
SourceDocumentCandidate candidate,
|
||||||
PdfExtractionSuccess extraction
|
PdfExtractionSuccess extraction
|
||||||
) implements M3ProcessingDecision {
|
) implements M3ProcessingDecision, M3DocumentProcessingOutcome {
|
||||||
/**
|
/**
|
||||||
* Constructor with validation.
|
* Constructor with validation.
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -0,0 +1,52 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||||
|
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Represents a technical (infrastructure) failure during candidate access or PDF extraction.
|
||||||
|
* <p>
|
||||||
|
* This outcome indicates that a document could not be processed due to technical infrastructure failures,
|
||||||
|
* such as I/O errors, file access problems, or extraction engine failures.
|
||||||
|
* <p>
|
||||||
|
* These are typically retryable conditions, as they may be transient issues that could succeed
|
||||||
|
* in a later batch run.
|
||||||
|
* <p>
|
||||||
|
* Examples:
|
||||||
|
* <ul>
|
||||||
|
* <li>File not readable due to permissions</li>
|
||||||
|
* <li>File disappeared between discovery and extraction</li>
|
||||||
|
* <li>I/O error during file read</li>
|
||||||
|
* <li>Extraction engine (PDFBox) internal failure</li>
|
||||||
|
* <li>Out of memory during extraction</li>
|
||||||
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* This is distinct from {@link M3PreCheckFailed}, which represents problems with the document
|
||||||
|
* content itself rather than infrastructure failures.
|
||||||
|
*
|
||||||
|
* @param candidate the source document metadata
|
||||||
|
* @param errorMessage a description of the technical failure
|
||||||
|
* @param cause the underlying exception, if any (may be null)
|
||||||
|
* @since M3-AP-006
|
||||||
|
*/
|
||||||
|
public record M3TechnicalDocumentError(
|
||||||
|
SourceDocumentCandidate candidate,
|
||||||
|
String errorMessage,
|
||||||
|
Throwable cause
|
||||||
|
) implements M3DocumentProcessingOutcome {
|
||||||
|
/**
|
||||||
|
* Constructor with validation.
|
||||||
|
*
|
||||||
|
* @param candidate must be non-null
|
||||||
|
* @param errorMessage must be non-null and non-empty
|
||||||
|
* @param cause may be null
|
||||||
|
* @throws NullPointerException if candidate or errorMessage is null
|
||||||
|
* @throws IllegalArgumentException if errorMessage is empty
|
||||||
|
*/
|
||||||
|
public M3TechnicalDocumentError {
|
||||||
|
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||||
|
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
|
||||||
|
if (errorMessage.isEmpty()) {
|
||||||
|
throw new IllegalArgumentException("errorMessage must not be empty");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -16,12 +16,23 @@
|
|||||||
* Additional classes introduced in M3:
|
* Additional classes introduced in M3:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason} — enumeration of M3 pre-check failure reasons (M3-AP-004)</li>
|
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason} — enumeration of M3 pre-check failure reasons (M3-AP-004)</li>
|
||||||
|
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome} — sealed interface for all M3 document processing outcomes (M3-AP-006)</li>
|
||||||
|
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError} — technical failure during extraction (M3-AP-006)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* Implementation classes:
|
* Implementation classes:
|
||||||
* <ul>
|
* <ul>
|
||||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed} — document passed M3 pre-checks (M3-AP-001, M3-AP-004)</li>
|
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed} — document passed M3 pre-checks (M3-AP-001, M3-AP-004, M3-AP-006)</li>
|
||||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed} — document failed M3 pre-check (M3-AP-001, M3-AP-004)</li>
|
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed} — document failed M3 pre-check (M3-AP-001, M3-AP-004, M3-AP-006)</li>
|
||||||
|
* </ul>
|
||||||
|
*
|
||||||
|
* M3 Document Processing Outcome Model (M3-AP-006):
|
||||||
|
* The complete M3 document processing pipeline results in one of three outcomes, all implementing
|
||||||
|
* {@link de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome}:
|
||||||
|
* <ul>
|
||||||
|
* <li>Pre-check passed: Document text extracted and validated successfully (ready for M4+)</li>
|
||||||
|
* <li>Pre-check failed: Deterministic content error (no usable text, page limit exceeded)</li>
|
||||||
|
* <li>Technical document error: Infrastructure or parsing failure (I/O, access, PDF parsing)</li>
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* All classes in this package are:
|
* All classes in this package are:
|
||||||
|
|||||||
@@ -0,0 +1,113 @@
|
|||||||
|
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.BeforeEach;
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
import org.junit.jupiter.api.io.TempDir;
|
||||||
|
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
|
||||||
|
import static org.junit.jupiter.api.Assertions.*;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests for M3 document processing outcome types.
|
||||||
|
* <p>
|
||||||
|
* Verifies that all three outcome types are properly created and validated.
|
||||||
|
*/
|
||||||
|
class M3DocumentProcessingOutcomeTest {
|
||||||
|
|
||||||
|
@TempDir
|
||||||
|
Path tempDir;
|
||||||
|
|
||||||
|
private SourceDocumentCandidate candidate;
|
||||||
|
|
||||||
|
@BeforeEach
|
||||||
|
void setUp() throws Exception {
|
||||||
|
Path pdfFile = tempDir.resolve("doc.pdf");
|
||||||
|
Files.createFile(pdfFile);
|
||||||
|
SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString());
|
||||||
|
candidate = new SourceDocumentCandidate("doc.pdf", 1024L, locator);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3TechnicalDocumentError_ValidConstruction() {
|
||||||
|
// Act
|
||||||
|
var error = new M3TechnicalDocumentError(candidate, "I/O error", null);
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
assertEquals(candidate, error.candidate());
|
||||||
|
assertEquals("I/O error", error.errorMessage());
|
||||||
|
assertNull(error.cause());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3TechnicalDocumentError_WithCause() {
|
||||||
|
// Arrange
|
||||||
|
var cause = new RuntimeException("File not found");
|
||||||
|
|
||||||
|
// Act
|
||||||
|
var error = new M3TechnicalDocumentError(candidate, "I/O error", cause);
|
||||||
|
|
||||||
|
// Assert
|
||||||
|
assertEquals(cause, error.cause());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3TechnicalDocumentError_WithNullCandidate_ThrowsException() {
|
||||||
|
assertThrows(NullPointerException.class,
|
||||||
|
() -> new M3TechnicalDocumentError(null, "Error", null));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3TechnicalDocumentError_WithNullErrorMessage_ThrowsException() {
|
||||||
|
assertThrows(NullPointerException.class,
|
||||||
|
() -> new M3TechnicalDocumentError(candidate, null, null));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3TechnicalDocumentError_WithEmptyErrorMessage_ThrowsException() {
|
||||||
|
assertThrows(IllegalArgumentException.class,
|
||||||
|
() -> new M3TechnicalDocumentError(candidate, "", null));
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3TechnicalDocumentError_IsM3DocumentProcessingOutcome() {
|
||||||
|
// Verify type relationship
|
||||||
|
var error = new M3TechnicalDocumentError(candidate, "Error", null);
|
||||||
|
assertInstanceOf(M3DocumentProcessingOutcome.class, error);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3PreCheckPassed_IsM3DocumentProcessingOutcome() {
|
||||||
|
// Verify type relationship
|
||||||
|
var extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
|
||||||
|
var passed = new M3PreCheckPassed(candidate, extraction);
|
||||||
|
assertInstanceOf(M3DocumentProcessingOutcome.class, passed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testM3PreCheckFailed_IsM3DocumentProcessingOutcome() {
|
||||||
|
// Verify type relationship
|
||||||
|
var failed = new M3PreCheckFailed(candidate, "Test failure reason");
|
||||||
|
assertInstanceOf(M3DocumentProcessingOutcome.class, failed);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
void testAllThreeOutcomesAreExhaustive() {
|
||||||
|
// This test verifies that each of the three outcome types can be used as an M3DocumentProcessingOutcome
|
||||||
|
// M3 has exactly three outcome types: passed, failed (deterministic), and technical error
|
||||||
|
|
||||||
|
var extraction = new PdfExtractionSuccess("text", new PdfPageCount(1));
|
||||||
|
|
||||||
|
M3DocumentProcessingOutcome[] outcomes = {
|
||||||
|
new M3PreCheckPassed(candidate, extraction),
|
||||||
|
new M3PreCheckFailed(candidate, "Deterministic content failure"),
|
||||||
|
new M3TechnicalDocumentError(candidate, "Technical extraction error", null)
|
||||||
|
};
|
||||||
|
|
||||||
|
for (M3DocumentProcessingOutcome outcome : outcomes) {
|
||||||
|
assertInstanceOf(M3DocumentProcessingOutcome.class, outcome);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user