PreCheckFailed auf strukturierten Fehlergrund umgestellt
This commit is contained in:
@@ -8,38 +8,42 @@ import java.util.Objects;
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata (for correlation)</li>
|
||||
* <li>A description of why the pre-check failed</li>
|
||||
* <li>A reason why the pre-check failed</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Reasons include:
|
||||
* <ul>
|
||||
* <li>"No usable text" – extraction yielded no meaningful content</li>
|
||||
* <li>"Page limit exceeded" – document exceeds the configured page limit</li>
|
||||
* <li>"Technical extraction error" – I/O or PDFBox failure (may be retryable later)</li>
|
||||
* <li>{@link PreCheckFailureReason#NO_USABLE_TEXT} – extraction yielded no meaningful content</li>
|
||||
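* <li>{@link PreCheckFailureReason#PAGE_LIMIT_EXCEEDED} – document exceeds the configured page limit</li>
|
||||
* <li>{@link PreCheckFailureReason#CONTENT_NOT_EXTRACTABLE} – PDF content cannot be extracted due to structural issues</li>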
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision will not proceed further in the current batch run.
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param failureReason a human-readable explanation of the pre-check failure
|
||||
* @param failureReason the reason for the pre-check failure
|
||||
*/
|
||||
public record PreCheckFailed(
|
||||
SourceDocumentCandidate candidate,
|
||||
String failureReason
|
||||
PreCheckFailureReason failureReason
|
||||
) implements ProcessingDecision, DocumentProcessingOutcome {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param failureReason must be non-null and non-empty
|
||||
* @param failureReason must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
* @throws IllegalArgumentException if failureReason is empty
|
||||
*/
|
||||
public PreCheckFailed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(failureReason, "failureReason must not be null");
|
||||
if (failureReason.isEmpty()) {
|
||||
throw new IllegalArgumentException("failureReason must not be empty");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a human-readable description of the failure reason.
|
||||
*
|
||||
* @return the description of the failure reason
|
||||
*/
|
||||
public String failureReasonDescription() {
|
||||
return failureReason.getDescription();
|
||||
}
|
||||
}
|
||||
@@ -10,6 +10,7 @@ package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
* <ul>
|
||||
* <li>{@link #NO_USABLE_TEXT}: The extracted text contains no meaningful content after normalization.</li>
|
||||
* <li>{@link #PAGE_LIMIT_EXCEEDED}: The document exceeds the configured page limit.</li>
|
||||
* <li>{@link #CONTENT_NOT_EXTRACTABLE}: The PDF content cannot be extracted due to structural issues.</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Note: Technical extraction failures (I/O errors, PDFBox failures) are not pre-check reasons;
|
||||
@@ -33,7 +34,19 @@ public enum PreCheckFailureReason {
|
||||
* <p>
|
||||
* Retry logic: exactly 1 retry in a later batch run.
|
||||
*/
|
||||
PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit");
|
||||
PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"),
|
||||
|
||||
/**
|
||||
* The PDF content cannot be extracted due to structural issues in the document.
|
||||
* <p>
|
||||
* This is a deterministic content error: reprocessing the same file in a later run
|
||||
* will have the same outcome unless the source file is changed.
|
||||
* <p>
|
||||
* Examples: Password-protected PDFs, corrupted internal structure, unsupported encryption.
|
||||
* <p>
|
||||
* Retry logic: exactly 1 retry in a later batch run.
|
||||
*/
|
||||
CONTENT_NOT_EXTRACTABLE("PDF content not extractable");
|
||||
|
||||
private final String description;
|
||||
|
||||
|
||||
@@ -89,7 +89,7 @@ class DocumentProcessingOutcomeTest {
|
||||
@Test
|
||||
void testPreCheckFailed_IsDocumentProcessingOutcome() {
|
||||
// Verify type relationship
|
||||
var failed = new PreCheckFailed(candidate, "Test failure reason");
|
||||
var failed = new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT);
|
||||
assertInstanceOf(DocumentProcessingOutcome.class, failed);
|
||||
}
|
||||
|
||||
@@ -100,7 +100,7 @@ class DocumentProcessingOutcomeTest {
|
||||
|
||||
DocumentProcessingOutcome[] outcomes = {
|
||||
new PreCheckPassed(candidate, extraction),
|
||||
new PreCheckFailed(candidate, "Deterministic content failure"),
|
||||
new PreCheckFailed(candidate, PreCheckFailureReason.NO_USABLE_TEXT),
|
||||
new TechnicalDocumentError(candidate, "Technical extraction error", null)
|
||||
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user