M3-AP-004: Vorprüfung auf Unicode-fähigen brauchbaren Text korrigiert
This commit is contained in:
@@ -0,0 +1,46 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a document that failed an M3 pre-check.
|
||||
* <p>
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata (for correlation)</li>
|
||||
* <li>A description of why the pre-check failed</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Reasons include:
|
||||
* <ul>
|
||||
* <li>"No usable text" – extraction yielded no meaningful content</li>
|
||||
* <li>"Page limit exceeded" – document exceeds the configured page limit</li>
|
||||
* <li>"Technical extraction error" – I/O or PDFBox failure (may be retryable later)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision will not proceed further in the current batch run.
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param failureReason a human-readable explanation of the pre-check failure
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record M3PreCheckFailed(
|
||||
SourceDocumentCandidate candidate,
|
||||
String failureReason
|
||||
) implements M3ProcessingDecision {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param failureReason must be non-null and non-empty
|
||||
* @throws NullPointerException if either parameter is null
|
||||
* @throws IllegalArgumentException if failureReason is empty
|
||||
*/
|
||||
public M3PreCheckFailed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(failureReason, "failureReason must not be null");
|
||||
if (failureReason.isEmpty()) {
|
||||
throw new IllegalArgumentException("failureReason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,54 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
/**
 * Enumeration of M3 pre-check failure reasons.
 * <p>
 * These are the deterministic content errors that can occur during M3 pre-check evaluation.
 * They separate failures caused by the document content from technical extraction failures.
 * <p>
 * Deterministic content errors:
 * <ul>
 *   <li>{@link #NO_USABLE_TEXT}: the extracted text contains no meaningful content after
 *       normalization.</li>
 *   <li>{@link #PAGE_LIMIT_EXCEEDED}: the document exceeds the configured page limit.</li>
 * </ul>
 * <p>
 * Note: technical extraction failures (I/O errors, PDFBox failures) are not M3 pre-check
 * reasons; they are represented as {@link PdfExtractionTechnicalError} in the extraction result.
 *
 * @since M3-AP-004
 */
public enum M3PreCheckFailureReason {

    /**
     * The extracted PDF text, after normalization, contains no letters or digits.
     * <p>
     * This is a deterministic content error: reprocessing the same file in a later run
     * will have the same outcome unless the source file is changed.
     * <p>
     * In M3, retry logic: exactly 1 retry in a later batch run.
     */
    NO_USABLE_TEXT("No usable text in extracted PDF content"),

    /**
     * The document's page count exceeds the configured limit.
     * <p>
     * This is a deterministic content error: the page count will not change unless the
     * source file is modified.
     * <p>
     * In M3, retry logic: exactly 1 retry in a later batch run.
     */
    PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit");

    /** Human-readable text attached to the constant; set once by the constructor. */
    private final String description;

    /**
     * Binds a constant to its description.
     *
     * @param description the human-readable text returned by {@link #getDescription()}
     */
    M3PreCheckFailureReason(String description) {
        this.description = description;
    }

    /**
     * Returns a human-readable description of this failure reason.
     *
     * @return the description passed to the constant's constructor
     */
    public String getDescription() {
        return description;
    }
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a document that passed all M3 pre-checks.
|
||||
* <p>
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata</li>
|
||||
* <li>The successful PDF text extraction result</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision is ready to proceed to M4 and later milestones
|
||||
* (fingerprinting, persistence, KI integration, filename generation, target copy).
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param extraction the successful text extraction result
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record M3PreCheckPassed(
|
||||
SourceDocumentCandidate candidate,
|
||||
PdfExtractionSuccess extraction
|
||||
) implements M3ProcessingDecision {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param extraction must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
public M3PreCheckPassed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(extraction, "extraction must not be null");
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,12 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of M3 document pre-checks.
|
||||
* <p>
|
||||
* This is a placeholder interface introduced in AP-001 to establish the architectural
|
||||
* This interface, introduced in AP-001, establishes the architectural
|
||||
* pattern for M3 pre-check results. The actual pre-check logic (fachlich validation
|
||||
* such as "brauchbarer Text" and "Seitenlimit") is implemented in later APs (AP-004, AP-005).
|
||||
* such as "brauchbarer Text" and "Seitenlimit") is implemented in AP-004 via
|
||||
* {@link de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator}.
|
||||
* <p>
|
||||
* There are two allowed implementations:
|
||||
* <ul>
|
||||
@@ -29,79 +28,3 @@ public sealed interface M3ProcessingDecision
|
||||
permits M3PreCheckPassed, M3PreCheckFailed {
|
||||
// Marker interface; concrete implementations define structure
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a document that passed all M3 pre-checks.
|
||||
* <p>
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata</li>
|
||||
* <li>The successful PDF text extraction result</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision is ready to proceed to M4 and later milestones
|
||||
* (fingerprinting, persistence, KI integration, filename generation, target copy).
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param extraction the successful text extraction result
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record M3PreCheckPassed(
|
||||
SourceDocumentCandidate candidate,
|
||||
PdfExtractionSuccess extraction
|
||||
) implements M3ProcessingDecision {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param extraction must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
M3PreCheckPassed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(extraction, "extraction must not be null");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a document that failed an M3 pre-check.
|
||||
* <p>
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata (for correlation)</li>
|
||||
* <li>A description of why the pre-check failed</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Reasons include:
|
||||
* <ul>
|
||||
* <li>"No usable text" – extraction yielded no meaningful content</li>
|
||||
* <li>"Page limit exceeded" – document exceeds the configured page limit</li>
|
||||
* <li>"Technical extraction error" – I/O or PDFBox failure (may be retryable later)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision will not proceed further in the current batch run.
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param failureReason a human-readable explanation of the pre-check failure
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record M3PreCheckFailed(
|
||||
SourceDocumentCandidate candidate,
|
||||
String failureReason
|
||||
) implements M3ProcessingDecision {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param failureReason must be non-null and non-empty
|
||||
* @throws NullPointerException if either parameter is null
|
||||
* @throws IllegalArgumentException if failureReason is empty
|
||||
*/
|
||||
M3PreCheckFailed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(failureReason, "failureReason must not be null");
|
||||
if (failureReason.isEmpty()) {
|
||||
throw new IllegalArgumentException("failureReason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,6 +13,17 @@
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision} — sealed result of M3 pre-checks (M3-AP-001)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Additional classes introduced in M3:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason} — enumeration of M3 pre-check failure reasons (M3-AP-004)</li>
|
||||
* </ul>
|
||||
*
|
||||
* Implementation classes:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed} — document passed M3 pre-checks (M3-AP-001, M3-AP-004)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed} — document failed M3 pre-check (M3-AP-001, M3-AP-004)</li>
|
||||
* </ul>
|
||||
*
|
||||
* All classes in this package are:
|
||||
* <ul>
|
||||
* <li>Infrastructure-agnostic (no database, filesystem, network, or framework dependencies)</li>
|
||||
|
||||
Reference in New Issue
Block a user