M3-Kernobjekte und Ports für Quellkandidaten und PDF-Auslese eingeführt
This commit is contained in:
@@ -0,0 +1,107 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of M3 document pre-checks.
|
||||
* <p>
|
||||
* This is a placeholder interface introduced in AP-001 to establish the architectural
|
||||
* pattern for M3 pre-check results. The actual pre-check logic (fachlich validation
|
||||
* such as "brauchbarer Text" and "Seitenlimit") is implemented in later APs (AP-004, AP-005).
|
||||
* <p>
|
||||
* There are two allowed implementations:
|
||||
* <ul>
|
||||
* <li>{@link M3PreCheckPassed}: Document passed all M3 pre-checks and is ready for KI integration</li>
|
||||
* <li>{@link M3PreCheckFailed}: Document failed an M3 pre-check and will not proceed further in this run</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Design principles:
|
||||
* <ul>
|
||||
* <li>Sealed: enforces exhaustive handling of all cases</li>
|
||||
* <li>Carries both success path ({@link M3PreCheckPassed}) and failure reason ({@link M3PreCheckFailed})</li>
|
||||
* <li>Defined early (AP-001) to ensure architecture is established before logic arrives</li>
|
||||
* <li>Future-extensible for additional pre-check variants in later milestones</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public sealed interface M3ProcessingDecision
|
||||
permits M3PreCheckPassed, M3PreCheckFailed {
|
||||
// Marker interface; concrete implementations define structure
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a document that passed all M3 pre-checks.
|
||||
* <p>
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata</li>
|
||||
* <li>The successful PDF text extraction result</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision is ready to proceed to M4 and later milestones
|
||||
* (fingerprinting, persistence, KI integration, filename generation, target copy).
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param extraction the successful text extraction result
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record M3PreCheckPassed(
|
||||
SourceDocumentCandidate candidate,
|
||||
PdfExtractionSuccess extraction
|
||||
) implements M3ProcessingDecision {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param extraction must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
M3PreCheckPassed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(extraction, "extraction must not be null");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a document that failed an M3 pre-check.
|
||||
* <p>
|
||||
* This result encapsulates:
|
||||
* <ul>
|
||||
* <li>The original document candidate metadata (for correlation)</li>
|
||||
* <li>A description of why the pre-check failed</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Reasons include:
|
||||
* <ul>
|
||||
* <li>"No usable text" – extraction yielded no meaningful content</li>
|
||||
* <li>"Page limit exceeded" – document exceeds the configured page limit</li>
|
||||
* <li>"Technical extraction error" – I/O or PDFBox failure (may be retryable later)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* A document with this decision will not proceed further in the current batch run.
|
||||
*
|
||||
* @param candidate the source document metadata
|
||||
* @param failureReason a human-readable explanation of the pre-check failure
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record M3PreCheckFailed(
|
||||
SourceDocumentCandidate candidate,
|
||||
String failureReason
|
||||
) implements M3ProcessingDecision {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param candidate must be non-null
|
||||
* @param failureReason must be non-null and non-empty
|
||||
* @throws NullPointerException if either parameter is null
|
||||
* @throws IllegalArgumentException if failureReason is empty
|
||||
*/
|
||||
M3PreCheckFailed {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
Objects.requireNonNull(failureReason, "failureReason must not be null");
|
||||
if (failureReason.isEmpty()) {
|
||||
throw new IllegalArgumentException("failureReason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,122 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of PDF text extraction.
|
||||
* <p>
|
||||
* This interface uses Java 17+ sealed types to enforce exhaustive case handling.
|
||||
* There are exactly three allowed implementations:
|
||||
* <ul>
|
||||
* <li>{@link PdfExtractionSuccess}: Text and page count were successfully extracted</li>
|
||||
* <li>{@link PdfExtractionContentError}: PDF exists but is not extractable (content problem)</li>
|
||||
* <li>{@link PdfExtractionTechnicalError}: Technical failure during extraction (I/O, etc.)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Design principles:
|
||||
* <ul>
|
||||
* <li>No exceptions thrown: results are encoded in the type system</li>
|
||||
* <li>Exhaustive: pattern matching forces handling of all cases</li>
|
||||
* <li>Distinct error types: allows retry logic to differentiate recoverable from non-recoverable</li>
|
||||
* <li>No PDFBox or filesystem types: pure domain representation</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public sealed interface PdfExtractionResult
|
||||
permits PdfExtractionSuccess, PdfExtractionContentError, PdfExtractionTechnicalError {
|
||||
// Marker interface; concrete implementations define structure
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents successful PDF text extraction.
|
||||
* <p>
|
||||
* When this result is obtained, both text content and page count have been
|
||||
* successfully extracted and are guaranteed to be valid.
|
||||
*
|
||||
* @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
|
||||
* @param pageCount the number of pages in the PDF (non-null, validated >= 1)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionSuccess(
|
||||
String extractedText,
|
||||
PdfPageCount pageCount
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param extractedText must be non-null (may be empty)
|
||||
* @param pageCount must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
PdfExtractionSuccess {
|
||||
Objects.requireNonNull(extractedText, "extractedText must not be null");
|
||||
Objects.requireNonNull(pageCount, "pageCount must not be null");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a content-related failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that the PDF file itself is readable (no I/O error),
|
||||
* but its content is not suitable for text extraction.
|
||||
* <p>
|
||||
* Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
|
||||
* PDF is severely corrupted in the content layer.
|
||||
* <p>
|
||||
* This is typically a deterministic, non-retryable condition for a given source file
|
||||
* (unless the source file is modified and re-scanned in a later run).
|
||||
*
|
||||
* @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionContentError(
|
||||
String reason
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param reason must be non-null and non-empty
|
||||
* @throws NullPointerException if reason is null
|
||||
* @throws IllegalArgumentException if reason is empty
|
||||
*/
|
||||
PdfExtractionContentError {
|
||||
Objects.requireNonNull(reason, "reason must not be null");
|
||||
if (reason.isEmpty()) {
|
||||
throw new IllegalArgumentException("reason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a technical (infrastructure) failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that something went wrong with the extraction process itself,
|
||||
* such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
|
||||
* <p>
|
||||
* These are typically retryable conditions in later batch runs, as they may be
|
||||
* transient infrastructure issues.
|
||||
*
|
||||
* @param errorMessage a description of what went wrong (non-null, non-empty)
|
||||
* @param cause the underlying exception, if any (may be null)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionTechnicalError(
|
||||
String errorMessage,
|
||||
Throwable cause
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param errorMessage must be non-null and non-empty
|
||||
* @param cause may be null
|
||||
* @throws NullPointerException if errorMessage is null
|
||||
* @throws IllegalArgumentException if errorMessage is empty
|
||||
*/
|
||||
PdfExtractionTechnicalError {
|
||||
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
|
||||
if (errorMessage.isEmpty()) {
|
||||
throw new IllegalArgumentException("errorMessage must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
/**
 * Typed representation of a PDF document's page count.
 * <p>
 * This record provides type safety and semantic clarity, distinguishing a page count
 * from other numeric values (character counts, file sizes, error codes, etc.).
 * <p>
 * Design principles:
 * <ul>
 *   <li>Validated: only positive page counts are allowed</li>
 *   <li>Type-safe: prevents confusion with other numeric metrics</li>
 *   <li>Self-documenting: {@code PdfPageCount} is clearer than a naked {@code int}</li>
 *   <li>Future-extensible: validation rules can be added per milestone without
 *       signature changes</li>
 * </ul>
 *
 * @param value the number of pages; always at least 1
 * @since M3-AP-001
 */
public record PdfPageCount(int value) {

    /**
     * Compact canonical constructor that ensures the page count is positive.
     *
     * @param value must be {@code >= 1}
     * @throws IllegalArgumentException if {@code value < 1}
     */
    public PdfPageCount {
        if (value < 1) {
            throw new IllegalArgumentException("Page count must be >= 1, but got: " + value);
        }
    }

    /**
     * Returns whether this page count exceeds a given limit.
     * <p>
     * Convenience method for page limit checks in later milestones. The comparison is
     * strict: a count equal to the limit does not exceed it. The limit itself is not
     * validated.
     *
     * @param limit the maximum allowed page count
     * @return {@code true} if this count is strictly greater than {@code limit}
     */
    public boolean exceedsLimit(int limit) {
        return this.value > limit;
    }
}
|
||||
@@ -0,0 +1,59 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a discovered PDF candidate from the source folder.
|
||||
* <p>
|
||||
* This record encapsulates the minimal, infrastructure-agnostic metadata
|
||||
* needed to identify, correlate, and later extract a document during processing.
|
||||
* <p>
|
||||
* It deliberately does NOT expose filesystem paths or file handles directly.
|
||||
* The hexagonal boundary is maintained: adapters map filesystem objects to this
|
||||
* representation. The physical document location is carried opaquely via
|
||||
* {@link SourceDocumentLocator}, which only adapters interpret.
|
||||
* <p>
|
||||
* Fields:
|
||||
* <ul>
|
||||
* <li>{@code uniqueIdentifier} — human-readable name for logging and correlation (e.g. filename)</li>
|
||||
* <li>{@code fileSizeBytes} — enables early detection of corrupt/empty documents</li>
|
||||
* <li>{@code locator} — opaque reference passed through unchanged to the extraction adapter;
|
||||
* Domain and Application never interpret its value</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* No java.io.File or java.nio.file.Path references appear in this record.
|
||||
*
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record SourceDocumentCandidate(
|
||||
String uniqueIdentifier,
|
||||
long fileSizeBytes,
|
||||
SourceDocumentLocator locator
|
||||
) {
|
||||
/**
|
||||
* Compact constructor with validation.
|
||||
* <p>
|
||||
* Ensures all parameters are non-null and meaningful:
|
||||
* <ul>
|
||||
* <li>{@code uniqueIdentifier} must be non-null and non-empty</li>
|
||||
* <li>{@code fileSizeBytes} must be positive</li>
|
||||
* <li>{@code locator} must be non-null</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param uniqueIdentifier non-null, non-empty identifier for logging and correlation
|
||||
* @param fileSizeBytes must be > 0
|
||||
* @param locator non-null opaque locator; only adapters interpret its value
|
||||
* @throws NullPointerException if uniqueIdentifier or locator is null
|
||||
* @throws IllegalArgumentException if uniqueIdentifier is empty or fileSizeBytes <= 0
|
||||
*/
|
||||
public SourceDocumentCandidate {
|
||||
Objects.requireNonNull(uniqueIdentifier, "uniqueIdentifier must not be null");
|
||||
if (uniqueIdentifier.isEmpty()) {
|
||||
throw new IllegalArgumentException("uniqueIdentifier must not be empty");
|
||||
}
|
||||
if (fileSizeBytes <= 0) {
|
||||
throw new IllegalArgumentException("fileSizeBytes must be positive");
|
||||
}
|
||||
Objects.requireNonNull(locator, "locator must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
 * Opaque locator that allows an adapter to re-find the physical document
 * that was originally discovered by the source-scan adapter.
 * <p>
 * <strong>This type is deliberately opaque to Domain and Application.</strong>
 * Neither layer interprets or constructs the contained value. Only adapters
 * read and write the {@code value} component:
 * <ul>
 *   <li>The {@code SourceDocumentCandidatesPort} adapter writes the value when
 *       constructing a {@link SourceDocumentCandidate}.</li>
 *   <li>The {@code PdfTextExtractionPort} adapter reads the value to locate
 *       the physical file for extraction.</li>
 * </ul>
 * <p>
 * The value itself is an adapter-internal convention (e.g. an absolute path string).
 * Domain and Application never interpret it; they only pass it through.
 * This preserves the hexagonal boundary: no {@code java.nio.file.Path},
 * {@code java.io.File}, or other infrastructure types appear outside the adapter layer.
 * <p>
 * Coupling: both the scan adapter and the extraction adapter live in the same
 * {@code adapter-out} module and share the same encoding convention for the value.
 * This is an intentional intra-adapter contract, not a cross-layer concern.
 *
 * @param value non-null, non-empty opaque locator value
 * @since M3-AP-001
 */
public record SourceDocumentLocator(String value) {

    /**
     * Compact canonical constructor with validation.
     *
     * @param value non-null, non-empty opaque locator value; its content is an
     *              adapter-internal convention and is not inspected any further here
     * @throws NullPointerException     if {@code value} is {@code null}
     * @throws IllegalArgumentException if {@code value} is the empty string
     */
    public SourceDocumentLocator {
        Objects.requireNonNull(value, "value must not be null");
        if (value.isEmpty()) {
            throw new IllegalArgumentException("value must not be empty");
        }
    }
}
|
||||
@@ -3,7 +3,14 @@
|
||||
* <p>
|
||||
* This package contains the fundamental domain entities and status models required for document processing:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.ProcessingStatus} — enumeration of all valid document processing states (M2-AP-001)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.RunId} — unique identifier for a batch run (M2-AP-003)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext} — technical context for a batch run (M2-AP-003)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate} — discovered PDF from source folder (M3-AP-001)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator} — opaque locator passed from scan adapter to extraction adapter (M3-AP-001)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount} — typed page count validation (M3-AP-001)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult} — sealed result of PDF text extraction (M3-AP-001)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision} — sealed result of M3 pre-checks (M3-AP-001)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* All classes in this package are:
|
||||
|
||||
Reference in New Issue
Block a user