M3-Kernobjekte und Ports für Quellkandidaten und PDF-Auslese eingeführt
This commit is contained in:
@@ -0,0 +1,65 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
|
||||
/**
|
||||
* Outbound port for extracting text content and page count from a PDF document.
|
||||
* <p>
|
||||
* This interface abstracts PDF text extraction, allowing the application layer
|
||||
* to remain independent of the underlying PDF library or extraction mechanism.
|
||||
* <p>
|
||||
* Responsibilities of the implementing adapter:
|
||||
* <ul>
|
||||
* <li>Read the PDF file identified by the candidate</li>
|
||||
* <li>Extract all text content from the PDF</li>
|
||||
* <li>Count the total number of pages</li>
|
||||
* <li>Distinguish between content errors (PDF exists but is not extractable) and technical errors</li>
|
||||
* <li>Return a structured {@link PdfExtractionResult} encoding success or failure type</li>
|
||||
* <li>Encapsulate all PDF library details (no PDFBox types appear in domain/application)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Architecture notes:
|
||||
* <ul>
|
||||
* <li>The adapter implementation is the ONLY place where PDFBox appears in the codebase</li>
|
||||
* <li>Results are returned as structured types, never as exceptions thrown</li>
|
||||
* <li>The three result variants ({@code PdfExtractionSuccess}, {@code PdfExtractionContentError},
|
||||
* {@code PdfExtractionTechnicalError}) allow callers to distinguish recoverable from non-recoverable failures</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Non-goals of this port (handled in later milestones):
|
||||
* <ul>
|
||||
* <li>Validation of extracted text content (e.g., minimum length, character sets)</li>
|
||||
* <li>Checking page count against limits</li>
|
||||
* <li>Any form of document fingerprinting</li>
|
||||
* <li>Persistence or caching of results</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public interface PdfTextExtractionPort {
|
||||
|
||||
/**
|
||||
* Extracts text content and page count from a single PDF document.
|
||||
* <p>
|
||||
* Performs a single extraction operation and returns the result.
|
||||
* Errors are never thrown as exceptions; they are encoded in the result type.
|
||||
* <p>
|
||||
* The candidate parameter serves as both:
|
||||
* <ul>
|
||||
* <li>Document identification (e.g., filename for logging)</li>
|
||||
* <li>Adapter guidance (the adapter uses the candidate's identifier to locate the actual file)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @param candidate the document to extract; non-null
|
||||
* @return a {@link PdfExtractionResult} encoding the outcome:
|
||||
* <ul>
|
||||
* <li>Success: contains extracted text and page count</li>
|
||||
* <li>Content error: PDF exists but is not extractable (e.g., image-only, encrypted)</li>
|
||||
* <li>Technical error: I/O failure, PDFBox library error, etc.</li>
|
||||
* </ul>
|
||||
* @throws NullPointerException if candidate is null
|
||||
* @see PdfExtractionResult
|
||||
*/
|
||||
PdfExtractionResult extractTextAndPageCount(SourceDocumentCandidate candidate);
|
||||
}
|
||||
@@ -0,0 +1,39 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
/**
 * Exception thrown when source documents cannot be accessed or read.
 * <p>
 * This exception is raised by the {@link SourceDocumentCandidatesPort} when:
 * <ul>
 *   <li>The source folder does not exist or is not readable</li>
 *   <li>Permission issues prevent directory listing</li>
 *   <li>Filesystem I/O errors occur during candidate discovery</li>
 * </ul>
 * <p>
 * This is a runtime exception, allowing adapters to propagate filesystem errors
 * without forcing try/catch blocks in the application layer.
 *
 * @since M3-AP-001
 */
public final class SourceDocumentAccessException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * Constructs an exception with a message.
     *
     * @param message descriptive message about the access failure
     */
    public SourceDocumentAccessException(String message) {
        super(message);
    }

    /**
     * Constructs an exception with a message and cause.
     *
     * @param message descriptive message about the access failure
     * @param cause the underlying exception that caused this failure
     */
    public SourceDocumentAccessException(String message, Throwable cause) {
        super(message, cause);
    }
}
|
||||
@@ -0,0 +1,49 @@
|
||||
package de.gecheckt.pdf.umbenenner.application.port.out;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
/**
|
||||
* Outbound port for loading PDF document candidates from the source folder.
|
||||
* <p>
|
||||
* This interface abstracts filesystem access, allowing the application layer
|
||||
* to remain independent of how candidates are discovered or retrieved.
|
||||
* <p>
|
||||
* Responsibilities of the implementing adapter:
|
||||
* <ul>
|
||||
* <li>Scan the configured source folder</li>
|
||||
* <li>Identify only PDF files (by extension or content type)</li>
|
||||
* <li>Ignore non-PDF files and directories</li>
|
||||
* <li>Return candidates in deterministic order (e.g., alphabetical by filename)</li>
|
||||
* <li>Convert filesystem objects to {@link SourceDocumentCandidate} records</li>
|
||||
* <li>Encapsulate all filesystem access details (no Path or File leakage)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Architecture note:
|
||||
* <ul>
|
||||
* <li>The adapter implementation is the ONLY place where java.nio.file.Path or java.io.File appears</li>
|
||||
* <li>The application never sees or handles raw filesystem objects</li>
|
||||
* <li>This enforces the hexagonal boundary and allows easy adapter swapping (e.g., cloud storage)</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public interface SourceDocumentCandidatesPort {
|
||||
|
||||
/**
|
||||
* Loads all PDF candidates from the configured source folder.
|
||||
* <p>
|
||||
* Returns a list of candidates representing discovered PDF files.
|
||||
* The list is guaranteed to be in deterministic order to enable reproducible runs.
|
||||
* <p>
|
||||
* Non-goal: This method does NOT filter candidates by any fachlich criteria
|
||||
* (such as file size, corruption checks, or content validation).
|
||||
* It delivers raw candidates from the filesystem; fachlich evaluation happens later.
|
||||
*
|
||||
* @return a list of discovered PDF candidates in deterministic order (may be empty if no PDFs found)
|
||||
* @throws SourceDocumentAccessException if the source folder cannot be read or accessed
|
||||
* @see SourceDocumentCandidate
|
||||
*/
|
||||
List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException;
|
||||
}
|
||||
@@ -14,10 +14,20 @@
|
||||
* — System time access for timestamps and run context</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* M3-AP-001 ports:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort}
|
||||
* — Load PDF document candidates from the source folder</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort}
|
||||
* — Extract text content and page count from a single PDF</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Exception types:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException}
|
||||
* — Thrown when run lock cannot be acquired (another instance running) (M2)</li>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException}
|
||||
* — Thrown when source folder cannot be read or accessed (M3)</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Architecture Rule: Outbound ports are implementation-agnostic and contain no business logic.
|
||||
|
||||
Reference in New Issue
Block a user