M3-Quellordneradapter korrigiert und leere PDF-Kandidaten zugelassen
This commit is contained in:
+151
@@ -0,0 +1,151 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* File-system based implementation of {@link SourceDocumentCandidatesPort}.
|
||||
* <p>
|
||||
* AP-002 Implementation: Scans a configured source folder and returns only PDF files
|
||||
* (by extension) as {@link SourceDocumentCandidate} objects.
|
||||
* <p>
|
||||
* Design:
|
||||
* <ul>
|
||||
* <li>Reads exactly one directory level (no recursion)</li>
|
||||
* <li>Filters for files with `.pdf` extension (case-insensitive)</li>
|
||||
* <li>Ignores directories and non-PDF files</li>
|
||||
* <li>Returns candidates in deterministic, stable order (sorted by absolute path)</li>
|
||||
* <li>Each candidate's locator contains the absolute path as a String (adapter-internal convention)</li>
|
||||
* <li>Technical filesystem errors are wrapped in {@link SourceDocumentAccessException}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Non-goals:
|
||||
* <ul>
|
||||
* <li>No PDF validation (that is AP-003)</li>
|
||||
* <li>No recursion into subdirectories</li>
|
||||
* <li>No content evaluation (that happens in AP-004: brauchbarer Text assessment)</li>
|
||||
* <li>No fachlich evaluation of candidates</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-002
|
||||
*/
|
||||
public class SourceDocumentCandidatesPortAdapter implements SourceDocumentCandidatesPort {
|
||||
|
||||
private static final String PDF_EXTENSION = ".pdf";
|
||||
|
||||
private final Path sourceFolder;
|
||||
|
||||
/**
|
||||
* Creates a new SourceDocumentCandidatesPortAdapter for the given source folder.
|
||||
*
|
||||
* @param sourceFolder the directory to scan for PDF files; must be a readable directory
|
||||
* @throws NullPointerException if sourceFolder is null
|
||||
*/
|
||||
public SourceDocumentCandidatesPortAdapter(Path sourceFolder) {
|
||||
this.sourceFolder = sourceFolder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads all PDF candidates from the source folder.
|
||||
* <p>
|
||||
* Scans the source folder at exactly one level (no recursion), identifies PDF files
|
||||
* (by extension), and returns them as candidates in sorted order.
|
||||
* <p>
|
||||
* Each returned candidate carries:
|
||||
* <ul>
|
||||
* <li>{@code uniqueIdentifier}: the filename (e.g. "document.pdf")</li>
|
||||
* <li>{@code fileSizeBytes}: the file size in bytes</li>
|
||||
* <li>{@code locator}: opaque reference containing the absolute path as a String.
|
||||
* This is an adapter-internal convention and is never interpreted by Domain or Application.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @return a list of discovered PDF candidates sorted by absolute path (may be empty)
|
||||
* @throws SourceDocumentAccessException if the source folder cannot be read or accessed
|
||||
*/
|
||||
@Override
|
||||
public List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException {
|
||||
try {
|
||||
// Validate that source folder exists and is readable
|
||||
if (!Files.exists(sourceFolder)) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Source folder does not exist: " + sourceFolder.toAbsolutePath());
|
||||
}
|
||||
if (!Files.isDirectory(sourceFolder)) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Source folder is not a directory: " + sourceFolder.toAbsolutePath());
|
||||
}
|
||||
if (!Files.isReadable(sourceFolder)) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Source folder is not readable: " + sourceFolder.toAbsolutePath());
|
||||
}
|
||||
|
||||
// Scan folder: list exactly one level, filter for PDF files, sort deterministically
|
||||
List<Path> pdfPaths;
|
||||
try (Stream<Path> stream = Files.list(sourceFolder)) {
|
||||
pdfPaths = stream
|
||||
.filter(Files::isRegularFile) // Only files, not directories
|
||||
.filter(this::isPdfFile) // Only .pdf extension
|
||||
.sorted() // Deterministic order (by path)
|
||||
.toList();
|
||||
}
|
||||
|
||||
// Convert paths to candidates
|
||||
List<SourceDocumentCandidate> candidates = new java.util.ArrayList<>();
|
||||
for (Path path : pdfPaths) {
|
||||
candidates.add(toSourceDocumentCandidate(path));
|
||||
}
|
||||
return candidates;
|
||||
|
||||
} catch (SourceDocumentAccessException e) {
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Failed to read source folder: " + sourceFolder.toAbsolutePath(), e);
|
||||
} catch (Exception e) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Unexpected error while scanning source folder: " + sourceFolder.toAbsolutePath(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a file is a PDF by extension (case-insensitive).
|
||||
*
|
||||
* @param path the file path to check
|
||||
* @return true if the file name ends with .pdf (case-insensitive), false otherwise
|
||||
*/
|
||||
private boolean isPdfFile(Path path) {
|
||||
String fileName = path.getFileName().toString().toLowerCase();
|
||||
return fileName.endsWith(PDF_EXTENSION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a file path to a SourceDocumentCandidate.
|
||||
* <p>
|
||||
* The locator is set to the absolute path as a String. This is an adapter-internal
|
||||
* convention that allows the extraction adapter to re-locate the file later.
|
||||
* Domain and Application never interpret this value.
|
||||
*
|
||||
* @param path the file path
|
||||
* @return a new SourceDocumentCandidate with metadata extracted from the path
|
||||
* @throws IOException if file size or path cannot be determined
|
||||
*/
|
||||
private SourceDocumentCandidate toSourceDocumentCandidate(Path path) throws IOException {
|
||||
long fileSizeBytes = Files.size(path);
|
||||
String fileName = path.getFileName().toString();
|
||||
String absolutePath = path.toAbsolutePath().toString();
|
||||
|
||||
return new SourceDocumentCandidate(
|
||||
fileName,
|
||||
fileSizeBytes,
|
||||
new SourceDocumentLocator(absolutePath)
|
||||
);
|
||||
}
|
||||
}
|
||||
+12
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* Source document adapters for discovering and accessing PDF candidates.
|
||||
* <p>
|
||||
* M3-AP-002 implementations:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument.SourceDocumentCandidatesPortAdapter}
|
||||
* — File-system based discovery of PDF candidates from the source folder</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-002
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
|
||||
Reference in New Issue
Block a user