M3-Quellordneradapter korrigiert und leere PDF-Kandidaten zugelassen

This commit is contained in:
2026-04-01 18:35:28 +02:00
parent dd282e8f7b
commit 8f138d4cfa
4 changed files with 378 additions and 6 deletions
@@ -0,0 +1,151 @@
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.Objects;
import java.util.stream.Stream;
/**
* File-system based implementation of {@link SourceDocumentCandidatesPort}.
* <p>
* AP-002 Implementation: Scans a configured source folder and returns only PDF files
* (by extension) as {@link SourceDocumentCandidate} objects.
* <p>
* Design:
* <ul>
* <li>Reads exactly one directory level (no recursion)</li>
* <li>Filters for files with `.pdf` extension (case-insensitive)</li>
* <li>Ignores directories and non-PDF files</li>
* <li>Returns candidates in deterministic, stable order (sorted by absolute path)</li>
* <li>Each candidate's locator contains the absolute path as a String (adapter-internal convention)</li>
* <li>Technical filesystem errors are wrapped in {@link SourceDocumentAccessException}</li>
* </ul>
* <p>
* Non-goals:
* <ul>
* <li>No PDF validation (that is AP-003)</li>
* <li>No recursion into subdirectories</li>
* <li>No content evaluation (that happens in AP-004: usable-text assessment)</li>
* <li>No business-level evaluation of candidates</li>
* </ul>
*
* @since M3-AP-002
*/
public class SourceDocumentCandidatesPortAdapter implements SourceDocumentCandidatesPort {

    /** File-name suffix (lower case) that marks a file as a PDF candidate. */
    private static final String PDF_EXTENSION = ".pdf";

    /** Directory that is scanned for candidates; never {@code null}. */
    private final Path sourceFolder;

    /**
     * Creates a new SourceDocumentCandidatesPortAdapter for the given source folder.
     * <p>
     * Only the reference is checked here. Whether the path exists, is a directory and is
     * readable is verified on every call to {@link #loadCandidates()}.
     *
     * @param sourceFolder the directory to scan for PDF files; must be a readable directory
     * @throws NullPointerException if sourceFolder is null
     */
    public SourceDocumentCandidatesPortAdapter(Path sourceFolder) {
        // Fail fast: without this check a null folder would only surface as an
        // NullPointerException deep inside loadCandidates().
        this.sourceFolder = Objects.requireNonNull(sourceFolder, "sourceFolder");
    }

    /**
     * Loads all PDF candidates from the source folder.
     * <p>
     * Scans the source folder at exactly one level (no recursion), keeps regular files whose
     * name ends with {@code .pdf} (case-insensitive), and returns them as candidates in the
     * natural order of their paths.
     * <p>
     * Each returned candidate carries:
     * <ul>
     *   <li>{@code uniqueIdentifier}: the filename (e.g. "document.pdf")</li>
     *   <li>{@code fileSizeBytes}: the file size in bytes</li>
     *   <li>{@code locator}: opaque reference containing the absolute path as a String.
     *       This is an adapter-internal convention and is never interpreted by Domain or
     *       Application.</li>
     * </ul>
     *
     * @return a list of discovered PDF candidates in sorted path order (may be empty)
     * @throws SourceDocumentAccessException if the source folder does not exist, is not a
     *         directory, is not readable, or if listing it or reading a file size fails
     */
    @Override
    public List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException {
        try {
            // Validate the folder up front so callers get a precise message.
            if (!Files.exists(sourceFolder)) {
                throw new SourceDocumentAccessException(
                        "Source folder does not exist: " + sourceFolder.toAbsolutePath());
            }
            if (!Files.isDirectory(sourceFolder)) {
                throw new SourceDocumentAccessException(
                        "Source folder is not a directory: " + sourceFolder.toAbsolutePath());
            }
            if (!Files.isReadable(sourceFolder)) {
                throw new SourceDocumentAccessException(
                        "Source folder is not readable: " + sourceFolder.toAbsolutePath());
            }

            // Files.list returns a stream backed by an open directory handle, so it must be
            // closed; try-with-resources guarantees that even if the pipeline fails.
            List<Path> pdfPaths;
            try (Stream<Path> stream = Files.list(sourceFolder)) {
                pdfPaths = stream
                        .filter(Files::isRegularFile) // only files, not directories
                        .filter(this::isPdfFile)      // only .pdf extension
                        .sorted()                     // deterministic order (by path)
                        .toList();
            }

            // Conversion happens outside the stream because reading the file size
            // throws a checked IOException.
            List<SourceDocumentCandidate> candidates = new ArrayList<>(pdfPaths.size());
            for (Path path : pdfPaths) {
                candidates.add(toSourceDocumentCandidate(path));
            }
            return candidates;
        } catch (SourceDocumentAccessException e) {
            // Rethrow unchanged so the specific validation message above is not
            // re-wrapped by the generic handlers below.
            throw e;
        } catch (IOException e) {
            throw new SourceDocumentAccessException(
                    "Failed to read source folder: " + sourceFolder.toAbsolutePath(), e);
        } catch (Exception e) {
            // Adapter boundary: also covers unchecked failures raised while the directory
            // stream is being consumed (e.g. UncheckedIOException).
            throw new SourceDocumentAccessException(
                    "Unexpected error while scanning source folder: " + sourceFolder.toAbsolutePath(), e);
        }
    }

    /**
     * Checks if a file is a PDF by extension (case-insensitive).
     *
     * @param path the file path to check
     * @return true if the file name ends with .pdf (case-insensitive), false otherwise
     */
    private boolean isPdfFile(Path path) {
        // Locale.ROOT keeps the case folding independent of the JVM's default locale.
        String fileName = path.getFileName().toString().toLowerCase(Locale.ROOT);
        return fileName.endsWith(PDF_EXTENSION);
    }

    /**
     * Converts a file path to a SourceDocumentCandidate.
     * <p>
     * The locator is set to the absolute path as a String. This is an adapter-internal
     * convention that allows the extraction adapter to re-locate the file later.
     * Domain and Application never interpret this value.
     *
     * @param path the file path
     * @return a new SourceDocumentCandidate with metadata extracted from the path
     * @throws IOException if file size or path cannot be determined
     */
    private SourceDocumentCandidate toSourceDocumentCandidate(Path path) throws IOException {
        long fileSizeBytes = Files.size(path);
        String fileName = path.getFileName().toString();
        String absolutePath = path.toAbsolutePath().toString();
        return new SourceDocumentCandidate(
                fileName,
                fileSizeBytes,
                new SourceDocumentLocator(absolutePath)
        );
    }
}
@@ -0,0 +1,12 @@
/**
* Source document adapters for discovering and accessing PDF candidates.
* <p>
* M3-AP-002 implementations:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument.SourceDocumentCandidatesPortAdapter}
* — File-system based discovery of PDF candidates from the source folder</li>
* </ul>
*
* @since M3-AP-002
*/
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;