M3-Quellordneradapter korrigiert und leere PDF-Kandidaten zugelassen

2026-04-01 18:35:28 +02:00
parent dd282e8f7b
commit 8f138d4cfa
4 changed files with 378 additions and 6 deletions
@@ -0,0 +1,151 @@
+package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
+
+import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
+import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Objects;
+import java.util.stream.Stream;
+
+/**
+ * File-system based implementation of {@link SourceDocumentCandidatesPort}.
+ * <p>
+ * AP-002 Implementation: Scans a configured source folder and returns only PDF files
+ * (by extension) as {@link SourceDocumentCandidate} objects.
+ * <p>
+ * Design:
+ * <ul>
+ *   <li>Reads exactly one directory level (no recursion)</li>
+ *   <li>Filters for files with `.pdf` extension (case-insensitive)</li>
+ *   <li>Ignores directories and non-PDF files</li>
+ *   <li>Returns candidates in deterministic, stable order (sorted by absolute path)</li>
+ *   <li>Each candidate's locator contains the absolute path as a String (adapter-internal convention)</li>
+ *   <li>Technical filesystem errors are wrapped in {@link SourceDocumentAccessException}</li>
+ * </ul>
+ * <p>
+ * Non-goals:
+ * <ul>
+ *   <li>No PDF validation (that is AP-003)</li>
+ *   <li>No recursion into subdirectories</li>
+ *   <li>No content evaluation (that happens in AP-004: brauchbarer Text assessment)</li>
+ *   <li>No fachlich evaluation of candidates</li>
+ * </ul>
+ *
+ * @since M3-AP-002
+ */
+public class SourceDocumentCandidatesPortAdapter implements SourceDocumentCandidatesPort {
+
+    private static final String PDF_EXTENSION = ".pdf";
+
+    private final Path sourceFolder;
+
+    /**
+     * Creates a new SourceDocumentCandidatesPortAdapter for the given source folder.
+     *
+     * @param sourceFolder the directory to scan for PDF files; must be a readable directory
+     * @throws NullPointerException if sourceFolder is null
+     */
+    public SourceDocumentCandidatesPortAdapter(Path sourceFolder) {
+        this.sourceFolder = sourceFolder;
+    }
+
+    /**
+     * Loads all PDF candidates from the source folder.
+     * <p>
+     * Scans the source folder at exactly one level (no recursion), identifies PDF files
+     * (by extension), and returns them as candidates in sorted order.
+     * <p>
+     * Each returned candidate carries:
+     * <ul>
+     *   <li>{@code uniqueIdentifier}: the filename (e.g. "document.pdf")</li>
+     *   <li>{@code fileSizeBytes}: the file size in bytes</li>
+     *   <li>{@code locator}: opaque reference containing the absolute path as a String.
+     *       This is an adapter-internal convention and is never interpreted by Domain or Application.</li>
+     * </ul>
+     *
+     * @return a list of discovered PDF candidates sorted by absolute path (may be empty)
+     * @throws SourceDocumentAccessException if the source folder cannot be read or accessed
+     */
+    @Override
+    public List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException {
+        try {
+            // Validate that source folder exists and is readable
+            if (!Files.exists(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                    "Source folder does not exist: " + sourceFolder.toAbsolutePath());
+            }
+            if (!Files.isDirectory(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                    "Source folder is not a directory: " + sourceFolder.toAbsolutePath());
+            }
+            if (!Files.isReadable(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                    "Source folder is not readable: " + sourceFolder.toAbsolutePath());
+            }
+
+            // Scan folder: list exactly one level, filter for PDF files, sort deterministically
+            List<Path> pdfPaths;
+            try (Stream<Path> stream = Files.list(sourceFolder)) {
+                pdfPaths = stream
+                    .filter(Files::isRegularFile)                    // Only files, not directories
+                    .filter(this::isPdfFile)                         // Only .pdf extension
+                    .sorted()                                        // Deterministic order (by path)
+                    .toList();
+            }
+
+            // Convert paths to candidates
+            List<SourceDocumentCandidate> candidates = new java.util.ArrayList<>();
+            for (Path path : pdfPaths) {
+                candidates.add(toSourceDocumentCandidate(path));
+            }
+            return candidates;
+
+        } catch (SourceDocumentAccessException e) {
+            throw e;
+        } catch (IOException e) {
+            throw new SourceDocumentAccessException(
+                "Failed to read source folder: " + sourceFolder.toAbsolutePath(), e);
+        } catch (Exception e) {
+            throw new SourceDocumentAccessException(
+                "Unexpected error while scanning source folder: " + sourceFolder.toAbsolutePath(), e);
+        }
+    }
+
+    /**
+     * Checks if a file is a PDF by extension (case-insensitive).
+     *
+     * @param path the file path to check
+     * @return true if the file name ends with .pdf (case-insensitive), false otherwise
+     */
+    private boolean isPdfFile(Path path) {
+        String fileName = path.getFileName().toString().toLowerCase();
+        return fileName.endsWith(PDF_EXTENSION);
+    }
+
+    /**
+     * Converts a file path to a SourceDocumentCandidate.
+     * <p>
+     * The locator is set to the absolute path as a String. This is an adapter-internal
+     * convention that allows the extraction adapter to re-locate the file later.
+     * Domain and Application never interpret this value.
+     *
+     * @param path the file path
+     * @return a new SourceDocumentCandidate with metadata extracted from the path
+     * @throws IOException if file size or path cannot be determined
+     */
+    private SourceDocumentCandidate toSourceDocumentCandidate(Path path) throws IOException {
+        long fileSizeBytes = Files.size(path);
+        String fileName = path.getFileName().toString();
+        String absolutePath = path.toAbsolutePath().toString();
+
+        return new SourceDocumentCandidate(
+            fileName,
+            fileSizeBytes,
+            new SourceDocumentLocator(absolutePath)
+        );
+    }
+}
@@ -0,0 +1,12 @@
+/**
+ * Source document adapters for discovering and accessing PDF candidates.
+ * <p>
+ * M3-AP-002 implementations:
+ * <ul>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument.SourceDocumentCandidatesPortAdapter}
+ *       — File-system based discovery of PDF candidates from the source folder</li>
+ * </ul>
+ *
+ * @since M3-AP-002
+ */
+package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;