diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapter.java
new file mode 100644
index 0000000..6c14fc3
--- /dev/null
+++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapter.java
@@ -0,0 +1,151 @@
+package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
+
+import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
+import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.stream.Stream;
+
+/**
+ * File-system based implementation of {@link SourceDocumentCandidatesPort}.
+ * <p>
+ * AP-002 implementation: scans a configured source folder and returns only PDF files
+ * (by extension) as {@link SourceDocumentCandidate} objects.
+ * <p>
+ * Design:
+ * <ul>
+ *   <li>Reads exactly one directory level (no recursion)</li>
+ *   <li>Filters for files with a {@code .pdf} extension (case-insensitive)</li>
+ *   <li>Ignores directories and non-PDF files</li>
+ *   <li>Returns candidates in deterministic, stable order (natural {@link Path} ordering)</li>
+ *   <li>Each candidate's locator contains the absolute path as a String (adapter-internal convention)</li>
+ *   <li>Technical filesystem errors are wrapped in {@link SourceDocumentAccessException}</li>
+ * </ul>
+ * <p>
+ * Non-goals:
+ * <ul>
+ *   <li>No PDF validation (that is AP-003)</li>
+ *   <li>No recursion into subdirectories</li>
+ *   <li>No content evaluation (that happens in AP-004: usable-text assessment)</li>
+ *   <li>No business-level evaluation of candidates</li>
+ * </ul>
+ *
+ * @since M3-AP-002
+ */
+public class SourceDocumentCandidatesPortAdapter implements SourceDocumentCandidatesPort {
+
+    private static final String PDF_EXTENSION = ".pdf";
+
+    private final Path sourceFolder;
+
+    /**
+     * Creates a new SourceDocumentCandidatesPortAdapter for the given source folder.
+     *
+     * @param sourceFolder the directory to scan; existence and readability are checked on each {@link #loadCandidates()} call
+     * @throws NullPointerException if sourceFolder is null
+     */
+    public SourceDocumentCandidatesPortAdapter(Path sourceFolder) {
+        this.sourceFolder = java.util.Objects.requireNonNull(sourceFolder, "sourceFolder must not be null");
+    }
+
+    /**
+     * Loads all PDF candidates from the source folder.
+     * <p>
+     * Scans the source folder at exactly one level (no recursion), identifies PDF files
+     * (by extension), and returns them as candidates in sorted order.
+     * <p>
+     * Each returned candidate carries:
+     * <ul>
+     *   <li>{@code uniqueIdentifier}: the filename (e.g. "document.pdf")</li>
+     *   <li>{@code fileSizeBytes}: the file size in bytes</li>
+     *   <li>{@code locator}: opaque reference containing the absolute path as a String.
+     *       This is an adapter-internal convention and is never interpreted by Domain or Application.</li>
+     * </ul>
+     *
+     * @return a list of discovered PDF candidates in natural {@link Path} order (may be empty)
+     * @throws SourceDocumentAccessException if the source folder cannot be read or accessed
+     */
+    @Override
+    public List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException {
+        try {
+            // Validate up front so each failure mode gets its own precise message
+            if (!Files.exists(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                        "Source folder does not exist: " + sourceFolder.toAbsolutePath());
+            }
+            if (!Files.isDirectory(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                        "Source folder is not a directory: " + sourceFolder.toAbsolutePath());
+            }
+            if (!Files.isReadable(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                        "Source folder is not readable: " + sourceFolder.toAbsolutePath());
+            }
+
+            // Scan folder: list exactly one level, filter for PDF files, sort deterministically
+            List<Path> pdfPaths;
+            try (Stream<Path> stream = Files.list(sourceFolder)) { // directory stream must be closed
+                pdfPaths = stream
+                        .filter(Files::isRegularFile)  // Only files, not directories
+                        .filter(this::isPdfFile)       // Only .pdf extension
+                        .sorted()                      // Deterministic order (by path)
+                        .toList();
+            }
+
+            // Plain loop instead of stream.map because Files.size throws a checked IOException
+            List<SourceDocumentCandidate> candidates = new java.util.ArrayList<>(pdfPaths.size());
+            for (Path path : pdfPaths) {
+                candidates.add(toSourceDocumentCandidate(path));
+            }
+            return candidates;
+
+        } catch (SourceDocumentAccessException e) {
+            throw e; // already the port's exception type; rethrow without wrapping it again
+        } catch (IOException e) {
+            throw new SourceDocumentAccessException(
+                    "Failed to read source folder: " + sourceFolder.toAbsolutePath(), e);
+        } catch (Exception e) {
+            throw new SourceDocumentAccessException(
+                    "Unexpected error while scanning source folder: " + sourceFolder.toAbsolutePath(), e);
+        }
+    }
+
+    /**
+     * Checks if a file is a PDF by extension (case-insensitive, locale-independent).
+     *
+     * @param path the file path to check
+     * @return true if the file name ends with .pdf (case-insensitive), false otherwise
+     */
+    private boolean isPdfFile(Path path) {
+        String fileName = path.getFileName().toString().toLowerCase(java.util.Locale.ROOT);
+        return fileName.endsWith(PDF_EXTENSION);
+    }
+
+    /**
+     * Converts a file path to a SourceDocumentCandidate.
+     * <p>
+     * The locator is set to the absolute path as a String. This is an adapter-internal
+     * convention that allows the extraction adapter to re-locate the file later.
+     * Domain and Application never interpret this value.
+     *
+     * @param path the file path
+     * @return a new SourceDocumentCandidate with metadata extracted from the path
+     * @throws IOException if the file size cannot be determined
+     */
+    private SourceDocumentCandidate toSourceDocumentCandidate(Path path) throws IOException {
+        long fileSizeBytes = Files.size(path);
+        String fileName = path.getFileName().toString();
+        String absolutePath = path.toAbsolutePath().toString();
+
+        return new SourceDocumentCandidate(
+                fileName,
+                fileSizeBytes,
+                new SourceDocumentLocator(absolutePath)
+        );
+    }
+}
diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/package-info.java
new file mode 100644
index 0000000..2f21ccf
--- /dev/null
+++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/package-info.java
@@ -0,0 +1,12 @@
+/**
+ * Source document adapters for discovering and accessing PDF candidates.
+ * <p>
+ * M3-AP-002 implementations:
+ * <ul>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument.SourceDocumentCandidatesPortAdapter}
+ *       — File-system based discovery of PDF candidates from the source folder</li>
+ * </ul>
+ *
+ * @since M3-AP-002
+ */
+package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java
new file mode 100644
index 0000000..248eaac
--- /dev/null
+++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java
@@ -0,0 +1,209 @@
+package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
+
+import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
+import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
+import org.junit.jupiter.api.BeforeEach;
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+
+import static org.junit.jupiter.api.Assertions.*;
+
+/**
+ * Tests for {@link SourceDocumentCandidatesPortAdapter}.
+ *
+ * @since M3-AP-002
+ */
+class SourceDocumentCandidatesPortAdapterTest {
+
+    @TempDir
+    Path tempDir;
+
+    private SourceDocumentCandidatesPortAdapter adapter;
+
+    @BeforeEach
+    void setUp() {
+        adapter = new SourceDocumentCandidatesPortAdapter(tempDir);
+    }
+
+    @Test
+    void testLoadCandidates_EmptyFolder() throws IOException {
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertNotNull(candidates);
+        assertTrue(candidates.isEmpty(), "Empty folder should return empty list");
+    }
+
+    @Test
+    void testLoadCandidates_OnlyPdfFiles() throws IOException {
+        // Create test PDF files
+        Path pdf1 = tempDir.resolve("document1.pdf");
+        Path pdf2 = tempDir.resolve("document2.pdf");
+        Files.writeString(pdf1, "pdf content");
+        Files.writeString(pdf2, "pdf content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(2, candidates.size(), "Should return exactly 2 PDF candidates");
+        assertTrue(candidates.stream()
+                        .allMatch(c -> c.uniqueIdentifier().endsWith(".pdf")),
+                "All candidates should be PDF files");
+    }
+
+    @Test
+    void testLoadCandidates_FiltersNonPdfFiles() throws IOException {
+        // Create mixed file types
+        Files.writeString(tempDir.resolve("document.pdf"), "content");
+        Files.writeString(tempDir.resolve("image.png"), "content");
+        Files.writeString(tempDir.resolve("text.txt"), "content");
+        Files.writeString(tempDir.resolve("data.xlsx"), "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size(), "Should return only 1 PDF candidate");
+        assertEquals("document.pdf", candidates.get(0).uniqueIdentifier());
+    }
+
+    @Test
+    void testLoadCandidates_IgnoresDirectories() throws IOException {
+        // Create files and subdirectories
+        Files.writeString(tempDir.resolve("document.pdf"), "content");
+        Files.createDirectory(tempDir.resolve("subfolder"));
+        Files.writeString(tempDir.resolve("subfolder/nested.pdf"), "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size(), "Should return only 1 PDF candidate (in root folder)");
+        assertEquals("document.pdf", candidates.get(0).uniqueIdentifier());
+    }
+
+    @Test
+    void testLoadCandidates_CaseInsensitiveExtension() throws IOException {
+        // Create PDFs with various case combinations
+        Files.writeString(tempDir.resolve("file1.pdf"), "content");
+        Files.writeString(tempDir.resolve("file2.PDF"), "content");
+        Files.writeString(tempDir.resolve("file3.Pdf"), "content");
+        Files.writeString(tempDir.resolve("file4.pDf"), "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(4, candidates.size(), "Should recognize PDF in any case combination");
+    }
+
+    @Test
+    void testLoadCandidates_DeterministicOrder() throws IOException {
+        // Create PDFs in non-alphabetical order
+        Files.writeString(tempDir.resolve("zebra.pdf"), "content");
+        Files.writeString(tempDir.resolve("apple.pdf"), "content");
+        Files.writeString(tempDir.resolve("monkey.pdf"), "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+        List<SourceDocumentCandidate> candidates2 = adapter.loadCandidates();
+        // All files share the same parent folder, so path order equals file-name order
+        assertEquals(List.of("apple.pdf", "monkey.pdf", "zebra.pdf"),
+                candidates.stream().map(SourceDocumentCandidate::uniqueIdentifier).toList());
+        assertEquals(candidates, candidates2, "Multiple calls should return same order");
+    }
+
+    @Test
+    void testLoadCandidates_FileSizeMetadata() throws IOException {
+        Path pdfFile = tempDir.resolve("test.pdf");
+        Files.writeString(pdfFile, "test content 12345"); // 18 ASCII chars -> 18 bytes in UTF-8
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size());
+        SourceDocumentCandidate candidate = candidates.get(0);
+        assertEquals(18, candidate.fileSizeBytes(), "File size should match written content");
+    }
+
+    @Test
+    void testLoadCandidates_UniqueIdentifier() throws IOException {
+        Path pdfFile = tempDir.resolve("myfile.pdf");
+        Files.writeString(pdfFile, "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size());
+        assertEquals("myfile.pdf", candidates.get(0).uniqueIdentifier(),
+                "uniqueIdentifier should be filename");
+    }
+
+    @Test
+    void testLoadCandidates_LocatorContainsAbsolutePath() throws IOException {
+        Path pdfFile = tempDir.resolve("test.pdf");
+        Files.writeString(pdfFile, "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size());
+        String locatorValue = candidates.get(0).locator().value();
+        assertTrue(locatorValue.contains("test.pdf"), "Locator should contain filename");
+        assertTrue(Path.of(locatorValue).isAbsolute(),
+                "Locator value should be an absolute path");
+    }
+
+    @Test
+    void testLoadCandidates_SourceFolderNotFound() {
+        Path nonExistentFolder = tempDir.resolve("does-not-exist");
+        SourceDocumentCandidatesPortAdapter adapterForMissing =
+                new SourceDocumentCandidatesPortAdapter(nonExistentFolder);
+
+        SourceDocumentAccessException ex = assertThrows(
+                SourceDocumentAccessException.class,
+                adapterForMissing::loadCandidates,
+                "Should throw exception for non-existent source folder");
+
+        assertTrue(ex.getMessage().contains("does not exist"));
+    }
+
+    @Test
+    void testLoadCandidates_SourceFolderIsFile() throws IOException {
+        Path fileInsteadOfFolder = tempDir.resolve("regular-file");
+        Files.createFile(fileInsteadOfFolder);
+
+        SourceDocumentCandidatesPortAdapter adapterForFile =
+                new SourceDocumentCandidatesPortAdapter(fileInsteadOfFolder);
+
+        SourceDocumentAccessException ex = assertThrows(
+                SourceDocumentAccessException.class,
+                adapterForFile::loadCandidates,
+                "Should throw exception if source path is a file, not a folder");
+
+        assertTrue(ex.getMessage().contains("not a directory"));
+    }
+
+    @Test
+    void testLoadCandidates_HasLocatorForEachCandidate() throws IOException {
+        Files.createFile(tempDir.resolve("file1.pdf"));
+        Files.createFile(tempDir.resolve("file2.pdf"));
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+        assertEquals(2, candidates.size(), "Both PDF files should be discovered");
+        for (SourceDocumentCandidate candidate : candidates) {
+            assertNotNull(candidate.locator(), "Each candidate must have a locator");
+            assertNotNull(candidate.locator().value(), "Locator value must not be null");
+            assertFalse(candidate.locator().value().isEmpty(), "Locator value must not be empty");
+        }
+    }
+
+    @Test
+    void testLoadCandidates_EmptyPdfFilesAreIncluded() throws IOException {
+        // Create empty PDF files (M3-AP-002 requirement: PDF files in the source folder)
+        Files.createFile(tempDir.resolve("empty1.pdf"));
+        Files.createFile(tempDir.resolve("empty2.pdf"));
+        // Also add a non-empty PDF for contrast
+        Files.writeString(tempDir.resolve("nonempty.pdf"), "content");
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(3, candidates.size(),
+                "Empty PDF files should be included as candidates; content evaluation happens in AP-004");
+        assertTrue(candidates.stream().allMatch(c -> c.uniqueIdentifier().endsWith(".pdf")),
+                "All candidates should be PDF files");
+    }
+}
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java
index 89c5464..03b7394 100644
--- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java
@@ -16,7 +16,7 @@ import java.util.Objects;
* Fields:
*
* - {@code uniqueIdentifier} — human-readable name for logging and correlation (e.g. filename)
- * - {@code fileSizeBytes} — enables early detection of corrupt/empty documents
+ * - {@code fileSizeBytes} — file size for metadata and tracing; may be zero for empty files (content evaluation happens later in AP-004)
* - {@code locator} — opaque reference passed through unchanged to the extraction adapter;
* Domain and Application never interpret its value
*
@@ -36,23 +36,23 @@ public record SourceDocumentCandidate(
* Ensures all parameters are non-null and meaningful:
*
* - {@code uniqueIdentifier} must be non-null and non-empty
- * - {@code fileSizeBytes} must be positive
+ * - {@code fileSizeBytes} must be non-negative (may be zero for empty files; content evaluation is AP-004)
* - {@code locator} must be non-null
*
*
* @param uniqueIdentifier non-null, non-empty identifier for logging and correlation
- * @param fileSizeBytes must be > 0
+ * @param fileSizeBytes must be >= 0 (may be 0; content evaluation happens in AP-004)
* @param locator non-null opaque locator; only adapters interpret its value
* @throws NullPointerException if uniqueIdentifier or locator is null
- * @throws IllegalArgumentException if uniqueIdentifier is empty or fileSizeBytes <= 0
+ * @throws IllegalArgumentException if uniqueIdentifier is empty or fileSizeBytes < 0
*/
public SourceDocumentCandidate {
Objects.requireNonNull(uniqueIdentifier, "uniqueIdentifier must not be null");
if (uniqueIdentifier.isEmpty()) {
throw new IllegalArgumentException("uniqueIdentifier must not be empty");
}
- if (fileSizeBytes <= 0) {
- throw new IllegalArgumentException("fileSizeBytes must be positive");
+ if (fileSizeBytes < 0) {
+ throw new IllegalArgumentException("fileSizeBytes must not be negative");
}
Objects.requireNonNull(locator, "locator must not be null");
}