diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapter.java new file mode 100644 index 0000000..6c14fc3 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapter.java @@ -0,0 +1,151 @@ +package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument; + +import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; +import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; +import java.util.stream.Stream; + +/** + * File-system based implementation of {@link SourceDocumentCandidatesPort}. + *

+ * AP-002 Implementation: Scans a configured source folder and returns only PDF files + * (by extension) as {@link SourceDocumentCandidate} objects. + *

+ * Design: + *

+ *

+ * Non-goals: + *

+ *
+ * @since M3-AP-002
+ */
+public class SourceDocumentCandidatesPortAdapter implements SourceDocumentCandidatesPort {
+
+    /** Lower-case file-name suffix that marks a file as a PDF candidate. */
+    private static final String PDF_EXTENSION = ".pdf";
+
+    /** Directory scanned by {@link #loadCandidates()}; never {@code null}. */
+    private final Path sourceFolder;
+
+    /**
+     * Creates a new SourceDocumentCandidatesPortAdapter for the given source folder.
+     * <p>
+     * Only the reference itself is checked here. Whether the path exists, is a directory
+     * and is readable is verified on every call to {@link #loadCandidates()}.
+     *
+     * @param sourceFolder the directory to scan for PDF files; must be a readable directory
+     * @throws NullPointerException if sourceFolder is null
+     */
+    public SourceDocumentCandidatesPortAdapter(Path sourceFolder) {
+        // Fail fast, as documented, instead of surfacing the null on the first scan.
+        this.sourceFolder = java.util.Objects.requireNonNull(sourceFolder, "sourceFolder must not be null");
+    }
+
+    /**
+     * Loads all PDF candidates from the source folder.
+     * <p>
+     * Scans the source folder at exactly one level (no recursion), identifies PDF files
+     * (by extension), and returns them as candidates in sorted order.
+     * <p>
+     * Each returned candidate carries:
+     * <ul>
+     *   <li>the file name as its unique identifier</li>
+     *   <li>the file size in bytes (zero for empty files)</li>
+     *   <li>a locator holding the absolute path of the file as a String</li>
+     * </ul>
+     *
+     * @return a list of discovered PDF candidates in the natural {@link Path} order of the
+     *         listed entries (may be empty)
+     * @throws SourceDocumentAccessException if the source folder cannot be read or accessed
+     */
+    @Override
+    public List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException {
+        try {
+            // Validate that source folder exists and is readable
+            if (!Files.exists(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                        "Source folder does not exist: " + sourceFolder.toAbsolutePath());
+            }
+            if (!Files.isDirectory(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                        "Source folder is not a directory: " + sourceFolder.toAbsolutePath());
+            }
+            if (!Files.isReadable(sourceFolder)) {
+                throw new SourceDocumentAccessException(
+                        "Source folder is not readable: " + sourceFolder.toAbsolutePath());
+            }
+
+            // Scan folder: list exactly one level, filter for PDF files, sort deterministically.
+            // The directory stream is closed before any per-file metadata is read.
+            List<Path> pdfPaths;
+            try (Stream<Path> stream = Files.list(sourceFolder)) {
+                pdfPaths = stream
+                        .filter(Files::isRegularFile)   // Only files, not directories
+                        .filter(this::isPdfFile)        // Only .pdf extension
+                        .sorted()                       // Deterministic order (by path)
+                        .toList();
+            }
+
+            // Convert paths to candidates; a plain loop is used because the conversion
+            // may throw IOException, which a stream lambda could not propagate.
+            List<SourceDocumentCandidate> candidates = new java.util.ArrayList<>(pdfPaths.size());
+            for (Path path : pdfPaths) {
+                candidates.add(toSourceDocumentCandidate(path));
+            }
+            return candidates;
+
+        } catch (SourceDocumentAccessException e) {
+            // Rethrow our own exception untouched so the handlers below do not wrap it again.
+            throw e;
+        } catch (IOException e) {
+            throw new SourceDocumentAccessException(
+                    "Failed to read source folder: " + sourceFolder.toAbsolutePath(), e);
+        } catch (Exception e) {
+            throw new SourceDocumentAccessException(
+                    "Unexpected error while scanning source folder: " + sourceFolder.toAbsolutePath(), e);
+        }
+    }
+
+    /**
+     * Checks if a file is a PDF by extension (case-insensitive).
+     *
+     * @param path the file path to check
+     * @return true if the file name ends with .pdf (case-insensitive), false otherwise
+     */
+    private boolean isPdfFile(Path path) {
+        // Locale.ROOT keeps the comparison independent of the JVM's default locale.
+        String fileName = path.getFileName().toString().toLowerCase(java.util.Locale.ROOT);
+        return fileName.endsWith(PDF_EXTENSION);
+    }
+
+    /**
+     * Converts a file path to a SourceDocumentCandidate.
+     * <p>
+     * The locator is set to the absolute path as a String. This is an adapter-internal
+     * convention that allows the extraction adapter to re-locate the file later.
+     * Domain and Application never interpret this value.
+     *
+     * @param path the file path
+     * @return a new SourceDocumentCandidate with metadata extracted from the path
+     * @throws IOException if file size or path cannot be determined
+     */
+    private SourceDocumentCandidate toSourceDocumentCandidate(Path path) throws IOException {
+        long fileSizeBytes = Files.size(path);
+        String fileName = path.getFileName().toString();
+        String absolutePath = path.toAbsolutePath().toString();
+
+        return new SourceDocumentCandidate(
+                fileName,
+                fileSizeBytes,
+                new SourceDocumentLocator(absolutePath)
+        );
+    }
+}
diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/package-info.java
new file mode 100644
index 0000000..2f21ccf
--- /dev/null
+++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/package-info.java
@@ -0,0 +1,12 @@
+/**
+ * Source document adapters for discovering and accessing PDF candidates.
+ *

+ * M3-AP-002 implementations:
+ * <ul>
+ *   <li>{@link de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument.SourceDocumentCandidatesPortAdapter}
+ *       — File-system based discovery of PDF candidates from the source folder</li>
+ * </ul>
+ * + * @since M3-AP-002 + */ +package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument; diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java new file mode 100644 index 0000000..248eaac --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/outbound/sourcedocument/SourceDocumentCandidatesPortAdapterTest.java @@ -0,0 +1,209 @@ +package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument; + +import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for {@link SourceDocumentCandidatesPortAdapter}. 
+ *
+ * @since M3-AP-002
+ */
+class SourceDocumentCandidatesPortAdapterTest {
+
+    /** Fresh, empty directory per test; used as the source folder under scan. */
+    @TempDir
+    Path tempDir;
+
+    private SourceDocumentCandidatesPortAdapter adapter;
+
+    @BeforeEach
+    void setUp() {
+        adapter = new SourceDocumentCandidatesPortAdapter(tempDir);
+    }
+
+    @Test
+    void testLoadCandidates_EmptyFolder() throws IOException {
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertNotNull(candidates);
+        assertTrue(candidates.isEmpty(), "Empty folder should return empty list");
+    }
+
+    @Test
+    void testLoadCandidates_OnlyPdfFiles() throws IOException {
+        // Create test PDF files
+        Path pdf1 = tempDir.resolve("document1.pdf");
+        Path pdf2 = tempDir.resolve("document2.pdf");
+        Files.write(pdf1, "pdf content".getBytes());
+        Files.write(pdf2, "pdf content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(2, candidates.size(), "Should return exactly 2 PDF candidates");
+        assertTrue(candidates.stream()
+                        .allMatch(c -> c.uniqueIdentifier().endsWith(".pdf")),
+                "All candidates should be PDF files");
+    }
+
+    @Test
+    void testLoadCandidates_FiltersNonPdfFiles() throws IOException {
+        // Create mixed file types
+        Files.write(tempDir.resolve("document.pdf"), "content".getBytes());
+        Files.write(tempDir.resolve("image.png"), "content".getBytes());
+        Files.write(tempDir.resolve("text.txt"), "content".getBytes());
+        Files.write(tempDir.resolve("data.xlsx"), "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size(), "Should return only 1 PDF candidate");
+        assertEquals("document.pdf", candidates.get(0).uniqueIdentifier());
+    }
+
+    @Test
+    void testLoadCandidates_IgnoresDirectories() throws IOException {
+        // Create files and subdirectories
+        Files.write(tempDir.resolve("document.pdf"), "content".getBytes());
+        Files.createDirectory(tempDir.resolve("subfolder"));
+        Files.write(tempDir.resolve("subfolder/nested.pdf"), "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size(), "Should return only 1 PDF candidate (in root folder)");
+        assertEquals("document.pdf", candidates.get(0).uniqueIdentifier());
+    }
+
+    @Test
+    void testLoadCandidates_CaseInsensitiveExtension() throws IOException {
+        // Create PDFs with various case combinations
+        Files.write(tempDir.resolve("file1.pdf"), "content".getBytes());
+        Files.write(tempDir.resolve("file2.PDF"), "content".getBytes());
+        Files.write(tempDir.resolve("file3.Pdf"), "content".getBytes());
+        Files.write(tempDir.resolve("file4.pDf"), "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(4, candidates.size(), "Should recognize PDF in any case combination");
+    }
+
+    @Test
+    void testLoadCandidates_DeterministicOrder() throws IOException {
+        // Create PDFs in non-alphabetical order
+        Files.write(tempDir.resolve("zebra.pdf"), "content".getBytes());
+        Files.write(tempDir.resolve("apple.pdf"), "content".getBytes());
+        Files.write(tempDir.resolve("monkey.pdf"), "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(3, candidates.size());
+        // All files share one parent folder, so sorting by path orders them by file name.
+        List<String> identifiers = candidates.stream()
+                .map(SourceDocumentCandidate::uniqueIdentifier)
+                .toList();
+        assertEquals(List.of("apple.pdf", "monkey.pdf", "zebra.pdf"), identifiers,
+                "Candidates should be returned in sorted order, not creation order");
+
+        // A second scan of the unchanged folder must yield the identical sequence.
+        List<SourceDocumentCandidate> candidates2 = adapter.loadCandidates();
+        assertEquals(candidates, candidates2, "Multiple calls should return same order");
+    }
+
+    @Test
+    void testLoadCandidates_FileSizeMetadata() throws IOException {
+        Path pdfFile = tempDir.resolve("test.pdf");
+        Files.write(pdfFile, "test content 12345".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size());
+        SourceDocumentCandidate candidate = candidates.get(0);
+        assertEquals(18, candidate.fileSizeBytes(), "File size should match written content");
+    }
+
+    @Test
+    void testLoadCandidates_UniqueIdentifier() throws IOException {
+        Path pdfFile = tempDir.resolve("myfile.pdf");
+        Files.write(pdfFile, "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size());
+        assertEquals("myfile.pdf", candidates.get(0).uniqueIdentifier(),
+                "uniqueIdentifier should be filename");
+    }
+
+    @Test
+    void testLoadCandidates_LocatorContainsAbsolutePath() throws IOException {
+        Path pdfFile = tempDir.resolve("test.pdf");
+        Files.write(pdfFile, "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(1, candidates.size());
+        String locatorValue = candidates.get(0).locator().value();
+        assertTrue(locatorValue.contains("test.pdf"), "Locator should contain filename");
+        assertTrue(new java.io.File(locatorValue).isAbsolute(),
+                "Locator value should be an absolute path");
+    }
+
+    @Test
+    void testLoadCandidates_SourceFolderNotFound() {
+        Path nonExistentFolder = tempDir.resolve("does-not-exist");
+        SourceDocumentCandidatesPortAdapter adapterForMissing =
+                new SourceDocumentCandidatesPortAdapter(nonExistentFolder);
+
+        SourceDocumentAccessException ex = assertThrows(
+                SourceDocumentAccessException.class,
+                adapterForMissing::loadCandidates,
+                "Should throw exception for non-existent source folder");
+
+        assertTrue(ex.getMessage().contains("does not exist"));
+    }
+
+    @Test
+    void testLoadCandidates_SourceFolderIsFile() throws IOException {
+        Path fileInsteadOfFolder = tempDir.resolve("regular-file");
+        Files.createFile(fileInsteadOfFolder);
+
+        SourceDocumentCandidatesPortAdapter adapterForFile =
+                new SourceDocumentCandidatesPortAdapter(fileInsteadOfFolder);
+
+        SourceDocumentAccessException ex = assertThrows(
+                SourceDocumentAccessException.class,
+                adapterForFile::loadCandidates,
+                "Should throw exception if source path is a file, not a folder");
+
+        assertTrue(ex.getMessage().contains("not a directory"));
+    }
+
+    @Test
+    void testLoadCandidates_HasLocatorForEachCandidate() throws IOException {
+        Files.createFile(tempDir.resolve("file1.pdf"));
+        Files.createFile(tempDir.resolve("file2.pdf"));
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        // Guard against a vacuous pass: the loop below checks nothing on an empty list.
+        assertEquals(2, candidates.size(), "Both PDF files should be returned");
+        for (SourceDocumentCandidate candidate : candidates) {
+            assertNotNull(candidate.locator(), "Each candidate must have a locator");
+            assertNotNull(candidate.locator().value(), "Locator value must not be null");
+            assertFalse(candidate.locator().value().isEmpty(), "Locator value must not be empty");
+        }
+    }
+
+    @Test
+    void testLoadCandidates_EmptyPdfFilesAreIncluded() throws IOException {
+        // Create empty PDF files (M3-AP-002 requirement: PDF files in the source folder)
+        Files.createFile(tempDir.resolve("empty1.pdf"));
+        Files.createFile(tempDir.resolve("empty2.pdf"));
+        // Also add a non-empty PDF for contrast
+        Files.write(tempDir.resolve("nonempty.pdf"), "content".getBytes());
+
+        List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
+
+        assertEquals(3, candidates.size(),
+                "Empty PDF files should be included as candidates; content evaluation happens in AP-004");
+        assertTrue(candidates.stream().allMatch(c -> c.uniqueIdentifier().endsWith(".pdf")),
+                "All candidates should be PDF files");
+    }
+}
diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java
index 89c5464..03b7394 100644
--- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java
+++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/SourceDocumentCandidate.java
@@ -16,7 +16,7 @@ import java.util.Objects;
  * Fields:
  *
    *
  • {@code uniqueIdentifier} — human-readable name for logging and correlation (e.g. filename)
  • - *
  • {@code fileSizeBytes} — enables early detection of corrupt/empty documents
  • + *
  • {@code fileSizeBytes} — file size for metadata and tracing; may be zero for empty files (content evaluation happens later in AP-004)
  • *
  • {@code locator} — opaque reference passed through unchanged to the extraction adapter; * Domain and Application never interpret its value
  • *
@@ -36,23 +36,23 @@ public record SourceDocumentCandidate(
      * Ensures all parameters are non-null and meaningful:
      * <ul>
      *   <li>{@code uniqueIdentifier} must be non-null and non-empty</li>
-     *   <li>{@code fileSizeBytes} must be positive</li>
+     *   <li>{@code fileSizeBytes} must be non-negative (may be zero for empty files; content evaluation is AP-004)</li>
      *   <li>{@code locator} must be non-null</li>
      * </ul>
      *
      * @param uniqueIdentifier non-null, non-empty identifier for logging and correlation
-     * @param fileSizeBytes must be > 0
+     * @param fileSizeBytes must be &gt;= 0 (may be 0; content evaluation happens in AP-004)
      * @param locator non-null opaque locator; only adapters interpret its value
      * @throws NullPointerException if uniqueIdentifier or locator is null
-     * @throws IllegalArgumentException if uniqueIdentifier is empty or fileSizeBytes <= 0
+     * @throws IllegalArgumentException if uniqueIdentifier is empty or fileSizeBytes &lt; 0
      */
     public SourceDocumentCandidate {
         Objects.requireNonNull(uniqueIdentifier, "uniqueIdentifier must not be null");
         if (uniqueIdentifier.isEmpty()) {
             throw new IllegalArgumentException("uniqueIdentifier must not be empty");
         }
-        if (fileSizeBytes <= 0) {
-            throw new IllegalArgumentException("fileSizeBytes must be positive");
+        if (fileSizeBytes < 0) {
+            throw new IllegalArgumentException("fileSizeBytes must not be negative");
         }
         Objects.requireNonNull(locator, "locator must not be null");
     }