M3-Quellordneradapter korrigiert und leere PDF-Kandidaten zugelassen
This commit is contained in:
@@ -0,0 +1,151 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.stream.Stream;
|
||||
|
||||
/**
|
||||
* File-system based implementation of {@link SourceDocumentCandidatesPort}.
|
||||
* <p>
|
||||
* AP-002 Implementation: Scans a configured source folder and returns only PDF files
|
||||
* (by extension) as {@link SourceDocumentCandidate} objects.
|
||||
* <p>
|
||||
* Design:
|
||||
* <ul>
|
||||
* <li>Reads exactly one directory level (no recursion)</li>
|
||||
* <li>Filters for files with `.pdf` extension (case-insensitive)</li>
|
||||
* <li>Ignores directories and non-PDF files</li>
|
||||
* <li>Returns candidates in deterministic, stable order (sorted by absolute path)</li>
|
||||
* <li>Each candidate's locator contains the absolute path as a String (adapter-internal convention)</li>
|
||||
* <li>Technical filesystem errors are wrapped in {@link SourceDocumentAccessException}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Non-goals:
|
||||
* <ul>
|
||||
* <li>No PDF validation (that is AP-003)</li>
|
||||
* <li>No recursion into subdirectories</li>
|
||||
* <li>No content evaluation (that happens in AP-004: brauchbarer Text assessment)</li>
|
||||
* <li>No fachlich evaluation of candidates</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-002
|
||||
*/
|
||||
public class SourceDocumentCandidatesPortAdapter implements SourceDocumentCandidatesPort {
|
||||
|
||||
private static final String PDF_EXTENSION = ".pdf";
|
||||
|
||||
private final Path sourceFolder;
|
||||
|
||||
/**
|
||||
* Creates a new SourceDocumentCandidatesPortAdapter for the given source folder.
|
||||
*
|
||||
* @param sourceFolder the directory to scan for PDF files; must be a readable directory
|
||||
* @throws NullPointerException if sourceFolder is null
|
||||
*/
|
||||
public SourceDocumentCandidatesPortAdapter(Path sourceFolder) {
|
||||
this.sourceFolder = sourceFolder;
|
||||
}
|
||||
|
||||
/**
|
||||
* Loads all PDF candidates from the source folder.
|
||||
* <p>
|
||||
* Scans the source folder at exactly one level (no recursion), identifies PDF files
|
||||
* (by extension), and returns them as candidates in sorted order.
|
||||
* <p>
|
||||
* Each returned candidate carries:
|
||||
* <ul>
|
||||
* <li>{@code uniqueIdentifier}: the filename (e.g. "document.pdf")</li>
|
||||
* <li>{@code fileSizeBytes}: the file size in bytes</li>
|
||||
* <li>{@code locator}: opaque reference containing the absolute path as a String.
|
||||
* This is an adapter-internal convention and is never interpreted by Domain or Application.</li>
|
||||
* </ul>
|
||||
*
|
||||
* @return a list of discovered PDF candidates sorted by absolute path (may be empty)
|
||||
* @throws SourceDocumentAccessException if the source folder cannot be read or accessed
|
||||
*/
|
||||
@Override
|
||||
public List<SourceDocumentCandidate> loadCandidates() throws SourceDocumentAccessException {
|
||||
try {
|
||||
// Validate that source folder exists and is readable
|
||||
if (!Files.exists(sourceFolder)) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Source folder does not exist: " + sourceFolder.toAbsolutePath());
|
||||
}
|
||||
if (!Files.isDirectory(sourceFolder)) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Source folder is not a directory: " + sourceFolder.toAbsolutePath());
|
||||
}
|
||||
if (!Files.isReadable(sourceFolder)) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Source folder is not readable: " + sourceFolder.toAbsolutePath());
|
||||
}
|
||||
|
||||
// Scan folder: list exactly one level, filter for PDF files, sort deterministically
|
||||
List<Path> pdfPaths;
|
||||
try (Stream<Path> stream = Files.list(sourceFolder)) {
|
||||
pdfPaths = stream
|
||||
.filter(Files::isRegularFile) // Only files, not directories
|
||||
.filter(this::isPdfFile) // Only .pdf extension
|
||||
.sorted() // Deterministic order (by path)
|
||||
.toList();
|
||||
}
|
||||
|
||||
// Convert paths to candidates
|
||||
List<SourceDocumentCandidate> candidates = new java.util.ArrayList<>();
|
||||
for (Path path : pdfPaths) {
|
||||
candidates.add(toSourceDocumentCandidate(path));
|
||||
}
|
||||
return candidates;
|
||||
|
||||
} catch (SourceDocumentAccessException e) {
|
||||
throw e;
|
||||
} catch (IOException e) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Failed to read source folder: " + sourceFolder.toAbsolutePath(), e);
|
||||
} catch (Exception e) {
|
||||
throw new SourceDocumentAccessException(
|
||||
"Unexpected error while scanning source folder: " + sourceFolder.toAbsolutePath(), e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks if a file is a PDF by extension (case-insensitive).
|
||||
*
|
||||
* @param path the file path to check
|
||||
* @return true if the file name ends with .pdf (case-insensitive), false otherwise
|
||||
*/
|
||||
private boolean isPdfFile(Path path) {
|
||||
String fileName = path.getFileName().toString().toLowerCase();
|
||||
return fileName.endsWith(PDF_EXTENSION);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a file path to a SourceDocumentCandidate.
|
||||
* <p>
|
||||
* The locator is set to the absolute path as a String. This is an adapter-internal
|
||||
* convention that allows the extraction adapter to re-locate the file later.
|
||||
* Domain and Application never interpret this value.
|
||||
*
|
||||
* @param path the file path
|
||||
* @return a new SourceDocumentCandidate with metadata extracted from the path
|
||||
* @throws IOException if file size or path cannot be determined
|
||||
*/
|
||||
private SourceDocumentCandidate toSourceDocumentCandidate(Path path) throws IOException {
|
||||
long fileSizeBytes = Files.size(path);
|
||||
String fileName = path.getFileName().toString();
|
||||
String absolutePath = path.toAbsolutePath().toString();
|
||||
|
||||
return new SourceDocumentCandidate(
|
||||
fileName,
|
||||
fileSizeBytes,
|
||||
new SourceDocumentLocator(absolutePath)
|
||||
);
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* Source document adapters for discovering and accessing PDF candidates.
|
||||
* <p>
|
||||
* M3-AP-002 implementations:
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument.SourceDocumentCandidatesPortAdapter}
|
||||
* — File-system based discovery of PDF candidates from the source folder</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-002
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
|
||||
@@ -0,0 +1,209 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.sourcedocument;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.*;
|
||||
|
||||
/**
|
||||
* Tests for {@link SourceDocumentCandidatesPortAdapter}.
|
||||
*
|
||||
* @since M3-AP-002
|
||||
*/
|
||||
class SourceDocumentCandidatesPortAdapterTest {
|
||||
|
||||
@TempDir
|
||||
Path tempDir;
|
||||
|
||||
private SourceDocumentCandidatesPortAdapter adapter;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
adapter = new SourceDocumentCandidatesPortAdapter(tempDir);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_EmptyFolder() throws IOException {
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertNotNull(candidates);
|
||||
assertTrue(candidates.isEmpty(), "Empty folder should return empty list");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_OnlyPdfFiles() throws IOException {
|
||||
// Create test PDF files
|
||||
Path pdf1 = tempDir.resolve("document1.pdf");
|
||||
Path pdf2 = tempDir.resolve("document2.pdf");
|
||||
Files.write(pdf1, "pdf content".getBytes());
|
||||
Files.write(pdf2, "pdf content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(2, candidates.size(), "Should return exactly 2 PDF candidates");
|
||||
assertTrue(candidates.stream()
|
||||
.allMatch(c -> c.uniqueIdentifier().endsWith(".pdf")),
|
||||
"All candidates should be PDF files");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_FiltersNonPdfFiles() throws IOException {
|
||||
// Create mixed file types
|
||||
Files.write(tempDir.resolve("document.pdf"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("image.png"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("text.txt"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("data.xlsx"), "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(1, candidates.size(), "Should return only 1 PDF candidate");
|
||||
assertEquals("document.pdf", candidates.get(0).uniqueIdentifier());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_IgnoresDirectories() throws IOException {
|
||||
// Create files and subdirectories
|
||||
Files.write(tempDir.resolve("document.pdf"), "content".getBytes());
|
||||
Files.createDirectory(tempDir.resolve("subfolder"));
|
||||
Files.write(tempDir.resolve("subfolder/nested.pdf"), "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(1, candidates.size(), "Should return only 1 PDF candidate (in root folder)");
|
||||
assertEquals("document.pdf", candidates.get(0).uniqueIdentifier());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_CaseInsensitiveExtension() throws IOException {
|
||||
// Create PDFs with various case combinations
|
||||
Files.write(tempDir.resolve("file1.pdf"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("file2.PDF"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("file3.Pdf"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("file4.pDf"), "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(4, candidates.size(), "Should recognize PDF in any case combination");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_DeterministicOrder() throws IOException {
|
||||
// Create PDFs in non-alphabetical order
|
||||
Files.write(tempDir.resolve("zebra.pdf"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("apple.pdf"), "content".getBytes());
|
||||
Files.write(tempDir.resolve("monkey.pdf"), "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(3, candidates.size());
|
||||
// Files are sorted by absolute path, which will be consistent
|
||||
List<SourceDocumentCandidate> candidates2 = adapter.loadCandidates();
|
||||
assertEquals(candidates, candidates2, "Multiple calls should return same order");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_FileSizeMetadata() throws IOException {
|
||||
Path pdfFile = tempDir.resolve("test.pdf");
|
||||
Files.write(pdfFile, "test content 12345".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(1, candidates.size());
|
||||
SourceDocumentCandidate candidate = candidates.get(0);
|
||||
assertEquals(18, candidate.fileSizeBytes(), "File size should match written content");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_UniqueIdentifier() throws IOException {
|
||||
Path pdfFile = tempDir.resolve("myfile.pdf");
|
||||
Files.write(pdfFile, "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(1, candidates.size());
|
||||
assertEquals("myfile.pdf", candidates.get(0).uniqueIdentifier(),
|
||||
"uniqueIdentifier should be filename");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_LocatorContainsAbsolutePath() throws IOException {
|
||||
Path pdfFile = tempDir.resolve("test.pdf");
|
||||
Files.write(pdfFile, "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(1, candidates.size());
|
||||
String locatorValue = candidates.get(0).locator().value();
|
||||
assertTrue(locatorValue.contains("test.pdf"), "Locator should contain filename");
|
||||
assertTrue(new java.io.File(locatorValue).isAbsolute(),
|
||||
"Locator value should be an absolute path");
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_SourceFolderNotFound() {
|
||||
Path nonExistentFolder = tempDir.resolve("does-not-exist");
|
||||
SourceDocumentCandidatesPortAdapter adapterForMissing =
|
||||
new SourceDocumentCandidatesPortAdapter(nonExistentFolder);
|
||||
|
||||
SourceDocumentAccessException ex = assertThrows(
|
||||
SourceDocumentAccessException.class,
|
||||
adapterForMissing::loadCandidates,
|
||||
"Should throw exception for non-existent source folder");
|
||||
|
||||
assertTrue(ex.getMessage().contains("does not exist"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_SourceFolderIsFile() throws IOException {
|
||||
Path fileInsteadOfFolder = tempDir.resolve("regular-file");
|
||||
Files.createFile(fileInsteadOfFolder);
|
||||
|
||||
SourceDocumentCandidatesPortAdapter adapterForFile =
|
||||
new SourceDocumentCandidatesPortAdapter(fileInsteadOfFolder);
|
||||
|
||||
SourceDocumentAccessException ex = assertThrows(
|
||||
SourceDocumentAccessException.class,
|
||||
adapterForFile::loadCandidates,
|
||||
"Should throw exception if source path is a file, not a folder");
|
||||
|
||||
assertTrue(ex.getMessage().contains("not a directory"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_HasLocatorForEachCandidate() throws IOException {
|
||||
Files.createFile(tempDir.resolve("file1.pdf"));
|
||||
Files.createFile(tempDir.resolve("file2.pdf"));
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
for (SourceDocumentCandidate candidate : candidates) {
|
||||
assertNotNull(candidate.locator(), "Each candidate must have a locator");
|
||||
assertNotNull(candidate.locator().value(), "Locator value must not be null");
|
||||
assertFalse(candidate.locator().value().isEmpty(), "Locator value must not be empty");
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void testLoadCandidates_EmptyPdfFilesAreIncluded() throws IOException {
|
||||
// Create empty PDF files (M3-AP-002 requirement: PDF-Dateien im Quellordner)
|
||||
Files.createFile(tempDir.resolve("empty1.pdf"));
|
||||
Files.createFile(tempDir.resolve("empty2.pdf"));
|
||||
// Also add a non-empty PDF for contrast
|
||||
Files.write(tempDir.resolve("nonempty.pdf"), "content".getBytes());
|
||||
|
||||
List<SourceDocumentCandidate> candidates = adapter.loadCandidates();
|
||||
|
||||
assertEquals(3, candidates.size(),
|
||||
"Empty PDF files should be included as candidates; content evaluation happens in AP-004");
|
||||
assertTrue(candidates.stream().allMatch(c -> c.uniqueIdentifier().endsWith(".pdf")),
|
||||
"All candidates should be PDF files");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user