M3-APP-03: PDFBox-Extraktion technisch sauber abgegrenzt und
Fehlersemantik korrigiert
This commit is contained in:
@@ -6,7 +6,11 @@
|
||||
"Bash(mvn clean:*)",
|
||||
"Bash(mvn verify:*)",
|
||||
"Bash(mvn test:*)",
|
||||
"Bash(find D:/Dev/Projects/pdf-umbenenner-parent -not -path */target/* -type d)"
|
||||
"Bash(find D:/Dev/Projects/pdf-umbenenner-parent -not -path */target/* -type d)",
|
||||
"Bash(mvn -pl pdf-umbenenner-adapter-out clean compile)",
|
||||
"Bash(mvn dependency:tree -pl pdf-umbenenner-adapter-out)",
|
||||
"Bash(mvn -pl pdf-umbenenner-domain clean compile)",
|
||||
"Bash(mvn help:describe -Dplugin=org.apache.pdfbox:pdfbox -Ddetail=false)"
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,141 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.pdfextraction;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import org.apache.pdfbox.Loader;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.text.PDFTextStripper;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* PDFBox-based implementation of {@link PdfTextExtractionPort}.
|
||||
* <p>
|
||||
* AP-003 Implementation: Extracts text content and page count from a single PDF document
|
||||
* using Apache PDFBox. All technical problems during extraction are reported as
|
||||
* {@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError}.
|
||||
* <p>
|
||||
* Design:
|
||||
* <ul>
|
||||
* <li>Uses PDFBox 3.x for PDF processing</li>
|
||||
* <li>Extracts complete text from all pages (may be empty)</li>
|
||||
* <li>Counts total page count</li>
|
||||
* <li>Returns results as typed {@link PdfExtractionResult} (no exceptions thrown)</li>
|
||||
* <li>All extraction failures are treated as technical errors (AP-003 scope)</li>
|
||||
* <li>PDFBox is encapsulated and never exposed beyond this adapter</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Success criteria:
|
||||
* <ul>
|
||||
* <li>PDF file is loadable by PDFBox</li>
|
||||
* <li>Page count is determinable ({@code >= 1})</li>
|
||||
* <li>Text can be extracted (may be empty string)</li>
|
||||
* <li>All three values are combined into {@link PdfExtractionSuccess}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Technical error cases (AP-003):
|
||||
* <ul>
|
||||
* <li>File not found or unreadable</li>
|
||||
* <li>PDF cannot be loaded by PDFBox (any load error)</li>
|
||||
* <li>Page count cannot be determined</li>
|
||||
* <li>Text extraction fails or throws exception</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* Non-goals (handled in later APs):
|
||||
* <ul>
|
||||
* <li>Domain-level assessment of the extracted text (AP-004)</li>
|
||||
* <li>Page limit checking (AP-004)</li>
|
||||
* <li>Text normalization or preprocessing</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-003
|
||||
*/
|
||||
public class PdfTextExtractionPortAdapter implements PdfTextExtractionPort {
|
||||
|
||||
/**
|
||||
* Extracts text content and page count from a single PDF document.
|
||||
* <p>
|
||||
* Reads the file identified by the candidate's locator, uses PDFBox to extract
|
||||
* text from all pages, and counts the total page count.
|
||||
* <p>
|
||||
* The locator is expected to contain an absolute file path as a String (adapter-internal convention).
|
||||
* <p>
|
||||
* In M3-AP-003, all technical problems are reported as {@link PdfExtractionTechnicalError}.
|
||||
* Fachliche Bewertungen like "text is not usable" are deferred to AP-004.
|
||||
*
|
||||
* @param candidate the document to extract; must be non-null
|
||||
* @return a {@link PdfExtractionResult} encoding the outcome:
|
||||
* <ul>
|
||||
* <li>Success: PDF loaded, text extracted (may be empty), page count determined</li>
|
||||
* <li>Technical error: any I/O, file access, or PDFBox loading/parsing problem</li>
|
||||
* </ul>
|
||||
* @throws NullPointerException if candidate is null
|
||||
*/
|
||||
@Override
|
||||
public PdfExtractionResult extractTextAndPageCount(SourceDocumentCandidate candidate) {
|
||||
Objects.requireNonNull(candidate, "candidate must not be null");
|
||||
|
||||
String filePath = candidate.locator().value();
|
||||
|
||||
try {
|
||||
// Validate file exists and is readable
|
||||
var path = Paths.get(filePath);
|
||||
if (!Files.exists(path)) {
|
||||
return new PdfExtractionTechnicalError(
|
||||
"PDF file not found: " + filePath,
|
||||
null);
|
||||
}
|
||||
if (!Files.isReadable(path)) {
|
||||
return new PdfExtractionTechnicalError(
|
||||
"PDF file is not readable: " + filePath,
|
||||
null);
|
||||
}
|
||||
|
||||
// Load and process PDF using PDFBox Loader (3.x API)
|
||||
PDDocument document = Loader.loadPDF(path.toFile());
|
||||
try {
|
||||
int pageCount = document.getNumberOfPages();
|
||||
|
||||
// AP-003: Handle case of zero pages as technical error
|
||||
// (PdfPageCount requires >= 1, so this is a constraint violation)
|
||||
if (pageCount < 1) {
|
||||
return new PdfExtractionTechnicalError(
|
||||
"PDF has zero pages, cannot extract content",
|
||||
null);
|
||||
}
|
||||
|
||||
// Extract text from all pages
|
||||
// Note: extractedText may be empty string, which is valid in M3 (no fachliche validation here)
|
||||
PDFTextStripper textStripper = new PDFTextStripper();
|
||||
String extractedText = textStripper.getText(document);
|
||||
|
||||
// Success: return extracted text and page count
|
||||
// (Empty text is not an error in AP-003; fachliche validation is AP-004)
|
||||
PdfPageCount pageCountTyped = new PdfPageCount(pageCount);
|
||||
return new PdfExtractionSuccess(extractedText, pageCountTyped);
|
||||
} finally {
|
||||
document.close();
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
// All I/O and PDFBox loading/parsing errors are technical errors in AP-003
|
||||
String errorMessage = e.getMessage() != null ? e.getMessage() : e.toString();
|
||||
return new PdfExtractionTechnicalError(
|
||||
"Failed to load or parse PDF: " + errorMessage,
|
||||
e);
|
||||
} catch (Exception e) {
|
||||
// Catch-all for unexpected errors
|
||||
String errorMessage = e.getMessage() != null ? e.getMessage() : e.toString();
|
||||
return new PdfExtractionTechnicalError(
|
||||
"Unexpected error during PDF extraction: " + errorMessage,
|
||||
e);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,34 @@
|
||||
/**
|
||||
* PDFBox-based adapter for PDF text extraction.
|
||||
* <p>
|
||||
* <strong>M3-AP-003:</strong> This package contains the sole implementation
|
||||
* of {@link de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort},
|
||||
* using Apache PDFBox to extract text and page count from PDF documents.
|
||||
* <p>
|
||||
* <strong>Scope (AP-003):</strong>
|
||||
* <ul>
|
||||
* <li>Pure technical extraction: read PDF, extract text, count pages</li>
|
||||
* <li>All extraction problems (file not found, PDF unreadable, PDFBox errors) → {@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError}</li>
|
||||
* <li>No domain-level validation: empty text is SUCCESS, not error</li>
|
||||
* <li>PDFBox is <em>only</em> used in this package; no PDFBox types appear in Domain or Application</li>
|
||||
* <li>Results always typed as {@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult}, never exceptions</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Restriction:</strong>
|
||||
* <ul>
|
||||
* <li>{@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError} is reserved for later APs</li>
|
||||
* <li>AP-003 adapter uses only {@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess} and
|
||||
* {@link de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError}</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Out of scope (handled in later APs):</strong>
|
||||
* <ul>
|
||||
* <li>Text validation or quality assessment (AP-004)</li>
|
||||
* <li>Page limit checking (AP-004)</li>
|
||||
* <li>Text normalization or preprocessing</li>
|
||||
* <li>Domain-level assessment of extracted content</li>
|
||||
* </ul>
|
||||
*
|
||||
* @since M3-AP-003
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.pdfextraction;
|
||||
@@ -0,0 +1,164 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.outbound.pdfextraction;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.junit.jupiter.api.Assertions.assertEquals;
|
||||
import static org.junit.jupiter.api.Assertions.assertInstanceOf;
|
||||
import static org.junit.jupiter.api.Assertions.assertNotNull;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
import static org.junit.jupiter.api.Assertions.assertTrue;
|
||||
|
||||
/**
|
||||
* Tests for {@link PdfTextExtractionPortAdapter}.
|
||||
* <p>
|
||||
* M3-AP-003: Minimal tests validating basic extraction functionality and technical error handling.
|
||||
* In AP-003 scope: all extraction problems are treated as TechnicalError, not ContentError.
|
||||
* No domain-level validation of text content (that is AP-004).
|
||||
* PDFs are created programmatically using PDFBox to avoid external dependencies on test files.
|
||||
*
|
||||
* @since M3-AP-003
|
||||
*/
|
||||
class PdfTextExtractionPortAdapterTest {
|
||||
|
||||
private PdfTextExtractionPortAdapter adapter;
|
||||
|
||||
@TempDir
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
adapter = new PdfTextExtractionPortAdapter();
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNullCandidateThrowsNullPointerException() {
|
||||
assertThrows(NullPointerException.class, () -> adapter.extractTextAndPageCount(null));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testNonExistentFileReturnsTechnicalError() throws Exception {
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"nonexistent.pdf",
|
||||
1,
|
||||
new SourceDocumentLocator("/path/that/does/not/exist.pdf")
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
assertInstanceOf(PdfExtractionTechnicalError.class, result);
|
||||
PdfExtractionTechnicalError error = (PdfExtractionTechnicalError) result;
|
||||
assertTrue(error.errorMessage().contains("not found"));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testSimplePdfExtractionSuccess() throws Exception {
|
||||
// Create a simple single-page PDF
|
||||
Path pdfFile = tempDir.resolve("simple.pdf");
|
||||
createSimplePdf(pdfFile);
|
||||
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"simple.pdf",
|
||||
Files.size(pdfFile),
|
||||
new SourceDocumentLocator(pdfFile.toAbsolutePath().toString())
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
assertInstanceOf(PdfExtractionSuccess.class, result);
|
||||
PdfExtractionSuccess success = (PdfExtractionSuccess) result;
|
||||
assertEquals(1, success.pageCount().value());
|
||||
assertNotNull(success.extractedText());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testMultiPagePdfExtractionSuccess() throws Exception {
|
||||
// Create a three-page PDF
|
||||
Path pdfFile = tempDir.resolve("multipage.pdf");
|
||||
createMultiPagePdf(pdfFile, 3);
|
||||
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"multipage.pdf",
|
||||
Files.size(pdfFile),
|
||||
new SourceDocumentLocator(pdfFile.toAbsolutePath().toString())
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
assertInstanceOf(PdfExtractionSuccess.class, result);
|
||||
PdfExtractionSuccess success = (PdfExtractionSuccess) result;
|
||||
assertEquals(3, success.pageCount().value());
|
||||
assertNotNull(success.extractedText());
|
||||
}
|
||||
|
||||
@Test
|
||||
void testReadablePdfWithEmptyTextReturnsSuccess() throws Exception {
|
||||
// Create a PDF with no text content (blank page)
|
||||
// This is a technically readable PDF, so it should succeed
|
||||
Path pdfFile = tempDir.resolve("blank.pdf");
|
||||
createBlankPdf(pdfFile);
|
||||
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate(
|
||||
"blank.pdf",
|
||||
Files.size(pdfFile),
|
||||
new SourceDocumentLocator(pdfFile.toAbsolutePath().toString())
|
||||
);
|
||||
|
||||
PdfExtractionResult result = adapter.extractTextAndPageCount(candidate);
|
||||
|
||||
// AP-003: Empty text is SUCCESS, not an error
|
||||
// Fachliche Bewertung of text content happens in AP-004
|
||||
assertInstanceOf(PdfExtractionSuccess.class, result);
|
||||
PdfExtractionSuccess success = (PdfExtractionSuccess) result;
|
||||
assertEquals(1, success.pageCount().value());
|
||||
assertNotNull(success.extractedText()); // May be empty, but not null
|
||||
}
|
||||
|
||||
// --- Helper methods to create test PDFs ---
|
||||
|
||||
/**
|
||||
* Creates a simple single-page PDF.
|
||||
*/
|
||||
private void createSimplePdf(Path filePath) throws Exception {
|
||||
PDDocument document = new PDDocument();
|
||||
PDPage page = new PDPage();
|
||||
document.addPage(page);
|
||||
document.save(filePath.toAbsolutePath().toString());
|
||||
document.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a PDF with multiple blank pages.
|
||||
*/
|
||||
private void createMultiPagePdf(Path filePath, int pageCount) throws Exception {
|
||||
PDDocument document = new PDDocument();
|
||||
for (int i = 0; i < pageCount; i++) {
|
||||
PDPage page = new PDPage();
|
||||
document.addPage(page);
|
||||
}
|
||||
document.save(filePath.toAbsolutePath().toString());
|
||||
document.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a blank PDF with a single page and no text.
|
||||
*/
|
||||
private void createBlankPdf(Path filePath) throws Exception {
|
||||
PDDocument document = new PDDocument();
|
||||
PDPage page = new PDPage();
|
||||
document.addPage(page);
|
||||
document.save(filePath.toAbsolutePath().toString());
|
||||
document.close();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a content-related failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that the PDF file itself is readable (no I/O error),
|
||||
* but its content is not suitable for text extraction.
|
||||
* <p>
|
||||
* Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
|
||||
* PDF is severely corrupted in the content layer.
|
||||
* <p>
|
||||
* This is typically a deterministic, non-retryable condition for a given source file
|
||||
* (unless the source file is modified and re-scanned in a later run).
|
||||
*
|
||||
* @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record PdfExtractionContentError(
|
||||
String reason
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param reason must be non-null and non-empty
|
||||
* @throws NullPointerException if reason is null
|
||||
* @throws IllegalArgumentException if reason is empty
|
||||
*/
|
||||
public PdfExtractionContentError {
|
||||
Objects.requireNonNull(reason, "reason must not be null");
|
||||
if (reason.isEmpty()) {
|
||||
throw new IllegalArgumentException("reason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,5 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of PDF text extraction.
|
||||
* <p>
|
||||
@@ -27,96 +25,3 @@ public sealed interface PdfExtractionResult
|
||||
permits PdfExtractionSuccess, PdfExtractionContentError, PdfExtractionTechnicalError {
|
||||
// Marker interface; concrete implementations define structure
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents successful PDF text extraction.
|
||||
* <p>
|
||||
* When this result is obtained, both text content and page count have been
|
||||
* successfully extracted and are guaranteed to be valid.
|
||||
*
|
||||
* @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
|
||||
* @param pageCount the number of pages in the PDF (non-null, validated >= 1)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionSuccess(
|
||||
String extractedText,
|
||||
PdfPageCount pageCount
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param extractedText must be non-null (may be empty)
|
||||
* @param pageCount must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
PdfExtractionSuccess {
|
||||
Objects.requireNonNull(extractedText, "extractedText must not be null");
|
||||
Objects.requireNonNull(pageCount, "pageCount must not be null");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a content-related failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that the PDF file itself is readable (no I/O error),
|
||||
* but its content is not suitable for text extraction.
|
||||
* <p>
|
||||
* Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
|
||||
* PDF is severely corrupted in the content layer.
|
||||
* <p>
|
||||
* This is typically a deterministic, non-retryable condition for a given source file
|
||||
* (unless the source file is modified and re-scanned in a later run).
|
||||
*
|
||||
* @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionContentError(
|
||||
String reason
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param reason must be non-null and non-empty
|
||||
* @throws NullPointerException if reason is null
|
||||
* @throws IllegalArgumentException if reason is empty
|
||||
*/
|
||||
PdfExtractionContentError {
|
||||
Objects.requireNonNull(reason, "reason must not be null");
|
||||
if (reason.isEmpty()) {
|
||||
throw new IllegalArgumentException("reason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a technical (infrastructure) failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that something went wrong with the extraction process itself,
|
||||
* such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
|
||||
* <p>
|
||||
* These are typically retryable conditions in later batch runs, as they may be
|
||||
* transient infrastructure issues.
|
||||
*
|
||||
* @param errorMessage a description of what went wrong (non-null, non-empty)
|
||||
* @param cause the underlying exception, if any (may be null)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionTechnicalError(
|
||||
String errorMessage,
|
||||
Throwable cause
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param errorMessage must be non-null and non-empty
|
||||
* @param cause may be null
|
||||
* @throws NullPointerException if errorMessage is null
|
||||
* @throws IllegalArgumentException if errorMessage is empty
|
||||
*/
|
||||
PdfExtractionTechnicalError {
|
||||
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
|
||||
if (errorMessage.isEmpty()) {
|
||||
throw new IllegalArgumentException("errorMessage must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents successful PDF text extraction.
|
||||
* <p>
|
||||
* When this result is obtained, both text content and page count have been
|
||||
* successfully extracted and are guaranteed to be valid.
|
||||
*
|
||||
* @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
|
||||
* @param pageCount the number of pages in the PDF (non-null, validated {@code >= 1})
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record PdfExtractionSuccess(
|
||||
String extractedText,
|
||||
PdfPageCount pageCount
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param extractedText must be non-null (may be empty)
|
||||
* @param pageCount must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
public PdfExtractionSuccess {
|
||||
Objects.requireNonNull(extractedText, "extractedText must not be null");
|
||||
Objects.requireNonNull(pageCount, "pageCount must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a technical (infrastructure) failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that something went wrong with the extraction process itself,
|
||||
* such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
|
||||
* <p>
|
||||
* These are typically retryable conditions in later batch runs, as they may be
|
||||
* transient infrastructure issues.
|
||||
*
|
||||
* @param errorMessage a description of what went wrong (non-null, non-empty)
|
||||
* @param cause the underlying exception, if any (may be null)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record PdfExtractionTechnicalError(
|
||||
String errorMessage,
|
||||
Throwable cause
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param errorMessage must be non-null and non-empty
|
||||
* @param cause may be null
|
||||
* @throws NullPointerException if errorMessage is null
|
||||
* @throws IllegalArgumentException if errorMessage is empty
|
||||
*/
|
||||
public PdfExtractionTechnicalError {
|
||||
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
|
||||
if (errorMessage.isEmpty()) {
|
||||
throw new IllegalArgumentException("errorMessage must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user