M3-APP-03: PDFBox-Extraktion technisch sauber abgegrenzt und Fehlersemantik korrigiert
2026-04-01 18:54:35 +02:00
parent 8f138d4cfa
commit a9407aaba2
8 changed files with 446 additions and 96 deletions
@@ -0,0 +1,36 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Result variant of {@link PdfExtractionResult} for content-related extraction failures.
+ * <p>
+ * This indicates that the PDF file itself is readable (no I/O error),
+ * but its content is not suitable for text extraction.
+ * <p>
+ * Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
+ * PDF is severely corrupted in the content layer.
+ * <p>
+ * This is typically a deterministic, non-retryable condition for a given source file
+ * (unless the source file is modified and re-scanned in a later run).
+ *
+ * @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
+ * @since M3-AP-001
+ */
+public record PdfExtractionContentError(
+    String reason
+) implements PdfExtractionResult {
+    /**
+     * Compact canonical constructor; rejects a null or zero-length reason.
+     * A whitespace-only reason is accepted, because only {@code isEmpty()} is checked.
+     * @param reason must be non-null and non-empty
+     * @throws NullPointerException if reason is null
+     * @throws IllegalArgumentException if reason is empty
+     */
+    public PdfExtractionContentError {
+        Objects.requireNonNull(reason, "reason must not be null");
+        if (reason.isEmpty()) {
+            throw new IllegalArgumentException("reason must not be empty");
+        }
+    }
+}
@@ -1,7 +1,5 @@
 package de.gecheckt.pdf.umbenenner.domain.model;

-import java.util.Objects;
-
 /**
 * Sealed interface representing the outcome of PDF text extraction.
 * <p>
@@ -27,96 +25,3 @@ public sealed interface PdfExtractionResult
    permits PdfExtractionSuccess, PdfExtractionContentError, PdfExtractionTechnicalError {
    // Marker interface; concrete implementations define structure
 }
-
-/**
- * Represents successful PDF text extraction.
- * <p>
- * When this result is obtained, both text content and page count have been
- * successfully extracted and are guaranteed to be valid.
- *
- * @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
- * @param pageCount the number of pages in the PDF (non-null, validated &gt;= 1)
- * @since M3-AP-001
- */
-record PdfExtractionSuccess(
-    String extractedText,
-    PdfPageCount pageCount
-) implements PdfExtractionResult {
-    /**
-     * Constructor with validation.
-     *
-     * @param extractedText must be non-null (may be empty)
-     * @param pageCount must be non-null
-     * @throws NullPointerException if either parameter is null
-     */
-    PdfExtractionSuccess {
-        Objects.requireNonNull(extractedText, "extractedText must not be null");
-        Objects.requireNonNull(pageCount, "pageCount must not be null");
-    }
-}
-
-/**
- * Represents a content-related failure during PDF text extraction.
- * <p>
- * This indicates that the PDF file itself is readable (no I/O error),
- * but its content is not suitable for text extraction.
- * <p>
- * Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
- * PDF is severely corrupted in the content layer.
- * <p>
- * This is typically a deterministic, non-retryable condition for a given source file
- * (unless the source file is modified and re-scanned in a later run).
- *
- * @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
- * @since M3-AP-001
- */
-record PdfExtractionContentError(
-    String reason
-) implements PdfExtractionResult {
-    /**
-     * Constructor with validation.
-     *
-     * @param reason must be non-null and non-empty
-     * @throws NullPointerException if reason is null
-     * @throws IllegalArgumentException if reason is empty
-     */
-    PdfExtractionContentError {
-        Objects.requireNonNull(reason, "reason must not be null");
-        if (reason.isEmpty()) {
-            throw new IllegalArgumentException("reason must not be empty");
-        }
-    }
-}
-
-/**
- * Represents a technical (infrastructure) failure during PDF text extraction.
- * <p>
- * This indicates that something went wrong with the extraction process itself,
- * such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
- * <p>
- * These are typically retryable conditions in later batch runs, as they may be
- * transient infrastructure issues.
- *
- * @param errorMessage a description of what went wrong (non-null, non-empty)
- * @param cause the underlying exception, if any (may be null)
- * @since M3-AP-001
- */
-record PdfExtractionTechnicalError(
-    String errorMessage,
-    Throwable cause
-) implements PdfExtractionResult {
-    /**
-     * Constructor with validation.
-     *
-     * @param errorMessage must be non-null and non-empty
-     * @param cause may be null
-     * @throws NullPointerException if errorMessage is null
-     * @throws IllegalArgumentException if errorMessage is empty
-     */
-    PdfExtractionTechnicalError {
-        Objects.requireNonNull(errorMessage, "errorMessage must not be null");
-        if (errorMessage.isEmpty()) {
-            throw new IllegalArgumentException("errorMessage must not be empty");
-        }
-    }
-}
@@ -0,0 +1,30 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Result variant of {@link PdfExtractionResult} for a successful PDF text extraction.
+ * <p>
+ * Carries the extracted text together with the page count of the document.
+ * This record itself only guarantees that both components are non-null.
+ *
+ * @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
+ * @param pageCount the number of pages (non-null; the &gt;= 1 bound is presumably enforced by PdfPageCount, confirm there)
+ * @since M3-AP-001
+ */
+public record PdfExtractionSuccess(
+    String extractedText,
+    PdfPageCount pageCount
+) implements PdfExtractionResult {
+    /**
+     * Compact canonical constructor; only null checks are performed here.
+     *
+     * @param extractedText must be non-null (may be empty)
+     * @param pageCount must be non-null
+     * @throws NullPointerException if either parameter is null
+     */
+    public PdfExtractionSuccess {
+        Objects.requireNonNull(extractedText, "extractedText must not be null");
+        Objects.requireNonNull(pageCount, "pageCount must not be null");
+    }
+}
@@ -0,0 +1,36 @@
+package de.gecheckt.pdf.umbenenner.domain.model;
+
+import java.util.Objects;
+
+/**
+ * Result variant of {@link PdfExtractionResult} for technical (infrastructure) failures.
+ * <p>
+ * This indicates that something went wrong with the extraction process itself,
+ * such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
+ * <p>
+ * These are typically retryable conditions in later batch runs, as they may be
+ * transient infrastructure issues.
+ *
+ * @param errorMessage a description of what went wrong (non-null, non-empty)
+ * @param cause the underlying exception, if any (may be null)
+ * @since M3-AP-001
+ */
+public record PdfExtractionTechnicalError(
+    String errorMessage,
+    Throwable cause
+) implements PdfExtractionResult {
+    /**
+     * Compact canonical constructor; rejects a null or zero-length errorMessage.
+     * {@code cause} is not validated, and a whitespace-only message is accepted.
+     * @param errorMessage must be non-null and non-empty
+     * @param cause may be null
+     * @throws NullPointerException if errorMessage is null
+     * @throws IllegalArgumentException if errorMessage is empty
+     */
+    public PdfExtractionTechnicalError {
+        Objects.requireNonNull(errorMessage, "errorMessage must not be null");
+        if (errorMessage.isEmpty()) {
+            throw new IllegalArgumentException("errorMessage must not be empty");
+        }
+    }
+}