M3-APP-03: PDFBox-Extraktion technisch sauber abgegrenzt und
Fehlersemantik korrigiert
This commit is contained in:
@@ -0,0 +1,36 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a content-related failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that the PDF file itself is readable (no I/O error),
|
||||
* but its content is not suitable for text extraction.
|
||||
* <p>
|
||||
* Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
|
||||
* PDF is severely corrupted in the content layer.
|
||||
* <p>
|
||||
* This is typically a deterministic, non-retryable condition for a given source file
|
||||
* (unless the source file is modified and re-scanned in a later run).
|
||||
*
|
||||
* @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record PdfExtractionContentError(
|
||||
String reason
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param reason must be non-null and non-empty
|
||||
* @throws NullPointerException if reason is null
|
||||
* @throws IllegalArgumentException if reason is empty
|
||||
*/
|
||||
public PdfExtractionContentError {
|
||||
Objects.requireNonNull(reason, "reason must not be null");
|
||||
if (reason.isEmpty()) {
|
||||
throw new IllegalArgumentException("reason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,7 +1,5 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Sealed interface representing the outcome of PDF text extraction.
|
||||
* <p>
|
||||
@@ -27,96 +25,3 @@ public sealed interface PdfExtractionResult
|
||||
permits PdfExtractionSuccess, PdfExtractionContentError, PdfExtractionTechnicalError {
|
||||
// Marker interface; concrete implementations define structure
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents successful PDF text extraction.
|
||||
* <p>
|
||||
* When this result is obtained, both text content and page count have been
|
||||
* successfully extracted and are guaranteed to be valid.
|
||||
*
|
||||
* @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
|
||||
* @param pageCount the number of pages in the PDF (non-null, validated >= 1)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionSuccess(
|
||||
String extractedText,
|
||||
PdfPageCount pageCount
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param extractedText must be non-null (may be empty)
|
||||
* @param pageCount must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
PdfExtractionSuccess {
|
||||
Objects.requireNonNull(extractedText, "extractedText must not be null");
|
||||
Objects.requireNonNull(pageCount, "pageCount must not be null");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a content-related failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that the PDF file itself is readable (no I/O error),
|
||||
* but its content is not suitable for text extraction.
|
||||
* <p>
|
||||
* Examples: PDF is image-only (not OCR'd), PDF is encrypted and cannot be unlocked,
|
||||
* PDF is severely corrupted in the content layer.
|
||||
* <p>
|
||||
* This is typically a deterministic, non-retryable condition for a given source file
|
||||
* (unless the source file is modified and re-scanned in a later run).
|
||||
*
|
||||
* @param reason a human-readable explanation of why extraction failed (non-null, non-empty)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionContentError(
|
||||
String reason
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param reason must be non-null and non-empty
|
||||
* @throws NullPointerException if reason is null
|
||||
* @throws IllegalArgumentException if reason is empty
|
||||
*/
|
||||
PdfExtractionContentError {
|
||||
Objects.requireNonNull(reason, "reason must not be null");
|
||||
if (reason.isEmpty()) {
|
||||
throw new IllegalArgumentException("reason must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a technical (infrastructure) failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that something went wrong with the extraction process itself,
|
||||
* such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
|
||||
* <p>
|
||||
* These are typically retryable conditions in later batch runs, as they may be
|
||||
* transient infrastructure issues.
|
||||
*
|
||||
* @param errorMessage a description of what went wrong (non-null, non-empty)
|
||||
* @param cause the underlying exception, if any (may be null)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
record PdfExtractionTechnicalError(
|
||||
String errorMessage,
|
||||
Throwable cause
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param errorMessage must be non-null and non-empty
|
||||
* @param cause may be null
|
||||
* @throws NullPointerException if errorMessage is null
|
||||
* @throws IllegalArgumentException if errorMessage is empty
|
||||
*/
|
||||
PdfExtractionTechnicalError {
|
||||
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
|
||||
if (errorMessage.isEmpty()) {
|
||||
throw new IllegalArgumentException("errorMessage must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,30 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents successful PDF text extraction.
|
||||
* <p>
|
||||
* When this result is obtained, both text content and page count have been
|
||||
* successfully extracted and are guaranteed to be valid.
|
||||
*
|
||||
* @param extractedText the full text content extracted from the PDF (non-null, may be empty string)
|
||||
* @param pageCount the number of pages in the PDF (non-null, validated >= 1)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record PdfExtractionSuccess(
|
||||
String extractedText,
|
||||
PdfPageCount pageCount
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param extractedText must be non-null (may be empty)
|
||||
* @param pageCount must be non-null
|
||||
* @throws NullPointerException if either parameter is null
|
||||
*/
|
||||
public PdfExtractionSuccess {
|
||||
Objects.requireNonNull(extractedText, "extractedText must not be null");
|
||||
Objects.requireNonNull(pageCount, "pageCount must not be null");
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
package de.gecheckt.pdf.umbenenner.domain.model;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Represents a technical (infrastructure) failure during PDF text extraction.
|
||||
* <p>
|
||||
* This indicates that something went wrong with the extraction process itself,
|
||||
* such as file I/O errors, PDFBox library problems, or out-of-memory conditions.
|
||||
* <p>
|
||||
* These are typically retryable conditions in later batch runs, as they may be
|
||||
* transient infrastructure issues.
|
||||
*
|
||||
* @param errorMessage a description of what went wrong (non-null, non-empty)
|
||||
* @param cause the underlying exception, if any (may be null)
|
||||
* @since M3-AP-001
|
||||
*/
|
||||
public record PdfExtractionTechnicalError(
|
||||
String errorMessage,
|
||||
Throwable cause
|
||||
) implements PdfExtractionResult {
|
||||
/**
|
||||
* Constructor with validation.
|
||||
*
|
||||
* @param errorMessage must be non-null and non-empty
|
||||
* @param cause may be null
|
||||
* @throws NullPointerException if errorMessage is null
|
||||
* @throws IllegalArgumentException if errorMessage is empty
|
||||
*/
|
||||
public PdfExtractionTechnicalError {
|
||||
Objects.requireNonNull(errorMessage, "errorMessage must not be null");
|
||||
if (errorMessage.isEmpty()) {
|
||||
throw new IllegalArgumentException("errorMessage must not be empty");
|
||||
}
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user