From c482b20df9f706c906332cc57b275ed6431f9453 Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Wed, 1 Apr 2026 19:07:03 +0200 Subject: [PATCH] =?UTF-8?q?M3-AP-004:=20Vorpr=C3=BCfung=20auf=20Unicode-f?= =?UTF-8?q?=C3=A4higen=20brauchbaren=20Text=20korrigiert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .claude/settings.local.json | 4 +- .../service/M3PreCheckEvaluator.java | 119 ++++++++ .../application/service/package-info.java | 20 ++ .../service/M3PreCheckEvaluatorTest.java | 276 ++++++++++++++++++ .../domain/model/M3PreCheckFailed.java | 46 +++ .../domain/model/M3PreCheckFailureReason.java | 54 ++++ .../domain/model/M3PreCheckPassed.java | 36 +++ .../domain/model/M3ProcessingDecision.java | 83 +----- .../umbenenner/domain/model/package-info.java | 11 + 9 files changed, 568 insertions(+), 81 deletions(-) create mode 100644 pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluator.java create mode 100644 pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java create mode 100644 pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java create mode 100644 pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailed.java create mode 100644 pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailureReason.java create mode 100644 pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckPassed.java diff --git a/.claude/settings.local.json b/.claude/settings.local.json index 949c18b..47e5593 100644 --- a/.claude/settings.local.json +++ b/.claude/settings.local.json @@ -10,7 +10,9 @@ "Bash(mvn -pl pdf-umbenenner-adapter-out clean compile)", "Bash(mvn dependency:tree -pl pdf-umbenenner-adapter-out)", "Bash(mvn -pl pdf-umbenenner-domain clean compile)", - "Bash(mvn 
help:describe -Dplugin=org.apache.pdfbox:pdfbox -Ddetail=false)" + "Bash(mvn help:describe -Dplugin=org.apache.pdfbox:pdfbox -Ddetail=false)", + "Bash(cd /d D:/Dev/Projects/pdf-umbenenner-parent)", + "Bash(mvn -v)" ] } } diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluator.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluator.java new file mode 100644 index 0000000..d5224f6 --- /dev/null +++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluator.java @@ -0,0 +1,119 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed; +import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; + +import java.util.Objects; + +/** + * Evaluates whether a successfully extracted PDF passes M3 pre-checks. + *

+ * M3 Pre-checks verify that: + *

+ *

+ * A document that passes both pre-checks is ready to proceed to M4 and later milestones. + * A document that fails a pre-check is classified with a specific deterministic failure reason + * and will not proceed further in the current batch run. + *

+ * This service is stateless and thread-safe. + * + * @since M3-AP-004 + */ +public class M3PreCheckEvaluator { + + /** + * Evaluates M3 pre-checks for a successfully extracted PDF document. + *

+     * Pre-check logic:
+     * <ol>
+     *   <li>Check if extracted text contains at least one letter or digit after normalization</li>
+     *   <li>Check if document page count does not exceed the configured limit</li>
+     * </ol>
+ *

+ * Returns {@link M3PreCheckPassed} if both checks pass, or {@link M3PreCheckFailed} + * with a specific reason if any check fails. + * + * @param candidate the source document metadata + * @param extraction the successfully extracted PDF content + * @param configuration the startup configuration (used for maxPages limit) + * @return the pre-check decision: passed or failed with reason + * @throws NullPointerException if any parameter is null + */ + public static M3ProcessingDecision evaluate( + SourceDocumentCandidate candidate, + PdfExtractionSuccess extraction, + StartConfiguration configuration) { + + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(extraction, "extraction must not be null"); + Objects.requireNonNull(configuration, "configuration must not be null"); + + // Pre-check 1: Verify document has usable text + if (!hasUsableText(extraction.extractedText())) { + return new M3PreCheckFailed( + candidate, + M3PreCheckFailureReason.NO_USABLE_TEXT.getDescription() + ); + } + + // Pre-check 2: Verify document page count does not exceed configured limit + if (extraction.pageCount().exceedsLimit(configuration.maxPages())) { + return new M3PreCheckFailed( + candidate, + M3PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription() + ); + } + + // All pre-checks passed + return new M3PreCheckPassed(candidate, extraction); + } + + /** + * Determines whether the extracted text contains at least one meaningful character. + *

+ * Definition of "usable text" for M3: + *

+ *

+     * Normalization process:
+     * <ol>
+     *   <li>Trim leading and trailing whitespace</li>
+     *   <li>Scan for at least one character where {@link Character#isLetterOrDigit(char)} returns true</li>
+     *   <li>Unicode-aware character classification (not limited to ASCII)</li>
+     * </ol>
+     *
+     * @param text the extracted text from the PDF (non-null, may be empty)
+     * @return true if text contains at least one letter or digit (Unicode-aware, including supplementary code points) after normalization
+     */
+    private static boolean hasUsableText(String text) {
+        Objects.requireNonNull(text, "text must not be null");
+
+        // Strip surrounding Unicode whitespace first
+        String trimmed = text.strip();
+
+        // Scan code points, not chars: each half of a surrogate pair is never a letter or digit on its own
+        for (int codePoint : trimmed.codePoints().toArray()) {
+            if (Character.isLetterOrDigit(codePoint)) {
+                return true;
+            }
+        }
+
+        // No letter or digit found
+        return false;
+    }
+
+    private M3PreCheckEvaluator() {
+        // Static utility class – no instances
+    }
+}
diff --git a/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java
new file mode 100644
index 0000000..866df46
--- /dev/null
+++ b/pdf-umbenenner-application/src/main/java/de/gecheckt/pdf/umbenenner/application/service/package-info.java
@@ -0,0 +1,20 @@
+/**
+ * Application-level services for business logic evaluation.
+ *

+ * This package contains stateless, pure-logic services that evaluate document content + * and apply business rules. Services in this package: + *

+ * + * Current services: + * + * + * @since M3-AP-004 + */ +package de.gecheckt.pdf.umbenenner.application.service; diff --git a/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java new file mode 100644 index 0000000..d0ac30c --- /dev/null +++ b/pdf-umbenenner-application/src/test/java/de/gecheckt/pdf/umbenenner/application/service/M3PreCheckEvaluatorTest.java @@ -0,0 +1,276 @@ +package de.gecheckt.pdf.umbenenner.application.service; + +import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason; +import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed; +import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision; +import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess; +import de.gecheckt.pdf.umbenenner.domain.model.PdfPageCount; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.net.URI; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.junit.jupiter.api.Assertions.*; + +/** + * Tests for {@link M3PreCheckEvaluator}. + *

+ * Verifies correct M3 pre-check logic for usable text and page limit validation. + */ +class M3PreCheckEvaluatorTest { + + @TempDir + Path tempDir; + + @Test + void evaluate_passesWhenDocumentHasUsableTextAndValidPageCount() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("Some meaningful text", new PdfPageCount(5)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckPassed, "Should pass when text is usable and page count is valid"); + M3PreCheckPassed passed = (M3PreCheckPassed) result; + assertSame(passed.candidate(), candidate, "Candidate should be preserved"); + assertSame(passed.extraction(), extraction, "Extraction should be preserved"); + } + + @Test + void evaluate_failsWithNoUsableTextWhenExtractedTextIsEmpty() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckFailed, "Should fail with empty text"); + M3PreCheckFailed failed = (M3PreCheckFailed) result; + assertEquals(M3PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); + } + + @Test + void evaluate_failsWithNoUsableTextWhenTextIsOnlyWhitespace() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess(" \n\t \r\n ", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckFailed, "Should fail with 
whitespace-only text"); + M3PreCheckFailed failed = (M3PreCheckFailed) result; + assertEquals(M3PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); + } + + @Test + void evaluate_failsWithNoUsableTextWhenTextContainsOnlySpecialCharacters() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#$%^&*()_+-=[]{}|;:',.<>?/", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckFailed, "Should fail with special characters only"); + M3PreCheckFailed failed = (M3PreCheckFailed) result; + assertEquals(M3PreCheckFailureReason.NO_USABLE_TEXT.getDescription(), failed.failureReason()); + } + + @Test + void evaluate_passesWithTextContainingSingleLetter() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("a", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckPassed, "Should pass with single letter"); + } + + @Test + void evaluate_passesWithTextContainingSingleDigit() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("5", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckPassed, "Should pass with single digit"); + } + + @Test + void evaluate_passesWithTextMixedWithSpecialCharactersIfLettersOrDigitsPresent() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate 
candidate = buildCandidate();
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("!@#a$%^&*", new PdfPageCount(1));
+
+        M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
+
+        assertTrue(result instanceof M3PreCheckPassed, "Should pass when letters/digits are present among special chars");
+    }
+
+    @Test
+    void evaluate_passesWithWhitespaceAroundUsableText() throws Exception {
+        StartConfiguration config = buildConfig(maxPages(10));
+        SourceDocumentCandidate candidate = buildCandidate();
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess(" meaningful text ", new PdfPageCount(1));
+
+        M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
+
+        assertTrue(result instanceof M3PreCheckPassed, "Should pass when text has meaningful content despite whitespace");
+    }
+
+    @Test
+    void evaluate_passesWhenPageCountEqualsLimit() throws Exception {
+        StartConfiguration config = buildConfig(maxPages(5));
+        SourceDocumentCandidate candidate = buildCandidate();
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(5));
+
+        M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
+
+        assertTrue(result instanceof M3PreCheckPassed, "Should pass when page count equals limit (not exceeded)");
+    }
+
+    @Test
+    void evaluate_failsWithPageLimitExceededWhenPageCountExceedsLimit() throws Exception {
+        StartConfiguration config = buildConfig(maxPages(5));
+        SourceDocumentCandidate candidate = buildCandidate();
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(6));
+
+        M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
+
+        assertTrue(result instanceof M3PreCheckFailed, "Should fail when page count exceeds limit");
+        M3PreCheckFailed failed = (M3PreCheckFailed) result;
+
assertEquals(M3PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription(), failed.failureReason());
+    }
+
+    @Test
+    void evaluate_failsWithPageLimitExceededEvenIfTextIsValid() throws Exception {
+        StartConfiguration config = buildConfig(maxPages(2));
+        SourceDocumentCandidate candidate = buildCandidate();
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("Excellent meaningful text with lots of content", new PdfPageCount(100));
+
+        M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
+
+        assertTrue(result instanceof M3PreCheckFailed, "Should fail with page limit exceeded even if text is good");
+        M3PreCheckFailed failed = (M3PreCheckFailed) result;
+        assertEquals(M3PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription(), failed.failureReason());
+    }
+
+    @Test
+    void evaluate_reportsExactlyOneFailureWhenBothChecksFail() throws Exception {
+        // Both checks fail here (empty text, 10 pages > limit 1). The evaluator runs the usable-text
+        // check first, but this test deliberately does not pin which reason is reported
+        StartConfiguration config = buildConfig(maxPages(1));
+        SourceDocumentCandidate candidate = buildCandidate();
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("", new PdfPageCount(10));
+
+        M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config);
+
+        assertTrue(result instanceof M3PreCheckFailed, "Should fail when both checks fail");
+        // The specific order of checks doesn't matter for M3; just verify one reason is returned
+        M3PreCheckFailed failed = (M3PreCheckFailed) result;
+        assertNotNull(failed.failureReason());
+        assertFalse(failed.failureReason().isEmpty());
+    }
+
+    @Test
+    void evaluate_throwsNullPointerExceptionWhenCandidateIsNull() throws Exception {
+        StartConfiguration config = buildConfig(maxPages(10));
+        PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(1));
+
+        assertThrows(NullPointerException.class, () ->
+
M3PreCheckEvaluator.evaluate(null, extraction, config) + ); + } + + @Test + void evaluate_throwsNullPointerExceptionWhenExtractionIsNull() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + + assertThrows(NullPointerException.class, () -> + M3PreCheckEvaluator.evaluate(candidate, null, config) + ); + } + + @Test + void evaluate_throwsNullPointerExceptionWhenConfigurationIsNull() throws Exception { + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("Valid text", new PdfPageCount(1)); + + assertThrows(NullPointerException.class, () -> + M3PreCheckEvaluator.evaluate(candidate, extraction, null) + ); + } + + @Test + void evaluate_passesWithUnicodeGermanUmlauts() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("Äußerst äöüß Großes", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckPassed, "Should pass with German umlauts (ÄÖÜß)"); + } + + @Test + void evaluate_passesWithOtherUnicodeCharacters() throws Exception { + StartConfiguration config = buildConfig(maxPages(10)); + SourceDocumentCandidate candidate = buildCandidate(); + PdfExtractionSuccess extraction = new PdfExtractionSuccess("Αβγδ 中文 καλημέρα", new PdfPageCount(1)); + + M3ProcessingDecision result = M3PreCheckEvaluator.evaluate(candidate, extraction, config); + + assertTrue(result instanceof M3PreCheckPassed, "Should pass with Greek, Chinese, and other Unicode letters"); + } + + // ========================================================================= + // Helpers + // ========================================================================= + + private StartConfiguration buildConfig(int maxPages) throws 
Exception { + Path sourceDir = Files.createDirectories(tempDir.resolve("source")); + Path targetDir = Files.createDirectories(tempDir.resolve("target")); + Path dbFile = tempDir.resolve("db.sqlite"); + Files.createFile(dbFile); + Path promptFile = tempDir.resolve("prompt.txt"); + Files.createFile(promptFile); + + return new StartConfiguration( + sourceDir, + targetDir, + dbFile, + URI.create("https://api.example.com"), + "gpt-4", + 30, + 3, + maxPages, + 50000, + promptFile, + tempDir.resolve("lock.lock"), + tempDir.resolve("logs"), + "INFO", + "test-key" + ); + } + + private int maxPages(int limit) { + return limit; + } + + private SourceDocumentCandidate buildCandidate() throws Exception { + Path sourceDir = Files.createDirectories(tempDir.resolve("source")); + Path pdfFile = sourceDir.resolve("test.pdf"); + Files.createFile(pdfFile); + SourceDocumentLocator locator = new SourceDocumentLocator(pdfFile.toString()); + return new SourceDocumentCandidate(pdfFile.getFileName().toString(), 0L, locator); + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailed.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailed.java new file mode 100644 index 0000000..67c39ab --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailed.java @@ -0,0 +1,46 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import java.util.Objects; + +/** + * Represents a document that failed an M3 pre-check. + *

+ * This result encapsulates: + *

+ *

+ * Reasons include: + *

+ *

+ * A document with this decision will not proceed further in the current batch run. + * + * @param candidate the source document metadata + * @param failureReason a human-readable explanation of the pre-check failure + * @since M3-AP-001 + */ +public record M3PreCheckFailed( + SourceDocumentCandidate candidate, + String failureReason +) implements M3ProcessingDecision { + /** + * Constructor with validation. + * + * @param candidate must be non-null + * @param failureReason must be non-null and non-empty + * @throws NullPointerException if either parameter is null + * @throws IllegalArgumentException if failureReason is empty + */ + public M3PreCheckFailed { + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(failureReason, "failureReason must not be null"); + if (failureReason.isEmpty()) { + throw new IllegalArgumentException("failureReason must not be empty"); + } + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailureReason.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailureReason.java new file mode 100644 index 0000000..6ef5a33 --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckFailureReason.java @@ -0,0 +1,54 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +/** + * Enumeration of M3 pre-check failure reasons. + *

+ * These are the deterministic content errors that can occur during M3 pre-check evaluation. + * They distinguish between failures in the document content versus technical extraction failures. + *

+ * Deterministic content errors: + *

+ *

+ * Note: Technical extraction failures (I/O errors, PDFBox failures) are not M3 pre-check reasons; + * they are represented as {@link PdfExtractionTechnicalError} in the extraction result. + * + * @since M3-AP-004 + */ +public enum M3PreCheckFailureReason { + /** + * The extracted PDF text, after normalization, contains no letters or digits. + *

+ * This is a deterministic content error: reprocessing the same file in a later run + * will have the same outcome unless the source file is changed. + *

+ * In M3, retry logic: exactly 1 retry in a later batch run. + */ + NO_USABLE_TEXT("No usable text in extracted PDF content"), + + /** + * The document's page count exceeds the configured limit. + *

+ * This is a deterministic content error: the page count will not change unless the source file is modified. + *

+ * In M3, retry logic: exactly 1 retry in a later batch run. + */ + PAGE_LIMIT_EXCEEDED("Document page count exceeds configured limit"); + + private final String description; + + M3PreCheckFailureReason(String description) { + this.description = description; + } + + /** + * Returns a human-readable description of this failure reason. + * + * @return the description + */ + public String getDescription() { + return description; + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckPassed.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckPassed.java new file mode 100644 index 0000000..12eee37 --- /dev/null +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3PreCheckPassed.java @@ -0,0 +1,36 @@ +package de.gecheckt.pdf.umbenenner.domain.model; + +import java.util.Objects; + +/** + * Represents a document that passed all M3 pre-checks. + *

+ * This result encapsulates: + *

+ *

+ * A document with this decision is ready to proceed to M4 and later milestones + * (fingerprinting, persistence, KI integration, filename generation, target copy). + * + * @param candidate the source document metadata + * @param extraction the successful text extraction result + * @since M3-AP-001 + */ +public record M3PreCheckPassed( + SourceDocumentCandidate candidate, + PdfExtractionSuccess extraction +) implements M3ProcessingDecision { + /** + * Constructor with validation. + * + * @param candidate must be non-null + * @param extraction must be non-null + * @throws NullPointerException if either parameter is null + */ + public M3PreCheckPassed { + Objects.requireNonNull(candidate, "candidate must not be null"); + Objects.requireNonNull(extraction, "extraction must not be null"); + } +} diff --git a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3ProcessingDecision.java b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3ProcessingDecision.java index d55979b..2b77aee 100644 --- a/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3ProcessingDecision.java +++ b/pdf-umbenenner-domain/src/main/java/de/gecheckt/pdf/umbenenner/domain/model/M3ProcessingDecision.java @@ -1,13 +1,12 @@ package de.gecheckt.pdf.umbenenner.domain.model; -import java.util.Objects; - /** * Sealed interface representing the outcome of M3 document pre-checks. *

- * This is a placeholder interface introduced in AP-001 to establish the architectural
+ * This interface, introduced in AP-001, establishes the architectural
  * pattern for M3 pre-check results. The actual pre-check logic (fachlich validation
- * such as "brauchbarer Text" and "Seitenlimit") is implemented in later APs (AP-004, AP-005).
+ * such as "brauchbarer Text" and "Seitenlimit") is implemented in AP-004 by the application-layer
+ * service {@code de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator}.
  *

* There are two allowed implementations: *

*

+ * Additional classes introduced in M3: + *

+ * + * Implementation classes: + * + * * All classes in this package are: *