1
0

M3-AP-006: Fehlerklassifikation vereinfacht und Logging auf korrekte

Ergebnisfälle ausgerichtet
This commit is contained in:
2026-04-01 21:45:06 +02:00
parent d60d050948
commit 4d769643d4
13 changed files with 557 additions and 42 deletions

View File

@@ -0,0 +1,80 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionTechnicalError;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import java.util.Objects;
/**
 * Orchestrates the M3 document processing pipeline: extraction result → pre-checks → outcome
 * classification.
 * <p>
 * Converts an already obtained PDF extraction result into an M3 processing outcome:
 * <ol>
 *   <li>If extraction failed (content or technical): {@link M3TechnicalDocumentError}</li>
 *   <li>If extraction succeeded: the result of the M3 pre-checks via {@link M3PreCheckEvaluator}</li>
 * </ol>
 * <p>
 * This service produces {@link M3DocumentProcessingOutcome}, a sealed interface. The following
 * three outcomes can be returned from here:
 * <ul>
 *   <li>Pre-check passed (document ready for M4+)</li>
 *   <li>Pre-check failed (deterministic content error: no usable text, page limit exceeded)</li>
 *   <li>Technical document error (I/O, access, PDF parsing, etc.)</li>
 * </ul>
 * <p>
 * This class holds no state of its own and exposes a single static method; concurrent use is
 * safe as long as the collaborators it delegates to are thread-safe.
 *
 * @since M3-AP-006
 */
public final class M3DocumentProcessingService {

    private M3DocumentProcessingService() {
        // Static utility class - no instances.
    }

    /**
     * Classifies a document candidate's extraction result into an M3 processing outcome.
     * <p>
     * This method does not perform the PDF extraction itself; the caller supplies the
     * extraction result. Classification rules:
     * <ol>
     *   <li>Successful extraction: delegate to {@link M3PreCheckEvaluator} and return its
     *       outcome (passed or failed)</li>
     *   <li>Content error: {@link M3TechnicalDocumentError} with a prefixed reason and no cause</li>
     *   <li>Technical error: {@link M3TechnicalDocumentError} carrying the original message
     *       and cause</li>
     * </ol>
     *
     * @param candidate the document candidate to process
     * @param extractionResult the result from PDF extraction (from {@link de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort})
     * @param configuration the startup configuration (passed on to the pre-check evaluation)
     * @return the M3 processing outcome: pre-check passed, pre-check failed, or technical
     *         document error (all implementing {@link M3DocumentProcessingOutcome})
     * @throws NullPointerException if any parameter is null
     */
    public static M3DocumentProcessingOutcome processDocument(
            SourceDocumentCandidate candidate,
            PdfExtractionResult extractionResult,
            StartConfiguration configuration) {

        Objects.requireNonNull(candidate, "candidate must not be null");
        Objects.requireNonNull(extractionResult, "extractionResult must not be null");
        Objects.requireNonNull(configuration, "configuration must not be null");

        return switch (extractionResult) {
            // Extraction succeeded: the pre-check evaluation decides passed vs. failed.
            case PdfExtractionSuccess success ->
                    M3PreCheckEvaluator.evaluate(candidate, success, configuration);

            // PDF content not extractable: classified as technical document error.
            // No underlying exception is available for this case, hence the null cause.
            case PdfExtractionContentError contentError ->
                    new M3TechnicalDocumentError(
                            candidate,
                            "PDF content not extractable: " + contentError.reason(),
                            null);

            // Technical failure during extraction (potentially retryable): message and
            // cause are passed through unchanged.
            case PdfExtractionTechnicalError technicalError ->
                    new M3TechnicalDocumentError(
                            candidate,
                            technicalError.errorMessage(),
                            technicalError.cause());
        };
    }
}

View File

@@ -1,10 +1,10 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
@@ -40,14 +40,17 @@ public class M3PreCheckEvaluator {
* <p>
* Returns {@link M3PreCheckPassed} if both checks pass, or {@link M3PreCheckFailed}
* with a specific reason if any check fails.
* <p>
* Note: Returns {@link M3DocumentProcessingOutcome} to integrate cleanly with the complete
* M3 document processing pipeline.
*
* @param candidate the source document metadata
* @param extraction the successfully extracted PDF content
* @param configuration the startup configuration (used for maxPages limit)
* @return the pre-check decision: passed or failed with reason
* @return the pre-check outcome: passed or failed with reason (both implement {@link M3DocumentProcessingOutcome})
* @throws NullPointerException if any parameter is null
*/
public static M3ProcessingDecision evaluate(
public static M3DocumentProcessingOutcome evaluate(
SourceDocumentCandidate candidate,
PdfExtractionSuccess extraction,
StartConfiguration configuration) {

View File

@@ -13,8 +13,18 @@
* Current services:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator} — M3 pre-check evaluation (M3-AP-004)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService} — complete M3 document processing pipeline orchestration (M3-AP-006)</li>
* </ul>
*
* M3 Document Processing Pipeline (M3-AP-006):
* The {@link de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService} coordinates
* the complete M3 processing workflow:
* <ol>
* <li>Convert technical PDF extraction results to M3 processing outcomes</li>
* <li>Route successful extractions through M3 pre-check validation</li>
* <li>Classify extraction and pre-check failures with appropriate error types</li>
* </ol>
*
* @since M3-AP-004
*/
package de.gecheckt.pdf.umbenenner.application.service;

View File

@@ -8,11 +8,12 @@ import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator;
import de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.M3ProcessingDecision;
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
@@ -142,16 +143,17 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
}
/**
* Processes a single PDF candidate through the M3 pipeline.
* Processes a single PDF candidate through the complete M3 pipeline.
* <p>
* M3 processing steps per document:
* <ol>
* <li>Log candidate recognition</li>
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
* <li>On successful extraction: run M3 pre-checks via {@link M3PreCheckEvaluator}</li>
* <li>Log the per-document M3 decision and end controlled</li>
* <li>Process extraction result through M3 pre-checks via {@link M3DocumentProcessingService}</li>
* <li>Log extraction outcome and final M3 decision</li>
* </ol>
* <p>
* Per-document errors (extraction failure, pre-check failure) do not abort the overall
* Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall
* batch run. Each candidate ends controlled regardless of its outcome.
* <p>
* M3 processing boundary: no KI call, no persistence, no filename generation,
@@ -160,25 +162,36 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
* @param candidate the candidate to process
*/
private void processCandidate(SourceDocumentCandidate candidate) {
LOG.debug("Processing candidate: {}", candidate.uniqueIdentifier());
PdfExtractionResult extractionResult = pdfTextExtractionPort.extractTextAndPageCount(candidate);
// Log extraction outcome
switch (extractionResult) {
case PdfExtractionSuccess success -> {
M3ProcessingDecision decision = M3PreCheckEvaluator.evaluate(candidate, success, configuration);
switch (decision) {
case M3PreCheckPassed passed ->
LOG.info("M3 pre-checks passed for '{}'. Candidate ready for further processing (M4+).",
candidate.uniqueIdentifier());
case M3PreCheckFailed failed ->
LOG.info("M3 pre-check failed for '{}': {}",
candidate.uniqueIdentifier(), failed.failureReason());
}
}
case PdfExtractionSuccess success ->
LOG.debug("PDF extraction successful for '{}'. Pages: {}, Text length: {} chars.",
candidate.uniqueIdentifier(), success.pageCount().value(), success.extractedText().length());
case PdfExtractionContentError contentError ->
LOG.info("PDF content not extractable for '{}': {}",
LOG.debug("PDF content extraction failed for '{}' (content problem): {}",
candidate.uniqueIdentifier(), contentError.reason());
case PdfExtractionTechnicalError technicalError ->
LOG.warn("Technical error extracting PDF '{}': {}",
LOG.debug("PDF extraction technical error for '{}': {}",
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
// Process through complete M3 pipeline
var m3Outcome = M3DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
// Log M3 processing outcome
switch (m3Outcome) {
case M3PreCheckPassed passed ->
LOG.info("M3 pre-checks PASSED for '{}'. Candidate ready for further processing (M4+).",
candidate.uniqueIdentifier());
case M3PreCheckFailed failed ->
LOG.info("M3 pre-checks FAILED for '{}': {} (Deterministic content error may retry in later run).",
candidate.uniqueIdentifier(), failed.failureReason());
case M3TechnicalDocumentError technicalError ->
LOG.warn("M3 processing FAILED for '{}': {} (Technical error may retry in later run).",
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
}