1
0

Meilenstein-Präfixe aus Klassennamen entfernt

This commit is contained in:
2026-04-02 09:11:52 +02:00
parent c0cdd0ed6e
commit 7d5c21f14c
21 changed files with 501 additions and 455 deletions

View File

@@ -1,8 +1,8 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
@@ -12,45 +12,43 @@ import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import java.util.Objects;
/**
* Orchestrates the M3 document processing pipeline: extraction → pre-checks → outcome classification.
* Orchestrates the document processing pipeline: extraction → pre-checks → outcome classification.
* <p>
* Converts technical extraction results into M3 processing outcomes through this pipeline:
* Converts technical extraction results into processing outcomes through this pipeline:
* <ol>
* <li>If extraction fails (content or technical): {@link M3TechnicalDocumentError}</li>
* <li>If extraction succeeds: Evaluate M3 pre-checks via {@link M3PreCheckEvaluator}</li>
* <li>If extraction fails (content or technical): {@link TechnicalDocumentError}</li>
* <li>If extraction succeeds: Evaluate pre-checks via {@link PreCheckEvaluator}</li>
* </ol>
* <p>
* This service produces {@link M3DocumentProcessingOutcome}, a sealed interface that covers
* all four M3 document processing outcomes:
* This service produces {@link DocumentProcessingOutcome}, a sealed interface that covers
* all document processing outcomes:
* <ul>
* <li>Pre-check passed (document ready for M4+)</li>
* <li>Pre-check passed (document ready for further processing)</li>
* <li>Pre-check failed (deterministic content error: no usable text, page limit exceeded)</li>
* <li>Technical document error (I/O, access, PDF parsing, etc.)</li>
* </ul>
* <p>
* This service is stateless and thread-safe.
*
* @since M3-AP-006
*/
public class M3DocumentProcessingService {
public class DocumentProcessingService {
/**
* Processes a document candidate through the complete M3 pipeline.
* Processes a document candidate through the complete pipeline.
* <p>
* Pipeline:
* <ol>
* <li>Extract text and page count from the PDF candidate</li>
* <li>If extraction fails (technical or content): classify as technical document error</li>
* <li>If extraction succeeds: evaluate M3 pre-checks</li>
* <li>If extraction succeeds: evaluate pre-checks</li>
* </ol>
*
* @param candidate the document candidate to process
* @param extractionResult the result from PDF extraction (from {@link de.gecheckt.pdf.umbenenner.application.port.out.PdfTextExtractionPort})
* @param configuration the startup configuration (used for pre-check validation)
* @return the complete M3 processing outcome (one of four possibilities, all implementing {@link M3DocumentProcessingOutcome})
* @return the complete processing outcome (implementing {@link DocumentProcessingOutcome})
* @throws NullPointerException if any parameter is null
*/
public static M3DocumentProcessingOutcome processDocument(
public static DocumentProcessingOutcome processDocument(
SourceDocumentCandidate candidate,
PdfExtractionResult extractionResult,
StartConfiguration configuration) {
@@ -61,20 +59,20 @@ public class M3DocumentProcessingService {
return switch (extractionResult) {
case PdfExtractionSuccess success ->
// Extraction succeeded: evaluate M3 pre-checks
M3PreCheckEvaluator.evaluate(candidate, success, configuration);
// Extraction succeeded: evaluate pre-checks
PreCheckEvaluator.evaluate(candidate, success, configuration);
case PdfExtractionContentError contentError ->
// PDF content not extractable: classify as technical document error
new M3TechnicalDocumentError(candidate, "PDF content not extractable: " + contentError.reason(), null);
new TechnicalDocumentError(candidate, "PDF content not extractable: " + contentError.reason(), null);
case PdfExtractionTechnicalError technicalError ->
// Technical failure during extraction: potentially retryable
new M3TechnicalDocumentError(candidate, technicalError.errorMessage(), technicalError.cause());
new TechnicalDocumentError(candidate, technicalError.errorMessage(), technicalError.cause());
};
}
private M3DocumentProcessingService() {
private DocumentProcessingService() {
// Static utility class — no instances
}
}
}

View File

@@ -1,36 +1,34 @@
package de.gecheckt.pdf.umbenenner.application.service;
import de.gecheckt.pdf.umbenenner.application.config.StartConfiguration;
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailureReason;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import java.util.Objects;
/**
* Evaluates whether a successfully extracted PDF passes M3 pre-checks.
* Evaluates whether a successfully extracted PDF passes pre-checks.
* <p>
* M3 Pre-checks verify that:
* Pre-checks verify that:
* <ul>
* <li>The extracted text contains at least one meaningful character after normalization</li>
* <li>The document's page count does not exceed the configured limit</li>
* </ul>
* <p>
* A document that passes both pre-checks is ready to proceed to M4 and later milestones.
* A document that passes both pre-checks is ready to proceed to further processing steps.
* A document that fails a pre-check is classified with a specific deterministic failure reason
* and will not proceed further in the current batch run.
* <p>
* This service is stateless and thread-safe.
*
* @since M3-AP-004
*/
public class M3PreCheckEvaluator {
public class PreCheckEvaluator {
/**
* Evaluates M3 pre-checks for a successfully extracted PDF document.
* Evaluates pre-checks for a successfully extracted PDF document.
* <p>
* Pre-check logic:
* <ol>
@@ -38,19 +36,19 @@ public class M3PreCheckEvaluator {
* <li>Check if document page count does not exceed the configured limit</li>
* </ol>
* <p>
* Returns {@link M3PreCheckPassed} if both checks pass, or {@link M3PreCheckFailed}
* Returns {@link PreCheckPassed} if both checks pass, or {@link PreCheckFailed}
* with a specific reason if any check fails.
* <p>
* Note: Returns {@link M3DocumentProcessingOutcome} to integrate cleanly with the complete
* M3 document processing pipeline.
* Note: Returns {@link DocumentProcessingOutcome} to integrate cleanly with the complete
* document processing pipeline.
*
* @param candidate the source document metadata
* @param extraction the successfully extracted PDF content
* @param configuration the startup configuration (used for maxPages limit)
* @return the pre-check outcome: passed or failed with reason (both implement {@link M3DocumentProcessingOutcome})
* @return the pre-check outcome: passed or failed with reason (both implement {@link DocumentProcessingOutcome})
* @throws NullPointerException if any parameter is null
*/
public static M3DocumentProcessingOutcome evaluate(
public static DocumentProcessingOutcome evaluate(
SourceDocumentCandidate candidate,
PdfExtractionSuccess extraction,
StartConfiguration configuration) {
@@ -61,28 +59,28 @@ public class M3PreCheckEvaluator {
// Pre-check 1: Verify document has usable text
if (!hasUsableText(extraction.extractedText())) {
return new M3PreCheckFailed(
return new PreCheckFailed(
candidate,
M3PreCheckFailureReason.NO_USABLE_TEXT.getDescription()
PreCheckFailureReason.NO_USABLE_TEXT.getDescription()
);
}
// Pre-check 2: Verify document page count does not exceed configured limit
if (extraction.pageCount().exceedsLimit(configuration.maxPages())) {
return new M3PreCheckFailed(
return new PreCheckFailed(
candidate,
M3PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription()
PreCheckFailureReason.PAGE_LIMIT_EXCEEDED.getDescription()
);
}
// All pre-checks passed
return new M3PreCheckPassed(candidate, extraction);
return new PreCheckPassed(candidate, extraction);
}
/**
* Determines whether the extracted text contains at least one meaningful character.
* <p>
* Definition of "usable text" for M3:
* Definition of "usable text":
* <ul>
* <li>After normalization (trimming whitespace), at least one letter or digit remains</li>
* <li>Pure whitespace or only special characters do not qualify as usable text</li>
@@ -116,7 +114,7 @@ public class M3PreCheckEvaluator {
return false;
}
private M3PreCheckEvaluator() {
private PreCheckEvaluator() {
// Static utility class — no instances
}
}
}

View File

@@ -12,19 +12,17 @@
*
* Current services:
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3PreCheckEvaluator} — M3 pre-check evaluation (M3-AP-004)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService} — complete M3 document processing pipeline orchestration (M3-AP-006)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.PreCheckEvaluator} — Pre-check evaluation</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} — Complete document processing pipeline orchestration</li>
* </ul>
*
* M3 Document Processing Pipeline (M3-AP-006):
* The {@link de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService} coordinates
* the complete M3 processing workflow:
* Document Processing Pipeline:
* The {@link de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService} coordinates
* the complete processing workflow:
* <ol>
* <li>Convert technical PDF extraction results to M3 processing outcomes</li>
* <li>Route successful extractions through M3 pre-check validation</li>
* <li>Convert technical PDF extraction results to processing outcomes</li>
* <li>Route successful extractions through pre-check validation</li>
* <li>Classify extraction and pre-check failures with appropriate error types</li>
* </ol>
*
* @since M3-AP-004
*/
package de.gecheckt.pdf.umbenenner.application.service;

View File

@@ -8,12 +8,12 @@ import de.gecheckt.pdf.umbenenner.application.port.out.RunLockPort;
import de.gecheckt.pdf.umbenenner.application.port.out.RunLockUnavailableException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentAccessException;
import de.gecheckt.pdf.umbenenner.application.port.out.SourceDocumentCandidatesPort;
import de.gecheckt.pdf.umbenenner.application.service.M3DocumentProcessingService;
import de.gecheckt.pdf.umbenenner.application.service.DocumentProcessingService;
import de.gecheckt.pdf.umbenenner.domain.model.BatchRunContext;
import de.gecheckt.pdf.umbenenner.domain.model.M3DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.M3PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.M3TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentProcessingOutcome;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckFailed;
import de.gecheckt.pdf.umbenenner.domain.model.PreCheckPassed;
import de.gecheckt.pdf.umbenenner.domain.model.TechnicalDocumentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionContentError;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionResult;
import de.gecheckt.pdf.umbenenner.domain.model.PdfExtractionSuccess;
@@ -26,38 +26,36 @@ import org.apache.logging.log4j.Logger;
import java.util.List;
/**
* M3 batch processing implementation of {@link RunBatchProcessingUseCase}.
* Batch processing implementation of {@link RunBatchProcessingUseCase}.
* <p>
* Orchestrates the complete M3 batch processing workflow:
* Orchestrates the complete batch processing workflow:
* <ol>
* <li>Acquire exclusive run lock to prevent concurrent instances</li>
* <li>Scan source folder for PDF candidates</li>
* <li>For each candidate: extract text and page count, run M3 pre-checks</li>
* <li>Log per-document M3 decision; end each document controlled without KI or target copy</li>
* <li>For each candidate: extract text and page count, run pre-checks</li>
* <li>Log the per-document decision; end each document in a controlled manner, without an AI (KI) call or target copy</li>
* <li>Release lock and return structured outcome for Bootstrap exit code mapping</li>
* </ol>
* <p>
* M3 processing boundary:
* Processing boundary:
* <ul>
* <li>Documents that pass M3 pre-checks end controlled and are ready for M4+ (KI, persistence, copy)</li>
* <li>Documents that pass pre-checks end in a controlled manner and are ready for further processing (AI/KI, persistence, copy)</li>
* <li>Documents with deterministic content errors (no usable text, page limit exceeded) end in a controlled manner</li>
* <li>Documents with technical extraction errors end in a controlled manner; they do not abort the overall run</li>
* <li>If the source folder itself is inaccessible, the run fails with {@link BatchRunOutcome#FAILURE}</li>
* </ul>
* <p>
* M3 Non-Goals (not implemented):
* Non-Goals (not implemented):
* <ul>
* <li>No fingerprinting or SQLite persistence</li>
* <li>No KI/AI integration or prompt loading</li>
* <li>No filename generation or target file copy</li>
* <li>No cross-run retry logic</li>
* </ul>
*
* @since M2-AP-004 (extended in M3-AP-005)
*/
public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
public class BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
private static final Logger LOG = LogManager.getLogger(M2BatchRunProcessingUseCase.class);
private static final Logger LOG = LogManager.getLogger(BatchRunProcessingUseCase.class);
private final StartConfiguration configuration;
private final RunLockPort runLockPort;
@@ -76,7 +74,7 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
* @param pdfTextExtractionPort for extracting text and page count from a single PDF
* @throws NullPointerException if any parameter is null
*/
public M2BatchRunProcessingUseCase(
public BatchRunProcessingUseCase(
StartConfiguration configuration,
RunLockPort runLockPort,
SourceDocumentCandidatesPort sourceDocumentCandidatesPort,
@@ -116,7 +114,7 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
}
LOG.info("Found {} PDF candidate(s) in source folder.", candidates.size());
// Step 3: Process each candidate through the M3 pipeline
// Step 3: Process each candidate through the pipeline
for (SourceDocumentCandidate candidate : candidates) {
processCandidate(candidate);
}
@@ -143,20 +141,20 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
}
/**
* Processes a single PDF candidate through the complete M3 pipeline.
* Processes a single PDF candidate through the complete pipeline.
* <p>
* M3 processing steps per document:
* Processing steps per document:
* <ol>
* <li>Log candidate recognition</li>
* <li>Extract text and page count from the PDF via {@link PdfTextExtractionPort}</li>
* <li>Process extraction result through M3 pre-checks via {@link M3DocumentProcessingService}</li>
* <li>Log extraction outcome and final M3 decision</li>
* <li>Process extraction result through pre-checks via {@link DocumentProcessingService}</li>
* <li>Log extraction outcome and final decision</li>
* </ol>
* <p>
* Per-document errors (extraction failure, technical error, pre-check failure) do not abort the overall
* batch run. Processing of each candidate ends in a controlled manner regardless of its outcome.
* <p>
* M3 processing boundary: no KI call, no persistence, no filename generation,
* Processing boundary: no KI call, no persistence, no filename generation,
* no target file copy is initiated here, even for candidates that pass all pre-checks.
*
* @param candidate the candidate to process
@@ -179,20 +177,20 @@ public class M2BatchRunProcessingUseCase implements RunBatchProcessingUseCase {
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
// Process through complete M3 pipeline
var m3Outcome = M3DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
// Process through complete pipeline
var outcome = DocumentProcessingService.processDocument(candidate, extractionResult, configuration);
// Log M3 processing outcome
switch (m3Outcome) {
case M3PreCheckPassed passed ->
LOG.info("M3 pre-checks PASSED for '{}'. Candidate ready for further processing (M4+).",
// Log processing outcome
switch (outcome) {
case PreCheckPassed passed ->
LOG.info("Pre-checks PASSED for '{}'. Candidate ready for further processing.",
candidate.uniqueIdentifier());
case M3PreCheckFailed failed ->
LOG.info("M3 pre-checks FAILED for '{}': {} (Deterministic content error may retry in later run).",
case PreCheckFailed failed ->
LOG.info("Pre-checks FAILED for '{}': {} (Deterministic content error may retry in later run).",
candidate.uniqueIdentifier(), failed.failureReason());
case M3TechnicalDocumentError technicalError ->
LOG.warn("M3 processing FAILED for '{}': {} (Technical error may retry in later run).",
case TechnicalDocumentError technicalError ->
LOG.warn("Processing FAILED for '{}': {} (Technical error may retry in later run).",
candidate.uniqueIdentifier(), technicalError.errorMessage());
}
}
}
}

View File

@@ -5,8 +5,8 @@
* <ul>
* <li>{@link de.gecheckt.pdf.umbenenner.application.usecase.NoOpRunBatchProcessingUseCase}
* — Minimal no-op for technical validation without start protection</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.usecase.M2BatchRunProcessingUseCase}
* — M2 production implementation with run lock and controlled batch cycle (AP-004)</li>
* <li>{@link de.gecheckt.pdf.umbenenner.application.usecase.BatchRunProcessingUseCase}
* — Production implementation with run lock and controlled batch cycle</li>
* </ul>
* <p>
* All implementations are infrastructure-agnostic and interact only through ports.