From cae9c944d7705ff2deaf7c70547d0942ad683b29 Mon Sep 17 00:00:00 2001 From: Marcus van Elst Date: Thu, 2 Apr 2026 19:38:53 +0200 Subject: [PATCH] M4 AP-002 SHA-256-Fingerprint-Adapter implementieren --- pdf-umbenenner-adapter-out/pom.xml | 5 + .../fingerprint/Sha256FingerprintAdapter.java | 157 +++++++++++++++++ .../adapter/out/fingerprint/package-info.java | 12 ++ .../Sha256FingerprintAdapterTest.java | 158 ++++++++++++++++++ pom.xml | 6 + 5 files changed, 338 insertions(+) create mode 100644 pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapter.java create mode 100644 pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/package-info.java create mode 100644 pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapterTest.java diff --git a/pdf-umbenenner-adapter-out/pom.xml b/pdf-umbenenner-adapter-out/pom.xml index bd4ef3b..9e11df3 100644 --- a/pdf-umbenenner-adapter-out/pom.xml +++ b/pdf-umbenenner-adapter-out/pom.xml @@ -52,5 +52,10 @@ mockito-junit-jupiter test + + org.assertj + assertj-core + test + \ No newline at end of file diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapter.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapter.java new file mode 100644 index 0000000..3917154 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapter.java @@ -0,0 +1,157 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint; + +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; +import 
de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError; +import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.InvalidPathException; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; + +/** + * SHA-256-based implementation of {@link FingerprintPort}. + *

+ * Computes deterministic, content-based fingerprints for PDF documents by applying + * SHA-256 to the raw file content. The implementation ensures that: + *

+ *

+ * Technical failure handling: Any I/O errors, path resolution issues, + * or cryptographic problems are converted to {@link FingerprintTechnicalError} results + * without throwing exceptions. Pre-fingerprint failures are not historized in SQLite. + * + * @since M4-AP-002 + */ +public class Sha256FingerprintAdapter implements FingerprintPort { + + private static final Logger logger = LogManager.getLogger(Sha256FingerprintAdapter.class); + + /** + * Computes the SHA-256 fingerprint for the given candidate. + *

+     * The implementation:
+     * <ol>
+     *   <li>Resolves the opaque locator to a filesystem path</li>
+     *   <li>Reads the entire file content</li>
+     *   <li>Applies SHA-256 hashing</li>
+     *   <li>Returns the hex-encoded result wrapped in a {@link FingerprintSuccess}</li>
+     * </ol>
+     * <p>
+     * Every {@link Exception} raised while resolving the path, reading the file or
+     * hashing its content is logged and handed back as a
+     * {@link FingerprintTechnicalError}; the only exception this method throws itself
+     * is the {@link NullPointerException} for a missing candidate.
+     *
+     * @param candidate the document whose bytes are fingerprinted; must not be null
+     * @return a {@link FingerprintSuccess} carrying the digest, or a
+     *         {@link FingerprintTechnicalError} describing what went wrong
+     * @throws NullPointerException if {@code candidate} is null
+     */
+    @Override
+    public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
+        if (candidate == null) {
+            throw new NullPointerException("candidate must not be null");
+        }
+
+        try {
+            final Path source = resolveFilePath(candidate.locator());
+            final String hexDigest = computeSha256Hash(source);
+
+            // Build the domain value before logging, in case its constructor rejects the digest.
+            final DocumentFingerprint result = new DocumentFingerprint(hexDigest);
+            logger.debug("Successfully computed fingerprint for '{}': {}",
+                    candidate.uniqueIdentifier(), hexDigest);
+            return new FingerprintSuccess(result);
+
+        } catch (IOException ioFailure) {
+            // The file could not be opened or read.
+            final String message = String.format("Failed to read file for '%s': %s",
+                    candidate.uniqueIdentifier(), ioFailure.getMessage());
+            logger.warn(message, ioFailure);
+            return new FingerprintTechnicalError(message, ioFailure);
+        } catch (InvalidPathException pathFailure) {
+            // The locator string is not a syntactically valid path.
+            final String message = String.format("Invalid file path for '%s': %s",
+                    candidate.uniqueIdentifier(), pathFailure.getMessage());
+            logger.warn(message, pathFailure);
+            return new FingerprintTechnicalError(message, pathFailure);
+        } catch (NoSuchAlgorithmException algorithmFailure) {
+            // The runtime offers no SHA-256 provider.
+            final String message = String.format("SHA-256 algorithm not available for '%s'",
+                    candidate.uniqueIdentifier());
+            logger.error(message, algorithmFailure);
+            return new FingerprintTechnicalError(message, algorithmFailure);
+        } catch (Exception unexpected) {
+            // Last line of defence so that no exception leaves the adapter.
+            final String message = String.format("Unexpected error computing fingerprint for '%s': %s",
+                    candidate.uniqueIdentifier(), unexpected.getMessage());
+            logger.error(message, unexpected);
+            return new FingerprintTechnicalError(message, unexpected);
+        }
+    }
+
+    /**
+     * Resolves the opaque locator value to a filesystem path.
+     *

+     * The string held by the locator is handed to {@link Paths#get(String, String...)}
+     * unchanged. According to the original author the scanner stores an absolute path
+     * there; this method does not check that.
+     *
+     * @param locator the locator whose value is interpreted as a path; must not be null
+     * @return the path denoted by the locator value
+     * @throws InvalidPathException if the value cannot be converted to a path
+     */
+    private Path resolveFilePath(SourceDocumentLocator locator) throws InvalidPathException {
+        final String rawLocation = locator.value();
+        return Paths.get(rawLocation);
+    }
+
+    /**
+     * Computes the SHA-256 hash of the file content at the given path.
+     *

+     * The file is streamed through the digest in fixed-size chunks instead of being
+     * loaded into memory as a whole, so memory use stays constant regardless of file
+     * size and a large document cannot trigger an {@link OutOfMemoryError}. The result
+     * is the lowercase hexadecimal representation of the digest.
+     *
+     * @param filePath the path to the file to hash; must not be null
+     * @return the lowercase hexadecimal representation of the SHA-256 digest (64 characters)
+     * @throws IOException if opening or reading the file fails
+     * @throws NoSuchAlgorithmException if the SHA-256 algorithm is not available
+     */
+    private String computeSha256Hash(Path filePath) throws IOException, NoSuchAlgorithmException {
+        MessageDigest digest = MessageDigest.getInstance("SHA-256");
+        byte[] buffer = new byte[8192];
+        // InputStream is fully qualified so the file's import block stays untouched.
+        try (java.io.InputStream in = Files.newInputStream(filePath)) {
+            int read;
+            while ((read = in.read(buffer)) != -1) {
+                // Only the bytes actually read in this round belong to the file content.
+                digest.update(buffer, 0, read);
+            }
+        }
+        return bytesToHex(digest.digest());
+    }
+
+    /**
+     * Converts a byte array to a lowercase hexadecimal string.
+     *

+     * Every input byte contributes exactly two characters: first the high nibble,
+     * then the low nibble, each rendered as a digit from {@code 0-9a-f}.
+     *
+     * @param bytes the bytes to render; must not be null
+     * @return the lowercase hexadecimal text, twice as long as {@code bytes}
+     */
+    private String bytesToHex(byte[] bytes) {
+        StringBuilder out = new StringBuilder(bytes.length * 2);
+        for (int i = 0; i < bytes.length; i++) {
+            // Mask to 0..255 so negative bytes do not sign-extend.
+            int unsigned = bytes[i] & 0xff;
+            out.append(Character.forDigit(unsigned >>> 4, 16));
+            out.append(Character.forDigit(unsigned & 0x0f, 16));
+        }
+        return out.toString();
+    }
+}
\ No newline at end of file
diff --git a/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/package-info.java b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/package-info.java
new file mode 100644
index 0000000..ce7c605
--- /dev/null
+++ b/pdf-umbenenner-adapter-out/src/main/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/package-info.java
@@ -0,0 +1,12 @@
+/**
+ * SHA-256 fingerprint adapter for computing content-based document fingerprints.
+ *
+ *

This package contains the concrete implementation of the {@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort} + * that computes SHA-256 hashes of PDF document content to create stable, deterministic fingerprints. + * + *

All file I/O and cryptographic operations are strictly confined to this adapter layer, + * maintaining the hexagonal architecture boundary. + * + * @since M4-AP-002 + */ +package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint; \ No newline at end of file diff --git a/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapterTest.java b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapterTest.java new file mode 100644 index 0000000..a9bae75 --- /dev/null +++ b/pdf-umbenenner-adapter-out/src/test/java/de/gecheckt/pdf/umbenenner/adapter/out/fingerprint/Sha256FingerprintAdapterTest.java @@ -0,0 +1,158 @@ +package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint; + +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess; +import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate; +import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +/** + * Unit tests for {@link Sha256FingerprintAdapter}. 
+ *
+ * <p>Besides the shape and determinism of the result, the digest is compared against
+ * published SHA-256 test vectors, so a wrong algorithm or a wrong hex encoding is
+ * detected. Test files are written as UTF-8 to keep the hashed bytes independent of
+ * the platform default charset.
+ *
+ * @since M4-AP-002
+ */
+class Sha256FingerprintAdapterTest {
+
+    private Sha256FingerprintAdapter adapter;
+
+    @TempDir
+    Path tempDir;
+
+    @BeforeEach
+    void setUp() {
+        adapter = new Sha256FingerprintAdapter();
+    }
+
+    /**
+     * Writes {@code content} as UTF-8 into a file below the temporary directory.
+     *
+     * @param fileName name of the file to create
+     * @param content  text to store; the tests only use ASCII, so its length equals the byte count
+     * @return the path of the written file
+     * @throws IOException if the file cannot be written
+     */
+    private Path writeFile(String fileName, String content) throws IOException {
+        return Files.write(tempDir.resolve(fileName),
+                content.getBytes(java.nio.charset.StandardCharsets.UTF_8));
+    }
+
+    /**
+     * Builds a candidate whose locator carries the string form of {@code file}.
+     *
+     * @param fileName identifier passed as the candidate's first constructor argument
+     * @param size     size value passed to the candidate
+     * @param file     path the locator should point to
+     * @return the candidate handed to the adapter
+     */
+    private static SourceDocumentCandidate candidateFor(String fileName, int size, Path file) {
+        return new SourceDocumentCandidate(fileName, size, new SourceDocumentLocator(file.toString()));
+    }
+
+    /**
+     * Asserts that {@code result} is a success and returns its hex digest.
+     *
+     * @param result the adapter result to inspect
+     * @return the hex string stored in the fingerprint
+     */
+    private static String successHex(FingerprintResult result) {
+        assertThat(result).isInstanceOf(FingerprintSuccess.class);
+        return ((FingerprintSuccess) result).fingerprint().sha256Hex();
+    }
+
+    @Test
+    void computeFingerprint_shouldReturnSuccess_whenFileExistsAndReadable() throws IOException {
+        // Given
+        String content = "Test PDF content for fingerprinting";
+        Path testFile = writeFile("test.pdf", content);
+        SourceDocumentCandidate candidate = candidateFor("test.pdf", content.length(), testFile);
+
+        // When
+        FingerprintResult result = adapter.computeFingerprint(candidate);
+
+        // Then
+        String hex = successHex(result);
+        assertThat(hex).hasSize(64);
+        assertThat(hex).matches("[0-9a-f]{64}");
+    }
+
+    @Test
+    void computeFingerprint_shouldMatchKnownSha256Vector_forAbc() throws IOException {
+        // Given: the three bytes "abc", a standard SHA-256 test vector
+        Path testFile = writeFile("abc.pdf", "abc");
+        SourceDocumentCandidate candidate = candidateFor("abc.pdf", 3, testFile);
+
+        // When
+        FingerprintResult result = adapter.computeFingerprint(candidate);
+
+        // Then
+        assertThat(successHex(result))
+                .isEqualTo("ba7816bf8f01cfea414140de5dae2223b00361a396177a9cb410ff61f20015ad");
+    }
+
+    @Test
+    void computeFingerprint_shouldMatchKnownSha256Vector_forEmptyFile() throws IOException {
+        // Given: an empty file, whose digest is the SHA-256 of zero bytes
+        Path testFile = writeFile("empty.pdf", "");
+        SourceDocumentCandidate candidate = candidateFor("empty.pdf", 0, testFile);
+
+        // When
+        FingerprintResult result = adapter.computeFingerprint(candidate);
+
+        // Then
+        assertThat(successHex(result))
+                .isEqualTo("e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855");
+    }
+
+    @Test
+    void computeFingerprint_shouldReturnSameFingerprint_forSameContent() throws IOException {
+        // Given
+        String content = "Identical content for testing deterministic behavior";
+        Path testFile1 = writeFile("test1.pdf", content);
+        Path testFile2 = writeFile("test2.pdf", content);
+        SourceDocumentCandidate candidate1 = candidateFor("test1.pdf", content.length(), testFile1);
+        SourceDocumentCandidate candidate2 = candidateFor("test2.pdf", content.length(), testFile2);
+
+        // When
+        FingerprintResult result1 = adapter.computeFingerprint(candidate1);
+        FingerprintResult result2 = adapter.computeFingerprint(candidate2);
+
+        // Then
+        assertThat(successHex(result1)).isEqualTo(successHex(result2));
+    }
+
+    @Test
+    void computeFingerprint_shouldReturnDifferentFingerprints_forDifferentContent() throws IOException {
+        // Given
+        String content1 = "First PDF content";
+        String content2 = "Second PDF content";
+        Path testFile1 = writeFile("test1.pdf", content1);
+        Path testFile2 = writeFile("test2.pdf", content2);
+        SourceDocumentCandidate candidate1 = candidateFor("test1.pdf", content1.length(), testFile1);
+        SourceDocumentCandidate candidate2 = candidateFor("test2.pdf", content2.length(), testFile2);
+
+        // When
+        FingerprintResult result1 = adapter.computeFingerprint(candidate1);
+        FingerprintResult result2 = adapter.computeFingerprint(candidate2);
+
+        // Then
+        assertThat(successHex(result1)).isNotEqualTo(successHex(result2));
+    }
+
+    @Test
+    void computeFingerprint_shouldReturnTechnicalError_whenFileDoesNotExist() {
+        // Given
+        Path nonExistentFile = tempDir.resolve("nonexistent.pdf");
+        SourceDocumentCandidate candidate = candidateFor("nonexistent.pdf", 0, nonExistentFile);
+
+        // When
+        FingerprintResult result = adapter.computeFingerprint(candidate);
+
+        // Then
+        assertThat(result).isInstanceOf(FingerprintTechnicalError.class);
+        FingerprintTechnicalError error = (FingerprintTechnicalError) result;
+        assertThat(error.errorMessage()).contains("nonexistent.pdf");
+        assertThat(error.errorMessage()).contains("Failed to read file");
+        assertThat(error.cause()).isNotNull();
+    }
+
+    @Test
+    void computeFingerprint_shouldReturnTechnicalError_whenLocatorValueIsInvalid() {
+        // Given: NUL characters make the locator value an invalid path string
+        SourceDocumentLocator locator = new SourceDocumentLocator("\0invalid\0path");
+        SourceDocumentCandidate candidate = new SourceDocumentCandidate("invalid.pdf", 0, locator);
+
+        // When
+        FingerprintResult result = adapter.computeFingerprint(candidate);
+
+        // Then
+        assertThat(result).isInstanceOf(FingerprintTechnicalError.class);
+        FingerprintTechnicalError error = (FingerprintTechnicalError) result;
+        assertThat(error.errorMessage()).contains("invalid.pdf");
+        assertThat(error.errorMessage()).contains("Invalid file path");
+        assertThat(error.cause()).isNotNull();
+    }
+
+    @Test
+    void computeFingerprint_shouldThrowNullPointerException_whenCandidateIsNull() {
+        // When & Then
+        assertThatThrownBy(() -> adapter.computeFingerprint(null))
+                .isInstanceOf(NullPointerException.class)
+                .hasMessage("candidate must not be null");
+    }
+}
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index aca0225..fb69fb9 100644
--- a/pom.xml
+++ b/pom.xml
@@ -98,6 +98,12 @@ ${mockito.version} test + + org.assertj + assertj-core + 3.25.3 + test +