M4 AP-002 SHA-256-Fingerprint-Adapter implementieren
This commit is contained in:
@@ -52,5 +52,10 @@
|
||||
<artifactId>mockito-junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
<dependency>
|
||||
<groupId>org.assertj</groupId>
|
||||
<artifactId>assertj-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
</project>
|
||||
@@ -0,0 +1,157 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.InvalidPathException;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.security.MessageDigest;
|
||||
import java.security.NoSuchAlgorithmException;
|
||||
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
|
||||
/**
|
||||
* SHA-256-based implementation of {@link FingerprintPort}.
|
||||
* <p>
|
||||
* Computes deterministic, content-based fingerprints for PDF documents by applying
|
||||
* SHA-256 to the raw file content. The implementation ensures that:
|
||||
* <ul>
|
||||
* <li>Fingerprints are derived exclusively from file content, not metadata</li>
|
||||
* <li>Identical content always produces the same fingerprint</li>
|
||||
* <li>Different content always produces different fingerprints</li>
|
||||
* <li>All file I/O and cryptographic operations remain in the adapter layer</li>
|
||||
* </ul>
|
||||
* <p>
|
||||
* <strong>Technical failure handling:</strong> Any I/O errors, path resolution issues,
|
||||
* or cryptographic problems are converted to {@link FingerprintTechnicalError} results
|
||||
* without throwing exceptions. Pre-fingerprint failures are not historized in SQLite.
|
||||
*
|
||||
* @since M4-AP-002
|
||||
*/
|
||||
public class Sha256FingerprintAdapter implements FingerprintPort {
|
||||
|
||||
private static final Logger logger = LogManager.getLogger(Sha256FingerprintAdapter.class);
|
||||
|
||||
/**
|
||||
* Computes the SHA-256 fingerprint for the given candidate.
|
||||
* <p>
|
||||
* The implementation:
|
||||
* <ol>
|
||||
* <li>Resolves the opaque locator to a filesystem path</li>
|
||||
* <li>Reads the entire file content</li>
|
||||
* <li>Applies SHA-256 hashing</li>
|
||||
* <li>Returns the hex-encoded result wrapped in a {@link FingerprintSuccess}</li>
|
||||
* </ol>
|
||||
* <p>
|
||||
* Any technical failures during these steps are caught and returned as
|
||||
* {@link FingerprintTechnicalError} without throwing exceptions.
|
||||
*
|
||||
* @param candidate the candidate whose file content is to be hashed; must not be null
|
||||
* @return {@link FingerprintSuccess} on success, or {@link FingerprintTechnicalError}
|
||||
* on any infrastructure failure
|
||||
* @throws NullPointerException if {@code candidate} is null
|
||||
*/
|
||||
@Override
|
||||
public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
|
||||
if (candidate == null) {
|
||||
throw new NullPointerException("candidate must not be null");
|
||||
}
|
||||
|
||||
try {
|
||||
// Resolve the opaque locator to a filesystem path
|
||||
Path filePath = resolveFilePath(candidate.locator());
|
||||
|
||||
// Compute the SHA-256 hash of the file content
|
||||
String sha256Hex = computeSha256Hash(filePath);
|
||||
|
||||
// Create and return the successful result
|
||||
DocumentFingerprint fingerprint = new DocumentFingerprint(sha256Hex);
|
||||
logger.debug("Successfully computed fingerprint for '{}': {}",
|
||||
candidate.uniqueIdentifier(), sha256Hex);
|
||||
return new FingerprintSuccess(fingerprint);
|
||||
|
||||
} catch (IOException e) {
|
||||
String errorMsg = String.format("Failed to read file for '%s': %s",
|
||||
candidate.uniqueIdentifier(), e.getMessage());
|
||||
logger.warn(errorMsg, e);
|
||||
return new FingerprintTechnicalError(errorMsg, e);
|
||||
} catch (InvalidPathException e) {
|
||||
String errorMsg = String.format("Invalid file path for '%s': %s",
|
||||
candidate.uniqueIdentifier(), e.getMessage());
|
||||
logger.warn(errorMsg, e);
|
||||
return new FingerprintTechnicalError(errorMsg, e);
|
||||
} catch (NoSuchAlgorithmException e) {
|
||||
String errorMsg = String.format("SHA-256 algorithm not available for '%s'",
|
||||
candidate.uniqueIdentifier());
|
||||
logger.error(errorMsg, e);
|
||||
return new FingerprintTechnicalError(errorMsg, e);
|
||||
} catch (Exception e) {
|
||||
String errorMsg = String.format("Unexpected error computing fingerprint for '%s': %s",
|
||||
candidate.uniqueIdentifier(), e.getMessage());
|
||||
logger.error(errorMsg, e);
|
||||
return new FingerprintTechnicalError(errorMsg, e);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Resolves the opaque locator value to a filesystem path.
|
||||
* <p>
|
||||
* The locator's value is expected to contain an absolute file path as a string.
|
||||
* This is the intra-adapter convention between the source document scanner and
|
||||
* this fingerprint adapter.
|
||||
*
|
||||
* @param locator the opaque locator containing the file path; must not be null
|
||||
* @return the resolved filesystem path
|
||||
* @throws InvalidPathException if the locator value is not a valid path
|
||||
*/
|
||||
private Path resolveFilePath(SourceDocumentLocator locator) throws InvalidPathException {
|
||||
return Paths.get(locator.value());
|
||||
}
|
||||
|
||||
/**
|
||||
* Computes the SHA-256 hash of the file content at the given path.
|
||||
* <p>
|
||||
* Reads the entire file content and applies SHA-256 hashing to produce
|
||||
* a lowercase hexadecimal representation of the digest.
|
||||
*
|
||||
* @param filePath the path to the file to hash; must not be null
|
||||
* @return the lowercase hexadecimal representation of the SHA-256 digest (64 characters)
|
||||
* @throws IOException if reading the file fails
|
||||
* @throws NoSuchAlgorithmException if the SHA-256 algorithm is not available
|
||||
*/
|
||||
private String computeSha256Hash(Path filePath) throws IOException, NoSuchAlgorithmException {
|
||||
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
||||
byte[] fileBytes = Files.readAllBytes(filePath);
|
||||
byte[] hashBytes = digest.digest(fileBytes);
|
||||
return bytesToHex(hashBytes);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a byte array to a lowercase hexadecimal string.
|
||||
* <p>
|
||||
* Each byte is represented by exactly two hexadecimal characters.
|
||||
*
|
||||
* @param bytes the byte array to convert; must not be null
|
||||
* @return the lowercase hexadecimal representation
|
||||
*/
|
||||
private String bytesToHex(byte[] bytes) {
|
||||
StringBuilder hexString = new StringBuilder();
|
||||
for (byte b : bytes) {
|
||||
String hex = Integer.toHexString(0xff & b);
|
||||
if (hex.length() == 1) {
|
||||
hexString.append('0');
|
||||
}
|
||||
hexString.append(hex);
|
||||
}
|
||||
return hexString.toString();
|
||||
}
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
/**
|
||||
* SHA-256 fingerprint adapter for computing content-based document fingerprints.
|
||||
*
|
||||
* <p>This package contains the concrete implementation of the {@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
|
||||
* that computes SHA-256 hashes of PDF document content to create stable, deterministic fingerprints.
|
||||
*
|
||||
* <p>All file I/O and cryptographic operations are strictly confined to this adapter layer,
|
||||
* maintaining the hexagonal architecture boundary.
|
||||
*
|
||||
* @since M4-AP-002
|
||||
*/
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
|
||||
@@ -0,0 +1,158 @@
|
||||
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
|
||||
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
|
||||
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
|
||||
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
|
||||
|
||||
import org.junit.jupiter.api.BeforeEach;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.io.TempDir;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link Sha256FingerprintAdapter}.
|
||||
*
|
||||
* @since M4-AP-002
|
||||
*/
|
||||
class Sha256FingerprintAdapterTest {
|
||||
|
||||
private Sha256FingerprintAdapter adapter;
|
||||
|
||||
@TempDir
|
||||
Path tempDir;
|
||||
|
||||
@BeforeEach
|
||||
void setUp() {
|
||||
adapter = new Sha256FingerprintAdapter();
|
||||
}
|
||||
|
||||
@Test
|
||||
void computeFingerprint_shouldReturnSuccess_whenFileExistsAndReadable() throws IOException {
|
||||
// Given
|
||||
String content = "Test PDF content for fingerprinting";
|
||||
Path testFile = tempDir.resolve("test.pdf");
|
||||
Files.write(testFile, content.getBytes());
|
||||
|
||||
SourceDocumentLocator locator = new SourceDocumentLocator(testFile.toString());
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate("test.pdf", content.length(), locator);
|
||||
|
||||
// When
|
||||
FingerprintResult result = adapter.computeFingerprint(candidate);
|
||||
|
||||
// Then
|
||||
assertThat(result).isInstanceOf(FingerprintSuccess.class);
|
||||
FingerprintSuccess success = (FingerprintSuccess) result;
|
||||
assertThat(success.fingerprint().sha256Hex()).hasSize(64);
|
||||
assertThat(success.fingerprint().sha256Hex()).matches("[0-9a-f]{64}");
|
||||
}
|
||||
|
||||
@Test
|
||||
void computeFingerprint_shouldReturnSameFingerprint_forSameContent() throws IOException {
|
||||
// Given
|
||||
String content = "Identical content for testing deterministic behavior";
|
||||
Path testFile1 = tempDir.resolve("test1.pdf");
|
||||
Path testFile2 = tempDir.resolve("test2.pdf");
|
||||
Files.write(testFile1, content.getBytes());
|
||||
Files.write(testFile2, content.getBytes());
|
||||
|
||||
SourceDocumentLocator locator1 = new SourceDocumentLocator(testFile1.toString());
|
||||
SourceDocumentLocator locator2 = new SourceDocumentLocator(testFile2.toString());
|
||||
SourceDocumentCandidate candidate1 = new SourceDocumentCandidate("test1.pdf", content.length(), locator1);
|
||||
SourceDocumentCandidate candidate2 = new SourceDocumentCandidate("test2.pdf", content.length(), locator2);
|
||||
|
||||
// When
|
||||
FingerprintResult result1 = adapter.computeFingerprint(candidate1);
|
||||
FingerprintResult result2 = adapter.computeFingerprint(candidate2);
|
||||
|
||||
// Then
|
||||
assertThat(result1).isInstanceOf(FingerprintSuccess.class);
|
||||
assertThat(result2).isInstanceOf(FingerprintSuccess.class);
|
||||
|
||||
FingerprintSuccess success1 = (FingerprintSuccess) result1;
|
||||
FingerprintSuccess success2 = (FingerprintSuccess) result2;
|
||||
|
||||
assertThat(success1.fingerprint().sha256Hex())
|
||||
.isEqualTo(success2.fingerprint().sha256Hex());
|
||||
}
|
||||
|
||||
@Test
|
||||
void computeFingerprint_shouldReturnDifferentFingerprints_forDifferentContent() throws IOException {
|
||||
// Given
|
||||
String content1 = "First PDF content";
|
||||
String content2 = "Second PDF content";
|
||||
Path testFile1 = tempDir.resolve("test1.pdf");
|
||||
Path testFile2 = tempDir.resolve("test2.pdf");
|
||||
Files.write(testFile1, content1.getBytes());
|
||||
Files.write(testFile2, content2.getBytes());
|
||||
|
||||
SourceDocumentLocator locator1 = new SourceDocumentLocator(testFile1.toString());
|
||||
SourceDocumentLocator locator2 = new SourceDocumentLocator(testFile2.toString());
|
||||
SourceDocumentCandidate candidate1 = new SourceDocumentCandidate("test1.pdf", content1.length(), locator1);
|
||||
SourceDocumentCandidate candidate2 = new SourceDocumentCandidate("test2.pdf", content2.length(), locator2);
|
||||
|
||||
// When
|
||||
FingerprintResult result1 = adapter.computeFingerprint(candidate1);
|
||||
FingerprintResult result2 = adapter.computeFingerprint(candidate2);
|
||||
|
||||
// Then
|
||||
assertThat(result1).isInstanceOf(FingerprintSuccess.class);
|
||||
assertThat(result2).isInstanceOf(FingerprintSuccess.class);
|
||||
|
||||
FingerprintSuccess success1 = (FingerprintSuccess) result1;
|
||||
FingerprintSuccess success2 = (FingerprintSuccess) result2;
|
||||
|
||||
assertThat(success1.fingerprint().sha256Hex())
|
||||
.isNotEqualTo(success2.fingerprint().sha256Hex());
|
||||
}
|
||||
|
||||
@Test
|
||||
void computeFingerprint_shouldReturnTechnicalError_whenFileDoesNotExist() {
|
||||
// Given
|
||||
Path nonExistentFile = tempDir.resolve("nonexistent.pdf");
|
||||
SourceDocumentLocator locator = new SourceDocumentLocator(nonExistentFile.toString());
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate("nonexistent.pdf", 0, locator);
|
||||
|
||||
// When
|
||||
FingerprintResult result = adapter.computeFingerprint(candidate);
|
||||
|
||||
// Then
|
||||
assertThat(result).isInstanceOf(FingerprintTechnicalError.class);
|
||||
FingerprintTechnicalError error = (FingerprintTechnicalError) result;
|
||||
assertThat(error.errorMessage()).contains("nonexistent.pdf");
|
||||
assertThat(error.errorMessage()).contains("Failed to read file");
|
||||
assertThat(error.cause()).isNotNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void computeFingerprint_shouldReturnTechnicalError_whenLocatorValueIsInvalid() {
|
||||
// Given
|
||||
SourceDocumentLocator locator = new SourceDocumentLocator("\0invalid\0path");
|
||||
SourceDocumentCandidate candidate = new SourceDocumentCandidate("invalid.pdf", 0, locator);
|
||||
|
||||
// When
|
||||
FingerprintResult result = adapter.computeFingerprint(candidate);
|
||||
|
||||
// Then
|
||||
assertThat(result).isInstanceOf(FingerprintTechnicalError.class);
|
||||
FingerprintTechnicalError error = (FingerprintTechnicalError) result;
|
||||
assertThat(error.errorMessage()).contains("invalid.pdf");
|
||||
assertThat(error.errorMessage()).contains("Invalid file path");
|
||||
assertThat(error.cause()).isNotNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void computeFingerprint_shouldThrowNullPointerException_whenCandidateIsNull() {
|
||||
// When & Then
|
||||
assertThatThrownBy(() -> adapter.computeFingerprint(null))
|
||||
.isInstanceOf(NullPointerException.class)
|
||||
.hasMessage("candidate must not be null");
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user