M4 AP-002 SHA-256-Fingerprint-Adapter implementieren

This commit is contained in:
2026-04-02 19:38:53 +02:00
parent 7448d1340b
commit cae9c944d7
5 changed files with 338 additions and 0 deletions
@@ -0,0 +1,157 @@
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintResult;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintSuccess;
import de.gecheckt.pdf.umbenenner.application.port.out.FingerprintTechnicalError;
import de.gecheckt.pdf.umbenenner.domain.model.DocumentFingerprint;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentCandidate;
import de.gecheckt.pdf.umbenenner.domain.model.SourceDocumentLocator;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * SHA-256-based implementation of {@link FingerprintPort}.
 * <p>
 * Computes deterministic, content-based fingerprints for PDF documents by applying
 * SHA-256 to the raw file content. The implementation ensures that:
 * <ul>
 * <li>Fingerprints are derived exclusively from file content, not metadata</li>
 * <li>Identical content always produces the same fingerprint</li>
 * <li>Different content produces different fingerprints, up to the collision
 * resistance of SHA-256 (collisions are theoretically possible but practically
 * negligible)</li>
 * <li>All file I/O and cryptographic operations remain in the adapter layer</li>
 * </ul>
 * <p>
 * The file is streamed through the digest in fixed-size chunks, so the memory
 * needed for hashing does not grow with the size of the document.
 * <p>
 * <strong>Technical failure handling:</strong> Any I/O errors, path resolution issues,
 * or cryptographic problems are converted to {@link FingerprintTechnicalError} results
 * without throwing exceptions. Pre-fingerprint failures are not historized in SQLite.
 *
 * @since M4-AP-002
 */
public class Sha256FingerprintAdapter implements FingerprintPort {

    private static final Logger logger = LogManager.getLogger(Sha256FingerprintAdapter.class);

    /** Name of the digest algorithm as understood by {@link MessageDigest#getInstance(String)}. */
    private static final String HASH_ALGORITHM = "SHA-256";

    /** Size in bytes of the chunk buffer used while streaming a file through the digest. */
    private static final int READ_BUFFER_SIZE = 8192;

    /** Lookup table mapping a 4-bit value to its lowercase hexadecimal character. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /**
     * Computes the SHA-256 fingerprint for the given candidate.
     * <p>
     * The implementation:
     * <ol>
     * <li>Resolves the opaque locator to a filesystem path</li>
     * <li>Streams the file content through a SHA-256 digest</li>
     * <li>Returns the hex-encoded result wrapped in a {@link FingerprintSuccess}</li>
     * </ol>
     * <p>
     * Any technical failures during these steps are caught and returned as
     * {@link FingerprintTechnicalError} without throwing exceptions.
     *
     * @param candidate the candidate whose file content is to be hashed; must not be null
     * @return {@link FingerprintSuccess} on success, or {@link FingerprintTechnicalError}
     *         on any infrastructure failure
     * @throws NullPointerException if {@code candidate} is null
     */
    @Override
    public FingerprintResult computeFingerprint(SourceDocumentCandidate candidate) {
        if (candidate == null) {
            throw new NullPointerException("candidate must not be null");
        }
        try {
            Path filePath = resolveFilePath(candidate.locator());
            String sha256Hex = computeSha256Hash(filePath);
            DocumentFingerprint fingerprint = new DocumentFingerprint(sha256Hex);
            logger.debug("Successfully computed fingerprint for '{}': {}",
                    candidate.uniqueIdentifier(), sha256Hex);
            return new FingerprintSuccess(fingerprint);
        } catch (IOException e) {
            // Expected operational failure (missing file, permissions, ...): warn only.
            String errorMsg = String.format("Failed to read file for '%s': %s",
                    candidate.uniqueIdentifier(), e.getMessage());
            logger.warn(errorMsg, e);
            return new FingerprintTechnicalError(errorMsg, e);
        } catch (InvalidPathException e) {
            String errorMsg = String.format("Invalid file path for '%s': %s",
                    candidate.uniqueIdentifier(), e.getMessage());
            logger.warn(errorMsg, e);
            return new FingerprintTechnicalError(errorMsg, e);
        } catch (NoSuchAlgorithmException e) {
            // Indicates a broken runtime rather than a problem with this document.
            String errorMsg = String.format("SHA-256 algorithm not available for '%s'",
                    candidate.uniqueIdentifier());
            logger.error(errorMsg, e);
            return new FingerprintTechnicalError(errorMsg, e);
        } catch (Exception e) {
            // Adapter boundary: the port contract is to report failures as results,
            // so anything unexpected (e.g. a null locator) is converted here as well.
            String errorMsg = String.format("Unexpected error computing fingerprint for '%s': %s",
                    candidate.uniqueIdentifier(), e.getMessage());
            logger.error(errorMsg, e);
            return new FingerprintTechnicalError(errorMsg, e);
        }
    }

    /**
     * Resolves the opaque locator value to a filesystem path.
     * <p>
     * The locator's value is expected to contain an absolute file path as a string.
     * This is the intra-adapter convention between the source document scanner and
     * this fingerprint adapter.
     *
     * @param locator the opaque locator containing the file path; must not be null
     * @return the resolved filesystem path
     * @throws InvalidPathException if the locator value is not a valid path
     */
    private Path resolveFilePath(SourceDocumentLocator locator) throws InvalidPathException {
        return Paths.get(locator.value());
    }

    /**
     * Computes the SHA-256 hash of the file content at the given path.
     * <p>
     * The file is read in chunks of {@link #READ_BUFFER_SIZE} bytes and fed into the
     * digest incrementally instead of being loaded into memory as a whole. This keeps
     * heap usage constant and avoids an {@link OutOfMemoryError} on very large files,
     * which would otherwise escape the exception handling of the caller.
     *
     * @param filePath the path to the file to hash; must not be null
     * @return the lowercase hexadecimal representation of the SHA-256 digest (64 characters)
     * @throws IOException if opening or reading the file fails
     * @throws NoSuchAlgorithmException if the SHA-256 algorithm is not available
     */
    private String computeSha256Hash(Path filePath) throws IOException, NoSuchAlgorithmException {
        // Obtain the digest first so a missing algorithm is reported before any I/O happens.
        MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
        byte[] buffer = new byte[READ_BUFFER_SIZE];
        try (InputStream in = Files.newInputStream(filePath)) {
            int bytesRead;
            while ((bytesRead = in.read(buffer)) != -1) {
                digest.update(buffer, 0, bytesRead);
            }
        }
        return bytesToHex(digest.digest());
    }

    /**
     * Converts a byte array to a lowercase hexadecimal string.
     * <p>
     * Each byte is represented by exactly two hexadecimal characters, high nibble first.
     *
     * @param bytes the byte array to convert; must not be null
     * @return the lowercase hexadecimal representation
     */
    private static String bytesToHex(byte[] bytes) {
        char[] hexChars = new char[bytes.length * 2];
        for (int i = 0; i < bytes.length; i++) {
            // Mask to treat the signed byte as an unsigned value in the range 0..255.
            int value = bytes[i] & 0xff;
            hexChars[2 * i] = HEX_DIGITS[value >>> 4];
            hexChars[2 * i + 1] = HEX_DIGITS[value & 0x0f];
        }
        return new String(hexChars);
    }
}
@@ -0,0 +1,12 @@
/**
* SHA-256 fingerprint adapter for computing content-based document fingerprints.
*
* <p>This package contains the concrete implementation of the {@link de.gecheckt.pdf.umbenenner.application.port.out.FingerprintPort}
* that computes SHA-256 hashes of PDF document content to create stable, deterministic fingerprints.
*
* <p>All file I/O and cryptographic operations are strictly confined to this adapter layer,
* maintaining the hexagonal architecture boundary.
*
* @since M4-AP-002
*/
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;