Fix #60: SHA-256-Fingerprint streaming statt Files.readAllBytes berechnen

Files.readAllBytes laedt grosse PDFs vollstaendig in den Heap und riskiert OutOfMemoryError. Die Berechnung nutzt jetzt einen DigestInputStream mit 8-KB-Puffer in try-with-resources. Das Hash-Ergebnis ist bitidentisch zur vorigen Implementation, die Exception-Semantik bleibt unveraendert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-04-29 06:21:25 +02:00
parent d10a572b50
commit 8aaa3331d7
1 changed files with 16 additions and 5 deletions
@@ -1,6 +1,8 @@
 package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
 import java.io.IOException;
 import java.io.InputStream;
 import java.security.DigestInputStream;
 import java.nio.file.Files;
 import java.nio.file.InvalidPathException;
 import java.nio.file.Path;
@@ -45,7 +47,7 @@ public class Sha256FingerprintAdapter implements FingerprintPort {
     * The implementation:
     * <ol>
     *   <li>Resolves the opaque locator to a filesystem path</li>
-     *   <li>Reads the entire file content</li>
+     *   <li>Reads the file content in chunks via a streaming digest</li>
     *   <li>Applies SHA-256 hashing</li>
     *   <li>Returns the hex-encoded result wrapped in a {@link FingerprintSuccess}</li>
     * </ol>
@@ -113,8 +115,9 @@ public class Sha256FingerprintAdapter implements FingerprintPort {
    /**
     * Computes the SHA-256 hash of the file content at the given path.
     * <p>
-     * Reads the entire file content and applies SHA-256 hashing to produce
+     * Reads the file block by block through a {@link DigestInputStream} to keep heap usage
-     * a lowercase hexadecimal representation of the digest.
+     * low for large PDFs. The resulting hash is bit-identical to
     * byte-wise processing of the entire file content.
     *
     * @param filePath the path to the file to hash; must not be null
     * @return the lowercase hexadecimal representation of the SHA-256 digest (64 characters)
@@ -123,8 +126,16 @@ public class Sha256FingerprintAdapter implements FingerprintPort {
     */
    private String computeSha256Hash(Path filePath) throws IOException, NoSuchAlgorithmException {
        MessageDigest digest = MessageDigest.getInstance("SHA-256");
-        byte[] fileBytes = Files.readAllBytes(filePath);
+        // Streaming processing: the file is read in 8 KB blocks so that even
-        byte[] hashBytes = digest.digest(fileBytes);
+        // very large PDFs do not have to be loaded into the heap in full.
        byte[] buf = new byte[8192];
        try (InputStream is = Files.newInputStream(filePath);
             DigestInputStream dis = new DigestInputStream(is, digest)) {
            while (dis.read(buf) != -1) {
                // DigestInputStream automatically forwards every block it reads to the MessageDigest
            }
        }
        byte[] hashBytes = digest.digest();
        return bytesToHex(hashBytes);
    }