Fix #60: SHA-256-Fingerprint streaming statt Files.readAllBytes berechnen
Files.readAllBytes laedt grosse PDFs vollstaendig in den Heap und riskiert OutOfMemoryError. Die Berechnung nutzt jetzt einen DigestInputStream mit 8-KB-Puffer in try-with-resources. Das Hash-Ergebnis ist bitidentisch zur vorigen Implementierung, die Exception-Semantik bleibt unveraendert.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
+16
-5
@@ -1,6 +1,8 @@
|
|||||||
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
|
package de.gecheckt.pdf.umbenenner.adapter.out.fingerprint;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.security.DigestInputStream;
|
||||||
import java.nio.file.Files;
|
import java.nio.file.Files;
|
||||||
import java.nio.file.InvalidPathException;
|
import java.nio.file.InvalidPathException;
|
||||||
import java.nio.file.Path;
|
import java.nio.file.Path;
|
||||||
@@ -45,7 +47,7 @@ public class Sha256FingerprintAdapter implements FingerprintPort {
|
|||||||
* The implementation:
|
* The implementation:
|
||||||
* <ol>
|
* <ol>
|
||||||
* <li>Resolves the opaque locator to a filesystem path</li>
|
* <li>Resolves the opaque locator to a filesystem path</li>
|
||||||
* <li>Reads the entire file content</li>
|
* <li>Reads the file content in chunks via a streaming digest</li>
|
||||||
* <li>Applies SHA-256 hashing</li>
|
* <li>Applies SHA-256 hashing</li>
|
||||||
* <li>Returns the hex-encoded result wrapped in a {@link FingerprintSuccess}</li>
|
* <li>Returns the hex-encoded result wrapped in a {@link FingerprintSuccess}</li>
|
||||||
* </ol>
|
* </ol>
|
||||||
@@ -113,8 +115,9 @@ public class Sha256FingerprintAdapter implements FingerprintPort {
|
|||||||
/**
|
/**
|
||||||
* Computes the SHA-256 hash of the file content at the given path.
|
* Computes the SHA-256 hash of the file content at the given path.
|
||||||
* <p>
|
* <p>
|
||||||
* Reads the entire file content and applies SHA-256 hashing to produce
|
* Reads the file block by block through a {@link DigestInputStream} to keep heap usage
|
||||||
* a lowercase hexadecimal representation of the digest.
|
* low for large PDFs. The resulting hash is bit-identical to hashing the entire file
|
||||||
|
* content in a single pass.
|
||||||
*
|
*
|
||||||
* @param filePath the path to the file to hash; must not be null
|
* @param filePath the path to the file to hash; must not be null
|
||||||
* @return the lowercase hexadecimal representation of the SHA-256 digest (64 characters)
|
* @return the lowercase hexadecimal representation of the SHA-256 digest (64 characters)
|
||||||
@@ -123,8 +126,16 @@ public class Sha256FingerprintAdapter implements FingerprintPort {
|
|||||||
*/
|
*/
|
||||||
private String computeSha256Hash(Path filePath) throws IOException, NoSuchAlgorithmException {
|
private String computeSha256Hash(Path filePath) throws IOException, NoSuchAlgorithmException {
|
||||||
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
MessageDigest digest = MessageDigest.getInstance("SHA-256");
|
||||||
byte[] fileBytes = Files.readAllBytes(filePath);
|
// Streaming: the file is read in 8 KB blocks so that even very large PDFs
|
||||||
byte[] hashBytes = digest.digest(fileBytes);
|
// never have to be loaded into the heap in full.
|
||||||
|
byte[] buf = new byte[8192];
|
||||||
|
try (InputStream is = Files.newInputStream(filePath);
|
||||||
|
DigestInputStream dis = new DigestInputStream(is, digest)) {
|
||||||
|
while (dis.read(buf) != -1) {
|
||||||
|
// DigestInputStream automatically forwards every block it reads to the MessageDigest
|
||||||
|
}
|
||||||
|
}
|
||||||
|
byte[] hashBytes = digest.digest();
|
||||||
return bytesToHex(hashBytes);
|
return bytesToHex(hashBytes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user