"""Compute several content hashes (SHA256, BLAKE3, CRC, short prefixes) of a file."""

import binascii
import hashlib
import os.path
import sys
from typing import Dict, Optional

import crcmod
import pure_blake3
import requests

# Read size used when streaming files and downloads; it does not affect any digest.
_CHUNK_SIZE = 0x10000

# Seconds to wait for the server to connect / send data before giving up.
_DOWNLOAD_TIMEOUT_SECONDS = 60


def get_hash_string(hash_bytes: bytes) -> str:
    """Return *hash_bytes* as a lowercase hexadecimal string."""
    return binascii.hexlify(hash_bytes).decode()


def compute_AutoV1Hash(file_stream) -> Optional[str]:
    """Return the first 8 hex chars of the SHA256 of a fixed 64 KiB window.

    The window starts at offset 0x100000 (1 MiB) and is 0x10000 bytes long.

    Args:
        file_stream: A seekable binary stream. Its position is moved.

    Returns:
        The 8-character hex prefix, or ``None`` when the stream is shorter
        than 2 MiB (0x100000 * 2 bytes).
    """
    min_file_size = 0x100000 * 2
    # seek(0, 2) jumps to the end and returns the resulting offset, i.e. the size.
    if file_stream.seek(0, 2) < min_file_size:
        return None

    file_stream.seek(0x100000)
    window = file_stream.read(0x10000)
    return hashlib.sha256(window).hexdigest()[:8]


def ComputeCRC32Hash(file_stream) -> str:
    """Return the hex digest of the stream's checksum, read from offset 0.

    Uses crcmod's predefined ``'crc-32c'`` algorithm.
    NOTE(review): the result is labelled "CRC32" by the caller, but
    'crc-32c' is a different algorithm from plain 'crc-32' -- confirm that
    this is the intended variant before relying on the value.

    Args:
        file_stream: A seekable binary stream. It is rewound to offset 0
            and consumed to the end.
    """
    crc = crcmod.predefined.Crc('crc-32c')
    file_stream.seek(0)
    for chunk in iter(lambda: file_stream.read(_CHUNK_SIZE), b""):
        crc.update(chunk)
    return get_hash_string(crc.digest())


headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
}


def _download_to_cache(url: str, destination: str) -> None:
    """Stream *url* into *destination*, leaving no partial file behind on failure."""
    partial_path = f"{destination}.part"
    try:
        with requests.get(
            url,
            headers=headers,
            stream=True,
            timeout=_DOWNLOAD_TIMEOUT_SECONDS,
        ) as response:
            # Refuse to cache (and later hash) an HTTP error page.
            response.raise_for_status()
            with open(partial_path, "wb") as out:
                for chunk in response.iter_content(chunk_size=_CHUNK_SIZE):
                    out.write(chunk)
        # Publish the file only once it is complete, so an interrupted
        # download is never mistaken for a valid cached copy.
        os.replace(partial_path, destination)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)


def generate_model_hashes(file_path: str) -> Dict[str, Optional[str]]:
    """Compute the hash set for a local file or an HTTP(S) URL.

    When *file_path* is an ``http://`` or ``https://`` URL, the content is
    downloaded once to ``/tmp/clamd_<name>`` (``<name>`` being the last path
    segment of the URL without its query string) and that cached copy is
    reused on later calls.
    NOTE(review): two different URLs with the same last path segment share
    one cache entry.

    Args:
        file_path: Path of a local file, or an HTTP(S) URL.

    Returns:
        A dict with the keys ``"SHA256"``, ``"AutoV1"``, ``"AutoV2"``,
        ``"BLAKE3"`` and ``"CRC32"``. ``"AutoV2"`` is the first 10 hex
        characters of the SHA256. ``"AutoV1"`` is ``None`` for files smaller
        than 2 MiB.

    Raises:
        requests.RequestException: If the download fails or the server
            answers with an error status.
        OSError: If the file cannot be read or the cache cannot be written.
    """
    if file_path.startswith(("http://", "https://")):
        tmp_path = f'/tmp/clamd_{file_path.split("/")[-1].split("?")[0]}'
        if not os.path.exists(tmp_path):
            _download_to_cache(file_path, tmp_path)
        file_path = tmp_path

    sha256 = hashlib.sha256()
    blake3_hasher = pure_blake3.Hasher()

    # A single handle is shared by all hashers (each helper seeks to where it
    # needs to read) and is reliably closed by the context manager.
    with open(file_path, "rb") as file_stream:
        for chunk in iter(lambda: file_stream.read(_CHUNK_SIZE), b""):
            sha256.update(chunk)
            blake3_hasher.update(chunk)
        auto_v1_hash = compute_AutoV1Hash(file_stream)
        crc32_hash = ComputeCRC32Hash(file_stream)

    sha256_hash = sha256.hexdigest()

    return {
        "SHA256": sha256_hash,
        "AutoV1": auto_v1_hash,
        "AutoV2": sha256_hash[:10],
        "BLAKE3": blake3_hasher.finalize().hex(),
        "CRC32": crc32_hash,
    }


if __name__ == "__main__":
    print(generate_model_hashes(".gitignore"))