# Spaces:
# Sleeping
# Sleeping
import binascii
import hashlib
import os.path
import sys
from typing import Dict, Optional

import crcmod
import pure_blake3
import requests
def get_hash_string(hash_bytes: bytes) -> str:
    """Return the lowercase hexadecimal text representation of ``hash_bytes``."""
    hex_encoded = binascii.hexlify(hash_bytes)
    # hexlify() yields ASCII bytes; decode them to get a regular str.
    return hex_encoded.decode()
def compute_AutoV1Hash(file_stream) -> Optional[str]:
    """Return the 8-hex-digit "AutoV1" fingerprint of a binary stream.

    The fingerprint is the first 8 hexadecimal characters of the SHA-256 of
    the 0x10000 bytes that start at offset 0x100000 in the stream.

    Args:
        file_stream: Seekable binary file-like object providing ``seek``,
            ``tell`` and ``read``. Its position is moved and not restored.

    Returns:
        The 8-character hex prefix, or ``None`` when the stream is shorter
        than 0x200000 bytes (twice the sample offset).
    """
    sample_offset = 0x100000
    sample_size = 0x10000
    min_file_size = sample_offset * 2

    # Measure the size with tell() rather than trusting seek()'s return
    # value, which some file-like objects do not provide.
    file_stream.seek(0, os.SEEK_END)
    if file_stream.tell() < min_file_size:
        return None

    file_stream.seek(sample_offset)
    sample = file_stream.read(sample_size)
    return hashlib.sha256(sample).hexdigest()[:8]
def ComputeCRC32Hash(file_stream) -> str:
    """Return the hex-encoded CRC digest of the stream's entire contents.

    The stream is rewound to offset 0 and consumed in 4096-byte reads until
    ``read`` returns ``b""``; its position is left at end-of-stream.

    NOTE(review): the checksum is crcmod's predefined ``'crc-32c'`` algorithm,
    which is not the same as the classic zlib-style CRC-32 the function name
    suggests. Confirm this is what consumers of the "CRC32" value expect.
    """
    checksum = crcmod.predefined.Crc('crc-32c')
    file_stream.seek(0)
    while True:
        block = file_stream.read(4096)
        if block == b"":
            break
        checksum.update(block)
    digest_bytes = checksum.digest()
    return get_hash_string(digest_bytes)
# Request headers passed to requests.get() when a model is fetched by URL.
# The User-Agent imitates desktop Chrome 78 on Windows 10, presumably so that
# hosts rejecting the default python-requests agent still serve the file
# (TODO confirm).
_CHROME_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
headers = {"user-agent": _CHROME_USER_AGENT}
def _download_to_cache(url: str, dest_path: str) -> None:
    """Stream ``url`` into ``dest_path`` without holding the whole body in memory."""
    partial_path = dest_path + ".part"
    with requests.get(url, headers=headers, stream=True) as response:
        # Fail loudly on 4xx/5xx instead of caching (and hashing) an error page.
        response.raise_for_status()
        with open(partial_path, "wb") as out_file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                out_file.write(chunk)
    # Publish the file only once it is complete, so an interrupted download
    # is never mistaken for a valid cache entry on the next call.
    os.replace(partial_path, dest_path)


def generate_model_hashes(file_path: str) -> Dict[str, Optional[str]]:
    """Compute the SHA256, AutoV1, AutoV2, BLAKE3 and CRC32 hashes of a file.

    Args:
        file_path: Path of a local file, or an ``http://`` / ``https://`` URL.
            A URL is downloaded once to ``/tmp/clamd_<name>``, where ``<name>``
            is the last path segment of the URL without its query string, and
            that cached copy is reused on later calls.

    Returns:
        A dict with the keys ``"SHA256"``, ``"AutoV1"``, ``"AutoV2"``,
        ``"BLAKE3"`` and ``"CRC32"``. All values are hex strings, except that
        ``"AutoV1"`` is ``None`` when ``compute_AutoV1Hash`` reports the file
        as too short. ``"AutoV2"`` is the first 10 characters of ``"SHA256"``.

    Raises:
        requests.HTTPError: If the download answers with an error status.
        OSError: If the local or cached file cannot be read or written.

    NOTE(review): the cache key is only the URL's file name, so two different
    URLs ending in the same name share one cache entry. Consider keying the
    cache on the full URL.
    """
    # Match real URL schemes only, so that a local path which merely begins
    # with "http" is still treated as a file.
    if file_path.startswith(("http://", "https://")):
        url = file_path
        file_name = url.split("/")[-1].split("?")[0]
        file_path = f"/tmp/clamd_{file_name}"
        if not os.path.exists(file_path):
            _download_to_cache(url, file_path)

    sha256 = hashlib.sha256()
    blake3_hasher = pure_blake3.Hasher()
    with open(file_path, "rb") as file_stream:
        for chunk in iter(lambda: file_stream.read(4096), b""):
            sha256.update(chunk)
            blake3_hasher.update(chunk)
        # Both helpers seek to the offsets they need, so the same handle can
        # be reused and is closed deterministically by the with-block.
        auto_v1_hash = compute_AutoV1Hash(file_stream)
        crc32_hash = ComputeCRC32Hash(file_stream)

    sha256_hash = sha256.hexdigest()
    return {
        "SHA256": sha256_hash,
        "AutoV1": auto_v1_hash,
        "AutoV2": sha256_hash[:10],
        "BLAKE3": blake3_hasher.finalize().hex(),
        "CRC32": crc32_hash,
    }
if __name__ == "__main__":
    # Hash the file or URL given as the first command-line argument. With no
    # argument, fall back to the original ".gitignore" smoke test.
    target = sys.argv[1] if len(sys.argv) > 1 else ".gitignore"
    print(generate_model_hashes(target))