model-scan-2 / scan_hash.py
pengdaqian
fix now
301572e
import binascii
import hashlib
import os.path
import sys
from typing import Dict, Optional

import crcmod
import pure_blake3
import requests
def get_hash_string(hash_bytes: bytes) -> str:
    """Return ``hash_bytes`` rendered as a lowercase hexadecimal string."""
    hex_encoded = binascii.b2a_hex(hash_bytes)
    return hex_encoded.decode()
def compute_AutoV1Hash(file_stream) -> Optional[str]:
    """Compute the short "AutoV1" hash of a seekable binary stream.

    The hash is the first 8 hexadecimal characters of the SHA-256 digest of
    the 0x10000 bytes that start at offset 0x100000 of the stream.

    Args:
        file_stream: A seekable binary file-like object. Its position is
            moved by this function and is not restored afterwards.

    Returns:
        The 8-character hex string, or ``None`` when the stream is smaller
        than 0x200000 bytes.
    """
    sample_offset = 0x100000
    sample_size = 0x10000
    min_file_size = 0x100000 * 2

    # seek(0, 2) moves to the end of the stream (whence=2) and returns that
    # position, i.e. the total size of the stream in bytes.
    if file_stream.seek(0, 2) < min_file_size:
        return None

    file_stream.seek(sample_offset)
    sample = file_stream.read(sample_size)
    return hashlib.sha256(sample).hexdigest()[:8]
def ComputeCRC32Hash(file_stream) -> str:
    """Return the hex-encoded CRC digest of the whole stream.

    The stream is rewound to offset 0 and consumed in 4096-byte reads.
    Note that the crcmod predefined algorithm selected here is 'crc-32c',
    even though the function name only says "CRC32".
    """
    checksum = crcmod.predefined.Crc('crc-32c')
    file_stream.seek(0)
    while True:
        block = file_stream.read(4096)
        # An empty bytes object signals end of stream.
        if block == b"":
            break
        checksum.update(block)
    raw_digest = checksum.digest()
    return get_hash_string(raw_digest)
# HTTP request headers sent when a model file is fetched from a URL.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/78.0.3904.108 Safari/537.36"
    ),
}
def _download_to_cache(url: str) -> str:
    """Download ``url`` into /tmp (unless already cached) and return the path.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Cache file name: last URL path segment with any query string removed.
    tmp_path = f'/tmp/clamd_{url.split("/")[-1].split("?")[0]}'
    if os.path.exists(tmp_path):
        return tmp_path

    # Write to a process-specific partial file and rename it into place only
    # once complete, so an interrupted download is never mistaken for a
    # finished one by the existence check above.
    partial_path = f"{tmp_path}.{os.getpid()}.part"
    try:
        with requests.get(url, headers=headers, stream=True, timeout=60) as resp:
            # Fail loudly instead of caching and hashing an HTTP error page.
            resp.raise_for_status()
            with open(partial_path, "wb") as f:
                # Stream to disk so the file is never held fully in memory.
                for chunk in resp.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        os.replace(partial_path, tmp_path)
    finally:
        # Only present if the download or the rename failed.
        if os.path.exists(partial_path):
            os.remove(partial_path)
    return tmp_path


def generate_model_hashes(file_path: str) -> Dict[str, Optional[str]]:
    """Compute the set of identifying hashes for a model file.

    Args:
        file_path: Path of a local file, or a URL starting with "http". A URL
            is downloaded to ``/tmp/clamd_<name>`` first; an already existing
            file at that location is reused without downloading again.

    Returns:
        A dict with the keys "SHA256", "AutoV1", "AutoV2", "BLAKE3" and
        "CRC32". "AutoV2" is the first 10 hex characters of the SHA-256
        digest. "AutoV1" is ``None`` when ``compute_AutoV1Hash`` returns
        ``None`` (file too small for that hash).

    Raises:
        requests.HTTPError: If ``file_path`` is a URL and the download fails
            with an error status.
        OSError: If the file cannot be opened or read.
    """
    if file_path.startswith("http"):
        file_path = _download_to_cache(file_path)

    sha256 = hashlib.sha256()
    blake3_hasher = pure_blake3.Hasher()
    with open(file_path, "rb") as file_stream:
        # Feed both streaming hashers in a single pass over the file.
        for chunk in iter(lambda: file_stream.read(4096), b""):
            sha256.update(chunk)
            blake3_hasher.update(chunk)
        # Both helpers seek to the offsets they need before reading, so the
        # same handle can be reused and is closed deterministically.
        auto_v1_hash = compute_AutoV1Hash(file_stream)
        crc32_hash = ComputeCRC32Hash(file_stream)

    sha256_hash = get_hash_string(sha256.digest())
    return {
        "SHA256": sha256_hash,
        "AutoV1": auto_v1_hash,
        "AutoV2": sha256_hash[:10],
        "BLAKE3": blake3_hasher.finalize().hex(),
        "CRC32": crc32_hash,
    }
if __name__ == "__main__":
    # Script entry point: hash the local ".gitignore" file and print the
    # resulting dict.
    model_hashes = generate_model_hashes(".gitignore")
    print(model_hashes)