"""helpers for compute measures: hash, time benchmarks""" from pathlib import Path def hash_calculate(arr: any, debug: bool = False) -> str or bytes: """ Return computed hash from input variable (typically a numpy array). Args: arr: input variable debug: logging debug argument Returns: str or bytes: computed hash from input variable """ import hashlib import numpy as np from base64 import b64encode from src.utilities.utilities import setup_logging local_logger = setup_logging(debug) if isinstance(arr, np.ndarray): hash_fn = hashlib.sha256(arr.data) elif isinstance(arr, dict): import json from src.utilities.serialize import serialize serialized = serialize(arr) variable_to_hash = json.dumps(serialized, sort_keys=True).encode('utf-8') hash_fn = hashlib.sha256(variable_to_hash) elif isinstance(arr, str): try: hash_fn = hashlib.sha256(arr) except TypeError: local_logger.warning(f"TypeError, re-try encoding arg:{arr},type:{type(arr)}.") hash_fn = hashlib.sha256(arr.encode('utf-8')) elif isinstance(arr, bytes): hash_fn = hashlib.sha256(arr) else: raise ValueError(f"variable 'arr':{arr} not yet handled.") return b64encode(hash_fn.digest()) def sha256sum(filename: Path or str) -> str: """ Return computed hash for input file. Args: filename: input variable Returns: str: computed hash """ import hashlib import mmap h = hashlib.sha256() with open(filename, 'rb') as f: with mmap.mmap(f.fileno(), 0, prot=mmap.PROT_READ) as mm: h.update(mm) return h.hexdigest() def perf_counter() -> float: """ Performance counter for benchmarking. Returns: float: computed time value at execution time """ import time return time.perf_counter()