Spaces:
Running on Zero
Running on Zero
DESCRIPTION: 5-track coverage + Resilient inference (BF16 merged + NF4 fallback) + expanded artifacts row
4286bef verified | """ | |
| SolarHive — Community Solar Intelligence | |
| Gradio app for HuggingFace Spaces (persistent GPU) | |
| Fine-tuned Gemma 4 26B A4B with native function calling for | |
| real-time solar production, weather, battery, and grid data. | |
| SolarHive is an open-source intelligence layer designed to coordinate | |
| community microgrids & community-based storage via fuel cells, pool | |
| midday energy surplus across these microgrids, and eliminate stranded | |
| capacity. It also helps forecast solar irradiance and cloud cover to | |
| plan ahead. Gemma 4 is the brain that powers it. | |
| Gemma 4 Good Hackathon — Google DeepMind x Kaggle | |
| """ | |
| import os | |
| import json | |
| import re | |
| import random | |
| import inspect | |
| import requests | |
| import torch | |
| import gradio as gr | |
| from datetime import datetime, timezone, timedelta | |
| from zoneinfo import ZoneInfo | |
| from PIL import Image | |
| from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig | |
| # ZeroGPU support — falls back to a no-op (spaces = None) for local testing | |
| try: | |
| import spaces | |
| except ImportError: | |
| spaces = None | |
| # ββ Constants ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Site coordinates sent to the weather, irradiance and PVWatts APIs.
LAT, LON = 42.2808, -83.7430
COMMUNITY_CAPACITY_KW = 72
BATTERY_CAPACITY_KWH = 100

# API credentials are read from the environment ONLY (HF Space secrets or a
# local shell export). Provider keys must not be committed to source, even
# free-tier ones: anyone can copy them, exhaust the daily quota, or get the
# key revoked, which takes the demo down for everybody.
# When a key is missing the tools below still answer: get_weather returns its
# static defaults, the EIA lookup uses _FALLBACK_GRID, and the PVWatts tool
# returns an error payload.
OWM_API_KEY = os.environ.get("OWM_API_KEY", "")
EIA_API_KEY = os.environ.get("EIA_API_KEY", "")
NREL_API_KEY = os.environ.get("NREL_API_KEY", "")

_missing_api_keys = [
    _name
    for _name, _value in (
        ("OWM_API_KEY", OWM_API_KEY),
        ("EIA_API_KEY", EIA_API_KEY),
        ("NREL_API_KEY", NREL_API_KEY),
    )
    if not _value
]
if _missing_api_keys:
    print(
        "[warn] Missing API keys: " + ", ".join(_missing_api_keys)
        + " (set them as environment variables; affected tools will use fallback data)"
    )
| # ββ Model Loading ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Hub repositories: the processor comes from the base model, the weights from
# the merged fine-tune.
BASE_MODEL_ID = "google/gemma-4-26b-a4b-it"
MODEL_ID = "Truthseeker87/solarhive-26b-a4b-merged"

# Pre-quantized NF4 variant, used when the BF16 weights do not fit the GPU
# that was allocated. It is loaded directly, without a BitsAndBytesConfig.
MODEL_ID_NF4_FALLBACK = "Truthseeker87/solarhive-26b-a4b-nf4"

# Opt-in switch: SOLARHIVE_FORCE_NF4=1 (also "true" / "yes", any letter case)
# skips the BF16 attempt and loads the NF4 variant straight away.
_FORCE_NF4_TRUE_VALUES = ("1", "true", "yes")
_force_nf4_setting = os.environ.get("SOLARHIVE_FORCE_NF4", "")
FORCE_NF4 = _force_nf4_setting.lower() in _FORCE_NF4_TRUE_VALUES
print(f"Loading processor from {BASE_MODEL_ID}")
processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
print(f"Loading fine-tuned model from {MODEL_ID}")


def _load_nf4_fallback_model():
    """Load the pre-quantized NF4 checkpoint onto cuda:0 and return it."""
    return AutoModelForCausalLM.from_pretrained(
        MODEL_ID_NF4_FALLBACK, device_map="cuda:0", trust_remote_code=True,
    )


if spaces is not None:
    # ZeroGPU: try BF16 first, fall back to the pre-quantized NF4 checkpoint
    # when the BF16 load fails (e.g. the allocated GPU is too small).
    # SOLARHIVE_FORCE_NF4 skips the BF16 attempt entirely.
    # https://huggingface.co/docs/hub/spaces-zerogpu
    if FORCE_NF4:
        print(f"SOLARHIVE_FORCE_NF4 set — loading pre-quantized NF4 from {MODEL_ID_NF4_FALLBACK}")
        model = _load_nf4_fallback_model()
        print("Model loaded in NF4 (forced via SOLARHIVE_FORCE_NF4 env var)")
    else:
        model = None
        _bf16_failure = None
        try:
            print(f"Loading BF16 from {MODEL_ID} (primary path)")
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID, dtype=torch.bfloat16, trust_remote_code=True,
            )
            model.to("cuda")
            print("Model loaded in BF16 for ZeroGPU (primary path)")
        except Exception as _bf16_err:
            # Keep only plain strings. While the handler is active, the
            # exception's traceback references the frames (and tensors) of the
            # failed load, so the recovery must happen AFTER this clause.
            _bf16_failure = (type(_bf16_err).__name__, str(_bf16_err))
        if _bf16_failure is not None:
            # Drop anything partially loaded so the fallback starts from
            # clean memory.
            model = None
            import gc as _gc
            _gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                # Best effort only: a failed cache flush must not block the fallback.
                pass
            print(f"BF16 load failed ({_bf16_failure[0]}): {_bf16_failure[1]}")
            print(f"Falling back to pre-quantized NF4 from {MODEL_ID_NF4_FALLBACK}")
            model = _load_nf4_fallback_model()
            print("Model loaded in NF4 (BF16 OOM fallback path)")
else:
    # Non-ZeroGPU: pick BF16 or on-the-fly NF4 quantization from free VRAM.
    _free = 0
    try:
        if torch.cuda.is_available():
            _free = torch.cuda.mem_get_info(0)[0] / 1e9  # bytes -> GB
    except Exception:
        # Leave _free at 0 so the smaller NF4 path is chosen.
        pass
    print(f"Available VRAM: {_free:.1f} GB")
    if _free >= 55:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, dtype=torch.bfloat16, device_map="auto", trust_remote_code=True,
        )
    else:
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4",
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, quantization_config=bnb_config,
            device_map="cuda:0", trust_remote_code=True,
        )
    # _free only exists on this branch, so the summary line lives here.
    print(f"Model loaded on {model.device} ({'BF16' if _free >= 55 else 'NF4'})")
| # ββ Tool Functions βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# NOTE: this function is registered in TOOLS and handed to
# processor.apply_chat_template(tools=...). The tool schema is presumably
# derived from the signature and docstring, so both are kept verbatim.
def get_weather(location: str = "Ann Arbor, MI") -> dict:
    """Gets current weather conditions for the community.

    Args:
        location: The city and state, e.g. "Ann Arbor, MI"

    Returns:
        Dictionary with temperature_f, clouds_pct, description, wind_mph, humidity_pct, sunrise, sunset.
    """
    # `location` is accepted for the tool schema only; the request always
    # targets the fixed site coordinates (LAT, LON).
    _tz = ZoneInfo("America/New_York")
    try:
        r = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={"lat": LAT, "lon": LON, "appid": OWM_API_KEY, "units": "imperial"},
            timeout=10,
        ).json()
        return {
            "temperature_f": r["main"]["temp"],
            "clouds_pct": r["clouds"]["all"],
            "description": r["weather"][0]["description"],
            "wind_mph": r["wind"]["speed"],
            "humidity_pct": r["main"]["humidity"],
            # The API reports epoch seconds; render them in site-local time.
            "sunrise": datetime.fromtimestamp(r["sys"]["sunrise"], tz=_tz).strftime("%H:%M"),
            "sunset": datetime.fromtimestamp(r["sys"]["sunset"], tz=_tz).strftime("%H:%M"),
        }
    except Exception as e:
        # Connection errors raised by `requests` can embed the full request
        # URL, including `appid=<key>`. This payload goes back to the model
        # and may reach the user, so the key is redacted first.
        detail = str(e)
        if OWM_API_KEY:
            detail = detail.replace(OWM_API_KEY, "***")
        # Static defaults keep downstream tools and the fallback UI working.
        return {"error": detail, "clouds_pct": 30, "temperature_f": 72,
                "description": "partly cloudy", "wind_mph": 5.0,
                "humidity_pct": 50, "sunrise": "07:00", "sunset": "20:00"}
def _get_current_ghi():
    """Return the current shortwave radiation (GHI, W/m2) from Open-Meteo.

    Queries the Open-Meteo forecast endpoint for the site coordinates and
    reads `current.shortwave_radiation` from the JSON reply. Any failure
    (network, bad JSON, missing field) yields None so the caller can switch
    to its cloud-cover estimate.
    """
    try:
        query = {"latitude": LAT, "longitude": LON, "current": "shortwave_radiation"}
        reply = requests.get(
            "https://api.open-meteo.com/v1/forecast",
            params=query,
            timeout=10,
        )
        payload = reply.json()
        ghi = payload["current"]["shortwave_radiation"]
    except Exception:
        ghi = None
    return ghi
# NOTE: registered in TOOLS; signature and docstring are kept verbatim because
# they are handed to the chat template as the tool description.
def get_solar_production(clouds_pct: int = 30, temp_f: float = 77.0) -> dict:
    """Estimates current community solar production using live solar irradiance data.

    Args:
        clouds_pct: Current cloud cover percentage (0-100). Get this from get_weather first.
        temp_f: Current temperature in Fahrenheit. Get this from get_weather first.

    Returns:
        Dictionary with production_kw, capacity_kw, efficiency_pct, ghi_wm2, temp_derate_pct, source.
    """
    # The tool-call parser maps `null` to None; fall back to the declared
    # defaults instead of crashing in int()/float().
    if clouds_pct is None:
        clouds_pct = 30
    if temp_f is None:
        temp_f = 77.0
    clouds_pct = max(0, min(100, int(clouds_pct)))
    temp_f = max(-40, min(130, float(temp_f)))
    # Overall system-loss factor.
    # NOTE(review): the factors originally listed here (inverter 97% x wiring
    # 98% x soiling 97% x mismatch 98%) multiply to ~0.90, not 0.85. The
    # configured value is kept; confirm which one is intended.
    SYSTEM_EFF = 0.85
    # Temperature derating: 0.4% per degree F above 77F, floored at 75%.
    # NOTE(review): silicon temperature coefficients are usually quoted per
    # degree C; verify that a per-degree-F slope is intended.
    temp_derate = max(0.75, 1.0 - 0.004 * max(0, temp_f - 77))
    ghi = _get_current_ghi()
    if ghi is not None:
        # Scale nameplate capacity by irradiance relative to 1000 W/m2.
        production = round(max(0, COMMUNITY_CAPACITY_KW * (ghi / 1000) * SYSTEM_EFF * temp_derate), 1)
        return {
            "production_kw": production,
            "capacity_kw": COMMUNITY_CAPACITY_KW,
            "efficiency_pct": round(production / COMMUNITY_CAPACITY_KW * 100, 1),
            "ghi_wm2": round(ghi, 1),
            "temp_derate_pct": round(temp_derate * 100, 1),
            "source": "open-meteo",
        }
    # Fallback: cloud-cover estimate shaped by a parabola around solar noon.
    efficiency = max(0.15, 0.85 - (clouds_pct / 100) * 0.70)
    # Use the SITE's local hour. The server clock may be in another timezone,
    # which would shift the daylight curve by several hours.
    hour = datetime.now(ZoneInfo("America/New_York")).hour
    time_factor = max(0, 1 - ((hour - 12) / 6) ** 2) if 6 <= hour <= 18 else 0
    production = round(COMMUNITY_CAPACITY_KW * efficiency * time_factor * temp_derate, 1)
    return {
        "production_kw": production,
        "capacity_kw": COMMUNITY_CAPACITY_KW,
        "efficiency_pct": round(production / COMMUNITY_CAPACITY_KW * 100, 1),
        "temp_derate_pct": round(temp_derate * 100, 1),
        "source": "fallback",
    }
class _BatterySimulator:
    """Simulated shared battery whose state of charge is fixed at creation.

    The state of charge is drawn once (55-85 %, one decimal) in __init__ and
    never modified afterwards, so every get_state() call on the same
    instance reports the same values.
    """

    def __init__(self, capacity_kwh=BATTERY_CAPACITY_KWH):
        initial_soc = random.uniform(55, 85)
        self.capacity = capacity_kwh
        self.soc = round(initial_soc, 1)

    def get_state(self):
        """Return soc_pct, kwh_stored, capacity_kwh and the charging flag."""
        stored_kwh = round(self.soc / 100 * self.capacity)
        is_charging = self.soc < 50  # reported as charging only below 50 %
        return dict(
            soc_pct=self.soc,
            kwh_stored=stored_kwh,
            capacity_kwh=self.capacity,
            charging=is_charging,
        )
# One simulator instance for the whole process, so every call agrees.
_battery = _BatterySimulator()


# NOTE: registered in TOOLS; the docstring is kept verbatim because it is
# handed to the chat template as the tool description.
def get_battery_state() -> dict:
    """Gets the current state of the community shared battery storage.

    Returns:
        Dictionary with soc_pct (state of charge), kwh stored, capacity_kwh, charging status.
    """
    snapshot = _battery.get_state()
    return snapshot
# Region name -> respondent code used in the EIA query.
_EIA_RESPONDENT = {"MISO": "MISO", "CAISO": "CISO"}
# Static values returned when the live lookup yields nothing usable.
_FALLBACK_GRID = {
    "MISO": {"renewable_pct": 12.5, "co2_intensity": 520},
    "CAISO": {"renewable_pct": 38.0, "co2_intensity": 280},
}


def _fetch_eia_grid_mix(region="MISO"):
    """Return (renewable_pct, co2_intensity) for *region* from EIA API v2.

    Requests hourly fuel-type rows for a one-day window ending 24 h ago,
    keeps the rows of the most recent period, and computes the renewable
    share and a generation-weighted CO2 figure from per-fuel factors.
    Falls back to _FALLBACK_GRID (MISO values for unknown regions) when the
    request fails, returns no rows, or the total generation is not positive.
    """
    respondent = _EIA_RESPONDENT.get(region, region)
    try:
        window_end = datetime.now(timezone.utc) - timedelta(days=1)
        window_start = window_end - timedelta(days=1)
        query = {
            "api_key": EIA_API_KEY,
            "frequency": "hourly",
            "data[0]": "value",
            "facets[respondent][]": respondent,
            "start": window_start.strftime("%Y-%m-%dT%H"),
            "end": window_end.strftime("%Y-%m-%dT%H"),
            "sort[0][column]": "period",
            "sort[0][direction]": "desc",
            "length": 200,
        }
        payload = requests.get(
            "https://api.eia.gov/v2/electricity/rto/fuel-type-data/data/",
            params=query,
            timeout=15,
        ).json()
        rows = payload.get("response", {}).get("data", [])
        if rows:
            # Rows are sorted newest-first, so the first row names the
            # latest period.
            newest_period = rows[0].get("period")
            renewable_fuels = {"SUN", "WND", "WAT", "GEO"}
            fossil_co2 = {"COL": 1000, "NG": 450, "PET": 900, "OTH": 500}
            total_mw, renewable_mw, co2_total = 0, 0, 0
            for row in rows:
                if row.get("period") != newest_period:
                    continue
                mw = float(row.get("value") or 0)
                fuel = row.get("fueltype", "")
                total_mw += mw
                if fuel in renewable_fuels:
                    renewable_mw += mw
                co2_total += mw * fossil_co2.get(fuel, 0)
            if total_mw > 0:
                share = min(100.0, round(renewable_mw / total_mw * 100, 1))
                intensity = max(0, round(co2_total / total_mw, 1))
                return share, intensity
    except Exception:
        # Best effort: any failure drops through to the static fallback.
        pass
    static = _FALLBACK_GRID.get(region, _FALLBACK_GRID["MISO"])
    return static["renewable_pct"], static["co2_intensity"]
# NOTE: registered in TOOLS; the docstring is kept verbatim because it is
# handed to the chat template as the tool description.
def get_grid_status() -> dict:
    """Gets current electricity grid pricing period, rate, and grid mix (renewable percentage, CO2 intensity).

    Returns:
        Dictionary with period (peak/mid-peak/off-peak), rate_per_kwh in USD,
        renewable_pct, and co2_intensity (kg CO2/MWh).
    """
    # Time-of-use periods are defined in the community's local time. The
    # server clock may be in another timezone, which would report the wrong
    # tariff for several hours every day.
    hour = datetime.now(ZoneInfo("America/New_York")).hour
    if 14 <= hour < 19:  # 2pm-6:59pm
        period, rate = "peak", 0.28
    elif (7 <= hour < 14) or (19 <= hour < 23):  # 7am-1:59pm OR 7pm-10:59pm
        period, rate = "mid-peak", 0.18
    else:  # 11pm-6:59am
        period, rate = "off-peak", 0.10
    renewable_pct, co2_intensity = _fetch_eia_grid_mix("MISO")
    return {
        "period": period,
        "rate_per_kwh": rate,
        "renewable_pct": renewable_pct,
        "co2_intensity": co2_intensity,
    }
# NREL PVWatts session-level cache (keyed by location + system capacity).
# Holds the raw `outputs` of a successful call; month-dependent fields are
# derived on every request so a long-running process never serves a stale
# month.
_NREL_PVWATTS_CACHE = {}
# Days per month of a non-leap year.
# NOTE(review): assumes the PVWatts typical year has 365 days — confirm.
_PVWATTS_DAYS_IN_MONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)


# NOTE: registered in TOOLS; the docstring wording is kept because it is
# handed to the chat template as the tool description.
def get_nrel_pvwatts_baseline() -> dict:
    """Gets NREL PVWatts typical-year solar production baseline for the
    community 72 kW array.

    Use this to compare current real-time output (from get_solar_production)
    against typical-year performance — useful for diagnosing under-/over-
    performance and setting expectations for the current month. Cached per
    session.

    Returns:
        Dictionary with annual_kwh, current_month_typical_kwh,
        current_month_typical_kw_avg, capacity_kw, source.
    """
    cache_key = ("ann_arbor", COMMUNITY_CAPACITY_KW)
    try:
        outputs = _NREL_PVWATTS_CACHE.get(cache_key)
        if outputs is None:
            r = requests.get(
                "https://developer.nrel.gov/api/pvwatts/v8.json",
                params={
                    "api_key": NREL_API_KEY,
                    "lat": LAT, "lon": LON,
                    "system_capacity": COMMUNITY_CAPACITY_KW,
                    "module_type": 0, "losses": 14, "array_type": 1,
                    "tilt": 30, "azimuth": 180,
                },
                timeout=15,
            ).json()
            outputs = r.get("outputs") or {}
            if outputs.get("ac_annual") is None or not outputs.get("ac_monthly"):
                # Rejected or incomplete reply (bad key, rate limit, ...).
                # Report it and do NOT cache it, so a later call can retry.
                return {
                    "error": f"PVWatts returned no outputs: {r.get('errors') or 'unknown reason'}",
                    "source": "fallback",
                }
            _NREL_PVWATTS_CACHE[cache_key] = outputs
        ac_monthly = outputs.get("ac_monthly", [])
        # Month index in site-local time.
        current_month_idx = datetime.now(ZoneInfo("America/New_York")).month - 1
        current_month_kwh = ac_monthly[current_month_idx] if ac_monthly else None
        hours_in_month = _PVWATTS_DAYS_IN_MONTH[current_month_idx] * 24
        return {
            "annual_kwh": outputs.get("ac_annual"),
            "current_month_typical_kwh": current_month_kwh,
            "current_month_typical_kw_avg": (
                round(current_month_kwh / hours_in_month, 2)
                if current_month_kwh else None
            ),
            "capacity_kw": COMMUNITY_CAPACITY_KW,
            "source": "NREL PVWatts v8",
        }
    except Exception as e:
        # `requests` errors can embed the request URL, including the API key.
        # Redact it before the text goes back to the model.
        detail = str(e)
        if NREL_API_KEY:
            detail = detail.replace(NREL_API_KEY, "***")
        return {"error": detail, "source": "fallback"}
# Tool registry: the list is passed to the chat template, the dict resolves a
# tool-call name to its callable (5 tools).
TOOLS = [
    get_weather,
    get_solar_production,
    get_battery_state,
    get_grid_status,
    get_nrel_pvwatts_baseline,
]
TOOL_MAP = {tool.__name__: tool for tool in TOOLS}
| # ββ ZeroGPU Fallback (Live Data Mode) ββββββββββββββββββββββββββββββββββββββββ | |
def _fallback_respond(question):
    """Build a markdown report from live tool data when inference fails.

    Calls the weather, solar, battery and grid tools directly, picks the
    section that best matches keywords in *question*, and returns that
    section followed by a related one, or a full dashboard when nothing
    matches. The result always starts with a "Live Data Mode" banner.

    Args:
        question: The user's question; only used for keyword routing.

    Returns:
        Markdown string.
    """
    weather = get_weather()
    solar = get_solar_production(weather.get("clouds_pct", 30), weather.get("temperature_f", 77))
    battery = get_battery_state()
    grid = get_grid_status()
    q = question.lower()
    # First matching group wins; matching is by plain substring.
    if any(k in q for k in ("solar", "production", "panel", "generat", "kwh", "kw")):
        section = "solar"
    elif any(k in q for k in ("weather", "temperature", "cloud", "wind", "rain", "sun", "forecast")):
        section = "weather"
    elif any(k in q for k in ("battery", "charge", "storage", "soc")):
        section = "battery"
    elif any(k in q for k in ("grid", "price", "rate", "peak", "tariff", "cost", "pricing")):
        section = "grid"
    else:
        section = "overview"
    banner = (
        "> **Live Data Mode** — Real-time API data shown below. "
        "The AI model is temporarily unavailable; showing raw API data instead.\n\n"
    )
    weather_md = (
        f"### Weather\n"
        f"- **Temperature:** {weather['temperature_f']}°F\n"
        f"- **Conditions:** {weather['description']}\n"
        f"- **Cloud cover:** {weather['clouds_pct']}%\n"
        f"- **Wind:** {weather['wind_mph']} mph\n"
        f"- **Humidity:** {weather['humidity_pct']}%\n"
        f"- **Sunrise/Sunset:** {weather['sunrise']} / {weather['sunset']}\n"
    )
    solar_md = (
        f"### Solar Production\n"
        f"- **Current output:** {solar['production_kw']} kW of {solar['capacity_kw']} kW capacity\n"
        f"- **Efficiency:** {solar['efficiency_pct']}%\n"
        f"- **Temp derating:** {solar['temp_derate_pct']}%\n"
    )
    # `is not None` rather than truthiness: a reading of 0.0 W/m² (night) is
    # real data and must still be shown.
    if solar.get("ghi_wm2") is not None:
        solar_md += f"- **Solar irradiance (GHI):** {solar['ghi_wm2']} W/m²\n"
    solar_md += f"- **Data source:** {solar['source']}\n"
    battery_md = (
        f"### Battery Storage\n"
        f"- **State of charge:** {battery['soc_pct']}%\n"
        f"- **Energy stored:** {battery['kwh_stored']} kWh of {battery['capacity_kwh']} kWh\n"
        f"- **Status:** {'Charging' if battery['charging'] else 'Discharging/Idle'}\n"
    )
    grid_md = (
        f"### Grid Status\n"
        f"- **Pricing period:** {grid['period']}\n"
        f"- **Rate:** ${grid['rate_per_kwh']}/kWh\n"
        f"- **Renewable mix:** {grid['renewable_pct']}%\n"
        f"- **CO2 intensity:** {grid['co2_intensity']} kg/MWh\n"
    )
    # Primary section first, then the most related secondary section.
    layouts = {
        "solar": (solar_md, weather_md),
        "weather": (weather_md, solar_md),
        "battery": (battery_md, solar_md),
        "grid": (grid_md, solar_md),
    }
    if section in layouts:
        body = "\n".join(layouts[section])
    else:
        body = "## SolarHive Community Dashboard\n\n" + "\n".join(
            (solar_md, weather_md, battery_md, grid_md)
        )
    return banner + body
| # ββ System Prompt ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Repeated twice — prompt repetition improves instruction following in causal | |
| # LLMs. See: Leviathan et al. (2024), "Repeat to Improve Non-Reasoning LLMs". | |
# One copy of the instructions; the final prompt is this text twice,
# separated by a blank line.
_SYSTEM_PROMPT_ONCE = (
    "You are SolarHive, an AI energy advisor for a community of 12 homes "
    "with rooftop solar and shared battery storage in Ann Arbor, Michigan. "
    "Use the available tools to get real-time data before answering. "
    "Be specific, reference actual data, and keep responses concise (3-5 sentences)."
)
SYSTEM_PROMPT = "\n\n".join([_SYSTEM_PROMPT_ONCE, _SYSTEM_PROMPT_ONCE])
| # ββ Tool-Call Parsing Helpers βββββββββββββββββββββββββββββββββββββββββββββββ | |
| # Ported verbatim from solarhive_inference.py to keep the demo's dispatch | |
| # behavior byte-identical with the cloud benchmark + finetune training format. | |
| # Catches: wrapped + bare regex forms, negative-number args (`temp_f:-5` for | |
| # winter scenarios), boolean / null args, hallucinated kwargs (model emits | |
| # `get_grid_status{location:...}` even though the function takes no args). | |
| # Wrapped form (preferred): `<|tool_call>call:fn{args}<tool_call|>` | |
| _TOOL_CALL_WRAPPED_RE = re.compile( | |
| r'<\|tool_call>\s*call:(\w+)\{([^}]*)\}\s*<tool_call\|>', | |
| re.DOTALL, | |
| ) | |
| # Bare form (fallback when thinking-mode strips the wrapper) | |
| _TOOL_CALL_BARE_RE = re.compile(r'\bcall:(\w+)\{([^}]*)\}') | |
| # Arg parser: supports strings via <|"|>, ints, floats, **negatives**, bool, null | |
| _ARG_FIELD_RE = re.compile( | |
| r'(\w+)\s*:\s*' | |
| r'(?:<\|"\|>([^<]*)<\|"\|>|(-?\d+\.?\d*)|(true|false|null))', | |
| ) | |
| def _extract_tool_calls(raw): | |
| """Extract (fn_name, args_str) tuples from a Gemma 4 model output. | |
| Wrapped form wins when both appear; bare form is the GGUF/thinking-mode | |
| fallback. Same two-pattern strategy as solarhive_inference.py. | |
| """ | |
| wrapped = _TOOL_CALL_WRAPPED_RE.findall(raw) | |
| if wrapped: | |
| return wrapped | |
| return _TOOL_CALL_BARE_RE.findall(raw) | |
| def _parse_tool_args(args_str): | |
| """Parse `key:val,key2:val2,...` from a Gemma 4 tool-call argument | |
| string. Handles strings (`<|"|>...<|"|>`), ints, floats, negatives, | |
| booleans, and null.""" | |
| out = {} | |
| for key, str_val, num_val, bool_val in _ARG_FIELD_RE.findall(args_str): | |
| if str_val: | |
| out[key] = str_val | |
| elif num_val: | |
| out[key] = float(num_val) if "." in num_val else int(num_val) | |
| elif bool_val: | |
| out[key] = {"true": True, "false": False, "null": None}[bool_val] | |
| return out | |
| def _safe_tool_call(fn, args): | |
| """Dispatch a tool call defensively β drop kwargs the function doesn't accept. | |
| The model occasionally hallucinates extra kwargs (e.g., emitting | |
| `call:get_grid_status{location:<|"|>Ann Arbor, MI<|"|>}` even though | |
| the function takes no args). Without filtering, `fn(**args)` raises | |
| `TypeError: ... got an unexpected keyword argument 'location'` and | |
| crashes the agentic loop. | |
| If the function declares `**kwargs`, we pass everything through | |
| unchanged β that's an explicit opt-in to accept unknowns. | |
| """ | |
| sig = inspect.signature(fn) | |
| if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()): | |
| return fn(**args) | |
| accepted = set(sig.parameters.keys()) | |
| filtered = {k: v for k, v in args.items() if k in accepted} | |
| if filtered != args: | |
| dropped = set(args) - set(filtered) | |
| print(f" [warn] {fn.__name__}: dropped hallucinated args {sorted(dropped)} (function takes {sorted(accepted) or 'no args'})") | |
| return fn(**filtered) | |
| # ββ Agentic Loop βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def _collect_images(messages):
    """Return every image object found in list-style message contents."""
    images = []
    for msg in messages:
        content = msg.get("content")
        if isinstance(content, list):
            images.extend(
                item["image"] for item in content
                if isinstance(item, dict) and item.get("type") == "image"
            )
    return images


def _generate_once(messages, with_tools):
    """Render the chat template, run one generation, return the decoded new tokens."""
    template_kwargs = {"tools": TOOLS} if with_tools else {}
    text = processor.apply_chat_template(
        messages, add_generation_prompt=True,
        enable_thinking=False, tokenize=False, **template_kwargs,
    )
    # Images must accompany EVERY pass whose messages contain them, including
    # the final no-tools pass, which previously sent text only.
    images = _collect_images(messages)
    if images:
        inputs = processor(text=text, images=images, return_tensors="pt").to(model.device)
    else:
        inputs = processor(text=text, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(
            **inputs, max_new_tokens=1024,
            temperature=1.0, top_p=0.95, top_k=64,
        )
    # Keep only the newly generated tokens (skip the prompt part).
    return processor.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=False)


def _clean_model_text(raw):
    """Turn raw decoded output into display text with control tokens removed."""
    parsed = processor.parse_response(raw)
    clean = (parsed.get("content") or "") if isinstance(parsed, dict) else str(parsed)
    # Strip leftover special tokens (e.g. <eos>, <turn|>, <bos>)
    clean = re.sub(r'<[a-z_|]+>', '', clean).strip()
    # Fallback: recover text from the raw output if parsing produced nothing
    if not clean:
        clean = re.sub(r'<[^>]+>', '', raw).strip()
    return clean


def _generate_with_tools(messages, max_rounds=3):
    """Run the tool-calling loop and return the final answer.

    Each round generates once with the tool schemas attached. If the output
    contains tool calls (detected by regex on `call:fn{args}`), they are
    executed and their results appended to *messages* as one assistant
    `tool_calls` message plus one `tool` message per result. The first
    round without tool calls is the final answer. After *max_rounds* rounds
    one more generation runs WITHOUT tool schemas to force a text answer.

    The prompt is rendered to text first and tokenized in a second step;
    the original author notes this avoids a transformers issue with
    messages that have no 'content' key.

    Args:
        messages: Chat messages; mutated in place with tool traffic.
        max_rounds: Maximum number of tool-calling rounds.

    Returns:
        Dict with `response` (str), `tool_calls` (list of
        {"name", "arguments"}), `rounds` (int) and `raw_debug` (first 300
        chars of raw output when the cleaned response is empty, else "").
    """
    all_calls = []
    for round_num in range(max_rounds):
        raw = _generate_once(messages, with_tools=True)
        found = _extract_tool_calls(raw)
        if not found:
            # No tool calls: this is the final answer.
            clean = _clean_model_text(raw)
            return {"response": clean, "tool_calls": all_calls, "rounds": round_num + 1,
                    "raw_debug": raw[:300] if not clean else ""}
        calls, results = [], []
        for fn_name, args_str in found:
            args = _parse_tool_args(args_str)
            call = {"name": fn_name, "arguments": args}
            calls.append(call)
            all_calls.append(call)
            if fn_name in TOOL_MAP:
                try:
                    result = _safe_tool_call(TOOL_MAP[fn_name], args)
                except Exception as tool_err:
                    # Report the failure to the model instead of aborting
                    # the whole conversation on one bad argument.
                    result = {"error": f"{type(tool_err).__name__}: {tool_err}"}
            else:
                result = {"error": f"Unknown: {fn_name}"}
            results.append({"name": fn_name, "response": result})
        # Feed results back:
        # 1) assistant message with tool_calls only
        # 2) one role=tool message per tool result (json.dumps content)
        messages.append({
            "role": "assistant",
            "tool_calls": [{"function": c} for c in calls],
        })
        for r_item in results:
            messages.append({
                "role": "tool",
                "name": r_item["name"],
                "content": json.dumps(r_item["response"]),
            })
    # Rounds exhausted: one last pass without tool schemas to force text.
    raw = _generate_once(messages, with_tools=False)
    clean = _clean_model_text(raw)
    if not clean:
        clean = (
            "I gathered data from multiple tools but ran out of reasoning rounds "
            "before composing a final answer. This can happen with complex multi-tool "
            "queries. Please try rephrasing your question or asking about one topic at a time."
        )
    return {"response": clean, "tool_calls": all_calls, "rounds": max_rounds, "raw_debug": ""}


# Apply the ZeroGPU decorator; 120 s covers multi-round tool calling (2-3 rounds).
if spaces is not None:
    _generate_with_tools = spaces.GPU(duration=120)(_generate_with_tools)
| # ββ Agent Wrapper ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def solarhive_agent(question, image=None):
    """Assemble the conversation for one question and run the tool loop.

    Without an image the user turn is the plain question text. With an
    image the user turn becomes an [image, text] content list and the
    system prompt gains the visual-analysis instruction, appended twice.
    Returns whatever _generate_with_tools returns.
    """
    system_text = SYSTEM_PROMPT
    user_content = question
    if image:
        vqa_instruction = (
            " When an image is provided, FIRST describe what you observe in the "
            "image (e.g., cloud cover, sky color, panel condition). Base your "
            "primary assessment on visual observation. You may call tools for "
            "additional context, but note any differences between what the image "
            "shows and what the station data reports."
        )
        system_text = system_text + vqa_instruction * 2
        user_content = [
            {"type": "image", "image": image},
            {"type": "text", "text": question},
        ]
    conversation = [
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_content},
    ]
    return _generate_with_tools(conversation)
| # ββ Gradio Chat Handler βββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
def respond(message, history):
    """Gradio chat handler: answer one message, optionally with an image.

    Args:
        message: Either a plain string or a multimodal dict with "text" and
            "files" (each file a path string or a dict with a "path" key).
        history: Previous chat turns supplied by Gradio; not used.

    Returns:
        Markdown string: the model's answer (prefixed with the tools that
        were called), or the live-data fallback when inference fails or the
        model returns nothing after calling tools.
    """
    # Extract text and files; tolerate None values in the multimodal payload.
    if isinstance(message, dict):
        text = (message.get("text") or "").strip()
        files = message.get("files") or []
    else:
        text = str(message).strip()
        files = []
    if not text:
        return "Please enter a question about your community solar system."
    # Load the first uploaded image, if any.
    image = None
    if files:
        try:
            first = files[0]
            fpath = first if isinstance(first, str) else first.get("path", "")
            if fpath:
                # The context manager closes the file handle; convert()
                # returns an independent in-memory copy.
                with Image.open(fpath) as source_image:
                    image = source_image.convert("RGB")
        except Exception as img_err:
            # An unreadable upload must not block the text question, but
            # leave a trace for debugging.
            print(f"[warn] could not load uploaded image: {type(img_err).__name__}: {img_err}")
            image = None
    # Run the agent; fall back to live API data on any inference error.
    try:
        result = solarhive_agent(text, image)
    except Exception as e:
        err_str = f"{type(e).__name__}: {e}"
        fallback = _fallback_respond(text)
        fallback += (
            f"\n\n---\n*AI model unavailable: `{err_str[:150]}`. "
            "Sign in with a free [HuggingFace account](https://huggingface.co/join) "
            "for GPU access, or try again later.*"
        )
        return fallback
    response = result.get("response", "")
    tool_calls = result.get("tool_calls", [])
    rounds = result.get("rounds", 0)
    header = ""
    tool_names = ""
    if tool_calls:
        tool_names = ", ".join(c["name"] for c in tool_calls)
        header = f"**Tools called:** {tool_names} | Rounds: {rounds}\n\n"
    # Tools were called but the model produced no text: show live data.
    if not response.strip() and tool_calls:
        fallback = _fallback_respond(text)
        raw_debug = result.get("raw_debug", "")
        debug_line = f"\n\n---\n*Model called {tool_names} but returned empty. Debug: `{raw_debug[:150]}`*" if raw_debug else ""
        return f"{header}{fallback}{debug_line}"
    if not response.strip():
        response = "No response generated. Please try again."
    return header + response
| # ββ Gradio UI ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
# Styles for the notice banner rendered from NOTICE_HTML (passed to
# gr.Blocks via `css=`).
CUSTOM_CSS = """\
.notice-banner {
margin: 4px 0 12px 0;
padding: 10px 14px;
background: rgba(245, 158, 11, 0.10);
border: 1px solid rgba(245, 158, 11, 0.28);
border-radius: 8px;
font-size: 12px;
line-height: 1.5;
color: #d1d5db;
}
.notice-banner a { color: #60a5fa; text-decoration: underline; }
.notice-banner strong.notice-label { color: #fbbf24; }
"""
# Usage notice + disclaimer shown at the top of the page via gr.HTML.
NOTICE_HTML = """\
<div class="notice-banner">
<strong class="notice-label">Usage Notice:</strong>
This demo runs on <a href="https://huggingface.co/docs/hub/spaces-zerogpu" target="_blank" rel="noopener noreferrer">ZeroGPU</a>
with limited GPU allocation.
Anonymous and free users may only get <strong>1 full query</strong> (2 min/day GPU quota).
<a href="https://huggingface.co/join" target="_blank" rel="noopener noreferrer">Sign in</a>
for access, or
<a href="https://huggingface.co/subscribe/pro" target="_blank" rel="noopener noreferrer">upgrade to HF Pro</a>
for extended GPU time (25 min/day).
<br><br>
<strong class="notice-label">Disclaimer:</strong>
This is a hackathon demo for evaluation purposes only.
Do not submit confidential, sensitive, or personal data.
Use of this demo is at your own risk and is subject to
<a href="https://huggingface.co/terms-of-service" target="_blank" rel="noopener noreferrer">Hugging Face's Terms of Service</a>
and
<a href="https://huggingface.co/privacy" target="_blank" rel="noopener noreferrer">Privacy Policy</a>.
</div>
"""
# Markdown shown under the page title. A trailing backslash joins a source
# line to the next one; blank lines separate markdown paragraphs.
# NOTE(review): the link icons were garbled in this copy of the file. The
# brain, lightning and phone icons could be decoded from the residue; the
# link, chart and memo icons are best guesses — confirm against the original.
DESCRIPTION = """\
**AI-powered community solar energy intelligence** built with fine-tuned \
**Gemma 4 26B A4B** and **native function calling** for real-time data.

The agent serves a **12-home solar community** in Ann Arbor, Michigan \
(72 kW panels, 100 kWh shared battery). Five tools fetch real-time data — \
OpenWeatherMap, Open-Meteo (irradiance), NREL PVWatts (typical-year baseline), \
EIA (grid pricing + renewable mix), and a battery-state simulator. \
Image upload supports sky-photo cloud-coverage analysis and panel-condition inspection.

**Try it:** Type a question below or click an example. \
Upload a sky or panel photo (paperclip icon) for visual analysis.

This submission targets the **Global Resilience** track (main) plus all five \
Special Tech tracks: **Ollama**, **llama.cpp**, **Unsloth**, **Cactus**, and \
**LiteRT**. The same Unsloth fine-tune ships as a 5.3 GB GGUF for Ollama + \
llama.cpp on a laptop CPU, a 6.94 GB Cactus INT4 bundle for Android, and a \
LiteRT-LM Python runtime demo for cross-platform edge (browser / Pi 5 / Jetson).

**Resilient inference:** loads [SolarHive 26B A4B merged](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-merged) \
in BF16, with the pre-quantized [SolarHive A4B NF4](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-nf4) \
as an OOM-safe fallback — both score identical 9/10 + 3/3 When2Call validation, \
so the demo quality bar is preserved regardless of which variant loads. If GPU \
inference is unavailable, the demo gracefully serves live API data.

🔗 [GitHub repo](https://github.com/youshen-lim/the-gemma4-good-hackathon-solarhive) · \
🧠 [Cloud model (A4B LoRA)](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-lora) · \
⚡ [Edge model (E4B GGUF)](https://huggingface.co/Truthseeker87/solarhive-e4b-gguf) · \
📱 [Mobile model (E4B Cactus)](https://huggingface.co/Truthseeker87/solarhive-e4b-cactus) · \
📊 [Training dataset](https://huggingface.co/datasets/Truthseeker87/solarhive-community-solar-multimodal) · \
📝 [Kaggle writeup](https://kaggle.com/competitions/gemma-4-good-hackathon)

*Gemma 4 Good Hackathon — Google DeepMind × Kaggle*\
"""
# Clickable example prompts for the chat UI (multimodal message dicts).
EXAMPLES = [
    # Live tool routing
    {"text": "What's the current solar production?"},
    {"text": "Full community energy audit — check weather, solar, battery, and grid pricing. Give a 3-sentence status report."},
    # NREL probe: exercises the 5th tool (typical-year baseline comparison)
    {"text": "Is today's production above typical for this month?"},
    # Domain knowledge (no tool expected)
    {"text": "Should I run my pool heater now or wait?"},
    {"text": "Home #7's panels are producing 15% less than neighbors. What should we check?"},
    # When2Call probes (Ross et al. 2025, arXiv:2504.18851): let judges
    # experience the trained refusal/follow-up behavior live
    {"text": "What's the current grid rate?"},  # (b) well-specified: expect get_grid_status call
    {"text": "How much will a 10 kW array produce today?"},  # (c) under-specified: expect follow-up question, NO auto-fill
    {"text": "What's the current air quality index in Ann Arbor?"},  # (d) out-of-scope: expect graceful decline, NO hallucinated tool
]
# Page layout: title, usage notice, description, then the multimodal chat.
with gr.Blocks(
    title="SolarHive — Community Solar Intelligence",
    css=CUSTOM_CSS,
    theme=gr.themes.Default(),
) as demo:
    gr.Markdown("# SolarHive — Community Solar Intelligence")
    gr.HTML(NOTICE_HTML)
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=respond,
        multimodal=True,  # enables file upload alongside the text box
        examples=EXAMPLES,
    )

if __name__ == "__main__":
    demo.launch()