Spaces:

Truthseeker87
/

solarhive

Running on Zero

App Files Files Community

solarhive / app.py

Truthseeker87

DESCRIPTION: 5-track coverage + Resilient inference (BF16 merged + NF4 fallback) + expanded artifacts row

4286bef verified 11 days ago

raw

history blame contribute delete

38 kB

	"""
	SolarHive — Community Solar Intelligence
	Gradio app for HuggingFace Spaces (ZeroGPU, or a persistent/local GPU)

	Fine-tuned Gemma 4 26B A4B with native function calling for
	real-time solar production, weather, battery, and grid data.

	SolarHive is an open-source intelligence layer designed to coordinate
	community microgrids & community-based storage via fuel cells, pool
	midday energy surplus across these microgrids, and eliminate stranded
	capacity. It also helps forecast solar irradiance and cloud cover to
	plan ahead. Gemma 4 is the brain that powers it.

	Gemma 4 Good Hackathon — Google DeepMind x Kaggle
	"""

	import os
	import json
	import re
	import random
	import inspect
	import requests
	import torch
	import gradio as gr
	from datetime import datetime, timezone, timedelta
	from zoneinfo import ZoneInfo
	from PIL import Image
	from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig

	# ZeroGPU support — falls back to no-op for local testing.
	# When the `spaces` package cannot be imported the name is bound to None, so
	# the rest of the module can branch on `spaces is not None`.
	try:
	    import spaces
	except ImportError:
	    spaces = None


	# ── Constants ────────────────────────────────────────────────────────────────

	# Fixed site coordinates; every location-based API request in this module
	# is issued for this point.
	LAT = 42.2808
	LON = -83.7430
	# Size of the community array (kW) and of the shared battery (kWh).
	COMMUNITY_CAPACITY_KW = 72
	BATTERY_CAPACITY_KWH = 100

	# Demo-mode API key fallbacks for the live HF Space: an environment variable
	# (e.g. a Space secret) takes precedence, otherwise the baked-in key is used
	# so the demo works with no configuration. The original author describes
	# these as free-tier, easily rotatable keys.
	# NOTE(review): the defaults below are committed in source and therefore
	# public — rotate them and rely on Space secrets before any production use.
	OWM_API_KEY = os.getenv("OWM_API_KEY", "84a310689d5620edd1b5e4c14d8fb29b")
	EIA_API_KEY = os.getenv("EIA_API_KEY", "ZXzaFCfSc1aU7nfu3Y6wctwxyiFPHEclHhycI3Xm")
	NREL_API_KEY = os.getenv("NREL_API_KEY", "LI4AHQodsW7b0L0T3BCHtUA3PyjvOXP8zrFrZuiQ")


	# ── Model Loading ────────────────────────────────────────────────────────────

	# Hub repositories: the processor comes from the base checkpoint, the
	# weights from the merged fine-tune.
	BASE_MODEL_ID = "google/gemma-4-26b-a4b-it"
	MODEL_ID = "Truthseeker87/solarhive-26b-a4b-merged"
	# Pre-quantized NF4 variant used when the BF16 load fails on ZeroGPU (the
	# author quotes ~13-16 GB for NF4 versus ~48 GB for BF16). It is loaded
	# directly, without a BitsAndBytesConfig.
	MODEL_ID_NF4_FALLBACK = "Truthseeker87/solarhive-26b-a4b-nf4"

	# Set SOLARHIVE_FORCE_NF4 to 1/true/yes (case-insensitive) in the Space
	# Variables to skip the BF16 attempt and load the NF4 variant straight away,
	# avoiding the fail-then-fallback cost on every cold start.
	FORCE_NF4 = os.getenv("SOLARHIVE_FORCE_NF4", "").lower() in {"1", "true", "yes"}

	# Import-time side effect: this section downloads/loads the processor and
	# the model and binds the module globals `processor` and `model` that the
	# agentic loop relies on.
	# The processor is always taken from the base checkpoint, whichever weights
	# end up being loaded.
	print(f"Loading processor from {BASE_MODEL_ID}")
	processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)

	print(f"Loading fine-tuned model from {MODEL_ID}")

	if spaces is not None:
	    # ZeroGPU: defensive load with NF4 fallback.
	    # Primary path: BF16 (highest fidelity, ~48 GB) — fits H200 / Half H200.
	    # Fallback path: pre-quantized NF4 (~13-16 GB) — fits A10g and other
	    # smaller tiers ZeroGPU may allocate. Both variants score 9/10 + 3/3 W2C
	    # in the cross-variant validation, so the demo's quality bar is preserved
	    # either way. Set SOLARHIVE_FORCE_NF4=1 in Space Variables to skip the
	    # BF16 attempt entirely.
	    # https://huggingface.co/docs/hub/spaces-zerogpu
	    if FORCE_NF4:
	        print(f"SOLARHIVE_FORCE_NF4 set — loading pre-quantized NF4 from {MODEL_ID_NF4_FALLBACK}")
	        model = AutoModelForCausalLM.from_pretrained(
	            MODEL_ID_NF4_FALLBACK, device_map="cuda:0", trust_remote_code=True,
	        )
	        print("Model loaded in NF4 (forced via SOLARHIVE_FORCE_NF4 env var)")
	    else:
	        try:
	            print(f"Loading BF16 from {MODEL_ID} (primary path)")
	            model = AutoModelForCausalLM.from_pretrained(
	                MODEL_ID, dtype=torch.bfloat16, trust_remote_code=True,
	            )
	            model.to("cuda")
	            print("Model loaded in BF16 for ZeroGPU (primary path)")
	        except Exception as _bf16_err:
	            # Any failure of the BF16 path (not only OOM) triggers the fallback.
	            # Free anything partially loaded so the fallback has clean memory
	            try:
	                del model
	            except NameError:
	                # `model` was never bound: from_pretrained itself failed.
	                pass
	            import gc as _gc
	            _gc.collect()
	            try:
	                torch.cuda.empty_cache()
	            except Exception:
	                # Best effort only; cache cleanup must not mask the fallback.
	                pass
	            print(f"BF16 load failed ({type(_bf16_err).__name__}): {_bf16_err}")
	            print(f"Falling back to pre-quantized NF4 from {MODEL_ID_NF4_FALLBACK}")
	            model = AutoModelForCausalLM.from_pretrained(
	                MODEL_ID_NF4_FALLBACK, device_map="cuda:0", trust_remote_code=True,
	            )
	            print("Model loaded in NF4 (BF16 OOM fallback path)")
	else:
	    # Non-ZeroGPU: auto-detect VRAM for quantization decision
	    _free = 0
	    try:
	        if torch.cuda.is_available():
	            # mem_get_info(0)[0] is the free-memory figure for device 0;
	            # dividing by 1e9 expresses it in GB for the threshold below.
	            _free = torch.cuda.mem_get_info(0)[0] / 1e9
	    except Exception:
	        # Keep _free == 0 so the 4-bit branch is taken when probing fails.
	        pass
	    print(f"Available VRAM: {_free:.1f} GB")

	    if _free >= 55:
	        # Enough free memory: load the merged weights unquantized in BF16.
	        model = AutoModelForCausalLM.from_pretrained(
	            MODEL_ID, dtype=torch.bfloat16, device_map="auto", trust_remote_code=True,
	        )
	    else:
	        # Otherwise quantize the merged weights to 4-bit NF4 at load time.
	        bnb_config = BitsAndBytesConfig(
	            load_in_4bit=True,
	            bnb_4bit_compute_dtype=torch.bfloat16,
	            bnb_4bit_quant_type="nf4",
	        )
	        model = AutoModelForCausalLM.from_pretrained(
	            MODEL_ID, quantization_config=bnb_config,
	            device_map="cuda:0", trust_remote_code=True,
	        )
	    # Kept inside this branch: `_free` only exists on the non-ZeroGPU path.
	    print(f"Model loaded on {model.device} ({'BF16' if _free >= 55 else 'NF4'})")


	# ── Tool Functions ───────────────────────────────────────────────────────────

	def get_weather(location: str = "Ann Arbor, MI") -> dict:
	    """Gets current weather conditions for the community.

	    Args:
	        location: The city and state, e.g. "Ann Arbor, MI"

	    Returns:
	        Dictionary with temperature_f, clouds_pct, description, wind_mph, humidity_pct, sunrise, sunset.
	    """
	    # NOTE: this function is listed in TOOLS and handed to
	    # apply_chat_template(tools=...); its signature and docstring wording are
	    # kept verbatim because they presumably feed the tool schema the model sees.
	    # `location` is accepted but never read: the request always targets the
	    # fixed LAT/LON.
	    local_tz = ZoneInfo("America/New_York")

	    def _clock(epoch_seconds):
	        # Unix timestamp -> "HH:MM" wall-clock time in the community's zone.
	        return datetime.fromtimestamp(epoch_seconds, tz=local_tz).strftime("%H:%M")

	    try:
	        query = {"lat": LAT, "lon": LON, "appid": OWM_API_KEY, "units": "imperial"}
	        payload = requests.get(
	            "https://api.openweathermap.org/data/2.5/weather",
	            params=query,
	            timeout=10,
	        ).json()
	        return {
	            "temperature_f": payload["main"]["temp"],
	            "clouds_pct": payload["clouds"]["all"],
	            "description": payload["weather"][0]["description"],
	            "wind_mph": payload["wind"]["speed"],
	            "humidity_pct": payload["main"]["humidity"],
	            "sunrise": _clock(payload["sys"]["sunrise"]),
	            "sunset": _clock(payload["sys"]["sunset"]),
	        }
	    except Exception as exc:
	        # Network failure or unexpected payload: report the error and supply
	        # placeholder conditions so callers still find every key.
	        placeholder = {"error": str(exc)}
	        placeholder.update({
	            "clouds_pct": 30,
	            "temperature_f": 72,
	            "description": "partly cloudy",
	            "wind_mph": 5.0,
	            "humidity_pct": 50,
	            "sunrise": "07:00",
	            "sunset": "20:00",
	        })
	        return placeholder


	def _get_current_ghi():
	    """Return the current shortwave radiation reading (GHI, W/m2) from Open-Meteo.

	    The request is made for the fixed LAT/LON and carries no API key.
	    Returns None on any failure (network error, bad JSON, missing field) so
	    the caller can switch to its cloud-cover based estimate.
	    """
	    try:
	        query = {"latitude": LAT, "longitude": LON, "current": "shortwave_radiation"}
	        payload = requests.get(
	            "https://api.open-meteo.com/v1/forecast", params=query, timeout=10,
	        ).json()
	        current = payload["current"]
	        return current["shortwave_radiation"]
	    except Exception:
	        return None


	def get_solar_production(clouds_pct: int = 30, temp_f: float = 77.0) -> dict:
	    """Estimates current community solar production using live solar irradiance data.

	    Args:
	        clouds_pct: Current cloud cover percentage (0-100). Get this from get_weather first.
	        temp_f: Current temperature in Fahrenheit. Get this from get_weather first.

	    Returns:
	        Dictionary with production_kw, capacity_kw, efficiency_pct, ghi_wm2, temp_derate_pct, source.
	    """
	    # NOTE: listed in TOOLS and handed to apply_chat_template(tools=...); the
	    # signature and docstring wording are kept verbatim because they
	    # presumably feed the tool schema the model sees.

	    # The model may emit null or malformed values; fall back to the declared
	    # defaults instead of raising out of the agentic loop.
	    try:
	        clouds_pct = int(clouds_pct)
	    except (TypeError, ValueError, OverflowError):
	        clouds_pct = 30
	    try:
	        temp_f = float(temp_f)
	    except (TypeError, ValueError):
	        temp_f = 77.0
	    clouds_pct = max(0, min(100, clouds_pct))
	    temp_f = max(-40, min(130, temp_f))

	    # System losses: inverter 97% x wiring 98% x soiling 97% x mismatch 98% ~ 0.85
	    SYSTEM_EFF = 0.85

	    # Temperature derating as coded: 0.4% per degree F above 77F, floored at 75%.
	    # NOTE(review): panel temperature coefficients are usually quoted per
	    # degree C of cell temperature — confirm the per-F ambient figure is intended.
	    temp_derate = max(0.75, 1.0 - 0.004 * max(0, temp_f - 77))

	    ghi = _get_current_ghi()
	    if ghi is not None:
	        # Primary path: scale capacity by irradiance relative to 1000 W/m2.
	        production = round(max(0, COMMUNITY_CAPACITY_KW * (ghi / 1000) * SYSTEM_EFF * temp_derate), 1)
	        return {
	            "production_kw": production,
	            "capacity_kw": COMMUNITY_CAPACITY_KW,
	            "efficiency_pct": round(production / COMMUNITY_CAPACITY_KW * 100, 1),
	            "ghi_wm2": round(ghi, 1),
	            "temp_derate_pct": round(temp_derate * 100, 1),
	            "source": "open-meteo",
	        }

	    # Fallback: cloud%-based estimate (less accurate — no seasonal sun angle)
	    efficiency = max(0.15, 0.85 - (clouds_pct / 100) * 0.70)
	    # Use the community's wall clock, not the server's: get_weather reports
	    # sunrise/sunset in America/New_York and the daylight window below must
	    # agree with it even when the host runs in another zone.
	    hour = datetime.now(ZoneInfo("America/New_York")).hour
	    # Parabola peaking at noon and reaching zero at 06:00 and 18:00.
	    time_factor = max(0, 1 - ((hour - 12) / 6) ** 2) if 6 <= hour <= 18 else 0
	    production = round(COMMUNITY_CAPACITY_KW * efficiency * time_factor * temp_derate, 1)
	    return {
	        "production_kw": production,
	        "capacity_kw": COMMUNITY_CAPACITY_KW,
	        "efficiency_pct": round(production / COMMUNITY_CAPACITY_KW * 100, 1),
	        "temp_derate_pct": round(temp_derate * 100, 1),
	        "source": "fallback",
	    }


	class _BatterySimulator:
	    """Simulated shared battery whose state of charge is fixed at creation.

	    The state of charge is drawn once (55-85 %, one decimal) in __init__ and
	    never changed afterwards, so every reading from the same instance agrees.
	    """

	    def __init__(self, capacity_kwh=BATTERY_CAPACITY_KWH):
	        initial_soc = random.uniform(55, 85)
	        self.capacity = capacity_kwh
	        self.soc = round(initial_soc, 1)

	    def get_state(self):
	        """Return soc_pct, kwh_stored (whole kWh), capacity_kwh and charging."""
	        stored_kwh = round(self.soc / 100 * self.capacity)
	        state = {}
	        state["soc_pct"] = self.soc
	        state["kwh_stored"] = stored_kwh
	        state["capacity_kwh"] = self.capacity
	        # Reported as charging only below 50 % state of charge.
	        state["charging"] = self.soc < 50
	        return state


	# Single module-level instance shared by every tool call in this process.
	_battery = _BatterySimulator()


	def get_battery_state() -> dict:
	    """Gets the current state of the community shared battery storage.

	    Returns:
	        Dictionary with soc_pct (state of charge), kwh stored, capacity_kwh, charging status.
	    """
	    # NOTE: listed in TOOLS; signature and docstring wording kept verbatim
	    # because they presumably feed the tool schema the model sees.
	    # Delegates to the module-level simulator instance.
	    snapshot = _battery.get_state()
	    return snapshot


	_EIA_RESPONDENT = {"MISO": "MISO", "CAISO": "CISO"}
	_FALLBACK_GRID = {
	"MISO": {"renewable_pct": 12.5, "co2_intensity": 520},
	"CAISO": {"renewable_pct": 38.0, "co2_intensity": 280},
	}


	def _fetch_eia_grid_mix(region="MISO"):
	    """Return (renewable_pct, co2_intensity) for a grid region.

	    Queries the EIA v2 hourly fuel-type endpoint for a 24-hour window ending
	    one day ago, keeps only the rows of the most recent period returned, and
	    derives the renewable share and a generation-weighted CO2 figure from
	    them. Any failure, an empty result, or a non-positive total yields the
	    static values in _FALLBACK_GRID (MISO's when the region is unknown).
	    """
	    respondent = _EIA_RESPONDENT.get(region, region)
	    try:
	        window_end = datetime.now(timezone.utc) - timedelta(days=1)
	        window_start = window_end - timedelta(days=1)
	        stamp = "%Y-%m-%dT%H"
	        query = {
	            "api_key": EIA_API_KEY,
	            "frequency": "hourly",
	            "data[0]": "value",
	            "facets[respondent][]": respondent,
	            "start": window_start.strftime(stamp),
	            "end": window_end.strftime(stamp),
	            "sort[0][column]": "period",
	            "sort[0][direction]": "desc",
	            "length": 200,
	        }
	        payload = requests.get(
	            "https://api.eia.gov/v2/electricity/rto/fuel-type-data/data/",
	            params=query,
	            timeout=15,
	        ).json()
	        records = payload.get("response", {}).get("data", [])
	        if records:
	            # Rows arrive newest first, so the first row names the latest period.
	            newest_period = records[0].get("period")
	            newest_rows = [rec for rec in records if rec.get("period") == newest_period]
	            renewable_fuels = {"SUN", "WND", "WAT", "GEO"}
	            # Per-fuel emission factors; fuels not listed contribute zero.
	            fossil_factor = {"COL": 1000, "NG": 450, "PET": 900, "OTH": 500}
	            total_mw, renewable_mw, co2_sum = 0, 0, 0
	            for rec in newest_rows:
	                megawatts = float(rec.get("value") or 0)
	                fuel_code = rec.get("fueltype", "")
	                total_mw += megawatts
	                if fuel_code in renewable_fuels:
	                    renewable_mw += megawatts
	                co2_sum += megawatts * fossil_factor.get(fuel_code, 0)
	            if total_mw > 0:
	                share = min(100.0, round(renewable_mw / total_mw * 100, 1))
	                intensity = max(0, round(co2_sum / total_mw, 1))
	                return share, intensity
	    except Exception:
	        # Deliberate best effort: fall through to the static figures.
	        pass
	    static = _FALLBACK_GRID.get(region, _FALLBACK_GRID["MISO"])
	    return static["renewable_pct"], static["co2_intensity"]


	def get_grid_status() -> dict:
	    """Gets current electricity grid pricing period, rate, and grid mix (renewable percentage, CO2 intensity).

	    Returns:
	        Dictionary with period (peak/mid-peak/off-peak), rate_per_kwh in USD,
	        renewable_pct, and co2_intensity (kg CO2/MWh).
	    """
	    # NOTE: listed in TOOLS; signature and docstring wording kept verbatim
	    # because they presumably feed the tool schema the model sees.
	    # The time-of-use schedule is defined in the community's wall-clock time.
	    # A naive datetime.now() follows the host's zone and would shift the
	    # peak window on a host running in another zone, so use the same zone
	    # get_weather uses for sunrise/sunset.
	    hour = datetime.now(ZoneInfo("America/New_York")).hour
	    if 14 <= hour < 19:  # 2pm-6:59pm
	        period, rate = "peak", 0.28
	    elif (7 <= hour < 14) or (19 <= hour < 23):  # 7am-1:59pm OR 7pm-10:59pm
	        period, rate = "mid-peak", 0.18
	    else:
	        period, rate = "off-peak", 0.10  # 11pm-6:59am
	    # The pricing above is a fixed schedule; only the mix comes from EIA.
	    renewable_pct, co2_intensity = _fetch_eia_grid_mix("MISO")
	    return {
	        "period": period,
	        "rate_per_kwh": rate,
	        "renewable_pct": renewable_pct,
	        "co2_intensity": co2_intensity,
	    }


	# NREL PVWatts session-level cache (keyed by location + system capacity +
	# calendar month, because the cached payload contains month-specific values)
	_NREL_PVWATTS_CACHE = {}


	def get_nrel_pvwatts_baseline() -> dict:
	    """Gets NREL PVWatts typical-year solar production baseline for the
	    community 72 kW array.

	    Use this to compare current real-time output (from get_solar_production)
	    against typical-year performance — useful for diagnosing under-/over-
	    performance and setting expectations for the current month. Cached per
	    session.

	    Returns:
	        Dictionary with annual_kwh, current_month_typical_kwh,
	        current_month_typical_kw_avg, capacity_kw, source.
	    """
	    # NOTE: listed in TOOLS; signature and docstring wording kept verbatim
	    # because they presumably feed the tool schema the model sees.
	    # Month is taken in the community's zone, matching get_weather.
	    month = datetime.now(ZoneInfo("America/New_York")).month
	    cache_key = ("ann_arbor", COMMUNITY_CAPACITY_KW, month)
	    if cache_key in _NREL_PVWATTS_CACHE:
	        return _NREL_PVWATTS_CACHE[cache_key]
	    try:
	        r = requests.get(
	            "https://developer.nrel.gov/api/pvwatts/v8.json",
	            params={
	                "api_key": NREL_API_KEY,
	                "lat": LAT, "lon": LON,
	                "system_capacity": COMMUNITY_CAPACITY_KW,
	                "module_type": 0, "losses": 14, "array_type": 1,
	                "tilt": 30, "azimuth": 180,
	            },
	            timeout=15,
	        ).json()
	        outputs = r.get("outputs") or {}
	        ac_monthly = outputs.get("ac_monthly") or []
	        if not ac_monthly:
	            # An error/empty reply must not be cached, or one transient
	            # failure would pin a baseline of nulls for the whole session.
	            # NOTE(review): assumes failures are reported under an "errors"
	            # key — confirm against the PVWatts v8 response format.
	            problems = r.get("errors")
	            detail = str(problems) if problems else "no ac_monthly in PVWatts response"
	            return {"error": detail, "source": "fallback"}
	        current_month_kwh = ac_monthly[month - 1]
	        result = {
	            "annual_kwh": outputs.get("ac_annual"),
	            "current_month_typical_kwh": current_month_kwh,
	            # Average power over the month, approximated with a 30-day month.
	            "current_month_typical_kw_avg": (
	                round(current_month_kwh / (30 * 24), 2)
	                if current_month_kwh else None
	            ),
	            "capacity_kw": COMMUNITY_CAPACITY_KW,
	            "source": "NREL PVWatts v8",
	        }
	        _NREL_PVWATTS_CACHE[cache_key] = result
	        return result
	    except Exception as e:
	        return {"error": str(e), "source": "fallback"}


	# Tool registry (5 tools). TOOLS is passed to the chat template as the tool
	# list; TOOL_MAP resolves a tool name emitted by the model to its callable.
	TOOLS = [
	    get_weather,
	    get_solar_production,
	    get_battery_state,
	    get_grid_status,
	    get_nrel_pvwatts_baseline,
	]
	TOOL_MAP = dict((tool.__name__, tool) for tool in TOOLS)


	# ── ZeroGPU Fallback (Live Data Mode) ────────────────────────────────────────

	def _fallback_respond(question):
	    """Build a Markdown report from live tool data without using the model.

	    All four data tools are queried, the question is matched against keyword
	    lists to pick a focus (solar, weather, battery, grid, or an overview
	    when nothing matches), and the matching sections are returned under a
	    "Live Data Mode" banner. The first topic whose keywords appear wins.
	    """
	    # Weather is fetched first because it feeds the solar estimate.
	    weather = get_weather()
	    solar = get_solar_production(
	        weather.get("clouds_pct", 30), weather.get("temperature_f", 77)
	    )
	    battery = get_battery_state()
	    grid = get_grid_status()

	    lowered = question.lower()
	    keyword_table = (
	        ("solar", ("solar", "production", "panel", "generat", "kwh", "kw")),
	        ("weather", ("weather", "temperature", "cloud", "wind", "rain", "sun", "forecast")),
	        ("battery", ("battery", "charge", "storage", "soc")),
	        ("grid", ("grid", "price", "rate", "peak", "tariff", "cost", "pricing")),
	    )
	    section = "overview"
	    for topic, needles in keyword_table:
	        if any(needle in lowered for needle in needles):
	            section = topic
	            break

	    banner = "".join([
	        "> Live Data Mode — Real-time API data shown below. ",
	        "The AI model is temporarily unavailable; showing raw API data instead.\n\n",
	    ])

	    weather_md = "".join([
	        "### Weather\n",
	        f"- Temperature: {weather['temperature_f']}°F\n",
	        f"- Conditions: {weather['description']}\n",
	        f"- Cloud cover: {weather['clouds_pct']}%\n",
	        f"- Wind: {weather['wind_mph']} mph\n",
	        f"- Humidity: {weather['humidity_pct']}%\n",
	        f"- Sunrise/Sunset: {weather['sunrise']} / {weather['sunset']}\n",
	    ])

	    solar_lines = [
	        "### Solar Production\n",
	        f"- Current output: {solar['production_kw']} kW of {solar['capacity_kw']} kW capacity\n",
	        f"- Efficiency: {solar['efficiency_pct']}%\n",
	        f"- Temp derating: {solar['temp_derate_pct']}%\n",
	    ]
	    # The irradiance line is shown only for a truthy reading.
	    if solar.get("ghi_wm2"):
	        solar_lines.append(f"- Solar irradiance (GHI): {solar['ghi_wm2']} W/m²\n")
	    solar_lines.append(f"- Data source: {solar['source']}\n")
	    solar_md = "".join(solar_lines)

	    battery_md = "".join([
	        "### Battery Storage\n",
	        f"- State of charge: {battery['soc_pct']}%\n",
	        f"- Energy stored: {battery['kwh_stored']} kWh of {battery['capacity_kwh']} kWh\n",
	        f"- Status: {'Charging' if battery['charging'] else 'Discharging/Idle'}\n",
	    ])

	    grid_md = "".join([
	        "### Grid Status\n",
	        f"- Pricing period: {grid['period']}\n",
	        f"- Rate: ${grid['rate_per_kwh']}/kWh\n",
	        f"- Renewable mix: {grid['renewable_pct']}%\n",
	        f"- CO2 intensity: {grid['co2_intensity']} kg/MWh\n",
	    ])

	    # Focused answers show the requested section followed by one companion.
	    focused_layout = {
	        "solar": (solar_md, weather_md),
	        "weather": (weather_md, solar_md),
	        "battery": (battery_md, solar_md),
	        "grid": (grid_md, solar_md),
	    }
	    if section in focused_layout:
	        body = "\n".join(focused_layout[section])
	    else:
	        body = "## SolarHive Community Dashboard\n\n" + "\n".join(
	            (solar_md, weather_md, battery_md, grid_md)
	        )

	    return banner + body


	# ── System Prompt ────────────────────────────────────────────────────────────
	# Repeated twice — prompt repetition improves instruction following in causal
	# LLMs. See: Leviathan et al. (2024), "Repeat to Improve Non-Reasoning LLMs".

	# One copy of the instructions; the prompt is this paragraph stated twice,
	# separated by a blank line.
	_SYSTEM_PROMPT_ONCE = (
	    "You are SolarHive, an AI energy advisor for a community of 12 homes "
	    "with rooftop solar and shared battery storage in Ann Arbor, Michigan. "
	    "Use the available tools to get real-time data before answering. "
	    "Be specific, reference actual data, and keep responses concise (3-5 sentences)."
	)
	SYSTEM_PROMPT = "\n\n".join([_SYSTEM_PROMPT_ONCE, _SYSTEM_PROMPT_ONCE])


	# ── Tool-Call Parsing Helpers ───────────────────────────────────────────────
	# Ported verbatim from solarhive_inference.py to keep the demo's dispatch
	# behavior byte-identical with the cloud benchmark + finetune training format.
	# Catches: wrapped + bare regex forms, negative-number args (`temp_f:-5` for
	# winter scenarios), boolean / null args, hallucinated kwargs (model emits
	# `get_grid_status{location:...}` even though the function takes no args).

	# Wrapped form (preferred): `<\|tool_call>call:fn{args}<tool_call\|>`
	_TOOL_CALL_WRAPPED_RE = re.compile(
	r'<\\|tool_call>\scall:(\w+)\{([^}])\}\s*<tool_call\\|>',
	re.DOTALL,
	)
	# Bare form (fallback when thinking-mode strips the wrapper)
	_TOOL_CALL_BARE_RE = re.compile(r'\bcall:(\w+)\{([^}]*)\}')
	# Arg parser: supports strings via <\|"\|>, ints, floats, negatives, bool, null
	_ARG_FIELD_RE = re.compile(
	r'(\w+)\s:\s'
	r'(?:<\\|"\\|>([^<])<\\|"\\|>\|(-?\d+\.?\d)\|(true\|false\|null))',
	)


	def _extract_tool_calls(raw):
	    """Return the (fn_name, args_str) pairs found in a decoded model output.

	    The wrapped pattern is tried first; the bare `call:fn{args}` pattern is
	    consulted only when the wrapped one finds nothing. An empty list means
	    the output contains no tool call.
	    """
	    return _TOOL_CALL_WRAPPED_RE.findall(raw) or _TOOL_CALL_BARE_RE.findall(raw)


	def _parse_tool_args(args_str):
	    """Convert a `key:val,key2:val2,...` argument string into a dict.

	    String values become str, numbers become float when they contain a "."
	    and int otherwise, and true/false/null become True/False/None. A field
	    whose captured value is empty is skipped.
	    """
	    literal_values = {"true": True, "false": False, "null": None}
	    parsed = {}
	    for field in _ARG_FIELD_RE.finditer(args_str):
	        key, text, number, literal = field.groups(default="")
	        if text:
	            parsed[key] = text
	        elif number:
	            parsed[key] = float(number) if "." in number else int(number)
	        elif literal:
	            parsed[key] = literal_values[literal]
	    return parsed


	def _safe_tool_call(fn, args):
	"""Dispatch a tool call defensively — drop kwargs the function doesn't accept.

	The model occasionally hallucinates extra kwargs (e.g., emitting
	`call:get_grid_status{location:<\|"\|>Ann Arbor, MI<\|"\|>}` even though
	the function takes no args). Without filtering, `fn(**args)` raises
	`TypeError: ... got an unexpected keyword argument 'location'` and
	crashes the agentic loop.

	If the function declares `**kwargs`, we pass everything through
	unchanged — that's an explicit opt-in to accept unknowns.
	"""
	sig = inspect.signature(fn)
	if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()):
	return fn(**args)
	accepted = set(sig.parameters.keys())
	filtered = {k: v for k, v in args.items() if k in accepted}
	if filtered != args:
	dropped = set(args) - set(filtered)
	print(f" [warn] {fn.__name__}: dropped hallucinated args {sorted(dropped)} (function takes {sorted(accepted) or 'no args'})")
	return fn(**filtered)


	# ── Agentic Loop ─────────────────────────────────────────────────────────────

	def _clean_generation(raw):
	    """Reduce a raw decoded generation to user-facing text.

	    Uses the processor's response parser first, strips leftover control
	    tokens, and falls back to stripping every <...> tag from the raw text
	    when that leaves nothing.
	    """
	    parsed = processor.parse_response(raw)
	    # `content` may be absent or None; treat both as empty text.
	    clean = (parsed.get("content") or "") if isinstance(parsed, dict) else str(parsed)
	    # Strip leftover special tokens (e.g. <eos>, <turn|>, <bos>)
	    clean = re.sub(r'<[a-z_\|]+>', '', clean).strip()
	    # Fallback: extract text from raw output if parsing produced empty result
	    if not clean:
	        clean = re.sub(r'<[^>]+>', '', raw).strip()
	    return clean


	def _generate_with_tools(messages, max_rounds=3):
	    """
	    Gemma 4 native agentic loop (transformers).

	    Each round renders `messages` with the tool list, generates, and looks
	    for tool calls in the raw output (regex on `call:fn{args}`). When there
	    are none the cleaned text is returned as the answer; otherwise every
	    call is executed and its result appended to `messages` (which is mutated
	    in place) before the next round. After `max_rounds` rounds one more
	    generation is made without tool schemas to force a text answer.

	    The chat template is rendered to text first and tokenized in a second
	    step; the original author notes this avoids a transformers 5.5.x bug on
	    messages without a 'content' key (e.g., tool_calls messages).

	    Args:
	        messages: Chat messages (dicts with "role" and usually "content").
	        max_rounds: Maximum number of tool-calling rounds.

	    Returns:
	        Dict with "response", "tool_calls" (every call made, in order) and
	        "rounds"; answers produced inside the loop also carry "raw_debug".
	    """
	    all_calls = []

	    for round_num in range(max_rounds):
	        # Extract images from messages for the processor
	        _images = []
	        for msg in messages:
	            content = msg.get("content")
	            if isinstance(content, list):
	                for item in content:
	                    if isinstance(item, dict) and item.get("type") == "image":
	                        _images.append(item["image"])

	        text = processor.apply_chat_template(
	            messages, tools=TOOLS, add_generation_prompt=True,
	            enable_thinking=False, tokenize=False,
	        )
	        if _images:
	            inputs = processor(text=text, images=_images, return_tensors="pt").to(model.device)
	        else:
	            inputs = processor(text=text, return_tensors="pt").to(model.device)

	        with torch.no_grad():
	            out = model.generate(
	                **inputs, max_new_tokens=1024,
	                temperature=1.0, top_p=0.95, top_k=64,
	            )

	        # Decode only the newly generated tokens, keeping control tokens so
	        # tool calls remain detectable.
	        raw = processor.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=False)

	        # Detect tool calls via canonical wrapped+bare regex (ported from
	        # solarhive_inference.py so dispatch behavior matches the cloud benchmark)
	        found = _extract_tool_calls(raw)

	        if not found:
	            # No tool calls — final answer
	            clean = _clean_generation(raw)
	            return {"response": clean, "tool_calls": all_calls, "rounds": round_num + 1,
	                    "raw_debug": raw[:300] if not clean else ""}

	        # Parse and execute each tool call (canonical helpers — supports
	        # negatives, booleans, null; defensive dispatch drops hallucinated kwargs)
	        calls, results = [], []
	        for fn_name, args_str in found:
	            args = _parse_tool_args(args_str)

	            call = {"name": fn_name, "arguments": args}
	            calls.append(call)
	            all_calls.append(call)

	            if fn_name in TOOL_MAP:
	                try:
	                    result = _safe_tool_call(TOOL_MAP[fn_name], args)
	                except Exception as tool_err:
	                    # A failing tool (bad argument value, missing required
	                    # argument, ...) is reported back to the model as data
	                    # instead of aborting the whole generation.
	                    result = {"error": f"{type(tool_err).__name__}: {tool_err}"}
	            else:
	                result = {"error": f"Unknown: {fn_name}"}
	            results.append({"name": fn_name, "response": result})

	        # Feed results back — match finetune/datagen training format exactly:
	        # 1) assistant message with tool_calls only
	        # 2) one role=tool message per tool result (json.dumps content)
	        messages.append({
	            "role": "assistant",
	            "tool_calls": [{"function": c} for c in calls],
	        })
	        for r_item in results:
	            messages.append({
	                "role": "tool",
	                "name": r_item["name"],
	                "content": json.dumps(r_item["response"]),
	            })

	    # Fallback: generate one final response without tool schemas to force a text answer
	    text = processor.apply_chat_template(
	        messages, add_generation_prompt=True,
	        enable_thinking=False, tokenize=False,
	    )
	    inputs = processor(text=text, return_tensors="pt").to(model.device)
	    with torch.no_grad():
	        out = model.generate(**inputs, max_new_tokens=1024, temperature=1.0, top_p=0.95, top_k=64)
	    raw = processor.decode(out[0][inputs["input_ids"].shape[1]:], skip_special_tokens=False)
	    clean = _clean_generation(raw)
	    if not clean:
	        clean = (
	            "I gathered data from multiple tools but ran out of reasoning rounds "
	            "before composing a final answer. This can happen with complex multi-tool "
	            "queries. Please try rephrasing your question or asking about one topic at a time."
	        )
	    return {"response": clean, "tool_calls": all_calls, "rounds": max_rounds}


	# Apply ZeroGPU decorator — 120s needed for multi-round tool calling (2-3 rounds)
	if spaces is not None:
	    _generate_with_tools = spaces.GPU(duration=120)(_generate_with_tools)


	# ── Agent Wrapper ────────────────────────────────────────────────────────────

	def solarhive_agent(question, image=None):
	    """Run the tool-calling agent on a question, optionally with an image.

	    Without an image the user turn is the plain question string and the
	    system prompt is SYSTEM_PROMPT. With an image the user turn becomes an
	    image part followed by a text part, and an image-analysis instruction is
	    appended to the system prompt twice. Returns whatever
	    _generate_with_tools returns.
	    """
	    if image:
	        vqa_instruction = (
	            " When an image is provided, FIRST describe what you observe in the "
	            "image (e.g., cloud cover, sky color, panel condition). Base your "
	            "primary assessment on visual observation. You may call tools for "
	            "additional context, but note any differences between what the image "
	            "shows and what the station data reports."
	        )
	        system_text = SYSTEM_PROMPT + vqa_instruction + vqa_instruction
	        user_content = [
	            {"type": "image", "image": image},
	            {"type": "text", "text": question},
	        ]
	    else:
	        system_text = SYSTEM_PROMPT
	        user_content = question

	    conversation = [
	        {"role": "system", "content": system_text},
	        {"role": "user", "content": user_content},
	    ]
	    return _generate_with_tools(conversation)


	# ── Gradio Chat Handler ─────────────────────────────────────────────────────

	def respond(message, history):
	    """Handle chat messages with optional image upload.

	    Args:
	        message: Either a plain string or a multimodal dict with "text" and
	            "files" entries.
	        history: Previous turns supplied by the chat UI; not used here, so
	            every question is answered on its own.

	    Returns:
	        Markdown text: the model's answer (prefixed with the tools called),
	        or a live-data report when inference fails or the model returns
	        nothing after calling tools.
	    """
	    # Extract text and files from multimodal input; tolerate None values.
	    if isinstance(message, dict):
	        text = (message.get("text") or "").strip()
	        files = message.get("files") or []
	    else:
	        text = str(message).strip()
	        files = []

	    if not text:
	        return "Please enter a question about your community solar system."

	    # Load image if provided (only the first file is used)
	    image = None
	    if files:
	        try:
	            fpath = files[0] if isinstance(files[0], str) else files[0].get("path", "")
	            if fpath:
	                image = Image.open(fpath).convert("RGB")
	        except Exception as img_err:
	            # Best effort: answer from text alone, but leave a trace in the logs.
	            print(f" [warn] could not load uploaded image: {type(img_err).__name__}: {img_err}")

	    # Run agent — fall back to live API data on any inference error
	    try:
	        result = solarhive_agent(text, image)
	    except Exception as e:
	        err_str = f"{type(e).__name__}: {e}"
	        fallback = _fallback_respond(text)
	        fallback += (
	            f"\n\n---\n*AI model unavailable: `{err_str[:150]}`. "
	            "Sign in with a free [HuggingFace account](https://huggingface.co/join) "
	            "for GPU access, or try again later.*"
	        )
	        return fallback

	    # Format response
	    response = result.get("response", "")
	    tool_calls = result.get("tool_calls", [])
	    rounds = result.get("rounds", 0)
	    tool_names = ", ".join(c["name"] for c in tool_calls)

	    # If model called tools but produced empty response, show live data fallback
	    if not response.strip() and tool_calls:
	        fallback = _fallback_respond(text)
	        raw_debug = result.get("raw_debug", "")
	        debug_line = f"\n\n---\nModel called {tool_names} but returned empty. Debug: `{raw_debug[:150]}`" if raw_debug else ""
	        # Plain "|" separator: a backslash before it would be printed literally.
	        return f"Tools called: {tool_names} | Rounds: {rounds}\n\n{fallback}{debug_line}"

	    if not response.strip():
	        response = "No response generated. Please try again."

	    if tool_calls:
	        header = f"Tools called: {tool_names} | Rounds: {rounds}\n\n"
	        response = header + response

	    return response


	# ── Gradio UI ────────────────────────────────────────────────────────────────

	# Stylesheet for the usage-notice banner; passed to gr.Blocks(css=...).
	CUSTOM_CSS = """\
	.notice-banner {
	margin: 4px 0 12px 0;
	padding: 10px 14px;
	background: rgba(245, 158, 11, 0.10);
	border: 1px solid rgba(245, 158, 11, 0.28);
	border-radius: 8px;
	font-size: 12px;
	line-height: 1.5;
	color: #d1d5db;
	}
	.notice-banner a { color: #60a5fa; text-decoration: underline; }
	.notice-banner strong.notice-label { color: #fbbf24; }
	"""

	# HTML banner (quota notice + disclaimer) rendered with gr.HTML above the
	# description; styled by the .notice-banner rules in CUSTOM_CSS.
	NOTICE_HTML = """\
	<div class="notice-banner">
	<strong class="notice-label">Usage Notice:</strong>
	This demo runs on <a href="https://huggingface.co/docs/hub/spaces-zerogpu" target="_blank" rel="noopener noreferrer">ZeroGPU</a>
	with limited GPU allocation.
	Anonymous and free users may only get <strong>1 full query</strong> (2 min/day GPU quota).
	<a href="https://huggingface.co/join" target="_blank" rel="noopener noreferrer">Sign in</a>
	for access, or
	<a href="https://huggingface.co/subscribe/pro" target="_blank" rel="noopener noreferrer">upgrade to HF Pro</a>
	for extended GPU time (25 min/day).
	<br><br>
	<strong class="notice-label">Disclaimer:</strong>
	This is a hackathon demo for evaluation purposes only.
	Do not submit confidential, sensitive, or personal data.
	Use of this demo is at your own risk and is subject to
	<a href="https://huggingface.co/terms-of-service" target="_blank" rel="noopener noreferrer">Hugging Face's Terms of Service</a>
	and
	<a href="https://huggingface.co/privacy" target="_blank" rel="noopener noreferrer">Privacy Policy</a>.
	</div>
	"""

	# Markdown shown under the banner via gr.Markdown. A trailing backslash
	# joins a line with the next one inside the string; blank lines separate
	# paragraphs.
	# NOTE(review): the text attributes grid pricing to EIA, while
	# get_grid_status takes the rate from a fixed time-of-use schedule and only
	# the renewable mix / CO2 figures from EIA — confirm the wording.
	DESCRIPTION = """\
	AI-powered community solar energy intelligence built with fine-tuned \
	Gemma 4 26B A4B and native function calling for real-time data.

	The agent serves a 12-home solar community in Ann Arbor, Michigan \
	(72 kW panels, 100 kWh shared battery). Five tools fetch real-time data — \
	OpenWeatherMap, Open-Meteo (irradiance), NREL PVWatts (typical-year baseline), \
	EIA (grid pricing + renewable mix), and a battery-state simulator. \
	Image upload supports sky-photo cloud-coverage analysis and panel-condition inspection.

	Try it: Type a question below or click an example. \
	Upload a sky or panel photo (paperclip icon) for visual analysis.

	This submission targets the Global Resilience track (main) plus all five \
	Special Tech tracks: Ollama, llama.cpp, Unsloth, Cactus, and \
	LiteRT. The same Unsloth fine-tune ships as a 5.3 GB GGUF for Ollama + \
	llama.cpp on a laptop CPU, a 6.94 GB Cactus INT4 bundle for Android, and a \
	LiteRT-LM Python runtime demo for cross-platform edge (browser / Pi 5 / Jetson).

	Resilient inference: loads [SolarHive 26B A4B merged](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-merged) \
	in BF16, with the pre-quantized [SolarHive A4B NF4](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-nf4) \
	as an OOM-safe fallback — both score identical 9/10 + 3/3 When2Call validation, \
	so the demo quality bar is preserved regardless of which variant loads. If GPU \
	inference is unavailable, the demo gracefully serves live API data.

	📖 [GitHub repo](https://github.com/youshen-lim/the-gemma4-good-hackathon-solarhive) · \
	🧠 [Cloud model (A4B LoRA)](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-lora) · \
	⚡ [Edge model (E4B GGUF)](https://huggingface.co/Truthseeker87/solarhive-e4b-gguf) · \
	📱 [Mobile model (E4B Cactus)](https://huggingface.co/Truthseeker87/solarhive-e4b-cactus) · \
	📊 [Training dataset](https://huggingface.co/datasets/Truthseeker87/solarhive-community-solar-multimodal) · \
	📝 [Kaggle writeup](https://kaggle.com/competitions/gemma-4-good-hackathon)

	Gemma 4 Good Hackathon — Google DeepMind × Kaggle\
	"""

	# Example prompts offered in the chat UI, in display order.
	_EXAMPLE_PROMPTS = (
	    # Live tool routing
	    "What's the current solar production?",
	    "Full community energy audit — check weather, solar, battery, and grid pricing. Give a 3-sentence status report.",
	    # NREL probe — exercises the 5th tool (typical-year baseline comparison)
	    "Is today's production above typical for this month?",
	    # Domain knowledge (no tool expected)
	    "Should I run my pool heater now or wait?",
	    "Home #7's panels are producing 15% less than neighbors. What should we check?",
	    # When2Call probes (Ross et al. 2025, arXiv:2504.18851) — let judges
	    # experience the trained refusal/follow-up behavior live
	    "What's the current grid rate?",  # (b) well-specified → expect get_grid_status call
	    "How much will a 10 kW array produce today?",  # (c) under-specified → expect follow-up question, NO auto-fill
	    "What's the current air quality index in Ann Arbor?",  # (d) out-of-scope → expect graceful decline, NO hallucinated tool
	)
	# Each example is wrapped in the {"text": ...} message shape.
	EXAMPLES = [{"text": prompt} for prompt in _EXAMPLE_PROMPTS]

	# Page layout, top to bottom: title, usage notice, description, chat widget.
	with gr.Blocks(
	    title="SolarHive — Community Solar Intelligence",
	    css=CUSTOM_CSS,
	    theme=gr.themes.Default(),
	) as demo:
	    gr.Markdown("# SolarHive — Community Solar Intelligence")
	    gr.HTML(NOTICE_HTML)
	    gr.Markdown(DESCRIPTION)
	    # `respond` accepts both a plain string and a {"text", "files"} dict;
	    # presumably multimodal=True is what makes the UI send the dict form.
	    gr.ChatInterface(
	        fn=respond,
	        multimodal=True,
	        examples=EXAMPLES,
	    )

	# Start the web server only when the file is executed as a script.
	if __name__ == "__main__":
	    demo.launch()