# solarhive / app.py
# Author: Truthseeker87
# Commit message: "DESCRIPTION: 5-track coverage + Resilient inference (BF16 merged + NF4 fallback) + expanded artifacts row"
# Commit: 4286bef (verified)
"""
SolarHive — Community Solar Intelligence

Gradio app for HuggingFace Spaces (persistent GPU)
Fine-tuned Gemma 4 26B A4B with native function calling for
real-time solar production, weather, battery, and grid data.

SolarHive is an open-source intelligence layer designed to coordinate
community microgrids & community-based storage via fuel cells, pool
midday energy surplus across these microgrids, and eliminate stranded
capacity. It also helps forecast solar irradiance and cloud cover to
plan ahead. Gemma 4 is the brain that powers it.

Gemma 4 Good Hackathon — Google DeepMind x Kaggle
"""
import os
import json
import re
import random
import inspect
import requests
import torch
import gradio as gr
from datetime import datetime, timezone, timedelta
from zoneinfo import ZoneInfo
from PIL import Image
from transformers import AutoProcessor, AutoModelForCausalLM, BitsAndBytesConfig
# ZeroGPU support — falls back to a no-op for local testing.
# When the `spaces` package cannot be imported the name is bound to None,
# and later code branches on `spaces is not None`.
try:
    import spaces
except ImportError:
    spaces = None
# ── Constants ────────────────────────────────────────────────────────────────
# Coordinates sent with every location-based API request in this file.
LAT = 42.2808
LON = -83.7430
# Nameplate sizes of the community array (kW) and the shared battery (kWh).
COMMUNITY_CAPACITY_KW = 72
BATTERY_CAPACITY_KWH = 100

# Demo-mode API key fallbacks for the live HF Space.
# These are FREE-TIER keys (OWM 1000 calls/day, EIA unlimited, NREL
# unlimited) — no billing exposure, easily rotatable. They are hardcoded so
# judges who land on the live demo without Space-secret config still get a
# working experience. In production, set Space secrets to override.
# NOTE(review): credentials committed to a public file can be harvested and
# abused even on free tiers — consider rotating these and requiring the
# environment variables once the demo period ends.
OWM_API_KEY = os.getenv("OWM_API_KEY", "84a310689d5620edd1b5e4c14d8fb29b")
EIA_API_KEY = os.getenv("EIA_API_KEY", "ZXzaFCfSc1aU7nfu3Y6wctwxyiFPHEclHhycI3Xm")
NREL_API_KEY = os.getenv("NREL_API_KEY", "LI4AHQodsW7b0L0T3BCHtUA3PyjvOXP8zrFrZuiQ")
# ── Model Loading ────────────────────────────────────────────────────────────
# Repo the processor is loaded from, and repo of the merged fine-tuned weights.
BASE_MODEL_ID = "google/gemma-4-26b-a4b-it"
MODEL_ID = "Truthseeker87/solarhive-26b-a4b-merged"

# OOM fallback — pre-quantized NF4 (~13-16 GB) used if BF16 (~48 GB) doesn't
# fit the allocated ZeroGPU tier (e.g., A10g 24 GB). Pre-quantized weights
# load directly without BitsAndBytesConfig. Both variants score 9/10 + 3/3
# When2Call in the cross-variant validation — equivalent demo quality.
MODEL_ID_NF4_FALLBACK = "Truthseeker87/solarhive-26b-a4b-nf4"

# Env-var override — set SOLARHIVE_FORCE_NF4=1 in Space Variables (NOT
# Secrets — Variables are visible to the runtime as os.environ) to skip
# the BF16 attempt entirely and load NF4 directly. Useful when you've
# confirmed the allocated tier is too small for BF16 (e.g., A10g) and
# want to avoid the BF16-fail-then-fallback download time on every
# cold-start.
_TRUTHY_ENV_VALUES = ("1", "true", "yes")
_force_nf4_raw = os.environ.get("SOLARHIVE_FORCE_NF4", "")
FORCE_NF4 = _force_nf4_raw.lower() in _TRUTHY_ENV_VALUES
# Module-level model bootstrap. Binds two globals used by the rest of the
# file: `processor` (always loaded from BASE_MODEL_ID) and `model` (the
# fine-tuned weights, in BF16 or NF4 depending on the branch taken below).
print(f"Loading processor from {BASE_MODEL_ID}")
processor = AutoProcessor.from_pretrained(BASE_MODEL_ID, trust_remote_code=True)
print(f"Loading fine-tuned model from {MODEL_ID}")
if spaces is not None:
    # ZeroGPU: defensive load with NF4 fallback.
    # Primary path: BF16 (highest fidelity, ~48 GB) — fits H200 / Half H200.
    # Fallback path: pre-quantized NF4 (~13-16 GB) — fits A10g and other
    # smaller tiers ZeroGPU may allocate. Both variants score 9/10 + 3/3 W2C
    # in the cross-variant validation, so the demo's quality bar is preserved
    # either way. Set SOLARHIVE_FORCE_NF4=1 in Space Variables to skip the
    # BF16 attempt entirely.
    # https://huggingface.co/docs/hub/spaces-zerogpu
    if FORCE_NF4:
        print(f"SOLARHIVE_FORCE_NF4 set — loading pre-quantized NF4 from {MODEL_ID_NF4_FALLBACK}")
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID_NF4_FALLBACK, device_map="cuda:0", trust_remote_code=True,
        )
        print("Model loaded in NF4 (forced via SOLARHIVE_FORCE_NF4 env var)")
    else:
        try:
            print(f"Loading BF16 from {MODEL_ID} (primary path)")
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID, dtype=torch.bfloat16, trust_remote_code=True,
            )
            model.to("cuda")
            print("Model loaded in BF16 for ZeroGPU (primary path)")
        except Exception as _bf16_err:
            # Any failure (not only OOM) lands here. Free anything partially
            # loaded so the fallback has clean memory; `model` may not be
            # bound yet if from_pretrained itself raised, hence the NameError
            # guard.
            try:
                del model
            except NameError:
                pass
            import gc as _gc
            _gc.collect()
            try:
                torch.cuda.empty_cache()
            except Exception:
                # Best-effort cleanup only; the fallback load below decides
                # whether startup succeeds.
                pass
            print(f"BF16 load failed ({type(_bf16_err).__name__}): {_bf16_err}")
            print(f"Falling back to pre-quantized NF4 from {MODEL_ID_NF4_FALLBACK}")
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID_NF4_FALLBACK, device_map="cuda:0", trust_remote_code=True,
            )
            print("Model loaded in NF4 (BF16 OOM fallback path)")
else:
    # Non-ZeroGPU: auto-detect VRAM for quantization decision.
    # `_free` stays 0 when CUDA is unavailable or the query fails, which
    # selects the 4-bit branch below.
    _free = 0
    try:
        if torch.cuda.is_available():
            _free = torch.cuda.mem_get_info(0)[0] / 1e9
    except Exception:
        pass
    print(f"Available VRAM: {_free:.1f} GB")
    if _free >= 55:
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, dtype=torch.bfloat16, device_map="auto", trust_remote_code=True,
        )
    else:
        # Quantize the merged checkpoint to NF4 on the fly.
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_quant_type="nf4",
        )
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID, quantization_config=bnb_config,
            device_map="cuda:0", trust_remote_code=True,
        )
    print(f"Model loaded on {model.device} ({'BF16' if _free >= 55 else 'NF4'})")
# ── Tool Functions ───────────────────────────────────────────────────────────
def get_weather(location: str = "Ann Arbor, MI") -> dict:
    """Gets current weather conditions for the community.

    Args:
        location: The city and state, e.g. "Ann Arbor, MI"

    Returns:
        Dictionary with temperature_f, clouds_pct, description, wind_mph, humidity_pct, sunrise, sunset.
    """
    # The docstring wording is kept as-is: this function is listed in TOOLS,
    # which is handed to apply_chat_template(tools=...), so the text
    # presumably feeds the tool schema shown to the model.
    # `location` is accepted for the tool signature only; the request below
    # always uses the fixed LAT/LON.
    local_tz = ZoneInfo("America/New_York")

    def _as_clock(epoch_seconds):
        # Render a Unix timestamp as HH:MM in the community's timezone.
        return datetime.fromtimestamp(epoch_seconds, tz=local_tz).strftime("%H:%M")

    try:
        query = {"lat": LAT, "lon": LON, "appid": OWM_API_KEY, "units": "imperial"}
        payload = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params=query,
            timeout=10,
        ).json()
        # Fields are read in the same order as the returned keys so that a
        # malformed payload fails on the same missing key.
        report = {}
        report["temperature_f"] = payload["main"]["temp"]
        report["clouds_pct"] = payload["clouds"]["all"]
        report["description"] = payload["weather"][0]["description"]
        report["wind_mph"] = payload["wind"]["speed"]
        report["humidity_pct"] = payload["main"]["humidity"]
        report["sunrise"] = _as_clock(payload["sys"]["sunrise"])
        report["sunset"] = _as_clock(payload["sys"]["sunset"])
        return report
    except Exception as e:
        # Any network/parse failure yields canned values plus the error text,
        # so callers always receive the full set of keys.
        return {"error": str(e), "clouds_pct": 30, "temperature_f": 72,
                "description": "partly cloudy", "wind_mph": 5.0,
                "humidity_pct": 50, "sunrise": "07:00", "sunset": "20:00"}
def _get_current_ghi():
"""Fetch current Global Horizontal Irradiance (W/m2) from Open-Meteo.
Free API, no key required. Uses NOAA GFS + HRRR satellite models.
Inherently accounts for cloud thickness, sun angle, atmosphere, and season.
"""
try:
r = requests.get(
"https://api.open-meteo.com/v1/forecast",
params={"latitude": LAT, "longitude": LON, "current": "shortwave_radiation"},
timeout=10,
).json()
return r["current"]["shortwave_radiation"]
except Exception:
return None
def get_solar_production(clouds_pct: int = 30, temp_f: float = 77.0) -> dict:
    """Estimates current community solar production using live solar irradiance data.

    Args:
        clouds_pct: Current cloud cover percentage (0-100). Get this from get_weather first.
        temp_f: Current temperature in Fahrenheit. Get this from get_weather first.

    Returns:
        Dictionary with production_kw, capacity_kw, efficiency_pct, ghi_wm2, temp_derate_pct, source.
    """
    # The docstring wording is kept as-is: this function is listed in TOOLS,
    # which is handed to apply_chat_template(tools=...), so the text
    # presumably feeds the tool schema shown to the model.

    def _number_or(value, default):
        # The tool-call parser can hand over None (model emitted `null`) or a
        # quoted string; fall back to the default instead of raising.
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    clouds_pct = max(0, min(100, int(_number_or(clouds_pct, 30))))
    temp_f = max(-40, min(130, _number_or(temp_f, 77.0)))

    # Lumped system efficiency covering inverter, wiring, soiling and
    # mismatch losses.
    SYSTEM_EFF = 0.85
    # Temperature derating above 77F (25C), floored at 75 %.
    # NOTE(review): 0.004 is applied per degree Fahrenheit here; panel
    # datasheets usually quote roughly 0.4 % per degree Celsius — confirm
    # the intended coefficient.
    temp_derate = max(0.75, 1.0 - 0.004 * max(0, temp_f - 77))

    ghi = _get_current_ghi()
    if ghi is not None:
        production = round(max(0, COMMUNITY_CAPACITY_KW * (ghi / 1000) * SYSTEM_EFF * temp_derate), 1)
        return {
            "production_kw": production,
            "capacity_kw": COMMUNITY_CAPACITY_KW,
            "efficiency_pct": round(production / COMMUNITY_CAPACITY_KW * 100, 1),
            "ghi_wm2": round(ghi, 1),
            "temp_derate_pct": round(temp_derate * 100, 1),
            "source": "open-meteo",
        }

    # Fallback: cloud%-based estimate (less accurate — no seasonal sun angle).
    efficiency = max(0.15, 0.85 - (clouds_pct / 100) * 0.70)
    # Use the community's local hour (same zone as get_weather) rather than
    # the server clock, so the daylight curve is not shifted on hosts whose
    # system time is set to another zone.
    hour = datetime.now(ZoneInfo("America/New_York")).hour
    # Parabola peaking at noon and reaching zero at 06:00 and 18:00.
    time_factor = max(0, 1 - ((hour - 12) / 6) ** 2) if 6 <= hour <= 18 else 0
    production = round(COMMUNITY_CAPACITY_KW * efficiency * time_factor * temp_derate, 1)
    return {
        "production_kw": production,
        "capacity_kw": COMMUNITY_CAPACITY_KW,
        "efficiency_pct": round(production / COMMUNITY_CAPACITY_KW * 100, 1),
        "temp_derate_pct": round(temp_derate * 100, 1),
        "source": "fallback",
    }
class _BatterySimulator:
    """Simulated shared battery whose state of charge is fixed at creation.

    The state of charge is drawn once in ``__init__`` and never modified by
    this class, so repeated ``get_state`` calls report the same values.
    """

    def __init__(self, capacity_kwh=BATTERY_CAPACITY_KWH):
        self.capacity = capacity_kwh
        # Random starting state of charge between 55 and 85 %, one decimal.
        self.soc = round(random.uniform(55, 85), 1)

    def get_state(self):
        """Return soc_pct, kwh_stored (whole kWh), capacity_kwh and charging."""
        stored_fraction = self.soc / 100
        return dict(
            soc_pct=self.soc,
            kwh_stored=round(stored_fraction * self.capacity),
            capacity_kwh=self.capacity,
            # Reported as charging only while below 50 % state of charge.
            charging=self.soc < 50,
        )


# Single shared instance backing get_battery_state().
_battery = _BatterySimulator()
def get_battery_state() -> dict:
    """Gets the current state of the community shared battery storage.

    Returns:
        Dictionary with soc_pct (state of charge), kwh stored, capacity_kwh, charging status.
    """
    # Thin tool wrapper: reports the state of the module-level `_battery`
    # simulator. The docstring wording is left unchanged because this
    # function is listed in TOOLS, which is passed to the chat template.
    return _battery.get_state()
_EIA_RESPONDENT = {"MISO": "MISO", "CAISO": "CISO"}
_FALLBACK_GRID = {
"MISO": {"renewable_pct": 12.5, "co2_intensity": 520},
"CAISO": {"renewable_pct": 38.0, "co2_intensity": 280},
}
def _fetch_eia_grid_mix(region="MISO"):
"""Fetch current grid mix from EIA API v2. Returns (renewable_pct, co2_intensity) or fallback."""
eia_code = _EIA_RESPONDENT.get(region, region)
try:
end = datetime.now(timezone.utc) - timedelta(days=1)
start = end - timedelta(days=1)
r = requests.get(
"https://api.eia.gov/v2/electricity/rto/fuel-type-data/data/",
params={
"api_key": EIA_API_KEY,
"frequency": "hourly",
"data[0]": "value",
"facets[respondent][]": eia_code,
"start": start.strftime("%Y-%m-%dT%H"),
"end": end.strftime("%Y-%m-%dT%H"),
"sort[0][column]": "period",
"sort[0][direction]": "desc",
"length": 200,
},
timeout=15,
).json()
rows = r.get("response", {}).get("data", [])
if not rows:
fb = _FALLBACK_GRID.get(region, _FALLBACK_GRID["MISO"])
return fb["renewable_pct"], fb["co2_intensity"]
latest_period = rows[0].get("period")
latest = [row for row in rows if row.get("period") == latest_period]
total_mw, renewable_mw = 0, 0
_RENEWABLE = {"SUN", "WND", "WAT", "GEO"}
_FOSSIL_CO2 = {"COL": 1000, "NG": 450, "PET": 900, "OTH": 500}
co2_total = 0
for row in latest:
mw = float(row.get("value") or 0)
fuel = row.get("fueltype", "")
total_mw += mw
if fuel in _RENEWABLE:
renewable_mw += mw
co2_total += mw * _FOSSIL_CO2.get(fuel, 0)
if total_mw > 0:
renewable_pct = min(100.0, round(renewable_mw / total_mw * 100, 1))
co2_intensity = max(0, round(co2_total / total_mw, 1))
return renewable_pct, co2_intensity
except Exception:
pass
fb = _FALLBACK_GRID.get(region, _FALLBACK_GRID["MISO"])
return fb["renewable_pct"], fb["co2_intensity"]
def get_grid_status() -> dict:
    """Gets current electricity grid pricing period, rate, and grid mix (renewable percentage, CO2 intensity).

    Returns:
        Dictionary with period (peak/mid-peak/off-peak), rate_per_kwh in USD,
        renewable_pct, and co2_intensity (kg CO2/MWh).
    """
    # The docstring wording is kept as-is: this function is listed in TOOLS,
    # which is handed to apply_chat_template(tools=...), so the text
    # presumably feeds the tool schema shown to the model.
    #
    # The time-of-use schedule is defined in the community's local time, so
    # read the hour in the same zone get_weather uses instead of relying on
    # the host's system clock.
    hour = datetime.now(ZoneInfo("America/New_York")).hour
    if 14 <= hour < 19:  # 2pm-6:59pm
        period, rate = "peak", 0.28
    elif (7 <= hour < 14) or (19 <= hour < 23):  # 7am-1:59pm OR 7pm-10:59pm
        period, rate = "mid-peak", 0.18
    else:
        period, rate = "off-peak", 0.10  # 11pm-6:59am
    renewable_pct, co2_intensity = _fetch_eia_grid_mix("MISO")
    return {
        "period": period,
        "rate_per_kwh": rate,
        "renewable_pct": renewable_pct,
        "co2_intensity": co2_intensity,
    }
# NREL PVWatts session-level cache (keyed by location + system capacity).
# Only successful responses are stored, and what is stored is the raw
# PVWatts "outputs" mapping, so month-dependent fields are recomputed on
# every call instead of being frozen at the month of the first request.
_NREL_PVWATTS_CACHE = {}
# Month lengths used to turn a monthly kWh total into an average kW.
# Assumes the PVWatts typical year has 365 days — TODO confirm.
_TYPICAL_YEAR_MONTH_DAYS = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)


def get_nrel_pvwatts_baseline() -> dict:
    """Gets NREL PVWatts typical-year solar production baseline for the
    community 72 kW array.
    Use this to compare current real-time output (from get_solar_production)
    against typical-year performance — useful for diagnosing under-/over-
    performance and setting expectations for the current month. Cached per
    session.

    Returns:
        Dictionary with annual_kwh, current_month_typical_kwh,
        current_month_typical_kw_avg, capacity_kw, source.
    """
    # The docstring wording is kept (only a garbled dash repaired): this
    # function is listed in TOOLS, which is handed to
    # apply_chat_template(tools=...), so the text presumably feeds the tool
    # schema shown to the model.
    cache_key = ("ann_arbor", COMMUNITY_CAPACITY_KW)
    try:
        outputs = _NREL_PVWATTS_CACHE.get(cache_key)
        if outputs is None:
            payload = requests.get(
                "https://developer.nrel.gov/api/pvwatts/v8.json",
                params={
                    "api_key": NREL_API_KEY,
                    "lat": LAT, "lon": LON,
                    "system_capacity": COMMUNITY_CAPACITY_KW,
                    "module_type": 0, "losses": 14, "array_type": 1,
                    "tilt": 30, "azimuth": 180,
                },
                timeout=15,
            ).json()
            outputs = payload.get("outputs") or {}
            if not outputs.get("ac_monthly"):
                # Do not cache a failed/empty response: a transient API error
                # would otherwise be served for the rest of the session.
                detail = (
                    payload.get("errors")
                    or payload.get("error")
                    or "PVWatts response contained no ac_monthly data"
                )
                return {"error": str(detail), "source": "fallback"}
            _NREL_PVWATTS_CACHE[cache_key] = outputs

        month_idx = datetime.now(ZoneInfo("America/New_York")).month - 1
        current_month_kwh = outputs["ac_monthly"][month_idx]
        hours_in_month = _TYPICAL_YEAR_MONTH_DAYS[month_idx] * 24
        return {
            "annual_kwh": outputs.get("ac_annual"),
            "current_month_typical_kwh": current_month_kwh,
            "current_month_typical_kw_avg": (
                round(current_month_kwh / hours_in_month, 2)
                if current_month_kwh else None
            ),
            "capacity_kw": COMMUNITY_CAPACITY_KW,
            "source": "NREL PVWatts v8",
        }
    except Exception as e:
        return {"error": str(e), "source": "fallback"}
# Registry: maps function names to callables (5 tools — all 3 keyed APIs exercised)
# TOOLS is the list handed to the chat template as the available tool set;
# TOOL_MAP resolves a tool name parsed from model output back to its callable.
TOOLS = [
    get_weather, get_solar_production, get_battery_state,
    get_grid_status, get_nrel_pvwatts_baseline,
]
TOOL_MAP = {fn.__name__: fn for fn in TOOLS}
# ── ZeroGPU Fallback (Live Data Mode) ────────────────────────────────────────
def _fallback_respond(question):
    """Serve real API data when GPU inference fails (OOM safety net).

    Calls the weather, solar, battery and grid tools directly and renders
    their results as Markdown. Keywords in ``question`` decide which section
    leads the reply; with no keyword match a full dashboard is returned.

    Args:
        question: The user's message text.

    Returns:
        Markdown string starting with a "Live Data Mode" banner.
    """
    weather = get_weather()
    solar = get_solar_production(weather.get("clouds_pct", 30), weather.get("temperature_f", 77))
    battery = get_battery_state()
    grid = get_grid_status()

    # Substring matching, first hit wins — the order of these checks matters
    # (e.g. "sun" is only reached when no solar keyword matched).
    q = question.lower()
    if any(k in q for k in ("solar", "production", "panel", "generat", "kwh", "kw")):
        section = "solar"
    elif any(k in q for k in ("weather", "temperature", "cloud", "wind", "rain", "sun", "forecast")):
        section = "weather"
    elif any(k in q for k in ("battery", "charge", "storage", "soc")):
        section = "battery"
    elif any(k in q for k in ("grid", "price", "rate", "peak", "tariff", "cost", "pricing")):
        section = "grid"
    else:
        section = "overview"

    banner = (
        "> **Live Data Mode** — Real-time API data shown below. "
        "The AI model is temporarily unavailable; showing raw API data instead.\n\n"
    )
    weather_md = (
        "### Weather\n"
        f"- **Temperature:** {weather['temperature_f']}°F\n"
        f"- **Conditions:** {weather['description']}\n"
        f"- **Cloud cover:** {weather['clouds_pct']}%\n"
        f"- **Wind:** {weather['wind_mph']} mph\n"
        f"- **Humidity:** {weather['humidity_pct']}%\n"
        f"- **Sunrise/Sunset:** {weather['sunrise']} / {weather['sunset']}\n"
    )
    solar_md = (
        "### Solar Production\n"
        f"- **Current output:** {solar['production_kw']} kW of {solar['capacity_kw']} kW capacity\n"
        f"- **Efficiency:** {solar['efficiency_pct']}%\n"
        f"- **Temp derating:** {solar['temp_derate_pct']}%\n"
    )
    # A reading of 0 W/m² (night) is valid data, so test for presence rather
    # than truthiness; the key is absent only on the cloud-based fallback.
    if solar.get("ghi_wm2") is not None:
        solar_md += f"- **Solar irradiance (GHI):** {solar['ghi_wm2']} W/m²\n"
    solar_md += f"- **Data source:** {solar['source']}\n"
    battery_md = (
        "### Battery Storage\n"
        f"- **State of charge:** {battery['soc_pct']}%\n"
        f"- **Energy stored:** {battery['kwh_stored']} kWh of {battery['capacity_kwh']} kWh\n"
        f"- **Status:** {'Charging' if battery['charging'] else 'Discharging/Idle'}\n"
    )
    grid_md = (
        "### Grid Status\n"
        f"- **Pricing period:** {grid['period']}\n"
        f"- **Rate:** ${grid['rate_per_kwh']}/kWh\n"
        f"- **Renewable mix:** {grid['renewable_pct']}%\n"
        f"- **CO2 intensity:** {grid['co2_intensity']} kg/MWh\n"
    )

    if section == "solar":
        body = solar_md + "\n" + weather_md
    elif section == "weather":
        body = weather_md + "\n" + solar_md
    elif section == "battery":
        body = battery_md + "\n" + solar_md
    elif section == "grid":
        body = grid_md + "\n" + solar_md
    else:
        body = "## SolarHive Community Dashboard\n\n" + solar_md + "\n" + weather_md + "\n" + battery_md + "\n" + grid_md
    return banner + body
# ── System Prompt ────────────────────────────────────────────────────────────
# The instruction paragraph is emitted twice, separated by a blank line —
# prompt repetition is intended to improve instruction following in causal
# LLMs. See: Leviathan et al., work on prompt repetition for non-reasoning
# LLMs. NOTE(review): verify the exact title and year of this citation.
_SYSTEM_PROMPT_PARAGRAPH = (
    "You are SolarHive, an AI energy advisor for a community of 12 homes "
    "with rooftop solar and shared battery storage in Ann Arbor, Michigan. "
    "Use the available tools to get real-time data before answering. "
    "Be specific, reference actual data, and keep responses concise (3-5 sentences)."
)
SYSTEM_PROMPT = "\n\n".join([_SYSTEM_PROMPT_PARAGRAPH] * 2)
# ── Tool-Call Parsing Helpers ───────────────────────────────────────────────
# Ported verbatim from solarhive_inference.py to keep the demo's dispatch
# behavior byte-identical with the cloud benchmark + finetune training format.
# Catches: wrapped + bare regex forms, negative-number args (`temp_f:-5` for
# winter scenarios), boolean / null args, hallucinated kwargs (model emits
# `get_grid_status{location:...}` even though the function takes no args).
# Wrapped form (preferred): `<|tool_call>call:fn{args}<tool_call|>`
_TOOL_CALL_WRAPPED_RE = re.compile(
r'<\|tool_call>\s*call:(\w+)\{([^}]*)\}\s*<tool_call\|>',
re.DOTALL,
)
# Bare form (fallback when thinking-mode strips the wrapper)
_TOOL_CALL_BARE_RE = re.compile(r'\bcall:(\w+)\{([^}]*)\}')
# Arg parser: supports strings via <|"|>, ints, floats, **negatives**, bool, null
_ARG_FIELD_RE = re.compile(
r'(\w+)\s*:\s*'
r'(?:<\|"\|>([^<]*)<\|"\|>|(-?\d+\.?\d*)|(true|false|null))',
)
def _extract_tool_calls(raw):
"""Extract (fn_name, args_str) tuples from a Gemma 4 model output.
Wrapped form wins when both appear; bare form is the GGUF/thinking-mode
fallback. Same two-pattern strategy as solarhive_inference.py.
"""
wrapped = _TOOL_CALL_WRAPPED_RE.findall(raw)
if wrapped:
return wrapped
return _TOOL_CALL_BARE_RE.findall(raw)
def _parse_tool_args(args_str):
"""Parse `key:val,key2:val2,...` from a Gemma 4 tool-call argument
string. Handles strings (`<|"|>...<|"|>`), ints, floats, negatives,
booleans, and null."""
out = {}
for key, str_val, num_val, bool_val in _ARG_FIELD_RE.findall(args_str):
if str_val:
out[key] = str_val
elif num_val:
out[key] = float(num_val) if "." in num_val else int(num_val)
elif bool_val:
out[key] = {"true": True, "false": False, "null": None}[bool_val]
return out
def _safe_tool_call(fn, args):
"""Dispatch a tool call defensively β€” drop kwargs the function doesn't accept.
The model occasionally hallucinates extra kwargs (e.g., emitting
`call:get_grid_status{location:<|"|>Ann Arbor, MI<|"|>}` even though
the function takes no args). Without filtering, `fn(**args)` raises
`TypeError: ... got an unexpected keyword argument 'location'` and
crashes the agentic loop.
If the function declares `**kwargs`, we pass everything through
unchanged β€” that's an explicit opt-in to accept unknowns.
"""
sig = inspect.signature(fn)
if any(p.kind == inspect.Parameter.VAR_KEYWORD for p in sig.parameters.values()):
return fn(**args)
accepted = set(sig.parameters.keys())
filtered = {k: v for k, v in args.items() if k in accepted}
if filtered != args:
dropped = set(args) - set(filtered)
print(f" [warn] {fn.__name__}: dropped hallucinated args {sorted(dropped)} (function takes {sorted(accepted) or 'no args'})")
return fn(**filtered)
# ── Agentic Loop ─────────────────────────────────────────────────────────────
def _collect_images(messages):
    """Return every image embedded in list-style message content, in order."""
    images = []
    for msg in messages:
        content = msg.get("content")
        if not isinstance(content, list):
            continue
        images.extend(
            item["image"]
            for item in content
            if isinstance(item, dict) and item.get("type") == "image"
        )
    return images


def _generate_raw(messages, tools=None):
    """Render the chat template, run one generation pass, return the new text.

    Special tokens are kept in the decoded text because tool calls are
    detected from Gemma 4 control tokens. ``tools`` is forwarded to the
    template only when given, so the tool-free final pass renders exactly as
    a call without the argument would.
    """
    template_kwargs = {
        "add_generation_prompt": True,
        "enable_thinking": False,
        "tokenize": False,
    }
    if tools is not None:
        template_kwargs["tools"] = tools
    text = processor.apply_chat_template(messages, **template_kwargs)

    # Images must accompany the text on every pass that renders messages
    # containing them, including the forced final answer.
    images = _collect_images(messages)
    if images:
        inputs = processor(text=text, images=images, return_tensors="pt").to(model.device)
    else:
        inputs = processor(text=text, return_tensors="pt").to(model.device)

    with torch.no_grad():
        out = model.generate(
            **inputs, max_new_tokens=1024,
            temperature=1.0, top_p=0.95, top_k=64,
        )
    prompt_len = inputs["input_ids"].shape[1]
    return processor.decode(out[0][prompt_len:], skip_special_tokens=False)


def _clean_model_text(raw):
    """Turn raw decoded output into display text (may be empty)."""
    parsed = processor.parse_response(raw)
    clean = (parsed.get("content") or "") if isinstance(parsed, dict) else str(parsed)
    # Strip leftover special tokens (e.g. <eos>, <turn|>, <bos>)
    clean = re.sub(r'<[a-z_|]+>', '', clean).strip()
    # Fallback: extract text from raw output if parsing produced empty result
    if not clean:
        clean = re.sub(r'<[^>]+>', '', raw).strip()
    return clean


def _generate_with_tools(messages, max_rounds=3):
    """Gemma 4 native agentic loop (transformers).

    Each round renders the conversation with the tool schemas, generates,
    and looks for tool calls (regex on Gemma 4 control tokens:
    call:fn{args}). Tool results are appended to ``messages`` (mutated in
    place) and the loop repeats. If ``max_rounds`` rounds all end in tool
    calls, one last pass without tool schemas forces a text answer.

    The template is rendered to text first and tokenized separately; per the
    original author this avoids a transformers 5.5.x bug on messages without
    a 'content' key (e.g., tool_calls messages).

    Args:
        messages: Chat messages (dicts with "role" and usually "content").
        max_rounds: Maximum number of tool-calling rounds.

    Returns:
        Dict with "response" (str), "tool_calls" (list of
        {"name", "arguments"}), "rounds" (int) and, when the answer came
        from inside the loop, "raw_debug" (first 300 raw characters if the
        cleaned response is empty, else "").
    """
    all_calls = []
    for round_num in range(max_rounds):
        raw = _generate_raw(messages, tools=TOOLS)

        # Detect tool calls via canonical wrapped+bare regex (ported from
        # solarhive_inference.py so dispatch behavior matches the cloud benchmark)
        found = _extract_tool_calls(raw)
        if not found:
            # No tool calls — final answer
            clean = _clean_model_text(raw)
            return {"response": clean, "tool_calls": all_calls, "rounds": round_num + 1,
                    "raw_debug": raw[:300] if not clean else ""}

        # Parse and execute each tool call (canonical helpers — supports
        # negatives, booleans, null; defensive dispatch drops hallucinated kwargs)
        calls, results = [], []
        for fn_name, args_str in found:
            args = _parse_tool_args(args_str)
            call = {"name": fn_name, "arguments": args}
            calls.append(call)
            all_calls.append(call)
            if fn_name in TOOL_MAP:
                try:
                    result = _safe_tool_call(TOOL_MAP[fn_name], args)
                except Exception as exc:
                    # Report the failure to the model like any other tool
                    # error instead of aborting the whole generation.
                    result = {"error": f"{type(exc).__name__}: {exc}"}
            else:
                result = {"error": f"Unknown: {fn_name}"}
            results.append({"name": fn_name, "response": result})

        # Feed results back — match finetune/datagen training format exactly:
        # 1) assistant message with tool_calls only
        # 2) one role=tool message per tool result (json.dumps content)
        messages.append({
            "role": "assistant",
            "tool_calls": [{"function": c} for c in calls],
        })
        for r_item in results:
            messages.append({
                "role": "tool",
                "name": r_item["name"],
                "content": json.dumps(r_item["response"]),
            })

    # Fallback: generate one final response without tool schemas to force a text answer
    clean = _clean_model_text(_generate_raw(messages))
    if not clean:
        clean = (
            "I gathered data from multiple tools but ran out of reasoning rounds "
            "before composing a final answer. This can happen with complex multi-tool "
            "queries. Please try rephrasing your question or asking about one topic at a time."
        )
    return {"response": clean, "tool_calls": all_calls, "rounds": max_rounds}


# Apply ZeroGPU decorator — 120s needed for multi-round tool calling (2-3 rounds)
if spaces is not None:
    _generate_with_tools = spaces.GPU(duration=120)(_generate_with_tools)
# ── Agent Wrapper ────────────────────────────────────────────────────────────
def solarhive_agent(question, image=None):
    """Build the chat messages for one question and run the agentic loop.

    Args:
        question: The user's question text.
        image: Optional image for visual question answering. When given, the
            user turn becomes an image + text content list and visual
            instructions are appended (twice) to the system prompt.

    Returns:
        The result dict produced by ``_generate_with_tools``.
    """
    system_text = SYSTEM_PROMPT
    user_content = question
    if image:
        vqa_instructions = (
            " When an image is provided, FIRST describe what you observe in the "
            "image (e.g., cloud cover, sky color, panel condition). Base your "
            "primary assessment on visual observation. You may call tools for "
            "additional context, but note any differences between what the image "
            "shows and what the station data reports."
        )
        # Repeated twice, mirroring the repetition used in SYSTEM_PROMPT.
        system_text = system_text + vqa_instructions * 2
        user_content = [
            {"type": "image", "image": image},
            {"type": "text", "text": question},
        ]
    return _generate_with_tools([
        {"role": "system", "content": system_text},
        {"role": "user", "content": user_content},
    ])
# ── Gradio Chat Handler ─────────────────────────────────────────────────────
def respond(message, history):
    """Gradio chat callback: answer one message, optionally with an image.

    Args:
        message: Either a plain string or a multimodal dict with "text" and
            "files" entries.
        history: Prior chat turns supplied by Gradio; not used here.

    Returns:
        Markdown string: the agent's answer (prefixed with the tools it
        called), or live API data when inference fails or returns nothing.
    """
    # Extract text and files from multimodal input
    if isinstance(message, dict):
        text = message.get("text", "").strip()
        files = message.get("files", [])
    else:
        text, files = str(message).strip(), []
    if not text:
        return "Please enter a question about your community solar system."

    # Load the first attached file as an RGB image; a file that cannot be
    # opened is ignored and the question proceeds as text-only.
    image = None
    if files:
        try:
            first = files[0]
            fpath = first if isinstance(first, str) else first.get("path", "")
            if fpath:
                image = Image.open(fpath).convert("RGB")
        except Exception:
            pass

    # Run agent — fall back to live API data on any inference error
    try:
        result = solarhive_agent(text, image)
    except Exception as e:
        err_str = f"{type(e).__name__}: {e}"
        return _fallback_respond(text) + (
            f"\n\n---\n*AI model unavailable: `{err_str[:150]}`. "
            "Sign in with a free [HuggingFace account](https://huggingface.co/join) "
            "for GPU access, or try again later.*"
        )

    response = result.get("response", "")
    tool_calls = result.get("tool_calls", [])
    rounds = result.get("rounds", 0)
    tool_names = ", ".join(c["name"] for c in tool_calls)
    header = f"**Tools called:** {tool_names} | Rounds: {rounds}\n\n"

    if response.strip():
        return header + response if tool_calls else response

    # Empty answer after tool use: show live data, plus raw debug text if any.
    if tool_calls:
        fallback = _fallback_respond(text)
        raw_debug = result.get("raw_debug", "")
        debug_line = ""
        if raw_debug:
            debug_line = f"\n\n---\n*Model called {tool_names} but returned empty. Debug: `{raw_debug[:150]}`*"
        return f"{header}{fallback}{debug_line}"

    return "No response generated. Please try again."
# ── Gradio UI ────────────────────────────────────────────────────────────────
# Stylesheet passed to gr.Blocks(css=...); styles the amber notice banner
# rendered from NOTICE_HTML (the `.notice-banner` / `.notice-label` classes).
CUSTOM_CSS = """\
.notice-banner {
margin: 4px 0 12px 0;
padding: 10px 14px;
background: rgba(245, 158, 11, 0.10);
border: 1px solid rgba(245, 158, 11, 0.28);
border-radius: 8px;
font-size: 12px;
line-height: 1.5;
color: #d1d5db;
}
.notice-banner a { color: #60a5fa; text-decoration: underline; }
.notice-banner strong.notice-label { color: #fbbf24; }
"""
# Usage-quota notice and disclaimer, rendered via gr.HTML at the top of the
# page. Relies on the `.notice-banner` / `.notice-label` rules in CUSTOM_CSS.
NOTICE_HTML = """\
<div class="notice-banner">
<strong class="notice-label">Usage Notice:</strong>
This demo runs on <a href="https://huggingface.co/docs/hub/spaces-zerogpu" target="_blank" rel="noopener noreferrer">ZeroGPU</a>
with limited GPU allocation.
Anonymous and free users may only get <strong>1 full query</strong> (2 min/day GPU quota).
<a href="https://huggingface.co/join" target="_blank" rel="noopener noreferrer">Sign in</a>
for access, or
<a href="https://huggingface.co/subscribe/pro" target="_blank" rel="noopener noreferrer">upgrade to HF Pro</a>
for extended GPU time (25 min/day).
<br><br>
<strong class="notice-label">Disclaimer:</strong>
This is a hackathon demo for evaluation purposes only.
Do not submit confidential, sensitive, or personal data.
Use of this demo is at your own risk and is subject to
<a href="https://huggingface.co/terms-of-service" target="_blank" rel="noopener noreferrer">Hugging Face's Terms of Service</a>
and
<a href="https://huggingface.co/privacy" target="_blank" rel="noopener noreferrer">Privacy Policy</a>.
</div>
"""
# Markdown shown under the notice banner via gr.Markdown. Trailing
# backslashes join source lines into one Markdown line; blank lines separate
# paragraphs.
DESCRIPTION = """\
**AI-powered community solar energy intelligence** built with fine-tuned \
**Gemma 4 26B A4B** and **native function calling** for real-time data.

The agent serves a **12-home solar community** in Ann Arbor, Michigan \
(72 kW panels, 100 kWh shared battery). Five tools fetch real-time data — \
OpenWeatherMap, Open-Meteo (irradiance), NREL PVWatts (typical-year baseline), \
EIA (grid pricing + renewable mix), and a battery-state simulator. \
Image upload supports sky-photo cloud-coverage analysis and panel-condition inspection.

**Try it:** Type a question below or click an example. \
Upload a sky or panel photo (paperclip icon) for visual analysis.

This submission targets the **Global Resilience** track (main) plus all five \
Special Tech tracks: **Ollama**, **llama.cpp**, **Unsloth**, **Cactus**, and \
**LiteRT**. The same Unsloth fine-tune ships as a 5.3 GB GGUF for Ollama + \
llama.cpp on a laptop CPU, a 6.94 GB Cactus INT4 bundle for Android, and a \
LiteRT-LM Python runtime demo for cross-platform edge (browser / Pi 5 / Jetson).

**Resilient inference:** loads [SolarHive 26B A4B merged](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-merged) \
in BF16, with the pre-quantized [SolarHive A4B NF4](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-nf4) \
as an OOM-safe fallback — both score identical 9/10 + 3/3 When2Call validation, \
so the demo quality bar is preserved regardless of which variant loads. If GPU \
inference is unavailable, the demo gracefully serves live API data.

📖 [GitHub repo](https://github.com/youshen-lim/the-gemma4-good-hackathon-solarhive) · \
🧠 [Cloud model (A4B LoRA)](https://huggingface.co/Truthseeker87/solarhive-26b-a4b-lora) · \
⚡ [Edge model (E4B GGUF)](https://huggingface.co/Truthseeker87/solarhive-e4b-gguf) · \
📱 [Mobile model (E4B Cactus)](https://huggingface.co/Truthseeker87/solarhive-e4b-cactus) · \
📊 [Training dataset](https://huggingface.co/datasets/Truthseeker87/solarhive-community-solar-multimodal) · \
📝 [Kaggle writeup](https://kaggle.com/competitions/gemma-4-good-hackathon)

*Gemma 4 Good Hackathon — Google DeepMind × Kaggle*\
"""
# Example prompts offered by the chat interface (passed as `examples=`).
EXAMPLES = [
    # Live tool routing
    {"text": "What's the current solar production?"},
    {"text": "Full community energy audit — check weather, solar, battery, and grid pricing. Give a 3-sentence status report."},
    # NREL probe — exercises the 5th tool (typical-year baseline comparison)
    {"text": "Is today's production above typical for this month?"},
    # Domain knowledge (no tool expected)
    {"text": "Should I run my pool heater now or wait?"},
    {"text": "Home #7's panels are producing 15% less than neighbors. What should we check?"},
    # When2Call probes (Ross et al. 2025, arXiv:2504.18851) — let judges
    # experience the trained refusal/follow-up behavior live
    {"text": "What's the current grid rate?"},  # (b) well-specified → expect get_grid_status call
    {"text": "How much will a 10 kW array produce today?"},  # (c) under-specified → expect follow-up question, NO auto-fill
    {"text": "What's the current air quality index in Ann Arbor?"},  # (d) out-of-scope → expect graceful decline, NO hallucinated tool
]
# Page layout: title, usage notice, project description, then the multimodal
# chat interface wired to `respond`.
with gr.Blocks(
    title="SolarHive — Community Solar Intelligence",
    css=CUSTOM_CSS,
    theme=gr.themes.Default(),
) as demo:
    gr.Markdown("# SolarHive — Community Solar Intelligence")
    gr.HTML(NOTICE_HTML)
    gr.Markdown(DESCRIPTION)
    gr.ChatInterface(
        fn=respond,
        multimodal=True,
        examples=EXAMPLES,
    )

if __name__ == "__main__":
    demo.launch()