Spaces:
Running
Running
Commit ·
cf5586f
1
Parent(s): 6f0142a
Add Gradio demo with six ClimateBERT classifiers
Browse files
Loads detector, environmental-claims, specificity, commitment, sentiment
and netzero-reduction models and aggregates them into a cheap-talk
greenwashing risk score inspired by Bingler et al. (2022).
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
- .gitignore +7 -0
- app.py +235 -0
- requirements.txt +2 -0
.gitignore
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
__pycache__/
|
| 2 |
+
*.pyc
|
| 3 |
+
*.pyo
|
| 4 |
+
.venv/
|
| 5 |
+
venv/
|
| 6 |
+
.env
|
| 7 |
+
.DS_Store
|
app.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
ClimateBERT — Greenwashing Signal Detector (Gradio demo)
|
| 3 |
+
|
| 4 |
+
Runs six specialized ClimateBERT models on a paragraph of text and returns
|
| 5 |
+
a proxy "cheap talk" greenwashing risk score. Aligned with the EU ECGT
|
| 6 |
+
Directive (applies 27 September 2026) and the proposed Green Claims Directive.
|
| 7 |
+
|
| 8 |
+
All models are Apache-2.0, from https://huggingface.co/climatebert
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
import gradio as gr
|
| 12 |
+
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
|
| 13 |
+
|
| 14 |
+
# CPU-only (HF Spaces free tier)
DEVICE = -1
# Sequence-length cap; handed to the tokenizer as ``model_max_length`` and to
# the pipeline as ``max_length`` so over-long inputs are truncated.
MAX_LEN = 512

# Each entry: (internal_key, model_repo, tokenizer_repo_or_None)
# A ``None`` tokenizer repo means "load the tokenizer from the model repo".
MODELS = [
    ("detector", "climatebert/distilroberta-base-climate-detector", None),
    ("env_claims", "climatebert/environmental-claims", None),
    ("specificity", "climatebert/distilroberta-base-climate-specificity", None),
    ("commitment", "climatebert/distilroberta-base-climate-commitment", None),
    ("sentiment", "climatebert/distilroberta-base-climate-sentiment", None),
    # netzero-reduction does not ship its own tokenizer — use the base LM
    ("netzero", "climatebert/netzero-reduction",
     "climatebert/distilroberta-base-climate-f"),
]

# Models are loaded eagerly at import time so every request reuses the same
# pipelines; PIPES maps each internal key to its text-classification pipeline.
print("Loading ClimateBERT models (first run downloads ~2 GB)...")
PIPES = {}
for key, model_repo, tok_repo in MODELS:
    # Fall back to the model repo when no separate tokenizer repo is given.
    tok = AutoTokenizer.from_pretrained(tok_repo or model_repo, model_max_length=MAX_LEN)
    mdl = AutoModelForSequenceClassification.from_pretrained(model_repo)
    PIPES[key] = pipeline(
        "text-classification",
        model=mdl,
        tokenizer=tok,
        truncation=True,
        padding=True,
        max_length=MAX_LEN,
        device=DEVICE,
    )
    print(f" loaded {key}: {model_repo}")
print("All models loaded.")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _norm(label: str) -> str:
|
| 49 |
+
return (label or "").strip().lower()
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def _is_positive(label: str, positive_keywords=("yes", "claim", "climate", "true", "1")) -> bool:
|
| 53 |
+
label = _norm(label)
|
| 54 |
+
return any(kw in label for kw in positive_keywords)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _is_non_specific(label: str) -> bool:
|
| 58 |
+
label = _norm(label)
|
| 59 |
+
return "non" in label # "non-specific", "nonspecific"
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def _no_commitment(label: str) -> bool:
|
| 63 |
+
label = _norm(label)
|
| 64 |
+
return label in ("no", "none") or "no" == label[:2] or "none" in label
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def classify(text: str):
    """Run every loaded classifier on *text* and build the four UI outputs.

    Args:
        text: Passage to analyze. ``None``, empty, or whitespace-only input
            returns a prompt message without calling any model.

    Returns:
        A 4-tuple ``(summary, signals, explanation, raw)``:

        * ``summary`` - Markdown verdict (risk badge and score, or a
          "not climate-related" notice).
        * ``signals`` - dict mapping each signal name to
          ``{"label", "confidence"}``.
        * ``explanation`` - Markdown bullet list of the signals that
          contributed to the risk score, or a fallback sentence.
        * ``raw`` - one ``key: result`` line per model.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", {}, "", ""

    text = text.strip()

    # Keep the first entry of each pipeline's output; it is read below as a
    # mapping with "label" and "score" keys.
    results = {key: pipe(text)[0] for key, pipe in PIPES.items()}

    det = results["detector"]
    is_climate = _is_positive(det["label"])

    # Greenwashing risk score (only meaningful if climate-related)
    # Weights follow the Bingler/Kraus/Leippold/Webersinke "cheap talk" pattern:
    # environmental claim + non-specific + no commitment + opportunity framing.
    # The weights sum to 1.0, so risk stays in [0, 1] provided each model
    # score is a probability in [0, 1] (assumed, not checked here).
    risk = 0.0
    reasons = []
    if is_climate:
        claim = results["env_claims"]
        spec = results["specificity"]
        commit = results["commitment"]
        senti = results["sentiment"]

        if _is_positive(claim["label"]):
            risk += 0.40 * claim["score"]
            reasons.append(
                "- **Environmental claim detected** — subject to the EU ECGT Directive (from 27 Sep 2026)."
            )
        if _is_non_specific(spec["label"]):
            risk += 0.30 * spec["score"]
            reasons.append("- **Non-specific language** — a classic cheap-talk signal.")
        if _no_commitment(commit["label"]):
            risk += 0.20 * commit["score"]
            reasons.append("- **No concrete commitment detected** — claim without follow-through.")
        if "opportunity" in _norm(senti["label"]):
            risk += 0.10 * senti["score"]
            reasons.append("- **Opportunity framing** — positive cherry-picking is common in greenwashing.")

    risk_pct = round(risk * 100, 1)

    # Verdict summary
    if not is_climate:
        summary = (
            "### Verdict: Not climate-related\n\n"
            f"Detector confidence: **{det['score']:.1%}**\n\n"
            "_Greenwashing scoring is skipped for non-climate text. "
            "Other signals below are informational only._"
        )
    else:
        # Thresholds are applied to the unrounded score.
        if risk >= 0.5:
            badge = "HIGH greenwashing risk"
        elif risk >= 0.25:
            badge = "MODERATE greenwashing risk"
        else:
            badge = "LOW greenwashing risk"
        summary = (
            f"### Verdict: {badge}\n\n"
            f"**Risk score: {risk_pct} / 100**\n\n"
            f"Climate detector confidence: {det['score']:.1%}"
        )

    # Signal breakdown (dict for Gradio JSON component)
    def fmt(r):
        # float() makes the score a plain Python number before rounding.
        return {"label": r["label"], "confidence": round(float(r["score"]), 4)}

    signals = {
        "climate_related": fmt(det),
        "environmental_claim": fmt(results["env_claims"]),
        "specificity": fmt(results["specificity"]),
        "commitment": fmt(results["commitment"]),
        "sentiment": fmt(results["sentiment"]),
        "netzero_reduction": fmt(results["netzero"]),
    }

    explanation = "\n".join(reasons) if reasons else "_No strong greenwashing signals detected._"

    raw = "\n".join(f"{k}: {v}" for k, v in results.items())

    return summary, signals, explanation, raw
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# Sample inputs offered in the UI. Each entry is a one-element list holding
# the text for the single input component.
EXAMPLES = [
    [
        "We are proud to announce our commitment to become climate neutral by 2040 "
        "through a combination of renewable energy investments and carbon offsetting."
    ],
    [
        "In 2024 we reduced our Scope 1 and Scope 2 emissions by 23% year-over-year, "
        "from 145,000 tCO2e to 111,650 tCO2e, verified by an independent third-party "
        "auditor and aligned with our SBTi-validated 1.5C pathway."
    ],
    [
        "Our eco-friendly products are designed with the planet in mind, featuring "
        "sustainable materials and a greener approach to packaging that customers love."
    ],
    [
        "The quarterly earnings report showed revenue growth of 12% driven by strong "
        "performance in our core European markets and improved operational efficiency."
    ],
    [
        "By 2030 we aim to achieve net-zero emissions across our entire value chain, "
        "aligned with a 1.5C science-based target validated by SBTi, with interim "
        "milestones of 50% absolute reduction by 2027 against a 2020 baseline."
    ],
]
|
| 171 |
+
|
| 172 |
+
|
| 173 |
+
# Markdown shown at the top of the page.
INTRO = """
# ClimateBERT — Greenwashing Signal Detector

Paste a paragraph from a sustainability report, marketing copy, or corporate
disclosure. This demo runs **six specialized ClimateBERT classifiers** in parallel
to surface cheap-talk signals relevant to the upcoming EU regulations:

- **ECGT Directive** — applies 27 September 2026, bans vague green claims
and "climate neutral via offsetting" statements.
- **Green Claims Directive** (proposed) — pre-verification of environmental claims.
- **CSRD / ESRS** — the source of text that will be scrutinized.

**Models** (all from [climatebert on Hugging Face](https://huggingface.co/climatebert), Apache-2.0):
`distilroberta-base-climate-detector`, `environmental-claims`,
`distilroberta-base-climate-specificity`, `distilroberta-base-climate-commitment`,
`distilroberta-base-climate-sentiment`, `netzero-reduction`.

> **Caveats.** Models are trained on **paragraphs** (not single sentences) and on
> **English** only. Outputs are proxy signals, not a legal verdict. Ground-truth
> greenwashing labels do not exist in any public dataset — every detector
> operationalizes proxies (specificity, commitment gap, cheap talk).
"""
|
| 195 |
+
|
| 196 |
+
# Page layout: input column on the left, result column on the right.
with gr.Blocks(title="ClimateBERT — Greenwashing Signal Detector") as demo:
    gr.Markdown(INTRO)

    with gr.Row():
        with gr.Column(scale=2):
            text_in = gr.Textbox(
                label="Text to analyze (a paragraph works best)",
                lines=8,
                placeholder="Paste a paragraph from a sustainability report, press release, or marketing page...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(examples=EXAMPLES, inputs=text_in, label="Try an example")
        with gr.Column(scale=3):
            summary_out = gr.Markdown(label="Verdict")
            explain_out = gr.Markdown(label="Why this score")
            signals_out = gr.JSON(label="Per-model signal breakdown")
            with gr.Accordion("Raw model outputs", open=False):
                raw_out = gr.Textbox(label="Raw", lines=8, show_copy_button=True)

    gr.Markdown(
        "---\n"
        "Built on [ClimateBERT](https://huggingface.co/climatebert) by Webersinke, "
        "Kraus, Bingler & Leippold. Scoring heuristic inspired by Bingler et al., "
        "*Cheap talk and cherry-picking: What ClimateBERT has to say on corporate "
        "climate risk disclosures*, Finance Research Letters (2022)."
    )

    # The outputs list follows classify()'s return order
    # (summary, signals, explanation, raw), not the on-screen order.
    analyze_btn.click(
        classify,
        inputs=text_in,
        outputs=[summary_out, signals_out, explain_out, raw_out],
    )
    # Submitting the textbox runs the same analysis as the button.
    text_in.submit(
        classify,
        inputs=text_in,
        outputs=[summary_out, signals_out, explain_out, raw_out],
    )

if __name__ == "__main__":
    demo.launch()
|
requirements.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
transformers>=4.40.0
|
| 2 |
+
torch
|