Spaces:

narcis2007
/

ClimateBERT

Running

narcis2007 Claude Opus 4.6 (1M context) committed on Apr 10

Commit

cf5586f

1 Parent(s): 6f0142a

Add Gradio demo with six ClimateBERT classifiers

Loads detector, environmental-claims, specificity, commitment, sentiment
and netzero-reduction models and aggregates them into a cheap-talk
greenwashing risk score inspired by Bingler et al. (2022).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (3) hide show

.gitignore +7 -0
app.py +235 -0
requirements.txt +2 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,7 @@

+__pycache__/
+*.pyc
+*.pyo
+.venv/
+venv/
+.env
+.DS_Store

app.py ADDED Viewed

	@@ -0,0 +1,235 @@

+"""
+ClimateBERT — Greenwashing Signal Detector (Gradio demo)
+Runs six specialized ClimateBERT models on a paragraph of text and returns
+a proxy "cheap talk" greenwashing risk score. Aligned with the EU ECGT
+Directive (applies 27 September 2026) and the proposed Green Claims Directive.
+All models are Apache-2.0, from https://huggingface.co/climatebert
+"""
+import gradio as gr
+from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
# Pipeline device index; -1 keeps inference CPU-only (HF Spaces free tier).
DEVICE = -1
# Token limit handed to both the tokenizers and the pipelines.
MAX_LEN = 512
# Registry rows: (internal_key, model_repo, tokenizer_repo_or_None)
MODELS = [
    ("detector", "climatebert/distilroberta-base-climate-detector", None),
    ("env_claims", "climatebert/environmental-claims", None),
    ("specificity", "climatebert/distilroberta-base-climate-specificity", None),
    ("commitment", "climatebert/distilroberta-base-climate-commitment", None),
    ("sentiment", "climatebert/distilroberta-base-climate-sentiment", None),
    # netzero-reduction does not ship its own tokenizer — use the base LM
    (
        "netzero",
        "climatebert/netzero-reduction",
        "climatebert/distilroberta-base-climate-f",
    ),
]
print("Loading ClimateBERT models (first run downloads ~2 GB)...")
# Maps each internal key to a ready-to-call text-classification pipeline.
PIPES = {}
for key, model_repo, tok_repo in MODELS:
    # Use the model's own repo unless a separate tokenizer repo is listed.
    tokenizer_repo = tok_repo if tok_repo else model_repo
    tok = AutoTokenizer.from_pretrained(tokenizer_repo, model_max_length=MAX_LEN)
    mdl = AutoModelForSequenceClassification.from_pretrained(model_repo)
    classifier = pipeline(
        "text-classification",
        model=mdl,
        tokenizer=tok,
        truncation=True,
        padding=True,
        max_length=MAX_LEN,
        device=DEVICE,
    )
    PIPES[key] = classifier
    print(f"  loaded {key}: {model_repo}")
print("All models loaded.")
+def _norm(label: str) -> str:
+    return (label or "").strip().lower()
+def _is_positive(label: str, positive_keywords=("yes", "claim", "climate", "true", "1")) -> bool:
+    label = _norm(label)
+    return any(kw in label for kw in positive_keywords)
def _is_non_specific(label: str) -> bool:
    """Return True when the normalized label contains "non".

    Covers spellings such as "non-specific" and "nonspecific".
    """
    return _norm(label).find("non") != -1
def _no_commitment(label: str) -> bool:
    """Return True when the normalized label starts with "no" or contains "none"."""
    normalized = _norm(label)
    # A "no" prefix already covers the exact labels "no" and "none".
    if normalized.startswith("no"):
        return True
    return "none" in normalized
def classify(text: str):
    """Score a passage for cheap-talk greenwashing signals.

    Every pipeline in ``PIPES`` is run on the stripped text and its first
    result is kept. When the detector label is positive, a risk score is
    accumulated from four signals, each weight scaled by that model's
    score: environmental claim (0.40), non-specific wording (0.30),
    no commitment (0.20) and opportunity sentiment (0.10).

    Args:
        text: Passage to analyse. ``None``, empty or whitespace-only input
            returns a prompt message without running any model.

    Returns:
        A 4-tuple ``(summary, signals, explanation, raw)``: the Markdown
        verdict, a per-model ``{"label", "confidence"}`` dict, a Markdown
        bullet list of the triggered signals, and the raw results rendered
        one per line.
    """
    cleaned = text.strip() if text else ""
    if not cleaned:
        return "Please enter some text to analyze.", {}, "", ""

    results = {}
    for name, pipe in PIPES.items():
        results[name] = pipe(cleaned)[0]

    detector = results["detector"]
    climate_related = _is_positive(detector["label"])

    # Greenwashing risk score (only meaningful if climate-related).
    # Weights follow the Bingler/Kraus/Leippold/Webersinke "cheap talk" pattern:
    # environmental claim + non-specific + no commitment + opportunity framing.
    # Each row: (result key, label predicate, weight, explanation bullet).
    weighted_signals = (
        (
            "env_claims",
            _is_positive,
            0.40,
            "- **Environmental claim detected** — subject to the EU ECGT Directive (from 27 Sep 2026).",
        ),
        (
            "specificity",
            _is_non_specific,
            0.30,
            "- **Non-specific language** — a classic cheap-talk signal.",
        ),
        (
            "commitment",
            _no_commitment,
            0.20,
            "- **No concrete commitment detected** — claim without follow-through.",
        ),
        (
            "sentiment",
            lambda lbl: "opportunity" in _norm(lbl),
            0.10,
            "- **Opportunity framing** — positive cherry-picking is common in greenwashing.",
        ),
    )
    risk = 0.0
    reasons = []
    if climate_related:
        for result_key, fires, weight, reason in weighted_signals:
            outcome = results[result_key]
            if fires(outcome["label"]):
                risk += weight * outcome["score"]
                reasons.append(reason)

    # Verdict summary
    if climate_related:
        badge = "LOW greenwashing risk"
        if risk >= 0.25:
            badge = "MODERATE greenwashing risk"
        if risk >= 0.5:
            badge = "HIGH greenwashing risk"
        risk_pct = round(risk * 100, 1)
        summary = (
            f"### Verdict: {badge}\n\n"
            f"**Risk score: {risk_pct} / 100**\n\n"
            f"Climate detector confidence: {detector['score']:.1%}"
        )
    else:
        summary = (
            "### Verdict: Not climate-related\n\n"
            f"Detector confidence: **{detector['score']:.1%}**\n\n"
            "_Greenwashing scoring is skipped for non-climate text. "
            "Other signals below are informational only._"
        )

    # Signal breakdown (dict for Gradio JSON component): display name -> result key.
    display_to_key = {
        "climate_related": "detector",
        "environmental_claim": "env_claims",
        "specificity": "specificity",
        "commitment": "commitment",
        "sentiment": "sentiment",
        "netzero_reduction": "netzero",
    }
    signals = {
        shown: {
            "label": results[result_key]["label"],
            "confidence": round(float(results[result_key]["score"]), 4),
        }
        for shown, result_key in display_to_key.items()
    }

    if reasons:
        explanation = "\n".join(reasons)
    else:
        explanation = "_No strong greenwashing signals detected._"
    raw_lines = [f"{name}: {outcome}" for name, outcome in results.items()]
    raw = "\n".join(raw_lines)
    return summary, signals, explanation, raw
# Sample passages for the "Try an example" widget. Each paragraph is wrapped
# in its own one-item row to match the single text input.
EXAMPLES = [
    [paragraph]
    for paragraph in (
        "We are proud to announce our commitment to become climate neutral by 2040 "
        "through a combination of renewable energy investments and carbon offsetting.",
        "In 2024 we reduced our Scope 1 and Scope 2 emissions by 23% year-over-year, "
        "from 145,000 tCO2e to 111,650 tCO2e, verified by an independent third-party "
        "auditor and aligned with our SBTi-validated 1.5C pathway.",
        "Our eco-friendly products are designed with the planet in mind, featuring "
        "sustainable materials and a greener approach to packaging that customers love.",
        "The quarterly earnings report showed revenue growth of 12% driven by strong "
        "performance in our core European markets and improved operational efficiency.",
        "By 2030 we aim to achieve net-zero emissions across our entire value chain, "
        "aligned with a 1.5C science-based target validated by SBTi, with interim "
        "milestones of 50% absolute reduction by 2027 against a 2020 baseline.",
    )
]
# Markdown shown at the top of the page. Blank lines separate the heading,
# lead paragraph, bullet list, model list and caveats; without them the
# "**Models**" line is rendered as a continuation of the last bullet.
INTRO = """
# ClimateBERT — Greenwashing Signal Detector

Paste a paragraph from a sustainability report, marketing copy, or corporate
disclosure. This demo runs **six specialized ClimateBERT classifiers** in parallel
to surface cheap-talk signals relevant to the upcoming EU regulations:

- **ECGT Directive** — applies 27 September 2026, bans vague green claims
  and "climate neutral via offsetting" statements.
- **Green Claims Directive** (proposed) — pre-verification of environmental claims.
- **CSRD / ESRS** — the source of text that will be scrutinized.

**Models** (all from [climatebert on Hugging Face](https://huggingface.co/climatebert), Apache-2.0):
`distilroberta-base-climate-detector`, `environmental-claims`,
`distilroberta-base-climate-specificity`, `distilroberta-base-climate-commitment`,
`distilroberta-base-climate-sentiment`, `netzero-reduction`.

> **Caveats.** Models are trained on **paragraphs** (not single sentences) and on
> **English** only. Outputs are proxy signals, not a legal verdict. Ground-truth
> greenwashing labels do not exist in any public dataset — every detector
> operationalizes proxies (specificity, commitment gap, cheap talk).
"""
# Page layout: intro on top, input column (left) and result column (right),
# then the attribution footer and the event wiring.
with gr.Blocks(title="ClimateBERT — Greenwashing Signal Detector") as demo:
    gr.Markdown(INTRO)
    with gr.Row():
        # Left column: text input, trigger button and canned examples.
        with gr.Column(scale=2):
            text_in = gr.Textbox(
                label="Text to analyze (a paragraph works best)",
                lines=8,
                placeholder="Paste a paragraph from a sustainability report, press release, or marketing page...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(examples=EXAMPLES, inputs=text_in, label="Try an example")
        # Right column: verdict, explanation, per-model JSON and raw dump.
        with gr.Column(scale=3):
            summary_out = gr.Markdown(label="Verdict")
            explain_out = gr.Markdown(label="Why this score")
            signals_out = gr.JSON(label="Per-model signal breakdown")
            with gr.Accordion("Raw model outputs", open=False):
                raw_out = gr.Textbox(label="Raw", lines=8, show_copy_button=True)
    footer_md = (
        "---\n"
        "Built on [ClimateBERT](https://huggingface.co/climatebert) by Webersinke, "
        "Kraus, Bingler & Leippold. Scoring heuristic inspired by Bingler et al., "
        "*Cheap talk and cherry-picking: What ClimateBERT has to say on corporate "
        "climate risk disclosures*, Finance Research Letters (2022)."
    )
    gr.Markdown(footer_md)
    # Output order matches the tuple returned by classify().
    result_components = [summary_out, signals_out, explain_out, raw_out]
    # The button click and pressing Enter in the textbox run the same handler.
    for register in (analyze_btn.click, text_in.submit):
        register(
            classify,
            inputs=text_in,
            outputs=result_components,
        )
if __name__ == "__main__":
    demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ transformers>=4.40.0
2	+ torch