narcis2007 Claude Opus 4.6 (1M context) committed on
Commit
cf5586f
·
1 Parent(s): 6f0142a

Add Gradio demo with six ClimateBERT classifiers

Browse files

Loads detector, environmental-claims, specificity, commitment, sentiment
and netzero-reduction models and aggregates them into a cheap-talk
greenwashing risk score inspired by Bingler et al. (2022).

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>

Files changed (3) hide show
  1. .gitignore +7 -0
  2. app.py +235 -0
  3. requirements.txt +2 -0
.gitignore ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ .venv/
5
+ venv/
6
+ .env
7
+ .DS_Store
app.py ADDED
@@ -0,0 +1,235 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ ClimateBERT — Greenwashing Signal Detector (Gradio demo)
3
+
4
+ Runs six specialized ClimateBERT models on a paragraph of text and returns
5
+ a proxy "cheap talk" greenwashing risk score. Aligned with the EU ECGT
6
+ Directive (applies 27 September 2026) and the proposed Green Claims Directive.
7
+
8
+ All models are Apache-2.0, from https://huggingface.co/climatebert
9
+ """
10
+
11
+ import gradio as gr
12
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
13
+
14
# Inference is CPU-only (HF Spaces free tier); this value is passed as the
# pipelines' ``device`` argument.
DEVICE = -1
# Token limit handed to both the tokenizers and the pipelines.
MAX_LEN = 512

# One row per classifier: (internal_key, model_repo, tokenizer_repo_or_None).
# A ``None`` tokenizer repo means the tokenizer is loaded from the model repo.
MODELS = [
    ("detector", "climatebert/distilroberta-base-climate-detector", None),
    ("env_claims", "climatebert/environmental-claims", None),
    ("specificity", "climatebert/distilroberta-base-climate-specificity", None),
    ("commitment", "climatebert/distilroberta-base-climate-commitment", None),
    ("sentiment", "climatebert/distilroberta-base-climate-sentiment", None),
    # netzero-reduction does not ship its own tokenizer — use the base LM
    (
        "netzero",
        "climatebert/netzero-reduction",
        "climatebert/distilroberta-base-climate-f",
    ),
]

# Build every text-classification pipeline once, at import time, so that
# requests only pay for inference. PIPES maps internal_key -> pipeline.
print("Loading ClimateBERT models (first run downloads ~2 GB)...")
PIPES = {}
for name, repo, tokenizer_repo in MODELS:
    tokenizer_source = tokenizer_repo if tokenizer_repo else repo
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_source, model_max_length=MAX_LEN
    )
    classifier = AutoModelForSequenceClassification.from_pretrained(repo)
    pipeline_options = dict(
        model=classifier,
        tokenizer=tokenizer,
        truncation=True,
        padding=True,
        max_length=MAX_LEN,
        device=DEVICE,
    )
    PIPES[name] = pipeline("text-classification", **pipeline_options)
    print(f" loaded {name}: {repo}")
print("All models loaded.")
46
+
47
+
48
+ def _norm(label: str) -> str:
49
+ return (label or "").strip().lower()
50
+
51
+
52
+ def _is_positive(label: str, positive_keywords=("yes", "claim", "climate", "true", "1")) -> bool:
53
+ label = _norm(label)
54
+ return any(kw in label for kw in positive_keywords)
55
+
56
+
57
+ def _is_non_specific(label: str) -> bool:
58
+ label = _norm(label)
59
+ return "non" in label # "non-specific", "nonspecific"
60
+
61
+
62
+ def _no_commitment(label: str) -> bool:
63
+ label = _norm(label)
64
+ return label in ("no", "none") or "no" == label[:2] or "none" in label
65
+
66
+
67
def classify(text: str):
    """Run every loaded classifier on *text* and build the demo outputs.

    Args:
        text: Paragraph entered by the user.

    Returns:
        A 4-tuple ``(summary, signals, explanation, raw)``:

        * ``summary`` -- markdown verdict (risk badge and score, or a
          "not climate-related" notice).
        * ``signals`` -- dict mapping each signal name to
          ``{"label", "confidence"}``.
        * ``explanation`` -- markdown bullet list of the signals that raised
          the score, or a placeholder sentence when none fired.
        * ``raw`` -- one ``key: prediction`` line per model.

        Blank input short-circuits to a prompt message with empty
        placeholders for the other three outputs.
    """
    if not text or not text.strip():
        return "Please enter some text to analyze.", {}, "", ""

    text = text.strip()

    # The first element of each pipeline's output is used as the prediction;
    # it is read below through its "label" and "score" entries.
    results = {key: pipe(text)[0] for key, pipe in PIPES.items()}

    det = results["detector"]
    is_climate = _is_positive(det["label"])

    # Greenwashing risk score (only meaningful if climate-related).
    # Weights follow the Bingler/Kraus/Leippold/Webersinke "cheap talk" pattern:
    # environmental claim + non-specific + no commitment + opportunity framing.
    risk = 0.0
    reasons = []
    if is_climate:
        # (model key, weight, predicate on the label, reason shown to the user)
        checks = (
            (
                "env_claims",
                0.40,
                _is_positive,
                "- **Environmental claim detected** — subject to the EU ECGT Directive (from 27 Sep 2026).",
            ),
            (
                "specificity",
                0.30,
                _is_non_specific,
                "- **Non-specific language** — a classic cheap-talk signal.",
            ),
            (
                "commitment",
                0.20,
                _no_commitment,
                "- **No concrete commitment detected** — claim without follow-through.",
            ),
            (
                "sentiment",
                0.10,
                lambda label: "opportunity" in _norm(label),
                "- **Opportunity framing** — positive cherry-picking is common in greenwashing.",
            ),
        )
        for model_key, weight, fires, reason in checks:
            prediction = results[model_key]
            if fires(prediction["label"]):
                # Scale each weight by the model's confidence in its label.
                risk += weight * prediction["score"]
                reasons.append(reason)

    risk_pct = round(risk * 100, 1)

    # Verdict summary
    if not is_climate:
        summary = (
            f"### Verdict: Not climate-related\n\n"
            f"Detector confidence: **{det['score']:.1%}**\n\n"
            f"_Greenwashing scoring is skipped for non-climate text. "
            f"Other signals below are informational only._"
        )
    else:
        if risk >= 0.5:
            badge = "HIGH greenwashing risk"
        elif risk >= 0.25:
            badge = "MODERATE greenwashing risk"
        else:
            badge = "LOW greenwashing risk"
        summary = (
            f"### Verdict: {badge}\n\n"
            f"**Risk score: {risk_pct} / 100**\n\n"
            f"Climate detector confidence: {det['score']:.1%}"
        )

    # Signal breakdown (dict for Gradio JSON component)
    def fmt(r):
        return {"label": r["label"], "confidence": round(float(r["score"]), 4)}

    signals = {
        "climate_related": fmt(det),
        "environmental_claim": fmt(results["env_claims"]),
        "specificity": fmt(results["specificity"]),
        "commitment": fmt(results["commitment"]),
        "sentiment": fmt(results["sentiment"]),
        "netzero_reduction": fmt(results["netzero"]),
    }

    explanation = "\n".join(reasons) if reasons else "_No strong greenwashing signals detected._"

    raw = "\n".join(f"{k}: {v}" for k, v in results.items())

    return summary, signals, explanation, raw
145
+
146
+
147
# Sample paragraphs offered in the UI. Each row is a one-element list holding
# the text for the single input component.
EXAMPLES = [
    [paragraph]
    for paragraph in (
        (
            "We are proud to announce our commitment to become climate neutral "
            "by 2040 through a combination of renewable energy investments and "
            "carbon offsetting."
        ),
        (
            "In 2024 we reduced our Scope 1 and Scope 2 emissions by 23% "
            "year-over-year, from 145,000 tCO2e to 111,650 tCO2e, verified by an "
            "independent third-party auditor and aligned with our SBTi-validated "
            "1.5C pathway."
        ),
        (
            "Our eco-friendly products are designed with the planet in mind, "
            "featuring sustainable materials and a greener approach to packaging "
            "that customers love."
        ),
        (
            "The quarterly earnings report showed revenue growth of 12% driven by "
            "strong performance in our core European markets and improved "
            "operational efficiency."
        ),
        (
            "By 2030 we aim to achieve net-zero emissions across our entire value "
            "chain, aligned with a 1.5C science-based target validated by SBTi, "
            "with interim milestones of 50% absolute reduction by 2027 against a "
            "2020 baseline."
        ),
    )
]
171
+
172
+
173
# Markdown shown at the top of the page: what the demo does, the regulations
# it relates to, the models it runs, and its caveats.
INTRO = """
# ClimateBERT — Greenwashing Signal Detector

Paste a paragraph from a sustainability report, marketing copy, or corporate
disclosure. This demo runs **six specialized ClimateBERT classifiers** in parallel
to surface cheap-talk signals relevant to the upcoming EU regulations:

- **ECGT Directive** — applies 27 September 2026, bans vague green claims
and "climate neutral via offsetting" statements.
- **Green Claims Directive** (proposed) — pre-verification of environmental claims.
- **CSRD / ESRS** — the source of text that will be scrutinized.

**Models** (all from [climatebert on Hugging Face](https://huggingface.co/climatebert), Apache-2.0):
`distilroberta-base-climate-detector`, `environmental-claims`,
`distilroberta-base-climate-specificity`, `distilroberta-base-climate-commitment`,
`distilroberta-base-climate-sentiment`, `netzero-reduction`.

> **Caveats.** Models are trained on **paragraphs** (not single sentences) and on
> **English** only. Outputs are proxy signals, not a legal verdict. Ground-truth
> greenwashing labels do not exist in any public dataset — every detector
> operationalizes proxies (specificity, commitment gap, cheap talk).
"""
195
+
196
# Page layout: intro text, then a two-column row (input + examples on the
# left, results on the right), then an attribution footer.
with gr.Blocks(title="ClimateBERT — Greenwashing Signal Detector") as demo:
    gr.Markdown(INTRO)

    with gr.Row():
        with gr.Column(scale=2):
            text_in = gr.Textbox(
                label="Text to analyze (a paragraph works best)",
                lines=8,
                placeholder="Paste a paragraph from a sustainability report, press release, or marketing page...",
            )
            analyze_btn = gr.Button("Analyze", variant="primary")
            gr.Examples(examples=EXAMPLES, inputs=text_in, label="Try an example")
        with gr.Column(scale=3):
            summary_out = gr.Markdown(label="Verdict")
            explain_out = gr.Markdown(label="Why this score")
            signals_out = gr.JSON(label="Per-model signal breakdown")
            with gr.Accordion("Raw model outputs", open=False):
                raw_out = gr.Textbox(label="Raw", lines=8, show_copy_button=True)

    gr.Markdown(
        "---\n"
        "Built on [ClimateBERT](https://huggingface.co/climatebert) by Webersinke, "
        "Kraus, Bingler & Leippold. Scoring heuristic inspired by Bingler et al., "
        "*Cheap talk and cherry-picking: What ClimateBERT has to say on corporate "
        "climate risk disclosures*, Finance Research Letters (2022)."
    )

    # Order must match classify's return tuple:
    # (summary, signals, explanation, raw).
    result_components = [summary_out, signals_out, explain_out, raw_out]
    # The button click and pressing Enter in the textbox run the same analysis.
    for register in (analyze_btn.click, text_in.submit):
        register(classify, inputs=text_in, outputs=result_components)

if __name__ == "__main__":
    demo.launch()
requirements.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ transformers>=4.40.0
2
+ torch