| """Speaker chip palette, styler, and grouped-row time mapping for transcript DataFrame.""" |
|
|
| import re |
|
|
| import pandas as pd |
|
|
|
|
# Chip background colors; speakers are assigned a color by index, cycling
# through the list when there are more speakers than colors.
SPEAKER_PALETTE = [
    "#FFD23F",
    "#FF6B6B",
    "#5CE1E6",
    "#A4F47A",
    "#FF6FC0",
    "#FF9F1C",
]


def _extract_index(label: str) -> int:
    """Derive a stable integer index from a speaker label.

    The first run of digits in the label is used as the index
    (``SPEAKER_00`` -> 0, ``SPK-1`` -> 1).  A label without digits
    (e.g. ``"Alice"``) falls back to a CRC32 checksum of its UTF-8 bytes,
    reduced modulo the palette size.

    Args:
        label: Speaker label; any falsy value (``None``, ``""``) maps to 0.

    Returns:
        A non-negative integer.  Digit-derived indices are NOT reduced
        modulo the palette size; callers must do that themselves.
    """
    if not label:
        return 0
    text = str(label)
    match = re.search(r"(\d+)", text)
    if match:
        return int(match.group(1))
    # The built-in hash() of a str is salted per process (PYTHONHASHSEED), so
    # it would give the same name a different color on every run.  CRC32 is
    # stable across processes and platforms.
    import zlib

    return zlib.crc32(text.encode("utf-8", "replace")) % len(SPEAKER_PALETTE)


def color_for_speaker(label: str) -> str:
    """Return the palette color (hex string) assigned to ``label``.

    The index from ``_extract_index`` is wrapped modulo the palette length,
    so any number of speakers maps onto the available colors.
    """
    return SPEAKER_PALETTE[_extract_index(label) % len(SPEAKER_PALETTE)]
|
|
|
|
def render_chip(label: str) -> str:
    """Render ``label`` as an HTML ``<span>`` chip with its speaker color.

    The label text is HTML-escaped before interpolation: speaker labels can
    be edited by the user, so characters such as ``<``, ``>``, ``&`` and
    quotes must not be emitted as raw markup.

    Args:
        label: Speaker label to display; it is converted with ``str()``.

    Returns:
        An HTML fragment: a ``span`` of class ``brut-chip`` whose inline
        ``background`` style is the color from ``color_for_speaker``.
    """
    import html

    color = color_for_speaker(label)
    safe_label = html.escape(str(label))
    # NOTE(review): the leading chip glyph looks mis-encoded in this file;
    # it is kept as-is here -- confirm the intended character.
    return f'<span class="brut-chip" style="background:{color}">β£ {safe_label}</span>'
|
|
|
|
def style_transcript(df: pd.DataFrame):
    """Return a Styler that paints each ``Speaker`` cell with its palette color.

    Args:
        df: Transcript table.  It is returned unchanged (no Styler is built)
            when it is ``None``, empty, or has no ``"Speaker"`` column.

    Returns:
        A pandas ``Styler`` with a per-cell background color and font applied
        to the ``Speaker`` column, or ``df`` itself in the pass-through cases.
    """
    if df is None or df.empty or "Speaker" not in df.columns:
        return df

    def _color(value):
        # CSS declaration for one Speaker cell.
        chip_color = color_for_speaker(value)
        return f"background-color: {chip_color}; font-family: 'Archivo Black', sans-serif;"

    styler = df.style
    # Prefer Styler.map (pandas >= 2.1).  Styler.applymap is its deprecated
    # alias and warns on modern pandas, so it is only used when map is absent.
    apply_elementwise = getattr(styler, "map", None) or styler.applymap
    return apply_elementwise(_color, subset=["Speaker"])
|
|
|
|
def grouped_row_start_seconds(merged: list, row_idx: int) -> float:
    """Return the start time of the speaker group displayed at ``row_idx``.

    Consecutive segments sharing the same ``"speaker"`` value form one group;
    groups are numbered from 0 in order of appearance.  The start time of a
    group is the ``"start"`` value of its first segment.

    Args:
        merged: Segment dicts read via ``"speaker"`` and ``"start"`` keys.
        row_idx: Zero-based group index to look up.

    Returns:
        The group's start as a float, or ``0.0`` when ``merged`` is empty,
        ``row_idx`` is ``None``/negative/out of range, or the segment has no
        usable ``"start"`` value.
    """
    if not merged or row_idx is None or row_idx < 0:
        return 0.0

    group_idx = -1
    # A unique sentinel (not None) so that a leading segment whose speaker is
    # missing still opens group 0 instead of being silently skipped.
    prev_speaker = object()
    for seg in merged:
        spk = seg.get("speaker")
        if spk == prev_speaker:
            continue  # still inside the current group
        prev_speaker = spk
        group_idx += 1
        if group_idx == row_idx:
            # `or 0.0` covers a "start" key that is present but None.
            return float(seg.get("start") or 0.0)
    return 0.0
|
|
|
|
def build_label_map(edited_df, merged: list) -> dict:
    """Build ``{original_label: new_label}`` from an edited transcript table.

    Walks ``merged`` in order, grouping consecutive segments with the same
    ``"speaker"`` value, and pairs the N-th group with the N-th row of
    ``edited_df``.  A mapping entry is recorded when the row's speaker value,
    stripped of surrounding whitespace, is non-empty and differs from the
    group's original label.  If the same original label is renamed in several
    rows, the last rename wins.

    Args:
        edited_df: Either a DataFrame (the ``"Speaker"`` column is used, or
            the third column when there is no such column), or a list of
            rows whose third cell is the speaker.  Other objects exposing a
            ``values`` attribute are iterated as rows; anything else yields
            an empty map.
        merged: Segment dicts read via the ``"speaker"`` key.

    Returns:
        Dict of original label -> new label; empty when either argument is
        ``None`` or nothing was renamed.
    """
    if edited_df is None or merged is None:
        return {}

    def _speaker_cell(row):
        # Rows too short to hold a speaker keep a placeholder so that later
        # rows stay aligned with their group instead of shifting up.
        return row[2] if len(row) > 2 else None

    if isinstance(edited_df, pd.DataFrame):
        if "Speaker" in edited_df.columns:
            new_labels = edited_df["Speaker"].tolist()
        else:
            new_labels = [_speaker_cell(r) for r in edited_df.values.tolist()]
    elif hasattr(edited_df, "values") or isinstance(edited_df, list):
        new_labels = [_speaker_cell(r) for r in edited_df]
    else:
        return {}

    label_map: dict = {}
    group_idx = -1
    # Unique sentinel (not None) so a leading segment without a speaker still
    # counts as the first group and rows stay in step with groups.
    prev_speaker = object()
    for seg in merged:
        spk = seg.get("speaker")
        if spk == prev_speaker:
            continue  # same group as the previous segment
        prev_speaker = spk
        group_idx += 1
        if group_idx >= len(new_labels):
            break  # no edited rows left to pair with

        raw = new_labels[group_idx]
        # Blank cells come back as None / NaN / pd.NA; stringifying them would
        # rename the speaker to "None", "nan" or "<NA>".  (NaN != NaN.)
        if raw is None or raw is pd.NA or (isinstance(raw, float) and raw != raw):
            continue
        new_label = str(raw).strip()
        if new_label and new_label != spk:
            label_map[spk] = new_label
    return label_map
|
|