Spaces:
Sleeping
Sleeping
| """ | |
| Paragraph‑level annotation tool for rating two prompts from multiple LLMs. | |
| Patch 3 – show hidden rows | |
| -------------------------- | |
| * **Bug fix:** the model rows stayed invisible after you hit **Run** | |
| because Gradio needs `gr.update(visible=…)` objects returned, not | |
| on‑the‑fly attribute tweaks. The init callback now returns a | |
| visibility update for every row container, so you’ll see the prompt, | |
| rating and comment widgets immediately. | |
| * Logic still hides surplus rows when your CSV contains fewer than | |
| `MAX_MODELS` models. | |
| * No other behaviour changed. | |
| """ | |
| from __future__ import annotations | |
| import gradio as gr | |
| import pandas as pd | |
| import time, random | |
| from typing import List | |
# ---------- CONFIG ----------
# Column holding the paragraph text that is shown above the model outputs.
CONTENT_COL = "Content_Paragraph"
# A model "m" contributes two output columns: "m_prompt1" and "m_prompt2".
PROMPT1_SUFFIX = "_prompt1"
PROMPT2_SUFFIX = "_prompt2"
# Helper column storing each row's shuffled model order as "a|b|c".
PERM_COL = "perm_models"
# Choices offered by every rating radio group.
RATING_OPTS = ["A", "B", "C"]
# Text written to the CSV when a comment box is left blank.
NO_COMMENT = "No comment"
MAX_MODELS = 8 # UI reserves slots for up to this many models

# ---------- GLOBAL STATE ----------
# NOTE(review): these are module-level globals, so they are presumably shared
# by every browser session served by this process — confirm single-user use.
# The loaded CSV, including the helper/rating/comment columns added on load.
df: pd.DataFrame | None = None
# Model names discovered from the "*_prompt1" columns of the CSV.
models: List[str] = []
# Path of the uploaded CSV; annotations are written back to this file.
csv_path: str = ""
# Name entered by the person annotating.
annotator: str = ""
# Number of rows in the loaded CSV.
TOTAL: int = 0
# time.time() stamp taken when the current row was displayed (None before that).
current_start: float | None = None
| # ---------- HELPERS ---------- | |
def discover_models() -> None:
    """Detect the model names present in the loaded CSV and store them in ``models``.

    A model is any column ending in ``PROMPT1_SUFFIX`` that is not one of the
    tool's own helper columns (``rating_*``, ``comment_*``, the permutation,
    annotator and timing columns). Each such column must have a sibling column
    ending in ``PROMPT2_SUFFIX``.

    The global ``models`` list is only replaced once every check has passed,
    so a rejected CSV never leaves a half-filled model list behind.

    Raises:
        ValueError: if a ``*_prompt1`` column has no ``*_prompt2`` partner, if
            a model name contains ``|`` (the separator used to store the
            per-row model order), if no model columns exist, or if there are
            more models than ``MAX_MODELS`` UI slots.
    """
    global models
    reserved = (PERM_COL, "annotator", "annotation_time")
    found: List[str] = []
    for c in df.columns:
        if not c.endswith(PROMPT1_SUFFIX):
            continue
        # Rating/comment columns also end in "_prompt1"; they are not models.
        if c.startswith(("rating_", "comment_")) or c in reserved:
            continue
        m = c[:-len(PROMPT1_SUFFIX)]
        if f"{m}{PROMPT2_SUFFIX}" not in df.columns:
            raise ValueError(f"Found '{c}' but no matching '{m}{PROMPT2_SUFFIX}'")
        if "|" in m:
            # The row permutation is stored as "a|b|c"; a name containing the
            # separator could never be read back and would reshuffle each call.
            raise ValueError(f"Model name '{m}' must not contain '|'")
        found.append(m)
    if not found:
        raise ValueError(f"No '*{PROMPT1_SUFFIX}' columns found in CSV")
    if len(found) > MAX_MODELS:
        raise ValueError(f"CSV has {len(found)} model columns but UI can display only {MAX_MODELS}. Increase MAX_MODELS and restart.")
    models = found
def ensure_helper_columns() -> None:
    """Add every bookkeeping column the tool writes to, if the CSV lacks it.

    Columns are created in this order and existing ones are left untouched:
    the permutation column, then ``rating_<model>__<prompt>`` and
    ``comment_<model>__<prompt>`` for each model and prompt, then
    ``annotator`` and ``annotation_time``. All default to an empty string
    except ``annotation_time``, which defaults to ``0.0``.
    """
    global df, models
    # Column name -> fill value; dict order is the order columns get appended.
    wanted = {PERM_COL: ""}
    for name in models:
        for prompt in ("prompt1", "prompt2"):
            wanted[f"rating_{name}__{prompt}"] = ""
            wanted[f"comment_{name}__{prompt}"] = ""
    wanted["annotator"] = ""
    wanted["annotation_time"] = 0.0

    for column, fill in wanted.items():
        if column not in df.columns:
            df[column] = fill
def first_incomplete() -> int:
    """Return the index label of the first row that still needs rating.

    A row is complete only when every ``rating_<model>__prompt1/2`` cell holds
    one of ``RATING_OPTS`` — the same rule the save and "Next" logic apply —
    so blank cells and stray values both count as incomplete.

    Returns:
        The label of the first incomplete row, or ``0`` when every row is
        complete or there are no models to rate.
    """
    rating_cols = [
        f"rating_{m}__{p}" for m in models for p in ("prompt1", "prompt2")
    ]
    if not rating_cols:
        return 0
    # One boolean per row: True when all of its rating cells are valid.
    complete = df[rating_cols].isin(RATING_OPTS).all(axis=1)
    for label, done in complete.items():
        if not done:
            return label
    return 0
def get_perm(idx: int) -> List[str]:
    """Return the display order of the models for row *idx*.

    The order is stored in the ``PERM_COL`` cell as ``"a|b|c"``. A stored
    order is reused only if it lists every current model exactly once;
    otherwise a fresh random order is drawn and written back to the cell
    (in memory only — this function does not save the CSV).

    Args:
        idx: Index label of the row in the global ``df``.

    Returns:
        The model names in the order they should be shown.
    """
    global df, models
    cell = str(df.at[idx, PERM_COL]).strip()
    if cell:
        seq = cell.split("|")
        # Compare as sorted lists, not sets: a set check would accept
        # "a|a|b", which has more entries than there are widget slots.
        if sorted(seq) == sorted(models):
            return seq
    seq = list(models)
    random.shuffle(seq)
    df.at[idx, PERM_COL] = "|".join(seq)
    return seq
| # ---------- ROW I/O ---------- | |
def build_row(idx: int):
    """Collect the widget values needed to display row *idx*.

    The returned list lines up with ``common_outputs``: the index (twice, for
    the state and the index box), the header text, the content paragraph,
    then ``2 * MAX_MODELS`` output texts, ratings and comments, and finally
    updates for the Back and Next buttons. Models appear in the row's stored
    permutation; unused slots are padded with empty text and an ``"A"``
    rating. Next is enabled only when every real rating is already valid.

    Side effect: restarts the annotation timer (``current_start``).
    """
    global current_start
    record = df.loc[idx]
    shown = get_perm(idx)

    texts, marks, notes = [], [], []
    for name in shown:
        texts.append(record[f"{name}{PROMPT1_SUFFIX}"])
        texts.append(record[f"{name}{PROMPT2_SUFFIX}"])
        for prompt in ("prompt1", "prompt2"):
            # An empty stored rating becomes None so the radio shows no choice.
            marks.append(record[f"rating_{name}__{prompt}"] or None)
            notes.append(record[f"comment_{name}__{prompt}"])

    # Fill the slots of the hidden, unused model rows.
    spare = MAX_MODELS - len(shown)
    texts += ["", ""] * spare
    marks += ["A", "A"] * spare
    notes += ["", ""] * spare

    current_start = time.time()
    rated = marks[:2 * len(models)]
    ready = all(mark in RATING_OPTS for mark in rated)
    header = f"Example {idx + 1}/{TOTAL}"
    return [
        idx,
        idx,
        header,
        record[CONTENT_COL],
        *texts,
        *marks,
        *notes,
        gr.update(),
        gr.update(interactive=ready),
    ]
def save_row(idx: int, ratings: List[str], comments: List[str]):
    """Write the ratings and comments for row *idx* into ``df`` and save the CSV.

    Nothing is written unless a valid rating (one of ``RATING_OPTS``) is
    present for both prompts of every model. Values are read in the row's
    display order and stored under the matching model's columns. Blank,
    missing or ``None`` comments are stored as ``NO_COMMENT``. The annotator
    name and the seconds elapsed since the row was displayed are recorded too.

    Args:
        idx: Index label of the row being saved.
        ratings: Radio values in display order (only the first
            ``2 * len(models)`` are used).
        comments: Comment-box values in the same order.
    """
    global df, annotator, csv_path, current_start
    needed = 2 * len(models)
    if len(ratings) < needed:
        return
    if not all(r in RATING_OPTS for r in ratings[:needed]):
        return
    elapsed = time.time() - current_start if current_start else 0.0

    slot = 0
    for m in get_perm(idx):
        for prompt in ("prompt1", "prompt2"):
            df.at[idx, f"rating_{m}__{prompt}"] = ratings[slot]
            # A comment widget may hand back None or a non-string value.
            raw = comments[slot] if slot < len(comments) else None
            text = "" if raw is None else str(raw).strip()
            df.at[idx, f"comment_{m}__{prompt}"] = text or NO_COMMENT
            slot += 1

    df.at[idx, "annotator"] = annotator
    df.at[idx, "annotation_time"] = float(elapsed)
    df.to_csv(csv_path, index=False)
| # ---------- GRADIO ---------- | |
with gr.Blocks(title="Paragraph Annotation Tool") as demo:
    gr.Markdown("# Paragraph Annotation Tool")

    # Setup panel: pick the CSV and enter the annotator's name.
    with gr.Column() as setup_panel:
        csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
        name_input = gr.Textbox(label="Your Name")
        run_btn = gr.Button("Run")
        annotator_md = gr.Markdown(visible=False)

    # Annotation panel (hidden until CSV is loaded)
    with gr.Column(visible=False) as annotation_panel:
        state = gr.State(0)
        idx_box = gr.Number(label="Index", interactive=False)
        hdr_box = gr.Markdown()
        para_box = gr.Textbox(label="Content Paragraph", interactive=False, lines=6)

        # One hidden row per possible model; each row holds the two prompt
        # outputs side by side, each with its own rating and comment widget.
        out_boxes, radio_widgets, comment_widgets = [], [], []
        row_containers = []
        for _ in range(MAX_MODELS):
            with gr.Row(visible=False) as row:
                with gr.Column():
                    out1 = gr.Textbox(interactive=False, lines=6)
                    rad1 = gr.Radio(RATING_OPTS, label="Rating (P1)")
                    com1 = gr.Textbox(lines=2, label="Comment (P1)")
                with gr.Column():
                    out2 = gr.Textbox(interactive=False, lines=6)
                    rad2 = gr.Radio(RATING_OPTS, label="Rating (P2)")
                    com2 = gr.Textbox(lines=2, label="Comment (P2)")
            out_boxes.extend((out1, out2))
            radio_widgets.extend((rad1, rad2))
            comment_widgets.extend((com1, com2))
            row_containers.append(row)

        back_btn = gr.Button("⟵ Back")
        next_btn = gr.Button("Save & Next ⟶", interactive=False)
        download_btn = gr.Button("Download CSV")
        csv_file_out = gr.File()

    # ---------- CALLBACKS ----------
    def toggle_next(*vals):
        """Enable the Next button once every visible radio has a valid rating."""
        needed = 2 * len(models)
        return gr.update(interactive=all(v in RATING_OPTS for v in vals[:needed]))

    for radio in radio_widgets:
        radio.change(toggle_next, inputs=radio_widgets, outputs=next_btn)

    def goto(step: int):
        """Build a click handler that saves the current row, then moves *step* rows."""

        def _fn(idx: int, *vals):
            n_rad = len(radio_widgets)
            ratings = list(vals[:n_rad])
            comments = list(vals[n_rad:])
            # save_row() itself skips rows whose ratings are incomplete, so
            # leaving a half-rated row simply discards that row's edits.
            save_row(idx, ratings, comments)
            # Clamp to the first/last row instead of walking off the table.
            new_idx = max(0, min(idx + step, TOTAL - 1))
            return build_row(new_idx)

        return _fn

    common_inputs = [state] + radio_widgets + comment_widgets
    common_outputs = [state, idx_box, hdr_box, para_box] + \
        out_boxes + radio_widgets + comment_widgets + \
        [back_btn, next_btn]
    back_btn.click(goto(-1), inputs=common_inputs, outputs=common_outputs)
    next_btn.click(goto(1), inputs=common_inputs, outputs=common_outputs)
    download_btn.click(lambda: csv_path, outputs=csv_file_out)

    # ---------- INIT ----------
    def init_annotation(uploaded_file, name):
        """Load the uploaded CSV and show the first row that still needs rating.

        Validates the upload, discovers the model columns, adds the helper
        columns, writes the augmented CSV back to the upload path and returns
        one value per component in ``run_outputs``. If the CSV is rejected the
        previously loaded dataset (if any) is left in place.

        Raises:
            gr.Error: if the file or name is missing, the CSV cannot be read,
                lacks the content column, has no rows, or has invalid model
                columns.
        """
        global df, models, csv_path, annotator, TOTAL
        annotator_name = (name or "").strip()
        if uploaded_file is None or not annotator_name:
            raise gr.Error("Please upload a CSV and enter your name.")

        # The upload may arrive as an object exposing `.name` or as a plain
        # path string, depending on the Gradio version; accept both.
        upload_path = getattr(uploaded_file, "name", uploaded_file)
        try:
            local_df = pd.read_csv(upload_path, keep_default_na=False)
        except (OSError, ValueError) as exc:
            # pandas' parser and empty-data errors are ValueError subclasses.
            raise gr.Error(f"Could not read CSV: {exc}") from exc
        if CONTENT_COL not in local_df.columns:
            raise gr.Error(f"Missing required column '{CONTENT_COL}' in CSV")
        if local_df.empty:
            raise gr.Error("The uploaded CSV contains no rows to annotate.")

        # The helpers read the global `df`, so swap it in for validation and
        # roll back if the CSV turns out to be unusable.
        previous_df, previous_models = df, models
        df = local_df
        try:
            discover_models()
            ensure_helper_columns()
        except ValueError as exc:
            df, models = previous_df, previous_models
            raise gr.Error(str(exc)) from exc

        annotator = annotator_name
        csv_path = upload_path
        TOTAL = len(df)
        df.to_csv(csv_path, index=False)
        first_idx = first_incomplete()
        row_vals = build_row(first_idx)
        # Show one row container per discovered model, hide the surplus ones.
        vis_updates = [gr.update(visible=i < len(models)) for i in range(MAX_MODELS)]
        # The label was created hidden, so it needs an explicit visibility
        # update; returning just the text would leave it invisible.
        annotator_update = gr.update(value=f"**Annotator:** {annotator}", visible=True)
        return [annotator_update, gr.update(visible=True)] + vis_updates + row_vals

    # run_outputs: annotator_md, annotation_panel, row containers..., common_outputs
    run_outputs = [annotator_md, annotation_panel] + row_containers + common_outputs
    run_btn.click(init_annotation, inputs=[csv_upload, name_input], outputs=run_outputs)
# Start the app with request queuing enabled, but only when run as a script.
if __name__ == "__main__":
    demo.queue().launch()