Spaces:

Kamyar-zeinalipour
/

CS-mohadesse

Sleeping

App Files Files Community

CS-mohadesse / app.py

Kamyar-zeinalipour

Update app.py

5cb8f75 verified 6 months ago

raw

history blame contribute delete

9.38 kB

	"""
	Paragraph‑level annotation tool for rating two prompts from multiple LLMs.

	Patch 3 – show hidden rows
	--------------------------
	* Bug fix: the model rows stayed invisible after you hit Run
	because Gradio needs `gr.update(visible=…)` objects returned, not
	on‑the‑fly attribute tweaks. The init callback now returns a
	visibility update for every row container, so you’ll see the prompt,
	rating and comment widgets immediately.
	* Logic still hides surplus rows when your CSV contains fewer than
	`MAX_MODELS` models.
	* No other behaviour changed.
	"""
	from __future__ import annotations

	import gradio as gr
	import pandas as pd
	import time, random
	from typing import List

	# ---------- CONFIG ----------
	CONTENT_COL = "Content_Paragraph"
	PROMPT1_SUFFIX = "_prompt1"
	PROMPT2_SUFFIX = "_prompt2"
	PERM_COL = "perm_models"
	RATING_OPTS = ["A", "B", "C"]
	NO_COMMENT = "No comment"
	MAX_MODELS = 8 # UI reserves slots for up to this many models

	# ---------- GLOBAL STATE ----------
	df: pd.DataFrame \| None = None
	models: List[str] = []
	csv_path: str = ""
	annotator: str = ""
	TOTAL: int = 0
	current_start: float \| None = None

	# ---------- HELPERS ----------

	def discover_models() -> None:
	    """Fill the global ``models`` list from the prompt columns of ``df``.

	    A column counts as a model column when it ends with ``PROMPT1_SUFFIX``
	    and is neither a rating/comment helper column nor one of the
	    bookkeeping columns. The model name is the column name without the
	    suffix.

	    Raises:
	        ValueError: if a model lacks its ``PROMPT2_SUFFIX`` column, if no
	            model column exists, or if more than ``MAX_MODELS`` are found.
	    """
	    global models, df
	    helper_prefixes = ("rating_", "comment_")
	    bookkeeping = ["perm_models", "annotator", "annotation_time"]

	    models = []
	    for column in df.columns:
	        if not column.endswith(PROMPT1_SUFFIX):
	            continue
	        # Rating/comment columns also end in the prompt-1 suffix; skip them.
	        if column.startswith(helper_prefixes) or column in bookkeeping:
	            continue
	        name = column[:-len(PROMPT1_SUFFIX)]
	        partner = f"{name}{PROMPT2_SUFFIX}"
	        if partner not in df.columns:
	            raise ValueError(f"Found '{column}' but no matching '{partner}'")
	        models.append(name)

	    if not models:
	        raise ValueError(f"No '*{PROMPT1_SUFFIX}' columns found in CSV")
	    if len(models) > MAX_MODELS:
	        raise ValueError(
	            f"CSV has {len(models)} model columns but UI can display only "
	            f"{MAX_MODELS}. Increase MAX_MODELS and restart."
	        )


	def ensure_helper_columns() -> None:
	    """Add any missing bookkeeping columns to the global ``df``.

	    Columns are created in a fixed order: the permutation column, then a
	    rating and a comment column per model and prompt, then ``annotator``
	    and ``annotation_time``. Existing columns are left untouched.
	    """
	    global df, models
	    # (column name, default value) pairs, in the order they must be created.
	    wanted = [(PERM_COL, "")]
	    for name in models:
	        for prompt in ("prompt1", "prompt2"):
	            wanted.append((f"rating_{name}__{prompt}", ""))
	            wanted.append((f"comment_{name}__{prompt}", ""))
	    wanted.append(("annotator", ""))
	    wanted.append(("annotation_time", 0.0))

	    for column, default in wanted:
	        if column not in df.columns:
	            df[column] = default


	def first_incomplete() -> int:
	    """Return the index of the first row with an empty rating cell.

	    Every model's prompt-1 and prompt-2 rating column is checked. When no
	    row has an empty rating (or ``df`` has no rows) the result is 0.
	    """
	    global df, models
	    rating_columns = [
	        f"rating_{name}__{prompt}"
	        for name in models
	        for prompt in ("prompt1", "prompt2")
	    ]
	    for label, record in df.iterrows():
	        if any(record[column] == "" for column in rating_columns):
	            return label
	    return 0


	def get_perm(idx: int) -> List[str]:
	    """Return the model display order for row ``idx``, creating it if needed.

	    The order is stored in ``df[PERM_COL]`` as model names joined by ``|``.
	    A stored value is reused only when it is an exact permutation of the
	    global ``models`` list; otherwise a fresh random order is generated,
	    written back to ``df`` and returned.

	    Args:
	        idx: index label of the row in the global ``df``.

	    Returns:
	        The model names in the order they are displayed for this row.
	    """
	    global df, models
	    separator = "|"
	    stored = str(df.at[idx, PERM_COL]).strip()
	    if stored:
	        order = stored.split(separator)
	        # Compare as sorted lists rather than sets: a stored value that
	        # repeats a model would pass a set comparison yet produce more
	        # entries than there are models.
	        if sorted(order) == sorted(models):
	            return order
	    order = models.copy()
	    random.shuffle(order)
	    df.at[idx, PERM_COL] = separator.join(order)
	    return order

	# ---------- ROW I/O ----------

	def build_row(idx: int):
	    """Return the widget values for row ``idx``, one per ``common_outputs`` entry.

	    The list holds: the index twice, the header text, the content
	    paragraph, two output texts per slot, two ratings per slot, two
	    comments per slot, then updates for the Back and Next buttons. Slots
	    beyond the models of this row are padded. Also records the current
	    time in the global ``current_start``.
	    """
	    global df, models, current_start, TOTAL
	    record = df.loc[idx]
	    shown_models = get_perm(idx)
	    prompts = ("prompt1", "prompt2")

	    texts, scores, notes = [], [], []
	    for name in shown_models:
	        texts += [
	            record[f"{name}{PROMPT1_SUFFIX}"],
	            record[f"{name}{PROMPT2_SUFFIX}"],
	        ]
	        # An empty rating cell becomes None.
	        scores += [record[f"rating_{name}__{p}"] or None for p in prompts]
	        notes += [record[f"comment_{name}__{p}"] for p in prompts]

	    # Fill the unused widget slots so the list length stays constant.
	    spare = MAX_MODELS - len(shown_models)
	    texts += ["", ""] * spare
	    scores += ["A", "A"] * spare
	    notes += ["", ""] * spare

	    current_start = time.time()
	    rated = scores[:2 * len(models)]
	    ready = all(score in RATING_OPTS for score in rated)
	    header = f"Example {idx + 1}/{TOTAL}"

	    return [
	        idx,
	        idx,
	        header,
	        record[CONTENT_COL],
	        *texts,
	        *scores,
	        *notes,
	        gr.update(),
	        gr.update(interactive=ready),
	    ]


	def save_row(idx: int, ratings: List[str], comments: List[str]):
	    """Write the ratings and comments for row ``idx`` into ``df`` and save the CSV.

	    Nothing is written unless the first ``2 * len(models)`` ratings are
	    all valid options. ``ratings`` and ``comments`` are in display order,
	    two entries per model, so they are mapped back through the row's
	    stored permutation. Blank comments are stored as ``NO_COMMENT``.
	    """
	    global df, annotator, csv_path, current_start
	    required = ratings[:2 * len(models)]
	    if any(value not in RATING_OPTS for value in required):
	        return

	    if current_start:
	        elapsed = time.time() - current_start
	    else:
	        elapsed = 0.0

	    for slot, name in enumerate(get_perm(idx)):
	        first, second = 2 * slot, 2 * slot + 1
	        df.at[idx, f"rating_{name}__prompt1"] = ratings[first]
	        df.at[idx, f"rating_{name}__prompt2"] = ratings[second]
	        note1 = comments[first].strip() or NO_COMMENT
	        note2 = comments[second].strip() or NO_COMMENT
	        df.at[idx, f"comment_{name}__prompt1"] = note1
	        df.at[idx, f"comment_{name}__prompt2"] = note2

	    df.at[idx, "annotator"] = annotator
	    df.at[idx, "annotation_time"] = float(elapsed)
	    df.to_csv(csv_path, index=False)

	# ---------- GRADIO ----------
	with gr.Blocks(title="Paragraph Annotation Tool") as demo:
	    gr.Markdown("# Paragraph Annotation Tool")

	    # Setup panel
	    with gr.Column() as setup_panel:
	        csv_upload = gr.File(label="Upload CSV", file_types=[".csv"])
	        name_input = gr.Textbox(label="Your Name")
	        run_btn = gr.Button("Run")

	    # Hidden until the init callback fills it in and makes it visible.
	    annotator_md = gr.Markdown(visible=False)

	    # Annotation panel (hidden until CSV is loaded)
	    with gr.Column(visible=False) as annotation_panel:
	        state = gr.State(0)

	        idx_box = gr.Number(label="Index", interactive=False)
	        hdr_box = gr.Markdown()
	        para_box = gr.Textbox(label="Content Paragraph", interactive=False, lines=6)

	        # One row per model slot; each row has a prompt-1 and a prompt-2
	        # column with an output box, a rating and a comment.
	        out_boxes, radio_widgets, comment_widgets = [], [], []
	        row_containers = []
	        for _ in range(MAX_MODELS):
	            with gr.Row(visible=False) as r:
	                with gr.Column():
	                    out1 = gr.Textbox(interactive=False, lines=6)
	                    rad1 = gr.Radio(RATING_OPTS, label="Rating (P1)")
	                    com1 = gr.Textbox(lines=2, label="Comment (P1)")
	                with gr.Column():
	                    out2 = gr.Textbox(interactive=False, lines=6)
	                    rad2 = gr.Radio(RATING_OPTS, label="Rating (P2)")
	                    com2 = gr.Textbox(lines=2, label="Comment (P2)")
	            out_boxes.extend((out1, out2))
	            radio_widgets.extend((rad1, rad2))
	            comment_widgets.extend((com1, com2))
	            row_containers.append(r)

	        back_btn = gr.Button("⟵ Back")
	        next_btn = gr.Button("Save & Next ⟶", interactive=False)
	        download_btn = gr.Button("Download CSV")
	        csv_file_out = gr.File()

	    # ---------- CALLBACKS ----------
	    def toggle_next(*vals):
	        """Enable the Next button once every in-use rating has a valid value."""
	        needed = 2 * len(models)
	        return gr.update(interactive=all(v in RATING_OPTS for v in vals[:needed]))

	    for r in radio_widgets:
	        r.change(toggle_next, inputs=radio_widgets, outputs=next_btn)

	    def goto(step: int):
	        """Build a click handler that saves the current row and moves by ``step``."""
	        def _fn(idx: int, *vals):
	            # vals follows common_inputs after the state: all radios, all
	            # comments, then the Next button value, which is dropped here.
	            n_rad = len(radio_widgets)
	            ratings = list(vals[:n_rad])
	            comments = list(vals[n_rad:-1])
	            if step != -1 or all(r in RATING_OPTS for r in ratings[:2*len(models)]):
	                save_row(idx, ratings, comments)
	            # Clamp to the valid row range.
	            new_idx = max(0, min(idx + step, TOTAL - 1))
	            return build_row(new_idx)
	        return _fn

	    common_inputs = [state] + radio_widgets + comment_widgets + [next_btn]
	    common_outputs = [state, idx_box, hdr_box, para_box] + \
	        out_boxes + radio_widgets + comment_widgets + \
	        [back_btn, next_btn]

	    back_btn.click(goto(-1), inputs=common_inputs, outputs=common_outputs)
	    next_btn.click(goto(1), inputs=common_inputs, outputs=common_outputs)

	    download_btn.click(lambda: csv_path, outputs=csv_file_out)

	    # ---------- INIT ----------
	    def init_annotation(uploaded_file, name):
	        """Load the uploaded CSV and return the initial value of every output.

	        Raises:
	            gr.Error: if the file or name is missing, the content column is
	                absent, or the model columns are invalid.
	        """
	        global df, csv_path, annotator, TOTAL
	        if uploaded_file is None or not name or not name.strip():
	            raise gr.Error("Please upload a CSV and enter your name.")
	        annotator = name.strip()
	        csv_path = uploaded_file.name
	        local_df = pd.read_csv(csv_path, keep_default_na=False)
	        if CONTENT_COL not in local_df.columns:
	            raise gr.Error(f"Missing required column '{CONTENT_COL}' in CSV")
	        df = local_df
	        try:
	            discover_models()
	        except ValueError as err:
	            # Report column problems the same way as the other checks.
	            raise gr.Error(str(err)) from err
	        ensure_helper_columns()
	        TOTAL = len(df)
	        df.to_csv(csv_path, index=False)

	        first_idx = first_incomplete()
	        row_vals = build_row(first_idx)

	        # visibility updates for rows: show one row per discovered model
	        vis_updates = [gr.update(visible=i < len(models)) for i in range(MAX_MODELS)]

	        # The annotator label is created hidden, so it needs a visibility
	        # update as well as its text.
	        annotator_update = gr.update(value=f"Annotator: {annotator}", visible=True)

	        return [annotator_update, gr.update(visible=True)] + vis_updates + row_vals

	    # run_outputs: annotator_md, annotation_panel, row containers..., common_outputs
	    # The real row containers are listed directly; creating placeholder
	    # gr.Row() objects here would add an extra empty row to the layout.
	    run_outputs = [annotator_md, annotation_panel] + row_containers + common_outputs

	    run_btn.click(init_annotation, inputs=[csv_upload, name_input], outputs=run_outputs)

	# Script entry point: enable the request queue, then start the server.
	if __name__ == "__main__":
	    demo.queue().launch()