import json
import random
from urllib.parse import quote

import pandas as pd
import streamlit as st
from datasets import load_dataset
from datasets import get_dataset_config_names

st.title("Code:blue[Arena]")


# Streamlit re-executes this script from the top on every user interaction.
# The loaders below are wrapped in st.cache_resource so each dataset is read
# and walked once per server process instead of once per rerun.
@st.cache_resource(show_spinner=False)
def _load_leetcode_problems() -> dict:
    """Return the rows of "Elfsong/leetcode_data" (train split) keyed by title.

    Each row is tagged with ``type = "leetcode"`` so later code can tell the
    two problem sources apart.
    """
    problems = {}
    for problem in load_dataset("Elfsong/leetcode_data", split='train'):
        problem['type'] = "leetcode"
        problems[problem["title"]] = problem
    return problems


@st.cache_resource(show_spinner=False)
def _load_apps_problems() -> dict:
    """Return the rows of "Elfsong/APPS_Python" (test split) keyed by ``apps_<problem_id>``.

    Each row is tagged with ``type = "apps"``.
    """
    problems = {}
    for problem in load_dataset("Elfsong/APPS_Python", split='test'):
        problem['type'] = "apps"
        problems[f'apps_{problem["problem_id"]}'] = problem
    return problems


# Lookup table for every known problem: key -> row dict (with the added 'type').
# The row dicts are shared with the cache, so treat them as read-only.
problem_dict = dict()

# Venus (LeetCode) problems are merged first ...
with st.spinner("Loading Venus data...", show_time=True):
    problem_dict.update(_load_leetcode_problems())

# ... then APPS problems, so an APPS key would win on a key collision,
# exactly as with sequential insertion.
with st.spinner("Loading APPS data...", show_time=True):
    problem_dict.update(_load_apps_problems())

problem_count = len(problem_dict)


# Base URL of the single-problem view; the problem key is appended as the
# value of the "problem" query parameter.
_PROBLEM_URL_PREFIX = "https://huggingface.co/spaces/Elfsong/CodeArena/?problem="

# Model names listed on the "Models" tab.
_MODEL_LIST = [
    "deepSeek-Coder",
    "GPT-4o",
    "Claude-3-5-sonnet",
    "Gemini-1.5-flash",
    "DeepSeek-Coder-V2-Lite",
    "Claude-3-Opus",
    "Gemini-1.5-pro",
    "Llama-3.1-8B",
    "Llama-3-8B",
    "GPT-4-Turbo",
    "GPT-3.5-Turbo",
    "Mistral-Nemo",
    "CodeLlama-13b",
    "Claude-3-Haiku",
    "Mistral-7B-v0.3",
    "Codestral-22B-v0.1",
    "Claude-3-sonnet",
    "CodeLlama-34b",
    "CodeLlama-7b",
]


def _render_problem_page(problem_id: str) -> None:
    """Render the detail view (description, test cases, generator) of one problem.

    Args:
        problem_id: Key into ``problem_dict`` taken from the ``problem`` query
            parameter.
    """
    problem_instance = problem_dict.get(problem_id)
    if problem_instance is None:
        # The id comes straight from the URL; report a bad value instead of
        # crashing the page with a KeyError.
        st.error(f"Unknown problem: {problem_id}")
        return
    problem_type = problem_instance['type']

    st.header(problem_id)

    with st.expander("Problem Description"):
        # The two sources store the statement under different keys.
        if problem_type == "leetcode":
            st.markdown(problem_instance["question_content"])
        elif problem_type == "apps":
            st.markdown(problem_instance["problem_content"])

    with st.expander("Test Cases"):
        # "test_cases" is a JSON string holding a list of {"input", "output"} items.
        test_cases = json.loads(problem_instance["test_cases"])
        df = pd.DataFrame(
            {
                "input": [case['input'] for case in test_cases],
                "output": [case['output'] for case in test_cases],
            }
        )
        st.dataframe(
            df,
            column_config={
                "input": st.column_config.TextColumn("Input"),
                "output": st.column_config.TextColumn("Output"),
            },
            column_order=("input", "output"),
        )

    with st.expander("Test Case Generator"):
        if problem_type == "leetcode":
            prompt = "# For now, we only disclose the top 20 lines of the test case generator.\n# the full version will be released after the paper review process.\n"
            generator_lines = problem_instance["test_case_generator"].split("\n")
            # Only the first 20 lines of the generator are shown.
            st.code(prompt + "\n".join(generator_lines[:20]))
        else:
            st.code("Stay tuned!")


def _render_problem_tab() -> None:
    """Render the table of all loaded problems with a link to each detail view."""
    with st.spinner("Loading Framework...", show_time=True):
        entries = list(problem_dict.items())
        df = pd.DataFrame(
            {
                "problem_id": [int(problem['problem_id']) for _, problem in entries],
                "difficulty": [str(problem['difficulty']) for _, problem in entries],
                "type": [str(problem['type']) for _, problem in entries],
                # Link by the dictionary key (the value the detail view looks
                # up) and percent-encode it so keys containing spaces or
                # reserved characters form a valid query string.
                "problem_link": [_PROBLEM_URL_PREFIX + quote(str(key), safe="") for key, _ in entries],
                # Random placeholder series, regenerated on every rerun.
                "acceptance_rate": [[random.randint(0, 100) for _ in range(20)] for _ in entries],
            }
        )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                "difficulty": st.column_config.TextColumn("Difficulty", width='small'),
                "type": st.column_config.TextColumn("Type", width='small'),
                "acceptance_rate": st.column_config.LineChartColumn("Acceptance Rate", y_min=0, y_max=100),
                "problem_link": st.column_config.LinkColumn("Link", display_text="Open", width='small'),
            },
            height=800,
            column_order=("problem_id", "difficulty", "type", "acceptance_rate", "problem_link"),
            hide_index=True,
        )


def _render_submission_tab() -> None:
    """Render the solutions of the model chosen in the select box."""
    st.header("Submissions")
    # Each config of the evaluation dataset is offered as one selectable model.
    models = get_dataset_config_names("Elfsong/Venus_Model_Evaluation")
    model_name = st.selectbox("Which model you are looking for?", models, placeholder="Select a model...")
    st.write("You selected:", model_name)

    with st.spinner("Loading Data...", show_time=True):
        ds = load_dataset("Elfsong/Venus_Model_Evaluation", model_name, split='train')
        df = pd.DataFrame(
            {
                "problem_id": [int(row['problem_id']) for row in ds],
                "solution": [str(row['solution']) for row in ds],
            }
        )
        st.dataframe(
            df,
            column_config={
                "problem_id": st.column_config.NumberColumn("Problem ID", width='small'),
                # 'large' is the widest named preset; 'big' is not a valid width.
                "solution": st.column_config.TextColumn("Solution", width='large'),
            },
            height=800,
            column_order=("problem_id", "solution"),
            hide_index=True,
        )


def _render_model_tab() -> None:
    """Render the model leaderboard; all metrics are placeholders for now."""
    zeros = [0] * len(_MODEL_LIST)
    df = pd.DataFrame(
        {
            "model_name": list(_MODEL_LIST),
            "dynamic_point": list(zeros),
            "pass@1": list(zeros),
            "beyond@t": list(zeros),
            "beyond@m": list(zeros),
            # randint is inclusive on both ends, so the upper bound must be
            # problem_count itself to stay within the progress bar's max_value.
            "model_progress": [random.randint(0, problem_count) for _ in _MODEL_LIST],
        }
    )

    st.dataframe(
        df,
        column_config={
            "model_name": st.column_config.TextColumn("Model Name"),
            "dynamic_point": st.column_config.NumberColumn("Dynamic Point"),
            "pass@1": st.column_config.NumberColumn("Pass@1"),
            "beyond@t": st.column_config.NumberColumn("Beyond@Time"),
            "beyond@m": st.column_config.NumberColumn("Beyond@Memory"),
            "model_progress": st.column_config.ProgressColumn("Progress", min_value=0, max_value=problem_count, format="compact"),
        },
        # column_order takes column keys, not display labels.
        column_order=("model_name", "dynamic_point", "pass@1", "beyond@t", "beyond@m", "model_progress"),
        height=800,
    )


def _render_about_tab() -> None:
    """Render the static "About" text."""
    st.write("Hello World!")
    st.write("This is the new version of Code Arena. Refer to [Monolith](https://github.com/Elfsong/Monolith) for instructions on how to submit code.")
    st.write("🚧 WIP: We will update real data very soon!")


# Routing: a "problem" query parameter selects the single-problem view;
# otherwise the tabbed overview is shown.
if "problem" in st.query_params:
    _render_problem_page(str(st.query_params["problem"]))
else:
    tab_problem, tab_submission, tab_model, tab_about = st.tabs(["Problems", "Submissions", "Models", "About"])

    with tab_problem:
        _render_problem_tab()

    with tab_submission:
        _render_submission_tab()

    with tab_model:
        _render_model_tab()

    with tab_about:
        _render_about_tab()