|
|
|
|
|
from mmengine.config import read_base |
|
|
import os.path as osp |
|
|
from opencompass.runners import LocalRunner, VOLCRunner |
|
|
from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner |
|
|
from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask |
|
|
|
|
|
with read_base():
    # Everything imported in this block is a config fragment.  The names are
    # later collected by suffix (`_datasets`, `_model`, `_summary_groups`)
    # through `locals()`, so the imported names themselves are significant.

    # --- Datasets: BigCodeBench ---
    from opencompass.configs.datasets.bigcodebench.bigcodebench_full_instruct_gen import (
        bigcodebench_full_instruct_datasets,
    )
    from opencompass.configs.datasets.bigcodebench.bigcodebench_hard_instruct_gen import (
        bigcodebench_hard_instruct_datasets,
    )

    # --- Datasets: LiveCodeBench ---
    from opencompass.configs.datasets.livecodebench.livecodebench_time_split_gen_a4f90b import (
        LCB_datasets,
    )

    # --- Datasets: HumanEval family ---
    from opencompass.configs.datasets.humaneval.humaneval_openai_sample_evals_gen_dcae0e import (
        humaneval_datasets,
    )
    from opencompass.configs.datasets.humaneval_pro.humaneval_pro_gen import (
        humanevalpro_datasets,
    )
    from opencompass.configs.datasets.humanevalx.humanevalx_gen_620cfa import (
        humanevalx_datasets,
    )
    from opencompass.configs.datasets.humaneval_plus.humaneval_plus_gen import (
        humaneval_plus_datasets,
    )

    # --- Datasets: MBPP family ---
    from opencompass.configs.datasets.mbpp.mbpp_gen import (
        mbpp_datasets,
    )
    from opencompass.configs.datasets.mbpp_pro.mbpp_pro_gen import (
        mbpppro_datasets,
    )

    # --- Datasets: MultiPL-E ---
    from opencompass.configs.datasets.multipl_e.multiple_gen import (
        multiple_datasets,
    )

    # --- Datasets: DS-1000 ---
    from opencompass.configs.datasets.ds1000.ds1000_service_eval_gen_cbc84f import (
        ds1000_datasets,
    )

    # --- Models ---
    # Aliased so the name ends in `_model` and is picked up by the suffix scan.
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import (
        models as lmdeploy_qwen2_5_7b_instruct_model,
    )

    # --- Summary groups ---
    from opencompass.configs.summarizers.groups.ds1000 import (
        ds1000_summary_groups,
    )
    from opencompass.configs.summarizers.groups.multipl_e import (
        multiple_summary_groups,
    )
    from opencompass.configs.summarizers.groups.humanevalx import (
        humanevalx_summary_groups,
    )
|
|
|
|
|
|
|
|
# Flatten every variable whose name ends in `_model` into a single list.
models = sum(
    (cfg_list for name, cfg_list in locals().items() if name.endswith('_model')),
    [],
)

# Give every collected model the same sequence and output length limits.
for model in models:
    model.update(max_seq_len=16384, max_out_len=8192)
|
|
|
|
|
|
|
|
# Flatten every variable whose name ends in `_datasets` into a single list.
datasets = sum(
    [group for key, group in locals().items() if key.endswith('_datasets')], []
)
|
|
|
|
|
# Override the evaluator endpoint of every HumanEval-X dataset: the address
# is set to the humanevalx path and the port is left as an empty string.
# NOTE(review): presumably this targets a hosted code-evaluation service;
# confirm the endpoint is reachable from the evaluation environment.
for item in humanevalx_datasets:
    item['eval_cfg']['evaluator'].update(
        {'ip_address': 'codeeval.opencompass.org.cn/humanevalx', 'port': ''}
    )

# Same override for every DS-1000 dataset, using the ds1000 path.
for item in ds1000_datasets:
    item['eval_cfg']['evaluator'].update(
        {'ip_address': 'codeeval.opencompass.org.cn/ds1000', 'port': ''}
    )
|
|
|
|
|
|
|
|
# Set the inferencer's output-length limit to 8192 on every dataset.
for dataset in datasets:
    dataset['infer_cfg']['inferencer'].update(max_out_len=8192)
|
|
|
|
|
|
|
|
|
|
|
# Flatten every variable whose name ends in `_summary_groups` into one list.
summary_groups = sum(
    (groups for key, groups in locals().items() if key.endswith('_summary_groups')),
    [],
)

# Add a `humanevalx` group over four language subsets, placed after all the
# collected groups.
# NOTE(review): `humanevalx_summary_groups` is also imported in this file;
# presumably this later entry is meant to take precedence - confirm.
summary_groups.append(
    dict(
        name='humanevalx',
        subsets=[
            'humanevalx-python',
            'humanevalx-cpp',
            'humanevalx-java',
            'humanevalx-js',
        ],
    )
)
|
|
# Summarizer configuration: the ordered list of entries to report, plus the
# summary groups built earlier in this file.  Two-element entries pair a
# dataset/group name with a metric name; empty strings sit between sections.
summarizer = {
    'dataset_abbrs': [
        # Headline entries: [name, metric].
        ['bigcodebench_hard_instruct', 'pass@1'],
        ['bigcodebench_full_instruct', 'pass@1'],
        ['lcb_code_generation', 'pass@1'],
        ['openai_humaneval', 'humaneval_pass@1'],
        ['mbpp', 'score'],
        ['humaneval_pro', 'pass@1'],
        ['mbpp_pro', 'pass@1'],
        ['humaneval_plus', 'humaneval_plus_pass@1'],
        ['multiple', 'naive_average'],
        ['humanevalx', 'naive_average'],
        ['ds1000', 'naive_average'],
        '',
        # HumanEval-X subsets.
        'humanevalx-python',
        'humanevalx-cpp',
        'humanevalx-java',
        'humanevalx-js',
        '',
        # DS-1000 subsets.
        'ds1000_Pandas',
        'ds1000_Numpy',
        'ds1000_Tensorflow',
        'ds1000_Scipy',
        'ds1000_Sklearn',
        'ds1000_Pytorch',
        'ds1000_Matplotlib',
        '',
        # humaneval-multiple-* subsets.
        'humaneval-multiple-cpp',
        'humaneval-multiple-cs',
        'humaneval-multiple-go',
        'humaneval-multiple-java',
        'humaneval-multiple-rb',
        'humaneval-multiple-js',
        'humaneval-multiple-php',
        'humaneval-multiple-r',
        'humaneval-multiple-rs',
        'humaneval-multiple-sh',
        '',
        # mbpp-multiple-* subsets.
        'mbpp-multiple-cpp',
        'mbpp-multiple-cs',
        'mbpp-multiple-go',
        'mbpp-multiple-java',
        'mbpp-multiple-rb',
        'mbpp-multiple-js',
        'mbpp-multiple-php',
        'mbpp-multiple-r',
        'mbpp-multiple-rs',
        'mbpp-multiple-sh',
    ],
    'summary_groups': summary_groups,
}
|
|
|
|
|
# Relative path assigned to `work_dir`.
# NOTE(review): presumably the directory where run outputs are written; confirm.
work_dir = 'outputs/code'
|
|
|