Riddhi Bhagwat committed
Commit · 4b82d89 · 1 Parent(s): 5df30d7

minor changes for debugging
ml/eval/evaluate_arguments.py
CHANGED
@@ -3,7 +3,7 @@ from dataclasses import dataclass, field
 @dataclass
 class EvalArguments:
     model_name_or_path: str = field(
-        default="
+        default="CohereForAI/aya-expanse-8b", metadata={"help": "Name to a huggingface native pretrained model or path to a model on disk."})
     model_pretrained_lora_weights: str = field(
         default=None, metadata={"help": "Path to a checkpoint directory."})
     output_filepath: str = field(
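For context, a minimal sketch of what the updated EvalArguments dataclass could look like. Only model_name_or_path, model_pretrained_lora_weights, and output_filepath appear in this hunk; the remaining fields (bfloat16, reward_output_fmt, apply_sigmoid_to_reward, per_device_batch_size, result_filename) are inferred from the constructor call added in evaluation_pipeline.py below, and their defaults and help strings here are illustrative guesses, not copied from the repository.

from dataclasses import dataclass, field
from typing import Optional

@dataclass
class EvalArguments:
    # Fields beyond the first three are inferred from the constructor call in
    # evaluation_pipeline.py; their defaults here are illustrative guesses.
    model_name_or_path: str = field(
        default="CohereForAI/aya-expanse-8b",
        metadata={"help": "Name of a huggingface native pretrained model or path to a model on disk."})
    model_pretrained_lora_weights: Optional[str] = field(
        default=None, metadata={"help": "Path to a checkpoint directory."})
    output_filepath: Optional[str] = field(
        default=None, metadata={"help": "Path to the evaluation data JSON file."})
    result_filename: Optional[str] = field(
        default=None, metadata={"help": "Where to write reward-score results."})
    per_device_batch_size: int = field(
        default=8, metadata={"help": "Scoring batch size per device."})
    reward_output_fmt: str = field(
        default='1-0', metadata={"help": "Format used when reporting reward outputs."})
    apply_sigmoid_to_reward: bool = field(
        default=False, metadata={"help": "Whether to pass raw rewards through a sigmoid."})
    bfloat16: bool = field(
        default=True, metadata={"help": "Load the model in bfloat16."})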
ml/eval/evaluation_pipeline.py
CHANGED
@@ -5,6 +5,8 @@ from reward_eval import process_evaluation
 from generate import generate_files
 from alpaca import alpaca_evaluator
 from bt import bradley_terry_comparison, save_results, print_metrics
+from evaluate_arguments import EvalArguments
+

 ##################
 # M-REWARD BENCH #

@@ -30,14 +32,13 @@ def evaluator_master_fn(eval_dataset: list[dict],
                         model="CohereForAI/aya-23-8B"):

     # 1. Reward score evaluation:
-    args =
-
-
-
-
-
-
-    }
+    args = EvalArguments(bfloat16=True,
+                         reward_output_fmt='1-0',
+                         apply_sigmoid_to_reward=False,
+                         per_device_batch_size=8,
+                         output_filepath='/path/to/your/data.json',
+                         result_filename=None,
+                         model_name_or_path="CohereForAI/aya-expanse-8b")
     process_evaluation(args, model_name=model, eval_data_list_dict=eval_dataset)

     # 2.
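A hedged usage sketch of the updated entry point, assuming evaluator_master_fn can be driven with just the dataset and model name (the hunk hides its other parameters) and assuming each record carries prompt/chosen/rejected keys; the real schema expected by process_evaluation is not visible in this diff.

# Illustrative only: parameter names of evaluator_master_fn other than
# eval_dataset and model, and the record keys below, are assumptions.
from evaluation_pipeline import evaluator_master_fn

eval_dataset = [
    {
        "prompt": "Translate 'good morning' to French.",
        "chosen": "Bonjour.",
        "rejected": "Bonsoir.",
    },
]

evaluator_master_fn(eval_dataset, model="CohereForAI/aya-23-8B")

Note that output_filepath='/path/to/your/data.json' in the hunk is still a placeholder and would need to point at a real file before the reward-score step can run.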
ml/eval/generate_sanity_check.py
CHANGED
@@ -45,7 +45,7 @@ ref_model = AutoModelForCausalLM.from_pretrained(
 ).to("cuda")
 print(f'loaded reference model')

-# load a
+# load a tokenizer
 ref_tokenizer = AutoTokenizer.from_pretrained(
     ref_model_args.model_name_or_path, trust_remote_code=ref_model_args.trust_remote_code
 )
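For completeness, a self-contained sketch of the load pattern this hunk documents: a reference model plus its tokenizer resolved from the same checkpoint. The checkpoint name and dtype below are placeholders; generate_sanity_check.py takes them from ref_model_args instead.

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder checkpoint; the script resolves this from ref_model_args instead.
model_name = "CohereForAI/aya-expanse-8b"

ref_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,   # assumed dtype, not shown in the hunk
    trust_remote_code=True,
).to("cuda")
print('loaded reference model')

# load a tokenizer (the comment this commit completes)
ref_tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)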