#!/bin/bash
#
# Runs eval_math_nodes.sh once per model, in sequence, for three models:
#   1. Qwen/Qwen2.5-7B                              (template: qwen25)
#   2. andrewzh/Absolute_Zero_Reasoner-Coder-7b     (template: azr)
#   3. andrewzh2/Absolute_Zero_Reasoner-Base-7b     (template: azr)
#
# Every run uses the same benchmark list and sampling flags; only the run
# name, model id, and template differ.
#
# Environment:
#   EVAL_DIR  Optional. Directory containing eval_math_nodes.sh. Defaults to
#             the path below.
#
# Exit status:
#   0 if every evaluation command returned 0; 1 if the directory could not be
#   entered or at least one evaluation returned non-zero.

# Fail fast on typos in variable names.
set -u

EVAL_DIR="${EVAL_DIR:-/home/ubuntu/RLVR/Absolute-Zero-Reasoner/evaluation/math_eval}"
readonly EVAL_DIR
readonly BENCHMARKS="aime24,aime25,amc23,math500,olympiadbench,minerva_math"

# eval_math_nodes.sh is invoked by relative path, so running from any other
# directory would fail; stop here instead of continuing in the wrong place.
cd -- "${EVAL_DIR}" || {
  printf 'error: cannot cd to %s\n' "${EVAL_DIR}" >&2
  exit 1
}

#######################################
# Launches one evaluation with the flag set shared by all models.
# Globals:   BENCHMARKS (read)
# Arguments: $1 - run name
#            $2 - model identifier passed as --init_model
#            $3 - prompt template name passed as --template
# Returns:   exit status of eval_math_nodes.sh
#######################################
run_eval() {
  local run_name=$1
  local init_model=$2
  local template=$3

  bash eval_math_nodes.sh \
    --run_name "${run_name}" \
    --init_model "${init_model}" \
    --template "${template}" \
    --tp_size 1 \
    --temperature 0 \
    --top_p 0.95 \
    --max_tokens 16000 \
    --benchmarks "${BENCHMARKS}" \
    --n_sampling 1 \
    --just_wandb false \
    --seed 42
}

# Count failures instead of aborting so that one failed model does not
# prevent the remaining models from being evaluated.
failures=0

# Base model (Qwen2.5-7B) evaluation
echo "=== Evaluating Base Model: Qwen2.5-7B ==="
run_eval qwen25_7b_base "Qwen/Qwen2.5-7B" qwen25 \
  || failures=$((failures + 1))

# AZR Coder 7B evaluation
echo "=== Evaluating AZR Coder 7B ==="
run_eval azr_coder_7b_hf "andrewzh/Absolute_Zero_Reasoner-Coder-7b" azr \
  || failures=$((failures + 1))

# AZR Base 7B evaluation
echo "=== Evaluating AZR Base 7B ==="
run_eval azr_base_7b_hf "andrewzh2/Absolute_Zero_Reasoner-Base-7b" azr \
  || failures=$((failures + 1))

echo "=== All evaluations completed! ==="

if ((failures > 0)); then
  printf 'warning: %d evaluation(s) exited with a non-zero status\n' \
    "${failures}" >&2
  exit 1
fi