#!/usr/bin/env bash
#
# Launch the long-context MM-NIAH evaluation
# (eval/mm_niah/eval_mm_niah_long.py) for a checkpoint directory, submitting
# one srun job per task.
#
# Usage:
#   <script> CHECKPOINT [extra args forwarded to eval_mm_niah_long.py ...]
#
# Environment overrides (defaults in parentheses):
#   PARTITION      srun partition                      ("Intern5")
#   GPUS           total number of GPUs                (16)
#   GPUS_PER_NODE  GPUs requested per node             (8)
#   GPUS_PER_TASK  GPUs used by each task / rank       (1)
#   QUOTA_TYPE     value passed to srun --quotatype    ("reserved")
#
# NOTE: `set -e` is intentionally not enabled; copying the support files is
# best-effort and a failed `cp` does not stop the launch.

PARTITION=${PARTITION:-"Intern5"}
GPUS=${GPUS:-16}
GPUS_PER_NODE=${GPUS_PER_NODE:-8}
GPUS_PER_TASK=${GPUS_PER_TASK:-1}
QUOTA_TYPE=${QUOTA_TYPE:-"reserved"}

# Refuse to run without a checkpoint: an empty value would make LOG_DIR
# resolve to "/eval_mm_niah_ring_attn_<stride>" at the filesystem root.
if [[ $# -lt 1 || -z "$1" ]]; then
  printf 'Usage: %s CHECKPOINT [extra eval args...]\n' "${0##*/}" >&2
  exit 2
fi

# The checkpoint is used as a directory everywhere below (copy target, parent
# of the log directory), so fail early with a clear message if it is not one.
if [[ ! -d "$1" ]]; then
  printf 'error: checkpoint directory not found: %s\n' "$1" >&2
  exit 1
fi

set -x

CHECKPOINT=$1
# Everything after the checkpoint path is forwarded to the eval script.
EXTRA_ARGS=("${@:2}")

# Support files are taken from the directory that contains the checkpoint.
JOB_FOLDER=$(dirname "$CHECKPOINT")

files=(
  "$JOB_FOLDER/configuration_intern_vit.py"
  "$JOB_FOLDER/configuration_internlm2.py"
  "$JOB_FOLDER/configuration_internvl_chat.py"
  "$JOB_FOLDER/conversation.py"
  "$JOB_FOLDER/modeling_intern_vit.py"
  "$JOB_FOLDER/modeling_internlm2.py"
  "$JOB_FOLDER/modeling_internvl_chat.py"
  "$JOB_FOLDER/tokenization_internlm2_fast.py"
  "$JOB_FOLDER/tokenization_internlm2.py"
  # Resolved relative to the current working directory
  # (presumably this launcher itself - confirm).
  "test_long_niah.sh"
)

# Copy each support file into the checkpoint directory unless a regular file
# with the same name is already there; existing files are never overwritten.
for file in "${files[@]}"; do
  dest_file="$CHECKPOINT/$(basename "$file")"
  if [[ ! -f "$dest_file" ]]; then
    cp -- "$file" "$CHECKPOINT"
  fi
done

# Tasks to evaluate.
# Alternative shorter-context list (swap in to evaluate these instead):
#   'retrieval-image-test-long-subset'
#   'retrieval-image-test-long-128k'
declare -a tasks=(
  'retrieval-image-test-long-800k'
  'retrieval-image-test-long-512k'
)

model_name="internvl"

for STRIDE in 1; do
  # One output/log directory per stride, inside the checkpoint directory.
  LOG_DIR="$CHECKPOINT/eval_mm_niah_ring_attn_$STRIDE"
  mkdir -p "$LOG_DIR"

  for task in "${tasks[@]}"; do
    echo "$(date) ${model_name}_${task}_stride_${STRIDE}"

    # stdout and stderr share one log file per task.
    # NOTE(review): --quotatype and --async are not stock Slurm srun options;
    # presumably cluster-specific extensions - confirm on the target cluster.
    # EXTRA_ARGS are placed before the rope/ring-attn flags, as in the
    # original command line, so those flags come last.
    srun -p "${PARTITION}" \
      --gres="gpu:${GPUS_PER_NODE}" \
      --ntasks=$((GPUS / GPUS_PER_TASK)) \
      --ntasks-per-node=$((GPUS_PER_NODE / GPUS_PER_TASK)) \
      --quotatype="${QUOTA_TYPE}" \
      --job-name="${STRIDE}${task}" \
      -o "${LOG_DIR}/${task}_stride_${STRIDE}.log" \
      -e "${LOG_DIR}/${task}_stride_${STRIDE}.log" \
      --async \
      python -u eval/mm_niah/eval_mm_niah_long.py \
      --checkpoint "$CHECKPOINT" \
      --outputs-dir "$LOG_DIR" \
      --task "$task" \
      --num-gpus-per-rank "${GPUS_PER_TASK}" "${EXTRA_ARGS[@]}" \
      --rope_pos_id_version 'v5' \
      --ring_attn \
      --rope_pos_id_stride "$STRIDE"

    # Short pause between consecutive submissions.
    sleep 0.2
  done
done