# InternVL2-2B / test_long_niah.sh
# Last commit: 644e0b6 (verified) by czczup
#   "fix compatibility issue for transformers 4.46+"
# File size: 2.19 kB (header recovered from the repository file view: raw / history / blame)
# Slurm launch settings. Each value can be overridden from the environment;
# a variable that is unset or empty falls back to the default given here.
: "${PARTITION:=Intern5}"      # partition handed to `srun -p`
: "${GPUS:=16}"                # total GPU count; GPUS / GPUS_PER_TASK gives --ntasks
: "${GPUS_PER_NODE:=8}"        # GPUs requested per node via --gres
: "${GPUS_PER_TASK:=1}"        # GPUs assigned to each task (rank)
: "${QUOTA_TYPE:=reserved}"    # value forwarded to srun's --quotatype option

# From this point on, echo every command before it runs.
set -x
# Checkpoint directory to evaluate: first positional argument, required.
# Abort early when it is missing or not a directory. An empty value would make
# the copies below target "" and place the log directory directly under "/";
# a non-existent path would make `cp` create a regular file with that name.
CHECKPOINT=${1:?"usage: $0 <checkpoint_dir> [extra evaluation args...]"}
if [ ! -d "$CHECKPOINT" ]; then
  echo "error: checkpoint directory not found: $CHECKPOINT" >&2
  exit 1
fi

# The model/tokenizer source files are looked up in the checkpoint's parent
# directory.
JOB_FOLDER=$(dirname "$CHECKPOINT")

# Files to place inside the checkpoint directory. The last entry is this
# script's name, resolved relative to the current working directory.
files=(
  "$JOB_FOLDER/configuration_intern_vit.py"
  "$JOB_FOLDER/configuration_internlm2.py"
  "$JOB_FOLDER/configuration_internvl_chat.py"
  "$JOB_FOLDER/conversation.py"
  "$JOB_FOLDER/modeling_intern_vit.py"
  "$JOB_FOLDER/modeling_internlm2.py"
  "$JOB_FOLDER/modeling_internvl_chat.py"
  "$JOB_FOLDER/tokenization_internlm2_fast.py"
  "$JOB_FOLDER/tokenization_internlm2.py"
  "test_long_niah.sh"
)

# Copy a file only when the checkpoint does not already contain one with the
# same name, so files already present there are never overwritten.
for file in "${files[@]}"; do
  dest_file="$CHECKPOINT/$(basename "$file")"
  if [ ! -f "$dest_file" ]; then
    cp -- "$file" "$CHECKPOINT/"
  fi
done
# Snapshot of all positional arguments. The submission loop later slices this
# as "${ARGS[@]:1}", i.e. everything after the checkpoint path.
ARGS=("$@")

# Task names to evaluate. Exactly one set is active at a time; to switch,
# comment out the active declaration and uncomment another. (Previously the
# subset/128k set was declared live and then silently overwritten below.)
# declare -a tasks=(
#   'retrieval-image-test-long-subset'
#   'retrieval-image-test-long-128k'
# )
# declare -a tasks=(
#   'retrieval-image-test-long-1M'
# )
declare -a tasks=(
  'retrieval-image-test-long-800k'
  'retrieval-image-test-long-512k'
)

# Label used as the prefix of the per-job progress message.
model_name="internvl"
# Submit one evaluation job per task for every configured stride value.
# Reads CHECKPOINT, tasks, ARGS, model_name and the Slurm settings
# (PARTITION, GPUS, GPUS_PER_NODE, GPUS_PER_TASK, QUOTA_TYPE) set earlier.
for STRIDE in 1; do
  # Outputs and per-task logs for this stride live inside the checkpoint dir.
  LOG_DIR="$CHECKPOINT/eval_mm_niah_ring_attn_$STRIDE"
  mkdir -p -- "$LOG_DIR"

  for task in "${tasks[@]}"; do
    echo "$(date) ${model_name}_${task}_stride_${STRIDE}"

    # stdout and stderr of the job share one log file per task.
    # Caller-supplied extra arguments ("${ARGS[@]:1}") are placed before the
    # fixed flags below, matching the original argument order.
    # NOTE(review): --quotatype and --async are not standard srun options;
    # presumably site-specific extensions (--async likely returns without
    # waiting for the job) -- confirm against the cluster's srun.
    srun -p "${PARTITION}" \
      --gres="gpu:${GPUS_PER_NODE}" \
      --ntasks=$((GPUS / GPUS_PER_TASK)) \
      --ntasks-per-node=$((GPUS_PER_NODE / GPUS_PER_TASK)) \
      --quotatype="${QUOTA_TYPE}" \
      --job-name="${STRIDE}${task}" \
      -o "${LOG_DIR}/${task}_stride_${STRIDE}.log" \
      -e "${LOG_DIR}/${task}_stride_${STRIDE}.log" \
      --async \
      python -u eval/mm_niah/eval_mm_niah_long.py \
      --checkpoint "$CHECKPOINT" \
      --outputs-dir "$LOG_DIR" \
      --task "$task" \
      --num-gpus-per-rank "${GPUS_PER_TASK}" "${ARGS[@]:1}" \
      --rope_pos_id_version 'v5' \
      --ring_attn \
      --rope_pos_id_stride "$STRIDE"

    # Short pause between submissions. This must be its own command: a
    # trailing backslash on the line above would append "sleep 0.2" to the
    # python argument list instead of running it.
    sleep 0.2
  done
done