diff --git "a/train.log" "b/train.log"
new file mode 100644
--- /dev/null
+++ "b/train.log"
@@ -0,0 +1,1146 @@
+W0920 18:03:04.927000 339084 site-packages/torch/distributed/run.py:793] 
+W0920 18:03:04.927000 339084 site-packages/torch/distributed/run.py:793] *****************************************
+W0920 18:03:04.927000 339084 site-packages/torch/distributed/run.py:793] Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.
+W0920 18:03:04.927000 339084 site-packages/torch/distributed/run.py:793] *****************************************
+Trainer._get_train_sampler replaced with custom implementation.
+[2025-09-20 18:03:10,287] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+Trainer._get_train_sampler replaced with custom implementation.
+[2025-09-20 18:03:10,649] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+Trainer._get_train_sampler replaced with custom implementation.
+Trainer._get_train_sampler replaced with custom implementation.
+Trainer._get_train_sampler replaced with custom implementation.
+Trainer._get_train_sampler replaced with custom implementation.
+Trainer._get_train_sampler replaced with custom implementation.
+Trainer._get_train_sampler replaced with custom implementation.
+[2025-09-20 18:03:11,232] [INFO] [comm.py:658:init_distributed] cdb=None
+[2025-09-20 18:03:11,273] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2025-09-20 18:03:11,290] [INFO] [comm.py:658:init_distributed] cdb=None
+[2025-09-20 18:03:11,303] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2025-09-20 18:03:11,307] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2025-09-20 18:03:11,313] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2025-09-20 18:03:11,328] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2025-09-20 18:03:11,330] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)
+[2025-09-20 18:03:12,030] [INFO] [comm.py:658:init_distributed] cdb=None
+[2025-09-20 18:03:12,046] [INFO] [comm.py:658:init_distributed] cdb=None
+[2025-09-20 18:03:12,047] [INFO] [comm.py:689:init_distributed] Initializing TorchBackend in DeepSpeed with backend nccl
+[2025-09-20 18:03:12,076] [INFO] [comm.py:658:init_distributed] cdb=None
+[2025-09-20 18:03:12,080] [INFO] [comm.py:658:init_distributed] cdb=None
+FlashAttention 3 is available
+[2025-09-20 18:03:12,085] [INFO] [comm.py:658:init_distributed] cdb=None
+FlashAttention 3 is available
+[2025-09-20 18:03:12,257] [INFO] [comm.py:658:init_distributed] cdb=None
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+FlashAttention 3 is available
+FlashAttention 3 is available
+FlashAttention 3 is available
+FlashAttention 3 is available
+FlashAttention 3 is available
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+FlashAttention 3 is available
+/home/zhengduo/miniconda3/envs/vgllm2/lib/python3.10/site-packages/magi_attention/__init__.py:23: UserWarning: You are using magi_attention without installing it. This may cause some unexpected errors.
+  warnings.warn(
+You are attempting to use Flash Attention 2.0 with a model not initialized on GPU. Make sure to move the model to GPU after initializing it on CPU with `model.to('cuda')`.
+ Loading checkpoint shards:   0%|          | 0/5 [00:00