由于经常需要做测试,所以撰写了一个测试单机多卡的bash脚本。前提是环境中已经安装nvidia驱动和cuda库,且cuda库安装在默认目录/usr/local/下。nccl我下载的是zip包,名字是nccl-master.zip;nccl-tests也是下载的zip包,名字是nccl-tests-master.zip,这两个包的名字在脚本中是写死的。将下面的脚本内容保存为脚本文件,并把前面提到的两个包放在与脚本相同的目录下,然后使用bash命令执行。
脚本内容如下:
#!/bin/bash
# Single-node multi-GPU test driver.
# Exports:
#   CURRENT_PATH - absolute, symlink-resolved directory containing this script
#   RESULT_PATH  - per-host result directory: ${CURRENT_PATH}/result_<hostname>
set -e
# Assign first and export afterwards so that a failing readlink/hostname is
# not masked by the exit status of `export`. All expansions are quoted so a
# script path containing spaces is handled correctly.
CURRENT_PATH="$(readlink -f -- "$(dirname -- "$0")")"
RESULT_PATH="${CURRENT_PATH}/result_$(hostname)"
export CURRENT_PATH RESULT_PATH
#######################################
# Verifies that nvidia-smi is available and records how many GPUs it lists.
# Globals:  GPU_TOTAL (written, exported)
# Outputs:  an error message on stderr when the check fails
# Exits:    1 when nvidia-smi is not found or no GPU line is reported
#######################################
function CHECK_GPU_COMMAND(){
  if ! command -v "nvidia-smi" >/dev/null 2>&1; then
    echo "英伟达驱动未安装!" >&2
    exit 1
  fi

  # Count only the lines that start with "GPU ". `nvidia-smi -L` can print
  # extra indented lines below a GPU (for example MIG instances), and a plain
  # `wc -l` would count those as additional cards.
  # grep -c exits non-zero when the count is 0; `|| true` keeps the "0" value
  # without tripping `set -e`, and the check below reports it properly.
  GPU_TOTAL=$(nvidia-smi -L | grep -c '^GPU ') || true
  if [[ ! "${GPU_TOTAL}" =~ ^[1-9][0-9]*$ ]]; then
    echo "未检测到可用的GPU!" >&2
    exit 1
  fi
  export GPU_TOTAL
}
#######################################
# Tests whether a GPU product name contains a model token.
# The token must not be followed by another digit, so "A100" matches
# "NVIDIA A100-SXM4-80GB" but not a longer model number such as "A1000".
# Arguments: $1 - product name, $2 - alphanumeric model token
# Returns:   0 on match, 1 otherwise
#######################################
_gpu_name_has_model() {
  local re="${2}([^0-9]|\$)"
  [[ "$1" =~ $re ]]
}

#######################################
# Determines the compute capability ("SM") number used for NVCC_GENCODE.
# Resolution order:
#   1. the model table below, matched against GPU 0's "Product Name"
#   2. a COMPUTE_SM value already present in the environment
#   3. `nvidia-smi --query-gpu=compute_cap` with the dot removed (8.9 -> 89);
#      NOTE(review): assumes the installed nvidia-smi supports the
#      compute_cap query field - when it does not, this step is skipped.
# Globals:  GPU_NAME (written, exported), COMPUTE_SM (written, exported)
# Outputs:  the detected value on stdout, an error on stderr
# Exits:    1 when no value could be determined
#######################################
function CHECK_GPU_SM(){
  local detected_sm="" cap="" entry
  local cap_re='^([0-9]+)\.([0-9]+)$'

  # Only the first "Product Name" line is used.
  GPU_NAME=$(nvidia-smi -q -i 0 \
    | awk -F ': ' '/Product Name/ && !seen++ {print $2}')
  export GPU_NAME

  # model:sm pairs. Matching is by token rather than by end-of-string anchor,
  # because product names usually carry a suffix after the model
  # (e.g. "A100-SXM4-80GB", "H100 80GB HBM3", "RTX 4090 D").
  for entry in \
    A100:80 A800:80 \
    3090:86 \
    4090:89 \
    H100:90 H800:90 H200:90 H20:90 \
    B200:100 GB200:100 \
    5090:120; do
    if _gpu_name_has_model "${GPU_NAME}" "${entry%%:*}"; then
      detected_sm=${entry##*:}
      break
    fi
  done

  if [[ -n "${detected_sm}" ]]; then
    COMPUTE_SM=${detected_sm}
  elif [[ -z "${COMPUTE_SM:-}" ]]; then
    # Unknown model and nothing preset: ask the driver directly.
    cap=$(nvidia-smi --query-gpu=compute_cap --format=csv,noheader -i 0 \
      2>/dev/null | head -n 1) || cap=""
    cap=${cap//[[:space:]]/}
    if [[ "${cap}" =~ ${cap_re} ]]; then
      COMPUTE_SM="${BASH_REMATCH[1]}${BASH_REMATCH[2]}"
    fi
  fi

  if [[ -z "${COMPUTE_SM:-}" ]]; then
    echo "没有获取到算力,请检查!" >&2
    exit 1
  fi
  echo "当前算力:${COMPUTE_SM}"
  export COMPUTE_SM
}
#######################################
# Runs one cublasMatmulBench configuration on a single GPU.
# CUDA_VISIBLE_DEVICES is set for this one command only, so the caller's
# environment is left untouched.
# Globals:   CURRENT_PATH (read)
# Arguments: $1 - GPU index, $2 - value passed to -P, $3 - value passed to -T
# Outputs:   the 10th whitespace-separated field of every benchmark output
#            line containing "CUDA" (case-insensitive)
#######################################
_cublas_bench_value() {
  CUDA_VISIBLE_DEVICES="$1" "${CURRENT_PATH}/cublasMatmulBench" \
    -P="$2" -m=15360 -n=18176 -k=8192 -T="$3" -ta=1 -B=0 -p=0 \
    | grep -i CUDA \
    | awk '{print $10}'
}

#######################################
# Benchmarks every GPU with cublasMatmulBench in eight -P modes and appends
# one summary line per GPU to ${RESULT_PATH}/cublas.log.
# Globals:  CURRENT_PATH (read), RESULT_PATH (read),
#           GPU_TOTAL (written, exported)
# Outputs:  progress messages on stdout
# Exits:    1 when the benchmark binary is missing or cannot be made executable
#######################################
function CUBLAS_BENCHMARK_TEST(){
  local bench="${CURRENT_PATH}/cublasMatmulBench"
  local i FPD FP TF HHH HSH HSS BF FPS

  if [[ ! -f "${bench}" ]]; then
    echo "未找到 ${bench}!" >&2
    exit 1
  fi
  chmod +x "${bench}" || exit 1
  mkdir -p "${RESULT_PATH}" || exit 1

  # Count only "GPU <n>:" lines; nvidia-smi -L may list sub-devices as well.
  GPU_TOTAL=$(nvidia-smi -L | grep -c '^GPU ') || true
  export GPU_TOTAL
  echo "总共有 ${GPU_TOTAL} 张GPU卡!"

  for ((i = 0; i < GPU_TOTAL; i++)); do
    echo "开始测试第 $i 号卡..."
    # The binary is always invoked via CURRENT_PATH (the same file that was
    # chmod'ed above), so the result does not depend on the working directory.
    FPD=$(_cublas_bench_value "$i" ddd 8)
    FP=$(_cublas_bench_value "$i" sss 500)
    TF=$(_cublas_bench_value "$i" sss_fast_tf32 500)
    HHH=$(_cublas_bench_value "$i" hhh 1000)
    HSH=$(_cublas_bench_value "$i" hsh 1000)
    HSS=$(_cublas_bench_value "$i" hss 1000)
    BF=$(_cublas_bench_value "$i" tst 1000)
    FPS=$(_cublas_bench_value "$i" qqssq 1000)
    echo -e "GPU $i \t| FP64: ${FPD} \t| FP32: ${FP} \t| TF32: ${TF} \t| HHH: ${HHH} \t| HSH: ${HSH} \t| HSS: ${HSS} \t| BF16: ${BF} \t| FP8: ${FPS}" >> "${RESULT_PATH}/cublas.log"
    echo "第 $i 号卡测试完成!"
  done
}
function NCCL_COMM_TESTS(){
echo ${CURRENT_PATH}
export CUDA_PATH=`ls -l /usr/local/cuda | awk '{print $NF}'`
if [ ! -d ${CURRENT_PATH}/nccl-master ]; then
echo "${CURRENT_PATH}/nccl-master 不目录存,开始解压!"
unzip ${CURRENT_PATH}/nccl-master.zip
fi
cd ${CURRENT_PATH}/nccl-master
mkdir -p ${CURRENT_PATH}/nccl
if [ -d ${CURRENT_PATH}/nccl/lib ]; then
echo "检测到编译路径 ${CURRENT_PATH}/nccl/lib 存在,开始清理编译文件!"
make clean
fi
make -j$(nproc) src.build BUILDDIR=${CURRENT_PATH}/nccl CUDA_HOME=${CUDA_PATH} NVCC_GENCODE="-gencode=arch=compute_${COMPUTE_SM},code=sm_${COMPUTE_SM}"
if [ $? -eq 0 ]; then
echo "nccl 编译完成!"
else
echo "nccl 编译失败!"
exit 1
fi
export LD_LIBRARY_PATH=${CURRENT_PATH}/nccl/lib:$LD_LIBRARY_PATH
export PATH=${CURRENT_PATH}/nccl/bin:$PATH
cd ${CURRENT_PATH}
if [ ! -d ${CURRENT_PATH}/nccl-tests-master ]; then
echo "${CURRENT_PATH}/nccl-tests-master 不目录存,开始解压!"
unzip ${CURRENT_PATH}/nccl-tests-master.zip
fi
cd ${CURRENT_PATH}/nccl-tests-master
if [ -d ${CURRENT_PATH}/nccl-tests-master/build ]; then
echo "检测到编译路径 ${CURRENT_PATH}/nccl-tests-master/build 存在,开始清理编译文件!"
make clean
fi
make CUDA_HOME=${CUDA_PATH} NCCL_HOME=${CURRENT_PATH}/nccl
if [ $? -eq 0 ]; then
echo "nccl-tests 编译完成!"
else
echo "nccl-tests 编译失败!"
exit 1
fi
echo "开始测试 all_reduce_perf"
${CURRENT_PATH}/nccl-tests-master/build/all_reduce_perf -b 8 -e 4G -f 2 -g ${GPU_TOTAL} > ${RESULT_PATH}/single_all_reduce_perf.log
sleep 5s
echo "开始测试 all_gather_perf"
${CURRENT_PATH}/nccl-tests-master/build/all_gather_perf -b 8 -e 4G -f 2 -g ${GPU_TOTAL} > ${RESULT_PATH}/single_all_gather_perf.log
sleep 5s
echo "开始测试 alltoall_perf"
${CURRENT_PATH}/nccl-tests-master/build/alltoall_perf -b 8 -e 4G -f 2 -g ${GPU_TOTAL} > ${RESULT_PATH}/single_alltoall_perf.log
echo -e "nccl-tests 测试完成!"
}
# ---- Entry point -----------------------------------------------------------
# Install unzip only when it is missing, so hosts that already have it (or
# that are not Debian-based / not running as root) are not aborted by
# `set -e` on a failing package-manager call. apt-get is used because it is
# the interface intended for scripts.
if ! command -v unzip >/dev/null 2>&1; then
  apt-get install -y unzip
fi
echo "当前路径:${CURRENT_PATH}"
# Login/profile scripts are generally not written to run under `set -e`;
# source them with errexit disabled and skip files that do not exist, then
# restore the strict mode enabled at the top of the script.
set +e
if [[ -f /etc/profile ]]; then
  source /etc/profile
fi
if [[ -f ~/.bashrc ]]; then
  source ~/.bashrc
fi
set -e
mkdir -p "${RESULT_PATH}"
CHECK_GPU_COMMAND
CHECK_GPU_SM
# The cuBLAS benchmark is disabled by default; uncomment to enable it.
#CUBLAS_BENCHMARK_TEST
NCCL_COMM_TESTS
然后存为single_nccl_test.sh文件,使用以下命令执行:
bash single_nccl_test.sh
测试结果会写入脚本所在目录下的result_$(hostname)目录。
内容版权声明:除非注明,否则皆为本站原创文章。
转载注明出处:https://sulao.cn/post/1125
评论列表