由于containerd下配置nvidia-container-runtime比较繁琐,所以写了脚本去自动修改
使用python修改config.toml文件首先需要安装toml模块
pip3 install toml
然后就可以使用toml模块的load和dump方法处理toml格式的数据了:load方法将toml格式的数据读取为字典,dump方法则反过来将字典转换为toml格式的数据并写入文件。
以下是将containerd的运行时配置自动修改为nvidia-container-runtime的脚本。
#!/usr/bin/python3
#coding: utf-8
import toml
import sys
import os
import json
import time
import shutil
import logging
import subprocess
# Root logger: INFO and above. Each record shows the date (no time of day),
# the level, and the file name / line number of the logging call.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(levelname)s %(filename)s[line:%(lineno)d] %(message)s',
    datefmt='%Y-%m-%d',
)

# Directory and file name of the containerd configuration this script edits.
config_path = "/etc/containerd"
config_name = "config.toml"

# Paths of the NVIDIA container toolkit executables; this is the list meant
# to be handed to check_nvidia_runtime().
nvidia_toolkit = [
    "/usr/bin/nvidia-container-runtime",
    "/usr/bin/nvidia-container-runtime-hook",
    "/usr/bin/nvidia-container-cli",
]
def formmat_out_put(cmd, json_format=False):
    """Run *cmd* through the shell and return what it printed.

    The command is executed with ``subprocess.getoutput``, so the result is
    the captured output as text with the trailing newline removed.

    Args:
        cmd: Shell command line to execute.
        json_format: When equal to ``False`` (the default) the output is
            split on ``"\\n"`` and returned as a list of lines. Otherwise the
            raw output string is returned unchanged; note that no JSON
            parsing is performed here despite the parameter name.

    Returns:
        A list of output lines, or the raw output string.
    """
    raw = subprocess.getoutput(cmd)
    # Equality (not truthiness) is used on purpose to mirror the original
    # ``json_format == False`` comparison.
    if json_format != False:  # noqa: E712
        return raw
    return raw.split('\n')
def check_nvidia_runtime(nvidia_toolkit):
    """Abort the script if any of the given toolkit paths does not exist.

    Args:
        nvidia_toolkit: Iterable of filesystem paths to check, in order.

    Raises:
        SystemExit: Raised via ``sys.exit`` with a message naming the first
            path that is missing.
    """
    # Find the first path that is absent; None means everything is present.
    first_missing = next(
        (path for path in nvidia_toolkit if not os.path.exists(path)),
        None,
    )
    if first_missing is not None:
        sys.exit("不能找到 {} 命令, 请检查nvidia-container-runtime是否安装!".format(first_missing))
def modify_config(config):
    """Point a parsed containerd configuration at nvidia-container-runtime.

    The dictionary is modified in place and also returned. Three things are
    changed:

    * ``plugins."io.containerd.runtime.v1.linux".runtime`` is set to
      ``nvidia-container-runtime`` (the whole section is created when it is
      absent).
    * A ``nvidia`` entry is written under the CRI plugin's
      ``containerd.runtimes`` table, replacing any existing ``nvidia`` entry.
    * The CRI plugin's ``default_runtime_name`` is set to ``nvidia``.

    Intermediate tables (``plugins``, ``io.containerd.grpc.v1.cri``,
    ``containerd``, ``runtimes``) are created when missing, so a minimal
    configuration does not raise ``KeyError``.

    Args:
        config: Dictionary form of a containerd ``config.toml``.

    Returns:
        The same ``config`` object, after modification.
    """
    # Optional override of the sandbox (pause) image, left disabled:
    # config["plugins"]["io.containerd.grpc.v1.cri"]["sandbox_image"] = "registry.cn-hangzhou.aliyuncs.com/google_containers/pause:3.8"
    plugins = config.setdefault("plugins", {})

    linux_key = "io.containerd.runtime.v1.linux"
    if linux_key in plugins:
        logging.info("找到io.containerd.runtime.v1.linux配置, 修改runtime为nvidia-container-runtime.")
        plugins[linux_key]["runtime"] = "nvidia-container-runtime"
    else:
        logging.warning("未找到io.containerd.runtime.v1.linux配置, 直接添加该配置...")
        plugins[linux_key] = {
            "no_shim": False,
            "runtime": "nvidia-container-runtime",
            "runtime_root": "",
            "shim": "containerd-shim",
            "shim_debug": False,
        }
    # The original message had no "{}" placeholder, so the dumped section
    # was silently dropped from the log line.
    logging.debug("添加/修改的io.containerd.runtime.v1.linux配置为: {}".format(json.dumps(plugins[linux_key], indent=2)))

    logging.info("添加nvidia相关配置...")
    cri_containerd = plugins.setdefault("io.containerd.grpc.v1.cri", {}).setdefault("containerd", {})
    runtimes = cri_containerd.setdefault("runtimes", {})
    runtimes["nvidia"] = {
        "privileged_without_host_devices": False,
        "runtime_engine": "",
        "runtime_root": "",
        "runtime_type": "io.containerd.runc.v2",
        "options": {
            "BinaryName": "/usr/bin/nvidia-container-runtime",
            "SystemdCgroup": True,
        },
    }

    logging.info("修改containerd默认运行时为nvidia.")
    cri_containerd["default_runtime_name"] = "nvidia"
    logging.debug("添加的nvidia配置为: {}".format(json.dumps(runtimes["nvidia"], indent=2)))
    return config
if __name__ == "__main__":
    # Script entry point: load the containerd config, switch it to the
    # nvidia runtime, back up the original file, then write the new config.
    #
    # The toolkit presence check is left disabled, as in the original script:
    # check_nvidia_runtime(nvidia_toolkit)
    config_file = os.path.join(config_path, config_name)
    if not os.path.exists(config_file):
        sys.exit("不能找到container配置文件 {}".format(config_file))

    try:
        with open(config_file, "r", encoding="utf-8") as f:
            config = toml.load(f)
        logging.debug("打印当前config配置: {}".format(config))
        new_config = modify_config(config)
        logging.debug("打印修改后的config配置: {}".format(new_config))
        # Serialize before the file on disk is touched: opening with "w"
        # truncates immediately, so a serialization error at write time
        # would leave config.toml empty or partially written.
        serialized = toml.dumps(new_config)
    except Exception as e:
        sys.exit("修改container配置文件失败, {}".format(e))

    logging.info("containerd配置修改完成, 开始备份{}文件...".format(config_name))
    # Backup lives next to the original, suffixed with a local timestamp.
    backup_file = config_file + "_" + time.strftime("%Y%m%d%H%M%S", time.localtime())
    try:
        shutil.copyfile(config_file, backup_file)
    except OSError as e:
        # Never overwrite the config when no backup could be made.
        sys.exit("备份containerd配置失败, {}".format(e))

    logging.info("将修改后的container配置写回 {} 文件".format(config_name))
    try:
        with open(config_file, "w", encoding="utf-8") as f:
            f.write(serialized)
    except Exception as e:
        sys.exit("写回containerd配置失败, {}".format(e))
    logging.info("修改containerd配置操作完成!!!")
内容版权声明:除非注明,否则皆为本站原创文章。
转载注明出处:https://sulao.cn/post/951
评论列表